<?php
/**
 * Post Creator - Digital Rise Auto Poster
 * 
 * Professional WordPress.org compliant post creation
 * - Bulletproof content cleaning (removes ALL CSS/JS garbage)
 * - Preserves links with target="_blank"
 * - Preserves bold/italic text
 * - WebP conversion & Image SEO
 * - Professional image gallery styling
 * 
 * @package Digital_Rise_Auto_Poster
 * @since 1.0.0
 */

if ( ! defined( 'ABSPATH' ) ) {
    exit;
}

class DRAP_Poster {

    /**
     * Database helper injected at construction time.
     *
     * Used by create() to record imported items.
     *
     * @var DRAP_Database
     */
    private $db;

    /**
     * Set up the poster with its database dependency.
     *
     * @param DRAP_Database $db Database instance stored on the object for later use.
     */
    public function __construct( $db ) {
        $this->db = $db;
    }

    /**
     * Create a WordPress post from a single feed item.
     *
     * Builds the title, content and excerpt from the item, inserts the post, records
     * the item as imported, stores bookkeeping meta, assigns categories and tags and,
     * when enabled, sideloads the featured image.
     *
     * @param array  $item     Feed item data. Keys read here (all optional): title,
     *                         content, description, url, date, categories, image.
     * @param object $campaign Campaign object. Properties read here: id, options,
     *                         post_status, post_author, post_category, post_tags.
     * @return int|WP_Error    Post ID or error
     */
    public function create( $item, $campaign ) {
        $options = maybe_unserialize( $campaign->options );
        if ( ! is_array( $options ) ) {
            $options = array();
        }

        // Feeds routinely omit fields; read the optional ones once, with defaults,
        // so later code never touches an undefined array key.
        $source_url  = ! empty( $item['url'] ) ? $item['url'] : '';
        $description = ! empty( $item['description'] ) ? $item['description'] : '';

        // Prepare title. Cleaning can strip a title down to nothing, so the
        // "Untitled" fallback is applied after cleaning, not only when the key is missing.
        $post_title = ! empty( $item['title'] ) ? $this->clean_text( $item['title'] ) : '';
        if ( '' === $post_title ) {
            $post_title = __( 'Untitled', 'digital-rise-auto-poster' );
        }

        // Prepare content with professional cleaning
        $raw_content = ! empty( $item['content'] ) ? $item['content'] : $description;
        $content     = $this->process_content( $raw_content, $post_title );

        // Add source link if enabled
        if ( ! empty( $options['add_source_link'] ) && '' !== $source_url ) {
            $source_text = ! empty( $options['source_link_text'] ) ? $options['source_link_text'] : __( 'Source', 'digital-rise-auto-poster' );
            $content    .= $this->build_source_link( $source_url, $source_text );
        }

        // Prepare post data
        $post_data = array(
            'post_title'   => $post_title,
            'post_content' => $content,
            'post_status'  => $campaign->post_status,
            'post_author'  => $campaign->post_author,
            'post_type'    => 'post',
        );

        // Use original date if enabled
        if ( ! empty( $options['use_original_date'] ) && ! empty( $item['date'] ) ) {
            $post_data['post_date']     = $item['date'];
            $post_data['post_date_gmt'] = get_gmt_from_date( $item['date'] );
        }

        // Add excerpt
        if ( '' !== $description ) {
            $post_data['post_excerpt'] = wp_trim_words( $this->clean_text( $description ), 55 );
        }

        // Insert post. wp_insert_post() expects slashed input and unslashes it
        // internally; without wp_slash() any backslash in the feed text is lost.
        $post_id = wp_insert_post( wp_slash( $post_data ), true );

        if ( is_wp_error( $post_id ) ) {
            return $post_id;
        }

        // Mark as imported (duplicate prevention)
        $this->db->mark_imported( $post_id, $source_url );

        // Store meta
        update_post_meta( $post_id, '_drap_campaign_id', $campaign->id );
        update_post_meta( $post_id, '_drap_source_url', $source_url );
        update_post_meta( $post_id, '_drap_imported', current_time( 'mysql' ) );

        // Set categories
        $categories = maybe_unserialize( $campaign->post_category );
        if ( ! empty( $categories ) ) {
            wp_set_post_categories( $post_id, array_map( 'absint', (array) $categories ) );
        }

        // Set tags
        $tags = maybe_unserialize( $campaign->post_tags );
        if ( ! empty( $tags ) ) {
            wp_set_post_tags( $post_id, $tags );
        }

        // Add feed categories as tags (appended to the campaign tags, not replacing them)
        if ( ! empty( $options['feed_cats_as_tags'] ) && ! empty( $item['categories'] ) ) {
            wp_set_post_tags( $post_id, $item['categories'], true );
        }

        // Set featured image
        if ( ! empty( $options['import_image'] ) && ! empty( $item['image'] ) ) {
            $this->set_featured_image(
                $post_id,
                $item['image'],
                $post_title,
                ! empty( $options['convert_webp'] ),
                ! empty( $options['image_seo'] )
            );
        }

        return $post_id;
    }

    /**
     * Process and clean content - MAIN CLEANING FUNCTION
     *
     * Pipeline, in order:
     *  1. Decode HTML entities.
     *  2. Drop script/style/noscript/svg/iframe and form-control elements together
     *     with their contents.
     *  3. Rebuild every link through process_link().
     *  4. Strip HTML comments.
     *  5. Run wp_kses() with a fixed whitelist of tags and attributes.
     *  6. Remove elements that contain only whitespace.
     *  7. Remove images that have no usable src or are rejected by filter_image().
     *  8. Collapse runs of line breaks and spaces.
     *  9. Strip CSS-looking text via remove_css_garbage().
     * 10. Restyle the remaining images via style_content_images().
     * 11. Trim.
     *
     * A PCRE failure in any step leaves the content as it was before that step
     * instead of blanking the whole post.
     *
     * @param string $content   Raw content
     * @param string $post_title Post title for image SEO
     * @return string           Clean content
     */
    private function process_content( $content, $post_title = '' ) {
        if ( empty( $content ) ) {
            return '';
        }

        // Step 1: Decode entities
        $content = html_entity_decode( $content, ENT_QUOTES, 'UTF-8' );

        // Step 2: Remove dangerous/unwanted tags completely (including content)
        $unwanted = array(
            '/<script\b[^>]*>.*?<\/script>/is',
            '/<style\b[^>]*>.*?<\/style>/is',
            '/<noscript\b[^>]*>.*?<\/noscript>/is',
            '/<svg\b[^>]*>.*?<\/svg>/is',
            '/<iframe\b[^>]*>.*?<\/iframe>/is',
            '/<form\b[^>]*>.*?<\/form>/is',
            '/<input\b[^>]*>/i',
            '/<button\b[^>]*>.*?<\/button>/is',
            '/<select\b[^>]*>.*?<\/select>/is',
            '/<textarea\b[^>]*>.*?<\/textarea>/is',
        );
        foreach ( $unwanted as $pattern ) {
            $content = $this->replace_or_keep( $pattern, '', $content );
        }

        // Step 3: Process links - add target="_blank" and rel="noopener"
        $with_links = preg_replace_callback(
            '/<a\s+([^>]*)>(.*?)<\/a>/is',
            array( $this, 'process_link' ),
            $content
        );
        if ( null !== $with_links ) {
            $content = $with_links;
        }

        // Step 4: Remove all HTML comments
        $content = $this->replace_or_keep( '/<!--.*?-->/s', '', $content );

        // Step 5: Use WordPress sanitization with allowed tags
        $allowed_tags = array(
            'p'          => array( 'class' => true, 'id' => true ),
            'br'         => array(),
            'hr'         => array(),
            'strong'     => array(),
            'b'          => array(),
            'em'         => array(),
            'i'          => array( 'class' => true ), // Allow class for potential icons we'll filter
            'u'          => array(),
            'a'          => array(
                'href'   => true,
                'title'  => true,
                'target' => true,
                'rel'    => true,
            ),
            'img'        => array(
                'src'    => true,
                'alt'    => true,
                'title'  => true,
                'width'  => true,
                'height' => true,
                'class'  => true,
            ),
            'ul'         => array(),
            'ol'         => array(),
            'li'         => array(),
            'blockquote' => array( 'cite' => true ),
            'h2'         => array(),
            'h3'         => array(),
            'h4'         => array(),
            'h5'         => array(),
            'h6'         => array(),
            'table'      => array(),
            'thead'      => array(),
            'tbody'      => array(),
            'tr'         => array(),
            'th'         => array(),
            'td'         => array(),
            'figure'     => array(),
            'figcaption' => array(),
        );

        $content = wp_kses( $content, $allowed_tags );

        // Step 6: Remove empty tags (multiple passes, because removing an inner empty
        // tag can leave its parent empty). Step 1 turned &nbsp; into the UTF-8
        // no-break space (bytes C2 A0), which \s does not match without the /u flag,
        // so it is listed explicitly.
        for ( $i = 0; $i < 10; $i++ ) {
            $old     = $content;
            $content = $this->replace_or_keep(
                '/<(p|div|span|li|h[1-6]|strong|b|em|i|u|td|th|figure|figcaption)(\s[^>]*)?>(?:\s|&nbsp;|\xC2\xA0)*<\/\1>/i',
                '',
                $content
            );
            if ( $old === $content ) {
                break;
            }
        }

        // Step 7: Remove emoji and invalid images.
        // First drop <img> tags with a missing or empty src: the callback pattern
        // below cannot match them, so they would survive as broken images.
        $content = $this->replace_or_keep(
            '/<img\b(?![^>]*\ssrc\s*=\s*["\'][^"\']+["\'])[^>]*>/i',
            '',
            $content
        );
        $with_images = preg_replace_callback(
            '/<img\s+[^>]*src=["\']([^"\']+)["\'][^>]*>/i',
            array( $this, 'filter_image' ),
            $content
        );
        if ( null !== $with_images ) {
            $content = $with_images;
        }

        // Step 8: Clean up whitespace and line breaks
        $content = $this->replace_or_keep( '/(\s*<br\s*\/?>\s*){3,}/i', '<br><br>', $content );
        $content = $this->replace_or_keep( '/\n{3,}/', "\n\n", $content );
        $content = $this->replace_or_keep( '/[ \t]+/', ' ', $content );

        // Step 9: Remove any remaining CSS-like garbage text
        // This catches text that looks like CSS selectors/properties outside of tags
        $content = $this->remove_css_garbage( $content );

        // Step 10: Style images with professional gallery
        $content = $this->style_content_images( $content, $post_title );

        // Step 11: Final cleanup
        $content = trim( $content );

        return $content;
    }

    /**
     * preg_replace() wrapper that returns the untouched subject when PCRE fails.
     *
     * preg_replace() returns null on an engine error (for example when the backtrack
     * limit is hit on a very large document); passing that null on would silently
     * empty the post.
     *
     * @param string $pattern     Regular expression.
     * @param string $replacement Replacement string.
     * @param string $subject     Text to process.
     * @return string             Replaced text, or $subject unchanged on PCRE failure.
     */
    private function replace_or_keep( $pattern, $replacement, $subject ) {
        $result = preg_replace( $pattern, $replacement, $subject );

        return null === $result ? $subject : $result;
    }

    /**
     * Process individual link - ensure target="_blank" and clean text
     *
     * Callback for preg_replace_callback() on anchor elements. The anchor is rebuilt
     * from scratch with only href, target, rel and (optionally) title. Link text is
     * reduced to plain text by clean_text(). Any <img> tags found inside the anchor
     * are emitted, unwrapped, in front of the result so that linked images are not
     * lost when the text is flattened.
     *
     * @param array $matches [0] whole anchor, [1] attribute string, [2] inner HTML.
     * @return string        Rebuilt link, bare text/images when the href is unusable,
     *                       or '' when the anchor has neither text nor images.
     */
    private function process_link( $matches ) {
        $attributes = $matches[1];
        $inner_html = $matches[2];

        // Clean the link text (remove any CSS garbage inside)
        $link_text = $this->clean_text( $inner_html );

        // clean_text() strips every tag, images included; collect them first so an
        // image wrapped in a link survives.
        $images_html = '';
        if ( preg_match_all( '/<img\b[^>]*>/i', $inner_html, $image_tags ) ) {
            $images_html = implode( '', $image_tags[0] );
        }

        // Skip empty links. Strict comparison: empty() would also discard the text "0".
        if ( '' === $link_text ) {
            return $images_html;
        }

        // Parse existing attributes. The closing quote must be the same character as
        // the opening one (a URL may contain the other kind), and the lookbehind
        // keeps data-href and similar attributes from matching.
        $href = '';
        if ( preg_match( '/(?<![\w\-])href\s*=\s*(["\'])(.*?)\1/is', $attributes, $href_match ) ) {
            $href = trim( $href_match[2] );
        }

        // Skip if no valid href
        if ( '' === $href || '#' === $href ) {
            return $images_html . $link_text; // Return just the text
        }

        // Validate URL
        if ( ! filter_var( $href, FILTER_VALIDATE_URL ) ) {
            return $images_html . $link_text;
        }

        // esc_url() returns '' for schemes WordPress does not allow; an anchor with
        // an empty href is worse than plain text.
        $safe_href = esc_url( $href );
        if ( '' === $safe_href ) {
            return $images_html . $link_text;
        }

        // Get title if exists
        $title = '';
        if ( preg_match( '/(?<![\w\-])title\s*=\s*(["\'])(.*?)\1/is', $attributes, $title_match ) ) {
            $title = esc_attr( $title_match[2] );
        }

        // Build clean link with target="_blank"
        $new_link = '<a href="' . $safe_href . '" target="_blank" rel="noopener nofollow"';
        if ( '' !== $title ) {
            $new_link .= ' title="' . $title . '"';
        }
        $new_link .= '>' . $link_text . '</a>';

        return $images_html . $new_link;
    }

    /**
     * Regex callback that keeps or drops one <img> tag.
     *
     * @param array $matches [0] the full tag, [1] the captured src value (may be absent).
     * @return string        The tag unchanged, or '' when is_invalid_image() rejects its src.
     */
    private function filter_image( $matches ) {
        $source = '';
        if ( isset( $matches[1] ) ) {
            $source = $matches[1];
        }

        // A rejected source removes the whole tag from the content.
        return $this->is_invalid_image( $source ) ? '' : $matches[0];
    }

    /**
     * Decide whether an image URL should be discarded.
     *
     * A URL is rejected when it is empty, when its lowercase form contains any of
     * the blocked substrings below, or when it carries a one- or two-digit
     * "WxH" size marker.
     *
     * @param string $url Image URL.
     * @return bool       True when the image should be dropped.
     */
    private function is_invalid_image( $url ) {
        if ( empty( $url ) ) {
            return true;
        }

        // Substring blocklist, matched case-insensitively against the whole URL.
        $blocked = array(
            // Emoji
            's.w.org',
            '/emoji/',
            '/twemoji/',
            '/smilies/',
            'emoji',
            // Tracking
            'pixel',
            'beacon',
            'tracker',
            'analytics',
            '1x1',
            'spacer',
            'blank.gif',
            'transparent',
            // Icons & UI
            '/icon',
            '/logo',
            'favicon',
            'gravatar',
            '/avatar',
            '.svg',
            // Ads
            'doubleclick',
            'googlesyndication',
            'adsense',
            'adserver',
            'feedburner',
            // Social tracking
            'facebook.com/tr',
            'twitter.com/i/',
            // Data URI
            'data:image',
        );

        $haystack = strtolower( $url );
        foreach ( $blocked as $needle ) {
            if ( false !== strpos( $haystack, $needle ) ) {
                return true;
            }
        }

        // Size markers such as "-50x50." or "/32x32/" anywhere in the URL.
        $tiny_in_name = (bool) preg_match( '/[\/\-_](\d{1,2})x(\d{1,2})[\/\.\-_]/i', $url );
        $tiny_in_path = (bool) preg_match( '/\/(\d{1,2})x(\d{1,2})\//', $url );

        return $tiny_in_name || $tiny_in_path;
    }

    /**
     * Remove CSS garbage from text content
     *
     * The content is split into tags and text nodes; tags pass through untouched and
     * only text nodes are filtered. Within a text node the following are removed:
     * selectors that lead into a "{" block, "property: value;" declarations,
     * "{...}" blocks, @rules, "name(...)" function calls, "__icon__"-style tokens,
     * leftover brackets, and finally any line that contains no letter or digit.
     *
     * The patterns are deliberately anchored so ordinary prose survives:
     *  - a selector must not start right after a letter/digit and must run up to a
     *    "{", so "example.com", "U.S." or a "#hashtag" are left alone;
     *  - declaration and @rule matches may not cross an "&", so the ";" that ends an
     *    HTML entity such as &amp; is not mistaken for a CSS terminator;
     *  - an @rule must not start right after a letter/digit (e-mail addresses).
     *
     * @param string $content HTML content (already sanitized by the caller).
     * @return string         Content with CSS-looking text removed from text nodes.
     */
    private function remove_css_garbage( $content ) {
        // Split content into text nodes and HTML tags
        $parts = preg_split( '/(<[^>]+>)/s', $content, -1, PREG_SPLIT_DELIM_CAPTURE );
        if ( false === $parts ) {
            // PCRE failure: returning the input beats returning nothing.
            return $content;
        }

        // Applied to every text node, in this order.
        $patterns = array(
            // CSS selectors (.class-name, #id-name, [attribute]) that lead into a block
            '/(?<![A-Za-z0-9])[\.\#][a-zA-Z_\-][a-zA-Z0-9_\-:,\.\#\[\]="\'>\s\+\~\*]*(?=\s*\{)/s',
            // CSS properties: property: value;
            '/[a-zA-Z\-]+\s*:\s*[^;}{<>&]+;/s',
            // CSS blocks: { ... }
            '/\{[^}]*\}/s',
            // @rules: @media, @keyframes, etc.
            '/(?<![A-Za-z0-9])@[a-zA-Z\-]+[^{;&]*[{;]/s',
            // CSS functions: var(...), calc(...), etc.
            '/[a-zA-Z\-]+\([^)]*\)/',
            // Icon font text: __facebook__, __icon-name__
            '/__[a-zA-Z0-9_\-]+__/',
            '/__[a-zA-Z0-9_\-]+/',
            // Leftover brackets and symbols
            '/[\{\}]/',
            '/\[\s*\]/',
            '/\(\s*\)/',
        );

        $clean_parts = array();

        foreach ( $parts as $part ) {
            // If it's an HTML tag, keep it
            if ( preg_match( '/^<[^>]+>$/', $part ) ) {
                $clean_parts[] = $part;
                continue;
            }

            // It's text content - clean it
            $text = $part;

            foreach ( $patterns as $pattern ) {
                $replaced = preg_replace( $pattern, '', $text );
                if ( null !== $replaced ) {
                    $text = $replaced;
                }
            }

            // Remove lines that are only punctuation/symbols
            $clean_lines = array();
            foreach ( explode( "\n", $text ) as $line ) {
                $stripped = trim( $line );

                // Keep if has actual content: a letter or digit in any script.
                // The /u flag is required for that; a byte-wise class would treat
                // every multi-byte character, dashes and bullets included, as content.
                $has_content = preg_match( '/[\p{L}\p{N}]/u', $stripped );
                if ( false === $has_content ) {
                    // Not valid UTF-8: fall back to "ASCII alphanumeric or any high byte".
                    $has_content = preg_match( '/[a-zA-Z0-9\x80-\xFF]/', $stripped );
                }

                if ( $has_content ) {
                    $clean_lines[] = $line;
                }
            }
            $text = implode( "\n", $clean_lines );

            // Clean multiple spaces
            $collapsed = preg_replace( '/[ \t]+/', ' ', $text );
            if ( null !== $collapsed ) {
                $text = $collapsed;
            }

            $clean_parts[] = $text;
        }

        return implode( '', $clean_parts );
    }

    /**
     * Clean plain text (for titles, excerpts)
     *
     * Strips all tags, decodes entities, removes CSS-looking fragments and
     * "__icon__"-style tokens, then collapses whitespace (no-break spaces included)
     * to single spaces.
     *
     * A selector is only removed when it does not start right after a letter/digit
     * and runs up to a "{", so text such as "Apple.com", "U.S." or "#hashtag" is kept.
     *
     * @param string $text Raw text, possibly containing markup.
     * @return string      Trimmed plain text.
     */
    private function clean_text( $text ) {
        // Remove HTML tags
        $text = wp_strip_all_tags( $text );

        // Decode entities
        $text = html_entity_decode( $text, ENT_QUOTES, 'UTF-8' );

        // Remove CSS garbage
        $patterns = array(
            '/(?<![A-Za-z0-9])[\.\#][a-zA-Z_\-][a-zA-Z0-9_\-:,\.\#\[\]="\'>\s\+\~]*(?=\s*\{)/',
            '/\{[^}]*\}/',
            '/__[a-zA-Z0-9_\-]+__/',
            '/__[a-zA-Z0-9_\-]+/',
        );
        foreach ( $patterns as $pattern ) {
            $replaced = preg_replace( $pattern, '', $text );
            // preg_replace() returns null on a PCRE error; keep the previous text then.
            if ( null !== $replaced ) {
                $text = $replaced;
            }
        }

        // Clean whitespace. Decoding turned &nbsp; into U+00A0, which \s only covers
        // in UTF-8 mode; fall back to the byte-wise pattern if the text is not valid UTF-8.
        $collapsed = preg_replace( '/[\s\x{00A0}]+/u', ' ', $text );
        if ( null === $collapsed ) {
            $collapsed = preg_replace( '/\s+/', ' ', $text );
        }
        if ( null !== $collapsed ) {
            $text = $collapsed;
        }

        return trim( $text );
    }

    /**
     * Style content images with professional gallery
     *
     * Images rejected by is_invalid_image() are left untouched. Of the remaining
     * ones:
     *  - two or more distinct sources: every original tag is removed from the
     *    content and a grid gallery (one entry per distinct source) is prepended;
     *  - a single distinct source: the first tag is replaced in place by a styled
     *    image and any later repeats of the same image are removed.
     * Wrappers (<p>, <figure>) left empty by a removal are dropped. The post title
     * is used as alt and title text of every generated image.
     *
     * @param string $content    Sanitized post content.
     * @param string $post_title Post title, used for alt/title attributes.
     * @return string            Content with restyled images.
     */
    private function style_content_images( $content, $post_title ) {
        // Find all images that carry a src
        preg_match_all( '/<img[^>]+src=["\']([^"\']+)["\'][^>]*>/i', $content, $matches, PREG_SET_ORDER );

        if ( empty( $matches ) ) {
            return $content;
        }

        // Group the original tags of every valid image by source URL, in order of
        // first appearance, so a picture repeated in the feed is rendered once.
        $tags_by_src = array();
        foreach ( $matches as $match ) {
            if ( $this->is_invalid_image( $match[1] ) ) {
                continue;
            }
            $tags_by_src[ $match[1] ][] = $match[0];
        }

        if ( empty( $tags_by_src ) ) {
            return $content;
        }

        $alt          = esc_attr( $post_title );
        $gallery_html = '';

        // Single image style
        $single_style = 'max-width:100%;height:auto;border-radius:12px;box-shadow:0 4px 20px rgba(0,0,0,0.12);margin:20px auto;display:block;';

        // Gallery style (for 2+ images)
        if ( count( $tags_by_src ) >= 2 ) {
            $gallery_html = '<div class="drap-gallery" style="display:grid;grid-template-columns:repeat(auto-fill,minmax(280px,1fr));gap:16px;margin:24px 0;padding:20px;background:linear-gradient(145deg,#f8f9fa,#e9ecef);border-radius:16px;box-shadow:inset 0 2px 4px rgba(0,0,0,0.06);">';

            foreach ( $tags_by_src as $src => $tags ) {
                $gallery_html .= '<img src="' . esc_url( (string) $src ) . '" alt="' . $alt . '" title="' . $alt . '" style="width:100%;height:200px;object-fit:cover;border-radius:10px;box-shadow:0 3px 12px rgba(0,0,0,0.15);transition:transform 0.3s ease;" loading="lazy">';

                // Remove originals from content
                $content = str_replace( $tags, '', $content );
            }

            $gallery_html .= '</div>';
        } else {
            // Style single image
            reset( $tags_by_src );
            $src     = (string) key( $tags_by_src );
            $tags    = current( $tags_by_src );
            $new_img = '<img src="' . esc_url( $src ) . '" alt="' . $alt . '" title="' . $alt . '" style="' . $single_style . '" loading="lazy">';

            // Replace only the first occurrence; later repeats of the same image are
            // removed rather than styled a second time.
            $first = $tags[0];
            $pos   = strpos( $content, $first );
            if ( false !== $pos ) {
                $head    = substr( $content, 0, $pos ) . $new_img;
                $tail    = (string) substr( $content, $pos + strlen( $first ) );
                $content = $head . str_replace( $tags, '', $tail );
            }
        }

        // Removing an image can leave its wrapper empty, e.g. <p></p>.
        $without_empty = preg_replace( '/<(p|figure)(\s[^>]*)?>\s*<\/\1>/i', '', $content );
        if ( null !== $without_empty ) {
            $content = $without_empty;
        }

        // The gallery always goes at the very beginning of the content.
        return $gallery_html . $content;
    }

    /**
     * Build source link HTML
     *
     * Produces a paragraph of the form "<text>: <link>", where the link points to
     * $url and is labelled with the URL's host name.
     *
     * @param string $url  Source URL.
     * @param string $text Label shown before the link.
     * @return string      HTML paragraph.
     */
    private function build_source_link( $url, $text ) {
        $host = wp_parse_url( $url, PHP_URL_HOST );

        // wp_parse_url() yields null/false when the URL has no host or cannot be
        // parsed; label the link with the URL itself rather than leaving it empty.
        $label = ( is_string( $host ) && '' !== $host ) ? $host : $url;

        return sprintf(
            '<p style="margin-top:30px;padding-top:20px;border-top:1px solid #e0e0e0;font-size:14px;color:#666;">%s: <a href="%s" target="_blank" rel="noopener nofollow" style="color:#0066cc;">%s</a></p>',
            esc_html( $text ),
            esc_url( $url ),
            esc_html( $label )
        );
    }

    /**
     * Set featured image with WebP conversion and SEO
     *
     * Downloads the image to a temp file, optionally converts JPEG/PNG to WebP,
     * sideloads it into the media library attached to the post and sets it as the
     * post thumbnail. The source URL is stored in the `_drap_source_image` post
     * meta before the download is attempted.
     *
     * @param int    $post_id      Post to attach the image to.
     * @param string $url          Remote image URL.
     * @param string $post_title   Post title; basis for the file name and SEO fields.
     * @param bool   $convert_webp Convert JPEG/PNG downloads to WebP when possible.
     * @param bool   $image_seo    Fill the attachment title, caption, description and alt
     *                             text with the post title.
     * @return bool                True once the thumbnail call has been made; false when
     *                             the URL is rejected or download/sideload fails.
     */
    private function set_featured_image( $post_id, $url, $post_title, $convert_webp = false, $image_seo = false ) {
        // Validate URL
        if ( ! filter_var( $url, FILTER_VALIDATE_URL ) ) {
            return false;
        }

        // Skip invalid images
        if ( $this->is_invalid_image( $url ) ) {
            return false;
        }

        // WordPress media functions
        require_once ABSPATH . 'wp-admin/includes/media.php';
        require_once ABSPATH . 'wp-admin/includes/file.php';
        require_once ABSPATH . 'wp-admin/includes/image.php';

        // Store source URL for debugging
        update_post_meta( $post_id, '_drap_source_image', $url );

        // Download image
        $tmp = download_url( $url );
        if ( is_wp_error( $tmp ) ) {
            return false;
        }

        // Generate filename from post title. Truncate first and trim afterwards so
        // a cut in the middle of a separator cannot leave a trailing "-".
        $filename = sanitize_file_name( $post_title );
        $filename = preg_replace( '/[^a-zA-Z0-9\-_]/', '-', $filename );
        $filename = preg_replace( '/-+/', '-', $filename );
        $filename = substr( trim( $filename, '-' ), 0, 50 );
        $filename = trim( $filename, '-' );

        if ( empty( $filename ) ) {
            $filename = 'image-' . $post_id;
        }

        // Get extension. The URL may have no path component at all, in which case
        // wp_parse_url() does not return a string.
        $path = wp_parse_url( $url, PHP_URL_PATH );
        $ext  = is_string( $path ) ? strtolower( pathinfo( $path, PATHINFO_EXTENSION ) ) : '';
        if ( ! in_array( $ext, array( 'jpg', 'jpeg', 'png', 'gif', 'webp' ), true ) ) {
            $ext = 'jpg';
        }

        // Convert to WebP if enabled
        if ( $convert_webp && in_array( $ext, array( 'jpg', 'jpeg', 'png' ), true ) ) {
            $webp = $this->convert_to_webp( $tmp, $ext );
            if ( $webp ) {
                // The converted file replaces the download from here on.
                @unlink( $tmp );
                $tmp = $webp;
                $ext = 'webp';
            }
        }

        // Prepare file array
        $file_array = array(
            'name'     => $filename . '.' . $ext,
            'tmp_name' => $tmp,
        );

        // Upload to media library
        $attachment_id = media_handle_sideload( $file_array, $post_id );

        // Cleanup temp file (still present when the sideload did not move it)
        if ( file_exists( $tmp ) ) {
            @unlink( $tmp );
        }

        if ( is_wp_error( $attachment_id ) ) {
            return false;
        }

        // Set image SEO attributes. Both calls unslash their input, so the title is
        // slashed first to keep any backslash it contains.
        if ( $image_seo ) {
            wp_update_post(
                wp_slash(
                    array(
                        'ID'           => $attachment_id,
                        'post_title'   => $post_title,
                        'post_excerpt' => $post_title,
                        'post_content' => $post_title,
                    )
                )
            );
            update_post_meta( $attachment_id, '_wp_attachment_image_alt', wp_slash( $post_title ) );
        }

        // Set as featured image
        set_post_thumbnail( $post_id, $attachment_id );

        return true;
    }

    /**
     * Convert image to WebP format
     *
     * Decodes the file with GD according to $ext and writes "<file>.webp" at
     * quality 85. The source file is never modified or deleted here.
     *
     * @param string $file Path of the image to convert.
     * @param string $ext  Lowercase extension selecting the decoder: jpg, jpeg or png.
     * @return string|false Path of the WebP file, or false when GD lacks the needed
     *                      functions, the extension is unsupported, or decoding or
     *                      encoding fails.
     */
    private function convert_to_webp( $file, $ext ) {
        // GD can be built without WebP support; calling a missing function is a
        // fatal error that the @ operator does not suppress.
        if ( ! function_exists( 'imagewebp' ) ) {
            return false;
        }

        $image = false;

        switch ( $ext ) {
            case 'jpg':
            case 'jpeg':
                if ( function_exists( 'imagecreatefromjpeg' ) ) {
                    $image = @imagecreatefromjpeg( $file );
                }
                break;
            case 'png':
                if ( function_exists( 'imagecreatefrompng' ) ) {
                    $image = @imagecreatefrompng( $file );
                }
                if ( $image ) {
                    // Palette PNGs are converted to true colour before encoding;
                    // the alpha channel is flagged to be saved.
                    imagepalettetotruecolor( $image );
                    imagealphablending( $image, true );
                    imagesavealpha( $image, true );
                }
                break;
        }

        if ( ! $image ) {
            return false;
        }

        $webp_file = $file . '.webp';
        $written   = @imagewebp( $image, $webp_file, 85 );
        imagedestroy( $image );

        // Only a non-empty file counts as success.
        if ( $written && file_exists( $webp_file ) && filesize( $webp_file ) > 0 ) {
            return $webp_file;
        }

        // Do not leave a partial or zero-byte file behind.
        if ( file_exists( $webp_file ) ) {
            @unlink( $webp_file );
        }

        return false;
    }
}
