<?php
/**
 * Feed Processor - Digital Rise Auto Poster
 * 
 * Simple RSS feed fetching with clear duplicate handling
 * 
 * @package Digital_Rise_Auto_Poster
 */

// Abort when this file is requested directly instead of being loaded by WordPress.
defined( 'ABSPATH' ) || exit;

/**
 * Fetches RSS/Atom feeds through WordPress' fetch_feed() and turns their
 * entries into plain arrays, optionally skipping URLs the injected storage
 * object reports as duplicates.
 */
class DRAP_Feed {

    /**
     * Maximum number of feed entries inspected per fetch while looking for
     * entries that are not duplicates.
     */
    const MAX_SCAN_ITEMS = 500;

    /**
     * Feed cache lifetime, in seconds, applied while this class fetches a feed.
     */
    const CACHE_LIFETIME = 60;

    /**
     * Lower-case substrings that mark an image URL as unusable
     * (emoji, tracking pixels, icons, ad assets, tiny fixed-size images).
     *
     * @var string[]
     */
    private static $invalid_image_patterns = array(
        'emoji',
        's.w.org',
        'twemoji',
        'feedburner',
        'pixel',
        'gravatar',
        'avatar',
        '/icon',
        'logo',
        '1x1',
        'blank.gif',
        'spacer',
        'transparent',
        'loading',
        'spinner',
        'badge',
        'button',
        'banner',
        '/ad/',
        '/ads/',
        'doubleclick',
        'googlesyndication',
        'facebook.com/tr',
        'analytics',
        '.svg', // Skip SVG icons
        '16x16',
        '20x20',
        '24x24',
        '32x32',
        '48x48',
        '64x64',
        '72x72', // Emoji size
        '96x96',
    );

    /**
     * Storage object; only its is_duplicate( $url ) method is used here.
     *
     * @var object
     */
    private $db;

    /**
     * @param object $db Storage object exposing is_duplicate( string $url ).
     */
    public function __construct( $db ) {
        $this->db = $db;
    }

    /**
     * Get items from feed
     *
     * Scans up to MAX_SCAN_ITEMS entries and collects the first $limit entries
     * that have a permalink and (when $skip_duplicates is on) are neither known
     * to the storage object nor already collected in this call.
     *
     * @param string $url Feed URL
     * @param int $limit Max items to return
     * @param bool $skip_duplicates Whether to skip duplicates
     * @param bool $fetch_full_content Whether to look up a missing image on the original page
     * @return array|WP_Error List of item arrays (title, url, content, description,
     *                        date, author, image, categories) or the fetch error.
     */
    public function get_items( $url, $limit = 1, $skip_duplicates = true, $fetch_full_content = false ) {
        $feed = $this->load_feed( $url );

        if ( is_wp_error( $feed ) ) {
            return $feed;
        }

        $feed_items = $feed->get_items( 0, self::MAX_SCAN_ITEMS );

        if ( empty( $feed_items ) ) {
            return array();
        }

        $items = array();
        $seen  = array(); // Permalinks already collected in this call.

        foreach ( $feed_items as $item ) {
            // Stop if we have enough
            if ( count( $items ) >= $limit ) {
                break;
            }

            $item_url = $item->get_permalink();

            // Skip empty URLs
            if ( empty( $item_url ) ) {
                continue;
            }

            if ( $skip_duplicates ) {
                // A feed may list the same permalink twice; the storage object
                // cannot know about entries collected earlier in this loop.
                if ( isset( $seen[ $item_url ] ) || $this->db->is_duplicate( $item_url ) ) {
                    continue;
                }
                $seen[ $item_url ] = true;
            }

            // Get image from feed
            $image_url = $this->get_image( $item );

            // If no image found and fetch_full_content is enabled, get from original page
            if ( empty( $image_url ) && $fetch_full_content ) {
                $page_data = $this->fetch_from_url( $item_url );
                if ( ! empty( $page_data['image'] ) ) {
                    $image_url = $page_data['image'];
                }
            }

            $items[] = array(
                'title'       => $this->clean_title( $item->get_title() ),
                'url'         => $item_url,
                'content'     => $item->get_content(),
                'description' => $item->get_description(),
                'date'        => $item->get_date( 'Y-m-d H:i:s' ),
                'author'      => $this->get_author( $item ),
                'image'       => $image_url,
                'categories'  => $this->get_categories( $item ),
            );
        }

        return $items;
    }

    /**
     * Fetch a featured image from the original article URL.
     *
     * Candidates are tried in order: Open Graph image, Twitter card image,
     * an <img> with the wp-post-image class, then the first <img> whose URL
     * contains wp-content/uploads. Each candidate is entity-decoded, made
     * absolute against $url and validated; the first usable one wins.
     *
     * The request goes through wp_safe_remote_get() because $url originates
     * from a remote feed and must not be able to reach internal hosts.
     *
     * @param string $url Article URL.
     * @return array { 'image' => string, 'content' => string } — 'content' is
     *               currently always an empty string; 'image' is '' when nothing
     *               usable was found or the request failed.
     */
    public function fetch_from_url( $url ) {
        $result = array(
            'image'   => '',
            'content' => '',
        );

        if ( ! is_string( $url ) || '' === $url ) {
            return $result;
        }

        $response = wp_safe_remote_get( $url, array(
            'timeout'    => 15,
            'user-agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        ) );

        if ( is_wp_error( $response ) ) {
            return $result;
        }

        // Error pages usually carry site-wide artwork, not the article image.
        $status = (int) wp_remote_retrieve_response_code( $response );
        if ( $status < 200 || $status >= 300 ) {
            return $result;
        }

        $html = wp_remote_retrieve_body( $response );
        if ( empty( $html ) ) {
            return $result;
        }

        $result['image'] = $this->find_page_image( $html, $url );

        return $result;
    }

    /**
     * Count available (non-duplicate) items
     *
     * @param string $url Feed URL.
     * @return array|int Array with 'total', 'available' and 'duplicates' counts,
     *                   or the integer 0 when the feed could not be fetched
     *                   (kept for backward compatibility with existing callers).
     */
    public function count_available( $url ) {
        $feed = $this->load_feed( $url );

        if ( is_wp_error( $feed ) ) {
            return 0;
        }

        $items = $feed->get_items( 0, self::MAX_SCAN_ITEMS );
        if ( ! is_array( $items ) ) {
            $items = array();
        }

        $available  = 0;
        $duplicates = 0;

        foreach ( $items as $item ) {
            $item_url = $item->get_permalink();
            if ( empty( $item_url ) ) {
                continue;
            }

            if ( $this->db->is_duplicate( $item_url ) ) {
                $duplicates++;
            } else {
                $available++;
            }
        }

        return array(
            'total'      => count( $items ), // Includes entries without a permalink.
            'available'  => $available,
            'duplicates' => $duplicates,
        );
    }

    /**
     * Validate feed URL
     *
     * Checks the URL syntax, requires an http/https scheme, then tries to
     * fetch and parse the feed.
     *
     * @param string $url Feed URL.
     * @return array|WP_Error Array with 'valid', 'title' and 'items', or an error.
     */
    public function validate( $url ) {
        if ( ! filter_var( $url, FILTER_VALIDATE_URL ) ) {
            return new WP_Error( 'invalid', 'Invalid URL format' );
        }

        // FILTER_VALIDATE_URL accepts any scheme; only web URLs can be feeds here.
        $scheme = strtolower( (string) parse_url( $url, PHP_URL_SCHEME ) );
        if ( 'http' !== $scheme && 'https' !== $scheme ) {
            return new WP_Error( 'invalid', 'Invalid URL format' );
        }

        $feed = fetch_feed( $url );

        if ( is_wp_error( $feed ) ) {
            return $feed;
        }

        return array(
            'valid' => true,
            'title' => $feed->get_title(),
            'items' => $feed->get_item_quantity(),
        );
    }

    /**
     * Lifetime callback for the 'wp_feed_cache_transient_lifetime' filter.
     *
     * @return int Cache lifetime in seconds.
     */
    public function cache_time() {
        return self::CACHE_LIFETIME;
    }

    /**
     * Clear cached feeds and fetch $url with the short cache lifetime applied.
     *
     * @param string $url Feed URL.
     * @return object|WP_Error Whatever fetch_feed() returns.
     */
    private function load_feed( $url ) {
        $this->clear_cache();

        $lifetime_callback = array( $this, 'cache_time' );

        add_filter( 'wp_feed_cache_transient_lifetime', $lifetime_callback );
        $feed = fetch_feed( $url );
        remove_filter( 'wp_feed_cache_transient_lifetime', $lifetime_callback );

        return $feed;
    }

    /**
     * Pick the first usable image URL from an article page.
     *
     * @param string $html     Page markup.
     * @param string $page_url URL the markup was loaded from (base for relative URLs).
     * @return string Absolute image URL, or '' when none qualifies.
     */
    private function find_page_image( $html, $page_url ) {
        // Ordered by reliability; each pattern has an attribute-order twin.
        $patterns = array(
            '/<meta[^>]+property=["\']og:image["\'][^>]+content=["\']([^"\']+)["\'][^>]*>/i',
            '/<meta[^>]+content=["\']([^"\']+)["\'][^>]+property=["\']og:image["\'][^>]*>/i',
            '/<meta[^>]+name=["\']twitter:image["\'][^>]+content=["\']([^"\']+)["\'][^>]*>/i',
            '/<meta[^>]+content=["\']([^"\']+)["\'][^>]+name=["\']twitter:image["\'][^>]*>/i',
            '/<img[^>]+class=["\'][^"\']*wp-post-image[^"\']*["\'][^>]+src=["\']([^"\']+)["\'][^>]*>/i',
            '/<img[^>]+src=["\']([^"\']+)["\'][^>]+class=["\'][^"\']*wp-post-image[^"\']*["\'][^>]*>/i',
        );

        foreach ( $patterns as $pattern ) {
            if ( ! preg_match( $pattern, $html, $matches ) ) {
                continue;
            }

            // A rejected candidate (e.g. a logo) must not block later fallbacks.
            $image = $this->make_absolute_url( $this->decode_url( $matches[1] ), $page_url );
            if ( ! $this->is_invalid_image( $image ) ) {
                return $image;
            }
        }

        // Last resort: first acceptable image stored under wp-content/uploads.
        if ( preg_match_all( '/<img[^>]+src=["\']([^"\']+)["\'][^>]*>/i', $html, $matches ) ) {
            foreach ( $matches[1] as $src ) {
                $src = $this->decode_url( $src );
                if ( $this->is_invalid_image( $src ) || false === strpos( $src, 'wp-content/uploads' ) ) {
                    continue;
                }

                $image = $this->make_absolute_url( $src, $page_url );
                if ( ! $this->is_invalid_image( $image ) ) {
                    return $image;
                }
            }
        }

        return '';
    }

    /**
     * Decode HTML entities in an attribute value (e.g. &amp; in query strings).
     *
     * @param string $raw Raw attribute value.
     * @return string Decoded, trimmed value.
     */
    private function decode_url( $raw ) {
        return trim( html_entity_decode( (string) $raw, ENT_QUOTES, 'UTF-8' ) );
    }

    /**
     * Resolve a possibly relative URL against the page it was found on.
     *
     * Handles URLs that already have a scheme, protocol-relative (//host/…),
     * root-relative (/path) and path-relative (path) references. Dot segments
     * are not normalised.
     *
     * @param string $candidate URL as found in the markup.
     * @param string $base_url  URL of the page.
     * @return string Absolute URL, or '' when it cannot be resolved.
     */
    private function make_absolute_url( $candidate, $base_url ) {
        if ( '' === $candidate ) {
            return '';
        }

        // Already carries a scheme (http:, https:, data:, …).
        if ( preg_match( '#^[a-z][a-z0-9+.\-]*:#i', $candidate ) ) {
            return $candidate;
        }

        $base = parse_url( (string) $base_url );
        if ( ! is_array( $base ) ) {
            $base = array();
        }

        $scheme = ! empty( $base['scheme'] ) ? $base['scheme'] : 'https';

        if ( 0 === strpos( $candidate, '//' ) ) {
            return $scheme . ':' . $candidate;
        }

        if ( empty( $base['host'] ) ) {
            return '';
        }

        $origin = $scheme . '://' . $base['host'];
        if ( ! empty( $base['port'] ) ) {
            $origin .= ':' . $base['port'];
        }

        if ( 0 === strpos( $candidate, '/' ) ) {
            return $origin . $candidate;
        }

        // Path-relative: resolve against the directory of the page path.
        $path  = isset( $base['path'] ) ? $base['path'] : '/';
        $slash = strrpos( $path, '/' );
        $dir   = false === $slash ? '/' : substr( $path, 0, $slash + 1 );

        return $origin . $dir . $candidate;
    }

    /**
     * Normalise a feed title: decode entities, strip tags, collapse whitespace.
     *
     * @param string|null $title Raw title; null is treated as an empty string.
     * @return string
     */
    private function clean_title( $title ) {
        $title = html_entity_decode( (string) $title, ENT_QUOTES, 'UTF-8' );
        $title = wp_strip_all_tags( $title );

        // Decoded &nbsp; becomes U+00A0, which plain \s does not match.
        $collapsed = preg_replace( '/[\s\x{00A0}]+/u', ' ', $title );
        if ( null === $collapsed ) {
            // Invalid UTF-8 makes the /u pattern fail; fall back to byte mode.
            $collapsed = preg_replace( '/\s+/', ' ', $title );
        }

        return trim( (string) $collapsed );
    }

    /**
     * Author name of a feed entry, falling back to the author's e-mail.
     *
     * @param object $item Feed entry.
     * @return string Empty string when the entry has no author data.
     */
    private function get_author( $item ) {
        $author = $item->get_author();
        if ( ! $author ) {
            return '';
        }

        return (string) ( $author->get_name() ?: $author->get_email() );
    }

    /**
     * Find the best image for a feed entry.
     *
     * Order: enclosures, media:content, media:thumbnail, images inside the
     * content, images inside the description. All entries of each source are
     * examined so a rejected first entry (for instance an avatar) does not hide
     * a usable later one.
     *
     * @param object $item Feed entry.
     * @return string Image URL or ''.
     */
    private function get_image( $item ) {
        // Try enclosures first (most reliable for featured images)
        $enclosures = $item->get_enclosures();
        if ( is_array( $enclosures ) ) {
            foreach ( $enclosures as $enclosure ) {
                if ( ! $enclosure ) {
                    continue;
                }

                $link = $enclosure->get_link();
                $type = $enclosure->get_type();

                // Untyped enclosures are given the benefit of the doubt.
                $looks_like_image = ! $type || false !== strpos( $type, 'image' );

                if ( $link && $looks_like_image && ! $this->is_invalid_image( $link ) ) {
                    return $link;
                }
            }
        }

        // Try Media RSS tags (WordPress feeds often use these)
        if ( method_exists( $item, 'get_item_tags' ) ) {
            foreach ( array( 'content', 'thumbnail' ) as $tag ) {
                $media_url = $this->first_media_url( $item, $tag );
                if ( '' !== $media_url ) {
                    return $media_url;
                }
            }
        }

        // Extract from content, then from the description
        $found_image = $this->extract_real_image( $item->get_content() );
        if ( $found_image ) {
            return $found_image;
        }

        $found_image = $this->extract_real_image( $item->get_description() );
        if ( $found_image ) {
            return $found_image;
        }

        return '';
    }

    /**
     * First acceptable url attribute among an entry's Media RSS tags.
     *
     * @param object $item Feed entry exposing get_item_tags().
     * @param string $tag  Media RSS element name ('content' or 'thumbnail').
     * @return string URL or ''.
     */
    private function first_media_url( $item, $tag ) {
        $entries = $item->get_item_tags( 'http://search.yahoo.com/mrss/', $tag );
        if ( empty( $entries ) || ! is_array( $entries ) ) {
            return '';
        }

        foreach ( $entries as $entry ) {
            if ( empty( $entry['attribs']['']['url'] ) ) {
                continue;
            }

            $candidate = $entry['attribs']['']['url'];
            if ( ! $this->is_invalid_image( $candidate ) ) {
                return $candidate;
            }
        }

        return '';
    }

    /**
     * Check if image URL is invalid (emoji, tracking pixel, icon, etc.)
     *
     * Matching is a case-insensitive substring test against the whole URL.
     *
     * @param string $url Image URL.
     * @return bool True when the URL is empty, a data: URI, or matches a
     *              blocked pattern.
     */
    private function is_invalid_image( $url ) {
        if ( ! is_string( $url ) || '' === $url ) {
            return true;
        }

        $url_lower = strtolower( $url );

        // Skip base64 / inline data
        if ( 0 === strpos( $url_lower, 'data:' ) ) {
            return true;
        }

        foreach ( self::$invalid_image_patterns as $pattern ) {
            if ( false !== strpos( $url_lower, $pattern ) ) {
                return true;
            }
        }

        return false;
    }

    /**
     * Extract real image from HTML content
     *
     * Returns the first acceptable image under wp-content/uploads; when there
     * is none, the first acceptable image of any origin. Entities in the src
     * attribute are decoded so the returned URL can be requested as-is.
     *
     * @param string|null $html Markup to search.
     * @return string Image URL or ''.
     */
    private function extract_real_image( $html ) {
        if ( empty( $html ) ) {
            return '';
        }

        if ( ! preg_match_all( '/<img[^>]+src=["\']([^"\']+)["\'][^>]*>/i', $html, $matches ) ) {
            return '';
        }

        $first_valid = '';

        foreach ( $matches[1] as $img_url ) {
            $img_url = $this->decode_url( $img_url );

            if ( $this->is_invalid_image( $img_url ) ) {
                continue;
            }

            // Prefer wp-content/uploads images (WordPress featured images)
            if ( false !== strpos( $img_url, 'wp-content/uploads' ) ) {
                return $img_url;
            }

            if ( '' === $first_valid ) {
                $first_valid = $img_url;
            }
        }

        return $first_valid;
    }

    /**
     * Category labels of a feed entry.
     *
     * @param object $item Feed entry.
     * @return string[] Non-empty labels in feed order.
     */
    private function get_categories( $item ) {
        $item_cats = $item->get_categories();
        if ( ! $item_cats ) {
            return array();
        }

        $labels = array();
        foreach ( $item_cats as $cat ) {
            $label = $cat->get_label();
            if ( $label ) {
                $labels[] = $label;
            }
        }

        return $labels;
    }

    /**
     * Delete every cached feed transient stored in the options table.
     *
     * Underscores are escaped so LIKE treats them literally instead of as
     * single-character wildcards.
     */
    private function clear_cache() {
        global $wpdb;

        $wpdb->query(
            $wpdb->prepare(
                "DELETE FROM {$wpdb->options} WHERE option_name LIKE %s OR option_name LIKE %s",
                $wpdb->esc_like( '_transient_feed_' ) . '%',
                $wpdb->esc_like( '_transient_timeout_feed_' ) . '%'
            )
        );
    }
}
