<?php
/**
 * Link Scanner for Screaming Fixes
 *
 * Scans the WordPress database for posts whose content contains specific URLs.
 */

// Stop immediately when this file is requested directly rather than loaded by WordPress.
defined('ABSPATH') || exit;

class SF_Link_Scanner {

    /**
     * Cache for URL search results
     * @var array
     */
    private $cache = [];

    /**
     * Post types to search
     * @var array
     */
    private $post_types = [];

    /**
     * Post statuses to search
     * @var array
     */
    private $post_statuses = [];

    /**
     * Set up the default search scope.
     *
     * Both default lists are passed through a filter before being stored:
     * 'sf_searchable_post_types' for the post types and
     * 'sf_searchable_post_statuses' for the post statuses.
     */
    public function __construct() {
        $default_types = ['post', 'page'];
        $default_statuses = ['publish', 'draft', 'pending', 'private'];

        $this->post_types = apply_filters('sf_searchable_post_types', $default_types);
        $this->post_statuses = apply_filters('sf_searchable_post_statuses', $default_statuses);
    }

    /**
     * Find all posts containing a specific URL
     *
     * Runs a LIKE search over post_content, restricted to the requested post
     * types and statuses, and decorates every hit with edit/view links, the
     * number of occurrences and the context snippets produced by
     * get_match_info(). Results are memoised per instance.
     *
     * @param string $url URL to search for
     * @param array $options Search options: 'post_types' (array),
     *                       'post_statuses' (array) and 'limit' (int, default
     *                       1000). Missing keys fall back to the scanner's
     *                       configured post types / statuses.
     * @return array List of arrays with the keys post_id, post_title, post_type,
     *               post_status, post_date, edit_url, view_url, match_count and
     *               contexts. Empty when the URL or the search scope is empty.
     */
    public function find_posts_with_url($url, $options = []) {
        global $wpdb;

        $url = (string) $url;
        $post_types = array_values((array) ($options['post_types'] ?? $this->post_types));
        $post_statuses = array_values((array) ($options['post_statuses'] ?? $this->post_statuses));
        // A negative LIMIT is a SQL error; clamp it instead of sending it to the database.
        $limit = max(0, (int) ($options['limit'] ?? 1000));

        // An empty URL would turn the LIKE pattern into '%%' (matching every post),
        // and an empty type/status list would generate an invalid "IN ()" clause.
        if ($url === '' || empty($post_types) || empty($post_statuses)) {
            return [];
        }

        // Key the cache on the *effective* scope rather than on the raw options, so a
        // later change of the instance defaults cannot serve results of the old scope.
        $cache_key = md5(serialize([$url, $post_types, $post_statuses, $limit]));
        if (isset($this->cache[$cache_key])) {
            return $this->cache[$cache_key];
        }

        // One %s placeholder per status / post type for the IN (...) lists.
        $post_type_placeholders = implode(',', array_fill(0, count($post_types), '%s'));
        $status_placeholders = implode(',', array_fill(0, count($post_statuses), '%s'));

        $query = $wpdb->prepare(
            "SELECT ID, post_title, post_content, post_type, post_status, post_date
             FROM {$wpdb->posts}
             WHERE post_content LIKE %s
             AND post_status IN ({$status_placeholders})
             AND post_type IN ({$post_type_placeholders})
             AND post_type NOT IN ('revision', 'nav_menu_item', 'customize_changeset')
             LIMIT %d",
            array_merge(
                ['%' . $wpdb->esc_like($url) . '%'],
                $post_statuses,
                $post_types,
                [$limit]
            )
        );

        $posts = $wpdb->get_results($query);
        if (!is_array($posts)) {
            // Treat a failed query as "no matches" instead of iterating over a non-array.
            $posts = [];
        }

        // Enhance results with match context
        $results = [];
        foreach ($posts as $post) {
            $match_info = $this->get_match_info($post->post_content, $url);

            $results[] = [
                'post_id' => $post->ID,
                'post_title' => $post->post_title,
                'post_type' => $post->post_type,
                'post_status' => $post->post_status,
                'post_date' => $post->post_date,
                'edit_url' => get_edit_post_link($post->ID, 'raw'),
                'view_url' => get_permalink($post->ID),
                'match_count' => $match_info['count'],
                'contexts' => $match_info['contexts'],
            ];
        }

        $this->cache[$cache_key] = $results;

        return $results;
    }

    /**
     * Look up several URLs and group the matching posts by URL.
     *
     * Each URL is searched with find_posts_with_url(); URLs without any
     * matching post are left out of the result.
     *
     * @param array $urls Array of URLs to search for
     * @param array $options Search options, passed through unchanged
     * @return array Map of URL => ['url', 'posts', 'post_count']
     */
    public function find_posts_with_urls($urls, $options = []) {
        $grouped = [];

        foreach ($urls as $candidate) {
            $matches = $this->find_posts_with_url($candidate, $options);

            if (empty($matches)) {
                continue;
            }

            $grouped[$candidate] = [
                'url' => $candidate,
                'posts' => $matches,
                'post_count' => count($matches),
            ];
        }

        return $grouped;
    }

    /**
     * Get match info for a URL in content
     *
     * Counts the (case-sensitive) occurrences of the URL and builds up to three
     * plain-text snippets of roughly 50 bytes on either side of a match. Tags
     * are stripped and whitespace is collapsed in every snippet, and '...' is
     * added on a side where content was cut off.
     *
     * @param string $content Post content
     * @param string $url URL to find
     * @return array ['count' => int, 'contexts' => string[]]
     */
    private function get_match_info($content, $url) {
        $content = (string) $content;
        $url = (string) $url;

        // substr_count() rejects an empty needle (ValueError on PHP 8, warning
        // before that), so answer the degenerate cases up front.
        if ($url === '' || $content === '') {
            return [
                'count' => 0,
                'contexts' => [],
            ];
        }

        $count = substr_count($content, $url);
        $contexts = [];

        $max_contexts = 3;
        $padding = 50;
        $content_length = strlen($content);
        $url_length = strlen($url);
        $pos = 0;

        while (count($contexts) < $max_contexts && ($pos = strpos($content, $url, $pos)) !== false) {
            $match_end = $pos + $url_length;
            $start = max(0, $pos - $padding);
            $end = min($content_length, $match_end + $padding);

            // The offsets are bytes. If a window edge lands inside a multibyte UTF-8
            // character (continuation bytes look like 10xxxxxx), shrink the window to
            // the nearest character boundary so the snippet stays valid UTF-8.
            while ($start < $pos && (ord($content[$start]) & 0xC0) === 0x80) {
                $start++;
            }
            while ($end < $content_length && $end > $match_end && (ord($content[$end]) & 0xC0) === 0x80) {
                $end--;
            }

            $context = substr($content, $start, $end - $start);

            // Clean up context
            $context = wp_strip_all_tags($context);
            $context = preg_replace('/\s+/', ' ', $context);
            $context = trim($context);

            if ($start > 0) {
                $context = '...' . $context;
            }
            if ($end < $content_length) {
                $context = $context . '...';
            }

            $contexts[] = $context;
            // Continue searching after this occurrence.
            $pos = $match_end;
        }

        return [
            'count' => $count,
            'contexts' => $contexts,
        ];
    }

    /**
     * Find all posts containing an image URL
     *
     * Posts are pre-selected in SQL by the full URL or, when the URL has one,
     * by its file name. A post is kept only if the full URL occurs in its
     * content or the file name occurs inside an <img ...> tag.
     *
     * @param string $image_url Image URL to search for
     * @param array $options Search options: 'post_types' (array),
     *                       'post_statuses' (array) and 'limit' (int, default
     *                       1000). Missing keys fall back to the scanner's
     *                       configured post types / statuses.
     * @return array List of arrays with the keys post_id, post_title, post_type,
     *               post_status, edit_url and view_url. Empty when the URL or
     *               the search scope is empty.
     */
    public function find_posts_with_image($image_url, $options = []) {
        global $wpdb;

        $image_url = (string) $image_url;
        $post_types = array_values((array) ($options['post_types'] ?? $this->post_types));
        $post_statuses = array_values((array) ($options['post_statuses'] ?? $this->post_statuses));
        // A negative LIMIT is a SQL error; clamp it instead of sending it to the database.
        $limit = max(0, (int) ($options['limit'] ?? 1000));

        // An empty URL would match every post, and an empty type/status list
        // would generate an invalid "IN ()" clause.
        if ($image_url === '' || empty($post_types) || empty($post_statuses)) {
            return [];
        }

        // Extract filename for broader search. The path can be missing
        // (e.g. "https://example.com"), in which case there is no file name.
        $path = wp_parse_url($image_url, PHP_URL_PATH);
        $filename = is_string($path) ? basename($path) : '';

        // Only search by file name when there is one: an empty file name would
        // become LIKE '%%' and select every post.
        $like_values = ['%' . $wpdb->esc_like($image_url) . '%'];
        if ($filename !== '') {
            $like_values[] = '%' . $wpdb->esc_like($filename) . '%';
        }
        $like_clause = implode(' OR ', array_fill(0, count($like_values), 'post_content LIKE %s'));

        $post_type_placeholders = implode(',', array_fill(0, count($post_types), '%s'));
        $status_placeholders = implode(',', array_fill(0, count($post_statuses), '%s'));

        $query = $wpdb->prepare(
            "SELECT ID, post_title, post_content, post_type, post_status
             FROM {$wpdb->posts}
             WHERE ({$like_clause})
             AND post_status IN ({$status_placeholders})
             AND post_type IN ({$post_type_placeholders})
             LIMIT %d",
            array_merge(
                $like_values,
                $post_statuses,
                $post_types,
                [$limit]
            )
        );

        $posts = $wpdb->get_results($query);
        if (!is_array($posts)) {
            // Treat a failed query as "no matches" instead of iterating over a non-array.
            $posts = [];
        }

        // Without a file name the pattern would degrade to "any <img> tag".
        $img_pattern = $filename !== ''
            ? '/<img[^>]+' . preg_quote($filename, '/') . '/'
            : null;

        $results = [];
        foreach ($posts as $post) {
            // Verify the image is actually in content (not just filename match)
            $has_full_url = strpos($post->post_content, $image_url) !== false;
            $has_file_in_img = $img_pattern !== null && preg_match($img_pattern, $post->post_content);

            if (!$has_full_url && !$has_file_in_img) {
                continue;
            }

            $results[] = [
                'post_id' => $post->ID,
                'post_title' => $post->post_title,
                'post_type' => $post->post_type,
                'post_status' => $post->post_status,
                'edit_url' => get_edit_post_link($post->ID, 'raw'),
                'view_url' => get_permalink($post->ID),
            ];
        }

        return $results;
    }

    /**
     * Count total posts containing a URL
     *
     * Uses the scanner's configured post types and statuses and excludes the
     * same internal post types as find_posts_with_url(), so both methods agree
     * on what counts as a searchable post.
     *
     * @param string $url URL to search for
     * @return int Count of posts; 0 when the URL or the search scope is empty
     */
    public function count_posts_with_url($url) {
        global $wpdb;

        $url = (string) $url;
        $post_types = array_values((array) $this->post_types);
        $post_statuses = array_values((array) $this->post_statuses);

        // An empty URL would count every post (LIKE '%%'), and an empty
        // type/status list would generate an invalid "IN ()" clause.
        if ($url === '' || empty($post_types) || empty($post_statuses)) {
            return 0;
        }

        $post_type_placeholders = implode(',', array_fill(0, count($post_types), '%s'));
        $status_placeholders = implode(',', array_fill(0, count($post_statuses), '%s'));

        $count = $wpdb->get_var($wpdb->prepare(
            "SELECT COUNT(*)
             FROM {$wpdb->posts}
             WHERE post_content LIKE %s
             AND post_status IN ({$status_placeholders})
             AND post_type IN ({$post_type_placeholders})
             AND post_type NOT IN ('revision', 'nav_menu_item', 'customize_changeset')",
            array_merge(
                ['%' . $wpdb->esc_like($url) . '%'],
                $post_statuses,
                $post_types
            )
        ));

        return (int) $count;
    }

    /**
     * Collect the links found in a single post's content.
     *
     * @param int $post_id Post ID
     * @return array Link entries as produced by extract_links_from_content();
     *               empty when the post cannot be loaded
     */
    public function find_links_in_post($post_id) {
        $post = get_post($post_id);

        return $post
            ? $this->extract_links_from_content($post->post_content)
            : [];
    }

    /**
     * Extract all links from HTML content
     *
     * Finds <a ...>...</a> elements that carry a quoted, non-empty href
     * attribute. The href value is returned exactly as written in the markup.
     *
     * @param string $content HTML content
     * @return array List of arrays with the keys 'url' (href value),
     *               'anchor_text' (link text with tags stripped) and 'full_tag'
     *               (the complete matched element)
     */
    public function extract_links_from_content($content) {
        $links = [];

        // - "(?:[^>]*?\s)?" forces whitespace right before "href", so attributes
        //   such as data-href are not mistaken for the link target.
        // - The branch-reset group (?|...) matches the value up to the SAME quote
        //   character that opened it, so a URL containing an apostrophe inside
        //   double quotes (or vice versa) is no longer truncated.
        $pattern = '/<a\s(?:[^>]*?\s)?href\s*=\s*(?|"([^"]+)"|\'([^\']+)\')[^>]*>(.*?)<\/a>/is';

        if (preg_match_all($pattern, (string) $content, $matches, PREG_SET_ORDER)) {
            foreach ($matches as $match) {
                $links[] = [
                    'url' => $match[1],
                    'anchor_text' => wp_strip_all_tags($match[2]),
                    'full_tag' => $match[0],
                ];
            }
        }

        return $links;
    }

    /**
     * Collect the images found in a single post's content.
     *
     * @param int $post_id Post ID
     * @return array Image entries as produced by extract_images_from_content();
     *               empty when the post cannot be loaded
     */
    public function find_images_in_post($post_id) {
        $post = get_post($post_id);

        return $post
            ? $this->extract_images_from_content($post->post_content)
            : [];
    }

    /**
     * Extract all images from HTML content
     *
     * Scans for <img ...> tags and reads their src, alt and class attributes.
     * Tags without a usable src are skipped.
     *
     * @param string $content HTML content
     * @return array List of arrays with the keys 'full_tag', 'src', 'alt'
     *               (empty string when the tag has no alt) and, only when the
     *               tag has a non-empty class attribute, 'class'
     */
    public function extract_images_from_content($content) {
        $images = [];

        if (!preg_match_all('/<img\s+[^>]*>/is', (string) $content, $matches)) {
            return $images;
        }

        foreach ($matches[0] as $img_tag) {
            $src = $this->read_tag_attribute($img_tag, 'src');
            if (empty($src)) {
                continue;
            }

            $alt = $this->read_tag_attribute($img_tag, 'alt');
            $class = $this->read_tag_attribute($img_tag, 'class');

            $image = [
                'full_tag' => $img_tag,
                'src' => $src,
                'alt' => $alt === null ? '' : $alt,
            ];

            if ($class !== null && $class !== '') {
                $image['class'] = $class;
            }

            $images[] = $image;
        }

        return $images;
    }

    /**
     * Read the value of one quoted attribute from a single HTML tag.
     *
     * The attribute name must not be preceded by a word character or a hyphen,
     * so looking for "src" does not pick up "data-src". The value runs up to
     * the same quote character that opened it, which keeps apostrophes inside
     * double-quoted values (and vice versa) intact. Matching is
     * case-insensitive.
     *
     * @param string $tag Markup of a single tag
     * @param string $name Attribute name
     * @return string|null Attribute value, or null when the attribute is absent
     */
    private function read_tag_attribute($tag, $name) {
        $pattern = '/(?<![\w-])' . preg_quote($name, '/') . '\s*=\s*(?|"([^"]*)"|\'([^\']*)\')/i';

        return preg_match($pattern, $tag, $found) ? $found[1] : null;
    }

    /**
     * Set searchable post types
     *
     * The value is normalised to a plain list. The result cache is emptied
     * because find_posts_with_url() stores results that were computed for the
     * previously configured post types.
     *
     * @param array $post_types Post types to search
     */
    public function set_post_types($post_types) {
        $this->post_types = array_values((array) $post_types);
        $this->cache = [];
    }

    /**
     * Set searchable post statuses
     *
     * The value is normalised to a plain list. The result cache is emptied
     * because find_posts_with_url() stores results that were computed for the
     * previously configured post statuses.
     *
     * @param array $statuses Post statuses to search
     */
    public function set_post_statuses($statuses) {
        $this->post_statuses = array_values((array) $statuses);
        $this->cache = [];
    }

    /**
     * Drop every memoised search result held by this instance.
     */
    public function clear_cache() {
        $this->cache = array();
    }

    /**
     * Get attachment ID from image URL
     *
     * The query string / fragment and a trailing "-WIDTHxHEIGHT" size suffix
     * are removed first. The attachment is then looked up by guid and, failing
     * that, by the file name stored in the '_wp_attached_file' meta value.
     *
     * @param string $image_url Image URL
     * @return int|null Attachment ID or null
     */
    public function get_attachment_id_from_url($image_url) {
        global $wpdb;

        // A query string or fragment would defeat both the size-suffix removal
        // (which is anchored to the end of the string) and the guid comparison.
        $image_url = (string) preg_replace('/[?#].*$/s', '', trim((string) $image_url));

        // With an empty URL the meta lookup below would degrade to LIKE '%' and
        // return an arbitrary attachment.
        if ($image_url === '') {
            return null;
        }

        // Remove size suffix if present (-150x150, -300x200, etc.)
        $image_url = (string) preg_replace('/-\d+x\d+(?=\.[a-z]+$)/i', '', $image_url);

        // Try to find by guid
        $attachment_id = $wpdb->get_var($wpdb->prepare(
            "SELECT ID FROM {$wpdb->posts} WHERE guid = %s AND post_type = 'attachment'",
            $image_url
        ));

        if ($attachment_id) {
            return (int) $attachment_id;
        }

        $filename = basename($image_url);
        if ($filename === '') {
            return null;
        }

        // Try to find by meta. Match the whole file name: either the value is the
        // file name itself or the file name follows a directory separator. A bare
        // '%photo.jpg' suffix match would also select 'my-photo.jpg'.
        $attachment_id = $wpdb->get_var($wpdb->prepare(
            "SELECT post_id FROM {$wpdb->postmeta}
             WHERE meta_key = '_wp_attached_file'
             AND (meta_value = %s OR meta_value LIKE %s)",
            $filename,
            '%/' . $wpdb->esc_like($filename)
        ));

        return $attachment_id ? (int) $attachment_id : null;
    }

    /**
     * Get all posts linking to a domain
     *
     * The input is reduced to a bare host name (scheme, port, path, query and
     * fragment are dropped). Candidate posts are pre-selected in SQL by the
     * host name appearing anywhere in the content; each candidate's <a> links
     * are then checked, and a link counts when its host equals the requested
     * host or is a subdomain of it (case-insensitive).
     *
     * @param string $domain Domain to search for (a full URL is accepted too)
     * @param array $options Search options: 'post_types' (array),
     *                       'post_statuses' (array) and 'limit' (int, default
     *                       1000). Missing keys fall back to the scanner's
     *                       configured post types / statuses.
     * @return array List of arrays with the keys post_id, post_title, post_type,
     *               edit_url, links (matching link entries) and link_count.
     *               Empty when the domain or the search scope is empty.
     */
    public function find_posts_linking_to_domain($domain, $options = []) {
        global $wpdb;

        // Clean domain: keep only the host part.
        $host = strtolower(trim((string) $domain));
        $host = (string) preg_replace('~^(?:https?:)?//~', '', $host);
        $host = (string) preg_replace('~[/?#:].*$~s', '', $host);

        $post_types = array_values((array) ($options['post_types'] ?? $this->post_types));
        $post_statuses = array_values((array) ($options['post_statuses'] ?? $this->post_statuses));
        // A negative LIMIT is a SQL error; clamp it instead of sending it to the database.
        $limit = max(0, (int) ($options['limit'] ?? 1000));

        // An empty host would match every post, and an empty type/status list
        // would generate an invalid "IN ()" clause.
        if ($host === '' || empty($post_types) || empty($post_statuses)) {
            return [];
        }

        $post_type_placeholders = implode(',', array_fill(0, count($post_types), '%s'));
        $status_placeholders = implode(',', array_fill(0, count($post_statuses), '%s'));

        // Coarse prefilter only: it is independent of quote style, scheme and
        // subdomain. The exact host comparison happens on the parsed links below.
        $query = $wpdb->prepare(
            "SELECT ID, post_title, post_content, post_type, post_status
             FROM {$wpdb->posts}
             WHERE post_content LIKE %s
             AND post_status IN ({$status_placeholders})
             AND post_type IN ({$post_type_placeholders})
             LIMIT %d",
            array_merge(
                ['%' . $wpdb->esc_like($host) . '%'],
                $post_statuses,
                $post_types,
                [$limit]
            )
        );

        $posts = $wpdb->get_results($query);
        if (!is_array($posts)) {
            // Treat a failed query as "no matches" instead of iterating over a non-array.
            $posts = [];
        }

        $suffix = '.' . $host;
        $suffix_length = strlen($suffix);

        $results = [];
        foreach ($posts as $post) {
            $links = $this->extract_links_from_content($post->post_content);

            // Keep links whose host is the requested host or one of its subdomains.
            // A plain substring test would also accept e.g. "notexample.com" or
            // "example.com.evil.org" when looking for "example.com".
            $matching_links = array_filter($links, function ($link) use ($host, $suffix, $suffix_length) {
                $link_host = wp_parse_url($link['url'], PHP_URL_HOST);
                if (!is_string($link_host) || $link_host === '') {
                    return false;
                }

                $link_host = strtolower($link_host);

                return $link_host === $host || substr($link_host, -$suffix_length) === $suffix;
            });

            if (!empty($matching_links)) {
                $results[] = [
                    'post_id' => $post->ID,
                    'post_title' => $post->post_title,
                    'post_type' => $post->post_type,
                    'edit_url' => get_edit_post_link($post->ID, 'raw'),
                    'links' => array_values($matching_links),
                    'link_count' => count($matching_links),
                ];
            }
        }

        return $results;
    }
}
