<?php
/**
 * CSV Parser for Screaming Fixes
 *
 * Parses CSV files from Screaming Frog and other sources
 */

// Abort immediately when the file is requested directly instead of being
// loaded by WordPress (which defines ABSPATH during bootstrap).
defined('ABSPATH') || exit;

class SF_CSV_Parser {

    /**
     * Maximum rows to process (reduced from 50k to prevent memory exhaustion)
     * @var int
     */
    private $max_rows = 10000;

    /**
     * Delimiter detected for the file currently being parsed
     * @var string
     */
    private $delimiter = ',';

    /**
     * Columns to extract (null = all columns)
     * @var array|null
     */
    private $extract_columns = null;

    /**
     * Parse a CSV file
     *
     * Blank records are ignored everywhere: they are neither stored nor
     * counted, so `total_in_file` is the number of non-blank data records and
     * `truncated` is true only when at least one such record was left out
     * because of the row limit.
     *
     * @param string $file_path Path to CSV file
     * @param array $options Parsing options:
     *                       - max_rows: Maximum rows to parse (default: 10000)
     *                       - extract_columns: Array of column names to extract (null = all)
     * @return array|WP_Error Parsed data (headers, rows, count, total_in_file,
     *                        truncated, truncated_message) or error
     */
    public function parse($file_path, $options = []) {
        // is_file() rather than file_exists(): a directory is not a CSV file.
        if (!is_file($file_path)) {
            return new WP_Error('file_not_found', __('CSV file not found.', 'screaming-fixes'));
        }

        if (!is_readable($file_path)) {
            return new WP_Error('file_not_readable', __('CSV file is not readable.', 'screaming-fixes'));
        }

        // Set column filter from options
        $this->extract_columns = $options['extract_columns'] ?? null;

        // Detect delimiter
        $this->delimiter = $this->detect_delimiter($file_path);

        $handle = fopen($file_path, 'r');

        if (!$handle) {
            return new WP_Error('file_open_failed', __('Could not open CSV file.', 'screaming-fixes'));
        }

        // Skip a UTF-8 BOM if present; otherwise start again from byte 0
        if (fread($handle, 3) !== "\xEF\xBB\xBF") {
            rewind($handle);
        }

        // Get headers. A blank first line comes back as [null], which is a
        // non-empty array, so it has to be rejected explicitly.
        $headers = $this->read_record($handle);

        if (!is_array($headers) || $this->is_blank_row($headers)) {
            fclose($handle);
            return new WP_Error('no_headers', __('Could not read CSV headers.', 'screaming-fixes'));
        }

        // Clean headers
        $headers = $this->clean_headers($headers);

        // Build column index map if filtering
        $column_indices = null;
        if ($this->extract_columns !== null) {
            $column_indices = $this->build_column_index_map($headers, $this->extract_columns);
        }

        // Parse rows
        $data = [];
        $total_rows_in_file = 0;
        $is_truncated = false;
        $max_rows = max(0, (int) ($options['max_rows'] ?? $this->max_rows));

        while (($row = $this->read_record($handle)) !== false) {
            // Blank records are not data: do not store or count them
            if (!is_array($row) || $this->is_blank_row($row)) {
                continue;
            }

            $total_rows_in_file++;

            // Past the limit: keep counting records but do not store them
            if (count($data) >= $max_rows) {
                $is_truncated = true;
                continue;
            }

            // Map row to headers (with optional column filtering)
            $parsed_row = $this->map_row_to_headers($headers, $row, $column_indices);

            if ($parsed_row === null) {
                continue;
            }

            $data[] = $parsed_row;

            // Periodic garbage collection for large files; only checked right
            // after a row was stored so it runs once per 5000 stored rows.
            if (count($data) % 5000 === 0) {
                gc_collect_cycles();
            }
        }

        fclose($handle);

        if (empty($data)) {
            return new WP_Error('no_data', __('CSV file contains no data rows.', 'screaming-fixes'));
        }

        return [
            'headers' => $column_indices !== null ? array_keys($column_indices) : $headers,
            'rows' => $data,
            'count' => count($data),
            'total_in_file' => $total_rows_in_file,
            'truncated' => $is_truncated,
            'truncated_message' => $is_truncated
                ? sprintf(__('Showing %d of %d rows. Large files are automatically limited for performance.', 'screaming-fixes'), count($data), $total_rows_in_file)
                : null,
        ];
    }

    /**
     * Parse CSV from string content
     *
     * The content is written to a temporary file, parsed with parse(), and the
     * temporary file is removed afterwards regardless of the outcome.
     *
     * @param string $content CSV content
     * @param array $options Parsing options (see parse())
     * @return array|WP_Error Parsed data or error
     */
    public function parse_string($content, $options = []) {
        // Create temp file
        $temp_file = wp_tempnam('sf_csv_');

        if (!$temp_file) {
            return new WP_Error('temp_file_failed', __('Could not create temporary file.', 'screaming-fixes'));
        }

        try {
            if (file_put_contents($temp_file, $content) === false) {
                return new WP_Error('temp_file_write_failed', __('Could not write to temporary file.', 'screaming-fixes'));
            }

            return $this->parse($temp_file, $options);
        } finally {
            if (file_exists($temp_file)) {
                unlink($temp_file);
            }
        }
    }

    /**
     * Read one CSV record from an open handle using the detected delimiter
     *
     * Enclosure and escape are passed explicitly (with their historical
     * default values) because relying on the implicit escape default is
     * deprecated as of PHP 8.4.
     *
     * @param resource $handle Open file handle
     * @return array|false|null Record fields, or false at end of file / on error
     */
    private function read_record($handle) {
        return fgetcsv($handle, 0, $this->delimiter, '"', '\\');
    }

    /**
     * Tell whether a raw CSV record carries no data at all
     *
     * A record is blank when every field is null or only whitespace. The
     * string "0" is a real value and does not make a record blank.
     *
     * @param array $row Raw record as returned by fgetcsv()
     * @return bool True when the record has no non-empty field
     */
    private function is_blank_row($row) {
        foreach ($row as $cell) {
            if ($cell !== null && trim((string) $cell) !== '') {
                return false;
            }
        }

        return true;
    }

    /**
     * Detect CSV delimiter
     *
     * Counts candidate delimiters in the first five lines, ignoring anything
     * inside double-quoted fields so that commas or semicolons in values do
     * not skew the result. Ties go to the earlier candidate (comma first).
     *
     * @param string $file_path Path to CSV file
     * @return string Delimiter character (',' when nothing can be detected)
     */
    private function detect_delimiter($file_path) {
        $handle = fopen($file_path, 'r');

        if (!$handle) {
            return ',';
        }

        // Read first few lines
        $sample = '';
        for ($i = 0; $i < 5; $i++) {
            $line = fgets($handle);
            if ($line === false) {
                break;
            }
            $sample .= $line;
        }

        fclose($handle);

        $candidates = [',', ';', "\t", '|'];

        // Drop quoted segments. An escaped quote ("") simply splits a field
        // into adjacent quoted chunks, all of which are removed.
        $unquoted = preg_replace('/"[^"]*"/', '', $sample);

        // Prefer the unquoted text; fall back to the raw sample when stripping
        // failed or left no delimiter at all (e.g. stray quotes in the data).
        $texts = $unquoted === null ? [$sample] : [$unquoted, $sample];

        foreach ($texts as $text) {
            $best = null;
            $best_count = 0;

            foreach ($candidates as $candidate) {
                $count = substr_count($text, $candidate);
                // Strict ">" keeps the earlier candidate on ties
                if ($count > $best_count) {
                    $best = $candidate;
                    $best_count = $count;
                }
            }

            if ($best !== null) {
                return $best;
            }
        }

        return ',';
    }

    /**
     * Lowercase a string, multibyte-aware when mbstring is available
     *
     * @param string $text Input text
     * @return string Lowercased text
     */
    private function lowercase($text) {
        return function_exists('mb_strtolower')
            ? mb_strtolower($text, 'UTF-8')
            : strtolower($text);
    }

    /**
     * Remove BOMs and other invisible control/format characters
     *
     * Valid UTF-8 keeps its printable non-ASCII characters. Text that is not
     * valid UTF-8 falls back to stripping every control and non-ASCII byte.
     *
     * @param string $text Input text
     * @return string Text without invisible characters
     */
    private function strip_invisible_characters($text) {
        // Cc = control characters, Cf = format characters (includes U+FEFF)
        $stripped = preg_replace('/[\p{Cc}\p{Cf}]+/u', '', $text);

        if ($stripped === null) {
            // preg_replace() yields null for invalid UTF-8 under the /u flag
            $stripped = preg_replace('/[\x00-\x1F\x80-\xFF]/', '', $text);
        }

        return $stripped === null ? $text : $stripped;
    }

    /**
     * Clean and normalize headers
     *
     * Each header is stripped of invisible characters, trimmed, lowercased and
     * mapped to its canonical spelling when it is a known variation.
     *
     * @param array $headers Raw headers
     * @return array Cleaned headers (same keys as the input)
     */
    private function clean_headers($headers) {
        // Common variations => canonical header name
        $normalizations = [
            'status_code' => 'status code',
            'statuscode' => 'status code',
            'alt_text' => 'alt text',
            'alttext' => 'alt text',
            'redirect_url' => 'redirect url',
            'redirecturl' => 'redirect url',
            'source_url' => 'source',
            'sourceurl' => 'source',
            'dest_url' => 'destination',
            'desturl' => 'destination',
            'destination_url' => 'destination',
        ];

        $cleaned = [];

        foreach ($headers as $index => $header) {
            $header = $this->strip_invisible_characters((string) $header);
            $header = $this->lowercase(trim($header));

            $cleaned[$index] = $normalizations[$header] ?? $header;
        }

        return $cleaned;
    }

    /**
     * Build a map of column names to their indices in the CSV
     *
     * For each wanted column an exact header match is preferred; only when
     * there is none is the first header containing the wanted text used
     * (e.g. "status" matches "status code"). Empty wanted names are ignored.
     *
     * @param array $headers All CSV headers (already cleaned)
     * @param array $wanted_columns Columns we want to extract
     * @return array Map of column_name => index
     */
    private function build_column_index_map($headers, $wanted_columns) {
        $map = [];

        foreach ((array) $wanted_columns as $wanted) {
            $needle = $this->lowercase(trim((string) $wanted));

            // An empty needle would match every header (or warn on PHP 7)
            if ($needle === '') {
                continue;
            }

            $index = array_search($needle, $headers, true);

            if ($index === false) {
                foreach ($headers as $candidate_index => $header) {
                    if (strpos($header, $needle) !== false) {
                        $index = $candidate_index;
                        break;
                    }
                }
            }

            if ($index !== false) {
                $map[$headers[$index]] = $index;
            }
        }

        return $map;
    }

    /**
     * Map a row to headers
     *
     * Values are trimmed; missing cells become ''. A row counts as empty only
     * when every mapped value is the empty string, so "0" is kept as data.
     *
     * @param array $headers Column headers
     * @param array $row Row data
     * @param array|null $column_indices Optional map of column_name => index for filtering
     * @return array|null Mapped row or null if every mapped value is empty
     */
    private function map_row_to_headers($headers, $row, $column_indices = null) {
        $mapped = [];

        if ($column_indices !== null) {
            // Column filter active: only extract the requested columns
            foreach ($column_indices as $header => $index) {
                $mapped[$header] = isset($row[$index]) ? trim((string) $row[$index]) : '';
            }
        } else {
            // Extract all columns
            foreach ($headers as $index => $header) {
                $mapped[$header] = isset($row[$index]) ? trim((string) $row[$index]) : '';
            }
        }

        foreach ($mapped as $value) {
            if ($value !== '') {
                return $mapped;
            }
        }

        // Nothing but empty strings
        return null;
    }

    /**
     * Get specific columns from parsed data
     *
     * @param array $data Parsed CSV data (uses $data['rows'])
     * @param array $columns Column names to extract
     * @return array Rows reduced to the requested columns; rows that contain
     *               none of them are omitted
     */
    public function get_columns($data, $columns) {
        if (empty($data['rows'])) {
            return [];
        }

        $filtered = [];

        foreach ($data['rows'] as $row) {
            $filtered_row = [];

            foreach ($columns as $column) {
                if (isset($row[$column])) {
                    $filtered_row[$column] = $row[$column];
                }
            }

            if (!empty($filtered_row)) {
                $filtered[] = $filtered_row;
            }
        }

        return $filtered;
    }

    /**
     * Filter rows by column value
     *
     * Supported operators: =, ==, ===, !=, <>, >, <, >=, <=, contains,
     * starts_with, ends_with, empty, not_empty. Unknown operators match
     * nothing. Rows that do not have the column never match.
     *
     * @param array $data Parsed CSV data (uses $data['rows'])
     * @param string $column Column to filter
     * @param mixed $value Value to match
     * @param string $operator Comparison operator
     * @return array Filtered rows, keyed by their original position in $data['rows']
     */
    public function filter_rows($data, $column, $value, $operator = '=') {
        if (empty($data['rows'])) {
            return [];
        }

        return array_filter($data['rows'], function ($row) use ($column, $value, $operator) {
            if (!isset($row[$column])) {
                return false;
            }

            return $this->cell_matches($row[$column], $value, $operator);
        });
    }

    /**
     * Evaluate a single filter comparison
     *
     * @param mixed $cell_value Cell content
     * @param mixed $value Value to compare against
     * @param string $operator Comparison operator (see filter_rows())
     * @return bool Whether the cell satisfies the comparison
     */
    private function cell_matches($cell_value, $value, $operator) {
        switch ($operator) {
            case '=':
            case '==':
                return $this->values_equal($cell_value, $value);

            case '===':
                return $cell_value === $value;

            case '!=':
            case '<>':
                return !$this->values_equal($cell_value, $value);

            case '>':
            case '<':
            case '>=':
            case '<=':
                // A non-numeric operand has no order; casting it to 0 would
                // make e.g. an empty status code satisfy "< 400".
                if (!is_numeric($cell_value) || !is_numeric($value)) {
                    return false;
                }

                $left = (float) $cell_value;
                $right = (float) $value;

                if ($operator === '>') {
                    return $left > $right;
                }
                if ($operator === '<') {
                    return $left < $right;
                }
                if ($operator === '>=') {
                    return $left >= $right;
                }
                return $left <= $right;

            case 'contains':
                $needle = (string) $value;
                return $needle === '' || strpos((string) $cell_value, $needle) !== false;

            case 'starts_with':
                $needle = (string) $value;
                // Comparing zero bytes yields 0, so an empty prefix matches
                return strncmp((string) $cell_value, $needle, strlen($needle)) === 0;

            case 'ends_with':
                $needle = (string) $value;
                // substr($s, -0) returns the whole string, so the empty
                // suffix has to be handled separately.
                return $needle === '' || substr((string) $cell_value, -strlen($needle)) === $needle;

            case 'empty':
                return trim((string) $cell_value) === '';

            case 'not_empty':
                return trim((string) $cell_value) !== '';

            default:
                return false;
        }
    }

    /**
     * Loose equality that behaves the same on every PHP version
     *
     * Two numeric operands are compared numerically ("200" equals 200.0);
     * anything else is compared as strings, so "abc" never equals 0.
     *
     * @param mixed $left First operand
     * @param mixed $right Second operand
     * @return bool Whether the operands are considered equal
     */
    private function values_equal($left, $right) {
        if (is_numeric($left) && is_numeric($right)) {
            return (float) $left == (float) $right;
        }

        return (string) $left === (string) $right;
    }

    /**
     * Get unique values from a column
     *
     * Null and empty-string values are skipped. Values are considered equal
     * when their string forms are identical.
     *
     * @param array $data Parsed CSV data (uses $data['rows'])
     * @param string $column Column name
     * @return array Unique values, each keyed by the position of the row where
     *               it first occurred
     */
    public function get_unique_values($data, $column) {
        if (empty($data['rows'])) {
            return [];
        }

        $unique = [];
        $seen = [];

        foreach ($data['rows'] as $key => $row) {
            $value = $row[$column] ?? null;

            if ($value === null || $value === '') {
                continue;
            }

            $fingerprint = (string) $value;

            if (isset($seen[$fingerprint])) {
                continue;
            }

            $seen[$fingerprint] = true;
            $unique[$key] = $value;
        }

        return $unique;
    }

    /**
     * Export data to CSV string
     *
     * @param array $data Rows to export (list of associative arrays)
     * @param array $columns Optional specific columns to include; defaults to
     *                       the keys of the first row
     * @return string CSV content ('' when there is nothing to export)
     */
    public function export_to_csv($data, $columns = []) {
        if (!is_array($data) || empty($data)) {
            return '';
        }

        // Get headers from first row if columns not specified
        if (empty($columns)) {
            $first_row = reset($data);

            if (!is_array($first_row)) {
                return '';
            }

            $columns = array_keys($first_row);
        }

        $output = fopen('php://temp', 'r+');

        if ($output === false) {
            return '';
        }

        // Separator, enclosure and escape are spelled out (historical default
        // values) because the implicit escape default is deprecated in PHP 8.4.
        fputcsv($output, $columns, ',', '"', '\\');

        // Write data rows; columns a row lacks are exported as ''
        foreach ($data as $row) {
            $csv_row = [];
            foreach ($columns as $column) {
                $csv_row[] = $row[$column] ?? '';
            }
            fputcsv($output, $csv_row, ',', '"', '\\');
        }

        rewind($output);
        $csv_content = stream_get_contents($output);
        fclose($output);

        return $csv_content === false ? '' : $csv_content;
    }

    /**
     * Set maximum rows to process
     *
     * @param int $max Maximum rows (normalised with absint())
     */
    public function set_max_rows($max) {
        $this->max_rows = absint($max);
    }

    /**
     * Get column mapping for a module type
     *
     * Each entry maps a standard column name to the CSV header names that may
     * supply it, in order of preference.
     *
     * @param string $module_id Module identifier
     * @return array Column mapping, or an empty array for an unknown module
     */
    public function get_column_mapping($module_id) {
        $mappings = [
            'broken-links' => [
                'url' => ['destination', 'url', 'address', 'link'],
                'source' => ['source', 'from', 'page', 'source url'],
                'status_code' => ['status code', 'status', 'response code', 'http status'],
                'anchor' => ['anchor', 'anchor text', 'link text'],
                'type' => ['type', 'link type'],
            ],
            'redirect-chains' => [
                'original_url' => ['address', 'url', 'original url', 'source url', 'initial url'],
                'redirect_url' => ['redirect url', 'redirect to', 'redirects to'],
                'final_url' => ['final url', 'final destination', 'destination'],
                'status_code' => ['status code', 'status', 'redirect type'],
                'hops' => ['hops', 'number of redirects', 'chain length'],
            ],
            'image-alt-text' => [
                'src' => ['src', 'url', 'image url', 'source', 'image'],
                'alt_text' => ['alt text', 'alt', 'alternative text', 'alt attribute'],
                'status_code' => ['status code', 'status'],
                'size' => ['size', 'file size', 'bytes'],
            ],
        ];

        return $mappings[$module_id] ?? [];
    }

    /**
     * Map parsed data to standardized column names
     *
     * For every standard name the first candidate column holding a non-empty
     * value is used ("0" counts as a value). All original columns whose name
     * is not already taken by a standard name are kept alongside. Any extra
     * keys of $data (e.g. total_in_file, truncated_message) are preserved.
     *
     * @param array $data Parsed CSV data
     * @param string $module_id Module identifier
     * @return array Standardized data; $data unchanged when the module is
     *               unknown or there are no rows
     */
    public function standardize_columns($data, $module_id) {
        $mapping = $this->get_column_mapping($module_id);

        if (empty($mapping) || empty($data['rows'])) {
            return $data;
        }

        $standardized_rows = [];

        foreach ($data['rows'] as $row) {
            $standardized_row = [];

            foreach ($mapping as $standard_name => $possible_names) {
                foreach ($possible_names as $possible_name) {
                    if (isset($row[$possible_name]) && $row[$possible_name] !== '') {
                        $standardized_row[$standard_name] = $row[$possible_name];
                        break;
                    }
                }
            }

            // Keep the original columns too; array union never overwrites a
            // standard name that has already been filled in.
            $standardized_row += $row;

            if (!empty($standardized_row)) {
                $standardized_rows[] = $standardized_row;
            }
        }

        return array_merge($data, [
            'headers' => array_keys($mapping),
            'rows' => $standardized_rows,
            'count' => count($standardized_rows),
            'truncated' => $data['truncated'] ?? false,
        ]);
    }
}
