<?php
/**
 * Lightweight Extraction Pressure Telemetry
 * 
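 * Rolling per-day counters stored in a single WordPress transient. The
 * retention window is configurable (daily, weekly, monthly or ongoing;
 * weekly / 7 days by default). No database bloat, no IP storage.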
 */

namespace BlockAI;

/**
 * Collects anonymous "blocked request" statistics.
 *
 * All data lives in one transient shaped like:
 *
 *   [
 *     'days' => [
 *       'Y-m-d' => [
 *         'total_blocked'   => int,
 *         'by_reason'       => [reason => int],
 *         'by_category'     => [category => int],
 *         'top_user_agents' => [user agent => int],
 *       ],
 *       ...
 *     ],
 *     'last_updated' => 'Y-m-d',
 *   ]
 *
 * Day keys are UTC dates. Updates are a plain read-modify-write of the
 * transient without locking, so concurrent requests may lose increments.
 */
class Telemetry {

    /** Maximum number of bytes of a User-Agent string that is stored. */
    const MAX_USER_AGENT_BYTES = 512;

    /** Number of distinct user agents kept per day. */
    const MAX_TRACKED_USER_AGENTS = 10;

    /** Retention used when the option is missing or has an unexpected value. */
    const DEFAULT_RETENTION_DAYS = 7;

    // NOTE(review): not referenced anywhere in this class; kept untouched.
    private $transient_prefix = 'block_ai_daily_';

    // Name of the transient holding the whole counters structure.
    private $counter_transient = 'block_ai_weekly_counters';

    /**
     * Translate the `block_ai_stats_retention` option into a number of days.
     *
     * @return int Calendar days to keep (today included); 0 means "never
     *             delete automatically" (the 'ongoing' setting).
     */
    private function get_retention_days(): int {
        $days_by_setting = [
            'daily'   => 1,
            'weekly'  => 7,
            'monthly' => 30,
            'ongoing' => 0, // 0 means no automatic deletion
        ];

        $setting = get_option('block_ai_stats_retention', 'weekly');

        // Strict lookup: only the known string settings are honoured, anything
        // else (including non-string option values) falls back to the default.
        if (is_string($setting) && isset($days_by_setting[$setting])) {
            return $days_by_setting[$setting];
        }

        return self::DEFAULT_RETENTION_DAYS;
    }

    /**
     * Initialize telemetry.
     *
     * Schedules the cleanup event if it is not scheduled yet and attaches
     * the cleanup callback. The hook is named "weekly" but the event is
     * registered with the 'daily' recurrence.
     */
    public function init() {
        if (!wp_next_scheduled('block_ai_weekly_cleanup')) {
            wp_schedule_event(time(), 'daily', 'block_ai_weekly_cleanup');
        }
        add_action('block_ai_weekly_cleanup', [$this, 'cleanup_old_counters']);
    }

    /**
     * Record one blocked request for the current UTC day.
     *
     * Updates the day's total, the per-reason and per-category tallies and
     * the per-user-agent tally, then persists the structure.
     *
     * @param string $reason Identifier of the rule that blocked the request.
     */
    public function increment_blocked($reason = 'unknown') {
        $today = gmdate('Y-m-d');

        // get_counters() already drops expired days, so no extra pruning is
        // needed before saving.
        $counters = $this->get_counters();

        $day = (isset($counters['days'][$today]) && is_array($counters['days'][$today]))
            ? $counters['days'][$today]
            : [];

        // Fill in whatever keys are missing (new day or partially stored data).
        $day += [
            'total_blocked' => 0,
            'by_reason' => [],
            'by_category' => [],
            'top_user_agents' => [],
        ];

        $day['total_blocked']++;
        $day['by_reason'][$reason] = ($day['by_reason'][$reason] ?? 0) + 1;

        $ua = $this->current_user_agent();
        $category = $this->categorize_user_agent($ua);
        $day['by_category'][$category] = ($day['by_category'][$category] ?? 0) + 1;

        // Track user agents (plain string, no IP). Only the highest counts
        // are kept after every call, so a user agent first seen once the
        // list is full may be discarded again before it can accumulate.
        $day['top_user_agents'][$ua] = ($day['top_user_agents'][$ua] ?? 0) + 1;
        arsort($day['top_user_agents']);
        $day['top_user_agents'] = array_slice(
            $day['top_user_agents'],
            0,
            self::MAX_TRACKED_USER_AGENTS,
            true
        );

        $counters['days'][$today] = $day;
        $counters['last_updated'] = $today;

        $this->save_counters($counters);
    }

    /**
     * Read the sanitized User-Agent of the current request.
     *
     * The value is capped at MAX_USER_AGENT_BYTES bytes. When mbstring is
     * available the cut is made on a character boundary so that no partial
     * multibyte sequence is stored.
     *
     * @return string The user agent, or 'unknown' when the header is missing
     *                or empty after sanitizing.
     */
    private function current_user_agent(): string {
        if (!isset($_SERVER['HTTP_USER_AGENT']) || !is_string($_SERVER['HTTP_USER_AGENT'])) {
            return 'unknown';
        }

        $ua = (string) sanitize_text_field(wp_unslash($_SERVER['HTTP_USER_AGENT']));

        // Limit length to prevent DoS
        if (strlen($ua) > self::MAX_USER_AGENT_BYTES) {
            $ua = function_exists('mb_strcut')
                ? mb_strcut($ua, 0, self::MAX_USER_AGENT_BYTES, 'UTF-8')
                : substr($ua, 0, self::MAX_USER_AGENT_BYTES);
        }

        return $ua === '' ? 'unknown' : $ua;
    }

    /**
     * Get the stored counters for the configured retention window.
     *
     * A missing or malformed transient yields an empty structure. Expired
     * days are removed from the returned array (the transient itself is not
     * rewritten here).
     *
     * @return array Structure with a 'days' map keyed by 'Y-m-d'.
     */
    public function get_counters() {
        $counters = get_transient($this->counter_transient);

        // false means "no transient"; any other non-array value is corrupt.
        if (!is_array($counters)) {
            $counters = [
                'days' => [],
                'last_updated' => gmdate('Y-m-d'),
            ];
        }

        if (!isset($counters['days']) || !is_array($counters['days'])) {
            $counters['days'] = [];
        }

        return $this->cleanup_old_data($counters);
    }

    /**
     * Drop days that fall outside the retention window.
     *
     * The window covers exactly `retention` calendar days including today,
     * e.g. 'daily' keeps only today and 'weekly' keeps today plus the six
     * preceding days. With 'ongoing' retention nothing is removed.
     *
     * @param array $counters Counters structure.
     * @return array The structure without expired days.
     */
    private function cleanup_old_data(array $counters): array {
        if (!isset($counters['days']) || !is_array($counters['days'])) {
            return $counters;
        }

        $retention_days = $this->get_retention_days();

        // If ongoing (0 days), don't delete anything automatically
        if ($retention_days === 0) {
            return $counters;
        }

        // Oldest day that is still kept; today counts as the first day.
        $oldest_kept = gmdate('Y-m-d', time() - ($retention_days - 1) * DAY_IN_SECONDS);

        foreach (array_keys($counters['days']) as $date) {
            // 'Y-m-d' strings order chronologically when compared as strings.
            if (strcmp((string) $date, $oldest_kept) < 0) {
                unset($counters['days'][$date]);
            }
        }

        return $counters;
    }

    /**
     * Persist the counters structure in the transient.
     *
     * The expiration is the retention period plus a one day buffer, or ten
     * years for 'ongoing' retention.
     *
     * @param array $counters Counters structure to store.
     */
    private function save_counters($counters) {
        $retention_days = $this->get_retention_days();

        $lifetime = ($retention_days === 0)
            ? 10 * YEAR_IN_SECONDS
            : ($retention_days + 1) * DAY_IN_SECONDS;

        set_transient($this->counter_transient, $counters, $lifetime);
    }

    /**
     * Cleanup old counters (cron callback attached in init()).
     *
     * Loads the counters, which prunes expired days, and writes the result
     * back.
     */
    public function cleanup_old_counters() {
        $this->save_counters($this->get_counters());
    }

    /**
     * Classify a user agent string by case-insensitive substring matching.
     *
     * Categories are tested in the order listed; the first category with a
     * matching substring wins.
     *
     * @param string $ua User agent string.
     * @return string 'cloud_hosting', 'bot_crawler', 'browser' or 'unknown'.
     */
    private function categorize_user_agent($ua) {
        $patterns_by_category = [
            // Cloud hosting / datacenter
            'cloud_hosting' => [
                'aws', 'amazon', 'google cloud', 'azure', 'digitalocean',
                'linode', 'vultr', 'ovh', 'hetzner', 'scaleway',
                'rackspace', 'cloudflare', 'fastly', 'akamai',
            ],
            // Known bot/crawler
            'bot_crawler' => [
                'bot', 'crawler', 'spider', 'scraper', 'curl', 'wget',
                'python', 'java', 'go-http', 'node', 'scrapy',
            ],
            // Browser (common browsers like Chrome, Firefox, Safari, etc.)
            'browser' => [
                'chrome', 'firefox', 'safari', 'edge', 'opera', 'msie',
            ],
        ];

        $ua_lower = strtolower((string) $ua);

        foreach ($patterns_by_category as $category => $patterns) {
            foreach ($patterns as $pattern) {
                if (strpos($ua_lower, $pattern) !== false) {
                    return $category;
                }
            }
        }

        return 'unknown';
    }

    /**
     * Format a stored 'Y-m-d' day key for display.
     *
     * The key is parsed as UTC so the result does not depend on PHP's
     * default timezone.
     *
     * @param string $date   Day key.
     * @param string $format gmdate() format.
     * @return string Formatted date, or the raw key if it cannot be parsed.
     */
    private function format_day(string $date, string $format): string {
        $timestamp = strtotime($date . ' UTC');

        return $timestamp === false ? $date : gmdate($format, $timestamp);
    }

    /**
     * Get formatted stats for admin display.
     *
     * @return array {
     *     @type int    $total_blocked          Requests blocked today (UTC).
     *     @type int    $total_blocked_in_range Requests blocked over all stored days.
     *     @type string $top_user_agent         Most frequent user agent over all stored days, or 'None'.
     *     @type string $top_category           Most frequent category over all stored days, or 'None'.
     *     @type array  $by_reason              Reason => count over all stored days.
     *     @type string $date_range             First/last stored day, or the retention label when empty.
     *     @type string $date                   Today's date (UTC).
     *     @type int    $days_tracked           Number of stored days.
     * }
     */
    public function get_formatted_stats() {
        $counters = $this->get_counters();
        $today = gmdate('Y-m-d');
        $today_count = $counters['days'][$today]['total_blocked'] ?? 0;

        // Aggregate data across all days in retention period
        $range_total = 0;
        $by_reason = [];
        $by_category = [];
        $by_user_agent = [];
        $tracked_dates = [];

        foreach ($counters['days'] as $date => $day_data) {
            $tracked_dates[] = (string) $date;

            if (!is_array($day_data)) {
                continue;
            }

            $range_total += $day_data['total_blocked'] ?? 0;

            foreach ($day_data['by_reason'] ?? [] as $reason => $count) {
                $by_reason[$reason] = ($by_reason[$reason] ?? 0) + $count;
            }
            foreach ($day_data['by_category'] ?? [] as $category => $count) {
                $by_category[$category] = ($by_category[$category] ?? 0) + $count;
            }
            foreach ($day_data['top_user_agents'] ?? [] as $ua => $count) {
                $by_user_agent[$ua] = ($by_user_agent[$ua] ?? 0) + $count;
            }
        }

        // Most frequent user agent = first key after a descending sort.
        $top_ua = 'None';
        if (!empty($by_user_agent)) {
            arsort($by_user_agent);
            reset($by_user_agent);
            // Numeric-looking user agents become integer array keys.
            $top_ua = (string) key($by_user_agent);
        }

        // Most frequent category; the first one seen wins a tie.
        $top_category = 'None';
        $top_category_count = 0;
        foreach ($by_category as $category => $count) {
            if ($count > $top_category_count) {
                $top_category = $category;
                $top_category_count = $count;
            }
        }

        // Default label comes from the retention setting; it is replaced by
        // the actual first/last day as soon as any day is stored.
        $retention_labels = [
            'daily' => 'Today',
            'weekly' => 'Last 7 days',
            'monthly' => 'Last 30 days',
            'ongoing' => 'All time'
        ];
        $retention = get_option('block_ai_stats_retention', 'weekly');
        $date_range_str = (is_string($retention) && isset($retention_labels[$retention]))
            ? $retention_labels[$retention]
            : 'Last 7 days';

        if (!empty($tracked_dates)) {
            sort($tracked_dates, SORT_STRING);
            $start_date = $tracked_dates[0];
            $end_date = $tracked_dates[count($tracked_dates) - 1];

            if ($start_date === $end_date) {
                $date_range_str = $this->format_day($start_date, 'M j, Y');
            } else {
                $date_range_str = $this->format_day($start_date, 'M j')
                    . ' - '
                    . $this->format_day($end_date, 'M j, Y');
            }
        }

        return [
            'total_blocked' => $today_count, // Today's count for display
            'total_blocked_in_range' => $range_total,
            'top_user_agent' => $top_ua,
            'top_category' => $top_category,
            'by_reason' => $by_reason,
            'date_range' => $date_range_str,
            'date' => gmdate('M j, Y'), // Today's date for the stat box
            'days_tracked' => count($tracked_dates),
        ];
    }
}

