#!/usr/bin/env python3
"""
BetPredictAI - Sistema di Aggiornamento Automatico

Esegue quotidianamente:
1. Scarica nuovi risultati delle partite
2. Verifica le predizioni passate
3. Aggiorna il database
4. Ri-addestra il modello periodicamente (settimanale)
5. Genera nuove predizioni per partite future

Uso: python3 auto_update.py [--retrain]
"""

import os
import sys
import json
import pandas as pd
from datetime import datetime, timedelta
import time
import subprocess
import logging

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(BASE_DIR, '..', 'data')
MODELS_DIR = os.path.join(BASE_DIR, '..', 'models')
LOGS_DIR = os.path.join(BASE_DIR, '..', 'logs')

# Ensure directories exist before the file log handler is created
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(LOGS_DIR, exist_ok=True)

# Setup logging (file + console)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
    handlers=[
        logging.FileHandler(os.path.join(LOGS_DIR, 'auto_update.log')),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

LEAGUES = {
    'E0': {'name': 'Premier League', 'country': 'England'},
    'D1': {'name': 'Bundesliga', 'country': 'Germany'},
    'I1': {'name': 'Serie A', 'country': 'Italy'},
    'SP1': {'name': 'La Liga', 'country': 'Spain'},
    'F1': {'name': 'Ligue 1', 'country': 'France'},
}

def get_current_season():
    """Get current season code (e.g., '2425' for 2024/25)"""
    now = datetime.now()
    year = now.year
    month = now.month

    if month >= 8:  # Season starts in August
        return f"{str(year)[2:]}{str(year+1)[2:]}"
    else:
        return f"{str(year-1)[2:]}{str(year)[2:]}"
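
# For reference: in March 2025 get_current_season() returns '2425' (the 2024/25
# season) and in September 2025 it returns '2526', matching the season segment
# used in the football-data.co.uk URLs below (e.g. /mmz4281/2425/E0.csv).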


def download_latest_results():
    """Download latest match results from football-data.co.uk"""
    logger.info("📥 Downloading latest match results...")

    season = get_current_season()
    new_matches = []

    for league_code, league_info in LEAGUES.items():
        url = f"https://www.football-data.co.uk/mmz4281/{season}/{league_code}.csv"

        try:
            df = pd.read_csv(url, encoding='utf-8', on_bad_lines='skip')

            if len(df) > 0:
                df['league_code'] = league_code
                df['league_name'] = league_info['name']
                df['season'] = f"20{season[:2]}/20{season[2:]}"
                new_matches.append(df)
                logger.info(f"  ✓ {league_info['name']}: {len(df)} matches")

            time.sleep(0.5)  # Rate limiting

        except Exception as e:
            logger.warning(f"  ✗ {league_info['name']}: {e}")

    if new_matches:
        combined = pd.concat(new_matches, ignore_index=True)
        logger.info(f"  Total: {len(combined)} matches downloaded")
        return combined

    return None
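
# Note: the downloaded CSVs keep football-data.co.uk's raw schema, with columns
# such as Date/HomeTeam/AwayTeam and full-time results in FTHG/FTAG/FTR, plus
# bookmaker odds columns (e.g. B365H/B365D/B365A); the exact set of odds
# columns can vary by league and season.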


def update_historical_data(new_df):
    """Update historical data with new matches"""
    logger.info("🔄 Updating historical database...")

    historical_path = os.path.join(DATA_DIR, 'advanced_historical_matches.csv')

    if os.path.exists(historical_path):
        existing_df = pd.read_csv(historical_path, low_memory=False)

        # Create unique match ID
        new_df['match_id'] = new_df.apply(
            lambda x: f"{x.get('Date', '')}_{x.get('HomeTeam', '')}_{x.get('AwayTeam', '')}",
            axis=1
        )
        existing_df['match_id'] = existing_df.apply(
            lambda x: f"{x.get('date', '')}_{x.get('home_team', '')}_{x.get('away_team', '')}",
            axis=1
        )

        # Find new matches
        existing_ids = set(existing_df['match_id'].values)
        new_matches = new_df[~new_df['match_id'].isin(existing_ids)]

        if len(new_matches) > 0:
            logger.info(f"  Found {len(new_matches)} new matches")

            # Standardize columns and append to the historical CSV.
            # (Simplified here; the full column mapping is elided, see the
            # illustrative sketch after this function.)

            return len(new_matches)
        else:
            logger.info("  No new matches to add")
            return 0

    return 0
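

# A minimal sketch of the elided "standardize and append" step above. The raw
# football-data.co.uk names (Date/HomeTeam/AwayTeam/FTHG/FTAG/FTR) are known,
# but every target column other than date/home_team/away_team is an assumption
# about the advanced_historical_matches.csv schema and may need adjusting.
def _append_new_matches_sketch(existing_df, new_matches, historical_path):
    """Illustrative only: map raw columns onto the local schema and append."""
    column_map = {
        'Date': 'date',
        'HomeTeam': 'home_team',
        'AwayTeam': 'away_team',
        'FTHG': 'home_goals',   # assumed target name
        'FTAG': 'away_goals',   # assumed target name
        'FTR': 'result',        # assumed target name
    }
    standardized = new_matches.rename(columns=column_map)
    # football-data.co.uk dates are day-first; the stored ISO format is an assumption
    standardized['date'] = pd.to_datetime(
        standardized['date'], dayfirst=True, errors='coerce'
    ).dt.strftime('%Y-%m-%d')
    # Keep only columns that already exist in the historical file, then append
    keep = [col for col in existing_df.columns if col in standardized.columns]
    combined = pd.concat([existing_df, standardized[keep]], ignore_index=True)
    combined = combined.drop(columns=['match_id'], errors='ignore')
    combined.to_csv(historical_path, index=False)
    return len(standardized)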


def verify_past_predictions():
    """Verify predictions that should now have results"""
    logger.info("🔍 Verifying past predictions...")

    predictions_path = os.path.join(DATA_DIR, 'predictions_upcoming.json')
    verified_path = os.path.join(DATA_DIR, 'predictions_verified.json')

    if not os.path.exists(predictions_path):
        logger.info("  No predictions to verify")
        return

    with open(predictions_path) as f:
        predictions = json.load(f)

    # Load existing verified predictions
    verified = []
    if os.path.exists(verified_path):
        with open(verified_path) as f:
            verified = json.load(f)

    today = datetime.now().date()
    updated_predictions = []
    newly_verified = 0

    for pred in predictions:
        match_date = datetime.strptime(pred['date'], '%Y-%m-%d').date()

        if match_date < today:
            # This match should now have a result. In production the actual
            # outcome would be fetched and compared against the prediction
            # (see the illustrative sketch after this function); for now the
            # entry is moved to the verified list for manual checking.
            pred['status'] = 'pending_verification'
            verified.append(pred)
            newly_verified += 1
        else:
            # Keep future predictions
            updated_predictions.append(pred)

    # Save updated predictions
    with open(predictions_path, 'w') as f:
        json.dump(updated_predictions, f, indent=2)

    # Save verified predictions
    with open(verified_path, 'w') as f:
        json.dump(verified, f, indent=2)

    logger.info(f"  Moved {newly_verified} predictions to verification queue")
    logger.info(f"  Remaining upcoming: {len(updated_predictions)}")
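

# A minimal sketch of how a single prediction could be checked against a row of
# downloaded results. Only pred['date'] is confirmed by the code above; the
# 'home_team', 'away_team' and 'prediction' keys, and the 'H'/'D'/'A' outcome
# encoding, are assumptions about the predictions_upcoming.json schema.
def _check_prediction_sketch(pred, results_df):
    """Illustrative only: return 'correct', 'wrong' or None if no result is found."""
    row = results_df[
        (results_df['HomeTeam'] == pred.get('home_team')) &
        (results_df['AwayTeam'] == pred.get('away_team'))
    ]
    if row.empty or 'FTR' not in row.columns:
        return None
    actual = row.iloc[0]['FTR']  # full-time result: 'H', 'D' or 'A'
    return 'correct' if pred.get('prediction') == actual else 'wrong'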


def generate_new_predictions():
    """Generate predictions for upcoming matches"""
    logger.info("🔮 Generating new predictions...")

    try:
        # Run the fixture collector
        result = subprocess.run(
            ['python3', 'fixture_collector.py'],
            cwd=BASE_DIR,
            capture_output=True,
            text=True,
            timeout=300
        )

        if result.returncode == 0:
            logger.info("  ✓ Predictions generated successfully")
        else:
            logger.error(f"  ✗ Error: {result.stderr}")

    except Exception as e:
        logger.error(f"  ✗ Failed to generate predictions: {e}")


def check_retrain_needed():
    """Check if model retraining is needed (weekly)"""
    meta_path = os.path.join(MODELS_DIR, 'betting_model_v3_meta.json')

    if not os.path.exists(meta_path):
        return True

    with open(meta_path) as f:
        meta = json.load(f)

    trained_at = datetime.fromisoformat(meta.get('trained_at', '2000-01-01'))
    days_since_training = (datetime.now() - trained_at).days

    logger.info(f"  Last training: {days_since_training} days ago")

    return days_since_training >= 7  # Retrain weekly
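
# For reference, the only field this script reads from betting_model_v3_meta.json
# is 'trained_at', which must be an ISO-8601 timestamp accepted by
# datetime.fromisoformat(), e.g. {"trained_at": "2025-01-10T03:00:00"}.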


def retrain_model():
    """Retrain the model with latest data"""
    logger.info("🧠 Retraining model...")

    try:
        # First, collect advanced data
        result = subprocess.run(
            ['python3', 'advanced_data_collector.py'],
            cwd=BASE_DIR,
            capture_output=True,
            text=True,
            timeout=600
        )

        if result.returncode != 0:
            logger.error(f"  ✗ Data collection failed: {result.stderr}")
            return False

        # Then train
        result = subprocess.run(
            ['python3', 'train_model_v3.py'],
            cwd=BASE_DIR,
            capture_output=True,
            text=True,
            timeout=1800
        )

        if result.returncode == 0:
            logger.info("  ✓ Model retrained successfully")
            return True
        else:
            logger.error(f"  ✗ Training failed: {result.stderr}")
            return False

    except Exception as e:
        logger.error(f"  ✗ Retrain error: {e}")
        return False


def regenerate_verification():
    """Regenerate verification results with V4 model"""
    logger.info("📊 Regenerating verification results (V4)...")

    try:
        # Try V4 first, fall back to V3
        v4_script = os.path.join(BASE_DIR, 'generate_verification_v4.py')
        v3_script = os.path.join(BASE_DIR, 'generate_verification.py')

        script = v4_script if os.path.exists(v4_script) else v3_script
        script_name = 'V4' if 'v4' in script else 'V3'

        result = subprocess.run(
            ['python3', script],
            cwd=BASE_DIR,
            capture_output=True,
            text=True,
            timeout=180
        )

        if result.returncode == 0:
            logger.info(f"  ✓ Verification {script_name} regenerated")
        else:
            logger.error(f"  ✗ Error: {result.stderr}")

    except Exception as e:
        logger.error(f"  ✗ Verification error: {e}")


def restart_api():
    """Restart the API server to load new model"""
    logger.info("🔄 Restarting API server...")

    try:
        result = subprocess.run(
            ['sudo', 'systemctl', 'restart', 'bet-backend.service'],
            capture_output=True,
            text=True,
            timeout=30
        )

        if result.returncode == 0:
            logger.info("  ✓ API restarted")
        else:
            logger.warning(f"  ⚠ Restart warning: {result.stderr}")

    except Exception as e:
        logger.error(f"  ✗ Restart error: {e}")
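
# Note: the sudo call above assumes a passwordless sudoers rule for the account
# running this script, e.g. (the username is an assumption):
#   www-data ALL=(root) NOPASSWD: /usr/bin/systemctl restart bet-backend.service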


def main():
    """Main update routine"""
    logger.info("="*60)
    logger.info("🚀 BetPredictAI - Aggiornamento Automatico")
    logger.info(f"   Data: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    logger.info("="*60)

    force_retrain = '--retrain' in sys.argv

    # 1. Download latest results
    new_data = download_latest_results()

    # 2. Update the database with any newly downloaded matches
    if new_data is not None:
        update_historical_data(new_data)

    # 3. Verify past predictions
    verify_past_predictions()

    # 4. Check if retrain needed
    if force_retrain or check_retrain_needed():
        logger.info("\n📚 Retraining scheduled...")
        if retrain_model():
            restart_api()

    # 5. Regenerate verification with current model
    regenerate_verification()

    # 6. Generate new predictions
    generate_new_predictions()

    logger.info("\n" + "="*60)
    logger.info("✅ Aggiornamento completato!")
    logger.info("="*60)


if __name__ == '__main__':
    main()
