#!/usr/bin/env python3
"""
BET.CUTTALO.COM - AUTOMATIC DATA PIPELINE
Sistema automatico per:
1. Raccolta dati giornaliera
2. Aggiornamento fixtures
3. Verifica risultati
4. Re-training periodico del modello
"""

import os
import sys
import json
import logging
import subprocess
from datetime import datetime, timedelta
from pathlib import Path

# Resolve project-relative directories from this file's location
# (this script is expected to live one level below the project root).
BASE_DIR = Path(__file__).parent.parent
ML_DIR = BASE_DIR / 'ml'
DATA_DIR = BASE_DIR / 'data'
MODELS_DIR = BASE_DIR / 'models'
LOGS_DIR = BASE_DIR / 'logs'

# parents=True makes creation robust if intermediate directories are missing.
# DATA_DIR is also created up front: DataPipeline.save_status() writes
# data/pipeline_status.json and would otherwise fail on a fresh checkout.
LOGS_DIR.mkdir(parents=True, exist_ok=True)
DATA_DIR.mkdir(parents=True, exist_ok=True)

# Log both to a dated file and to the console.
# Explicit encoding: log messages contain non-ASCII (Italian) text, so we
# must not depend on the platform's default encoding.
log_file = LOGS_DIR / f'pipeline_{datetime.now().strftime("%Y%m%d")}.log'
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file, encoding='utf-8'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)


class DataPipeline:
    """Automated pipeline for data refresh, result verification and model retraining.

    The class is a thin orchestrator: each phase shells out to a standalone
    script under ``ML_DIR`` and a status snapshot is persisted to
    ``data/pipeline_status.json`` so consecutive runs can resume/inspect state.
    """

    # Keep at most this many historical error records in the status file,
    # so it does not grow without bound across runs.
    _MAX_ERRORS = 50

    def __init__(self):
        # Defaults first, then overlay whatever the previous run persisted.
        self.status = {
            'last_run': None,
            'data_updated': False,
            'fixtures_updated': False,
            'model_trained': False,
            'errors': []
        }
        self.load_status()

    def load_status(self):
        """Merge the previously persisted status (if any) into ``self.status``.

        Bug fix: the original replaced ``self.status`` wholesale, so a status
        file written by an older schema (e.g. missing ``'errors'``) made
        ``run_script`` raise ``KeyError`` later. Merging onto the defaults
        keeps every expected key present. A corrupt/unreadable file is
        ignored (best effort) instead of crashing the whole pipeline.
        """
        status_file = DATA_DIR / 'pipeline_status.json'
        if status_file.exists():
            try:
                with open(status_file, encoding='utf-8') as f:
                    loaded = json.load(f)
                if isinstance(loaded, dict):
                    self.status.update(loaded)
            except (json.JSONDecodeError, OSError) as e:
                logger.warning(f"Previous status unreadable, using defaults: {e}")

    def save_status(self):
        """Persist the current status snapshot to ``data/pipeline_status.json``."""
        self.status['last_run'] = datetime.now().isoformat()
        # Cap the error history (see _MAX_ERRORS).
        self.status['errors'] = self.status['errors'][-self._MAX_ERRORS:]
        # data/ may not exist yet on a fresh checkout.
        DATA_DIR.mkdir(parents=True, exist_ok=True)
        status_file = DATA_DIR / 'pipeline_status.json'
        with open(status_file, 'w', encoding='utf-8') as f:
            json.dump(self.status, f, indent=2)

    def run_script(self, script_name: str, description: str) -> bool:
        """Run a Python script from ``ML_DIR`` in a subprocess.

        Returns True on exit code 0. On failure, logs the error and appends
        a short record to ``self.status['errors']``. Never raises.
        """
        script_path = ML_DIR / script_name
        if not script_path.exists():
            logger.error(f"Script non trovato: {script_path}")
            return False

        logger.info(f"Esecuzione: {description}...")
        try:
            result = subprocess.run(
                [sys.executable, str(script_path)],
                capture_output=True,
                text=True,
                errors='replace',  # never crash on non-UTF8 child output
                timeout=3600,  # 1 hour timeout
                cwd=str(ML_DIR)
            )
        except subprocess.TimeoutExpired:
            logger.error(f"  Timeout: {script_name}")
            return False
        except Exception as e:
            logger.error(f"  Eccezione: {e}")
            return False

        if result.returncode == 0:
            logger.info(f"  Completato: {description}")
            return True

        logger.error(f"  Errore in {script_name}:")
        logger.error(result.stderr[:500])
        self.status['errors'].append({
            'script': script_name,
            'time': datetime.now().isoformat(),
            'error': result.stderr[:200]
        })
        return False

    @staticmethod
    def _log_phase(title: str):
        """Log a visual banner separating pipeline phases."""
        logger.info("\n" + "=" * 50)
        logger.info(title)
        logger.info("=" * 50)

    def update_data(self) -> bool:
        """Phase 1: refresh the full historical dataset."""
        self._log_phase("FASE 1: AGGIORNAMENTO DATI")
        success = self.run_script(
            'comprehensive_data_collector.py',
            'Raccolta dati completa'
        )
        self.status['data_updated'] = success
        return success

    def update_fixtures(self) -> bool:
        """Phase 2: refresh fixtures for upcoming matches."""
        self._log_phase("FASE 2: AGGIORNAMENTO FIXTURES")
        success = self.run_script(
            'fixture_collector.py',
            'Raccolta fixtures'
        )
        self.status['fixtures_updated'] = success
        return success

    def verify_results(self) -> bool:
        """Phase 3: verify outcomes of past predictions."""
        self._log_phase("FASE 3: VERIFICA RISULTATI")
        return self.run_script(
            'realtime_verifier.py',
            'Verifica risultati'
        )

    def generate_predictions(self) -> bool:
        """Phase 4: generate fresh predictions."""
        self._log_phase("FASE 4: GENERAZIONE PREDIZIONI")
        return self.run_script(
            'generate_verification_v4.py',
            'Generazione predizioni'
        )

    def retrain_model(self, force: bool = False) -> bool:
        """Phase 5: retrain the model when forced, missing, or >= 7 days old.

        Returns True also when training is (correctly) skipped.
        Bug fix: the original logged "skip training" even when ``force=True``
        meant training would run anyway; the age check is now only consulted
        when not forcing.
        """
        self._log_phase("FASE 5: TRAINING MODELLO")

        model_file = MODELS_DIR / 'xgb_model.joblib'
        if force or not model_file.exists():
            should_train = True
        else:
            mod_time = datetime.fromtimestamp(model_file.stat().st_mtime)
            days_old = (datetime.now() - mod_time).days
            should_train = days_old >= 7
            if should_train:
                logger.info(f"Modello vecchio di {days_old} giorni, ritraining...")
            else:
                logger.info(f"Modello aggiornato ({days_old} giorni fa), skip training")

        if not should_train:
            return True

        success = self.run_script(
            'train_model_v4_robust.py',
            'Training modello V4'
        )
        self.status['model_trained'] = success
        return success

    def generate_stats(self) -> bool:
        """Phase 6: generate frontend statistics."""
        self._log_phase("FASE 6: STATISTICHE FRONTEND")
        return self.run_script(
            'generate_historical_slips.py',
            'Generazione statistiche'
        )

    def run_daily(self):
        """Daily pipeline: fixtures + verification + predictions."""
        logger.info("=" * 60)
        logger.info("PIPELINE GIORNALIERA - " + datetime.now().strftime("%Y-%m-%d %H:%M"))
        logger.info("=" * 60)

        self.update_fixtures()
        self.verify_results()
        self.generate_predictions()

        self.save_status()
        logger.info("\nPipeline giornaliera completata!")

    def run_weekly(self):
        """Weekly pipeline: full data refresh + (age-gated) training."""
        logger.info("=" * 60)
        logger.info("PIPELINE SETTIMANALE - " + datetime.now().strftime("%Y-%m-%d %H:%M"))
        logger.info("=" * 60)

        self.update_data()
        self.update_fixtures()
        self.verify_results()
        self.retrain_model()
        self.generate_predictions()
        self.generate_stats()

        self.save_status()
        logger.info("\nPipeline settimanale completata!")

    def run_full(self):
        """Full pipeline: everything, with forced retraining."""
        logger.info("=" * 60)
        logger.info("PIPELINE COMPLETA - " + datetime.now().strftime("%Y-%m-%d %H:%M"))
        logger.info("=" * 60)

        self.update_data()
        self.update_fixtures()
        self.verify_results()
        self.retrain_model(force=True)
        self.generate_predictions()
        self.generate_stats()

        self.save_status()
        logger.info("\nPipeline completa!")
        self.print_summary()

    def print_summary(self):
        """Print a human-readable summary of the last run to stdout."""
        print("\n" + "=" * 60)
        print("RIEPILOGO PIPELINE")
        print("=" * 60)
        print(f"  Dati aggiornati: {'OK' if self.status['data_updated'] else 'SKIP/FAIL'}")
        print(f"  Fixtures aggiornati: {'OK' if self.status['fixtures_updated'] else 'SKIP/FAIL'}")
        print(f"  Modello trainato: {'OK' if self.status['model_trained'] else 'SKIP/FAIL'}")
        print(f"  Errori: {len(self.status['errors'])}")
        print("=" * 60)


def main():
    """Command-line entry point: parse the pipeline mode and dispatch.

    Modes: 'daily' (fixtures/verify/predict), 'weekly' (adds data refresh and
    age-gated training), 'full' (everything, training forced).
    """
    import argparse

    parser = argparse.ArgumentParser(description='BetPredictAI Data Pipeline')
    parser.add_argument('mode', choices=['daily', 'weekly', 'full'],
                        help='Modalità pipeline')
    parser.add_argument('--force-train', action='store_true',
                        help='Forza retraining modello')

    args = parser.parse_args()

    pipeline = DataPipeline()

    # Bug fix: --force-train was parsed but never acted upon.
    # 'full' mode already forces retraining internally, so skip it there.
    if args.force_train and args.mode != 'full':
        pipeline.retrain_model(force=True)

    if args.mode == 'daily':
        pipeline.run_daily()
    elif args.mode == 'weekly':
        pipeline.run_weekly()
    else:  # 'full' — argparse choices guarantee one of the three
        pipeline.run_full()

# Allow running this file directly as a script (e.g. from cron or CI).
if __name__ == '__main__':
    main()
