"""
BET.CUTTALO.COM - FREE DATA COLLECTOR
Raccoglie dati REALI e GRATUITI da football-data.co.uk
Nessuna API key richiesta - dati CSV pubblici
"""

import requests
import pandas as pd
import os
from datetime import datetime
from typing import List, Dict
import time
import json

# Folder where collected datasets are written: "<directory of this file>/../data".
DATA_DIR = os.path.join(
    os.path.dirname(__file__),
    os.pardir,
    'data',
)
# Created at import time (no error if it already exists).
os.makedirs(DATA_DIR, exist_ok=True)


class FreeFootballDataCollector:
    """
    Raccoglie dati GRATUITI da football-data.co.uk
    - Dati storici dal 1993 ad oggi
    - Aggiornamento settimanale
    - Nessuna API key richiesta
    """

    BASE_URL = "https://www.football-data.co.uk"

    # Campionati disponibili
    LEAGUES = {
        'E0': {'name': 'Premier League', 'country': 'England', 'tier': 1},
        'E1': {'name': 'Championship', 'country': 'England', 'tier': 2},
        'E2': {'name': 'League One', 'country': 'England', 'tier': 3},
        'E3': {'name': 'League Two', 'country': 'England', 'tier': 4},
        'SC0': {'name': 'Scottish Premier League', 'country': 'Scotland', 'tier': 1},
        'D1': {'name': 'Bundesliga', 'country': 'Germany', 'tier': 1},
        'D2': {'name': '2. Bundesliga', 'country': 'Germany', 'tier': 2},
        'I1': {'name': 'Serie A', 'country': 'Italy', 'tier': 1},
        'I2': {'name': 'Serie B', 'country': 'Italy', 'tier': 2},
        'SP1': {'name': 'La Liga', 'country': 'Spain', 'tier': 1},
        'SP2': {'name': 'La Liga 2', 'country': 'Spain', 'tier': 2},
        'F1': {'name': 'Ligue 1', 'country': 'France', 'tier': 1},
        'F2': {'name': 'Ligue 2', 'country': 'France', 'tier': 2},
        'N1': {'name': 'Eredivisie', 'country': 'Netherlands', 'tier': 1},
        'B1': {'name': 'Jupiler Pro League', 'country': 'Belgium', 'tier': 1},
        'P1': {'name': 'Primeira Liga', 'country': 'Portugal', 'tier': 1},
        'T1': {'name': 'Super Lig', 'country': 'Turkey', 'tier': 1},
        'G1': {'name': 'Super League', 'country': 'Greece', 'tier': 1},
    }

    # Top 5 campionati per default
    TOP_LEAGUES = ['E0', 'D1', 'I1', 'SP1', 'F1']

    def __init__(self):
        """Create the HTTP session reused by every download.

        The session sends a browser-like ``User-Agent`` header with each
        request.
        """
        browser_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        http = requests.Session()
        http.headers.update(browser_headers)
        self.session = http

    def _get_season_code(self, year: int) -> str:
        """Return the season code for the season that starts in ``year``.

        The code is the last two digits of ``year`` followed by the last two
        digits of ``year + 1`` (e.g. 2023 -> "2324", 1999 -> "9900").
        """
        first, second = str(year), str(year + 1)
        return first[-2:] + second[-2:]

    def _download_csv(self, league: str, season: str) -> pd.DataFrame:
        """Scarica CSV per un campionato e stagione"""
        url = f"{self.BASE_URL}/mmz4281/{season}/{league}.csv"
        try:
            response = self.session.get(url, timeout=30)
            response.raise_for_status()

            # Leggi CSV
            from io import StringIO
            df = pd.read_csv(StringIO(response.text), encoding='utf-8', on_bad_lines='skip')
            return df
        except Exception as e:
            print(f"  Errore download {league} {season}: {e}")
            return pd.DataFrame()

    def collect_all_data(self,
                         leagues: List[str] = None,
                         start_year: int = 2015,
                         end_year: int = 2025) -> pd.DataFrame:
        """Download, normalise and save the historical matches.

        Args:
            leagues: League codes to download; ``None`` means ``TOP_LEAGUES``.
            start_year: Starting year of the first season to download.
            end_year: Exclusive upper bound; the last season downloaded is the
                one starting in ``end_year - 1``.

        Returns:
            All collected matches sorted by ``Date`` (most recent first), or an
            empty DataFrame when nothing could be collected. On success the
            frame is also written to ``historical_matches.csv`` in ``DATA_DIR``.
        """
        if leagues is None:
            leagues = self.TOP_LEAGUES

        all_matches = []

        for league in leagues:
            league_info = self.LEAGUES.get(league, {'name': league, 'country': 'Unknown'})
            print(f"\n{'='*60}")
            print(f"Scaricando {league_info['name']} ({league_info['country']})")
            print('='*60)

            for year in range(start_year, end_year):
                season = self._get_season_code(year)
                print(f"  Stagione {year}/{year+1} ({season})...", end=" ")

                df = self._download_csv(league, season)
                # Gentle rate limiting: pause after every request, including
                # the ones that failed or returned nothing.
                time.sleep(0.5)

                if df.empty:
                    print("VUOTO")
                    continue

                # Standardise columns
                df_processed = self._process_dataframe(df, league, year)
                if df_processed.empty:
                    print("SKIP")
                    continue

                # The source dates are day-first (dd/mm/yy or dd/mm/yyyy) and
                # the year width differs between seasons, so each season is
                # parsed on its own with dayfirst=True. Parsing the combined
                # column with the month-first default swaps day and month or
                # yields NaT. This is a no-op for already-parsed dates.
                df_processed['Date'] = pd.to_datetime(
                    df_processed['Date'], dayfirst=True, errors='coerce'
                )

                all_matches.append(df_processed)
                print(f"OK - {len(df_processed)} partite")

        if not all_matches:
            print("\nNessun dato raccolto!")
            return pd.DataFrame()

        # Combine every season and put the most recent matches first.
        final_df = pd.concat(all_matches, ignore_index=True)
        final_df = final_df.sort_values('Date', ascending=False)

        # Save
        output_path = os.path.join(DATA_DIR, 'historical_matches.csv')
        final_df.to_csv(output_path, index=False)

        print(f"\n{'='*60}")
        print(f"TOTALE: {len(final_df)} partite salvate in {output_path}")
        print('='*60)

        return final_df

    def _process_dataframe(self, df: pd.DataFrame, league: str, year: int) -> pd.DataFrame:
        """Processa e standardizza il DataFrame"""

        # Colonne necessarie
        required_cols = ['Date', 'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'FTR']

        # Verifica colonne esistenti
        if not all(col in df.columns for col in required_cols):
            return pd.DataFrame()

        league_info = self.LEAGUES.get(league, {'name': league, 'country': 'Unknown', 'tier': 1})

        processed = pd.DataFrame({
            'match_id': range(len(df)),
            'Date': df['Date'],
            'league_code': league,
            'league_name': league_info['name'],
            'country': league_info['country'],
            'tier': league_info.get('tier', 1),
            'season': f"{year}/{year+1}",
            'season_year': year,

            # Squadre
            'home_team': df['HomeTeam'],
            'away_team': df['AwayTeam'],

            # Risultato
            'home_goals': pd.to_numeric(df['FTHG'], errors='coerce'),
            'away_goals': pd.to_numeric(df['FTAG'], errors='coerce'),
            'result': df['FTR'].map({'H': 2, 'D': 1, 'A': 0}),

            # Gol primo tempo (se disponibili)
            'home_goals_ht': pd.to_numeric(df.get('HTHG', 0), errors='coerce'),
            'away_goals_ht': pd.to_numeric(df.get('HTAG', 0), errors='coerce'),

            # Tiri (se disponibili)
            'home_shots': pd.to_numeric(df.get('HS', 0), errors='coerce'),
            'away_shots': pd.to_numeric(df.get('AS', 0), errors='coerce'),
            'home_shots_target': pd.to_numeric(df.get('HST', 0), errors='coerce'),
            'away_shots_target': pd.to_numeric(df.get('AST', 0), errors='coerce'),

            # Falli e cartellini
            'home_fouls': pd.to_numeric(df.get('HF', 0), errors='coerce'),
            'away_fouls': pd.to_numeric(df.get('AF', 0), errors='coerce'),
            'home_corners': pd.to_numeric(df.get('HC', 0), errors='coerce'),
            'away_corners': pd.to_numeric(df.get('AC', 0), errors='coerce'),
            'home_yellow': pd.to_numeric(df.get('HY', 0), errors='coerce'),
            'away_yellow': pd.to_numeric(df.get('AY', 0), errors='coerce'),
            'home_red': pd.to_numeric(df.get('HR', 0), errors='coerce'),
            'away_red': pd.to_numeric(df.get('AR', 0), errors='coerce'),

            # Quote (se disponibili)
            'odds_home': pd.to_numeric(df.get('B365H', df.get('BWH', df.get('PSH', 0))), errors='coerce'),
            'odds_draw': pd.to_numeric(df.get('B365D', df.get('BWD', df.get('PSD', 0))), errors='coerce'),
            'odds_away': pd.to_numeric(df.get('B365A', df.get('BWA', df.get('PSA', 0))), errors='coerce'),
        })

        # Rimuovi righe con dati mancanti essenziali
        processed = processed.dropna(subset=['home_goals', 'away_goals', 'result'])

        # Crea ID univoco
        processed['match_id'] = processed.apply(
            lambda r: f"{league}_{year}_{r.name}", axis=1
        )

        return processed

    def get_team_stats(self, df: pd.DataFrame) -> Dict:
        """Calcola statistiche per squadra dal DataFrame"""
        teams_stats = {}

        for _, row in df.iterrows():
            home = row['home_team']
            away = row['away_team']

            # Init squadre
            for team in [home, away]:
                if team not in teams_stats:
                    teams_stats[team] = {
                        'played': 0, 'won': 0, 'draw': 0, 'lost': 0,
                        'goals_for': 0, 'goals_against': 0, 'points': 0,
                        'home_played': 0, 'home_won': 0,
                        'away_played': 0, 'away_won': 0,
                    }

            # Update home team
            teams_stats[home]['played'] += 1
            teams_stats[home]['home_played'] += 1
            teams_stats[home]['goals_for'] += row['home_goals']
            teams_stats[home]['goals_against'] += row['away_goals']

            # Update away team
            teams_stats[away]['played'] += 1
            teams_stats[away]['away_played'] += 1
            teams_stats[away]['goals_for'] += row['away_goals']
            teams_stats[away]['goals_against'] += row['home_goals']

            # Risultato
            if row['result'] == 2:  # Home win
                teams_stats[home]['won'] += 1
                teams_stats[home]['home_won'] += 1
                teams_stats[home]['points'] += 3
                teams_stats[away]['lost'] += 1
            elif row['result'] == 0:  # Away win
                teams_stats[away]['won'] += 1
                teams_stats[away]['away_won'] += 1
                teams_stats[away]['points'] += 3
                teams_stats[home]['lost'] += 1
            else:  # Draw
                teams_stats[home]['draw'] += 1
                teams_stats[away]['draw'] += 1
                teams_stats[home]['points'] += 1
                teams_stats[away]['points'] += 1

        return teams_stats

    def get_league_standings(self, df: pd.DataFrame, league: str, season_year: int) -> pd.DataFrame:
        """Calcola classifica per un campionato e stagione"""
        # Filtra dati
        league_df = df[(df['league_code'] == league) & (df['season_year'] == season_year)]

        if league_df.empty:
            return pd.DataFrame()

        # Calcola stats
        teams_stats = self.get_team_stats(league_df)

        # Converti in DataFrame
        standings = pd.DataFrame.from_dict(teams_stats, orient='index')
        standings['team'] = standings.index
        standings['goal_diff'] = standings['goals_for'] - standings['goals_against']

        # Ordina per punti e differenza reti
        standings = standings.sort_values(
            ['points', 'goal_diff', 'goals_for'],
            ascending=[False, False, False]
        )
        standings['position'] = range(1, len(standings) + 1)

        return standings.reset_index(drop=True)


class LiveScoreCollector:
    """
    Collects today's results and upcoming fixtures from the free OpenLigaDB
    API (no key required). Only the Bundesliga feed ("bl1") is queried.
    """

    MATCHDATA_URL = "https://api.openligadb.de/getmatchdata/bl1"

    # OpenLigaDB addresses a season by the year in which it starts (2023 means
    # 2023/24). From this month on the current calendar year is used, before
    # it the previous one.
    # NOTE(review): July is assumed as the rollover month - confirm.
    SEASON_START_MONTH = 7

    def __init__(self):
        self.session = requests.Session()

    @classmethod
    def _season_year(cls, now: datetime) -> int:
        """Return the starting year of the season that contains ``now``."""
        return now.year if now.month >= cls.SEASON_START_MONTH else now.year - 1

    def _fetch_season_matches(self, now: datetime) -> List[Dict]:
        """Download every match of the season that contains ``now``.

        Raises:
            Exception: on HTTP errors, invalid JSON or a non-list payload;
                the public methods catch and print these.
        """
        url = f"{self.MATCHDATA_URL}/{self._season_year(now)}"
        response = self.session.get(url, timeout=30)
        response.raise_for_status()
        data = response.json()
        if not isinstance(data, list):
            raise ValueError(f"unexpected payload type: {type(data).__name__}")
        return data

    @staticmethod
    def _latest_score(match: Dict):
        """Return ``(home_goals, away_goals)`` or ``(None, None)`` if unknown.

        ``matchResults`` can be empty (no result recorded yet) and may list
        several results. The entry with the highest ``resultTypeID`` is used;
        when no entry has that key the first one is used.
        NOTE(review): presumably the highest resultTypeID is the final score -
        confirm against the OpenLigaDB documentation.
        """
        results = match.get('matchResults') or []
        if not results:
            return None, None
        latest = max(results, key=lambda res: res.get('resultTypeID') or 0)
        return latest.get('pointsTeam1'), latest.get('pointsTeam2')

    def get_todays_matches(self) -> List[Dict]:
        """
        Return the Bundesliga matches whose kick-off date is today.

        Each entry has the keys ``league``, ``league_code``, ``home_team``,
        ``away_team``, ``date``, ``home_goals``, ``away_goals`` and ``status``
        ('LIVE' or 'FINISHED'). Goals are ``None`` while no result is
        available. Download errors are printed and yield an empty list;
        malformed entries are skipped.
        """
        matches = []

        try:
            now = datetime.now()
            data = self._fetch_season_matches(now)
        except Exception as e:
            print(f"Errore OpenLigaDB: {e}")
            return matches

        today = now.strftime('%Y-%m-%d')

        for match in data:
            try:
                kickoff = match.get('matchDateTime') or ''
                if kickoff[:10] != today:
                    continue

                home_goals, away_goals = self._latest_score(match)
                # NOTE(review): a match that has not kicked off yet presumably
                # also has matchIsFinished == False and is therefore reported
                # as 'LIVE'; kept as-is for backward compatibility.
                unfinished = match.get('matchIsFinished') is False
                matches.append({
                    'league': 'Bundesliga',
                    'league_code': 'D1',
                    'home_team': match['team1']['teamName'],
                    'away_team': match['team2']['teamName'],
                    'date': kickoff,
                    'home_goals': home_goals,
                    'away_goals': away_goals,
                    'status': 'LIVE' if unfinished else 'FINISHED'
                })
            except (KeyError, TypeError, AttributeError):
                # One malformed entry must not discard the rest of the day.
                continue

        return matches

    def get_upcoming_fixtures(self, days_ahead: int = 7) -> List[Dict]:
        """
        Return the unfinished Bundesliga matches of the next ``days_ahead`` days.

        A match qualifies when the whole-day difference between its kick-off
        and now is between 0 and ``days_ahead`` inclusive. Each entry has the
        keys ``league``, ``league_code``, ``home_team``, ``away_team``,
        ``date`` and ``matchday``. Download errors are printed and yield an
        empty list; malformed entries are skipped.
        """
        fixtures = []

        try:
            today = datetime.now()
            data = self._fetch_season_matches(today)
        except Exception as e:
            print(f"Errore fixtures: {e}")
            return fixtures

        for match in data:
            try:
                match_date = datetime.fromisoformat(match['matchDateTime'].replace('Z', '+00:00'))
                # Any timezone info is dropped so the value can be compared
                # with the naive local "now".
                days_until = (match_date.replace(tzinfo=None) - today).days
                if not 0 <= days_until <= days_ahead:
                    continue
                if match.get('matchIsFinished', True):
                    continue

                # "group" may be missing or null; the matchday is then 0.
                group = match.get('group') or {}
                fixtures.append({
                    'league': 'Bundesliga',
                    'league_code': 'D1',
                    'home_team': match['team1']['teamName'],
                    'away_team': match['team2']['teamName'],
                    'date': match['matchDateTime'],
                    'matchday': group.get('groupOrderID', 0)
                })
            except (KeyError, TypeError, ValueError, AttributeError):
                continue

        return fixtures


def create_training_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Build one row of pre-match features per match for model training.

    For every match only strictly earlier matches (``Date`` lower than the
    match date) are used:
    - recent form of each team: its last 5 matches in the same league
    - head-to-head: the last 5 meetings of the two teams in any league

    Args:
        df: Matches with ``Date``, ``match_id``, ``league_code``,
            ``home_team``, ``away_team``, ``home_goals``, ``away_goals`` and
            ``result`` columns.

    Returns:
        A DataFrame with the match identifiers, the ``result`` label and the
        form, head-to-head and differential features, in date order.
    """
    # Chronological order, so that tail() yields the most recent matches.
    df = df.sort_values('Date').copy()

    # Number of previous matches that define a team's "form".
    n_matches = 5

    dates = df['Date']
    home_col = df['home_team']
    away_col = df['away_team']
    league_col = df['league_code']

    features_list = []

    for _, row in df.iterrows():
        home = row['home_team']
        away = row['away_team']
        current_date = row['Date']
        league = row['league_code']

        # Computed once per match and shared by the three look-ups below.
        earlier = dates < current_date
        earlier_same_league = earlier & (league_col == league)
        home_hosts = home_col == home
        home_visits = away_col == home
        away_hosts = home_col == away
        away_visits = away_col == away

        # Latest matches of the home team and of the away team
        home_recent = df[(home_hosts | home_visits) & earlier_same_league].tail(n_matches)
        away_recent = df[(away_hosts | away_visits) & earlier_same_league].tail(n_matches)

        home_stats = calculate_team_form(home_recent, home)
        away_stats = calculate_team_form(away_recent, away)

        # Head to head: earlier meetings of the two teams, either venue
        h2h = df[
            ((home_hosts & away_visits) | (away_hosts & home_visits)) & earlier
        ].tail(5)

        h2h_stats = calculate_h2h_stats(h2h, home, away)

        features = {
            'match_id': row['match_id'],
            'Date': current_date,
            'league_code': league,
            'home_team': home,
            'away_team': away,
            'result': row['result'],

            # Home team form
            'home_form_points': home_stats['form_points'],
            'home_form_goals_scored': home_stats['goals_scored'],
            'home_form_goals_conceded': home_stats['goals_conceded'],
            'home_form_wins': home_stats['wins'],
            'home_home_strength': home_stats['home_strength'],

            # Away team form
            'away_form_points': away_stats['form_points'],
            'away_form_goals_scored': away_stats['goals_scored'],
            'away_form_goals_conceded': away_stats['goals_conceded'],
            'away_form_wins': away_stats['wins'],
            'away_away_strength': away_stats['away_strength'],

            # Head to head
            'h2h_home_wins': h2h_stats['home_wins'],
            'h2h_draws': h2h_stats['draws'],
            'h2h_away_wins': h2h_stats['away_wins'],

            # Differentials (home minus away)
            'form_diff': home_stats['form_points'] - away_stats['form_points'],
            'goals_diff': (home_stats['goals_scored'] - home_stats['goals_conceded']) -
                         (away_stats['goals_scored'] - away_stats['goals_conceded']),
        }

        features_list.append(features)

    return pd.DataFrame(features_list)


def calculate_team_form(matches: pd.DataFrame, team: str) -> Dict:
    """Summarise the recent form of ``team`` over ``matches``.

    Args:
        matches: Matches played by ``team`` with ``home_team``,
            ``home_goals``, ``away_goals`` and ``result`` columns
            (result: 2 = home win, 1 = draw, 0 = away win). Rows where
            ``home_team`` differs from ``team`` are treated as away games.
        team: Name of the team to evaluate.

    Returns:
        Per-match averages ``form_points``, ``goals_scored``,
        ``goals_conceded`` and ``wins``, plus ``home_strength`` and
        ``away_strength`` (share of home/away games won, 0.5 when the team
        has no game of that kind in ``matches``). Every value is 0 when
        ``matches`` is empty.
    """
    if matches.empty:
        # NOTE(review): with no history the strengths are 0, whereas a team
        # with history but no home (or away) game gets 0.5 below. Kept
        # unchanged because it affects the generated features - confirm which
        # default is intended.
        return {
            'form_points': 0, 'goals_scored': 0, 'goals_conceded': 0,
            'wins': 0, 'home_strength': 0, 'away_strength': 0
        }

    points = 0
    wins = 0
    goals_scored = 0
    goals_conceded = 0
    home_games = 0
    home_wins = 0
    away_games = 0
    away_wins = 0

    rows = zip(
        matches['home_team'],
        matches['home_goals'],
        matches['away_goals'],
        matches['result'],
    )
    for host, host_goals, guest_goals, result in rows:
        if host == team:
            home_games += 1
            goals_scored += host_goals
            goals_conceded += guest_goals
            if result == 2:
                points += 3
                wins += 1
                home_wins += 1
            elif result == 1:
                points += 1
        else:
            away_games += 1
            goals_scored += guest_goals
            goals_conceded += host_goals
            if result == 0:
                points += 3
                wins += 1
                away_wins += 1
            elif result == 1:
                points += 1

    # matches is not empty here, so n >= 1.
    n = len(matches)
    return {
        'form_points': points / n,
        'goals_scored': goals_scored / n,
        'goals_conceded': goals_conceded / n,
        'wins': wins / n,
        'home_strength': home_wins / home_games if home_games > 0 else 0.5,
        'away_strength': away_wins / away_games if away_games > 0 else 0.5,
    }


def calculate_h2h_stats(matches: pd.DataFrame, home: str, away: str) -> Dict:
    """Summarise past meetings from the point of view of ``home``.

    Args:
        matches: Earlier meetings of the two teams, with ``home_team`` and
            ``result`` columns (result: 2 = home win, 1 = draw, 0 = away win).
        home: Team playing at home in the match being described.
        away: The opponent. Not used by the computation; every row that
            ``home`` did not host is treated as hosted by the opponent.

    Returns:
        The shares ``home_wins`` (won by ``home``), ``draws`` and
        ``away_wins`` (every other outcome) over ``matches``; all 0 when
        ``matches`` is empty.
    """
    if matches.empty:
        return {'home_wins': 0, 'draws': 0, 'away_wins': 0}

    tally = {'home_wins': 0, 'draws': 0, 'away_wins': 0}

    for host, result in zip(matches['home_team'], matches['result']):
        if result == 1:
            tally['draws'] += 1
            continue
        # Result code that means "``home`` won this meeting": the home-win
        # code when it hosted, the away-win code when it visited.
        winning_code = 2 if host == home else 0
        if result == winning_code:
            tally['home_wins'] += 1
        else:
            tally['away_wins'] += 1

    # matches is not empty here, so n >= 1.
    n = len(matches)
    return {key: count / n for key, count in tally.items()}


if __name__ == "__main__":
    # Script entry point: download the top five leagues and print a summary.
    banner = "=" * 70

    print(banner)
    print("BET.CUTTALO.COM - DOWNLOAD DATI REALI")
    print(banner)

    collector = FreeFootballDataCollector()

    # Ten seasons: those starting in 2015 through 2024 (end_year is exclusive).
    top_five = ['E0', 'D1', 'I1', 'SP1', 'F1']
    dataset = collector.collect_all_data(
        leagues=top_five,
        start_year=2015,
        end_year=2025,
    )

    if not dataset.empty:
        print("\n" + banner)
        print("STATISTICHE DATI RACCOLTI")
        print(banner)
        print(f"Totale partite: {len(dataset)}")

        print("\nPer campionato:")
        per_league = dataset.groupby('league_name').size()
        print(per_league.sort_values(ascending=False))

        print("\nDistribuzione risultati:")
        # Labels are padded so the counts line up; codes follow the
        # 'result' encoding (2 = home win, 1 = draw, 0 = away win).
        for label, code in (("Home Win:", 2), ("Draw:    ", 1), ("Away Win:", 0)):
            hits = dataset['result'] == code
            print(f"  {label} {hits.sum()} ({hits.mean()*100:.1f}%)")
