"""
Genera dati di esempio per training iniziale
"""
import pandas as pd
import numpy as np
import os

# Output directory for generated datasets: the "data" folder located one level
# above the directory that contains this script.
DATA_DIR = os.path.join(
    os.path.dirname(__file__),
    os.pardir,
    'data',
)
# Created at import time so later writes never hit a missing directory.
os.makedirs(DATA_DIR, exist_ok=True)

# Teams per competition code. A team's index in its list (plus one) is used
# as its simulated league position, so earlier teams are "stronger".
_TEAMS_PER_COMP = {
    'PL': ['Manchester City', 'Arsenal', 'Liverpool', 'Aston Villa', 'Tottenham',
           'Manchester United', 'Newcastle', 'Brighton', 'West Ham', 'Chelsea',
           'Bournemouth', 'Crystal Palace', 'Wolves', 'Fulham', 'Everton',
           'Brentford', 'Nottingham Forest', 'Luton', 'Burnley', 'Sheffield United'],
    'SA': ['Inter', 'Milan', 'Juventus', 'Atalanta', 'Bologna', 'Roma', 'Lazio',
           'Fiorentina', 'Napoli', 'Torino', 'Monza', 'Genoa', 'Lecce', 'Cagliari',
           'Verona', 'Udinese', 'Empoli', 'Sassuolo', 'Frosinone', 'Salernitana'],
    'PD': ['Real Madrid', 'Barcelona', 'Girona', 'Atletico Madrid', 'Athletic Bilbao',
           'Real Sociedad', 'Real Betis', 'Valencia', 'Villarreal', 'Getafe',
           'Osasuna', 'Alaves', 'Sevilla', 'Mallorca', 'Las Palmas', 'Celta Vigo',
           'Rayo Vallecano', 'Cadiz', 'Granada', 'Almeria'],
    'BL1': ['Bayern Munich', 'Bayer Leverkusen', 'Stuttgart', 'RB Leipzig', 'Dortmund',
            'Frankfurt', 'Hoffenheim', 'Freiburg', 'Augsburg', 'Werder Bremen',
            'Wolfsburg', 'Union Berlin', 'Bochum', 'Mainz', 'Monchengladbach',
            'Koln', 'Heidenheim', 'Darmstadt', 'Greuther Furth', 'Hertha Berlin'],
}

# Column order of the generated dataset. Passing it explicitly to the
# DataFrame constructor guarantees a proper header even for zero matches.
_MATCH_COLUMNS = [
    'match_id', 'date', 'competition', 'season',
    'home_team', 'away_team', 'home_team_id', 'away_team_id',
    'home_goals', 'away_goals', 'result',
    'home_position', 'home_points', 'home_won', 'home_draw', 'home_lost',
    'home_goals_for', 'home_goals_against', 'home_goal_diff',
    'away_position', 'away_points', 'away_won', 'away_draw', 'away_lost',
    'away_goals_for', 'away_goals_against', 'away_goal_diff',
    'position_diff', 'points_diff',
]


def _simulate_score(prob_home, prob_draw):
    """Draw a match outcome and a scoreline consistent with it.

    Returns a ``(result, home_goals, away_goals)`` tuple where ``result`` is
    2 for a home win, 1 for a draw and 0 for an away win.
    """
    rand = np.random.random()
    if rand < prob_home:
        # Home win: the loser always scores strictly fewer goals.
        home_goals = np.random.randint(1, 5)
        away_goals = np.random.randint(0, home_goals)
        return 2, home_goals, away_goals
    if rand < prob_home + prob_draw:
        # Draw: both sides get the same number of goals.
        home_goals = np.random.randint(0, 4)
        return 1, home_goals, home_goals
    # Away win: mirror image of the home-win case.
    away_goals = np.random.randint(1, 5)
    home_goals = np.random.randint(0, away_goals)
    return 0, home_goals, away_goals


def _print_result_distribution(df):
    """Print the count and percentage of each outcome in ``df['result']``."""
    total = len(df)
    print("\nDistribuzione risultati:")
    for label, code in (('Home Win:', 2), ('Draw:    ', 1), ('Away Win:', 0)):
        count = (df['result'] == code).sum()
        # Guard against division by zero when no matches were generated.
        share = count / total * 100 if total else 0.0
        print(f"  {label} {count} ({share:.1f}%)")


def generate_sample_matches(n_matches=2000, seed=42, output_path=None):
    """Generate realistic-looking fake matches for training and save them as CSV.

    Each match picks a random competition and two distinct teams from it,
    derives season statistics from the teams' list positions, and then draws
    a result whose probabilities depend on the teams' points plus a home
    bonus.

    Args:
        n_matches: Number of matches to generate. Zero (or a negative number)
            yields an empty frame that still carries every column.
        seed: Value passed to ``np.random.seed``. Note that this reseeds
            NumPy's *global* random state. The default (42) reproduces the
            historical output of this function.
        output_path: Destination CSV file. Defaults to
            ``historical_matches.csv`` inside ``DATA_DIR``.

    Returns:
        The generated matches as a ``pandas.DataFrame``. ``result`` is encoded
        as 2 = home win, 1 = draw, 0 = away win.
    """
    np.random.seed(seed)

    competitions = list(_TEAMS_PER_COMP)
    matches = []

    # NOTE: the order of the random draws below is deliberately kept stable so
    # that a given seed keeps producing the same dataset.
    for i in range(n_matches):
        comp = np.random.choice(competitions)
        teams = _TEAMS_PER_COMP[comp]

        # Pick two different teams; redraw the away side on a collision.
        home_idx = np.random.randint(0, len(teams))
        away_idx = np.random.randint(0, len(teams))
        while away_idx == home_idx:
            away_idx = np.random.randint(0, len(teams))

        # Simulated league positions (1 = top of the table).
        home_position = home_idx + 1
        away_position = away_idx + 1

        # Season statistics derived from the position plus some noise; every
        # value is clamped at zero.
        home_points = max(0, 60 - home_position * 3 + np.random.randint(-5, 6))
        away_points = max(0, 60 - away_position * 3 + np.random.randint(-5, 6))

        home_won = max(0, int(home_points / 3) + np.random.randint(-2, 3))
        away_won = max(0, int(away_points / 3) + np.random.randint(-2, 3))

        home_draw = np.random.randint(2, 8)
        away_draw = np.random.randint(2, 8)

        home_lost = max(0, 20 - home_won - home_draw + np.random.randint(-2, 3))
        away_lost = max(0, 20 - away_won - away_draw + np.random.randint(-2, 3))

        home_goals_for = max(0, 50 - home_position * 2 + np.random.randint(-10, 10))
        away_goals_for = max(0, 50 - away_position * 2 + np.random.randint(-10, 10))

        home_goals_against = max(0, 20 + home_position + np.random.randint(-5, 10))
        away_goals_against = max(0, 20 + away_position + np.random.randint(-5, 10))

        # Outcome probabilities from relative strength; the home side gets a
        # flat bonus and the constant 20 in the denominator leaves room for
        # the draw probability.
        home_strength = home_points + 5
        away_strength = away_points
        strength_total = home_strength + away_strength + 20

        prob_home = home_strength / strength_total
        prob_away = away_strength / strength_total
        prob_draw = 1 - prob_home - prob_away

        result, home_goals, away_goals = _simulate_score(prob_home, prob_draw)

        # Month, day and season are drawn independently of each other.
        month = np.random.randint(1, 13)
        day = np.random.randint(1, 29)
        season = np.random.choice([2023, 2024])

        matches.append({
            'match_id': i + 1000,
            'date': f'2024-{month:02d}-{day:02d}',
            'competition': comp,
            'season': season,
            'home_team': home_team_name(teams, home_idx) if False else teams[home_idx],
            'away_team': teams[away_idx],
            # NOTE(review): ids are derived from the index within the
            # competition's list, so they are unique only per competition.
            'home_team_id': home_idx + 100,
            'away_team_id': away_idx + 100,
            'home_goals': home_goals,
            'away_goals': away_goals,
            'result': result,
            'home_position': home_position,
            'home_points': home_points,
            'home_won': home_won,
            'home_draw': home_draw,
            'home_lost': home_lost,
            'home_goals_for': home_goals_for,
            'home_goals_against': home_goals_against,
            'home_goal_diff': home_goals_for - home_goals_against,
            'away_position': away_position,
            'away_points': away_points,
            'away_won': away_won,
            'away_draw': away_draw,
            'away_lost': away_lost,
            'away_goals_for': away_goals_for,
            'away_goals_against': away_goals_against,
            'away_goal_diff': away_goals_for - away_goals_against,
            'position_diff': home_position - away_position,
            'points_diff': home_points - away_points,
        })

    # Explicit columns keep the schema intact even when `matches` is empty.
    df = pd.DataFrame(matches, columns=_MATCH_COLUMNS)

    if output_path is None:
        output_path = os.path.join(DATA_DIR, 'historical_matches.csv')
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    df.to_csv(output_path, index=False)
    print(f"Generati {len(df)} match in {output_path}")

    _print_result_distribution(df)

    return df


if __name__ == "__main__":
    # Script entry point: build the default-sized sample dataset.
    generate_sample_matches(n_matches=2000)
