#!/usr/bin/env python3
"""
BetPredictAI - Universal Match Verifier

Sistema multi-source per verificare risultati di TUTTE le leghe:
1. Primary: football-data.org (top 5 leagues)
2. Secondary: Sofascore API (all leagues worldwide)
3. Fallback: API-Football via RapidAPI (if configured)

Uso: python3 universal_verifier.py [--force] [--days N]
"""

import os
import sys
import json
import requests
from datetime import datetime, timedelta
import time
import logging
import re
from urllib.parse import quote

# Setup logging: every record at INFO or above goes both to a log file and to
# the console (StreamHandler).
# NOTE(review): the log file path is an absolute, deployment-specific path and
# is independent of LOGS_DIR below. LOGS_DIR is also only created *after* the
# FileHandler has been constructed, so the makedirs call further down does not
# protect this handler against a missing directory - confirm this is intended.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
    handlers=[
        logging.FileHandler('/var/www/html/bet.cuttalo.com/logs/universal_verifier.log'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

# Project layout: data/ and logs/ are siblings of the directory containing
# this script.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(BASE_DIR, '..', 'data')
LOGS_DIR = os.path.join(BASE_DIR, '..', 'logs')

# Create both directories at import time so later reads/writes do not fail on
# a fresh checkout.
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(LOGS_DIR, exist_ok=True)

# Load API keys from .env
def load_env(env_path=None):
    """Parse a simple ``KEY=VALUE`` .env file into a dict of strings.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    ignored. Keys and values are stripped of surrounding whitespace, and a
    value wrapped in a matching pair of single or double quotes has those
    quotes removed (``KEY="abc"`` yields ``abc``), so a quoted API key is not
    sent to the provider with literal quote characters.

    Args:
        env_path: Path of the file to read. Defaults to ``../.env`` relative
            to this script's directory.

    Returns:
        Mapping of variable name to value; empty if the file does not exist.
    """
    if env_path is None:
        env_path = os.path.join(BASE_DIR, '..', '.env')

    env_vars = {}
    if not os.path.exists(env_path):
        return env_vars

    # utf-8-sig transparently drops a BOM; errors='replace' keeps a stray
    # non-UTF-8 byte in a comment from aborting start-up.
    with open(env_path, encoding='utf-8-sig', errors='replace') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith('#') or '=' not in line:
                continue

            key, value = line.split('=', 1)
            value = value.strip()
            # Only strip quotes when they are balanced; a lone quote is kept
            # verbatim because it may be part of the value.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
                value = value[1:-1]
            env_vars[key.strip()] = value

    return env_vars

# Configuration is read once at import time; a missing key falls back to an
# empty string, which the API wrappers below treat as "not configured".
ENV = load_env()
FOOTBALL_DATA_API_KEY = ENV.get('FOOTBALL_DATA_API_KEY', '')
RAPIDAPI_KEY = ENV.get('RAPIDAPI_KEY', '')

# League mappings for football-data.org.
# Keys are the league codes stored with each bet (`league_code`); values are
# the competition codes placed in the football-data.org request URL.
FOOTBALL_DATA_COMPETITIONS = {
    'E0': 'PL', 'E1': 'ELC', 'D1': 'BL1', 'I1': 'SA', 'SP1': 'PD', 'F1': 'FL1'
}

# Common user agents for web requests.
# Only the first entry is referenced in this file.
USER_AGENTS = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
]


class FlashscoreAPI:
    """Look up finished match results via livescore.com's public JSON feed.

    Despite the class name, the only working backend is livescore.com; the
    soccerway fallback is a stub that always reports "not found".

    Every lookup returns a dict. ``{'found': False}`` means no finished match
    was located; otherwise the dict carries ``found``, ``home_goals``,
    ``away_goals``, ``status`` (always ``'finished'``), ``source`` and
    ``tournament``.
    """

    # livescore.com status codes treated as "match over":
    # Full Time, After Extra Time, After Penalties.
    _FINISHED_STATUSES = ('FT', 'AET', 'AP')

    # Club-type tokens removed from the start / end of a name before comparing.
    _PREFIXES = ('fc ', 'ac ', 'as ', 'ss ', 'sc ', 'rc ', 'us ', 'afc ', 'ssc ', 'og ', 'rb ', '1. ')
    _SUFFIXES = (' fc', ' sc', ' cf')

    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': USER_AGENTS[0],
            'Accept': '*/*',
        })

    def search_match(self, home_team, away_team, match_date):
        """Search for a match result, trying each backend in turn.

        Args:
            home_team: Home team name as stored with the bet.
            away_team: Away team name as stored with the bet.
            match_date: Match date as a ``YYYY-MM-DD`` string.

        Returns:
            The first backend result with ``found`` set, else
            ``{'found': False}``.
        """
        for backend in (self._try_livescore, self._try_soccerway):
            result = backend(home_team, away_team, match_date)
            if result.get('found'):
                return result

        return {'found': False}

    def _try_livescore(self, home_team, away_team, match_date):
        """Fetch livescore.com's fixture list for the date and look for the match.

        Best-effort: any failure (bad date string, network error, unexpected
        payload) is logged and reported as "not found" rather than raised.
        """
        try:
            # The endpoint expects the date as YYYYMMDD.
            date_obj = datetime.strptime(match_date, '%Y-%m-%d')
            formatted = date_obj.strftime('%Y%m%d')

            url = f"https://prod-public-api.livescore.com/v1/api/app/date/soccer/{formatted}/0"
            headers = {
                'User-Agent': USER_AGENTS[0],
                'Accept': 'application/json',
            }

            response = self.session.get(url, headers=headers, timeout=10)

            if response.status_code == 200:
                return self._find_result(response.json(), home_team, away_team)

            logger.warning(f"livescore.com returned {response.status_code}")

        except Exception as e:
            # Logged at WARNING so a failing source is visible at the script's
            # INFO log level instead of silently looking like "no result".
            logger.warning(f"Livescore error for {home_team} vs {away_team} ({match_date}): {e}")

        return {'found': False}

    def _find_result(self, data, home_team, away_team):
        """Scan a decoded livescore.com payload for a finished matching event.

        Args:
            data: Decoded JSON object; stages are read from ``Stages`` and
                each stage's fixtures from ``Events``.
            home_team: Home team name to look for.
            away_team: Away team name to look for.

        Returns:
            A result dict for the first finished event whose two team names
            both match, else ``{'found': False}``.
        """
        for stage in data.get('Stages') or []:
            for event in stage.get('Events') or []:
                e_home = self._event_team_name(event, 'T1')
                e_away = self._event_team_name(event, 'T2')

                if not (self._teams_match(home_team, e_home) and self._teams_match(away_team, e_away)):
                    continue
                if event.get('Eps', '') not in self._FINISHED_STATUSES:
                    continue

                home_score = event.get('Tr1', 0)
                away_score = event.get('Tr2', 0)

                return {
                    'found': True,
                    'home_goals': int(home_score) if home_score else 0,
                    'away_goals': int(away_score) if away_score else 0,
                    'status': 'finished',
                    'source': 'livescore.com',
                    # `or ''` keeps a null country/stage name from raising.
                    'tournament': f"{stage.get('Cnm') or ''} - {stage.get('Snm') or ''}",
                }

        return {'found': False}

    @staticmethod
    def _event_team_name(event, side):
        """Return the first team name under ``event[side]`` or ``''`` if absent."""
        teams = event.get(side)
        if not teams:
            return ''
        return teams[0].get('Nm') or ''

    def _try_soccerway(self, home_team, away_team, match_date):
        """Placeholder fallback: soccerway scraping is not implemented."""
        return {'found': False}

    def _teams_match(self, team1, team2):
        """Fuzzy-compare two team names.

        Names match when, after normalisation, they are equal, one contains
        the other, or their first words (each at least 4 characters) are
        equal or one is a prefix of the other. An empty or missing name never
        matches anything; without that guard the substring test would accept
        every opponent, because ``'' in x`` is always true.

        NOTE(review): the first-word rule also equates distinct clubs that
        share a first word (e.g. "Manchester United" / "Manchester City").
        """
        t1 = self._normalize(team1)
        t2 = self._normalize(team2)

        if not t1 or not t2:
            return False
        if t1 == t2:
            return True
        if t1 in t2 or t2 in t1:
            return True

        # Compare the first word of each name.
        w1 = t1.split()[0]
        w2 = t2.split()[0]
        if len(w1) >= 4 and len(w2) >= 4 and (w1 == w2 or w1.startswith(w2) or w2.startswith(w1)):
            return True

        return False

    def _normalize(self, name):
        """Lower-case a team name and strip club-type tokens and punctuation.

        Returns ``''`` for an empty or ``None`` name.
        """
        if not name:
            return ''
        n = name.lower().strip()
        for p in self._PREFIXES:
            if n.startswith(p):
                n = n[len(p):]
        for s in self._SUFFIXES:
            if n.endswith(s):
                n = n[:-len(s)]
        return n.replace('-', ' ').replace("'", '').strip()


class FootballDataAPI:
    """Thin client for the football-data.org v4 REST API.

    Without an API key the client is inert: every query returns an empty list
    and no request is sent.
    """

    BASE_URL = "https://api.football-data.org/v4"

    def __init__(self, api_key):
        session = requests.Session()
        if api_key:
            # The key is sent as a header on every request of this session.
            session.headers.update({'X-Auth-Token': api_key})
        self.api_key = api_key
        self.session = session

    def get_matches(self, competition_code, date_from, date_to):
        """Fetch the matches of one competition within a date range.

        Args:
            competition_code: Competition code placed in the request URL.
            date_from: Sent as the ``dateFrom`` query parameter.
            date_to: Sent as the ``dateTo`` query parameter.

        Returns:
            The ``matches`` list of the response on HTTP 200; an empty list
            when no key is configured, on any other status, or on any error.
            On HTTP 429 the call sleeps 60 seconds before returning.
        """
        matches = []
        if self.api_key:
            try:
                url = f"{self.BASE_URL}/competitions/{competition_code}/matches"
                response = self.session.get(
                    url,
                    params={'dateFrom': date_from, 'dateTo': date_to},
                    timeout=10,
                )
                status = response.status_code

                if status == 200:
                    matches = response.json().get('matches', [])
                elif status == 429:
                    logger.warning("football-data.org rate limited")
                    # Back off so that the next request is not rejected too.
                    time.sleep(60)
                else:
                    logger.warning(f"football-data.org returned {response.status_code}")

            except Exception as e:
                logger.error(f"football-data.org error: {e}")

        return matches


class APIFootball:
    """Optional client for API-Football, reached through RapidAPI.

    Without an API key the client is inert: every query returns an empty list
    and no request is sent.
    """

    BASE_URL = "https://api-football-v1.p.rapidapi.com/v3"

    def __init__(self, api_key):
        session = requests.Session()
        if api_key:
            # Both headers are attached to every request of this session.
            session.headers.update({
                'X-RapidAPI-Key': api_key,
                'X-RapidAPI-Host': 'api-football-v1.p.rapidapi.com'
            })
        self.api_key = api_key
        self.session = session

    def get_fixtures_by_date(self, date_str):
        """Fetch every fixture listed for one date.

        Args:
            date_str: Sent as the ``date`` query parameter.

        Returns:
            The ``response`` list of the payload on HTTP 200; an empty list
            when no key is configured, on any other status, or on any error.
        """
        fixtures = []
        if self.api_key:
            try:
                url = f"{self.BASE_URL}/fixtures"
                response = self.session.get(url, params={'date': date_str}, timeout=10)

                if response.status_code == 200:
                    payload = response.json()
                    fixtures = payload.get('response', [])

            except Exception as e:
                logger.error(f"API-Football error: {e}")

        return fixtures


class UniversalVerifier:
    """Resolve match results by querying several providers in sequence.

    Order of lookup: livescore.com, then football-data.org (only for league
    codes present in ``FOOTBALL_DATA_COMPETITIONS`` and when an API key is
    set), then API-Football (only when a RapidAPI key is set). Successful
    lookups are memoised per instance.
    """

    def __init__(self):
        self.livescore = FlashscoreAPI()
        self.football_data = FootballDataAPI(FOOTBALL_DATA_API_KEY)
        self.api_football = APIFootball(RAPIDAPI_KEY)

        self.results_cache = {}  # Cache results to reduce API calls

    def get_match_result(self, home_team, away_team, match_date, league_code=''):
        """Get a match result from the first provider that knows it.

        Args:
            home_team: Home team name as stored with the bet.
            away_team: Away team name as stored with the bet.
            match_date: Match date as a ``YYYY-MM-DD`` string.
            league_code: Bet-side league code; enables the football-data.org
                lookup when it is a key of ``FOOTBALL_DATA_COMPETITIONS``.

        Returns:
            On success a dict with ``found`` (True), ``home_goals``,
            ``away_goals``, ``actual`` ('HOME'/'DRAW'/'AWAY'), ``status``
            ('finished') and ``source``. Otherwise the (not found) dict
            returned by the livescore lookup.
        """
        cache_key = f"{home_team}_{away_team}_{match_date}"

        if cache_key in self.results_cache:
            return self.results_cache[cache_key]

        # Try Livescore.com first (covers all leagues)
        logger.info(f"  Checking Livescore.com for {home_team} vs {away_team}...")
        result = self.livescore.search_match(home_team, away_team, match_date)

        if result.get('found') and result.get('status') == 'finished':
            result['actual'] = self._determine_outcome(result['home_goals'], result['away_goals'])
            self.results_cache[cache_key] = result
            return result

        # Try football-data.org for known competitions
        if league_code in FOOTBALL_DATA_COMPETITIONS and FOOTBALL_DATA_API_KEY:
            logger.info("  Checking football-data.org...")
            found = self._from_football_data(
                home_team, away_team, match_date, FOOTBALL_DATA_COMPETITIONS[league_code]
            )
            if found:
                self.results_cache[cache_key] = found
                return found

            time.sleep(1)  # Rate limiting

        # Try API-Football if configured
        if RAPIDAPI_KEY and not result.get('found'):
            logger.info("  Checking API-Football...")
            found = self._from_api_football(home_team, away_team, match_date)
            if found:
                self.results_cache[cache_key] = found
                return found

        return result

    def _from_football_data(self, home_team, away_team, match_date, comp_code):
        """Return a result dict from football-data.org, or None if not available."""
        for match in self.football_data.get_matches(comp_code, match_date, match_date):
            m_home = self._team_label(match.get('homeTeam'))
            m_away = self._team_label(match.get('awayTeam'))

            if not (self._teams_match(home_team, m_home) and self._teams_match(away_team, m_away)):
                continue
            if match.get('status') != 'FINISHED':
                continue

            full_time = (match.get('score') or {}).get('fullTime') or {}
            found = self._build_result(
                full_time.get('home'), full_time.get('away'), 'football-data.org'
            )
            if found:
                return found

        return None

    def _from_api_football(self, home_team, away_team, match_date):
        """Return a result dict from API-Football, or None if not available."""
        for fixture in self.api_football.get_fixtures_by_date(match_date):
            teams = fixture.get('teams') or {}
            f_home = (teams.get('home') or {}).get('name') or ''
            f_away = (teams.get('away') or {}).get('name') or ''

            if not (self._teams_match(home_team, f_home) and self._teams_match(away_team, f_away)):
                continue

            status = ((fixture.get('fixture') or {}).get('status') or {}).get('short', '')
            if status != 'FT':  # Full Time
                continue

            goals = fixture.get('goals') or {}
            found = self._build_result(goals.get('home'), goals.get('away'), 'api-football')
            if found:
                return found

        return None

    @staticmethod
    def _team_label(team):
        """Pick a football-data.org team's short name, falling back to its name."""
        team = team or {}
        return team.get('shortName') or team.get('name') or ''

    def _build_result(self, home_goals, away_goals, source):
        """Assemble the result dict for a finished match.

        Returns None when either score is missing: a null or absent score is
        treated as unknown rather than recorded as 0, and must not reach the
        numeric comparison in ``_determine_outcome``.
        """
        if home_goals is None or away_goals is None:
            return None
        return {
            'found': True,
            'home_goals': home_goals,
            'away_goals': away_goals,
            'actual': self._determine_outcome(home_goals, away_goals),
            'status': 'finished',
            'source': source
        }

    def _determine_outcome(self, home_goals, away_goals):
        """Map a score to 'HOME', 'AWAY' or 'DRAW'."""
        if home_goals > away_goals:
            return 'HOME'
        elif away_goals > home_goals:
            return 'AWAY'
        else:
            return 'DRAW'

    def _teams_match(self, team1, team2):
        """Fuzzy match team names; an empty or missing name never matches."""
        if not team1 or not team2:
            return False
        return self.livescore._teams_match(team1, team2)


def load_verification_results():
    """Return the stored verification results, or a fresh empty structure.

    Reads ``verification_results.json`` from the data directory when it
    exists. Otherwise returns a new dict with zeroed summary statistics, empty
    per-confidence and per-league tables and an empty ``results`` list.
    """
    results_file = os.path.join(DATA_DIR, 'verification_results.json')

    if not os.path.exists(results_file):
        empty_summary = {'total': 0, 'correct': 0, 'accuracy': 0}
        return {
            'generated_at': datetime.now().isoformat(),
            'model_version': 'V4-Advanced',
            'summary': empty_summary,
            'by_confidence': {},
            'by_league': {},
            'results': []
        }

    with open(results_file) as fh:
        stored = json.load(fh)
    return stored


def save_verification_results(data):
    """Recompute all statistics on ``data`` and write it to disk as JSON.

    Mutates ``data`` in place: ``summary``, ``generated_at``,
    ``by_confidence`` and ``by_league`` are rebuilt from ``data['results']``,
    then the whole structure is written to ``verification_results.json`` in
    the data directory.

    Args:
        data: Verification structure; its ``results`` entry is a list of
            per-match dicts carrying at least ``correct`` and optionally
            ``confidence``, ``league`` and ``league_name``.
    """
    results = data.get('results', [])

    def stats(rows):
        # Shared total/correct/accuracy triple used by every table below.
        hits = sum(1 for r in rows if r.get('correct', False))
        return {
            'total': len(rows),
            'correct': hits,
            'accuracy': round(100 * hits / len(rows), 1) if rows else 0
        }

    def confidence_of(row):
        # A null or non-numeric confidence counts as 0 instead of raising and
        # discarding the whole run's results.
        try:
            return float(row.get('confidence') or 0)
        except (TypeError, ValueError):
            return 0

    data['summary'] = stats(results)
    data['generated_at'] = datetime.now().isoformat()

    # Recalculate by confidence: each bucket holds the results at or above
    # the threshold; empty buckets are omitted.
    by_confidence = {}
    for threshold in [40, 45, 50, 55, 60, 65, 70]:
        high_conf = [r for r in results if confidence_of(r) >= threshold]
        if high_conf:
            by_confidence[threshold] = stats(high_conf)
    data['by_confidence'] = by_confidence

    # Recalculate by league: group in a single pass. Insertion order follows
    # first appearance in `results`, so the written file is deterministic.
    grouped = {}
    for r in results:
        league = r.get('league')
        if league:
            grouped.setdefault(league, []).append(r)

    data['by_league'] = {
        league: {'name': rows[0].get('league_name', league), **stats(rows)}
        for league, rows in grouped.items()
    }

    path = os.path.join(DATA_DIR, 'verification_results.json')
    with open(path, 'w') as f:
        json.dump(data, f, indent=2)

    summary = data['summary']
    logger.info(f"Saved: {summary['correct']}/{summary['total']} correct ({summary['accuracy']}%)")


def load_pending_bets():
    """Return the decoded contents of ``virtual_bets.json``.

    The file is read from the data directory; an empty dict is returned when
    it does not exist.
    """
    bets_file = os.path.join(DATA_DIR, 'virtual_bets.json')

    if os.path.exists(bets_file):
        with open(bets_file) as fh:
            stored_bets = json.load(fh)
        return stored_bets

    return {}


def get_matches_to_verify(days_back=3, bets=None):
    """Collect the unique matches referenced by pending bets.

    Args:
        days_back: Only matches dated within the last ``days_back`` days (or
            in the future) are returned. Pass ``None`` to disable the date
            filter. Matches whose date cannot be parsed as ``YYYY-MM-DD`` are
            kept, so the filter never hides malformed data.
        bets: Mapping of user name to that user's list of bets. When omitted
            the bets are loaded with ``load_pending_bets()``.

    Returns:
        A list of dicts, one per distinct (home, away, date) triple, with the
        keys ``home_team``, ``away_team``, ``date``, ``time``,
        ``league_code``, ``league_name``, ``prediction`` and ``confidence``.
        Order follows first appearance in ``bets``.
    """
    if bets is None:
        bets = load_pending_bets()

    cutoff = None
    if days_back is not None:
        cutoff = (datetime.now() - timedelta(days=days_back)).date()

    def too_old(date_str):
        if cutoff is None:
            return False
        try:
            return datetime.strptime(date_str, '%Y-%m-%d').date() < cutoff
        except (TypeError, ValueError):
            return False

    matches_to_check = []
    seen = set()

    for user_bets in bets.values():
        for bet in user_bets:
            if bet.get('status') != 'pending':
                continue

            for match in bet.get('matches', []):
                home = match.get('home_team', '')
                away = match.get('away_team', '')
                date = match.get('date', '')

                key = (home, away, date)
                if key in seen or too_old(date):
                    continue
                seen.add(key)

                matches_to_check.append({
                    'home_team': home,
                    'away_team': away,
                    'date': date,
                    # Kick-off time is carried along so that it can be stored
                    # with the verification entry.
                    'time': match.get('time', ''),
                    'league_code': match.get('league_code', ''),
                    'league_name': match.get('league_name', ''),
                    'prediction': match.get('prediction', ''),
                    'confidence': match.get('confidence', 50)
                })

    return matches_to_check


def _match_key(home, away, date):
    """Build the identity key used to de-duplicate verified matches."""
    return f"{home}_{away}_{date}"


def _outcome_code(outcome):
    """Encode an outcome label: HOME -> 2, DRAW -> 1, anything else -> 0."""
    return 2 if outcome == 'HOME' else (1 if outcome == 'DRAW' else 0)


def verify_all_pending(force=False, days=3):
    """Verify every match referenced by a pending bet and store the outcome.

    For each candidate match the result is looked up through
    ``UniversalVerifier``; finished matches are compared with the stored
    prediction and added to the verification results, which are saved only
    when at least one match was newly verified.

    Args:
        force: When True, matches that already have a stored verification
            entry are looked up again and, if a result is found, the stored
            entry is replaced. When False they are skipped.
        days: Forwarded to ``get_matches_to_verify`` as its ``days_back``
            argument.
    """
    logger.info("=" * 60)
    logger.info("Universal Match Verifier")
    logger.info(f"Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    logger.info("=" * 60)

    verifier = UniversalVerifier()
    verification = load_verification_results()
    # Tolerate a stored file that lacks the 'results' list.
    results = verification.setdefault('results', [])

    # Keys of matches that already have a stored verification entry.
    verified_keys = {
        _match_key(r.get('home_team'), r.get('away_team'), r.get('date'))
        for r in results
    }

    matches = get_matches_to_verify(days)
    logger.info(f"Found {len(matches)} unique matches in pending bets")

    verified_count = 0
    not_found = 0
    already_done = 0

    for match in matches:
        home = match['home_team']
        away = match['away_team']
        date = match['date']

        key = _match_key(home, away, date)

        # Skip if already verified (unless a re-check was requested).
        if key in verified_keys and not force:
            already_done += 1
            continue

        logger.info(f"\nVerifying: {home} vs {away} ({date})")

        result = verifier.get_match_result(home, away, date, match.get('league_code', ''))

        if result.get('found') and result.get('status') == 'finished':
            actual = result['actual']
            predicted = match.get('prediction', '')
            is_correct = actual == predicted

            logger.info(f"  Result: {result['home_goals']}-{result['away_goals']} ({actual})")
            logger.info(f"  Predicted: {predicted} | Correct: {is_correct}")
            logger.info(f"  Source: {result.get('source', 'unknown')}")

            entry = {
                'league': match.get('league_code', ''),
                # `or` rather than a .get() default: the bet usually carries
                # the key with an empty value, which would otherwise block
                # the fallback to the provider's tournament name.
                'league_name': match.get('league_name') or result.get('tournament', ''),
                'date': date,
                'time': match.get('time', ''),
                'home_team': home,
                'away_team': away,
                'home_goals': result['home_goals'],
                'away_goals': result['away_goals'],
                'actual': actual,
                'actual_code': _outcome_code(actual),
                'predicted': predicted,
                'predicted_code': _outcome_code(predicted),
                'confidence': match.get('confidence', 50),
                'correct': is_correct,
                'verified_at': datetime.now().isoformat(),
                'source': result.get('source', 'unknown')
            }

            if key in verified_keys:
                # Forced re-check: drop the stale entry so the match is not
                # counted twice in the statistics.
                results[:] = [
                    r for r in results
                    if _match_key(r.get('home_team'), r.get('away_team'), r.get('date')) != key
                ]

            results.insert(0, entry)
            verified_keys.add(key)
            verified_count += 1

        elif result.get('status') == 'in_progress':
            logger.info("  Match still in progress")
        else:
            logger.info("  Result not found")
            not_found += 1

        # Rate limiting
        time.sleep(1)

    # Sort by date, newest first; a missing date sorts as the empty string.
    verification['results'] = sorted(
        results,
        key=lambda x: x.get('date') or '',
        reverse=True
    )

    # Save only when something changed.
    if verified_count > 0:
        save_verification_results(verification)

    logger.info("\n" + "=" * 60)
    logger.info("Verification Complete")
    logger.info(f"  New verified: {verified_count}")
    logger.info(f"  Not found: {not_found}")
    logger.info(f"  Already done: {already_done}")
    logger.info("=" * 60)


def main():
    """Command-line entry point.

    Recognised arguments:
        --force    Re-check matches that already have a stored result.
        --days N   Look-back window in days (default 3); a non-integer N is
                   ignored and the default is kept.
    """
    force = '--force' in sys.argv

    # Parse days argument
    days = 3
    for i, arg in enumerate(sys.argv):
        if arg == '--days' and i + 1 < len(sys.argv):
            try:
                days = int(sys.argv[i + 1])
            except ValueError:
                logger.warning(f"Ignoring invalid --days value: {sys.argv[i + 1]}")

    verify_all_pending(force=force, days=days)


if __name__ == '__main__':
    main()
