# 🚀 BestTrading - GPU Training (Colab)

Addestra modelli RANGING, VOLATILE, SCALPER con GPU.

**Come usare:**
1. Runtime → Change runtime type → GPU
2. Carica i dati (o connetti al DB)
3. Esegui tutte le celle
4. Scarica il modello addestrato

In [None]:
# Check GPU availability
# NOTE: the `!` line is an IPython shell escape, so this cell only runs inside a notebook.
!nvidia-smi
import torch
print(f"\n‚úÖ PyTorch: {torch.__version__}")
print(f"‚úÖ CUDA disponibile: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # Report the name of the first CUDA device
    print(f"‚úÖ GPU: {torch.cuda.get_device_name(0)}")

In [None]:
# Install dependencies (IPython shell escape; -q keeps pip output quiet)
!pip install psycopg2-binary pandas numpy -q

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from collections import deque
import random
import json
from datetime import datetime, timedelta

# Choose the compute device once: CUDA when PyTorch can see a GPU, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

## 📊 Opzione 1: Carica dati da file CSV

Se non puoi connetterti al DB, carica un CSV con colonna `price`

In [None]:
# OPTION 1: Load from CSV
# Uncomment the lines below and upload the file
# from google.colab import files
# uploaded = files.upload()
# df = pd.read_csv(list(uploaded.keys())[0])
# prices = df['price'].values.tolist()

## 📊 Opzione 2: Connetti al DB PostgreSQL

In [None]:
# OPZIONE 2: Connetti al DB
import psycopg2

import os

# Connection settings. Every value can be overridden through an environment
# variable, so nothing has to be edited (or committed) in the notebook.
#
# The password deliberately has NO default: credentials must never live in
# source. Set it before running this cell, for example:
#     import getpass, os
#     os.environ['BESTRADING_DB_PASSWORD'] = getpass.getpass()
#
# NOTE(review): the password that used to be hard-coded here has been exposed
# to everyone with access to this file and should be rotated.
DB_CONFIG = {
    'host': os.environ.get('BESTRADING_DB_HOST', '91.134.4.25'),  # public server IP
    'port': int(os.environ.get('BESTRADING_DB_PORT', '5432')),
    'database': os.environ.get('BESTRADING_DB_NAME', 'bestrading'),
    'user': os.environ.get('BESTRADING_DB_USER', 'bestrading'),
    'password': os.environ.get('BESTRADING_DB_PASSWORD', ''),
}
if not DB_CONFIG['password']:
    print("BESTRADING_DB_PASSWORD is not set; the database connection will fail until it is.")

PAIR = 'BTC/EUR'
DAYS = 7  # Number of most recent days of data to load

def load_prices_from_db():
    """Load the mid prices of ``PAIR`` for the last ``DAYS`` days.

    Uses the module-level ``DB_CONFIG``, ``PAIR`` and ``DAYS`` settings and
    reads the ``mid`` field of the ``state`` column of ``state_snapshots``,
    keeping only strictly positive values.

    Returns:
        list: prices ordered by timestamp, oldest first (empty when no row
        matches).

    Raises:
        Any exception raised by psycopg2 while connecting or querying is
        propagated to the caller; the connection is closed in every case.
    """
    end_date = datetime.now()
    start_date = end_date - timedelta(days=DAYS)

    query = """
        SELECT (state->>'mid')::float as price
        FROM state_snapshots
        WHERE pair = %s
          AND timestamp >= %s
          AND timestamp < %s
          AND (state->>'mid')::float > 0
        ORDER BY timestamp ASC
    """

    conn = psycopg2.connect(**DB_CONFIG)
    try:
        # The cursor context manager closes the cursor even if the query fails.
        with conn.cursor() as cur:
            cur.execute(query, (PAIR, start_date, end_date))
            return [row[0] for row in cur.fetchall()]
    finally:
        # `with conn:` would only end the transaction, not close the
        # connection, so close it explicitly.
        conn.close()

# Try the database; on any failure fall back to the CSV option (best-effort).
try:
    db_prices = load_prices_from_db()
except Exception as e:
    print(f"‚ùå Errore connessione DB: {e}")
    print("   Usa l'opzione CSV sopra")
else:
    if db_prices:
        prices = db_prices
        print(f"‚úÖ Caricati {len(prices)} prezzi per {PAIR}")
        print(f"   Range: {min(prices):.2f} - {max(prices):.2f}")
    else:
        # An empty result is not a connection error. Report it as such and
        # leave any `prices` already loaded from CSV untouched.
        print(f"Nessun prezzo trovato per {PAIR} negli ultimi {DAYS} giorni")
        print("   Usa l'opzione CSV sopra")

## 🧠 Neural Network (PyTorch + GPU)

In [None]:
class TradingNN(nn.Module):
    """Fully connected Q-network.

    Each hidden layer is ``Linear`` followed by ``LeakyReLU(0.01)``; the
    output layer is a plain ``Linear`` producing one value per action.

    Args:
        input_size: number of input features.
        hidden_sizes: iterable with the width of each hidden layer, in order.
        output_size: number of outputs / actions (default 3).
    """

    def __init__(self, input_size, hidden_sizes, output_size=3):
        super().__init__()
        # Remembered so that random exploration covers every action.
        self.output_size = output_size
        layers = []
        prev_size = input_size
        for h in hidden_sizes:
            layers.append(nn.Linear(prev_size, h))
            layers.append(nn.LeakyReLU(0.01))
            prev_size = h
        layers.append(nn.Linear(prev_size, output_size))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the network output for a batch ``x``."""
        return self.net(x)

    def get_action(self, state, epsilon=0):
        """Pick an action for a single ``state`` with epsilon-greedy exploration.

        Args:
            state: flat sequence of ``input_size`` numbers.
            epsilon: probability of returning a uniformly random action.

        Returns:
            int: action index in ``range(output_size)``.
        """
        if random.random() < epsilon:
            return random.randrange(self.output_size)
        # Use the device the weights actually live on rather than a global,
        # so the model also works when it was not moved to the default device.
        model_device = next(self.parameters()).device
        with torch.no_grad():
            state_t = torch.FloatTensor(state).unsqueeze(0).to(model_device)
            q_values = self(state_t)
            return q_values.argmax(1).item()

## 📈 Trading Environments

In [None]:
class RangingTradingEnv:
    """Mean-reversion trading environment for sideways (ranging) markets.

    The agent walks through ``prices`` one index at a time, starting at
    index 30. Action 0 closes any open position, action 1 means "be long"
    and action 2 means "be short" (an opposite position is closed first).
    ``fee_rate`` is charged against capital on entry and against the trade
    return on exit.

    ``get_state`` produces 10 features: five percentage returns, a z-score
    clipped to [-3, 3] and the relative dispersion over the previous 20
    prices, an RSI-style oscillator, the current position and the open
    position's return in percent.
    """

    _LOOKBACKS = (1, 2, 3, 5, 10)

    def __init__(self, prices, initial_capital=10000):
        self.fee_rate = 0.001
        self.initial_capital = initial_capital
        self.prices = prices
        self.reset()

    def reset(self):
        """Restore the initial account state and return the first observation."""
        self.idx = 30
        self.capital = self.initial_capital
        self.position = self.entry_price = 0
        self.trades = self.wins = 0
        return self.get_state()

    def _position_return(self, price):
        # Signed fractional return of the open position at `price`.
        return self.position * (price - self.entry_price) / self.entry_price

    def _book_trade(self, gross):
        # Apply a closed trade to the account and return its scaled reward.
        net = gross - self.fee_rate
        self.capital *= (1 + net)
        self.trades += 1
        if net > 0:
            self.wins += 1
        return net * 100

    def _band_features(self, now):
        # Clipped z-score and relative std against the 20 prices before `idx`.
        t = self.idx
        if t < 20:
            return [0, 0]
        window = self.prices[t - 20:t]
        mu = np.mean(window)
        sigma = np.std(window)
        z = (now - mu) / sigma if sigma > 0 else 0
        return [max(-3, min(3, z)), sigma / mu * 100]

    def _rsi_feature(self):
        # RSI over the last 14 price changes, rescaled from [0, 100] to [-1, 1].
        t = self.idx
        if t < 15:
            return 0
        up = down = 0
        for back in range(1, 15):
            delta = self.prices[t - back + 1] - self.prices[t - back]
            if delta > 0:
                up += delta
            else:
                down -= delta
        rs = up / down if down > 0 else 100
        rsi = 100 - (100 / (1 + rs))
        return (rsi - 50) / 50

    def get_state(self):
        """Return the feature list for the current index, or None past the end."""
        t = self.idx
        series = self.prices
        if t >= len(series):
            return None
        now = series[t]

        state = [
            (now - series[t - k]) / series[t - k] * 100 if t >= k else 0
            for k in self._LOOKBACKS
        ]
        state.extend(self._band_features(now))
        state.append(self._rsi_feature())

        state.append(self.position)
        open_pnl = 0
        if self.position != 0 and self.entry_price > 0:
            open_pnl = self._position_return(now)
        state.append(open_pnl * 100)
        return state

    def step(self, action):
        """Apply ``action`` at the current price and advance one index.

        Returns ``(next_state, reward, done)``. When the series is exhausted
        ``next_state`` is None, ``done`` is True and any position still open
        is settled at the last price.
        """
        price = self.prices[self.idx]
        reward = 0

        if self.position != 0:
            open_return = self._position_return(price)
            reverses = (self.position, action) in ((-1, 1), (1, 2))
            if action == 0 or reverses:
                reward = self._book_trade(open_return)
                self.position = 0
                self.entry_price = 0

        if self.position == 0 and action in (1, 2):
            self.position = 1 if action == 1 else -1
            self.entry_price = price
            self.capital *= (1 - self.fee_rate)

        self.idx += 1
        next_state = self.get_state()
        if next_state is not None:
            return next_state, reward, False

        if self.position != 0:
            reward += self._book_trade(self._position_return(self.prices[-1]))
        return next_state, reward, True


class VolatileTradingEnv:
    """Trading environment for high-volatility markets - aims to capture large moves.

    The agent walks through ``prices`` one index at a time, starting at
    index 30. Action 0 closes any open position, action 1 means "be long"
    and action 2 means "be short" (an opposite position is closed first).
    ``fee_rate`` is charged against capital on entry and against the trade
    return on exit.

    ``get_state`` produces 10 features: five percentage returns, the RMS of
    the last 20 one-step returns, the relative change between a recent and
    an older RMS window, the 5-vs-10 moving-average gap, the current
    position and the open position's return in percent.
    """

    _LOOKBACKS = (1, 3, 5, 10, 20)

    def __init__(self, prices, initial_capital=10000):
        self.fee_rate = 0.001
        self.initial_capital = initial_capital
        self.prices = prices
        self.reset()

    def reset(self):
        """Restore the initial account state and return the first observation."""
        self.idx = 30
        self.capital = self.initial_capital
        self.position = self.entry_price = 0
        self.trades = self.wins = 0
        return self.get_state()

    @staticmethod
    def _rms(values):
        # Root mean square of a list of returns.
        return np.sqrt(np.mean(np.array(values) ** 2))

    def _position_return(self, price):
        # Signed fractional return of the open position at `price`.
        return self.position * (price - self.entry_price) / self.entry_price

    def _book_trade(self, gross):
        # Apply a closed trade to the account and return its scaled reward.
        net = gross - self.fee_rate
        self.capital *= (1 + net)
        self.trades += 1
        if net > 0:
            self.wins += 1
        return net * 100

    def get_state(self):
        """Return the feature list for the current index, or None past the end."""
        t = self.idx
        series = self.prices
        if t >= len(series):
            return None
        now = series[t]

        state = [
            (now - series[t - k]) / series[t - k] * 100 if t >= k else 0
            for k in self._LOOKBACKS
        ]

        if t >= 20:
            # One-step returns, most recent first.
            rets = [(series[t - i + 1] - series[t - i]) / series[t - i]
                    for i in range(1, 21)]
            state.append(self._rms(rets) * 100)
            recent = self._rms(rets[:5])
            older = self._rms(rets[10:15])
            state.append((recent - older) / older if older > 0 else 0)
        else:
            state += [0, 0]

        if t >= 10:
            fast = np.mean(series[t - 5:t])
            slow = np.mean(series[t - 10:t])
            state.append((fast - slow) / slow * 100)
        else:
            state.append(0)

        state.append(self.position)
        open_pnl = 0
        if self.position != 0 and self.entry_price > 0:
            open_pnl = self._position_return(now)
        state.append(open_pnl * 100)
        return state

    def step(self, action):
        """Apply ``action`` at the current price and advance one index.

        Returns ``(next_state, reward, done)``. When the series is exhausted
        ``next_state`` is None, ``done`` is True and any position still open
        is settled at the last price.
        """
        price = self.prices[self.idx]
        reward = 0

        if self.position != 0:
            open_return = self._position_return(price)
            reverses = (self.position, action) in ((-1, 1), (1, 2))
            if action == 0 or reverses:
                reward = self._book_trade(open_return)
                self.position = 0
                self.entry_price = 0

        if self.position == 0 and action in (1, 2):
            self.position = 1 if action == 1 else -1
            self.entry_price = price
            self.capital *= (1 - self.fee_rate)

        self.idx += 1
        next_state = self.get_state()
        if next_state is not None:
            return next_state, reward, False

        if self.position != 0:
            reward += self._book_trade(self._position_return(self.prices[-1]))
        return next_state, reward, True


class ScalperTradingEnv:
    """Scalping environment: fast trades on micro price movements.

    The agent walks through ``prices`` one index at a time, starting at
    index 20. Action 0 closes any open position, action 1 means "be long"
    and action 2 means "be short" (an opposite position is closed first).
    A position held for ``max_holding`` steps is closed regardless of the
    action, and no new position is opened on that step. ``fee_rate`` is
    charged against capital on entry and against the trade return on exit.

    ``get_state`` produces 9 features: four percentage returns, the RMS of
    the last 10 one-step returns, the distance from the 5-price average, the
    current position, the open position's return in percent and the fraction
    of ``max_holding`` already used (capped at 1).
    """

    _LOOKBACKS = (1, 2, 3, 5)

    def __init__(self, prices, initial_capital=10000):
        self.fee_rate = 0.0008
        self.max_holding = 30
        self.initial_capital = initial_capital
        self.prices = prices
        self.reset()

    def reset(self):
        """Restore the initial account state and return the first observation."""
        self.idx = 20
        self.capital = self.initial_capital
        self.position = self.entry_price = self.entry_idx = 0
        self.trades = self.wins = 0
        return self.get_state()

    def _position_return(self, price):
        # Signed fractional return of the open position at `price`.
        return self.position * (price - self.entry_price) / self.entry_price

    def _book_trade(self, gross):
        # Apply a closed trade to the account and return its scaled reward.
        net = gross - self.fee_rate
        self.capital *= (1 + net)
        self.trades += 1
        if net > 0:
            self.wins += 1
        return net * 150

    def get_state(self):
        """Return the feature list for the current index, or None past the end."""
        t = self.idx
        series = self.prices
        if t >= len(series):
            return None
        now = series[t]

        state = [
            (now - series[t - k]) / series[t - k] * 100 if t >= k else 0
            for k in self._LOOKBACKS
        ]

        if t >= 10:
            # One-step returns, most recent first.
            rets = [(series[t - i + 1] - series[t - i]) / series[t - i]
                    for i in range(1, 11)]
            state.append(np.sqrt(np.mean(np.array(rets) ** 2)) * 100)
        else:
            state.append(0)

        if t >= 5:
            avg5 = np.mean(series[t - 5:t])
            state.append((now - avg5) / avg5 * 100)
        else:
            state.append(0)

        state.append(self.position)
        open_pnl = 0
        if self.position != 0 and self.entry_price > 0:
            open_pnl = self._position_return(now)
        state.append(open_pnl * 100)

        if self.position != 0:
            state.append(min(1, (t - self.entry_idx) / self.max_holding))
        else:
            state.append(0)
        return state

    def step(self, action):
        """Apply ``action`` at the current price and advance one index.

        Returns ``(next_state, reward, done)``. When the series is exhausted
        ``next_state`` is None, ``done`` is True and any position still open
        is settled at the last price.
        """
        price = self.prices[self.idx]
        reward = 0
        held_for = self.idx - self.entry_idx
        must_exit = self.position != 0 and held_for >= self.max_holding

        if self.position != 0:
            open_return = self._position_return(price)
            reverses = (self.position, action) in ((-1, 1), (1, 2))
            if must_exit or action == 0 or reverses:
                reward = self._book_trade(open_return)
                self.position = 0
                self.entry_price = 0

        # A forced exit never re-enters on the same step.
        if self.position == 0 and not must_exit and action in (1, 2):
            self.position = 1 if action == 1 else -1
            self.entry_price = price
            self.entry_idx = self.idx
            self.capital *= (1 - self.fee_rate)

        self.idx += 1
        next_state = self.get_state()
        if next_state is not None:
            return next_state, reward, False

        if self.position != 0:
            reward += self._book_trade(self._position_return(self.prices[-1]))
        return next_state, reward, True

## 🎯 Replay Buffer & Training

In [None]:
class ReplayBuffer:
    """Bounded store of experiences with uniform random sampling.

    Once ``max_size`` items are held, adding a new one drops the oldest.
    """

    def __init__(self, max_size=30000):
        self.buffer = deque([], max_size)

    def add(self, exp):
        """Append one experience."""
        self.buffer.append(exp)

    def sample(self, n):
        """Return up to ``n`` distinct stored experiences chosen at random."""
        available = len(self.buffer)
        count = available if n > available else n
        return random.sample(self.buffer, count)

    def __len__(self):
        return len(self.buffer)


def _greedy_rollout(model, EnvClass, prices):
    """Run one episode with exploration disabled and return the finished env."""
    env = EnvClass(prices)
    state = env.reset()
    while state is not None:
        state, _, _ = env.step(model.get_action(state, 0))
    return env


def _performance(env):
    """Return ``(total return in %, win rate in %)`` for a finished env."""
    start = getattr(env, 'initial_capital', 10000)
    total_return = (env.capital - start) / start * 100
    win_rate = env.wins / env.trades * 100 if env.trades > 0 else 0
    return total_return, win_rate


def train_model(prices, model_type, EnvClass, input_size, hidden_sizes, episodes=300):
    """Train a DQN-style agent on ``prices`` and report its performance.

    A policy network and a target network (synchronised every 10 episodes)
    are trained from a replay buffer with a smooth-L1 loss. Every 50 episodes
    the policy is evaluated greedily and the best-returning weights are
    kept; those weights are restored before the final evaluation.

    Note that every evaluation runs on the same ``prices`` used for training,
    so the reported numbers are in-sample.

    Args:
        prices: sequence of prices; at least 500 are required.
        model_type: label used in the printed report and in the metrics.
        EnvClass: environment class, called as ``EnvClass(prices)`` and
            exposing ``reset()``, ``step(action)``, ``capital``, ``trades``
            and ``wins``.
        input_size: number of features returned by the environment state.
        hidden_sizes: hidden layer widths passed to ``TradingNN``.
        episodes: number of training episodes.

    Returns:
        tuple: ``(model, metrics)`` where ``metrics`` holds ``model_type``,
        ``return``, ``buy_hold``, ``trades`` and ``win_rate``. When fewer
        than 500 prices are supplied the result is ``(None, None)`` so that
        callers can always unpack two values.
    """
    print(f"\n{'‚ïê'*60}")
    print(f"   Training {model_type.upper()} model")
    print(f"{'‚ïê'*60}")
    print(f"üìà {len(prices)} price points")

    if len(prices) < 500:
        print("‚ùå Not enough data")
        return None, None

    buy_hold = (prices[-1] - prices[0]) / prices[0] * 100
    print(f"üìä Buy & Hold: {buy_hold:+.2f}%")

    # Create networks. The local names deliberately avoid `nn`, which would
    # shadow the `torch.nn` module inside this function.
    policy_net = TradingNN(input_size, hidden_sizes).to(device)
    target_net = TradingNN(input_size, hidden_sizes).to(device)
    target_net.load_state_dict(policy_net.state_dict())

    optimizer = optim.Adam(policy_net.parameters(), lr=0.0005)
    buffer = ReplayBuffer(30000)

    gamma = 0.95
    epsilon = 1.0
    epsilon_decay = 0.996
    epsilon_min = 0.03
    batch_size = 64

    best_return = float('-inf')
    best_model_state = None

    def float_batch(rows):
        # Going through NumPy copes with rows that mix ints, floats and
        # NumPy scalars, and yields a float32 tensor in one conversion.
        return torch.as_tensor(np.asarray(rows, dtype=np.float32), device=device)

    print(f"\nüéØ Training {episodes} episodes...\n")

    for ep in range(episodes):
        env = EnvClass(prices)
        state = env.reset()

        while state is not None:
            action = policy_net.get_action(state, epsilon)
            next_state, reward, done = env.step(action)

            # Terminal transitions carry the final settlement reward and must
            # be learned from too. Their next state is never used (the target
            # is masked by `done`), so a zero placeholder keeps the batch
            # rectangular.
            stored_next = next_state if next_state is not None else [0.0] * len(state)
            buffer.add((state, action, reward, stored_next, done))

            if len(buffer) >= batch_size:
                batch = buffer.sample(batch_size)
                states = float_batch([e[0] for e in batch])
                actions = torch.as_tensor([e[1] for e in batch], dtype=torch.int64, device=device)
                rewards = float_batch([e[2] for e in batch])
                next_states = float_batch([e[3] for e in batch])
                dones = float_batch([e[4] for e in batch])

                current_q = policy_net(states).gather(1, actions.unsqueeze(1)).squeeze(1)
                with torch.no_grad():
                    next_q = target_net(next_states).max(1)[0]
                target_q = rewards + gamma * next_q * (1 - dones)

                loss = torch.nn.functional.smooth_l1_loss(current_q, target_q)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            state = next_state

        epsilon = max(epsilon_min, epsilon * epsilon_decay)

        if ep % 10 == 0:
            target_net.load_state_dict(policy_net.state_dict())

        if (ep + 1) % 50 == 0:
            eval_env = _greedy_rollout(policy_net, EnvClass, prices)
            ret, win_rate = _performance(eval_env)

            marker = '‚≠ê' if ret > best_return else '  '
            print(f"{marker} Ep {ep+1:3d}: Return={ret:+7.2f}%, Trades={eval_env.trades:3d}, Win={win_rate:4.1f}%")

            if ret > best_return:
                best_return = ret
                # state_dict() values alias the live parameters, so clone
                # them; a shallow dict copy would keep changing as training
                # continues.
                best_model_state = {
                    name: tensor.detach().clone()
                    for name, tensor in policy_net.state_dict().items()
                }

    if best_model_state:
        policy_net.load_state_dict(best_model_state)

    # Final eval
    print(f"\n{'‚îÄ'*60}")
    print(f"   FINAL RESULTS - {model_type.upper()}")
    print(f"{'‚îÄ'*60}")

    final_env = _greedy_rollout(policy_net, EnvClass, prices)
    final_return, win_rate = _performance(final_env)

    print(f"   üí∞ Return: {final_return:+.2f}%")
    print(f"   üìä B&H: {buy_hold:+.2f}%")
    print(f"   {'‚úÖ BEATS B&H!' if final_return > buy_hold else '‚ùå B&H wins'}")
    print(f"   üéØ Trades: {final_env.trades}, Win Rate: {win_rate:.1f}%")

    return policy_net, {
        'model_type': model_type,
        'return': final_return,
        'buy_hold': buy_hold,
        'trades': final_env.trades,
        'win_rate': win_rate
    }

## 🚀 TRAIN ALL MODELS

In [None]:
# Fit the RANGING specialist: 10 input features, hidden layers 48 -> 24, 300 episodes
ranging_nn, ranging_metrics = train_model(
    prices=prices,
    model_type='ranging',
    EnvClass=RangingTradingEnv,
    input_size=10,
    hidden_sizes=[48, 24],
    episodes=300,
)

In [None]:
# Fit the VOLATILE specialist: 10 input features, hidden layers 48 -> 24, 300 episodes
volatile_nn, volatile_metrics = train_model(
    prices=prices,
    model_type='volatile',
    EnvClass=VolatileTradingEnv,
    input_size=10,
    hidden_sizes=[48, 24],
    episodes=300,
)

In [None]:
# Fit the SCALPER specialist: 9 input features, hidden layers 32 -> 16, 400 episodes
scalper_nn, scalper_metrics = train_model(
    prices=prices,
    model_type='scalper',
    EnvClass=ScalperTradingEnv,
    input_size=9,
    hidden_sizes=[32, 16],
    episodes=400,
)

## 💾 Export Models (formato JS compatibile)

In [None]:
def export_model_js(nn, model_type, metrics, pair='BTC/EUR'):
    """Export a model to JSON in the format used by train-specialized.mjs.

    Every ``...weight`` entry of the model's ``state_dict`` becomes one layer
    holding its weights and the matching ``...bias`` values as nested lists.

    Args:
        nn: trained model exposing ``state_dict()``.
        model_type: label stored in the metrics and used in the file name.
        metrics: dict with the keys ``return``, ``buy_hold``, ``win_rate``
            and ``trades``.
        pair: trading pair used as the file-name prefix, with ``/`` replaced
            by ``_`` (default ``'BTC/EUR'``).

    Returns:
        str: name of the JSON file written to the current directory.
    """
    state_dict = nn.state_dict()

    # Convert PyTorch layers to JS format: one entry per weight/bias pair.
    suffix = 'weight'
    layers = [
        {
            'weights': tensor.cpu().numpy().tolist(),
            'biases': state_dict[name[:-len(suffix)] + 'bias'].cpu().numpy().tolist(),
        }
        for name, tensor in state_dict.items()
        if name.endswith(suffix)
    ]

    model_data = {
        'type': 'PureNN',
        'network': {'layers': layers},
        'metrics': {
            'totalReturn': metrics['return'],
            'buyHoldReturn': metrics['buy_hold'],
            'winRate': metrics['win_rate'],
            'totalTrades': metrics['trades'],
            'modelType': model_type,
            'trainedAt': datetime.now().isoformat(),
            'trainedOn': 'Colab GPU'
        }
    }

    filename = f"{pair.replace('/', '_')}_{model_type}_model.json"
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(model_data, f)

    print(f"‚úÖ Saved {filename}")
    return filename

# Export each trained model, collecting the names of the files written.
files = []


def _export_if_trained(model, label, metrics):
    """Export ``model`` and record its file name, unless the model is falsy."""
    if model:
        files.append(export_model_js(model, label, metrics))


_export_if_trained(ranging_nn, 'ranging', ranging_metrics)
_export_if_trained(volatile_nn, 'volatile', volatile_metrics)
_export_if_trained(scalper_nn, 'scalper', scalper_metrics)

In [None]:
# Download models
# The Colab helper is imported under an alias on purpose: a plain
# `from google.colab import files` rebinds `files`, which already holds the
# list of exported file names, and the loop would then iterate over a module.
from google.colab import files as colab_files
for f in files:
    colab_files.download(f)
print("\nüéâ Tutti i modelli scaricati!")

## 📤 Opzione: Salva direttamente nel DB

In [None]:
def save_to_db(nn, model_type, metrics, pair='BTC/EUR', training_samples=None):
    """Save a model directly into the ``ml_models`` table (best-effort).

    The model is serialised the same way as the JSON export and upserted
    under the key ``<pair with '/' replaced by '_'>_<model_type>``, which is
    stored in the ``pair`` column. Any failure is printed, not raised.

    Args:
        nn: trained model exposing ``state_dict()``.
        model_type: label stored in the metrics and used in the row key.
        metrics: dict with the keys ``return``, ``buy_hold``, ``win_rate``
            and ``trades``; ``win_rate`` / 100 is stored as ``accuracy``.
        pair: trading pair the model was trained on.
        training_samples: number of samples used for training; defaults to
            the length of the module-level ``prices``.
    """
    conn = None
    try:
        if training_samples is None:
            training_samples = len(prices)

        # Build model data (same as export)
        state_dict = nn.state_dict()
        layers = []
        for name, param in state_dict.items():
            if 'weight' in name:
                weights = param.cpu().numpy().tolist()
                bias_name = name.replace('weight', 'bias')
                biases = state_dict[bias_name].cpu().numpy().tolist()
                layers.append({'weights': weights, 'biases': biases})

        model_data = {
            'type': 'PureNN',
            'network': {'layers': layers},
            'metrics': {
                'totalReturn': metrics['return'],
                'buyHoldReturn': metrics['buy_hold'],
                'winRate': metrics['win_rate'],
                'totalTrades': metrics['trades'],
                'modelType': model_type,
                'trainedAt': datetime.now().isoformat(),
                'trainedOn': 'Colab GPU'
            }
        }

        model_name = f"{pair.replace('/', '_')}_{model_type}"

        upsert = """
            INSERT INTO ml_models (pair, weights, accuracy, training_samples, last_trained_at)
            VALUES (%s, %s, %s, %s, NOW())
            ON CONFLICT (pair) DO UPDATE SET
                weights = EXCLUDED.weights,
                accuracy = EXCLUDED.accuracy,
                training_samples = EXCLUDED.training_samples,
                last_trained_at = NOW()
        """

        # Connect only once the payload is ready, so a serialisation problem
        # never leaves a connection open.
        conn = psycopg2.connect(**DB_CONFIG)
        with conn.cursor() as cur:
            cur.execute(upsert, (model_name, json.dumps(model_data), metrics['win_rate'] / 100, training_samples))
        conn.commit()

        print(f"‚úÖ Saved {model_name} to database")
    except Exception as e:
        # Deliberately best-effort: report the problem and carry on.
        print(f"‚ùå Error saving to DB: {e}")
    finally:
        # Close the connection on both the success and the failure path.
        if conn is not None:
            conn.close()

# Uncomment to save directly to DB
# save_to_db(ranging_nn, 'ranging', ranging_metrics)
# save_to_db(volatile_nn, 'volatile', volatile_metrics)
# save_to_db(scalper_nn, 'scalper', scalper_metrics)