"""
Pre-compute ALL features for ALL timesteps
Run this LOCALLY on server, then upload to Kaggle
"""

import numpy as np
import pandas as pd

# Announce the job with a 70-character ruled banner.
print("\n".join(["=" * 70, "   Pre-computing Features for GPU Training", "=" * 70]))

# Price source: the FULL historical export, not a truncated sample.
# CSV columns: timestamp,open,high,low,close,vwap,volume,count
csv_path = '/var/www/html/bestrading.cuttalo.com/scripts/prices_BTC_EUR_full.csv'
print(f"Loading from: {csv_path}")
df = pd.read_csv(csv_path)

# Only the closing price column is kept for the feature computation below.
prices = df['close'].values
print(f"Loaded {len(prices)} price points")
print(f"Price range: {min(prices):.2f} - {max(prices):.2f}")

# Smallest divisor used for any relative change; keeps zero prices from
# producing ZeroDivisionError (Python floats) or inf/NaN (numpy floats).
_MIN_DIVISOR = 0.0001


def _relative_change(new, old):
    """Return ``(new - old) / old`` with the divisor floored at ``_MIN_DIVISOR``."""
    return (new - old) / max(old, _MIN_DIVISOR)


def compute_features(prices, idx):
    """Compute exactly 24 features for the price at position ``idx``.

    The features are derived from a 61-price window: the 60 prices before
    ``idx`` plus the price at ``idx`` itself.

    Layout of the returned list:
        0-3    relative change over 1, 5, 10 and 20 steps, clamped to
               [-0.1, 0.1] and multiplied by 10
        4-5    RMS of 19 window returns, scaled by sqrt(252*24*60)*100/50
               and capped at 2 (the same value is emitted twice)
        6      |first window return| * 100 / 30, capped at 2
        7-9    flow (4-step relative change * 50, clamped to [-1, 1]),
               a constant 0, and flow * 0.8
        10-13  constants 0.2, 0.5, 0.5, 1 (spread, liquidity, entropy and
               data-quality slots)
        14     (MA5 - MA10) / MA10 in percent, clamped to [-1, 1]
        15     RSI rescaled from [0, 100] to [-1, 1]
        16     flow momentum, clamped to [-1, 1]
        17     constant 1 (volatility-regime slot)
        18-23  zeros (position slots, filled in during training)

    Every relative change floors its divisor at 0.0001, so a zero price
    yields a finite feature instead of an exception or NaN.

    Args:
        prices: Indexable, sliceable sequence of prices (list or ndarray).
        idx: Position of the current price within ``prices``.

    Returns:
        A list of 24 numbers, or ``None`` when ``idx < 60`` (not enough
        history for a full window).

    Raises:
        IndexError: If ``idx`` is past the end of ``prices``.
    """
    if idx < 60:
        return None

    price = prices[idx]
    # idx >= 60 is guaranteed here, so the window always holds exactly
    # 61 prices and no short-history fallbacks are needed below.
    hist = prices[idx - 60:idx + 1]
    features = []

    # === 1. Returns at multiple timeframes (4 features) ===
    for period in (1, 5, 10, 20):
        ret = _relative_change(price, hist[-1 - period])
        features.append(max(-0.1, min(0.1, ret)) * 10)

    # === 2. Volatility features (3 features) ===
    # NOTE(review): indices 1..19 are the OLDEST returns of the window
    # (prices idx-60 .. idx-41), not the most recent ones. Left as-is because
    # anything that recomputes these features elsewhere must produce the same
    # values -- confirm whether this is intended.
    returns = [_relative_change(hist[i], hist[i - 1]) for i in range(1, 20)]
    # Presumably annualises per-minute returns (252 * 24 * 60) -- TODO confirm.
    rv = np.sqrt(np.mean(np.array(returns) ** 2)) * np.sqrt(252 * 24 * 60) * 100
    capped_rv = min(2, rv / 50)
    features.append(capped_rv)
    features.append(capped_rv)
    features.append(min(2, abs(returns[0]) * 100 / 30))

    # === 3. Flow features (3 features) ===
    # hist[-5] is four steps before the current price.
    flow = max(-1, min(1, _relative_change(price, hist[-5]) * 50))
    features.append(flow)
    features.append(0)
    features.append(flow * 0.8)

    # === 4. Spread and liquidity (2 features) ===
    features.append(0.2)
    features.append(0.5)

    # === 5. Entropy and data quality (2 features) ===
    features.append(0.5)
    features.append(1)

    # === 6. Momentum - MA crossover (1 feature) ===
    ma5 = np.mean(hist[-5:])
    ma10 = np.mean(hist[-10:])
    features.append(max(-1, min(1, _relative_change(ma5, ma10) * 100)))

    # === 7. RSI (1 feature) ===
    # Uses the 13 most recent price changes; both averages divide by 14.
    gains, losses = [], []
    for i in range(1, 14):
        change = hist[-i] - hist[-i - 1]
        if change > 0:
            gains.append(change)
        elif change < 0:
            losses.append(abs(change))
    avg_gain = sum(gains) / 14.0 if gains else 0.0
    avg_loss = sum(losses) / 14.0 if losses else 0.0
    if avg_loss == 0:
        # No losses: fully overbought if anything rose, neutral if flat.
        rsi = 100.0 if avg_gain > 0 else 50.0
    elif avg_gain == 0:
        rsi = 0.0
    else:
        rs = avg_gain / avg_loss
        rsi = 100.0 - (100.0 / (1.0 + rs))
    features.append((rsi - 50.0) / 50.0)

    # === 8. Flow momentum (1 feature) ===
    # Latest one-step flow minus the one-step flow ending at hist[-5].
    flow_now = _relative_change(price, hist[-2]) * 50
    flow_5ago = _relative_change(hist[-5], hist[-6]) * 50
    features.append(max(-1, min(1, flow_now - flow_5ago)))

    # === 9. Volatility regime (1 feature) ===
    features.append(1)

    # === POSITION INFO (6 features) - will be added dynamically during training ===
    # These are placeholders - actual values computed during simulation
    features.extend([0, 0, 0, 0, 0, 0])

    return features

# Pre-compute all features
print("\nPre-computing features for all timesteps...")
all_features = []
valid_indices = []

# The first 60 positions have no full history window, so start at 60.
for idx in range(60, len(prices)):
    features = compute_features(prices, idx)
    # Keep only complete 24-element rows so the saved array is rectangular.
    if features and len(features) == 24:
        all_features.append(features)
        valid_indices.append(idx)

    if (idx - 60) % 2000 == 0:
        print(f"  Processed {idx - 60}/{len(prices) - 60} timesteps...")

# Stop before writing anything if the input was too short to yield a single
# row; otherwise empty files would be saved and the sample printout below
# would fail with an unhelpful IndexError.
if not all_features:
    raise SystemExit(
        f"No features computed: need more than 60 price points, got {len(prices)}"
    )

features_array = np.array(all_features, dtype=np.float32)
indices_array = np.array(valid_indices, dtype=np.int32)
# Row i of every saved array refers to the same original price index.
prices_array = np.asarray(prices)[indices_array].astype(np.float32)

print(f"\nFeatures shape: {features_array.shape}")
print(f"Prices shape: {prices_array.shape}")

# Save to files
output_dir = '/var/www/html/bestrading.cuttalo.com/scripts/kaggle-kernel-v2'
np.save(f'{output_dir}/features.npy', features_array)
np.save(f'{output_dir}/prices_aligned.npy', prices_array)
np.save(f'{output_dir}/indices.npy', indices_array)

print(f"\n✓ Saved features.npy ({features_array.shape})")
print(f"✓ Saved prices_aligned.npy ({prices_array.shape})")
print(f"✓ Saved indices.npy ({indices_array.shape})")

# Verify
print(f"\nSample features (first row): {features_array[0][:5]}...")
print(f"Sample features (last row): {features_array[-1][:5]}...")

print("\n" + "=" * 70)
print("   DONE! Now upload these files to Kaggle dataset")
print("=" * 70)
