Development of Volatility Forecasting Model
Volatility forecasting is a distinct and highly valued task in trading. Knowing the expected volatility makes it possible to adapt position size, choose the optimal spread width for market-making, price options correctly, and plan risk management.
Volatility Metrics
Realized Volatility — historical volatility computed from historical returns:
import numpy as np
import pandas as pd
def realized_volatility(returns, window=24, annualize=True,
                        periods_per_year=365 * 24):
    """
    Close-to-close realized volatility: rolling standard deviation of returns.

    Parameters
    ----------
    returns : pandas Series (or DataFrame) of per-bar returns.
    window : number of bars in the rolling window.
    annualize : if True, scale the per-bar volatility to an annual figure.
    periods_per_year : number of bars in a year; the default (365 * 24)
        corresponds to hourly bars on a market that trades around the clock.

    Returns
    -------
    Rolling volatility with the same index as `returns`; the first
    `window - 1` values are NaN.
    """
    rv = returns.rolling(window).std()
    if annualize:
        # Volatility scales with the square root of the number of periods.
        rv = rv * np.sqrt(periods_per_year)
    return rv
def realized_volatility_parkinson(highs, lows, window=24, annualize=True,
                                  periods_per_year=365 * 24):
    """
    Parkinson range-based volatility estimator (uses High/Low only).

    Per-bar variance is estimated as ln(H/L)^2 / (4 * ln 2), averaged over a
    rolling window, and the square root is returned.

    Parameters
    ----------
    highs, lows : pandas Series of per-bar high and low prices.
    window : number of bars in the rolling window.
    annualize : if True, scale the per-bar volatility to an annual figure.
    periods_per_year : number of bars in a year; the default (365 * 24)
        corresponds to hourly bars.

    Returns
    -------
    Rolling Parkinson volatility; the first `window - 1` values are NaN.
    """
    log_range_sq = (np.log(highs) - np.log(lows)) ** 2
    rv_parkinson = np.sqrt(log_range_sq.rolling(window).mean() / (4 * np.log(2)))
    if annualize:
        rv_parkinson = rv_parkinson * np.sqrt(periods_per_year)
    return rv_parkinson
def realized_volatility_garman_klass(opens, highs, lows, closes, window=24,
                                     annualize=True,
                                     periods_per_year=365 * 24):
    """
    Garman-Klass volatility estimator (uses Open/High/Low/Close).

    Per-bar variance is 0.5 * ln(H/L)^2 - (2 * ln 2 - 1) * ln(C/O)^2; it is
    averaged over a rolling window and the square root is returned.

    Parameters
    ----------
    opens, highs, lows, closes : pandas Series of per-bar OHLC prices.
    window : number of bars in the rolling window.
    annualize : if True (default, matching the previous behavior), scale the
        result to an annual figure.
    periods_per_year : number of bars in a year; the default (365 * 24)
        corresponds to hourly bars.

    Returns
    -------
    Rolling Garman-Klass volatility; the first `window - 1` values are NaN.
    """
    range_term = 0.5 * (np.log(highs / lows)) ** 2
    body_term = (2 * np.log(2) - 1) * (np.log(closes / opens)) ** 2
    variance = (range_term - body_term).rolling(window).mean()
    if annualize:
        # Variance scales linearly with the number of periods.
        variance = variance * periods_per_year
    return np.sqrt(variance)
GARCH Models
GARCH(1,1) — classic conditional volatility model. Today's volatility depends on yesterday's volatility and yesterday's squared return:
from arch import arch_model
import warnings
def fit_garch_model(returns, model_type='GARCH', p=1, q=1, vol='GARCH', dist='t'):
    """
    Fit a conditional-volatility model with a constant mean using `arch`.

    Parameters
    ----------
    returns : series of returns in decimal units (0.01 == 1%).
    model_type : accepted for backward compatibility; it is not used — the
        volatility process is selected through `vol`.
    p, q : lag orders of the volatility process.
    vol : 'GARCH', 'EGARCH' or 'GJR-GARCH' (alias 'GJR'). `arch_model` has no
        volatility process named 'GJR-GARCH', so that value is translated to
        a GARCH process with asymmetric order o=1.
    dist : 'normal', 't', 'ged'. dist='t': Student's t-distribution better
        describes crypto fat tails.

    Returns
    -------
    The fitted result object returned by `model.fit(...)`. The model is
    fitted on returns multiplied by 100, so its variances are in percent^2.
    """
    # returns in percentage for numerical stability
    returns_pct = returns * 100

    # GJR-GARCH == GARCH plus one asymmetric (threshold) term.
    # NOTE(review): 'EGARCH' is passed through with o=0, i.e. without an
    # asymmetry term — confirm whether o=1 is wanted there as well.
    asym_order = 0
    if isinstance(vol, str) and vol.upper() in ('GJR-GARCH', 'GJR'):
        vol = 'GARCH'
        asym_order = 1

    model = arch_model(
        returns_pct,
        mean='Constant',
        vol=vol,
        p=p,
        o=asym_order,
        q=q,
        dist=dist,
    )

    # Optimizer convergence warnings are deliberately silenced here.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        result = model.fit(disp='off', options={'maxiter': 500})
    return result
def forecast_volatility_garch(garch_result, horizon=24):
    """
    Forecast volatility for the next `horizon` periods.

    Calls `garch_result.forecast(horizon=..., reindex=False)`, takes the last
    row of the forecast variance table, converts variance to standard
    deviation and divides by 100 to undo the percent scaling.

    Returns a 1-D array with one volatility value per forecast step.
    """
    variance_table = garch_result.forecast(horizon=horizon, reindex=False).variance
    # Last row = forecasts made from the most recent observation.
    latest_variances = variance_table.values[-1]
    return np.sqrt(latest_variances) / 100  # back to units
# EGARCH: asymmetric model (bad news increases volatility more than good news of the same size)
# GJR-GARCH: likewise captures this asymmetry (the leverage effect)
ML Models for Volatility
HAR-RV (Heterogeneous Autoregressive Realized Volatility): linear model with different horizons:
def create_har_features(realized_vol, horizons=None):
    """
    HAR model: RV is predicted through its own lagged averages.

    Parameters
    ----------
    realized_vol : pandas Series of realized volatility.
    horizons : iterable of window lengths; defaults to (1, 5, 22).

    Returns
    -------
    DataFrame with one column `rv_avg_{h}d` per horizon. Each value is the
    rolling mean over the previous `h` observations, shifted by one step so
    that the row at index t only uses data up to t-1. Rows containing NaN
    are dropped.
    """
    # None instead of a list literal avoids a shared mutable default.
    if horizons is None:
        horizons = (1, 5, 22)
    features = {
        f'rv_avg_{h}d': realized_vol.rolling(h).mean().shift(1)
        for h in horizons
    }
    return pd.DataFrame(features).dropna()
from sklearn.linear_model import Ridge
def train_har_model(rv_series, horizons=None):
    """
    Fit a Ridge-regularized HAR-RV model for one-step-ahead RV.

    Parameters
    ----------
    rv_series : pandas Series of realized volatility.
    horizons : iterable of averaging windows; defaults to [1, 5, 22].

    Returns
    -------
    The fitted `Ridge(alpha=0.1)` model. Its inputs are the columns produced
    by `create_har_features`, in the order of `horizons`.
    """
    # None instead of a list literal avoids a shared mutable default.
    if horizons is None:
        horizons = [1, 5, 22]
    X = create_har_features(rv_series, horizons)

    # create_har_features already shifts every feature by one step, so the
    # row at index t only contains information up to t-1. The matching
    # one-step-ahead target is therefore RV at t itself; shifting the target
    # by a further -1 would skip the most recent observation and fit a
    # two-step-ahead model.
    y = rv_series.dropna()

    # Align indices
    common_idx = X.index.intersection(y.index)
    X, y = X.loc[common_idx], y.loc[common_idx]

    model = Ridge(alpha=0.1)
    model.fit(X, y)
    return model
LSTM for Volatility: captures nonlinear patterns and long-term memory:
import torch
import torch.nn as nn
class VolatilityLSTM(nn.Module):
    """
    Two-layer LSTM followed by a small feed-forward head.

    The LSTM is built with batch_first=True, so inputs are indexed as
    (batch, time, feature). Only the output at the last time step is passed
    to the head, which emits `output_horizon` values per sample.
    """

    def __init__(self, input_size=10, hidden_size=64, output_horizon=24):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=2,
            batch_first=True,
            dropout=0.2,
        )
        head_layers = [
            nn.Linear(hidden_size, 32),
            nn.ReLU(),
            nn.Linear(32, output_horizon),  # predict full horizon
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the LSTM over `x` and project its last time step."""
        sequence_out, _state = self.lstm(x)
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)
Realized GARCH (Combined Approach)
Combines advantages of GARCH and Realized Volatility:
def realized_garch_forecast(returns, rv_history, omega=0.1, alpha=0.1,
                            beta=0.8, gamma=0.5):
    """
    Simplified Realized GARCH variance recursion.

        h_0 = rv_0^2
        h_t = omega + alpha * rv_{t-1}^2 + beta * h_{t-1} + gamma * r_{t-1}^2

    where rv is the realized volatility and r the raw (not standardized)
    return.

    Parameters
    ----------
    returns : sequence of returns (pandas Series, array or list).
    rv_history : realized volatility with at least as many observations as
        `returns`, aligned positionally.
    omega, alpha, beta, gamma : recursion coefficients.

    Returns
    -------
    numpy array of conditional volatilities sqrt(h_t), one per element of
    `returns`; an empty array when `returns` is empty.
    """
    r = np.asarray(returns, dtype=float)
    rv = np.asarray(rv_history, dtype=float)

    n = len(r)
    h = np.zeros(n)
    if n == 0:
        # Nothing to filter; avoid indexing into empty inputs.
        return h

    # Square once up front instead of on every step of the recursion.
    r_sq = r ** 2
    rv_sq = rv ** 2

    h[0] = rv_sq[0]
    for t in range(1, n):
        h[t] = (omega +
                alpha * rv_sq[t - 1] +
                beta * h[t - 1] +
                gamma * r_sq[t - 1])
    return np.sqrt(h)
Evaluating Volatility Forecast Quality
def evaluate_vol_forecast(actual_rv, predicted_rv):
    """
    Score a volatility forecast against realized values.

    Returns a dict with:
      - 'mse', 'mae': mean squared / mean absolute error;
      - 'mz_slope', 'mz_intercept', 'r_squared': Mincer-Zarnowitz regression
        of actual on predicted (forecast unbiasedness check);
      - 'qlike': mean of ratio - log(ratio) - 1 with
        ratio = actual_rv / predicted_rv, computed on the inputs as passed.
    """
    from scipy.stats import linregress

    errors = actual_rv - predicted_rv
    ratio = actual_rv / predicted_rv

    # Mincer-Zarnowitz: regress realized values on the forecasts.
    mz_fit = linregress(predicted_rv, actual_rv)

    metrics = {}
    metrics['mse'] = np.mean(errors ** 2)
    metrics['mae'] = np.mean(np.abs(errors))
    metrics['r_squared'] = mz_fit.rvalue ** 2
    metrics['mz_slope'] = mz_fit.slope
    metrics['mz_intercept'] = mz_fit.intercept
    # QLIKE (quasi-likelihood) — standard metric for vol forecasting
    metrics['qlike'] = np.mean(ratio - np.log(ratio) - 1)
    return metrics
Application in Trading
Position sizing: at high predicted volatility → reduce position size:
def vol_adjusted_position_size(base_size, predicted_vol, target_vol=0.02,
                               max_scale=2.0):
    """
    Scale a position inversely to predicted volatility.

    Parameters
    ----------
    base_size : baseline position size.
    predicted_vol : forecast volatility, in the same units as `target_vol`.
    target_vol : volatility at which the position equals `base_size`.
    max_scale : upper bound on the multiplier applied to `base_size`
        (default 2.0, i.e. at most 2x of baseline).

    Returns
    -------
    base_size * min(target_vol / predicted_vol, max_scale).
    """
    # Floor the denominator so zero or negative forecasts cannot divide by
    # zero; such inputs end up at the `max_scale` cap.
    scaling = target_vol / max(predicted_vol, 1e-8)
    return base_size * min(scaling, max_scale)
Dynamic stop-loss: stop at N × predicted_vol from entry.
Options pricing: for crypto options predicted_vol is used as implied volatility estimate.
Developing a volatility forecasting system covers: GARCH/EGARCH, HAR-RV and LSTM models, ensemble averaging, integration with position sizing, and forecast-accuracy monitoring.







