AI match-fixing detection system
Match-fixing threatens the integrity of sport and enables financial crime through betting. The AI system analyzes atypical odds movements, bettor behavior, and game statistics, identifying correlations that are impossible without prior collusion.
Data sources
Multi-channel analysis:
# Catalogue of the input channels analysed by the detection system.
# Each channel describes its raw data and, where given, the derived metrics,
# the sampling frequency, the data providers or the NLP task applied to it.
data_sources = dict(
    # Odds from 50+ bookmakers plus the Betfair exchange, 30-second snapshots.
    betting_markets=dict(
        data='коэффициенты 50+ букмекеров + Betfair биржа',
        metrics=['odds_movement', 'trading_volume', 'market_liquidity'],
        frequency='real-time (30-секундные снапшоты)',
    ),
    # Match statistics: goals, yellow cards, substitutions, expected goals.
    in_game_data=dict(
        data='статистика матча: голы, желтые карточки, замены, expected goals',
        metrics=['xG_deviation', 'shot_quality', 'defensive_intensity'],
        provider='Opta, StatsBomb, Wyscout',
    ),
    # Physical GPS / heart-rate readings (when available).
    player_data=dict(
        data='физические показатели GPS/heart rate (если доступны)',
        metrics=['sprint_count', 'distance_covered', 'heart_rate_zones'],
    ),
    # Betting talk and insider information on social networks; NLP looks for
    # pre-match "tips" and unusual confidence.
    social_media=dict(
        data='ставки в социальных сетях, инсайдерская информация',
        nlp='детекция "tips" до матча, необычная уверенность',
    ),
)
Odds movement analysis
Detection of abnormal odds movement:
import numpy as np
import pandas as pd
from scipy.stats import zscore
def analyze_odds_movement(odds_history: pd.DataFrame,
                          match_id: str,
                          historical_movement_mean: float = 5.0,
                          historical_movement_std: float = 2.5,
                          early_hours_threshold: float = 12.0,
                          min_steam_bookmakers: int = 2) -> dict:
    """Score how unusual the home-odds movement of a single match is.

    Normal odds movement is a reaction to public news (injuries, line-ups)
    or the bookmaker balancing its positions. A sharp move without public
    news is treated as a sign of bets placed on inside information.

    Args:
        odds_history: Odds snapshots. The columns ``match_id``,
            ``timestamp``, ``kickoff`` and ``odds_home`` are read;
            ``bookmaker_id`` is optional and enables steam-move detection.
            ``timestamp`` and ``kickoff`` must be datetime-like.
        match_id: Match whose snapshots are analysed.
        historical_movement_mean: Typical total movement, in percent, used
            as the baseline mean for the z-score.
        historical_movement_std: Baseline standard deviation, in percent.
        early_hours_threshold: Snapshots taken more than this many hours
            before kickoff count as "early" movement.
        min_steam_bookmakers: Minimum number of distinct bookmakers that
            must quote at a timestamp before it can count as a steam move.

    Returns:
        ``{'status': 'insufficient_data'}`` when the match has fewer than
        10 snapshots. Otherwise a dict with ``match_id``,
        ``total_movement_pct``, ``early_movement_pct``,
        ``steam_move_detected``, ``movement_z_score``, ``anomaly`` and
        ``risk_level`` (``'high'`` for z > 4, ``'medium'`` for z > 3,
        else ``'low'``).

    Raises:
        ValueError: If ``historical_movement_std`` is not positive.
    """
    if historical_movement_std <= 0:
        raise ValueError('historical_movement_std must be positive')

    match_odds = odds_history[odds_history['match_id'] == match_id].sort_values('timestamp')
    if len(match_odds) < 10:
        return {'status': 'insufficient_data'}

    # Key movement metric: relative change between first and last snapshot.
    # NOTE(review): when several bookmakers are present, the first and last
    # rows (and the consecutive rows used for early movement below) may
    # belong to different bookmakers — confirm the intended aggregation.
    opening_odds_h = match_odds.iloc[0]['odds_home']
    closing_odds_h = match_odds.iloc[-1]['odds_home']
    movement_pct = abs(closing_odds_h - opening_odds_h) / opening_odds_h * 100

    # Timing: how long before kickoff did each snapshot happen?
    hours_before_kickoff = (
        match_odds['kickoff'] - match_odds['timestamp']
    ).dt.total_seconds() / 3600

    # Movement well ahead of kickoff is treated as not being a reaction to
    # the line-up announcement.
    early_movement_mask = hours_before_kickoff > early_hours_threshold
    early_movement_pct = (
        match_odds[early_movement_mask]['odds_home'].pct_change().abs().sum()
    )

    # Steam move: most bookmakers move their price at the same timestamp.
    steam_detected = False
    if 'bookmaker_id' in match_odds.columns:
        bookmaker_movements = (
            match_odds.groupby('bookmaker_id')['odds_home'].pct_change().abs()
        )
        moved = bookmaker_movements > 0.03
        share_moved = moved.groupby(match_odds['timestamp']).mean()
        books_quoting = match_odds.groupby('timestamp')['bookmaker_id'].nunique()
        # A lone quote that moved yields a share of 1.0; it is only a
        # synchronous signal when several bookmakers quote at that moment.
        synchronous = (share_moved > 0.7) & (books_quoting >= min_steam_bookmakers)
        steam_detected = bool(synchronous.any())

    # Compare against the historical baseline for this type of match.
    movement_z = (movement_pct - historical_movement_mean) / historical_movement_std

    if movement_z > 4:
        risk_level = 'high'
    elif movement_z > 3:
        risk_level = 'medium'
    else:
        risk_level = 'low'

    return {
        'match_id': match_id,
        'total_movement_pct': round(movement_pct, 2),
        'early_movement_pct': round(early_movement_pct * 100, 2),
        'steam_move_detected': steam_detected,
        'movement_z_score': round(movement_z, 2),
        # Plain bool so the result can be serialized (e.g. to JSON).
        'anomaly': bool(movement_z > 3 or (early_movement_pct > 0.05 and steam_detected)),
        'risk_level': risk_level,
    }
Game statistics analysis
Deviation from expected performance:
def _relative_deviation(observed, baseline):
    """Return (observed - baseline) / baseline, or 0.0 if nothing was observed."""
    if observed is None:
        return 0.0
    # The small epsilon avoids division by zero for a zero baseline.
    return float((observed - baseline) / (baseline + 1e-9))


def analyze_performance_deviation(match_stats: dict,
                                  player_id: str,
                                  season_stats: pd.DataFrame) -> dict:
    """Estimate whether a player performed below their own season baseline.

    Combines three signals into one score: goals scored versus expected
    goals (xG) in the match, distance covered versus the season average,
    and ball touches versus the season average.

    Args:
        match_stats: Stats for the current match. The keys ``xG`` and
            ``goals`` default to 0 when absent. ``distance_km`` and
            ``touches`` are optional; a missing value contributes no
            deviation rather than being compared against an arbitrary
            default.
        player_id: Player to analyse.
        season_stats: Season rows; the columns ``player_id``,
            ``distance_km`` and ``touches`` are read.

    Returns:
        ``{'status': 'no_baseline'}`` when ``season_stats`` has no rows for
        the player. Otherwise a dict with ``player_id``, ``xg_performance``
        (negative means fewer goals than xG), ``distance_deviation``,
        ``touch_deviation``, ``underperformance_score`` and ``suspicious``
        (score above 0.6).
    """
    player_season = season_stats[season_stats['player_id'] == player_id]
    if player_season.empty:
        return {'status': 'no_baseline'}

    # Goals minus expected goals: negative means chances were not converted.
    xg_performance = match_stats.get('goals', 0) - match_stats.get('xG', 0)

    # Physical intensity relative to the player's season average.
    distance_deviation = _relative_deviation(
        match_stats.get('distance_km'), player_season['distance_km'].mean()
    )

    # Involvement in play relative to the player's season average.
    touch_deviation = _relative_deviation(
        match_stats.get('touches'), player_season['touches'].mean()
    )

    # Composite score: only shortfalls count, over-performance is ignored.
    underperformance_score = (
        max(0, -xg_performance / 0.5) * 0.4 +   # xG not converted
        max(0, -distance_deviation) * 0.3 +     # ran less than usual
        max(0, -touch_deviation) * 0.3          # fewer touches than usual
    )

    return {
        'player_id': player_id,
        'xg_performance': round(xg_performance, 3),
        'distance_deviation': round(distance_deviation, 3),
        'touch_deviation': round(touch_deviation, 3),
        'underperformance_score': round(underperformance_score, 3),
        # Plain bool so the result can be serialized (e.g. to JSON).
        'suspicious': bool(underperformance_score > 0.6),
    }
Betting patterns
Coordinated bet detection:
def detect_coordinated_betting(bet_records: pd.DataFrame, match_id: str) -> dict:
    """Score how coordinated the largest bets on one match look.

    Large bets placed close together in time, in round amounts and on the
    same outcome are treated as a sign of coordinated play, e.g. one
    beneficiary splitting a bet across several accounts.

    Args:
        bet_records: Bet rows. The columns ``match_id``, ``stake_usd``,
            ``timestamp`` (datetime-like) and ``outcome_bet`` are read.
        match_id: Match whose bets are analysed.

    Returns:
        ``{'sufficient_data': False, 'status': 'insufficient_data'}`` when
        fewer than 3 bets exceed the match's 95th stake percentile.
        Otherwise a dict with ``match_id``, ``large_bets_count``,
        ``rapid_cluster_count``, ``round_number_ratio``,
        ``outcome_concentration``, ``coordination_score`` and
        ``flag_for_investigation`` (score above 0.6).
    """
    match_bets = bet_records[bet_records['match_id'] == match_id]

    # "Large" bets are those above the match's own 95th stake percentile.
    stake_cutoff = match_bets['stake_usd'].quantile(0.95)
    large_bets = match_bets[match_bets['stake_usd'] > stake_cutoff]
    if len(large_bets) < 3:
        return {'sufficient_data': False, 'status': 'insufficient_data'}

    # Number of large bets placed less than 5 minutes after the previous one.
    time_diffs = (
        large_bets.sort_values('timestamp')['timestamp'].diff().dt.total_seconds()
    )
    rapid_cluster = int((time_diffs < 300).sum())

    # Share of round stakes; every multiple of 1000 is also a multiple of 100.
    round_number_bets = float((large_bets['stake_usd'] % 100 == 0).mean())

    # Share of large bets that went on the single most popular outcome.
    outcome_concentration = float(
        large_bets['outcome_bet'].value_counts().max() / len(large_bets)
    )

    # Concentration below 50% (e.g. an evenly split three-way market) is not
    # evidence against coordination, so it must not lower the score.
    concentration_component = max(0.0, outcome_concentration - 0.5) * 2

    coordination_score = (
        min(1, rapid_cluster / 5) * 0.4 +
        round_number_bets * 0.3 +
        concentration_component * 0.3
    )

    return {
        'match_id': match_id,
        'large_bets_count': len(large_bets),
        'rapid_cluster_count': rapid_cluster,
        'round_number_ratio': round(round_number_bets, 2),
        'outcome_concentration': round(outcome_concentration, 2),
        'coordination_score': round(coordination_score, 3),
        # Plain bool so the result can be serialized (e.g. to JSON).
        'flag_for_investigation': bool(coordination_score > 0.6),
    }
Integration: ESTS (Sports Betting Integrity), Sportradar Fraud Detection API, ESSA (European Sports Security Association). When critical patterns are detected, the relevant sports governing body (FIFA, UEFA, national federations) is notified.
Timeline: odds movement detection, performance baseline, and dashboard — 4-5 weeks. Coordinated betting graph, social media NLP, real-time alerts, and ESSA integration — 3-4 months.







