Developing an AI system for detecting money laundering
Anti-Money Laundering (AML) is an area with strict regulatory requirements (Russian Federal Law No. 115-FZ, FATF recommendations, EU AMLD) and a high cost of errors. False negatives (missed suspicious transactions) lead to regulator fines; false positives lead to blocked accounts of bona fide clients and, in turn, complaints. ML reduces the false positive rate (FPR) by 20-40% compared to standard systems while maintaining or improving the detection rate.
Typology of money laundering schemes
Structuring (Smurfing): Splitting a large amount into multiple smaller transactions below the automatic control threshold (RUB 600,000 in the Russian Federation).
Layering: Multi-level transfers through a chain of accounts/jurisdictions to obscure the source of funds.
Integration: Investing laundered funds in legitimate businesses (payment for services of affiliated structures, investments in real estate).
Red flags:
- Repeated transactions in equal amounts just below the threshold (e.g., RUB 599,000 against the RUB 600,000 threshold)
- Atypical activity: a long period with no transactions, then suddenly 50 transactions in one day
- Geographical discrepancies: client from Saratov, transactions in Singapore
- New account → high turnover → withdrawal → closure (one-day account)
Feature Engineering
Transactional characteristics:
def extract_transaction_features(transaction_history, lookback_days=90):
    """Build behavioural features from one customer's transaction history.

    Args:
        transaction_history: pandas DataFrame with one row per transaction.
            The columns read here are ``days_ago``, ``amount``, ``date``,
            ``hour``, ``counterparty_id``, ``is_new_counterparty`` and
            ``country``.
        lookback_days: Only rows with ``days_ago <= lookback_days`` are used.

    Returns:
        dict mapping feature name to a scalar. When the window contains no
        transactions (or the total amount is zero) the two explicit ratios
        ``transactions_per_active_day`` and ``counterparty_concentration``
        are 0.0 instead of raising / dividing by zero; the other aggregates
        are whatever pandas returns for an empty selection (typically NaN).
    """
    # Apply the lookback window; the parameter was previously accepted but
    # ignored. Copy so helpers that receive the frame cannot touch the
    # caller's data.
    in_window = transaction_history['days_ago'] <= lookback_days
    df = transaction_history[in_window].copy()

    # The 30-day slice feeds two features, so compute it once.
    recent = df[df['days_ago'] <= 30]

    active_days = df['date'].nunique()
    total_amount = df['amount'].sum()
    largest_counterparty_total = (
        df.groupby('counterparty_id')['amount'].sum().max()
    )

    features = {
        # Transaction volume
        'total_amount_30d': recent['amount'].sum(),
        'transaction_count_30d': len(recent),
        'avg_transaction_amount': df['amount'].mean(),
        'amount_std': df['amount'].std(),
        # Temporal patterns
        'transactions_per_active_day': (
            len(df) / active_days if active_days else 0.0
        ),
        'max_transactions_single_day': df.groupby('date').size().max(),
        'night_transaction_ratio': (df['hour'] < 6).mean(),
        'weekend_activity_change': calculate_weekend_ratio(df),
        # Amounts close to the RUB 600,000 mandatory-control threshold
        'near_threshold_pct': df['amount'].between(550000, 610000).mean(),
        'round_amount_pct': (df['amount'] % 1000 == 0).mean(),
        # Counterparties
        'unique_counterparties': df['counterparty_id'].nunique(),
        'counterparty_concentration': (
            largest_counterparty_total / total_amount if total_amount else 0.0
        ),
        'new_counterparty_ratio': df['is_new_counterparty'].eq(True).mean(),
        # Geography
        'foreign_transaction_ratio': (df['country'] != 'RU').mean(),
        'high_risk_jurisdiction_pct': (
            df['country'].isin(HIGH_RISK_COUNTRIES).mean()
        ),
    }
    return features
Network Features (Graph Features):
import networkx as nx
def compute_network_features(account_id, transaction_graph):
    """Compute graph-based risk features for a single account.

    Transactions are modelled as a graph: nodes are accounts, edges are
    transfers. Nodes that are central in suspicious networks are treated as
    high risk.

    Args:
        account_id: Node key of the account to score.
        transaction_graph: Directed networkx graph (``in_degree`` /
            ``out_degree`` are used). Edges are expected to carry an
            ``amount`` attribute, which is used as the edge weight.

    Returns:
        dict with ``pagerank_score``, ``betweenness_score``,
        ``community_risk``, ``in_degree`` and ``out_degree``. For an account
        that is not a node of the graph the centrality scores and both
        degrees are 0.
    """
    G = transaction_graph

    # NOTE(review): all three graph-wide computations below are repeated on
    # every call; consider computing them once per graph if this is called
    # for many accounts.

    # PageRank: how central the node is in the transaction network.
    pagerank = nx.pagerank(G, weight='amount')

    # Betweenness: whether the account sits in the middle of long chains.
    # NOTE(review): networkx interprets ``weight`` here as a path distance,
    # so large transfers count as "longer" edges - confirm this is intended.
    betweenness = nx.betweenness_centrality(G, weight='amount')

    # Communities: membership in a suspicious group of accounts.
    communities = nx.community.greedy_modularity_communities(G.to_undirected())
    community_risk = assess_community_risk(account_id, communities, G)

    # G.in_degree(x) / G.out_degree(x) return a view object rather than a
    # number when x is not a node; report 0 so unknown accounts are handled
    # the same way as in the centrality lookups.
    is_known = account_id in G

    return {
        'pagerank_score': pagerank.get(account_id, 0),
        'betweenness_score': betweenness.get(account_id, 0),
        'community_risk': community_risk,
        'in_degree': G.in_degree(account_id) if is_known else 0,
        'out_degree': G.out_degree(account_id) if is_known else 0,
    }
ML models for AML
LightGBM with AML settings:
import lightgbm as lgb
import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score
from sklearn.metrics import precision_recall_curve

# Class imbalance: SARs (Suspicious Activity Reports) are < 1% of
# transactions, so the positive class is up-weighted by the
# negative-to-positive ratio.
n_normal = (y_train == 0).sum()
n_sar = (y_train == 1).sum()
scale_pos_weight = n_normal / n_sar

model = lgb.LGBMClassifier(
    n_estimators=500,
    scale_pos_weight=scale_pos_weight,
    learning_rate=0.05,
    num_leaves=31,
    min_child_samples=20,  # guards against overfitting to rare patterns
    feature_fraction=0.8,
)

# Threshold tuning: in AML recall matters more than precision, because the
# regulator expects a low false-negative rate (real laundering must not be
# missed).
# NOTE(review): y_val and y_scores are expected to exist at this point
# (validation labels and the model's positive-class scores) - confirm.
precision, recall, thresholds = precision_recall_curve(y_val, y_scores)

# recall[i] corresponds to thresholds[i] and decreases as the threshold
# grows (recall has one extra trailing element with no threshold). The first
# index with recall >= target is therefore always 0 - the lowest threshold,
# which flags almost everything. Take the LAST such index instead: the
# highest threshold that still reaches the target recall.
min_recall = 0.85
eligible = np.flatnonzero(recall[:-1] >= min_recall)
optimal_threshold = thresholds[eligible[-1]] if eligible.size else thresholds[0]
GNN for network analysis:
import torch
from torch_geometric.nn import GCNConv, SAGEConv
class AMLGraphNN(torch.nn.Module):
    """Graph neural network that scores individual transfers (edges).

    Intended for transaction networks; better suited to layering schemes
    (chains of transfers) than per-account tabular models.

    Args:
        node_features: Size of each node's input feature vector.
        edge_features: Size of each edge's input feature vector.
        hidden_dim: Width of the node and edge embeddings.
    """

    def __init__(self, node_features, edge_features, hidden_dim=64):
        super().__init__()
        self.conv1 = SAGEConv(node_features, hidden_dim)
        self.conv2 = SAGEConv(hidden_dim, hidden_dim)
        self.edge_mlp = torch.nn.Linear(edge_features, hidden_dim)
        # The classifier sees source embedding + target embedding + edge
        # embedding. Previously it was sized hidden_dim * 2 and the edge
        # embedding was computed but never used, so edge features had no
        # effect on the prediction. Checkpoints saved with the old layout
        # will not load into this layer.
        self.classifier = torch.nn.Linear(hidden_dim * 3, 1)

    def forward(self, node_features, edge_index, edge_features):
        """Return a suspicion probability for every edge.

        Args:
            node_features: Node feature matrix, one row per node.
            edge_index: Index tensor whose row 0 holds source node indices
                and row 1 holds target node indices.
            edge_features: Edge feature matrix, one row per edge, in the
                same order as the columns of ``edge_index``.

        Returns:
            Tensor with one sigmoid-activated score per edge.
        """
        # Two rounds of neighbourhood aggregation produce node embeddings.
        x = torch.relu(self.conv1(node_features, edge_index))
        x = torch.relu(self.conv2(x, edge_index))

        # Edge-level prediction: is this transfer suspicious?
        edge_emb = torch.relu(self.edge_mlp(edge_features))
        source_emb = x[edge_index[0]]
        target_emb = x[edge_index[1]]
        edge_repr = torch.cat([source_emb, target_emb, edge_emb], dim=1)
        return torch.sigmoid(self.classifier(edge_repr))
Rule-based + ML hybrid
Transaction Monitoring System (TMS):
class HybridAMLSystem:
    """Two-tier transaction monitor: deterministic rules plus an ML score.

    Args:
        rule_engine: Object exposing ``evaluate(transaction)``; its result is
            tested for truthiness and read via ``max_risk_score``.
        ml_model: Classifier exposing ``predict_proba``.
        threshold: Risk score above which a SAR candidate is produced.
    """

    def __init__(self, rule_engine, ml_model, threshold=0.5):
        self.rules = rule_engine
        self.model = ml_model
        self.threshold = threshold

    def evaluate_transaction(self, transaction, customer_history):
        """Score one transaction and return a SAR candidate if it is risky.

        Returns:
            A ``SARCandidate`` when the combined risk exceeds the threshold,
            otherwise ``None``.
        """
        # Tier 1: rule-based (deterministic) scenarios.
        rule_alerts = self.rules.evaluate(transaction)

        # Tier 2: ML risk score.
        features = extract_transaction_features(customer_history)
        # predict_proba needs a numeric 2-D input, not a list holding a dict,
        # so pass the feature values as a single row.
        # NOTE(review): this relies on the dict's insertion order matching
        # the column order the model was trained with - confirm.
        feature_row = list(features.values())
        ml_score = self.model.predict_proba([feature_row])[0][1]

        # Combination: any rule OR a high ML score can raise the alert.
        rule_score = rule_alerts.max_risk_score if rule_alerts else 0
        final_risk = max(rule_score, ml_score)

        if final_risk > self.threshold:
            return SARCandidate(
                transaction=transaction,
                risk_score=final_risk,
                triggered_rules=rule_alerts,
                ml_explanation=shap_explain(self.model, features),
            )
        return None
Regulatory compliance
FZ-115 (Russia):
- Mandatory control for transactions > 600,000 rubles.
- Submission of SARs to Rosfinmonitoring (FinCERT)
- Filing deadline: 3 working days from the date of the transaction
FATF / EU AMLD:
- KYC (Know Your Customer) during onboarding
- Continuous monitoring throughout the relationship
- Risk-based approach: enhanced due diligence (EDD) for high-risk clients
Explainability for regulator:
import shap
def explain_sar_decision(model, features, feature_names):
    """Turn SHAP values into a short textual justification for a SAR.

    The regulator requires a rationale for every SAR, so the five features
    with the largest absolute SHAP contribution are listed.

    Args:
        model: Tree-based model accepted by ``shap.TreeExplainer``.
        features: Model input to explain; when several samples are given,
            only the first one is explained.
        feature_names: Feature names in the same order as the model inputs.

    Returns:
        Newline-separated string with one line per top factor.
    """
    import numpy as np  # local import: the block is self-contained

    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(features)

    # SHAP's output layout depends on the version and model: a list with one
    # array per class, a (samples, features, outputs) array, or a plain
    # (samples, features) array. Indexing [0] is only the first sample in
    # the last case, so reduce everything to one per-feature vector for the
    # last (positive) class of the first sample.
    if isinstance(shap_values, list):
        shap_values = shap_values[-1]
    contributions = np.asarray(shap_values, dtype=float)
    if contributions.ndim == 3:
        contributions = contributions[:, :, -1]
    if contributions.ndim == 2:
        contributions = contributions[0]

    top_factors = sorted(
        zip(feature_names, contributions),
        key=lambda item: abs(item[1]),
        reverse=True,
    )[:5]

    explanation = "\n".join(
        f"- {name}: {'повысил' if val > 0 else 'снизил'} риск на {abs(val):.2f}"
        for name, val in top_factors
    )
    return explanation
Timeframe: Regulatory TMS + basic transaction features + LightGBM + Rosfinmonitoring reporting — 6-8 weeks. GNN for network analysis, graph community detection, explainability, EDD workflow, real-time scoring API — 4-5 months.







