Development of an AI system for analyzing communication quality
Network quality of service (QoS/QoE) is a multidimensional characteristic that combines latency, packet loss, jitter, and throughput. Machine learning systems analyze these parameters in real time, detect degradation before users start complaining, and localize the problem within the network topology.
Communication quality metrics
Hierarchy of QoS indicators:
# Reference table of QoS/QoE indicators, grouped by network layer.
# Each value is a human-readable rule of thumb for when the KPI signals trouble.
network_kpis = {
    # Physical layer
    'signal_to_noise_ratio_db': 'SNR < 10 dB = деградация',
    'bit_error_rate': 'BER > 1e-6 = проблема',
    'optical_power_dbm': 'для оптики: отклонение ±3 dB от нормы',

    # Transport layer
    'packet_loss_pct': '> 1% = заметно для голоса, > 0.1% критично для видео',
    'latency_ms': 'RTT: < 20 мс = отлично, > 100 мс = деградация голоса',
    'jitter_ms': '> 30 мс = разрыв голосового вызова',
    'throughput_mbps': 'отклонение от SLA > 20% = нарушение',

    # Application layer (QoE)
    'mos_score': 'Mean Opinion Score 1-5: < 3.5 = жалобы',
    'video_buffering_ratio': '> 2% = заметно пользователю',
    'call_setup_success_rate': '< 99% = проблема в IMS/SS7',
}
Anomaly in KPI time series
EWMA + adaptive thresholds:
import numpy as np
import pandas as pd
class NetworkKPIMonitor:
    """Streaming anomaly detector for network KPIs based on EWMA statistics.

    For every KPI name the monitor keeps an exponentially weighted moving
    mean and variance. A new sample is compared against a band of
    ``sigma_multiplier`` standard deviations around the mean learned
    *before* that sample arrived; only afterwards is the sample folded
    into the running statistics.
    """

    def __init__(self, alpha=0.1, sigma_multiplier=3.0, warmup_samples=5):
        """Create an empty monitor.

        Args:
            alpha: EWMA smoothing factor; larger values react faster to
                recent samples.
            sigma_multiplier: Half-width of the acceptance band, expressed
                in standard deviations.
            warmup_samples: Number of samples that must have been absorbed
                for a KPI before anomalies are reported for it. Until then
                the variance estimate is too immature (it starts at 0.0)
                to produce a meaningful band.
        """
        self.alpha = alpha
        self.sigma_multiplier = sigma_multiplier
        self.warmup_samples = warmup_samples
        self.ewma_mean = {}
        self.ewma_var = {}
        self.samples_seen = {}

    def update(self, kpi_name: str, value: float) -> dict:
        """Score ``value`` against the KPI's history, then learn from it.

        Args:
            kpi_name: Key identifying the KPI time series.
            value: Newest observation of that KPI.

        Returns:
            ``{'status': 'initializing'}`` for the first sample of a KPI.
            Otherwise a dict with ``kpi``, ``value``, ``expected`` (the
            EWMA mean before this sample), ``upper_bound``,
            ``lower_bound``, ``anomaly`` (bool) and ``deviation_sigma``
            (signed distance from ``expected`` in standard deviations).
        """
        if kpi_name not in self.ewma_mean:
            self.ewma_mean[kpi_name] = value
            self.ewma_var[kpi_name] = 0.0
            self.samples_seen[kpi_name] = 1
            return {'status': 'initializing'}

        prev_mean = self.ewma_mean[kpi_name]
        prev_var = self.ewma_var[kpi_name]
        # Default of 1 tolerates state that was seeded directly into
        # ewma_mean / ewma_var without going through update().
        seen = self.samples_seen.get(kpi_name, 1)

        # The band must come from the statistics learned BEFORE this sample.
        # If the sample is first absorbed into the mean and variance, it
        # widens its own band: with alpha=0.1 and sigma_multiplier=3.0 the
        # test reduces to "0 > 8.1 * prev_var", which can never be true.
        std = float(np.sqrt(prev_var))
        upper_bound = prev_mean + self.sigma_multiplier * std
        lower_bound = prev_mean - self.sigma_multiplier * std
        warmed_up = seen >= self.warmup_samples
        anomaly = bool(
            warmed_up and (value > upper_bound or value < lower_bound)
        )
        # Epsilon keeps the ratio finite while the variance is still zero.
        deviation_sigma = (value - prev_mean) / (std + 1e-9)

        # Incremental EWMA update of mean and variance.
        self.ewma_mean[kpi_name] = (
            self.alpha * value + (1 - self.alpha) * prev_mean
        )
        self.ewma_var[kpi_name] = (
            (1 - self.alpha)
            * (prev_var + self.alpha * (value - prev_mean) ** 2)
        )
        self.samples_seen[kpi_name] = seen + 1

        return {
            'kpi': kpi_name,
            'value': value,
            'expected': prev_mean,
            'upper_bound': upper_bound,
            'lower_bound': lower_bound,
            'anomaly': anomaly,
            'deviation_sigma': deviation_sigma,
        }
Correlation analysis of degradation
Topological localization of the problem:
import networkx as nx
def localize_network_degradation(anomaly_events: list,
                                 topology_graph: nx.Graph) -> dict:
    """Guess which node is the shared root cause of simultaneous anomalies.

    Only events carrying the most recent timestamp in ``anomaly_events``
    are considered. For every pair of degraded nodes, all simple paths of
    at most 5 hops between them are enumerated; each non-degraded node on
    such a path earns one vote per path. The node with the most votes is
    reported as the suspected root cause.

    Args:
        anomaly_events: Dicts with at least the keys ``node_id``,
            ``anomaly`` (truthy when degraded) and ``timestamp``.
        topology_graph: Network topology to search for connecting paths.

    Returns:
        Dict with ``root_cause_node`` (``None`` when no candidate was
        found), ``confidence`` (heuristic score in ``[0.0, 1.0]``: votes
        divided by the number of degraded nodes, capped at 1.0) and
        ``affected_downstream`` (list of degraded node ids).
    """
    # Local import keeps this snippet self-contained: the surrounding file
    # never imports `combinations`.
    from itertools import combinations

    # Latest timestamp is computed once instead of once per event.
    latest_ts = max(
        (e['timestamp'] for e in anomaly_events), default=None
    )
    degraded_nodes = {
        e['node_id'] for e in anomaly_events
        if e['anomaly'] and e['timestamp'] == latest_ts
    }

    # Vote for every healthy node lying on a path between two degraded
    # nodes: it is a candidate common upstream element.
    suspect_nodes = {}
    for u, v in combinations(degraded_nodes, 2):
        try:
            paths = list(
                nx.all_simple_paths(topology_graph, u, v, cutoff=5)
            )
        except (nx.NetworkXNoPath, nx.NodeNotFound):
            # Best effort: a pair that cannot be connected, or a node that
            # is missing from the topology, contributes no votes.
            continue
        for path in paths:
            for node in path:
                if node not in degraded_nodes:
                    suspect_nodes[node] = suspect_nodes.get(node, 0) + 1

    if suspect_nodes:
        root_cause = max(suspect_nodes, key=suspect_nodes.get)
        # Several paths may cross the same node, so the raw ratio can
        # exceed 1; cap it to keep the score a valid confidence.
        confidence = min(
            1.0, suspect_nodes[root_cause] / len(degraded_nodes)
        )
        return {
            'root_cause_node': root_cause,
            'confidence': confidence,
            'affected_downstream': list(degraded_nodes),
        }
    return {
        'root_cause_node': None,
        'confidence': 0.0,
        'affected_downstream': list(degraded_nodes),
    }
MOS Prediction for Voice/Video
User Experience Prediction:
from sklearn.ensemble import GradientBoostingRegressor
def build_mos_prediction_model(network_samples: pd.DataFrame) -> GradientBoostingRegressor:
    """Train a regressor that predicts MOS from network metrics.

    The simplified E-model R-factor is computed per row and supplied to
    the model as an extra feature next to the raw metrics, so the model
    can refine that analytical baseline.

    Args:
        network_samples: Training data with the columns
            ``packet_loss_pct``, ``latency_ms``, ``jitter_ms``,
            ``codec_type_encoded``, ``plc_effectiveness`` and the target
            ``mos_score``. The frame is not modified.

    Returns:
        A fitted ``GradientBoostingRegressor`` expecting the five feature
        columns followed by ``e_model_r_factor``.
    """
    features = [
        'packet_loss_pct',
        'latency_ms',
        'jitter_ms',
        'codec_type_encoded',  # G.711=0, G.722=1, Opus=2
        'plc_effectiveness',   # Packet Loss Concealment quality
    ]

    # E-model baseline as an additional feature.
    r_factor = network_samples.apply(
        lambda row: compute_e_model_r_factor(
            row['latency_ms'], row['packet_loss_pct'], row['jitter_ms']
        ),
        axis=1,
    )
    # assign() returns a new frame, so the caller's DataFrame does not
    # silently gain an extra column as a side effect of training.
    training = network_samples.assign(e_model_r_factor=r_factor)

    model = GradientBoostingRegressor(n_estimators=200, max_depth=4)
    model.fit(training[features + ['e_model_r_factor']],
              training['mos_score'])
    return model
def compute_e_model_r_factor(latency, loss_pct, jitter):
    """Return a simplified ITU-T G.107 E-model R-factor, floored at 0.

    The rating starts from a base of 93.2 and is reduced by a delay
    penalty (with an extra slope above 177.3) and a logarithmic
    packet-loss penalty. ``jitter`` is accepted but not used by the
    calculation.
    """
    # Delay impairment: linear term plus a steeper term past 177.3.
    over_knee = latency - 177.3
    over_knee = over_knee if over_knee > 0 else 0
    delay_penalty = 0.024 * latency + 0.11 * over_knee

    # Loss impairment grows logarithmically with the loss percentage.
    loss_penalty = 11 + 40 * np.log(1 + 10 * loss_pct / 100)

    rating = 93.2 - delay_penalty - loss_penalty
    return rating if rating > 0 else 0
SLA Compliance Tracking
Automatic SLA Violation Counting:
def track_sla_compliance(kpi_history: pd.DataFrame,
                         sla_thresholds: dict,
                         contract_id: str,
                         availability_target_pct: float = 99.9) -> dict:
    """Count SLA violations per KPI and summarise contract compliance.

    Each row of ``kpi_history`` is treated as one measurement interval
    (reported as a "minute"). An interval violates the SLA for a KPI when
    the KPI value is strictly greater than its threshold.

    Args:
        kpi_history: One column per KPI, one row per interval; the index
            is only used to render the reporting period.
        sla_thresholds: Mapping of KPI column name to its upper limit.
            KPIs without a matching column are skipped.
        contract_id: Identifier echoed back in the result.
        availability_target_pct: Minimum share of compliant intervals, in
            percent, below which a KPI counts as breached. Defaults to
            99.9.

    Returns:
        Dict with ``contract_id``, ``period``, ``kpi_compliance`` (per-KPI
        dicts with ``sla_target``, ``availability_pct``,
        ``violation_minutes``, ``sla_breach``) and ``overall_compliance``.
        An empty history yields no per-KPI entries, because there is
        nothing to evaluate.
    """
    total_minutes = len(kpi_history)
    violations = {}

    # With zero rows the availability ratio is undefined; skip the loop
    # instead of dividing by zero.
    if total_minutes > 0:
        for kpi, threshold in sla_thresholds.items():
            if kpi not in kpi_history.columns:
                continue
            violation_minutes = int((kpi_history[kpi] > threshold).sum())
            availability = (
                (total_minutes - violation_minutes) / total_minutes * 100
            )
            violations[kpi] = {
                'sla_target': threshold,
                'availability_pct': round(availability, 4),
                'violation_minutes': violation_minutes,
                'sla_breach': bool(availability < availability_target_pct),
            }

    return {
        'contract_id': contract_id,
        'period': f"{kpi_history.index.min()} — {kpi_history.index.max()}",
        'kpi_compliance': violations,
        'overall_compliance': all(
            not v['sla_breach'] for v in violations.values()
        ),
    }
Integration with OSS/BSS stack: Northbound API for Nokia NetAct, Ericsson ENM, and Huawei U2000. SNMP/NETCONF for active equipment. Grafana + InfluxDB for visualization, PagerDuty / Zabbix for alerting.
Timeframe: EWMA KPI monitoring + anomalies + dashboard — 2-3 weeks. Topological localization, MOS prediction, SLA compliance tracker, OSS/BSS integration — 2-3 months.







