AI Compensation Benchmarking System Implementation
AI compensation benchmarking automates the comparison of internal salary rates against market data. The system collects postings from public sources (hh.ru, LinkedIn, Glassdoor), normalizes the data by grade and location, trains a predictive model of the market rate, and generates recommendations for compensation adjustments.
Salary Data Collection and Normalization
import pandas as pd
import numpy as np
from anthropic import Anthropic
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import LabelEncoder
import re
class CompensationBenchmarkSystem:
    """Benchmarks internal compensation against market salary data.

    Workflow: build_market_dataset -> train_salary_model ->
    predict_market_salary / analyze_compensation_gaps.
    """

    def __init__(self):
        """Initialize the LLM client and empty model/encoder state."""
        # Anthropic() picks up credentials from the environment.
        self.llm = Anthropic()
        self.market_data = None  # normalized market dataset, set by build_market_dataset
        self.encoders = {}       # per-column LabelEncoders, set by build_market_dataset
        self.model = None        # trained regressor, set by train_salary_model
def normalize_job_title(self, titles: list[str]) -> list[str]:
    """Normalize free-form job titles to standard grade+function labels via LLM.

    Titles are sent in batches of 20. If the LLM returns a different number
    of lines than the batch size, the remainder falls back to the original
    titles so the output always aligns 1:1 with the input (the original code
    extended blindly, silently misaligning every subsequent row).

    Args:
        titles: raw job titles, one per posting.

    Returns:
        Normalized titles, same length and order as `titles`.
    """
    batch_size = 20
    normalized: list[str] = []
    for i in range(0, len(titles), batch_size):
        batch = titles[i:i + batch_size]
        titles_str = "\n".join([f"{j+1}. {t}" for j, t in enumerate(batch)])
        response = self.llm.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=500,
            messages=[{
                "role": "user",
                "content": f"""Normalize these job titles to standard categories.
Use format: Junior/Middle/Senior/Lead/Principal + Function.
Functions: Software Engineer, Data Engineer, ML Engineer, Data Scientist, Product Manager,
DevOps Engineer, QA Engineer, Frontend Engineer, Backend Engineer, Full Stack Engineer.
Titles:
{titles_str}
Return only normalized titles, one per line, same order."""
            }]
        )
        lines = [ln.strip() for ln in
                 response.content[0].text.strip().split('\n') if ln.strip()]
        # Robustness: keep output aligned with input even on a short/long reply.
        if len(lines) < len(batch):
            lines += batch[len(lines):]
        normalized.extend(lines[:len(batch)])
    return normalized
def extract_grade_from_title(self, title: str) -> tuple[str, int]:
    """Extract the seniority grade keyword and its numeric level from a title.

    Matching is case-insensitive and substring-based; the first matching
    keyword in declaration order wins. Defaults to ('middle', 2) when no
    keyword is found.

    Note: the original annotation declared tuple[str, str], but the second
    element has always been the integer level — fixed here.

    Args:
        title: raw or normalized job title.

    Returns:
        (grade_keyword, grade_level) pair, e.g. ('senior', 3).
    """
    grades = {
        'junior': 1, 'intern': 0, 'trainee': 0,
        'middle': 2, 'regular': 2,
        'senior': 3, 'sr.': 3,
        'lead': 4, 'tech lead': 4,
        'principal': 5, 'staff': 5,
        'architect': 6, 'distinguished': 7
    }
    title_lower = title.lower()
    for grade, level in grades.items():
        if grade in title_lower:
            return grade, level
    return 'middle', 2  # default when no known keyword is present
def build_market_dataset(self, raw_data: pd.DataFrame) -> pd.DataFrame:
    """Normalize raw postings into a feature-ready market dataset.

    Args:
        raw_data: columns title, salary_from, salary_to, location,
            company_size, industry, remote, experience_years, skills (list).

    Returns:
        Copy of `raw_data` with salary_mid, normalized_title, grade,
        grade_level, *_encoded and skill_* columns added. Also stores the
        result in self.market_data and the fitted encoders in self.encoders.
    """
    df = raw_data.copy()
    # Midpoint of the posted range; one-sided postings use the available bound.
    # Rows with both bounds missing keep NaN here.
    # NOTE(review): no currency conversion happens in this method — it assumes
    # salaries are already in a single currency (USD). Confirm upstream ETL.
    df['salary_mid'] = (df['salary_from'].fillna(df['salary_to']) +
                        df['salary_to'].fillna(df['salary_from'])) / 2
    # Normalized positions (LLM call) and derived grade features.
    df['normalized_title'] = self.normalize_job_title(df['title'].tolist())
    df['grade'], df['grade_level'] = zip(*df['normalized_title'].apply(self.extract_grade_from_title))
    # Categorical feature encoding; encoders kept for inference-time transform.
    for col in ['grade', 'location', 'company_size', 'industry']:
        le = LabelEncoder()
        df[f'{col}_encoded'] = le.fit_transform(df[col].fillna('unknown'))
        self.encoders[col] = le
    # Binary indicator per tracked skill. Lowercase each skill list once
    # (the original re-lowercased every row once per skill).
    popular_skills = ['python', 'sql', 'machine learning', 'kubernetes',
                      'aws', 'spark', 'tensorflow', 'pytorch', 'java', 'go']
    skill_sets = df['skills'].apply(
        lambda s: {x.lower() for x in s} if isinstance(s, list) else set()
    )
    for skill in popular_skills:
        df[f'skill_{skill}'] = skill_sets.apply(lambda ss, sk=skill: 1 if sk in ss else 0)
    self.market_data = df
    return df
Predictive Market Rate Model
def train_salary_model(self, market_df: pd.DataFrame) -> dict:
    """Train the market-salary regressor and report cross-validated fit.

    Args:
        market_df: output of build_market_dataset (needs grade_level,
            experience_years, remote, *_encoded, skill_* and salary_mid).

    Returns:
        {'r2': mean 5-fold CV R², 'r2_std': its std deviation}.

    Side effects: sets self.model (fitted on all valid rows) and
    self.feature_cols (column order expected by predict_market_salary).
    """
    from sklearn.model_selection import cross_val_score

    feature_cols = (
        ['grade_level', 'experience_years', 'remote'] +
        [col for col in market_df.columns if col.endswith('_encoded')] +
        [col for col in market_df.columns if col.startswith('skill_')]
    )
    X = market_df[feature_cols].fillna(0)
    y = market_df['salary_mid']
    # Rows where both salary bounds were missing have NaN targets; the
    # regressor would raise on them, so drop them from training.
    valid = y.notna()
    X, y = X[valid], y[valid]
    self.model = GradientBoostingRegressor(
        n_estimators=300,
        max_depth=5,
        learning_rate=0.05,
        subsample=0.8,
        random_state=42
    )
    # cross_val_score clones the estimator, so CV is evaluated on unfitted
    # copies; run it first, then fit the final model on all valid data.
    cv_scores = cross_val_score(self.model, X, y, cv=5, scoring='r2')
    self.model.fit(X, y)
    self.feature_cols = feature_cols
    return {'r2': cv_scores.mean(), 'r2_std': cv_scores.std()}
def predict_market_salary(self, position: dict) -> dict:
    """Predict the market rate for a position and attach market percentiles.

    Args:
        position: {title, location, company_size, industry,
                   experience_years, skills, remote}.

    Returns:
        dict with predicted_salary, p25/p50/p75/p90 (from historical rows
        matching grade and location when the sample exceeds 10, otherwise
        bands derived from the prediction) and sample_size.
    """
    grade, grade_level = self.extract_grade_from_title(position.get('title', ''))
    features = {
        'grade_level': grade_level,
        'experience_years': position.get('experience_years', 3),
        # BUG FIX: 'remote' is part of self.feature_cols at training time but
        # was never populated here, so the column selection below raised
        # KeyError on every call.
        'remote': position.get('remote', 0),
    }
    for col in ['location', 'company_size', 'industry']:
        le = self.encoders.get(col)
        val = position.get(col, 'unknown')
        try:
            # Guard against a missing encoder (None) as well as unseen labels.
            features[f'{col}_encoded'] = le.transform([val])[0] if le is not None else 0
        except ValueError:
            features[f'{col}_encoded'] = 0  # category unseen at training time
    skills = {s.lower() for s in position.get('skills', [])}
    popular_skills = ['python', 'sql', 'machine learning', 'kubernetes',
                      'aws', 'spark', 'tensorflow', 'pytorch', 'java', 'go']
    for skill in popular_skills:
        features[f'skill_{skill}'] = 1 if skill in skills else 0
    X = pd.DataFrame([features])[self.feature_cols].fillna(0)
    predicted = self.model.predict(X)[0]
    # Percentiles from historical rows with the same grade and location.
    similar = self.market_data[
        (self.market_data['grade_level'] == grade_level) &
        (self.market_data['location'] == position.get('location', ''))
    ]['salary_mid']
    enough = len(similar) > 10  # fall back to model-derived bands on thin samples
    return {
        'predicted_salary': predicted,
        'p25': np.percentile(similar, 25) if enough else predicted * 0.85,
        'p50': np.percentile(similar, 50) if enough else predicted,
        'p75': np.percentile(similar, 75) if enough else predicted * 1.15,
        'p90': np.percentile(similar, 90) if enough else predicted * 1.25,
        'sample_size': len(similar)
    }
Compensation Gap Analysis
def analyze_compensation_gaps(self, employees_df: pd.DataFrame) -> dict:
    """Compare each employee's salary to the predicted market median.

    Args:
        employees_df: columns employee_id, title, current_salary, location,
            company_size, industry, experience_years, skills.

    Returns:
        {'employees': per-employee gap DataFrame,
         'summary': aggregate statistics dict,
         'recommendations': LLM-generated HR recommendations text}.
    """
    results = []
    for _, emp in employees_df.iterrows():
        market = self.predict_market_salary(emp.to_dict())
        current = emp['current_salary']
        p50 = market['p50']
        # Guard: a zero market median would otherwise divide by zero.
        gap_pct = (current - p50) / p50 * 100 if p50 else 0.0
        results.append({
            'employee_id': emp['employee_id'],
            'title': emp['title'],
            'current_salary': current,
            'market_p50': p50,
            'market_p75': market['p75'],
            'gap_pct': gap_pct,
            # >15% below market = high attrition risk, 5-15% = medium.
            'risk': 'high' if gap_pct < -15 else 'medium' if gap_pct < -5 else 'low',
            'recommended_adjustment': max(0, p50 - current)
        })
    df = pd.DataFrame(results)
    # Aggregate stats fed to the LLM for narrative interpretation.
    summary_stats = {
        'total_employees': len(df),
        'underpaid_high_risk': len(df[df['risk'] == 'high']),
        'underpaid_medium_risk': len(df[df['risk'] == 'medium']),
        'total_adjustment_needed': df['recommended_adjustment'].sum(),
        'avg_gap_pct': df['gap_pct'].mean(),
        'worst_gap_roles': df.nsmallest(5, 'gap_pct')[['title', 'gap_pct']].to_dict('records')
    }
    response = self.llm.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=500,
        messages=[{
            "role": "user",
            "content": f"""You are an HR Director. Analyze compensation gap.
Statistics:
{summary_stats}
Provide recommendations:
1. Priority groups for correction
2. Budget for compensation (correction amounts)
3. Personnel retention risks
4. Implementation timeframes for changes"""
        }]
    )
    return {
        'employees': df,
        'summary': summary_stats,
        'recommendations': response.content[0].text
    }
Typical benchmarking cycle without AI: manual data collection (1-2 weeks), position normalization (3-5 days), gap analysis (2-3 days). With AI system: complete cycle in 4-6 hours. Savings from retention risk mitigation through timely correction: 1.5-3 annual salaries for each retained key employee.







