AI Auto-Generation of Unit Tests
Everyone writes unit tests, but few do it systematically — especially for legacy code with no tests and no clear starting point. An AI generator creates pytest/Jest/JUnit tests directly from source code, analyzing logic through AST and discovering scenarios humans would miss.
Python: pytest Generation via AST + LLM
import ast
import inspect
from langchain_openai import ChatOpenAI
from pathlib import Path
class UnitTestGenerator:
    """Generates pytest unit tests for every public function in a source file.

    Each function is statically analysed (branch conditions, external calls,
    raise statements, cyclomatic complexity) and the findings are sent to an
    LLM together with the function source, so generated tests target real
    code paths instead of guessing.
    """

    # NOTE: the literal naming example is written with doubled braces
    # ({{function_name}}_{{scenario}}) so that str.format() does not raise
    # KeyError on placeholders we never supply — single braces here would
    # crash _generate_function_tests on every call.
    PYTEST_PROMPT = """Generate pytest unit tests for the function.
Function code:
```python
{function_code}
Module dependencies: {imports}
AST analysis:
- Cyclomatic complexity: {complexity}
- Conditional branches: {branches}
- External dependency calls: {external_calls}
- Raise statements: {raises}
Test requirements:
- Use @pytest.mark.parametrize for data sets
- Mock external dependencies via pytest-mock (mocker.patch)
- Test all branches: every if/elif/else condition
- Test raises: for every raise in code
- Use fixtures for reusable objects
- Test names: test_{{function_name}}_{{scenario}} (e.g. test_calculate_tax_zero_income)
Return only test code with import section."""

    def __init__(self):
        # Low temperature keeps generation close to deterministic.
        self.llm = ChatOpenAI(model="gpt-4o", temperature=0.1)

    def generate_tests_for_file(self, source_path: str) -> str:
        """Generate tests for every public function/method in *source_path*.

        Returns the merged test-module source as a single string.
        """
        source = Path(source_path).read_text(encoding="utf-8")
        tree = ast.parse(source)
        all_tests = []
        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                if node.name.startswith("_"):
                    continue  # skip private methods
                func_source = ast.get_source_segment(source, node)
                analysis = self._analyze_function(node, source)
                tests = self._generate_function_tests(func_source, analysis, source)
                all_tests.append(tests)
        return self._merge_test_files(all_tests, source_path)

    def _analyze_function(self, node, source: str) -> dict:
        """AST analysis of a function before generation.

        Returns a dict with cyclomatic complexity, up to five branch
        conditions, up to five distinct attribute-style calls (treated as
        external dependencies), and all raised expressions.
        """
        branches = []
        external_calls = []
        raises = []
        for child in ast.walk(node):
            if isinstance(child, ast.If):
                cond = ast.get_source_segment(source, child.test)
                branches.append(cond)
            elif isinstance(child, ast.Call):
                # obj.method(...) style calls are assumed to be external
                # dependencies worth mocking; plain-name calls are skipped.
                if isinstance(child.func, ast.Attribute):
                    call = f"{ast.get_source_segment(source, child.func.value)}.{child.func.attr}"
                    external_calls.append(call)
            elif isinstance(child, ast.Raise):
                if child.exc:
                    raises.append(ast.get_source_segment(source, child.exc))
        return {
            "complexity": self._cyclomatic_complexity(node),
            "branches": branches[:5],  # top 5 keeps the prompt short
            "external_calls": list(set(external_calls))[:5],
            "raises": raises
        }

    def _cyclomatic_complexity(self, node) -> int:
        """McCabe-style complexity: 1 plus one per decision point."""
        decision_nodes = (
            ast.If, ast.For, ast.AsyncFor, ast.While, ast.ExceptHandler,
            ast.IfExp,
        )
        complexity = 1
        for child in ast.walk(node):
            if isinstance(child, decision_nodes):
                complexity += 1
            elif isinstance(child, ast.BoolOp):
                # each extra and/or operand introduces another path
                complexity += len(child.values) - 1
        return complexity

    def _generate_function_tests(self, func_code: str, analysis: dict, source: str) -> str:
        """Ask the LLM for tests covering one analysed function."""
        imports = self._extract_imports(source)
        result = self.llm.invoke(
            self.PYTEST_PROMPT.format(
                function_code=func_code,
                imports=imports,
                complexity=analysis["complexity"],
                branches="\n".join(analysis["branches"]),
                external_calls="\n".join(analysis["external_calls"]),
                raises="\n".join(analysis["raises"])
            )
        )
        return result.content

    def _extract_imports(self, source: str) -> str:
        """Return the module's top-level import statements, one per line."""
        lines = []
        for node in ast.parse(source).body:
            if isinstance(node, (ast.Import, ast.ImportFrom)):
                lines.append(ast.get_source_segment(source, node))
        return "\n".join(lines)

    def _merge_test_files(self, all_tests: list, source_path: str) -> str:
        """Concatenate per-function test blocks into one test module."""
        header = f"# Auto-generated tests for {Path(source_path).name}\n"
        return header + "\n\n".join(all_tests)
### TypeScript/Jest Generation
```python
# NOTE(review): both members below reference `self`, so they appear to belong
# inside the UnitTestGenerator class defined earlier (indentation was lost in
# this snippet) — confirm placement before copying them into a module.
# Prompt template for Jest/TypeScript generation; the only str.format()
# placeholder is {function_code}, so formatting it cannot raise KeyError.
JEST_PROMPT = """Generate Jest unit tests for TypeScript function.
```typescript
{function_code}
Requirements:
- Use describe/it blocks
- jest.fn() for mocks
- beforeEach for setup
- expect().toBe() / toEqual() / toThrow()
- Test should import only what's needed
- Don't use any — only strict typing in tests
Coverage: successful scenarios, boundary values, errors. Return TypeScript code only."""

async def generate_jest_tests(self, ts_function: str) -> str:
    """Send one TypeScript function to the LLM and return the generated
    Jest test code (the raw message content, no post-processing)."""
    result = await self.llm.ainvoke(
        self.JEST_PROMPT.format(function_code=ts_function)
    )
    return result.content
### Automatic Problem Detection in Generated Tests
Generated tests sometimes contain syntax errors or incorrect assertions. Add a validation loop:
```python
import subprocess
class TestValidator:
def validate_and_fix(self, test_code: str, source_file: str) -> str:
"""Runs tests and fixes errors in loop"""
temp_test_file = "/tmp/test_generated.py"
Path(temp_test_file).write_text(test_code)
for attempt in range(3):
result = subprocess.run(
["pytest", temp_test_file, "-x", "--tb=short",
f"--rootdir={Path(source_file).parent}"],
capture_output=True, text=True, timeout=60
)
if result.returncode == 0:
break
# Fix errors via LLM
fix_prompt = f"""Fix pytest tests that are failing.
Tests:
{test_code}
Error:
{result.stdout[-2000:]}
Return fixed tests (code only)."""
test_code = self.llm.invoke(fix_prompt).content
Path(temp_test_file).write_text(test_code)
return test_code
Integration via pre-commit Hook
# .pre-commit-config.yaml
repos:
  - repo: local
    hooks:
      - id: ai-test-generator
        name: AI Unit Test Generator
        entry: python scripts/generate_tests.py
        language: python
        pass_filenames: true
        types: [python]
        # only on push, not on every commit; "push" was renamed to
        # "pre-push" in pre-commit 3.2 (the old name is deprecated)
        stages: [pre-push]
Case study: a Python payment-processing service, 12,000 lines of code, zero unit tests (legacy). Running the generator over the entire codebase produced 340 tests in 45 minutes. After the validation loop, 298 passed without changes and 42 needed 1–2 fix iterations. Eleven of the generated tests asserted the correct expected behavior yet failed against the actual code, revealing real bugs: incorrect handling of negative amounts, an error on an empty transaction list, and a wrong timezone in deadline calculations.
Timeframe: generator for one language (Python/TypeScript) with validation loop: 2–3 weeks; multilingual with CI/CD integration: 4–5 weeks.







