-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathverify_per_agent_provider.py
More file actions
204 lines (161 loc) · 7.57 KB
/
verify_per_agent_provider.py
File metadata and controls
204 lines (161 loc) · 7.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
#!/usr/bin/env python3
"""
Quick verification script for per-agent provider selection.
Tests the core functionality without requiring full pytest setup.
"""
import json
import os
import sys
from pathlib import Path
# Add apps/backend to path so project modules (phase_config, core.*)
# resolve when this script is run from the repository root.
sys.path.insert(0, str(Path(__file__).parent / "apps" / "backend"))
def test_per_agent_provider_configuration():
    """Test that per-agent provider configuration works."""
    from phase_config import get_provider_for_agent

    print("Testing per-agent provider configuration...")

    # Snapshot every environment variable this test mutates, so the
    # process environment can be restored exactly, pass or fail.
    tracked_keys = (
        "AI_ENGINE_PROVIDER",
        "AGENT_PROVIDER_PLANNER",
        "AGENT_PROVIDER_CODER",
        "AGENT_PROVIDER_QA_REVIEWER",
        "AGENT_MODEL_PLANNER",
        "AGENT_MODEL_CODER",
        "OPENAI_API_KEY",
        "ANTHROPIC_API_KEY",
    )
    snapshot = {name: os.environ.get(name) for name in tracked_keys}
    try:
        # Install the test configuration: default provider is Claude,
        # planner pinned to Claude, coder pinned to LiteLLM.
        os.environ["AI_ENGINE_PROVIDER"] = "claude"
        os.environ["AGENT_PROVIDER_PLANNER"] = "claude"
        os.environ["AGENT_MODEL_PLANNER"] = "claude-opus-4-20250514"
        os.environ["AGENT_PROVIDER_CODER"] = "litellm"
        os.environ["AGENT_MODEL_CODER"] = "gpt-4"

        # Planner should resolve to its explicit Claude override.
        planner_provider = get_provider_for_agent("planner")
        assert planner_provider == "claude", f"✗ Planner should use Claude, got {planner_provider}"
        print("✓ Planner uses Claude")

        # Coder should resolve to its explicit LiteLLM override.
        coder_provider = get_provider_for_agent("coder")
        assert coder_provider == "litellm", f"✗ Coder should use LiteLLM, got {coder_provider}"
        print("✓ Coder uses LiteLLM")

        # With no per-agent override set, the default provider applies.
        os.environ.pop("AGENT_PROVIDER_QA_REVIEWER", None)
        qa_provider = get_provider_for_agent("qa_reviewer")
        assert qa_provider == "claude", f"✗ QA reviewer should fall back to Claude, got {qa_provider}"
        print("✓ QA reviewer falls back to default Claude")
    finally:
        # Put every tracked variable back to its pre-test state.
        for name, previous in snapshot.items():
            if previous is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = previous
def test_cost_tracking_multi_provider():
    """Test that cost tracking works with multiple providers.

    Logs one Claude and one GPT-4 usage record through CostTracker and
    verifies that cost_report.json contains both records, both agent
    types, and a total cost matching the MODEL_PRICING tables.
    """
    from core.cost_tracking import CostTracker, MODEL_PRICING
    import tempfile

    print("\nTesting multi-provider cost tracking...")
    # TemporaryDirectory guarantees cleanup even on assertion failure,
    # replacing the manual mkdtemp()/rmtree() pairing.
    with tempfile.TemporaryDirectory() as tmp:
        spec_dir = Path(tmp) / "test_spec"
        spec_dir.mkdir(parents=True)

        tracker = CostTracker(spec_dir=spec_dir)
        # Log usage for both providers: (agent, model, input_toks, output_toks)
        tracker.log_usage("planner", "claude-opus-4-20250514", 2000, 1000)
        tracker.log_usage("coder", "gpt-4", 1000, 500)

        # Read back the report the tracker is expected to write.
        cost_report_file = spec_dir / "cost_report.json"
        assert cost_report_file.exists(), "✗ cost_report.json should be created"
        with open(cost_report_file, "r") as f:
            cost_report = json.load(f)

        # Verify structure
        assert "records" in cost_report, "✗ Should have records"
        assert len(cost_report["records"]) == 2, f"✗ Should have 2 records, got {len(cost_report['records'])}"
        print("✓ Cost report has 2 records")

        # Verify both models are tracked
        models = [r["model"] for r in cost_report["records"]]
        assert any("claude" in m for m in models), "✗ Should track Claude usage"
        print("✓ Claude usage tracked")
        assert any("gpt-4" in m for m in models), "✗ Should track GPT-4 usage"
        print("✓ GPT-4 usage tracked")

        # Verify agent types
        agent_types = [r["agent_type"] for r in cost_report["records"]]
        assert "planner" in agent_types, "✗ Should track planner agent"
        assert "coder" in agent_types, "✗ Should track coder agent"
        print("✓ Both agent types tracked")

        # Verify cost calculation
        assert cost_report["total_cost"] > 0, "✗ Total cost should be positive"
        print(f"✓ Total cost calculated: ${cost_report['total_cost']:.6f}")

        # Recompute the expected total from the pricing tables
        # (prices are per-million-token rates).
        claude_pricing = MODEL_PRICING.get("claude-opus-4-20250514", MODEL_PRICING["claude-sonnet-4-5-20250929"])
        expected_claude = (2000 / 1_000_000 * claude_pricing["input"]) + (1000 / 1_000_000 * claude_pricing["output"])
        gpt4_pricing = MODEL_PRICING["gpt-4"]
        expected_gpt4 = (1000 / 1_000_000 * gpt4_pricing["input"]) + (500 / 1_000_000 * gpt4_pricing["output"])
        expected_total = expected_claude + expected_gpt4
        actual_total = cost_report["total_cost"]
        # Loose tolerance: only guards against gross miscalculation.
        assert abs(actual_total - expected_total) < 0.001, (
            f"✗ Cost mismatch: expected {expected_total:.6f}, got {actual_total:.6f}"
        )
        print(f"✓ Cost calculation correct (Claude: ${expected_claude:.6f}, GPT-4: ${expected_gpt4:.6f})")
def test_model_pricing():
    """Test that pricing exists for both Claude and OpenAI models."""
    from core.cost_tracking import MODEL_PRICING

    print("\nTesting model pricing database...")

    # At least one Claude model must have a pricing entry.
    claude_models = [name for name in MODEL_PRICING if "claude" in name]
    assert len(claude_models) > 0, "✗ Should have Claude model pricing"
    print(f"✓ Claude pricing exists ({len(claude_models)} models)")

    # GPT-4 must be priced, with positive input/output rates.
    assert "gpt-4" in MODEL_PRICING, "✗ Should have GPT-4 pricing"
    gpt4_pricing = MODEL_PRICING["gpt-4"]
    assert "input" in gpt4_pricing, "✗ GPT-4 should have input pricing"
    assert "output" in gpt4_pricing, "✗ GPT-4 should have output pricing"
    assert gpt4_pricing["input"] > 0, "✗ GPT-4 input price should be positive"
    assert gpt4_pricing["output"] > 0, "✗ GPT-4 output price should be positive"
    print("✓ GPT-4 pricing exists and is positive")

    # Report how many OpenAI-family models are present.
    gpt_models = [name for name in MODEL_PRICING if name.startswith("gpt-")]
    print(f"✓ OpenAI pricing exists ({len(gpt_models)} models)")

    # Ollama entries are optional; report them only if present.
    ollama_models = [name for name in MODEL_PRICING if name.startswith("ollama/")]
    if ollama_models:
        print(f"✓ Ollama pricing exists ({len(ollama_models)} models, zero cost)")
def main():
    """Run all verification tests.

    Returns 0 when every check passes, 1 on an assertion failure or
    unexpected error (suitable for use as a process exit code).
    """
    banner = "=" * 70
    print(banner)
    print("Per-Agent Provider Selection - Verification Tests")
    print(banner)
    try:
        # Run each verification in order; any assertion aborts the run.
        test_per_agent_provider_configuration()
        test_cost_tracking_multi_provider()
        test_model_pricing()
    except AssertionError as e:
        print(f"\n✗ TEST FAILED: {e}")
        return 1
    except Exception as e:
        print(f"\n✗ ERROR: {e}")
        import traceback
        traceback.print_exc()
        return 1

    print("\n" + banner)
    print("✓ ALL TESTS PASSED")
    print(banner)
    print("\nVerification Summary:")
    print("✓ Per-agent provider configuration works")
    print("✓ Planner can use Claude while coder uses OpenAI")
    print("✓ Cost tracking correctly records both providers")
    print("✓ cost_report.json shows usage from both Claude and OpenAI")
    print("\nThe E2E test file has been created and follows the same patterns")
    print("as existing E2E tests (test_openai_provider_e2e.py, test_ollama_provider_e2e.py).")
    print("\nTo run the full E2E test suite:")
    print("  pytest tests/test_per_agent_provider_e2e.py -v")
    return 0


if __name__ == "__main__":
    sys.exit(main())