1616from benchmark .runner import BenchmarkRunner , quick_benchmark , full_benchmark
1717from benchmark .results .storage import ResultsStorage
1818from benchmark .wiki .generator import WikiGenerator
19- from benchmark .translator import get_available_ollama_models , get_available_openrouter_models
19+ from benchmark .translator import (
20+ get_available_ollama_models ,
21+ get_available_openrouter_models ,
22+ get_available_openai_models ,
23+ )
2024
2125
2226# ANSI color codes for terminal output
@@ -75,6 +79,8 @@ def cmd_run(args: argparse.Namespace) -> int:
7579 evaluator_provider = getattr (args , 'evaluator_provider' , DEFAULT_EVALUATOR_PROVIDER )
7680 config = BenchmarkConfig .from_cli_args (
7781 openrouter_key = args .openrouter_key ,
82+ openai_key = args .openai_key ,
83+ openai_endpoint = args .openai_endpoint ,
7884 poe_key = args .poe_key ,
7985 evaluator_model = args .evaluator ,
8086 ollama_endpoint = args .ollama_endpoint ,
@@ -101,6 +107,14 @@ def cmd_run(args: argparse.Namespace) -> int:
101107 # Extract model IDs
102108 models = [m ["id" ] if isinstance (m , dict ) else m for m in models_data [:10 ]]
103109 print (colored (f"Found { len (models_data )} models. Using top 10: { ', ' .join (models [:3 ])} ..." , Colors .GREEN ))
110+ elif provider == "openai" :
111+ print (colored ("Fetching available OpenAI-compatible models..." , Colors .CYAN ))
112+ models_data = asyncio .run (get_available_openai_models (config ))
113+ if not models_data :
114+ log_callback ("error" , "No OpenAI-compatible models available." )
115+ return 1
116+ models = [m ["id" ] if isinstance (m , dict ) else m for m in models_data [:10 ]]
117+ print (colored (f"Found { len (models_data )} models. Using top 10: { ', ' .join (models [:3 ])} ..." , Colors .GREEN ))
104118 else :
105119 print (colored ("Detecting available Ollama models..." , Colors .CYAN ))
106120 models = asyncio .run (get_available_ollama_models (config ))
@@ -288,7 +302,12 @@ def cmd_models(args: argparse.Namespace) -> int:
288302 """List available models for benchmarking."""
289303 print_banner ()
290304
291- config = BenchmarkConfig .from_cli_args (openrouter_key = args .openrouter_key )
305+ config = BenchmarkConfig .from_cli_args (
306+ openrouter_key = args .openrouter_key ,
307+ openai_key = args .openai_key ,
308+ openai_endpoint = args .openai_endpoint ,
309+ translation_provider = args .provider ,
310+ )
292311 provider = args .provider
293312
294313 if provider == "openrouter" :
@@ -322,6 +341,32 @@ def cmd_models(args: argparse.Namespace) -> int:
322341 print (colored ("Tip: Use -m to specify models, e.g.:" , Colors .YELLOW ))
323342 print (" python -m benchmark.cli run -p openrouter -m anthropic/claude-sonnet-4 openai/gpt-4o" )
324343
344+ elif provider == "openai" :
345+ print (colored ("Fetching OpenAI-compatible models...\n " , Colors .CYAN ))
346+ models = asyncio .run (get_available_openai_models (config ))
347+
348+ if not models :
349+ log_callback ("error" , "Failed to fetch OpenAI-compatible models" )
350+ return 1
351+
352+ print (colored (f"Available OpenAI-Compatible Models ({ len (models )} ):\n " , Colors .BOLD ))
353+ print (f"{ 'Model ID' :<50} { 'Owner' :<20} " )
354+ print ("-" * 72 )
355+
356+ for model in models [:50 ]:
357+ if isinstance (model , dict ):
358+ model_id = model .get ("id" , "unknown" )
359+ owned_by = model .get ("owned_by" , "unknown" )
360+ else :
361+ model_id = model
362+ owned_by = "unknown"
363+
364+ print (f"{ model_id :<50} { owned_by :<20} " )
365+
366+ print ()
367+ print (colored ("Tip: Use -m and --openai-endpoint to specify a backend, e.g.:" , Colors .YELLOW ))
368+ print (" python -m benchmark.cli run -p openai --openai-endpoint http://localhost:8080/v1 -m your-model" )
369+
325370 else :
326371 print (colored ("Detecting Ollama models...\n " , Colors .CYAN ))
327372 models = asyncio .run (get_available_ollama_models (config ))
@@ -566,6 +611,9 @@ def create_parser() -> argparse.ArgumentParser:
566611 # Quick benchmark with Ollama (local models)
567612 python -m benchmark.cli run --openrouter-key YOUR_KEY
568613
614+ # Quick benchmark with an OpenAI-compatible backend
615+ python -m benchmark.cli run --provider openai --openai-endpoint http://localhost:8080/v1 -m your-model
616+
569617 # Quick benchmark with OpenRouter (cloud models)
570618 python -m benchmark.cli run --provider openrouter --openrouter-key YOUR_KEY
571619
@@ -578,6 +626,9 @@ def create_parser() -> argparse.ArgumentParser:
578626 # Specific OpenRouter models
579627 python -m benchmark.cli run -p openrouter -m anthropic/claude-sonnet-4 openai/gpt-4o -l fr de ja
580628
629+ # Specific OpenAI-compatible backend and models
630+ python -m benchmark.cli run -p openai --openai-endpoint http://localhost:8080/v1 -m qwen2.5-14b-instruct
631+
581632 # Generate wiki pages
582633 python -m benchmark.cli wiki
583634
@@ -594,6 +645,7 @@ def create_parser() -> argparse.ArgumentParser:
594645 "-m" , "--models" ,
595646 nargs = "+" ,
596647 help = "Models to benchmark. For Ollama: model names (e.g., llama3:8b). "
648+ "For OpenAI-compatible backends: model IDs (e.g., gpt-4o or local server model names). "
597649 "For OpenRouter: model IDs (e.g., anthropic/claude-sonnet-4). "
598650 "If not specified, auto-detects available models."
599651 )
@@ -609,9 +661,17 @@ def create_parser() -> argparse.ArgumentParser:
609661 )
610662 run_parser .add_argument (
611663 "-p" , "--provider" ,
612- choices = ["ollama" , "openrouter" ],
664+ choices = ["ollama" , "openai" , "openrouter" ],
613665 default = "ollama" ,
614- help = "Translation provider: 'ollama' (local, default) or 'openrouter' (cloud, 200+ models)"
666+ help = "Translation provider: 'ollama' (local, default), 'openai' (OpenAI-compatible), or 'openrouter' (cloud, 200+ models)"
667+ )
668+ run_parser .add_argument (
669+ "--openai-key" ,
670+ help = "API key for OpenAI-compatible translation backends. Can also be set via OPENAI_API_KEY env var."
671+ )
672+ run_parser .add_argument (
673+ "--openai-endpoint" ,
674+ help = "OpenAI-compatible chat completions endpoint or /v1 base URL. Can also be set via OPENAI_API_ENDPOINT env var."
615675 )
616676 run_parser .add_argument (
617677 "--openrouter-key" ,
@@ -696,10 +756,18 @@ def create_parser() -> argparse.ArgumentParser:
696756 models_parser = subparsers .add_parser ("models" , help = "List available models for benchmarking" )
697757 models_parser .add_argument (
698758 "-p" , "--provider" ,
699- choices = ["ollama" , "openrouter" ],
759+ choices = ["ollama" , "openai" , "openrouter" ],
700760 default = "ollama" ,
701761 help = "Provider to list models for (default: ollama)"
702762 )
763+ models_parser .add_argument (
764+ "--openai-key" ,
765+ help = "API key for listing models from an OpenAI-compatible endpoint"
766+ )
767+ models_parser .add_argument (
768+ "--openai-endpoint" ,
769+ help = "OpenAI-compatible endpoint to query for available models"
770+ )
703771 models_parser .add_argument (
704772 "--openrouter-key" ,
705773 help = "OpenRouter API key (required for listing OpenRouter models)"
0 commit comments