Skip to content

Commit 728ddcf

Browse files
authored
Merge pull request #121 from 0FL01/feat/nim-provider-clean-v2
Feat/nim provider clean
2 parents e4a4913 + c5f0ab7 commit 728ddcf

12 files changed

Lines changed: 195 additions & 6 deletions

File tree

.env.example

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,15 @@ POE_MODEL=Claude-Sonnet-4
5757
# Popular models: Claude-Sonnet-4, Claude-Opus-4.1, GPT-4o, Gemini-2.5-Pro, Llama-3.1-405B, Grok-4
5858
# Poe also provides access to millions of community-created bots
5959

60+
# NVIDIA NIM Settings (Hosted cloud API for Llama, Mistral, and other models)
61+
# Get your API key at: https://build.nvidia.com/
62+
NIM_API_KEY=
63+
NIM_MODEL=meta/llama-3.1-8b-instruct
64+
# Available models: meta/llama-3.1-8b-instruct, meta/llama-3.1-70b-instruct,
65+
# meta/llama-3.1-405b-instruct, mistralai/mixtral-8x7b-instruct-v0.1
66+
# See all models at: https://build.nvidia.com/explore/discover
67+
# NIM_API_ENDPOINT=https://integrate.api.nvidia.com/v1/chat/completions # Optional, default endpoint
68+
6069
# Translation Settings
6170
# Source language: Auto-detected from uploaded file (uses langdetect library)
6271
# Target language: Auto-detected from browser language (can be changed in UI)

src/api/blueprints/config_routes.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ def get_config_path():
4545
DEEPSEEK_API_KEY,
4646
DEEPSEEK_MODEL,
4747
POE_API_KEY,
48+
NIM_API_KEY,
49+
NIM_MODEL,
4850
POE_MODEL,
4951
MAX_TOKENS_PER_CHUNK,
5052
OUTPUT_FILENAME_PATTERN
@@ -119,6 +121,8 @@ def get_available_models():
119121
return _get_deepseek_models(api_key)
120122
elif provider == 'poe':
121123
return _get_poe_models(api_key)
124+
elif provider == 'nim':
125+
return _get_nim_models(api_key)
122126
elif provider == 'openai':
123127
# Get endpoint from request for LM Studio support
124128
if request.method == 'POST':
@@ -158,12 +162,14 @@ def mask_api_key(key):
158162
"mistral_api_key": mask_api_key(MISTRAL_API_KEY),
159163
"deepseek_api_key": mask_api_key(DEEPSEEK_API_KEY),
160164
"poe_api_key": mask_api_key(POE_API_KEY),
165+
"nim_api_key": mask_api_key(NIM_API_KEY),
161166
"gemini_api_key_configured": bool(GEMINI_API_KEY),
162167
"openai_api_key_configured": bool(OPENAI_API_KEY),
163168
"openrouter_api_key_configured": bool(OPENROUTER_API_KEY),
164169
"mistral_api_key_configured": bool(MISTRAL_API_KEY),
165170
"deepseek_api_key_configured": bool(DEEPSEEK_API_KEY),
166171
"poe_api_key_configured": bool(POE_API_KEY),
172+
"nim_api_key_configured": bool(NIM_API_KEY),
167173
"output_filename_pattern": OUTPUT_FILENAME_PATTERN
168174
}
169175

@@ -412,6 +418,95 @@ def _get_poe_models(provided_api_key=None):
412418
"error": f"Error connecting to Poe API: {str(e)}"
413419
})
414420

421+
def _get_nim_models(provided_api_key=None):
    """Get available chat models from the NVIDIA NIM API.

    Args:
        provided_api_key: Optional API key supplied with the request; falls
            back to the configured NIM_API_KEY via _resolve_api_key.

    Returns:
        Flask JSON response with keys: models, model_names, default, status,
        count, and (on failure) error.
    """
    # Imported here rather than at module top — presumably to avoid an import
    # cycle with src.config; NOTE(review): confirm against the import graph.
    from src.config import NIM_API_ENDPOINT

    api_key = _resolve_api_key(provided_api_key, 'NIM_API_KEY', NIM_API_KEY)

    # Use NIM_MODEL from .env, fallback to meta/llama-3.1-8b-instruct
    default_model = NIM_MODEL if NIM_MODEL else "meta/llama-3.1-8b-instruct"

    def _error_payload(status, error):
        """Build the common empty-model error response (deduplicates the
        four identical payloads the endpoint can return)."""
        return jsonify({
            "models": [],
            "model_names": [],
            "default": default_model,
            "status": status,
            "count": 0,
            "error": error
        })

    if not api_key:
        return _error_payload(
            "api_key_missing",
            "NVIDIA NIM API key is required. Get your key at https://build.nvidia.com/"
        )

    try:
        # The configured endpoint targets /chat/completions; strip that
        # suffix to reach the sibling OpenAI-compatible /models listing.
        base_url = NIM_API_ENDPOINT.replace('/chat/completions', '').rstrip('/')
        models_url = f"{base_url}/models"
        headers = {'Authorization': f'Bearer {api_key}'}

        response = requests.get(models_url, headers=headers, timeout=10)

        if response.status_code == 200:
            data = response.json()
            models_data = data.get('data', [])

            # Keep only chat-capable entries: skip embedding/whisper models
            # and any record with a missing or empty id (previously such
            # records produced blank entries in the list).
            models = [
                {
                    'id': m.get('id', ''),
                    'name': m.get('id', ''),
                    'owned_by': m.get('owned_by', 'nvidia')
                }
                for m in models_data
                if m.get('id')
                and 'embedding' not in m.get('id', '').lower()
                and 'whisper' not in m.get('id', '').lower()
            ]

            # Sort models by name
            models.sort(key=lambda entry: entry['name'].lower())

            if models:
                model_ids = [entry['id'] for entry in models]
                # Fall back to the first offered model when the configured
                # default is not present in the API listing.
                if default_model not in model_ids:
                    default_model = model_ids[0]
                return jsonify({
                    "models": models,
                    "model_names": model_ids,
                    "default": default_model,
                    "status": "nim_connected",
                    "count": len(models)
                })

        # If API call failed, return empty with error
        return _error_payload(
            "nim_error",
            f"Failed to retrieve NVIDIA NIM models (HTTP {response.status_code})"
        )

    except requests.exceptions.ConnectionError:
        return _error_payload(
            "nim_error",
            "Could not connect to NVIDIA NIM API. Check your internet connection."
        )
    except Exception as e:
        return _error_payload(
            "nim_error",
            f"Error connecting to NVIDIA NIM API: {str(e)}"
        )
509+
415510
def _get_openai_models(provided_api_key=None, api_endpoint=None):
416511
"""Get available models from OpenAI-compatible API
417512
@@ -777,6 +872,8 @@ def save_settings():
777872
'DEEPSEEK_MODEL',
778873
'POE_API_KEY',
779874
'POE_MODEL',
875+
'NIM_API_KEY',
876+
'NIM_MODEL',
780877
'DEFAULT_MODEL',
781878
'LLM_PROVIDER',
782879
'API_ENDPOINT',
@@ -830,6 +927,7 @@ def get_settings():
830927
"mistral_api_key_configured": bool(MISTRAL_API_KEY),
831928
"deepseek_api_key_configured": bool(DEEPSEEK_API_KEY),
832929
"poe_api_key_configured": bool(POE_API_KEY),
930+
"nim_api_key_configured": bool(NIM_API_KEY),
833931
"default_model": DEFAULT_MODEL or "",
834932
"llm_provider": os.getenv('LLM_PROVIDER', 'ollama'),
835933
"api_endpoint": DEFAULT_OLLAMA_API_ENDPOINT or "",

src/api/handlers.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ def _openrouter_cost_callback(cost_data):
275275
mistral_api_key=config.get('mistral_api_key', ''),
276276
deepseek_api_key=config.get('deepseek_api_key', ''),
277277
poe_api_key=config.get('poe_api_key', ''),
278+
nim_api_key=config.get('nim_api_key', ''),
278279
context_window=config.get('context_window', 2048),
279280
auto_adjust_context=config.get('auto_adjust_context', True),
280281
min_chunk_size=config.get('min_chunk_size', 5),

src/config.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,10 @@
205205
POE_API_KEY = os.getenv('POE_API_KEY', '')
206206
POE_MODEL = os.getenv('POE_MODEL', 'Claude-Sonnet-4')
207207
POE_API_ENDPOINT = os.getenv('POE_API_ENDPOINT', 'https://api.poe.com/v1/chat/completions')
208+
# NVIDIA NIM configuration (hosted cloud API for Llama, Mistral, and other models)
209+
NIM_API_KEY = os.getenv('NIM_API_KEY', '')
210+
NIM_MODEL = os.getenv('NIM_MODEL', 'meta/llama-3.1-8b-instruct')
211+
NIM_API_ENDPOINT = os.getenv('NIM_API_ENDPOINT', 'https://integrate.api.nvidia.com/v1/chat/completions')
208212

209213
# SRT-specific configuration
210214
SRT_LINES_PER_BLOCK = int(os.getenv('SRT_LINES_PER_BLOCK', '5'))
@@ -449,6 +453,7 @@ class TranslationConfig:
449453
mistral_api_key: str = MISTRAL_API_KEY
450454
deepseek_api_key: str = DEEPSEEK_API_KEY
451455
poe_api_key: str = POE_API_KEY
456+
nim_api_key: str = NIM_API_KEY
452457

453458
# LLM parameters
454459
timeout: int = REQUEST_TIMEOUT
@@ -487,6 +492,7 @@ def from_cli_args(cls, args) -> 'TranslationConfig':
487492
mistral_api_key=getattr(args, 'mistral_api_key', MISTRAL_API_KEY),
488493
deepseek_api_key=getattr(args, 'deepseek_api_key', DEEPSEEK_API_KEY),
489494
poe_api_key=getattr(args, 'poe_api_key', POE_API_KEY),
495+
nim_api_key=getattr(args, 'nim_api_key', NIM_API_KEY),
490496
max_tokens_per_chunk=getattr(args, 'max_tokens_per_chunk', MAX_TOKENS_PER_CHUNK),
491497
soft_limit_ratio=getattr(args, 'soft_limit_ratio', SOFT_LIMIT_RATIO)
492498
)
@@ -515,6 +521,7 @@ def from_web_request(cls, request_data: dict) -> 'TranslationConfig':
515521
mistral_api_key=request_data.get('mistral_api_key', MISTRAL_API_KEY),
516522
deepseek_api_key=request_data.get('deepseek_api_key', DEEPSEEK_API_KEY),
517523
poe_api_key=request_data.get('poe_api_key', POE_API_KEY),
524+
nim_api_key=request_data.get('nim_api_key', NIM_API_KEY),
518525
max_tokens_per_chunk=request_data.get('max_tokens_per_chunk', MAX_TOKENS_PER_CHUNK),
519526
soft_limit_ratio=request_data.get('soft_limit_ratio', SOFT_LIMIT_RATIO)
520527
)
@@ -537,6 +544,7 @@ def to_dict(self) -> dict:
537544
'mistral_api_key': self.mistral_api_key,
538545
'deepseek_api_key': self.deepseek_api_key,
539546
'poe_api_key': self.poe_api_key,
547+
'nim_api_key': self.nim_api_key,
540548
'max_tokens_per_chunk': self.max_tokens_per_chunk,
541549
'soft_limit_ratio': self.soft_limit_ratio
542550
}

src/core/adapters/translate_file.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ async def translate_file(
4242
mistral_api_key: Optional[str] = None,
4343
deepseek_api_key: Optional[str] = None,
4444
poe_api_key: Optional[str] = None,
45+
nim_api_key: Optional[str] = None,
4546
context_window: Optional[int] = None,
4647
auto_adjust_context: bool = True,
4748
min_chunk_size: int = 5,
@@ -77,6 +78,7 @@ async def translate_file(
7778
mistral_api_key: Mistral API key (required for mistral provider)
7879
deepseek_api_key: DeepSeek API key (required for deepseek provider)
7980
poe_api_key: Poe API key (required for poe provider)
81+
nim_api_key: NVIDIA NIM API key (required for nim provider)
8082
context_window: Maximum context window size in tokens
8183
auto_adjust_context: Whether to automatically adjust context size
8284
min_chunk_size: Minimum chunk size for text splitting
@@ -155,6 +157,7 @@ async def translate_file(
155157
mistral_api_key=mistral_api_key,
156158
deepseek_api_key=deepseek_api_key,
157159
poe_api_key=poe_api_key,
160+
nim_api_key=nim_api_key,
158161
context_window=context_window or 2048,
159162
auto_adjust_context=auto_adjust_context,
160163
min_chunk_size=min_chunk_size,

src/core/epub/translator.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ async def translate_epub_file(
4747
mistral_api_key: Optional[str] = None,
4848
deepseek_api_key: Optional[str] = None,
4949
poe_api_key: Optional[str] = None,
50+
nim_api_key: Optional[str] = None,
5051
context_window: int = 2048,
5152
auto_adjust_context: bool = True,
5253
min_chunk_size: int = 5,
@@ -89,6 +90,7 @@ async def translate_epub_file(
8990
mistral_api_key: Mistral API key
9091
deepseek_api_key: DeepSeek API key
9192
poe_api_key: Poe API key
93+
nim_api_key: NVIDIA NIM API key
9294
context_window: Context window size for LLM
9395
auto_adjust_context: Auto-adjust context based on model
9496
min_chunk_size: Minimum chunk size
@@ -137,6 +139,7 @@ async def translate_epub_file(
137139
mistral_api_key=mistral_api_key,
138140
deepseek_api_key=deepseek_api_key,
139141
poe_api_key=poe_api_key,
142+
nim_api_key=nim_api_key,
140143
cli_api_endpoint=cli_api_endpoint,
141144
initial_context=initial_context,
142145
log_callback=log_callback
@@ -344,6 +347,7 @@ def _create_llm_client(
344347
mistral_api_key: Optional[str],
345348
deepseek_api_key: Optional[str],
346349
poe_api_key: Optional[str],
350+
nim_api_key: Optional[str],
347351
cli_api_endpoint: str,
348352
initial_context: int,
349353
log_callback: Optional[Callable] = None
@@ -355,6 +359,7 @@ def _create_llm_client(
355359
llm_provider, gemini_api_key, cli_api_endpoint, model_name,
356360
openai_api_key, openrouter_api_key, mistral_api_key, deepseek_api_key,
357361
poe_api_key=poe_api_key,
362+
nim_api_key=nim_api_key,
358363
context_window=initial_context,
359364
log_callback=log_callback
360365
)

src/core/llm/factory.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313
OPENROUTER_API_KEY, OPENROUTER_MODEL,
1414
MISTRAL_API_KEY, MISTRAL_MODEL, MISTRAL_API_ENDPOINT,
1515
DEEPSEEK_API_KEY, DEEPSEEK_MODEL, DEEPSEEK_API_ENDPOINT,
16-
POE_API_KEY, POE_MODEL, POE_API_ENDPOINT
16+
POE_API_KEY, POE_MODEL, POE_API_ENDPOINT,
17+
NIM_API_KEY, NIM_MODEL, NIM_API_ENDPOINT
1718
)
1819
from .base import LLMProvider
1920
from .providers.ollama import OllamaProvider
@@ -139,5 +140,17 @@ def create_llm_provider(provider_type: str = "ollama", **kwargs) -> LLMProvider:
139140
model=kwargs.get("model", POE_MODEL),
140141
api_endpoint=POE_API_ENDPOINT
141142
)
143+
elif provider_type.lower() == "nim":
144+
api_key = kwargs.get("api_key") or kwargs.get("nim_api_key")
145+
if not api_key:
146+
api_key = os.getenv("NIM_API_KEY", NIM_API_KEY)
147+
if not api_key:
148+
raise ValueError("NVIDIA NIM provider requires an API key. Get your key at https://build.nvidia.com/")
149+
return OpenAICompatibleProvider(
150+
api_key=api_key,
151+
model=kwargs.get("model", NIM_MODEL),
152+
api_endpoint=kwargs.get("api_endpoint", NIM_API_ENDPOINT)
153+
)
154+
142155
else:
143156
raise ValueError(f"Unknown provider type: {provider_type}")

src/core/llm_client.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,13 +170,14 @@ def create_llm_client(llm_provider: str, gemini_api_key: Optional[str],
170170
mistral_api_key: Optional[str] = None,
171171
deepseek_api_key: Optional[str] = None,
172172
poe_api_key: Optional[str] = None,
173+
nim_api_key: Optional[str] = None,
173174
context_window: Optional[int] = None,
174175
log_callback: Optional[callable] = None) -> Optional[LLMClient]:
175176
"""
176177
Factory function to create LLM client based on provider or custom endpoint
177178
178179
Args:
179-
llm_provider: Provider type ('ollama', 'gemini', 'openai', 'openrouter', 'mistral', 'deepseek', or 'poe')
180+
llm_provider: Provider type ('ollama', 'gemini', 'openai', 'openrouter', 'mistral', 'deepseek', 'poe', or 'nim')
180181
gemini_api_key: API key for Gemini provider
181182
api_endpoint: API endpoint for custom Ollama instance or OpenAI-compatible API
182183
model_name: Model name to use
@@ -185,6 +186,7 @@ def create_llm_client(llm_provider: str, gemini_api_key: Optional[str],
185186
mistral_api_key: API key for Mistral provider
186187
deepseek_api_key: API key for DeepSeek provider
187188
poe_api_key: API key for Poe provider
189+
nim_api_key: API key for NVIDIA NIM provider
188190
context_window: Context window size for the model
189191
log_callback: Callback function for logging
190192
@@ -204,6 +206,8 @@ def create_llm_client(llm_provider: str, gemini_api_key: Optional[str],
204206
return LLMClient(provider_type="deepseek", model=model_name, api_key=deepseek_api_key)
205207
if llm_provider == "poe":
206208
return LLMClient(provider_type="poe", model=model_name, api_key=poe_api_key)
209+
if llm_provider == "nim":
210+
return LLMClient(provider_type="nim", model=model_name, api_key=nim_api_key)
207211
if llm_provider == "ollama":
208212
# Always create a new client for Ollama to ensure proper configuration
209213
return LLMClient(provider_type="ollama", api_endpoint=api_endpoint, model=model_name,
Lines changed: 1 addition & 0 deletions
Loading

src/web/static/js/providers/provider-manager.js

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ const PROVIDER_LOGOS = {
2626
mistral: '/static/img/providers/mistral.png',
2727
gemini: '/static/img/providers/gemini.png',
2828
openai: '/static/img/providers/openai.png',
29-
openrouter: '/static/img/providers/openrouter.png'
29+
openrouter: '/static/img/providers/openrouter.png',
30+
nim: '/static/img/providers/nvidia.png'
3031
};
3132

3233
/**
@@ -39,7 +40,8 @@ const PROVIDER_META = {
3940
mistral: { name: 'Mistral', description: 'Cloud API' },
4041
gemini: { name: 'Gemini', description: 'Cloud' },
4142
openai: { name: 'OpenAI', description: 'Compatible' },
42-
openrouter: { name: 'OpenRouter', description: '200+ models' }
43+
openrouter: { name: 'OpenRouter', description: '200+ models' },
44+
nim: { name: 'NVIDIA NIM', description: 'Cloud API' }
4345
};
4446

4547
/**
@@ -141,6 +143,23 @@ const POE_FALLBACK_MODELS = [
141143
{ value: 'exa-search', label: 'Exa Search', group: 'Poe Bots' }
142144
];
143145

146+
/**
147+
* Fallback NVIDIA NIM models list (used when API fetch fails)
148+
* See all models at: https://build.nvidia.com/explore/discover
149+
*/
150+
const NIM_FALLBACK_MODELS = [
151+
{ value: 'meta/llama-3.1-8b-instruct', label: 'Llama 3.1 8B Instruct (128k ctx)' },
152+
{ value: 'meta/llama-3.1-70b-instruct', label: 'Llama 3.1 70B Instruct (128k ctx)' },
153+
{ value: 'meta/llama-3.1-405b-instruct', label: 'Llama 3.1 405B Instruct (128k ctx)' },
154+
{ value: 'meta/llama-3.2-1b-instruct', label: 'Llama 3.2 1B Instruct (128k ctx)' },
155+
{ value: 'meta/llama-3.2-3b-instruct', label: 'Llama 3.2 3B Instruct (128k ctx)' },
156+
{ value: 'mistralai/mistral-nemo-12b-instruct', label: 'Mistral Nemo 12B Instruct (128k ctx)' },
157+
{ value: 'mistralai/mixtral-8x7b-instruct-v0.1', label: 'Mixtral 8x7B Instruct v0.1 (32k ctx)' },
158+
{ value: 'nvidia/llama-3.1-nemotron-70b-instruct', label: 'Llama 3.1 Nemotron 70B Instruct (128k ctx)' },
159+
{ value: 'deepseek-ai/deepseek-v3', label: 'DeepSeek V3 (128k ctx)' },
160+
{ value: 'deepseek-ai/deepseek-r1', label: 'DeepSeek R1 (128k ctx)' }
161+
];
162+
144163
/**
145164
* Fallback OpenRouter models list (used when API fetch fails)
146165
* Sorted by cost: cheap first

0 commit comments

Comments
 (0)