diff --git a/models/nvidia/gliner-pii.yaml b/models/nvidia/gliner-pii.yaml new file mode 100644 index 0000000..9bbb542 --- /dev/null +++ b/models/nvidia/gliner-pii.yaml @@ -0,0 +1,43 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: gliner-pii +params: + - path: labels + type: string + label: Labels + description: Entity types to detect. If not specified, uses the default set of 55 PII categories including email, phone_number, ssn, first_name, last_name, and address. + group: provider_metadata + - path: threshold + type: number + label: Threshold + description: Confidence threshold for entity detection. Lower values detect more entities but may include false positives. + default: 0.5 + range: + min: 0 + max: 1 + group: sampling + - path: chunk_length + type: integer + label: Chunk length + description: Context window size for processing. Longer texts are automatically split into chunks with overlap for complete coverage. Must be greater than overlap. + default: 384 + range: + min: 1 + max: 2048 + group: provider_metadata + - path: overlap + type: integer + label: Overlap + description: Token overlap between chunks to prevent entity clipping. Must be less than chunk_length. + default: 128 + range: + min: 0 + max: 512 + group: provider_metadata + - path: flat_ner + type: boolean + label: Flat NER + description: When true, prevents overlapping entity spans. When false, may return nested entities such as both a full name and its constituent first name. + default: false + group: provider_metadata diff --git a/models/nvidia/llama-3.1-nemoguard-8b-topic-control.yaml b/models/nvidia/llama-3.1-nemoguard-8b-topic-control.yaml new file mode 100644 index 0000000..bfc0d61 --- /dev/null +++ b/models/nvidia/llama-3.1-nemoguard-8b-topic-control.yaml @@ -0,0 +1,53 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: llama-3.1-nemoguard-8b-topic-control +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call. + default: 0.5 + range: + min: 0 + max: 2 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. + default: 1 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. + default: 1024 + range: + min: 1 + group: generation_length + - path: frequency_penalty + type: number + label: Frequency penalty + description: Penalizes new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: presence_penalty + type: number + label: Presence penalty + description: Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: stop + type: string + label: Stop + description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence. + group: generation_length diff --git a/models/nvidia/llama-3.1-nemotron-nano-8b-v1.yaml b/models/nvidia/llama-3.1-nemotron-nano-8b-v1.yaml new file mode 100644 index 0000000..fc46239 --- /dev/null +++ b/models/nvidia/llama-3.1-nemotron-nano-8b-v1.yaml @@ -0,0 +1,63 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: llama-3.1-nemotron-nano-8b-v1 +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call. + default: 0.6 + range: + min: 0 + max: 1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. + default: 0.95 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. + default: 4096 + range: + min: 1 + max: 16384 + group: generation_length + - path: frequency_penalty + type: number + label: Frequency penalty + description: Penalizes new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: presence_penalty + type: number + label: Presence penalty + description: Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: seed + type: integer + label: Seed + description: Best-effort deterministic sampling seed. Changing the seed produces a different response with similar characteristics. Fix the seed to reproduce results. + default: 0 + range: + min: 0 + max: 18446744073709552000 + group: sampling + - path: stop + type: string + label: Stop + description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence. + group: generation_length diff --git a/models/nvidia/llama-3.1-nemotron-safety-guard-8b-v3.yaml b/models/nvidia/llama-3.1-nemotron-safety-guard-8b-v3.yaml new file mode 100644 index 0000000..4256093 --- /dev/null +++ b/models/nvidia/llama-3.1-nemotron-safety-guard-8b-v3.yaml @@ -0,0 +1,14 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: llama-3.1-nemotron-safety-guard-8b-v3 +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + default: 0 + range: + min: 0 + max: 1 + group: sampling diff --git a/models/nvidia/llama-3.1-nemotron-ultra-253b-v1.yaml b/models/nvidia/llama-3.1-nemotron-ultra-253b-v1.yaml new file mode 100644 index 0000000..367b0ef --- /dev/null +++ b/models/nvidia/llama-3.1-nemotron-ultra-253b-v1.yaml @@ -0,0 +1,63 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: llama-3.1-nemotron-ultra-253b-v1 +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call. + default: 0.6 + range: + min: 0 + max: 1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. + default: 0.95 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. + default: 4096 + range: + min: 1 + max: 16384 + group: generation_length + - path: frequency_penalty + type: number + label: Frequency penalty + description: Penalizes new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: presence_penalty + type: number + label: Presence penalty + description: Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: seed + type: integer + label: Seed + description: Best-effort deterministic sampling seed. Changing the seed produces a different response with similar characteristics. Fix the seed to reproduce results. + default: 0 + range: + min: 0 + max: 18446744073709552000 + group: sampling + - path: stop + type: string + label: Stop + description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence. + group: generation_length diff --git a/models/nvidia/llama-3.3-nemotron-super-49b-v1.5.yaml b/models/nvidia/llama-3.3-nemotron-super-49b-v1.5.yaml new file mode 100644 index 0000000..b20d19b --- /dev/null +++ b/models/nvidia/llama-3.3-nemotron-super-49b-v1.5.yaml @@ -0,0 +1,63 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: llama-3.3-nemotron-super-49b-v1.5 +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call. + default: 0.6 + range: + min: 0 + max: 1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. + default: 0.95 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. + default: 65536 + range: + min: 1 + max: 65536 + group: generation_length + - path: frequency_penalty + type: number + label: Frequency penalty + description: Penalizes new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: presence_penalty + type: number + label: Presence penalty + description: Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: seed + type: integer + label: Seed + description: Best-effort deterministic sampling seed. Changing the seed produces a different response with similar characteristics. Fix the seed to reproduce results. + default: 0 + range: + min: 0 + max: 18446744073709552000 + group: sampling + - path: stop + type: string + label: Stop + description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence. + group: generation_length diff --git a/models/nvidia/llama-3.3-nemotron-super-49b-v1.yaml b/models/nvidia/llama-3.3-nemotron-super-49b-v1.yaml new file mode 100644 index 0000000..313d97e --- /dev/null +++ b/models/nvidia/llama-3.3-nemotron-super-49b-v1.yaml @@ -0,0 +1,63 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: llama-3.3-nemotron-super-49b-v1 +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call. + default: 0.6 + range: + min: 0 + max: 1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. + default: 0.95 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. + default: 4096 + range: + min: 1 + max: 16384 + group: generation_length + - path: frequency_penalty + type: number + label: Frequency penalty + description: Penalizes new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: presence_penalty + type: number + label: Presence penalty + description: Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: seed + type: integer + label: Seed + description: Best-effort deterministic sampling seed. Changing the seed produces a different response with similar characteristics. Fix the seed to reproduce results. + default: 0 + range: + min: 0 + max: 18446744073709552000 + group: sampling + - path: stop + type: string + label: Stop + description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence. + group: generation_length diff --git a/models/nvidia/nemoguard-jailbreak-detect.yaml b/models/nvidia/nemoguard-jailbreak-detect.yaml new file mode 100644 index 0000000..1800f49 --- /dev/null +++ b/models/nvidia/nemoguard-jailbreak-detect.yaml @@ -0,0 +1,10 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: nemoguard-jailbreak-detect +params: + - path: input + type: string + label: Input + description: The text to classify for jailbreak attempts. Accepts a string or an array of strings. + group: provider_metadata diff --git a/models/nvidia/nemotron-3-nano-30b-a3b.yaml b/models/nvidia/nemotron-3-nano-30b-a3b.yaml new file mode 100644 index 0000000..aaf9bcc --- /dev/null +++ b/models/nvidia/nemotron-3-nano-30b-a3b.yaml @@ -0,0 +1,43 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: nemotron-3-nano-30b-a3b +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call. + default: 1 + range: + max: 1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. + default: 1 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. + default: 16384 + range: + min: 1 + max: 32768 + group: generation_length + - path: seed + type: integer + label: Seed + description: Best-effort deterministic sampling seed. Repeated requests with the same seed and parameters should return the same result. + range: + min: 0 + max: 18446744073709552000 + group: sampling + - path: stop + type: string + label: Stop + description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence. + group: generation_length diff --git a/models/nvidia/nemotron-3-super-120b-a12b.yaml b/models/nvidia/nemotron-3-super-120b-a12b.yaml new file mode 100644 index 0000000..1a81de2 --- /dev/null +++ b/models/nvidia/nemotron-3-super-120b-a12b.yaml @@ -0,0 +1,62 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: nemotron-3-super-120b-a12b +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call. + default: 1 + range: + max: 1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. + default: 0.95 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. + default: 16384 + range: + min: 1 + max: 32768 + group: generation_length + - path: reasoning_effort + type: enum + label: Reasoning effort + description: Controls the reasoning mode. 'none' disables reasoning tokens, 'low' enables low-effort reasoning, and 'high' enables full reasoning. + default: high + values: + - none + - low + - high + group: reasoning + - path: reasoning_budget + type: integer + label: Reasoning budget + description: Maximum number of tokens the model may use for internal reasoning before being forced to end the reasoning trace. Use -1 to disable budget enforcement. + default: 16384 + range: + min: -1 + max: 32768 + group: reasoning + - path: seed + type: integer + label: Seed + description: Best-effort deterministic sampling seed. Repeated requests with the same seed and parameters should return the same result. + range: + min: 0 + max: 18446744073709552000 + group: sampling + - path: stop + type: string + label: Stop + description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence. + group: generation_length diff --git a/models/nvidia/nemotron-3-ultra-550b-a55b.yaml b/models/nvidia/nemotron-3-ultra-550b-a55b.yaml new file mode 100644 index 0000000..b7c2fef --- /dev/null +++ b/models/nvidia/nemotron-3-ultra-550b-a55b.yaml @@ -0,0 +1,62 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: nemotron-3-ultra-550b-a55b +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call. + default: 1 + range: + max: 1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. + default: 0.95 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. + default: 16384 + range: + min: 1 + max: 32768 + group: generation_length + - path: reasoning_effort + type: enum + label: Reasoning effort + description: Controls the reasoning mode. 'none' disables reasoning tokens, 'medium' enables efficient reasoning, and 'high' enables full reasoning. + default: high + values: + - none + - medium + - high + group: reasoning + - path: reasoning_budget + type: integer + label: Reasoning budget + description: Maximum number of tokens the model may use for internal reasoning before being forced to end the reasoning trace. Use -1 to disable budget enforcement. + default: 16384 + range: + min: -1 + max: 32768 + group: reasoning + - path: seed + type: integer + label: Seed + description: Best-effort deterministic sampling seed. Repeated requests with the same seed and parameters should return the same result. + range: + min: 0 + max: 18446744073709552000 + group: sampling + - path: stop + type: string + label: Stop + description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence. + group: generation_length diff --git a/models/nvidia/nemotron-content-safety-reasoning-4b.yaml b/models/nvidia/nemotron-content-safety-reasoning-4b.yaml new file mode 100644 index 0000000..7bfa8bb --- /dev/null +++ b/models/nvidia/nemotron-content-safety-reasoning-4b.yaml @@ -0,0 +1,43 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: nemotron-content-safety-reasoning-4b +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call. + default: 1 + range: + max: 1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. + default: 1 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. + default: 16384 + range: + min: 1 + max: 32768 + group: generation_length + - path: seed + type: integer + label: Seed + description: Best-effort deterministic sampling seed. Repeated requests with the same seed and parameters should return the same result. + range: + min: 0 + max: 18446744073709552000 + group: sampling + - path: stop + type: string + label: Stop + description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence. + group: generation_length diff --git a/models/nvidia/nemotron-mini-4b-instruct.yaml b/models/nvidia/nemotron-mini-4b-instruct.yaml new file mode 100644 index 0000000..070d6bf --- /dev/null +++ b/models/nvidia/nemotron-mini-4b-instruct.yaml @@ -0,0 +1,59 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: nemotron-mini-4b-instruct +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call. + default: 0.2 + range: + min: 0 + max: 1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. + default: 0.7 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. + default: 1024 + range: + min: 1 + max: 4096 + group: generation_length + - path: frequency_penalty + type: number + label: Frequency penalty + description: Penalizes new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: presence_penalty + type: number + label: Presence penalty + description: Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: stop + type: string + label: Stop + description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence. + group: generation_length + - path: tools + type: string + label: Tools + description: A list of tools the model may call. If no tools are provided, the model will not call any tools. + group: tooling diff --git a/models/nvidia/riva-translate-4b-instruct-v1.1.yaml b/models/nvidia/riva-translate-4b-instruct-v1.1.yaml new file mode 100644 index 0000000..420c6e0 --- /dev/null +++ b/models/nvidia/riva-translate-4b-instruct-v1.1.yaml @@ -0,0 +1,54 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: riva-translate-4b-instruct-v1.1 +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call. + default: 0 + range: + min: 0 + max: 1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. + default: 0.9 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. + default: 512 + range: + min: 1 + max: 4096 + group: generation_length + - path: frequency_penalty + type: number + label: Frequency penalty + description: Penalizes new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: presence_penalty + type: number + label: Presence penalty + description: Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: stop + type: string + label: Stop + description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence. + group: generation_length diff --git a/models/nvidia/usdcode-llama-3.1-70b-instruct.yaml b/models/nvidia/usdcode-llama-3.1-70b-instruct.yaml new file mode 100644 index 0000000..6f4aa2b --- /dev/null +++ b/models/nvidia/usdcode-llama-3.1-70b-instruct.yaml @@ -0,0 +1,42 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: usdcode-llama-3.1-70b-instruct +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call. + default: 0.1 + range: + min: 0 + max: 1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. + default: 1 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. + default: 1024 + range: + min: 1 + max: 2048 + group: generation_length + - path: expert_type + type: enum + label: Expert type + description: The type of expert to use. 'knowledge' answers with USD knowledge, 'code' responds with vanilla OpenUSD code, 'helperfunction' uses high-level helper functions, and 'auto' lets the LLM determine which expert to use. + default: auto + values: + - auto + - code + - knowledge + - helperfunction + group: provider_metadata