Skip to content

Commit bcf6da7

Browse files
committed
feat: Add new API fields for gpu clusters
1 parent a11453c commit bcf6da7

1 file changed

Lines changed: 85 additions & 11 deletions

File tree

openapi.yaml

Lines changed: 85 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8454,7 +8454,8 @@ components:
84548454
- gpu_type
84558455
- num_gpus
84568456
- cluster_name
8457-
- driver_version
8457+
- cuda_version
8458+
- nvidia_driver_version
84588459
- billing_type
84598460
type: object
84608461
properties:
@@ -8485,14 +8486,6 @@ components:
84858486
x-stainless-terraform-configurability: computed
84868487
description: Duration in days to keep the cluster running.
84878488
type: integer
8488-
driver_version:
8489-
description: NVIDIA driver version to use in the cluster.
8490-
type: string
8491-
enum:
8492-
- CUDA_12_5_555
8493-
- CUDA_12_6_560
8494-
- CUDA_12_6_565
8495-
- CUDA_12_8_570
84968489
shared_volume:
84978490
x-stainless-terraform-configurability: computed
84988491
$ref: '#/components/schemas/GPUClustersSharedVolumeCreateRequest'
@@ -8509,6 +8502,69 @@ components:
85098502
enum:
85108503
- RESERVED
85118504
- ON_DEMAND
8505+
- SCHEDULED_CAPACITY
8506+
gpu_node_failover_enabled:
8507+
type: boolean
8508+
default: false
8509+
description: Whether automated GPU node failover should be enabled for this cluster. By default, it is disabled.
8510+
auto_scaled:
8511+
type: boolean
8512+
default: false
8513+
description: Whether the GPU cluster should be auto-scaled based on the workload. By default, it is not auto-scaled.
8514+
auto_scale_max_gpus:
8515+
type: integer
8516+
description: Maximum number of GPUs to which the cluster can be auto-scaled up. This field is required if auto_scaled is true.
8517+
format: uint32
8518+
slurm_shm_size_gib:
8519+
type: integer
8520+
description: Shared memory size in GiB for the Slurm cluster. This field is required if cluster_type is SLURM.
8521+
capacity_pool_id:
8522+
type: string
8523+
description: ID of the capacity pool to use for the cluster. This field is optional and only applicable if the cluster is created from a capacity pool.
8524+
provision_at_ts:
8525+
type: string
8526+
description: Provision timestamp of the cluster. Used with SCHEDULED_CAPACITY billing to specify when the cluster should be provisioned. If not provided, the cluster will be provisioned immediately.
8527+
format: date-time
8528+
decommission_at_ts:
8529+
type: string
8530+
description: Decommission timestamp of the cluster. This field is required for SCHEDULED_CAPACITY billing to specify the decommission time for the cluster.
8531+
format: date-time
8532+
oidc_config:
8533+
$ref: '#/components/schemas/GPUClusterOIDCConfig'
8534+
install_traefik:
8535+
type: boolean
8536+
default: false
8537+
description: Whether to install the Traefik ingress controller in the cluster. This field is only applicable for Kubernetes clusters and is false by default.
8538+
cuda_version:
8539+
type: string
8540+
description: CUDA version for this cluster. For example, 12.5
8541+
nvidia_driver_version:
8542+
type: string
8543+
description: NVIDIA driver version for this cluster. For example, 550. Only certain combinations of cuda_version and nvidia_driver_version are supported.
8544+
GPUClusterOIDCConfig:
8545+
type: object
8546+
properties:
8547+
issuer_url:
8548+
type: string
8549+
description: OIDC issuer URL for authentication. For example, https://accounts.google.com
8550+
client_id:
8551+
type: string
8552+
description: OIDC client ID for authentication.
8553+
username_claim:
8554+
type: string
8555+
description: JWT claim to use as the username. For example, 'sub' or 'email'
8556+
username_prefix:
8557+
type: string
8558+
description: Prefix to add to the username claim to form the final username. For example, 'oidc:'
8559+
group_claim:
8560+
type: string
8561+
description: JWT claim to use for user groups. For example, 'groups'
8562+
group_prefix:
8563+
type: string
8564+
description: Prefix to add to the group claim to form the final group name. For example, 'oidc:'
8565+
ca_cert:
8566+
type: string
8567+
description: CA certificate in PEM format to validate the OIDC issuer's TLS certificate. This field is optional but recommended if the issuer uses a private CA or self-signed certificate.
85128568
GPUClusterGPUWorkerNode:
85138569
type: object
85148570
required:
@@ -8547,8 +8603,8 @@ components:
85478603
- region
85488604
- gpu_type
85498605
- cluster_name
8550-
- duration_hours
8551-
- driver_version
8606+
- cuda_version
8607+
- nvidia_driver_version
85528608
- volumes
85538609
- status
85548610
- control_plane_nodes
@@ -8611,6 +8667,24 @@ components:
86118667
type: string
86128668
num_gpus:
86138669
type: integer
8670+
slurm_shm_size_gib:
8671+
type: integer
8672+
capacity_pool_id:
8673+
type: string
8674+
provision_at_ts:
8675+
type: string
8676+
format: date-time
8677+
decommission_at_ts:
8678+
type: string
8679+
format: date-time
8680+
oidc_config:
8681+
$ref: '#/components/schemas/GPUClusterOIDCConfig'
8682+
install_traefik:
8683+
type: boolean
8684+
cuda_version:
8685+
type: string
8686+
nvidia_driver_version:
8687+
type: string
86148688
GPUClusterUpdateRequest:
86158689
type: object
86168690
properties:

0 commit comments

Comments
 (0)