@@ -7151,18 +7151,16 @@ paths:
71517151 in : query
71527152 required : false
71537153 schema :
7154- description : Maximum number of sessions to return (1-100), defaults to 20
7154+ description : Maximum number of sessions to return (1-100)
71557155 type : integer
71567156 format : int32
71577157 default : " 20"
7158- - name : offset
7158+ - name : after
71597159 in : query
71607160 required : false
71617161 schema :
7162- description : Number of sessions to skip
7163- type : integer
7164- format : int32
7165- default : " 0"
7162+ description : Cursor for pagination (ID of the last session from the previous page)
7163+ type : string
71667164 post :
71677165 summary : Create training session
71687166 description : Creates a training session and returns its details.
@@ -7187,7 +7185,6 @@ paths:
71877185 application/json :
71887186 schema :
71897187 $ref : ' #/components/schemas/RpcStatus'
7190-
71917188 /rl/training-sessions/{session_id} :
71927189 get :
71937190 summary : Get training session
@@ -7214,6 +7211,32 @@ paths:
72147211 schema :
72157212 description : ID of the training session
72167213 type : string
7214+ /rl/training-sessions/{session_id}/stop :
7215+ post :
7216+ summary : Stop training session
7217+ description : Stops a training session.
7218+ operationId : stopTrainingSession
7219+ tags : [RL]
7220+ responses :
7221+ " 200 " :
7222+ description : Training session details
7223+ content :
7224+ application/json :
7225+ schema :
7226+ $ref : ' #/components/schemas/RL.TrainingSession'
7227+ default :
7228+ description : An unexpected error response.
7229+ content :
7230+ application/json :
7231+ schema :
7232+ $ref : ' #/components/schemas/RpcStatus'
7233+ parameters :
7234+ - name : session_id
7235+ in : path
7236+ required : true
7237+ schema :
7238+ description : ID of the training session
7239+ type : string
72177240 /rl/training-sessions/{session_id}/operations/forward-backward/{operation_id} :
72187241 get :
72197242 summary : Get forward-backward operation
@@ -7406,19 +7429,19 @@ paths:
74067429 schema :
74077430 description : Training session ID
74087431 type : string
7409- /rl/training-sessions/{session_id}/stop :
7432+ /rl/training-sessions/{session_id}/operations/inference-checkpoint :
74107433 post :
7411- summary : Stop training session
7412- description : Stops a training session .
7413- operationId : stopTrainingSession
7434+ summary : Create inference checkpoint
7435+ description : Submits an operation that will asynchronously save the current LoRA adapter as an inference checkpoint and upload it to object storage .
7436+ operationId : createInferenceCheckpoint
74147437 tags : [RL]
74157438 responses :
74167439 " 200 " :
7417- description : Training session details
7440+ description : Inference checkpoint operation details
74187441 content :
74197442 application/json :
74207443 schema :
7421- $ref : ' #/components/schemas/RL.TrainingSession '
7444+ $ref : ' #/components/schemas/RL.InferenceCheckpointOperation '
74227445 default :
74237446 description : An unexpected error response.
74247447 content :
@@ -7430,8 +7453,72 @@ paths:
74307453 in : path
74317454 required : true
74327455 schema :
7433- description : ID of the training session
7456+ description : Training session ID
74347457 type : string
7458+ /rl/training-sessions/{session_id}/operations/inference-checkpoint/{operation_id} :
7459+ get :
7460+ summary : Get inference checkpoint operation
7461+ description : Retrieves the current status and result of an inference checkpoint operation.
7462+ operationId : getInferenceCheckpointOperation
7463+ tags : [RL]
7464+ responses :
7465+ " 200 " :
7466+ description : Inference checkpoint operation details
7467+ content :
7468+ application/json :
7469+ schema :
7470+ $ref : ' #/components/schemas/RL.InferenceCheckpointOperation'
7471+ default :
7472+ description : An unexpected error response.
7473+ content :
7474+ application/json :
7475+ schema :
7476+ $ref : ' #/components/schemas/RpcStatus'
7477+ parameters :
7478+ - name : session_id
7479+ in : path
7480+ required : true
7481+ schema :
7482+ description : Training session ID
7483+ type : string
7484+ - name : operation_id
7485+ in : path
7486+ required : true
7487+ schema :
7488+ description : Operation ID
7489+ type : string
7490+ /rl/checkpoints/{id}/download :
7491+ get :
7492+ summary : Download checkpoint
7493+ description : Returns presigned URLs for downloading a checkpoint's model files. Only inference checkpoints support downloading.
7494+ operationId : downloadCheckpoint
7495+ tags : [RL]
7496+ responses :
7497+ " 200 " :
7498+ description : Checkpoint download URLs
7499+ content :
7500+ application/json :
7501+ schema :
7502+ $ref : ' #/components/schemas/RL.CheckpointDownloadResponse'
7503+ default :
7504+ description : An unexpected error response.
7505+ content :
7506+ application/json :
7507+ schema :
7508+ $ref : ' #/components/schemas/RpcStatus'
7509+ parameters :
7510+ - name : id
7511+ in : path
7512+ required : true
7513+ schema :
7514+ description : ID of the checkpoint
7515+ type : string
7516+ - name : variant
7517+ in : query
7518+ required : true
7519+ schema :
7520+ description : " Checkpoint variant to download: merged (full model) or adapter (LoRA weights only)"
7521+ $ref : ' #/components/schemas/RL.CheckpointVariant'
74357522
74367523components :
74377524 securitySchemes :
@@ -7544,21 +7631,15 @@ components:
75447631 RL.ListMeta :
75457632 type : object
75467633 properties :
7547- total :
7548- type : string
7549- format : int64
7550- example : 42
7551- description : Total number of items matching the filter
75527634 limit :
75537635 type : integer
75547636 format : int32
75557637 example : 20
75567638 description : Maximum number of items returned per page
7557- offset :
7558- type : integer
7559- format : int32
7560- example : 0
7561- description : Number of items skipped
7639+ has_more :
7640+ type : boolean
7641+ example : true
7642+ description : Whether more items exist beyond this page
75627643 RL.EncodedText :
75637644 type : object
75647645 properties :
@@ -7622,7 +7703,7 @@ components:
76227703 enum :
76237704 - GRPO_LOSS_AGGREGATION_TYPE_UNSPECIFIED
76247705 - GRPO_LOSS_AGGREGATION_TYPE_FIXED_HORIZON
7625- - GRPO_LOSS_AGGREGATION_TYPE_PER_TOKEN
7706+ - GRPO_LOSS_AGGREGATION_TYPE_TOKEN_MEAN
76267707 default : GRPO_LOSS_AGGREGATION_TYPE_UNSPECIFIED
76277708 RL.LossConfig :
76287709 type : object
@@ -7870,6 +7951,7 @@ components:
78707951 - TRAINING_OPERATION_ERROR_CODE_TIMEOUT
78717952 - TRAINING_OPERATION_ERROR_CODE_INTERNAL_ERROR
78727953 - TRAINING_OPERATION_ERROR_CODE_SESSION_NOT_ACTIVE
7954+ - TRAINING_OPERATION_ERROR_CODE_INVALID_INPUT
78737955 default : TRAINING_OPERATION_ERROR_CODE_UNSPECIFIED
78747956 RL.TrainingOperationStatus :
78757957 type : string
@@ -7910,30 +7992,39 @@ components:
79107992 RL.TrainingSession :
79117993 type : object
79127994 properties :
7913- session_id :
7995+ id :
79147996 type : string
7997+ example : 123e4567-e89b-12d3-a456-426614174000
79157998 description : ID of the training session
79167999 status :
79178000 $ref : ' #/components/schemas/RL.TrainingSessionStatus'
8001+ example : TRAINING_SESSION_STATUS_RUNNING
8002+ description : Status of the training session
79188003 base_model :
79198004 type : string
79208005 example : meta-llama/Meta-Llama-3-8B-Instruct
79218006 description : Base model used for the training session
7922- checkpoint_id :
7923- description : Checkpoint ID to use for the training session
7924- type : string
8007+ inference_checkpoints :
8008+ type : array
8009+ items :
8010+ type : object
8011+ $ref : ' #/components/schemas/RL.InferenceCheckpoint'
8012+ description : List of saved inference checkpoints for this session
79258013 step :
79268014 description : Current training step
79278015 type : string
79288016 format : uint64
8017+ example : 100
79298018 default : " 0"
79308019 created_at :
79318020 type : string
79328021 format : date-time
8022+ example : " 2026-01-02T00:00:00Z"
79338023 description : Timestamp when the training session was created
79348024 updated_at :
79358025 type : string
79368026 format : date-time
8027+ example : " 2026-01-02T00:00:05Z"
79378028 description : Timestamp when the training session was last updated
79388029 lora_config :
79398030 $ref : ' #/components/schemas/RL.LoraConfig'
@@ -7999,6 +8090,96 @@ components:
79998090 format : float
80008091 example : 0.1
80018092 default : " 0.1"
8093+ RL.InferenceCheckpointOperation :
8094+ type : object
8095+ properties :
8096+ operation_id :
8097+ type : string
8098+ example : 550e8400-e29b-41d4-a716-446655440000
8099+ description : Operation ID
8100+ status :
8101+ $ref : ' #/components/schemas/RL.TrainingOperationStatus'
8102+ example : TRAINING_OPERATION_STATUS_PENDING
8103+ description : Operation status
8104+ output :
8105+ $ref : ' #/components/schemas/RL.InferenceCheckpointResult'
8106+ error :
8107+ $ref : ' #/components/schemas/RL.TrainingOperationError'
8108+ RL.InferenceCheckpointResult :
8109+ type : object
8110+ properties :
8111+ model_name :
8112+ type : string
8113+ example : username/Meta-Llama-3-8B-rl-step-42-20260216
8114+ description : Registered model name for downloading the checkpoint
8115+ RL.InferenceCheckpoint :
8116+ type : object
8117+ description : Saved inference checkpoint
8118+ properties :
8119+ id :
8120+ type : string
8121+ example : 123e4567-e89b-12d3-a456-426614174000
8122+ description : Unique identifier for the checkpoint
8123+ step :
8124+ type : string
8125+ format : uint64
8126+ example : 42
8127+ description : Training step at time of save
8128+ created_at :
8129+ type : string
8130+ format : date-time
8131+ example : " 2026-01-02T00:00:00Z"
8132+ description : Timestamp when the checkpoint was created
8133+ registration :
8134+ $ref : ' #/components/schemas/RL.InferenceCheckpointRegistration'
8135+ description : Model registration details
8136+ RL.InferenceCheckpointRegistration :
8137+ type : object
8138+ description : Model registration details for an inference checkpoint
8139+ properties :
8140+ model_name :
8141+ type : string
8142+ example : username/Meta-Llama-3-8B-rl-step-42-20260216
8143+ description : Registered model name for downloading the checkpoint
8144+ registered_at :
8145+ type : string
8146+ format : date-time
8147+ example : " 2026-01-02T00:00:00Z"
8148+ description : Timestamp when the model was registered
8149+ RL.CheckpointVariant :
8150+ type : string
8151+ enum :
8152+ - CHECKPOINT_VARIANT_UNSPECIFIED
8153+ - CHECKPOINT_VARIANT_MERGED
8154+ - CHECKPOINT_VARIANT_ADAPTER
8155+ default : CHECKPOINT_VARIANT_UNSPECIFIED
8156+ description : " Checkpoint variant: merged (full model) or adapter (LoRA weights only)"
8157+ RL.CheckpointFile :
8158+ type : object
8159+ description : A downloadable file within a checkpoint
8160+ properties :
8161+ filename :
8162+ type : string
8163+ example : model-00001-of-00002.safetensors
8164+ description : Name of the file
8165+ url :
8166+ type : string
8167+ example : " https://..."
8168+ description : Presigned URL for downloading the file
8169+ size :
8170+ type : string
8171+ format : int64
8172+ example : 123456789
8173+ description : File size in bytes
8174+ RL.CheckpointDownloadResponse :
8175+ type : object
8176+ properties :
8177+ data :
8178+ type : array
8179+ items :
8180+ type : object
8181+ $ref : ' #/components/schemas/RL.CheckpointFile'
8182+ description : List of files with presigned download URLs
80028183 ErrorResponse :
80038184 type : object
80048185 properties :
0 commit comments