Skip to content

Commit 58ae45f

Browse files
committed
feat(rl): update spec
1 parent 394df7b commit 58ae45f

1 file changed

Lines changed: 210 additions & 29 deletions

File tree

openapi.yaml

Lines changed: 210 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -7151,18 +7151,16 @@ paths:
71517151
in: query
71527152
required: false
71537153
schema:
7154-
description: Maximum number of sessions to return (1-100), defaults to 20
7154+
description: Maximum number of sessions to return (1-100)
71557155
type: integer
71567156
format: int32
71577157
default: "20"
7158-
- name: offset
7158+
- name: after
71597159
in: query
71607160
required: false
71617161
schema:
7162-
description: Number of sessions to skip
7163-
type: integer
7164-
format: int32
7165-
default: "0"
7162+
description: Cursor for pagination (ID of the last session from the previous page)
7163+
type: string
71667164
post:
71677165
summary: Create training session
71687166
description: Creates a training session and returns its details.
@@ -7187,7 +7185,6 @@ paths:
71877185
application/json:
71887186
schema:
71897187
$ref: '#/components/schemas/RpcStatus'
7190-
71917188
/rl/training-sessions/{session_id}:
71927189
get:
71937190
summary: Get training session
@@ -7214,6 +7211,32 @@ paths:
72147211
schema:
72157212
description: ID of the training session
72167213
type: string
7214+
/rl/training-sessions/{session_id}/stop:
7215+
post:
7216+
summary: Stop training session
7217+
description: Stops a training session.
7218+
operationId: stopTrainingSession
7219+
tags: [RL]
7220+
responses:
7221+
"200":
7222+
description: Training session details
7223+
content:
7224+
application/json:
7225+
schema:
7226+
$ref: '#/components/schemas/RL.TrainingSession'
7227+
default:
7228+
description: An unexpected error response.
7229+
content:
7230+
application/json:
7231+
schema:
7232+
$ref: '#/components/schemas/RpcStatus'
7233+
parameters:
7234+
- name: session_id
7235+
in: path
7236+
required: true
7237+
schema:
7238+
description: ID of the training session
7239+
type: string
72177240
/rl/training-sessions/{session_id}/operations/forward-backward/{operation_id}:
72187241
get:
72197242
summary: Get forward-backward operation
@@ -7406,19 +7429,19 @@ paths:
74067429
schema:
74077430
description: Training session ID
74087431
type: string
7409-
/rl/training-sessions/{session_id}/stop:
7432+
/rl/training-sessions/{session_id}/operations/inference-checkpoint:
74107433
post:
7411-
summary: Stop training session
7412-
description: Stops a training session.
7413-
operationId: stopTrainingSession
7434+
summary: Create inference checkpoint
7435+
description: Submits an operation that will asynchronously save the current LoRA adapter as an inference checkpoint and upload it to object storage.
7436+
operationId: createInferenceCheckpoint
74147437
tags: [RL]
74157438
responses:
74167439
"200":
7417-
description: Training session details
7440+
description: Inference checkpoint operation details
74187441
content:
74197442
application/json:
74207443
schema:
7421-
$ref: '#/components/schemas/RL.TrainingSession'
7444+
$ref: '#/components/schemas/RL.InferenceCheckpointOperation'
74227445
default:
74237446
description: An unexpected error response.
74247447
content:
@@ -7430,8 +7453,72 @@ paths:
74307453
in: path
74317454
required: true
74327455
schema:
7433-
description: ID of the training session
7456+
description: Training session ID
74347457
type: string
7458+
/rl/training-sessions/{session_id}/operations/inference-checkpoint/{operation_id}:
7459+
get:
7460+
summary: Get inference checkpoint operation
7461+
description: Retrieves the current status and result of an inference checkpoint operation.
7462+
operationId: getInferenceCheckpointOperation
7463+
tags: [RL]
7464+
responses:
7465+
"200":
7466+
description: Inference checkpoint operation details
7467+
content:
7468+
application/json:
7469+
schema:
7470+
$ref: '#/components/schemas/RL.InferenceCheckpointOperation'
7471+
default:
7472+
description: An unexpected error response.
7473+
content:
7474+
application/json:
7475+
schema:
7476+
$ref: '#/components/schemas/RpcStatus'
7477+
parameters:
7478+
- name: session_id
7479+
in: path
7480+
required: true
7481+
schema:
7482+
description: Training session ID
7483+
type: string
7484+
- name: operation_id
7485+
in: path
7486+
required: true
7487+
schema:
7488+
description: Operation ID
7489+
type: string
7490+
/rl/checkpoints/{id}/download:
7491+
get:
7492+
summary: Download checkpoint
7493+
description: Returns presigned URLs for downloading a checkpoint's model files. Only inference checkpoints support downloading.
7494+
operationId: downloadCheckpoint
7495+
tags: [RL]
7496+
responses:
7497+
"200":
7498+
description: Checkpoint download URLs
7499+
content:
7500+
application/json:
7501+
schema:
7502+
$ref: '#/components/schemas/RL.CheckpointDownloadResponse'
7503+
default:
7504+
description: An unexpected error response.
7505+
content:
7506+
application/json:
7507+
schema:
7508+
$ref: '#/components/schemas/RpcStatus'
7509+
parameters:
7510+
- name: id
7511+
in: path
7512+
required: true
7513+
schema:
7514+
description: ID of the checkpoint
7515+
type: string
7516+
- name: variant
7517+
in: query
7518+
required: true
7519+
schema:
7520+
description: "Checkpoint variant to download: merged (full model) or adapter (LoRA weights only)"
7521+
$ref: '#/components/schemas/RL.CheckpointVariant'
74357522

74367523
components:
74377524
securitySchemes:
@@ -7544,21 +7631,15 @@ components:
75447631
RL.ListMeta:
75457632
type: object
75467633
properties:
7547-
total:
7548-
type: string
7549-
format: int64
7550-
example: 42
7551-
description: Total number of items matching the filter
75527634
limit:
75537635
type: integer
75547636
format: int32
75557637
example: 20
75567638
description: Maximum number of items returned per page
7557-
offset:
7558-
type: integer
7559-
format: int32
7560-
example: 0
7561-
description: Number of items skipped
7639+
has_more:
7640+
type: boolean
7641+
example: true
7642+
description: Whether more items exist beyond this page
75627643
RL.EncodedText:
75637644
type: object
75647645
properties:
@@ -7622,7 +7703,7 @@ components:
76227703
enum:
76237704
- GRPO_LOSS_AGGREGATION_TYPE_UNSPECIFIED
76247705
- GRPO_LOSS_AGGREGATION_TYPE_FIXED_HORIZON
7625-
- GRPO_LOSS_AGGREGATION_TYPE_PER_TOKEN
7706+
- GRPO_LOSS_AGGREGATION_TYPE_TOKEN_MEAN
76267707
default: GRPO_LOSS_AGGREGATION_TYPE_UNSPECIFIED
76277708
RL.LossConfig:
76287709
type: object
@@ -7870,6 +7951,7 @@ components:
78707951
- TRAINING_OPERATION_ERROR_CODE_TIMEOUT
78717952
- TRAINING_OPERATION_ERROR_CODE_INTERNAL_ERROR
78727953
- TRAINING_OPERATION_ERROR_CODE_SESSION_NOT_ACTIVE
7954+
- TRAINING_OPERATION_ERROR_CODE_INVALID_INPUT
78737955
default: TRAINING_OPERATION_ERROR_CODE_UNSPECIFIED
78747956
RL.TrainingOperationStatus:
78757957
type: string
@@ -7910,30 +7992,39 @@ components:
79107992
RL.TrainingSession:
79117993
type: object
79127994
properties:
7913-
session_id:
7995+
id:
79147996
type: string
7997+
example: 123e4567-e89b-12d3-a456-426614174000
79157998
description: ID of the training session
79167999
status:
79178000
$ref: '#/components/schemas/RL.TrainingSessionStatus'
8001+
example: TRAINING_SESSION_STATUS_RUNNING
8002+
description: Status of the training session
79188003
base_model:
79198004
type: string
79208005
example: meta-llama/Meta-Llama-3-8B-Instruct
79218006
description: Base model used for the training session
7922-
checkpoint_id:
7923-
description: Checkpoint ID to use for the training session
7924-
type: string
8007+
inference_checkpoints:
8008+
type: array
8009+
items:
8010+
type: object
8011+
$ref: '#/components/schemas/RL.InferenceCheckpoint'
8012+
description: List of saved inference checkpoints for this session
79258013
step:
79268014
description: Current training step
79278015
type: string
79288016
format: uint64
8017+
example: 100
79298018
default: "0"
79308019
created_at:
79318020
type: string
79328021
format: date-time
8022+
example: "2026-01-02T00:00:00Z"
79338023
description: Timestamp when the training session was created
79348024
updated_at:
79358025
type: string
79368026
format: date-time
8027+
example: "2026-01-02T00:00:05Z"
79378028
description: Timestamp when the training session was last updated
79388029
lora_config:
79398030
$ref: '#/components/schemas/RL.LoraConfig'
@@ -7999,6 +8090,96 @@ components:
79998090
format: float
80008091
example: 0.1
80018092
default: "0.1"
8093+
RL.InferenceCheckpointOperation:
8094+
type: object
8095+
properties:
8096+
operation_id:
8097+
type: string
8098+
example: 550e8400-e29b-41d4-a716-446655440000
8099+
description: Operation ID
8100+
status:
8101+
$ref: '#/components/schemas/RL.TrainingOperationStatus'
8102+
example: TRAINING_OPERATION_STATUS_PENDING
8103+
description: Operation status
8104+
output:
8105+
$ref: '#/components/schemas/RL.InferenceCheckpointResult'
8106+
error:
8107+
$ref: '#/components/schemas/RL.TrainingOperationError'
8108+
RL.InferenceCheckpointResult:
8109+
type: object
8110+
properties:
8111+
model_name:
8112+
type: string
8113+
example: username/Meta-Llama-3-8B-rl-step-42-20260216
8114+
description: Registered model name for downloading the checkpoint
8115+
RL.InferenceCheckpoint:
8116+
type: object
8117+
description: Saved inference checkpoint
8118+
properties:
8119+
id:
8120+
type: string
8121+
example: 123e4567-e89b-12d3-a456-426614174000
8122+
description: Unique identifier for the checkpoint
8123+
step:
8124+
type: string
8125+
format: uint64
8126+
example: 42
8127+
description: Training step at time of save
8128+
created_at:
8129+
type: string
8130+
format: date-time
8131+
example: "2026-01-02T00:00:00Z"
8132+
description: Timestamp when the checkpoint was created
8133+
registration:
8134+
$ref: '#/components/schemas/RL.InferenceCheckpointRegistration'
8135+
description: Model registration details
8136+
RL.InferenceCheckpointRegistration:
8137+
type: object
8138+
description: Model registration details for an inference checkpoint
8139+
properties:
8140+
model_name:
8141+
type: string
8142+
example: username/Meta-Llama-3-8B-rl-step-42-20260216
8143+
description: Registered model name for downloading the checkpoint
8144+
registered_at:
8145+
type: string
8146+
format: date-time
8147+
example: "2026-01-02T00:00:00Z"
8148+
description: Timestamp when the model was registered
8149+
RL.CheckpointVariant:
8150+
type: string
8151+
enum:
8152+
- CHECKPOINT_VARIANT_UNSPECIFIED
8153+
- CHECKPOINT_VARIANT_MERGED
8154+
- CHECKPOINT_VARIANT_ADAPTER
8155+
default: CHECKPOINT_VARIANT_UNSPECIFIED
8156+
description: "Checkpoint variant: merged (full model) or adapter (LoRA weights only)"
8157+
RL.CheckpointFile:
8158+
type: object
8159+
description: A downloadable file within a checkpoint
8160+
properties:
8161+
filename:
8162+
type: string
8163+
example: model-00001-of-00002.safetensors
8164+
description: Name of the file
8165+
url:
8166+
type: string
8167+
example: "https://..."
8168+
description: Presigned URL for downloading the file
8169+
size:
8170+
type: string
8171+
format: int64
8172+
example: 123456789
8173+
description: File size in bytes
8174+
RL.CheckpointDownloadResponse:
8175+
type: object
8176+
properties:
8177+
data:
8178+
type: array
8179+
items:
8180+
type: object
8181+
$ref: '#/components/schemas/RL.CheckpointFile'
8182+
description: List of files with presigned download URLs
80028183
ErrorResponse:
80038184
type: object
80048185
properties:

0 commit comments

Comments
 (0)