File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -7789,6 +7789,7 @@ components:
77897789 - GRPO_LOSS_AGGREGATION_TYPE_UNSPECIFIED
77907790 - GRPO_LOSS_AGGREGATION_TYPE_FIXED_HORIZON
77917791 - GRPO_LOSS_AGGREGATION_TYPE_TOKEN_MEAN
7792+ - GRPO_LOSS_AGGREGATION_TYPE_SEQUENCE_MEAN
77927793 default : GRPO_LOSS_AGGREGATION_TYPE_UNSPECIFIED
77937794 RL.LossConfig :
77947795 type : object
@@ -7900,12 +7901,14 @@ components:
79007901 properties :
79017902 target_tokens :
79027903 $ref : '#/components/schemas/RL.LossTargetTokens'
7903- description : Target tokens for loss computation (optional, defaults to shifted input_ids)
7904+ description : Target tokens for loss computation
79047905 loss_mask :
79057906 $ref : '#/components/schemas/RL.LossMask'
79067907 description : Per-token loss mask (1=compute loss, 0=ignore)
79077908 grpo_inputs :
79087909 $ref : '#/components/schemas/RL.GRPOLossInputs'
7910+ required :
7911+ - target_tokens
79097912 RL.TrainingSample :
79107913 type : object
79117914 required :
You can’t perform that action at this time.
0 commit comments