We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent ed5f65a, commit 99d0921 (copy full SHA for 99d0921)
1 file changed
llm/server/server/engine/infer.py
@@ -500,8 +500,8 @@ def run(self):
500
if self.proposer is not None:
501
self.proposer.run(
502
self.share_inputs,
503
- real_batch_size=self.args.max_batch_size,
504
- seq_lens_this_time=self.share_inputs["seq_lens_this_time"],
+ real_batch_size=seq_lens_this_time.shape[0],
+ seq_lens_this_time=seq_lens_this_time,
505
)
506
507
self.infer_engine.predictor.run()
0 commit comments