
Commit c249b98

fix bug
1 parent c7e1d58 commit c249b98

2 files changed: 2 additions & 3 deletions


llm/server/server/http_server/api.py

Lines changed: 1 addition & 0 deletions
@@ -31,6 +31,7 @@ class Req(BaseModel):
     req_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
     input_ids: Optional[List[int]] = None
     text: Optional[str] = None
+    stop_sequences: Optional[List] = None
     messages: Optional[List] = None
     max_dec_len: Optional[int] = None
     seq_len: Optional[int] = None
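
For context, a minimal usage sketch (not part of the commit) of how the new stop_sequences field can be set on a request. The Req model is reproduced from the diff above; pydantic is assumed, and the example values are hypothetical.

# Hypothetical usage sketch: populating the new stop_sequences field.
import uuid
from typing import List, Optional

from pydantic import BaseModel, Field


class Req(BaseModel):
    req_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    input_ids: Optional[List[int]] = None
    text: Optional[str] = None
    stop_sequences: Optional[List] = None  # added by this commit
    messages: Optional[List] = None
    max_dec_len: Optional[int] = None
    seq_len: Optional[int] = None


# Generation can be asked to halt once any of these strings appears.
req = Req(text="Hello", stop_sequences=["</s>", "\n\n"])
print(req.req_id, req.stop_sequences)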

llm/server/server/triton_server.py

Lines changed: 1 addition & 3 deletions
@@ -102,9 +102,7 @@ def _cache_special_tokens(self, batch_result):
         for i in range(len(batch_result)):
             is_end = batch_result[i].get("is_end", 0)
             token_ids = batch_result[i]["token_ids"]
-            return_all_tokens = batch_result[i].get("return_all_tokens", False)
-            cache_special_token = False if is_end == 1 else True
-            if is_end != 1 and (cache_special_token or return_all_tokens or self.cfg.disable_streaming):
+            if is_end != 1:
                 if batch_result[i]["req_id"] not in self.token_buffer:
                     self.token_buffer[batch_result[i]["req_id"]] = list()
                     self.score_buffer[batch_result[i]["req_id"]] = list()
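
The removed lines were dead logic: cache_special_token was set to True exactly when is_end != 1, so the parenthesized disjunction was always true inside that branch, and the outer check reduces to `if is_end != 1:` with no behavior change. A minimal runnable sketch of the resulting branch follows; the standalone token_buffer/score_buffer dicts and the trailing extend call are assumptions standing in for server state not shown in this diff.

# Minimal sketch of the simplified buffering branch, assuming batch_result
# is a list of dicts with "req_id", "token_ids", and an optional "is_end"
# flag (1 = finished). The real method keeps this state on the server object.
token_buffer = {}
score_buffer = {}

def cache_special_tokens(batch_result):
    for result in batch_result:
        # Buffer tokens only for requests that are still generating.
        if result.get("is_end", 0) != 1:
            if result["req_id"] not in token_buffer:
                token_buffer[result["req_id"]] = list()
                score_buffer[result["req_id"]] = list()
            # Assumed continuation: accumulate this step's tokens.
            token_buffer[result["req_id"]].extend(result["token_ids"])

cache_special_tokens([
    {"req_id": "r1", "token_ids": [101, 102], "is_end": 0},
    {"req_id": "r2", "token_ids": [2], "is_end": 1},
])
print(token_buffer)  # {'r1': [101, 102]}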
