We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 8266ed7, commit c7e1d58 (Copy full SHA for c7e1d58)
1 file changed
llm/server/server/triton_server.py
@@ -103,7 +103,7 @@ def _cache_special_tokens(self, batch_result):
103
is_end = batch_result[i].get("is_end", 0)
104
token_ids = batch_result[i]["token_ids"]
105
return_all_tokens = batch_result[i].get("return_all_tokens", False)
106
- cache_special_token = False if is_end == 1 else (13 <= token_ids[0] <= 268)
+ cache_special_token = False if is_end == 1 else True
107
if is_end != 1 and (cache_special_token or return_all_tokens or self.cfg.disable_streaming):
108
if batch_result[i]["req_id"] not in self.token_buffer:
109
self.token_buffer[batch_result[i]["req_id"]] = list()
0 commit comments