File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -204,10 +204,6 @@ def __init__(
204204 # we store the engine args because we have to reset the engine with a different event loop. See _model .
205205 self .engine_args = engine_args
206206
207- self ._tokenizer : PreTrainedTokenizerBase = AutoTokenizer .from_pretrained (
208- self ._hf_model_id
209- ) # type:ignore
210-
211207 @property
212208 def _model (self ) -> vllm .AsyncLLMEngine :
213209 """Use model when making generation requests."""
@@ -231,6 +227,7 @@ def _model(self) -> vllm.AsyncLLMEngine:
231227 self ._underlying_model = vllm .AsyncLLMEngine .from_engine_args (
232228 vllm .AsyncEngineArgs (model = self ._hf_model_id , ** self .engine_args )
233229 )
230+ self ._tokenizer = self ._underlying_model .get_tokenizer ()
234231 self ._event_loop = el
235232
236233 return self ._underlying_model
@@ -299,7 +296,7 @@ async def _generate_from_context_standard(
299296 FancyLogger .get_logger ().info (f"Tools for call: { tools .keys ()} " )
300297
301298 input_str : str = self ._tokenizer .apply_chat_template ( # type: ignore
302- ctx_as_chat ,
299+ ctx_as_chat , # type: ignore
303300 tokenize = False ,
304301 tools = convert_tools_to_json (tools ), # type: ignore
305302 )
You can’t perform that action at this time.
0 commit comments