Skip to content

Commit 05f0a91

Browse files
authored
fix: avoid instantiating an additional tokenizer (#548)
1 parent 6f3e131 commit 05f0a91

1 file changed

Lines changed: 2 additions & 5 deletions

File tree

mellea/backends/vllm.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -204,10 +204,6 @@ def __init__(
204204
# we store the engine args because we have to reset the engine with a different event loop. See _model .
205205
self.engine_args = engine_args
206206

207-
self._tokenizer: PreTrainedTokenizerBase = AutoTokenizer.from_pretrained(
208-
self._hf_model_id
209-
) # type:ignore
210-
211207
@property
212208
def _model(self) -> vllm.AsyncLLMEngine:
213209
"""Use model when making generation requests."""
@@ -231,6 +227,7 @@ def _model(self) -> vllm.AsyncLLMEngine:
231227
self._underlying_model = vllm.AsyncLLMEngine.from_engine_args(
232228
vllm.AsyncEngineArgs(model=self._hf_model_id, **self.engine_args)
233229
)
230+
self._tokenizer = self._underlying_model.get_tokenizer()
234231
self._event_loop = el
235232

236233
return self._underlying_model
@@ -299,7 +296,7 @@ async def _generate_from_context_standard(
299296
FancyLogger.get_logger().info(f"Tools for call: {tools.keys()}")
300297

301298
input_str: str = self._tokenizer.apply_chat_template( # type: ignore
302-
ctx_as_chat,
299+
ctx_as_chat, # type: ignore
303300
tokenize=False,
304301
tools=convert_tools_to_json(tools), # type: ignore
305302
)

0 commit comments

Comments (0)