We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent e3bc5aa, commit ce3c09d (Copy full SHA for ce3c09d)
2 files changed
llm/server/server/engine/infer.py
@@ -29,7 +29,7 @@
29
from paddlenlp_ops import step_paddle, speculate_step_paddle
30
from server.data.processor import DataProcessor
31
from server.engine.config import Config
32
-from server.engine.proposers import InferenceWithReferenceProposer
+from paddlenlp.experimental.transformers import InferenceWithReferenceProposer
33
from server.utils import get_logger
34
from task_queue_manager import TaskQueueManager
35
@@ -518,6 +518,7 @@ def run(self):
518
self.share_inputs['infer_seed'].add_(infer_seed_increment)
519
self.share_inputs['infer_seed'][:] %= self.MAX_INFER_SEED
520
if self.free_list_len > 0:
521
+ logger.info('You got into step CUDA!!!')
522
self.step_cuda(seq_lens_this_time)
523
524
llm/server/server/engine/proposers.py
0 commit comments