From 7803b38e95a45848ebbc12734ffb2784a3a253dc Mon Sep 17 00:00:00 2001
From: yumoqing
Date: Thu, 24 Jul 2025 14:39:39 +0800
Subject: [PATCH] bugfix

---
 llmengine/base_chat_llm.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/llmengine/base_chat_llm.py b/llmengine/base_chat_llm.py
index 958ded9..ee62d86 100644
--- a/llmengine/base_chat_llm.py
+++ b/llmengine/base_chat_llm.py
@@ -107,15 +107,14 @@ class BaseChatLLM:
         inputs = self._messages2inputs(messages)
         input_len = inputs["input_ids"].shape[-1]
         streamer = self.get_streamer()
-        with self.get_streamer() as streamer:
-            kwargs = self.build_kwargs(inputs, streamer)
-            thread = threading.Thread(target=self.model.generate,
-                kwargs=kwargs)
-            thread.start()
-            for d in self.output_generator(streamer):
-                if d['choices'][0]['finish_reason'] == 'stop':
-                    d['input_tokens'] = input_len
-                yield d
+        kwargs = self.build_kwargs(inputs, streamer)
+        thread = threading.Thread(target=self.model.generate,
+            kwargs=kwargs)
+        thread.start()
+        for d in self.output_generator(streamer):
+            if d['choices'][0]['finish_reason'] == 'stop':
+                d['input_tokens'] = input_len
+            yield d
 
     async def async_gen(self, messages):
         async for d in stream.iterate(self._gen(messages)):
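
Reviewer note: before this patch, `_gen` created a streamer with `self.get_streamer()` and then immediately shadowed it by opening a second one as a context manager; the fix keeps the single streamer and de-indents the body. The resulting shape matches the common Hugging Face `TextIteratorStreamer` pattern: `generate()` blocks, so it runs in a worker thread while the streamer is consumed on the calling thread. Below is a minimal self-contained sketch of that pattern, assuming a transformers causal LM; the model name, `max_new_tokens`, and the chunk dict layout are illustrative, not taken from this repo:

    import threading
    from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

    tok = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
    model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")

    def gen(messages):
        # Build model inputs from chat messages (plays the role of _messages2inputs).
        inputs = tok.apply_chat_template(
            messages, add_generation_prompt=True,
            return_tensors="pt", return_dict=True,
        )
        input_len = inputs["input_ids"].shape[-1]
        # TextIteratorStreamer is a plain iterator, not a context manager,
        # which is consistent with the patch dropping the `with` block.
        streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
        kwargs = dict(inputs, streamer=streamer, max_new_tokens=256)
        # generate() blocks until completion, so run it off-thread and
        # stream the decoded text back on the calling thread.
        thread = threading.Thread(target=model.generate, kwargs=kwargs)
        thread.start()
        for text in streamer:
            yield {"delta": text, "finish_reason": None}
        thread.join()
        # Mirror the patched code: attach the prompt length to the final chunk.
        yield {"delta": "", "finish_reason": "stop", "input_tokens": input_len}

The worker thread exits once `generate()` finishes and the streamer is drained, so the final `join()` is cheap; yielding the prompt length only on the terminal chunk matches how the patched `_gen` tags the `finish_reason == 'stop'` record.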