This commit is contained in:
yumoqing 2025-07-24 14:39:39 +08:00
parent f733553677
commit 7803b38e95

View File

@@ -107,15 +107,14 @@ class BaseChatLLM:
inputs = self._messages2inputs(messages) inputs = self._messages2inputs(messages)
input_len = inputs["input_ids"].shape[-1] input_len = inputs["input_ids"].shape[-1]
streamer = self.get_streamer() streamer = self.get_streamer()
with self.get_streamer() as streamer: kwargs = self.build_kwargs(inputs, streamer)
kwargs = self.build_kwargs(inputs, streamer) thread = threading.Thread(target=self.model.generate,
thread = threading.Thread(target=self.model.generate, kwargs=kwargs)
kwargs=kwargs) thread.start()
thread.start() for d in self.output_generator(streamer):
for d in self.output_generator(streamer): if d['choices'][0]['finish_reason'] == 'stop':
if d['choices'][0]['finish_reason'] == 'stop': d['input_tokens'] = input_len
d['input_tokens'] = input_len yield d
yield d
async def async_gen(self, messages): async def async_gen(self, messages):
async for d in stream.iterate(self._gen(messages)): async for d in stream.iterate(self._gen(messages)):