#!/share/vllm-0.8.5/bin/python
"""GPT-OSS chat engine: wraps a Hugging Face causal LM behind BaseChatLLM.

Loads a gpt-oss style checkpoint with AutoModelForCausalLM / AutoTokenizer
and registers the engine under the model-type key "gpt-oss".
"""

# pip install accelerate  (required by device_map="auto")
import torch  # noqa: F401 -- transformers resolves dtype/device placement through torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from llmengine.base_chat_llm import BaseChatLLM, llm_register


class GptossLLM(BaseChatLLM):
    """Chat wrapper for openai/gpt-oss style causal language models."""

    def __init__(self, model_id):
        """Load tokenizer and model from *model_id* (hub name or local path).

        device_map="auto" lets accelerate shard the model across available
        devices; torch_dtype="auto" keeps the checkpoint's native precision.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype="auto",
            device_map="auto",
        )
        self.model_id = model_id

    def build_kwargs(self, inputs, streamer):
        """Return the keyword-argument dict for ``model.generate()``.

        *inputs* is the tokenized prompt dict from ``_messages2inputs``;
        *streamer* receives decoded tokens incrementally during generation.
        """
        eos = self.tokenizer.eos_token_id
        return dict(
            **inputs,
            streamer=streamer,
            max_new_tokens=32768,
            do_sample=True,
            eos_token_id=eos,
            # Fix: GPT-style tokenizers often define no pad token; without an
            # explicit pad_token_id, generate() warns (or misbehaves on batched
            # input). Fall back to the EOS id, the conventional substitute.
            pad_token_id=(
                self.tokenizer.pad_token_id
                if self.tokenizer.pad_token_id is not None
                else eos
            ),
        )

    def _messages2inputs(self, messages):
        """Tokenize an OpenAI-style messages list via the model's chat template.

        Returns a dict of tensors already moved to the model's device.
        """
        return self.tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
        ).to(self.model.device)


llm_register("gpt-oss", GptossLLM)