From 80e66263a83420e1974c7901fd4c02211b7b670e Mon Sep 17 00:00:00 2001
From: yumoqing <yumoqing@gmail.com>
Date: Wed, 6 Aug 2025 15:13:26 +0800
Subject: [PATCH] Add Qwen3-Coder model support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 llmengine/qwen3coder.py | 41 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 llmengine/qwen3coder.py

diff --git a/llmengine/qwen3coder.py b/llmengine/qwen3coder.py
new file mode 100644
index 0000000..a748a7c
--- /dev/null
+++ b/llmengine/qwen3coder.py
@@ -0,0 +1,41 @@
+#!/share/vllm-0.8.5/bin/python
+
+# pip install accelerate
+from appPublic.log import debug
+from llmengine.base_chat_llm import BaseChatLLM, llm_register
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+class Qwen3CoderLLM(BaseChatLLM):
+	def __init__(self, model_id):
+		# Load tokenizer and weights; device_map="auto" places layers on available GPUs
+		self.tokenizer = AutoTokenizer.from_pretrained(model_id)
+		self.model = AutoModelForCausalLM.from_pretrained(
+			model_id,
+			torch_dtype="auto",
+			device_map="auto"
+		)
+		self.model_id = model_id
+
+	def build_kwargs(self, inputs, streamer):
+		# Keyword arguments that BaseChatLLM passes to model.generate()
+		generate_kwargs = dict(
+			**inputs,
+			streamer=streamer,
+			# do_sample=True,
+			# eos_token_id=self.tokenizer.eos_token_id,
+			max_new_tokens=65536
+		)
+		return generate_kwargs
+
+	def _messages2inputs(self, messages):
+		# Render the chat template to a prompt string, then tokenize it
+		debug(f'-----------{messages=}-----------')
+		text = self.tokenizer.apply_chat_template(
+			messages,
+			tokenize=False,
+			add_generation_prompt=True
+		)
+		debug(f'{text=}, {type(text)=}')
+		return self.tokenizer([text], return_tensors="pt").to(self.model.device)
+
+llm_register("Qwen/Qwen3Coder", Qwen3CoderLLM)
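
Reviewer note (not part of the patch): a minimal sketch of how these hooks are
expected to compose at generation time, assuming BaseChatLLM drives them with
the stock transformers streaming recipe (a TextIteratorStreamer consumed while
generate() runs in a thread). The driver loop and the model id below are
illustrative assumptions, not code from this repository.

    from threading import Thread
    from transformers import TextIteratorStreamer
    from llmengine.qwen3coder import Qwen3CoderLLM

    llm = Qwen3CoderLLM("Qwen/Qwen3-Coder-480B-A35B-Instruct")  # example model id
    messages = [{"role": "user", "content": "Write quicksort in Python."}]

    inputs = llm._messages2inputs(messages)
    # skip_prompt=True so only newly generated tokens are yielded
    streamer = TextIteratorStreamer(llm.tokenizer, skip_prompt=True,
                                    skip_special_tokens=True)
    kwargs = llm.build_kwargs(inputs, streamer)

    # generate() blocks, so run it in a thread and consume the streamer here
    Thread(target=llm.model.generate, kwargs=kwargs).start()
    for chunk in streamer:
        print(chunk, end='', flush=True)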