From a5e8899b5a34bb5d4cc01950c6898a522d64ca06 Mon Sep 17 00:00:00 2001
From: yumoqing
Date: Wed, 16 Jul 2025 14:32:22 +0800
Subject: [PATCH] first commit

---
 README.md             |  58 +++++++++++++++++++++++
 app/README.md         |   0
 app/qwenvl.py         | 105 +++++++++++++++++++++++++++++++++++++++++
 app/test.py           |  32 +++++++++++++
 conf/README.md        |   0
 conf/config.json      |  48 +++++++++++++++++++
 files/README.md       |   0
 requirements.txt      |   8 ++++
 script/install.sh     |   3 ++
 script/qwenvl.service |  13 +++++
 script/qwenvl.sh      |   5 ++
 wwwroot/README.md     |   0
 wwwroot/api/index.dspy |  9 ++++
 13 files changed, 281 insertions(+)
 create mode 100644 README.md
 create mode 100644 app/README.md
 create mode 100644 app/qwenvl.py
 create mode 100644 app/test.py
 create mode 100644 conf/README.md
 create mode 100755 conf/config.json
 create mode 100644 files/README.md
 create mode 100644 requirements.txt
 create mode 100755 script/install.sh
 create mode 100644 script/qwenvl.service
 create mode 100755 script/qwenvl.sh
 create mode 100644 wwwroot/README.md
 create mode 100644 wwwroot/api/index.dspy

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e464fbc
--- /dev/null
+++ b/README.md
@@ -0,0 +1,58 @@
+# Qwen2-VL deployment instance
+
+## dependencies
+* git+https://git.kaiyuancloud.cn/yumoqing/apppublic
+* git+https://git.kaiyuancloud.cn/yumoqing/ahserver
+
+## pre-installation
+First, create a new Python virtual environment:
+```
+python3 -m venv ~/vl
+```
+Create two shell scripts named vlpy and vlpip:
+```vlpy
+#!/usr/bin/bash
+~/vl/bin/python $*
+```
+and
+```vlpip
+#!/usr/bin/bash
+~/vl/bin/pip $*
+```
+then move them to the bin directory under your $HOME folder and make them executable:
+```
+mv vlpip vlpy ~/bin
+chmod +x ~/bin/vl*
+```
+
+## installation
+
+Follow the instructions from [Qwen2-VL](https://github.com/QwenLM/Qwen2-VL), remembering to replace pip with vlpip.
+
+Then do the following:
+```
+vlpip install git+https://github.com/huggingface/transformers@21fac7abba2a37fae86106f87fcf9974fd1e3830 accelerate
+vlpip install qwen-vl-utils[decord]
+
+git clone https://git.kaiyuancloud.cn/yumoqing/qwenvl
+cd qwenvl/script
+sudo ./install.sh
+```
+
+## Change model or http port
+There is a config.json file under the qwenvl/conf folder; change the "modelname" and "port" values to suit your requirements.
+
+## models to use
+
+* Qwen/Qwen2-VL-7B-Instruct-AWQ
+* Qwen/Qwen2-VL-7B-Instruct
+* Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4
+* Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8
+* Qwen/Qwen2-VL-72B-Instruct
+* Qwen/Qwen2-VL-72B-Instruct-AWQ
+* Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4
+* Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8
+* Qwen/Qwen2-VL-2B-Instruct
+* Qwen/Qwen2-VL-2B-Instruct-AWQ
+* Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4
+* Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8
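+
+## calling the service
+
+Once the service is running, POST a prompt and a base64-encoded image to the /api endpoint; the handler in wwwroot/api/index.dspy answers with a JSON object holding "content" and "time_cost" fields. Below is a minimal sketch in the spirit of app/test.py; the host name, the image path example.jpg and the prompt text are placeholder assumptions, adjust them to your deployment:
+```python
+import base64
+import requests
+
+# read a local image and encode it as base64 (same as file2b64 in app/test.py)
+with open('example.jpg', 'rb') as f:
+    image_b64 = base64.b64encode(f.read()).decode('utf-8')
+
+resp = requests.post('http://localhost:10090/api',
+                     data={'prompt': 'Describe this image', 'image': image_b64})
+print(resp.text)  # JSON with "content" (model output) and "time_cost" (seconds)
+```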
diff --git a/app/README.md b/app/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/app/qwenvl.py b/app/qwenvl.py
new file mode 100644
index 0000000..614ec27
--- /dev/null
+++ b/app/qwenvl.py
@@ -0,0 +1,105 @@
+import torch
+from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
+from qwen_vl_utils import process_vision_info
+from appPublic.worker import awaitify
+from appPublic.jsonConfig import getConfig
+from ahserver.serverenv import ServerEnv
+from ahserver.webapp import webapp
+
+class Qwen2VLClass:
+    def __init__(self, modelname):
+        # default: load the model on the available device(s)
+        self.model = Qwen2VLForConditionalGeneration.from_pretrained(
+            modelname,
+            torch_dtype=torch.bfloat16,
+            # attn_implementation="flash_attention_2",
+            device_map="auto"
+        )
+        self.min_pixels = 256 * 28 * 28
+        self.max_pixels = 1280 * 28 * 28
+        # We recommend enabling flash_attention_2 for better acceleration and memory
+        # saving, especially in multi-image and video scenarios.
+        # model = Qwen2VLForConditionalGeneration.from_pretrained(
+        #     "Qwen/Qwen2-VL-7B-Instruct",
+        #     torch_dtype=torch.bfloat16,
+        #     attn_implementation="flash_attention_2",
+        #     device_map="auto",
+        # )
+
+        # default processor
+        self.processor = AutoProcessor.from_pretrained(modelname,
+            min_pixels=self.min_pixels,
+            max_pixels=self.max_pixels
+        )
+
+        # The default range for the number of visual tokens per image in the model is
+        # 4-16384. You can set min_pixels and max_pixels according to your needs, such
+        # as a token range of 256-1280, to balance performance and cost.
+        # min_pixels = 256*28*28
+        # max_pixels = 1280*28*28
+        # processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", min_pixels=min_pixels, max_pixels=max_pixels)
+
+    def inference(self, prompt, image, videofile):
+        # image: a base64 string, a file:/// path or an http(s) URL;
+        # videofile: a file:/// path only
+        content = [
+            {
+                "type": "text",
+                "text": prompt
+            }
+        ]
+        if image:
+            # a bare base64 string is wrapped into a data URI;
+            # file:/// and http(s) URLs pass through unchanged
+            if not image.startswith('file:///') \
+                    and not image.startswith('http://') \
+                    and not image.startswith('https://'):
+                image = f'data:image;base64,{image}'
+            content.append({
+                "type": "image",
+                "image": image
+            })
+        if videofile:
+            if not videofile.startswith('file:///'):
+                return 'only local video files are supported'
+
+            content.append({
+                "type": "video",
+                "video": videofile
+            })
+
+        messages = [
+            {
+                "role": "user",
+                "content": content
+            }
+        ]
+
+        # Preparation for inference
+        text = self.processor.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
+        )
+        image_inputs, video_inputs = process_vision_info(messages)
+        inputs = self.processor(
+            text=[text],
+            images=image_inputs,
+            videos=video_inputs,
+            padding=True,
+            return_tensors="pt",
+        )
+        inputs = inputs.to("cuda")  # assumes a CUDA-capable GPU is available
+
+        # Inference: generation of the output
+        generated_ids = self.model.generate(**inputs, max_new_tokens=128)
+        # strip the prompt tokens so only the newly generated tokens are decoded
+        generated_ids_trimmed = [
+            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+        ]
+        output_text = self.processor.batch_decode(
+            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+        )
+        return output_text[0]
+
+def main():
+    config = getConfig()
+    modelname = config.modelname
+    m = Qwen2VLClass(modelname)
+    g = ServerEnv()
+    # expose the blocking inference method as an awaitable so the async
+    # server (and wwwroot/api/index.dspy) can call it with await
+    g.inference = awaitify(m.inference)
+
+
+if __name__ == '__main__':
+    webapp(main)
diff --git a/app/test.py b/app/test.py
new file mode 100644
index 0000000..e0ff008
--- /dev/null
+++ b/app/test.py
@@ -0,0 +1,32 @@
+import time
+import requests
+import base64
+
+def file2b64(file_path):
+    # read the file contents
+    with open(file_path, 'rb') as file:
+        file_content = file.read()
+
+    # encode the contents as Base64
+    base64_encoded_data = base64.b64encode(file_content)
+
+    # decode the Base64 bytes into a UTF-8 string
+    base64_encoded_str = base64_encoded_data.decode('utf-8')
+
+    return base64_encoded_str
+
+while True:
+    print('prompt:')
+    p = input()
+    print('input image path:')
+    i = input()
+    if p == '' or i == '':
+        continue
+    t1 = time.time()
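+    # NOTE: pd4e.com:10090 is the deployment hard-coded by this test script;
+    # point it at your own host (the port is the "port" value in conf/config.json)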
"modelname":"Qwen/Qwen2-VL-2B-Instruct", + "logger":{ + "name":"qwenvl", + "levelname":"info", + "logfile":"$[workdir]$/logs/qwenvl.log" + }, + "filesroot":"$[workdir]$/files", + "website":{ + "paths":[ + ["$[workdir]$/wwwroot",""] + ], + "client_max_size":10000, + "host":"0.0.0.0", + "port":10090, + "coding":"utf-8", + "indexes":[ + "index.html", + "index.tmpl", + "index.ui", + "index.dspy", + "index.md" + ], + "startswiths":[ + { + "leading":"/idfile", + "registerfunction":"idFileDownload" + } + ], + "processors":[ + [".dspy","dspy"], + [".md","md"] + ], + "session_max_time":3000, + "session_issue_time":2500, + "session_redis_notuse":{ + "url":"redis://127.0.0.1:6379" + } + }, + "langMapping":{ + "zh-Hans-CN":"zh-cn", + "zh-CN":"zh-cn", + "en-us":"en", + "en-US":"en" + } +} diff --git a/files/README.md b/files/README.md new file mode 100644 index 0000000..e69de29 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..1b6b397 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +git+https://git.kaiyuancloud.cn/yumoqing/apppublic +git+https://git.kaiyuancloud.cn/yumoqing/ahserver +git+https://github.com/huggingface/transformers@21fac7abba2a37fae86106f87fcf9974fd1e3830 +accelerate +torch +torchaudio +optimum +auto_gptq diff --git a/script/install.sh b/script/install.sh new file mode 100755 index 0000000..bbd75cd --- /dev/null +++ b/script/install.sh @@ -0,0 +1,3 @@ +sudo cp qwenvl.service /etc/systemd/system +sudo systemctl enable qwenvl.service +sudo systemctl start qwenvl diff --git a/script/qwenvl.service b/script/qwenvl.service new file mode 100644 index 0000000..cd4d304 --- /dev/null +++ b/script/qwenvl.service @@ -0,0 +1,13 @@ +[Unit] +Description=qwen2-vl inference service +Documention=qwen2-vl inference service to control sage service start or stop +Wants=systemd-networkd.service +Requires=nginx.service + +[Service] +Type=forking +ExecStart=su - ymq -c "/d/ymq/py/qwenvl/script/qwenvl.sh" +ExecStop=su - ymq "/d/ymq/bin/killname qwenvl.py" +[Install] +WantedBy=multi-user.target + diff --git a/script/qwenvl.sh b/script/qwenvl.sh new file mode 100755 index 0000000..133c934 --- /dev/null +++ b/script/qwenvl.sh @@ -0,0 +1,5 @@ +#!/usr/bin/bash + +killname /py/qwenvl/app/qwenvl.py +~/ve/qwenvl/bin/python ~/py/qwenvl/app/qwenvl.py -w ~/py/qwenvl >~/py/qwenvl/logs/stderr.log 2>&1 & +exit 0 diff --git a/wwwroot/README.md b/wwwroot/README.md new file mode 100644 index 0000000..e69de29 diff --git a/wwwroot/api/index.dspy b/wwwroot/api/index.dspy new file mode 100644 index 0000000..1873f9a --- /dev/null +++ b/wwwroot/api/index.dspy @@ -0,0 +1,9 @@ +info(f'{params_kw=}') +t1 = time.time() +ret = await inference(params_kw.prompt, params_kw.image, params_kw.video) +t2 = time.time() +info(f'{ret=}') +return { + "content":ret, + "time_cost":t2 - t1 +}