first commit
This commit is contained in:
commit
a5e8899b5a
58
README.md
Normal file
58
README.md
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
# Qwen2-VL deployment instances
|
||||||
|
|
||||||
|
## dependencies
|
||||||
|
git+https://git.kaiyuancloud.cn/yumoqing/apppublic
|
||||||
|
git+https://git.kaiyuancloud.cn/yumoqing/ahserver
|
||||||
|
|
||||||
|
## preinstallation
|
||||||
|
first, create a new python virtual env
|
||||||
|
```
|
||||||
|
python3 -m venv ~/vl
|
||||||
|
```
|
||||||
|
create two shell scripts named vlpy and vlpip:
|
||||||
|
```vlpy
|
||||||
|
#!/usr/bin/bash
|
||||||
|
~/vl/bin/python $*
|
||||||
|
```
|
||||||
|
and
|
||||||
|
```vlpip
|
||||||
|
#!/usr/bin/bash
|
||||||
|
~/vl/bin/pip $*
|
||||||
|
```
|
||||||
|
then move them to the bin folder under your $HOME and make them executable with chmod +x
|
||||||
|
```
|
||||||
|
mv vlpip vlpy ~/bin
|
||||||
|
chmod +x ~/bin/vl*
|
||||||
|
```
|
||||||
|
|
||||||
|
## installation
|
||||||
|
|
||||||
|
follow the instructions from [Qwen2-VL](https://github.com/QwenLM/Qwen2-VL), remembering to replace pip with vlpip
|
||||||
|
|
||||||
|
do the following
|
||||||
|
```
|
||||||
|
vlpip install git+https://github.com/huggingface/transformers@21fac7abba2a37fae86106f87fcf9974fd1e3830 accelerate
|
||||||
|
vlpip install qwen-vl-utils[decord]
|
||||||
|
|
||||||
|
git clone https://git.kaiyuancloud.cn/yumoqing/qwenvl
|
||||||
|
cd qwenvl/script
|
||||||
|
sudo ./install.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
## Change model or http port
|
||||||
|
there is a config.json file under the qwenvl/conf folder; change the "modelname" and "port" values to suit your requirements
|
||||||
|
|
||||||
|
## model to use
|
||||||
|
|
||||||
|
* Qwen/Qwen2-VL-7B-Instruct-AWQ
|
||||||
|
* Qwen/Qwen2-VL-7B-Instruct
|
||||||
|
* Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4
|
||||||
|
* Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8
|
||||||
|
* Qwen/Qwen2-VL-72B-Instruct
|
||||||
|
* Qwen/Qwen2-VL-72B-Instruct-AWQ
|
||||||
|
* Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4
|
||||||
|
* Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8
|
||||||
|
* Qwen/Qwen2-VL-2B-Instruct
|
||||||
|
* Qwen/Qwen2-VL-2B-Instruct-AWQ
|
||||||
|
* Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4
|
||||||
|
* Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8
|
||||||
0
app/README.md
Normal file
0
app/README.md
Normal file
105
app/qwenvl.py
Normal file
105
app/qwenvl.py
Normal file
@ -0,0 +1,105 @@
|
|||||||
|
import torch
|
||||||
|
from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
|
||||||
|
from qwen_vl_utils import process_vision_info
|
||||||
|
from appPublic.worker import awaitify
|
||||||
|
from appPublic.jsonConfig import getConfig
|
||||||
|
from ahserver.serverenv import ServerEnv
|
||||||
|
from ahserver.webapp import webapp
|
||||||
|
|
||||||
|
class Qwen2VLClass:
    """Qwen2-VL model plus its processor, exposed through ``inference``.

    The model and the matching processor are loaded once in the constructor.
    Each call to ``inference`` then answers one single-turn request made of
    a text prompt and an optional image and/or video.
    """

    def __init__(self, modelname):
        """Load the model and processor identified by ``modelname``.

        Args:
            modelname: model identifier handed unchanged to both
                ``from_pretrained`` calls.
        """
        # device_map="auto" loads the model on the available device(s).
        # Enabling attn_implementation="flash_attention_2" is recommended
        # for better acceleration and memory saving, especially in
        # multi-image and video scenarios; it is left disabled here.
        # NOTE(review): bfloat16 is forced for every checkpoint - confirm
        # it also suits the quantized (AWQ / GPTQ) models.
        self.model = Qwen2VLForConditionalGeneration.from_pretrained(
            modelname,
            torch_dtype=torch.bfloat16,
            # attn_implementation="flash_attention_2",
            device_map="auto"
        )

        # The default range for the number of visual tokens per image is
        # 4-16384. min_pixels / max_pixels narrow it to a 256-1280 token
        # range to balance performance and cost.
        self.min_pixels = 256 * 28 * 28
        self.max_pixels = 1280 * 28 * 28
        self.processor = AutoProcessor.from_pretrained(
            modelname,
            min_pixels=self.min_pixels,
            max_pixels=self.max_pixels
        )

    @staticmethod
    def _build_content(prompt, image, videofile):
        """Return the chat ``content`` list for one user message.

        Args:
            prompt: text of the request.
            image: falsy for "no image"; otherwise a ``file:///``,
                ``http://`` or ``https://`` reference, or anything else,
                which is treated as a raw base64 payload and wrapped in a
                ``data:`` URI.
            videofile: falsy for "no video"; otherwise stored as given.

        Returns:
            A list of ``{"type": ..., ...}`` dicts: the text entry first,
            then the image and video entries when present.
        """
        content = [{"type": "text", "text": prompt}]
        if image:
            if not image.startswith(('file:///', 'http://', 'https://')):
                image = f'data:image;base64,{image}'
            content.append({"type": "image", "image": image})
        if videofile:
            content.append({"type": "video", "video": videofile})
        return content

    def inference(self, prompt, image, videofile, max_new_tokens=128):
        """Answer one prompt about an optional image and/or video.

        Args:
            prompt: text of the request.
            image: optional image, see ``_build_content``.
            videofile: optional video; only ``file:///`` URIs are accepted.
            max_new_tokens: generation budget passed to ``generate``.

        Returns:
            The decoded answer text, or the literal string
            ``'only local video file support'`` when ``videofile`` is given
            but does not start with ``file:///``.
        """
        # Reject unsupported videos before any model work is done.
        if videofile and not videofile.startswith('file:///'):
            return 'only local video file support'

        messages = [
            {
                "role": "user",
                "content": self._build_content(prompt, image, videofile)
            }
        ]

        # Preparation for inference
        text = self.processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        image_inputs, video_inputs = process_vision_info(messages)
        inputs = self.processor(
            text=[text],
            images=image_inputs,
            videos=video_inputs,
            padding=True,
            return_tensors="pt",
        )
        # Follow the model to the device chosen by device_map="auto"
        # instead of assuming a CUDA device is present.
        inputs = inputs.to(self.model.device)

        # Inference: generation of the output
        generated_ids = self.model.generate(
            **inputs, max_new_tokens=max_new_tokens
        )
        # Strip the leading prompt tokens from each generated sequence.
        generated_ids_trimmed = [
            out_ids[len(in_ids):]
            for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]
        output_text = self.processor.batch_decode(
            generated_ids_trimmed,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False
        )
        return output_text[0]
|
||||||
|
|
||||||
|
def main():
    """Load the configured model and publish its inference call.

    The model name is read from the application configuration, and the
    awaitify-wrapped ``inference`` method is stored on the server
    environment as ``inference``.
    """
    engine = Qwen2VLClass(getConfig().modelname)
    env = ServerEnv()
    env.inference = awaitify(engine.inference)


if __name__ == '__main__':
    # webapp() receives main as its initialisation callback.
    webapp(main)
|
||||||
32
app/test.py
Normal file
32
app/test.py
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
import time
|
||||||
|
import requests
|
||||||
|
import base64
|
||||||
|
|
||||||
|
def file2b64(file_path):
    """Return the contents of ``file_path`` as a Base64 text string.

    The file is read in binary mode, Base64-encoded, and the encoded bytes
    are decoded as UTF-8 so the result is a ``str``.
    """
    with open(file_path, 'rb') as fh:
        raw = fh.read()
    return base64.b64encode(raw).decode('utf-8')
|
||||||
|
|
||||||
|
# Interactive smoke test: repeatedly ask for a prompt and an image path, post
# both to the inference endpoint and print the reply with the elapsed time.
while True:
    try:
        print('prompt:')
        p = input()
        print('input image path:')
        i = input()
    except (EOFError, KeyboardInterrupt):
        # End of input (Ctrl-D / closed pipe) or Ctrl-C: leave quietly
        # instead of dying with a traceback.
        break
    if p == '' or i == '':
        continue
    t1 = time.time()
    try:
        image_b64 = file2b64(i)
    except OSError as e:
        # A mistyped path must not end the whole session.
        print(f'cannot read image {i!r}: {e}')
        continue
    try:
        ret = requests.post('http://pd4e.com:10090/api',
            data={
                'prompt':p,
                'image':image_b64
            })
    except requests.RequestException as e:
        # Server unreachable or connection dropped: report and ask again.
        print(f'request failed: {e}')
        continue
    t2 = time.time()
    print(ret.text, t2 - t1, 'seconds')
|
||||||
0
conf/README.md
Normal file
0
conf/README.md
Normal file
48
conf/config.json
Executable file
48
conf/config.json
Executable file
@ -0,0 +1,48 @@
|
|||||||
|
{
|
||||||
|
"password_key":"!@#$%^&*(*&^%$QWERTYUIqwertyui234567",
|
||||||
|
"modelname":"Qwen/Qwen2-VL-7B-Instruct",
|
||||||
|
"modelname":"Qwen/Qwen2-VL-2B-Instruct",
|
||||||
|
"logger":{
|
||||||
|
"name":"qwenvl",
|
||||||
|
"levelname":"info",
|
||||||
|
"logfile":"$[workdir]$/logs/qwenvl.log"
|
||||||
|
},
|
||||||
|
"filesroot":"$[workdir]$/files",
|
||||||
|
"website":{
|
||||||
|
"paths":[
|
||||||
|
["$[workdir]$/wwwroot",""]
|
||||||
|
],
|
||||||
|
"client_max_size":10000,
|
||||||
|
"host":"0.0.0.0",
|
||||||
|
"port":10090,
|
||||||
|
"coding":"utf-8",
|
||||||
|
"indexes":[
|
||||||
|
"index.html",
|
||||||
|
"index.tmpl",
|
||||||
|
"index.ui",
|
||||||
|
"index.dspy",
|
||||||
|
"index.md"
|
||||||
|
],
|
||||||
|
"startswiths":[
|
||||||
|
{
|
||||||
|
"leading":"/idfile",
|
||||||
|
"registerfunction":"idFileDownload"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"processors":[
|
||||||
|
[".dspy","dspy"],
|
||||||
|
[".md","md"]
|
||||||
|
],
|
||||||
|
"session_max_time":3000,
|
||||||
|
"session_issue_time":2500,
|
||||||
|
"session_redis_notuse":{
|
||||||
|
"url":"redis://127.0.0.1:6379"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"langMapping":{
|
||||||
|
"zh-Hans-CN":"zh-cn",
|
||||||
|
"zh-CN":"zh-cn",
|
||||||
|
"en-us":"en",
|
||||||
|
"en-US":"en"
|
||||||
|
}
|
||||||
|
}
|
||||||
0
files/README.md
Normal file
0
files/README.md
Normal file
8
requirements.txt
Normal file
8
requirements.txt
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
git+https://git.kaiyuancloud.cn/yumoqing/apppublic
|
||||||
|
git+https://git.kaiyuancloud.cn/yumoqing/ahserver
|
||||||
|
git+https://github.com/huggingface/transformers@21fac7abba2a37fae86106f87fcf9974fd1e3830
|
||||||
|
accelerate
|
||||||
|
torch
|
||||||
|
torchaudio
|
||||||
|
optimum
|
||||||
|
auto_gptq
|
||||||
3
script/install.sh
Executable file
3
script/install.sh
Executable file
@ -0,0 +1,3 @@
|
|||||||
|
#!/usr/bin/bash
# Install the qwenvl systemd unit, enable it at boot and start it now.

# Stop at the first failing step rather than enabling or starting a unit
# that was never copied.
set -e

# qwenvl.service sits next to this script; change there so the relative
# path below works from any working directory.
cd "$(dirname "$0")"

sudo cp qwenvl.service /etc/systemd/system
sudo systemctl enable qwenvl.service
sudo systemctl start qwenvl
|
||||||
13
script/qwenvl.service
Normal file
13
script/qwenvl.service
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
[Unit]
Description=qwen2-vl inference service
# Lets systemd start and stop the qwen2-vl inference service.
# (This note used to be a misspelled "Documention=" key; Documentation=
# accepts only URIs, so the free text is kept as a comment.)
Wants=systemd-networkd.service
Requires=nginx.service

[Service]
# qwenvl.sh puts the Python process in the background and exits.
Type=forking
ExecStart=su - ymq -c "/d/ymq/py/qwenvl/script/qwenvl.sh"
# -c makes su run the quoted string as a command, as ExecStart does.
ExecStop=su - ymq -c "/d/ymq/bin/killname qwenvl.py"

[Install]
WantedBy=multi-user.target
|
||||||
|
|
||||||
5
script/qwenvl.sh
Executable file
5
script/qwenvl.sh
Executable file
@ -0,0 +1,5 @@
|
|||||||
|
#!/usr/bin/bash
# Start the qwenvl web application in the background.

# NOTE(review): killname is presumably a site-local helper that kills an
# already running copy by name - confirm, and confirm the pattern is right.
killname /py/qwenvl/app/qwenvl.py

# The redirection below fails, and the server never starts, when the logs
# directory does not exist yet.
mkdir -p ~/py/qwenvl/logs

~/ve/qwenvl/bin/python ~/py/qwenvl/app/qwenvl.py -w ~/py/qwenvl >~/py/qwenvl/logs/stderr.log 2>&1 &
exit 0
|
||||||
0
wwwroot/README.md
Normal file
0
wwwroot/README.md
Normal file
9
wwwroot/api/index.dspy
Normal file
9
wwwroot/api/index.dspy
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
info(f'{params_kw=}')
|
||||||
|
t1 = time.time()
|
||||||
|
ret = await inference(params_kw.prompt, params_kw.image, params_kw.video)
|
||||||
|
t2 = time.time()
|
||||||
|
info(f'{ret=}')
|
||||||
|
return {
|
||||||
|
"content":ret,
|
||||||
|
"time_cost":t2 - t1
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user