first commit
commit 7d81f9093a

README.md (new file, 35 lines)
@@ -0,0 +1,35 @@
# A FastVLM webserver

[fastvlm](https://github.com/apple/ml-fastvlm) is an open-source multimodal LLM released by Apple. It takes an image and a text prompt as input and generates text.

## Create virtual environment

```
python3 -m venv ~/fastvlm.env
source ~/fastvlm.env/bin/activate
```

## Installation

```
git clone https://github.com/apple/ml-fastvlm.git
cd ml-fastvlm
pip install -U .
git clone https://git.kaiyuancloud.cn/yumoqing/fvlm.git
pip install git+https://git.kaiyuancloud.cn/yumoqing/apppublic.git
pip install git+https://git.kaiyuancloud.cn/yumoqing/sqlor.git
pip install git+https://git.kaiyuancloud.cn/yumoqing/ahserver.git
```

## Start the web server

```
cd fvlm
python app/fastvlm.py
```

The server binds to port 9994 (see "website.port" in conf/config.json).

## Usage

```
curl http://localhost:9994/v1/generate \
  -F "prompt=Describe this image" \
  -F "image_path=@path_to_image"
```
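The same request can also be made from Python. The snippet below is a minimal sketch, not part of the repository: it assumes the `requests` package is installed and that the server accepts the same multipart form fields (`prompt`, `image_path`) shown in the curl example above.

```python
# Minimal client sketch for the /v1/generate endpoint (assumes `pip install requests`).
import requests

url = "http://localhost:9994/v1/generate"
image_path = "test.jpg"  # hypothetical local image file

with open(image_path, "rb") as f:
    resp = requests.post(
        url,
        data={"prompt": "Describe this image"},
        files={"image_path": f},  # field name taken from the curl example
    )

resp.raise_for_status()
print(resp.json())  # expected shape per app/fastvlm.py: {"timecost": ..., "content": ...}
```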
app/fastvlm.py (new file, 106 lines)
@@ -0,0 +1,106 @@
#
# Modified from LLaVA/predict.py
# Please see ACKNOWLEDGEMENTS for details about LICENSE
#
import os
import torch
import time
from PIL import Image

from llava.utils import disable_torch_init
from llava.conversation import conv_templates
from llava.model.builder import load_pretrained_model
from llava.mm_utils import tokenizer_image_token, process_images, get_model_name_from_path
from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
from ahserver.webapp import webapp
from ahserver.serverenv import ServerEnv
from appPublic.jsonConfig import getConfig
from appPublic.log import debug, exception, error
from appPublic.worker import awaitify


class FastVLM:
    def __init__(self):
        self.config = getConfig()
        model_path = self.config.model_path
        """
        generation_config = None
        if os.path.exists(os.path.join(model_path, 'generation_config.json')):
            generation_config = os.path.join(model_path, '.generation_config.json')
            os.rename(os.path.join(model_path, 'generation_config.json'),
                      generation_config)
        """

        # Load model
        disable_torch_init()
        model_name = get_model_name_from_path(model_path)
        model_base = None
        device = self.config.device
        tokenizer, model, image_processor, context_len = load_pretrained_model(
            model_path, model_base, model_name, device=device)
        self.tokenizer = tokenizer
        self.model = model
        self.image_processor = image_processor
        self.context_len = context_len

    def _generate(self, image_file, prompt,
                  temperature=0.2,
                  top_p=None,
                  num_beams=1,
                  conv_mode='qwen_2'):
        t1 = time.time()
        try:
            qs = prompt
            if self.model.config.mm_use_im_start_end:
                qs = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN + '\n' + qs
            else:
                qs = DEFAULT_IMAGE_TOKEN + '\n' + qs
            conv = conv_templates[conv_mode].copy()
            conv.append_message(conv.roles[0], qs)
            conv.append_message(conv.roles[1], None)
            prompt = conv.get_prompt()

            # Set the pad token id for generation
            self.model.generation_config.pad_token_id = self.tokenizer.pad_token_id

            # Tokenize prompt
            input_ids = tokenizer_image_token(prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt') \
                .unsqueeze(0).to(self.model.device)

            # Load and preprocess image
            image = Image.open(image_file).convert('RGB')
            image_tensor = process_images([image], self.image_processor, self.model.config)[0]

            # Run inference
            with torch.inference_mode():
                output_ids = self.model.generate(
                    input_ids,
                    images=image_tensor.unsqueeze(0).half(),
                    image_sizes=[image.size],
                    do_sample=True if temperature > 0 else False,
                    temperature=temperature,
                    top_p=top_p,
                    num_beams=num_beams,
                    max_new_tokens=256,
                    use_cache=True)

            outputs = self.tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip()
            t2 = time.time()
            return {
                'timecost': t2 - t1,
                'content': outputs
            }
        except Exception as e:
            exception(f'Exception happened: {e}')
            return None

    async def generate(self, image_file, prompt):
        # Run the blocking _generate in a worker so the event loop is not blocked.
        f = awaitify(self._generate)
        return await f(image_file, prompt)


fastvlm = None

def init():
    global fastvlm
    g = ServerEnv()
    fastvlm = FastVLM()
    g.fastvlm = fastvlm
    g.generate = fastvlm.generate


if __name__ == "__main__":
    webapp(init)
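The async `generate` method relies on `appPublic.worker.awaitify` to run the blocking `_generate` call off the event loop. The sketch below is a self-contained analogue of that pattern using only the standard library; it illustrates the presumed behaviour and is not appPublic's actual implementation (`awaitify_sketch` and `slow_generate` are hypothetical names).

```python
# Self-contained analogue of the awaitify pattern used in app/fastvlm.py:
# wrap a blocking callable so it can be awaited without stalling the event loop.
import asyncio
import functools
import time


def awaitify_sketch(blocking_fn):
    async def wrapper(*args, **kwargs):
        loop = asyncio.get_running_loop()
        # Run the blocking call in the default thread-pool executor.
        return await loop.run_in_executor(None, functools.partial(blocking_fn, *args, **kwargs))
    return wrapper


def slow_generate(image_file, prompt):
    time.sleep(0.5)  # stands in for the blocking model.generate() call
    return {'timecost': 0.5, 'content': f'described {image_file} for prompt {prompt!r}'}


async def main():
    result = await awaitify_sketch(slow_generate)('test.jpg', 'Describe this image')
    print(result)


if __name__ == '__main__':
    asyncio.run(main())
```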
conf/config.json (new file, 74 lines)
@@ -0,0 +1,74 @@
{
    "language":{
        "zh":{
            "sentence_splitter":"[。?!]|\r?\n"
        },
        "en":{
            "sentence_splitter":"[.?!] |\r?\n"
        }
    },
    "model_path":"/share/models/apple/llava-fastvithd_0.5b_stage3",
    "device":"cuda:0",
    "filesroot":"$[workdir]$/files",
    "logger":{
        "name":"fvlm",
        "levelname":"info",
        "logfile":"$[workdir]$/logs/fvlm.log"
    },
    "website":{
        "paths":[
            ["$[workdir]$/wwwroot",""]
        ],
        "client_max_size":10000,
        "host":"0.0.0.0",
        "port":9994,
        "coding":"utf-8",
        "ssl_gg":{
            "crtfile":"$[workdir]$/conf/www.bsppo.com.pem",
            "keyfile":"$[workdir]$/conf/www.bsppo.com.key"
        },
        "indexes":[
            "index.html",
            "index.tmpl",
            "index.ui",
            "index.dspy",
            "index.md"
        ],
        "startswiths":[
            {
                "leading":"/idfile",
                "registerfunction":"idfile"
            }
        ],
        "processors":[
            [".ws","ws"],
            [".xterm","xterm"],
            [".proxy","proxy"],
            [".llm", "llm"],
            [".llms", "llms"],
            [".llma", "llma"],
            [".xlsxds","xlsxds"],
            [".sqlds","sqlds"],
            [".tmpl.js","tmpl"],
            [".tmpl.css","tmpl"],
            [".html.tmpl","tmpl"],
            [".bcrud", "bricks_crud"],
            [".tmpl","tmpl"],
            [".app","app"],
            [".bui","bui"],
            [".ui","bui"],
            [".dspy","dspy"],
            [".md","md"]
        ],
        "rsakey":{
            "privatekey":"$[workdir]$/conf/rsa_private_key.pem",
            "publickey":"$[workdir]$/conf/rsa_public_key.pem"
        },
        "session_max_time":3000,
        "session_issue_time":2500,
        "session_redis_notuse":{
            "url":"redis://127.0.0.1:6379"
        }
    }
}
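The keys app/fastvlm.py actually reads are `model_path` and `device` (via `getConfig()`), while the HTTP bind address and port come from the `website` block. Below is a minimal sketch that inspects those values with the standard `json` module; the running app loads this file through `appPublic.jsonConfig.getConfig()` instead, and the `$[workdir]$` placeholders are presumably expanded by that framework at startup.

```python
# Minimal sketch: inspect the config values fastvlm.py and ahserver care about.
# The real app loads this file via appPublic.jsonConfig.getConfig(); reading it
# with the standard json module here is only for illustration, and the
# "$[workdir]$" placeholders are left unexpanded.
import json

with open('conf/config.json', encoding='utf-8') as f:
    cfg = json.load(f)

print(cfg['model_path'])                               # /share/models/apple/llava-fastvithd_0.5b_stage3
print(cfg['device'])                                   # cuda:0
print(cfg['website']['host'], cfg['website']['port'])  # 0.0.0.0 9994
```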
files/README.md (new file, empty)

fvlm.service (new file, 17 lines)
@@ -0,0 +1,17 @@
[Unit]
Wants=systemd-networkd.service

[Service]
User=ymq
Group=ymq
Type=forking
WorkingDirectory=/share/ymq/run/fvlm
# ExecStart=/share/ymq/run/fvlm/fvlm.env/bin/python app/fastvlm.py -p 9994
ExecStart=/share/ymq/run/fvlm/start.sh
ExecStop=/share/ymq/run/fvlm/stop.sh
StandardOutput=append:/var/log/fvlm/fvlm.log
StandardError=append:/var/log/fvlm/fvlm.log
SyslogIdentifier=fvlm

[Install]
WantedBy=multi-user.target
logs/README.md (new file, empty)

requirements.txt (new file, 5 lines)
@@ -0,0 +1,5 @@
llava
# git+https://github.com/apple/ml-fastvlm
git+https://git.kaiyuancloud.cn/yumoqing/apppublic
git+https://git.kaiyuancloud.cn/yumoqing/sqlor
git+https://git.kaiyuancloud.cn/yumoqing/ahserver
start.sh (new executable file, 3 lines)
@@ -0,0 +1,3 @@
#!/usr/bin/bash

CUDA_VISIBLE_DEVICES=6 /share/ymq/run/fvlm/fvlm.env/bin/python app/fastvlm.py -p 9994 &
wwwroot/bricks (new symbolic link, 1 line)
@@ -0,0 +1 @@
/tmp/dist
wwwroot/index.md (new file, 11 lines)
@@ -0,0 +1,11 @@
# A FastVLM webserver

[fastvlm](https://github.com/apple/ml-fastvlm) is an open-source multimodal LLM released by Apple. It takes an image and a text prompt as input and generates text.

## Usage

```
curl https://{domain}/v1/generate \
  -F "prompt=Describe this image" \
  -F "image_path=@path_to_image"
```
wwwroot/index.ui (new file, 7 lines)
@@ -0,0 +1,7 @@
{
    "widgettype":"MdWidget",
    "options":{
        "md_url":"{{entire_url('index.md')}}",
        "width":"100%"
    }
}
wwwroot/v1/generate/index.dspy (new file, 8 lines)
@@ -0,0 +1,8 @@
debug(f'{params_kw=}')
image_path = realpath(params_kw.image_path)
prompt = params_kw.prompt
debug(f'{image_path=}, {prompt=}')
d = await generate(image_path, prompt)
debug(f'{image_path=}, {prompt=}, {d=}')
return d