first commit

yumoqing 2025-07-16 14:29:05 +08:00
commit 7d81f9093a
13 changed files with 270 additions and 0 deletions

35
README.md Normal file

@ -0,0 +1,35 @@
# A FastVLM webserver
[fastvlm](https://github.com/apple/ml-fastvlm) is an open-source multimodal
LLM released by Apple. It takes an image and a prompt as input and generates text.
## Create a virtual environment
```
python3 -m venv ~/fastvlm.env
source ~/fastvlm.env/bin/activate
```
## Installation
```
git clone https://github.com/apple/ml-fastvlm.git
cd ml-fastvlm
pip install -U .
git clone https://git.kaiyuancloud.cn/yumoqing/fvlm.git
pip install git+https://git.kaiyuancloud.cn/yumoqing/apppublic.git
pip install git+https://git.kaiyuancloud.cn/yumoqing/sqlor.git
pip install git+https://git.kaiyuancloud.cn/yumoqing/ahserver.git
```
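The server loads a FastVLM checkpoint from the `model_path` configured in `conf/config.json` (the sample config points at `/share/models/apple/llava-fastvithd_0.5b_stage3`); download a checkpoint from the ml-fastvlm project and adjust `model_path` and `device` to your setup. As a quick sanity check that the checkpoint loads, here is a minimal sketch reusing the same `llava` helpers `app/fastvlm.py` uses (the path is a placeholder):
```
# Standalone load check mirroring app/fastvlm.py; model_path is a placeholder.
from llava.utils import disable_torch_init
from llava.mm_utils import get_model_name_from_path
from llava.model.builder import load_pretrained_model

model_path = '/share/models/apple/llava-fastvithd_0.5b_stage3'  # adjust to your checkpoint
disable_torch_init()
model_name = get_model_name_from_path(model_path)
tokenizer, model, image_processor, context_len = load_pretrained_model(
    model_path, None, model_name, device='cuda:0')
print(f'loaded {model_name}, context length {context_len}')
```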
## Start the web server
```
cd fvlm
python app/fastvlm.py
```
The server listens on port 9994, as set in `conf/config.json`.
## Usage
```
curl http://localhost:9994/v1/generate \
-F "prompt=描述这张图片" \
-F "image_path=@path_to_image"
```
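The endpoint can also be called from Python. A minimal client sketch (`requests` is an assumed extra dependency; the response shape `{"timecost": ..., "content": ...}` follows the dict returned by `app/fastvlm.py`, assuming the server serializes it as JSON):
```
# Minimal client sketch for /v1/generate, mirroring the curl call above.
import requests

def vlm_generate(image_path, prompt, base_url='http://localhost:9994'):
    with open(image_path, 'rb') as f:
        resp = requests.post(f'{base_url}/v1/generate',
                             data={'prompt': prompt},
                             files={'image_path': f})  # same field names as the curl example
    resp.raise_for_status()
    return resp.json()

print(vlm_generate('cat.jpg', 'Describe this image'))
```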

106
app/fastvlm.py Normal file

@ -0,0 +1,106 @@
#
# Modified from LLaVA/predict.py
# Please see ACKNOWLEDGEMENTS for details about LICENSE
#
import os
import torch
import time
from PIL import Image
from llava.utils import disable_torch_init
from llava.conversation import conv_templates
from llava.model.builder import load_pretrained_model
from llava.mm_utils import tokenizer_image_token, process_images, get_model_name_from_path
from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
from ahserver.webapp import webapp
from ahserver.serverenv import ServerEnv
from appPublic.jsonConfig import getConfig
from appPublic.log import debug, exception, error
from appPublic.worker import awaitify
class FastVLM:
    def __init__(self):
        self.config = getConfig()
        model_path = self.config.model_path
        """
        generation_config = None
        if os.path.exists(os.path.join(model_path, 'generation_config.json')):
            generation_config = os.path.join(model_path, '.generation_config.json')
            os.rename(os.path.join(model_path, 'generation_config.json'),
                      generation_config)
        """
        # Load model
        disable_torch_init()
        model_name = get_model_name_from_path(model_path)
        model_base = None
        device = self.config.device
        tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, model_base, model_name, device=device)
        self.tokenizer = tokenizer
        self.model = model
        self.image_processor = image_processor
        self.context_len = context_len

    def _generate(self, image_file, prompt,
                  temperature=0.2,
                  top_p=None,
                  num_beams=1,
                  conv_mode='qwen_2'):
        qs = prompt
        t1 = time.time()
        if self.model.config.mm_use_im_start_end:
            qs = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN + '\n' + qs
        else:
            qs = DEFAULT_IMAGE_TOKEN + '\n' + qs
        conv = conv_templates[conv_mode].copy()
        conv.append_message(conv.roles[0], qs)
        conv.append_message(conv.roles[1], None)
        prompt = conv.get_prompt()

        # Set the pad token id for generation
        self.model.generation_config.pad_token_id = self.tokenizer.pad_token_id

        # Tokenize prompt
        input_ids = tokenizer_image_token(prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt') \
            .unsqueeze(0).to(self.model.device)

        # Load and preprocess image
        image = Image.open(image_file).convert('RGB')
        image_tensor = process_images([image], self.image_processor, self.model.config)[0]

        # Run inference; on failure, log the exception and return None
        try:
            with torch.inference_mode():
                output_ids = self.model.generate(
                    input_ids,
                    images=image_tensor.unsqueeze(0).half(),
                    image_sizes=[image.size],
                    do_sample=temperature > 0,
                    temperature=temperature,
                    top_p=top_p,
                    num_beams=num_beams,
                    max_new_tokens=256,
                    use_cache=True)
            outputs = self.tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip()
            t2 = time.time()
            return {
                'timecost': t2 - t1,
                'content': outputs
            }
        except Exception as e:
            exception(f'inference failed: {e}')
            return None

    async def generate(self, image_file, prompt):
        # Run the blocking _generate in a worker so the event loop stays responsive
        f = awaitify(self._generate)
        return await f(image_file, prompt)

fastvlm = None

def init():
    global fastvlm
    fastvlm = FastVLM()          # must exist before it is published to the server env
    g = ServerEnv()
    g.fastvlm = fastvlm
    g.generate = fastvlm.generate

if __name__ == "__main__":
    webapp(init)

74
conf/config.json Normal file

@ -0,0 +1,74 @@
{
    "language":{
        "zh":{
            "sentence_splitter":"[。?!]|\r?\n"
        },
        "en":{
            "sentence_splitter":"[.?!] |\r?\n"
        }
    },
    "model_path":"/share/models/apple/llava-fastvithd_0.5b_stage3",
    "device":"cuda:0",
    "filesroot":"$[workdir]$/files",
    "logger":{
        "name":"fvlm",
        "levelname":"info",
        "logfile":"$[workdir]$/logs/fvlm.log"
    },
    "website":{
        "paths":[
            ["$[workdir]$/wwwroot",""]
        ],
        "client_max_size":10000,
        "host":"0.0.0.0",
        "port":9994,
        "coding":"utf-8",
        "ssl_gg":{
            "crtfile":"$[workdir]$/conf/www.bsppo.com.pem",
            "keyfile":"$[workdir]$/conf/www.bsppo.com.key"
        },
        "indexes":[
            "index.html",
            "index.tmpl",
            "index.ui",
            "index.dspy",
            "index.md"
        ],
        "startswiths":[
            {
                "leading":"/idfile",
                "registerfunction":"idfile"
            }
        ],
        "processors":[
            [".ws","ws"],
            [".xterm","xterm"],
            [".proxy","proxy"],
            [".llm", "llm"],
            [".llms", "llms"],
            [".llma", "llma"],
            [".xlsxds","xlsxds"],
            [".sqlds","sqlds"],
            [".tmpl.js","tmpl"],
            [".tmpl.css","tmpl"],
            [".html.tmpl","tmpl"],
            [".bcrud", "bricks_crud"],
            [".tmpl","tmpl"],
            [".app","app"],
            [".bui","bui"],
            [".ui","bui"],
            [".dspy","dspy"],
            [".md","md"]
        ],
        "rsakey":{
            "privatekey":"$[workdir]$/conf/rsa_private_key.pem",
            "publickey":"$[workdir]$/conf/rsa_public_key.pem"
        },
        "session_max_time":3000,
        "session_issue_time":2500,
        "session_redis_notuse":{
            "url":"redis://127.0.0.1:6379"
        }
    }
}
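For reference, `app/fastvlm.py` reads this file through `appPublic`'s `getConfig()`, and values are accessed as attributes; the `$[workdir]$` placeholders are, by assumption, expanded to the server's working directory when the config is loaded. A minimal access sketch:
```
# Sketch: how the server code reads this config (see app/fastvlm.py);
# getConfig() returns the config object that ahserver's webapp() initializes.
from appPublic.jsonConfig import getConfig

config = getConfig()
print(config.model_path)   # "/share/models/apple/llava-fastvithd_0.5b_stage3"
print(config.device)       # "cuda:0"
```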

0
files/README.md Normal file

17
fvlm.service Normal file

@ -0,0 +1,17 @@
[Unit]
Wants=systemd-networkd.service
[Service]
User=ymq
Group=ymq
Type=forking
WorkingDirectory=/share/ymq/run/fvlm
# ExecStart=/share/ymq/run/fvlm/fvlm.env/bin/python app/fastvlm.py -p 9994
ExecStart=/share/ymq/run/fvlm/start.sh
ExecStop=/share/ymq/run/fvlm/stop.sh
StandardOutput=append:/var/log/fvlm/fvlm.log
StandardError=append:/var/log/fvlm/fvlm.log
SyslogIdentifier=fvlm
[Install]
WantedBy=multi-user.target

0
logs/README.md Normal file

5
requirements.txt Normal file

@ -0,0 +1,5 @@
llava
# git+https://github.com/apple/ml-fastvlm
git+https://git.kaiyuancloud.cn/yumoqing/apppublic
git+https://git.kaiyuancloud.cn/yumoqing/sqlor
git+https://git.kaiyuancloud.cn/yumoqing/ahserver

3
start.sh Executable file

@ -0,0 +1,3 @@
#!/usr/bin/bash
CUDA_VISIBLE_DEVICES=6 /share/ymq/run/fvlm/fvlm.env/bin/python app/fastvlm.py -p 9994 &

3
stop.sh Executable file

@ -0,0 +1,3 @@
#!/usr/bin/bash
/d/ymq/bin/killname fastvlm.py

1
wwwroot/bricks Symbolic link

@ -0,0 +1 @@
/tmp/dist

11
wwwroot/index.md Normal file

@ -0,0 +1,11 @@
# A FastVLM webserver
[fastvlm](https://github.com/apple/ml-fastvlm) is an open-source multimodal
LLM released by Apple. It takes an image and a prompt as input and generates text.
## Usage
```
curl https://{domain}/v1/generate \
-F "prompt=描述这张图片" \
-F "image_path=@path_to_image"
```

7
wwwroot/index.ui Normal file

@ -0,0 +1,7 @@
{
    "widgettype":"MdWidget",
    "options":{
        "md_url":"{{entire_url('index.md')}}",
        "width":"100%"
    }
}

8
wwwroot/v1/generate.dspy Normal file

@ -0,0 +1,8 @@
# Handler for the /v1/generate endpoint (ahserver .dspy script).
# debug, realpath, params_kw and generate are injected by the ahserver
# runtime; generate is registered in app/fastvlm.py's init().
debug(f'{params_kw=}')
image_path = realpath(params_kw.image_path)  # resolve the uploaded image to a filesystem path
prompt = params_kw.prompt
debug(f'{image_path=}, {prompt=}')
d = await generate(image_path, prompt)  # returns {'timecost': ..., 'content': ...} or None
debug(f'{image_path=}, {prompt=}, {d=}')
return d