first commit
commit 7d81f9093a

README.md (new file, 35 lines)
@@ -0,0 +1,35 @@
# A FastVLM webserver

[fastvlm](https://github.com/apple/ml-fastvlm) is an open-source multimodal LLM released by Apple. It takes an image and a text prompt as input and generates text.

## Create virtual environment

```
python3 -m venv ~/fastvlm.env
source ~/fastvlm.env/bin/activate
```

## Installation

```
git clone https://github.com/apple/ml-fastvlm.git
cd ml-fastvlm
pip install -U .
git clone https://git.kaiyuancloud.cn/yumoqing/fvlm.git
pip install git+https://git.kaiyuancloud.cn/yumoqing/apppublic.git
pip install git+https://git.kaiyuancloud.cn/yumoqing/sqlor.git
pip install git+https://git.kaiyuancloud.cn/yumoqing/ahserver.git
```

## Start the web server

```
cd fvlm
python app/fastvlm.py
```

The server binds to port 9994 (see "website.port" in conf/config.json).

## Usage

```
curl http://localhost:9994/v1/generate \
  -F "prompt=Describe this image" \
  -F "image_path=@path_to_image"
```
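The same request can also be made from Python. The snippet below is a minimal sketch, not part of the repository: it assumes the `requests` package is installed and that the server accepts the same multipart form fields (`prompt`, `image_path`) shown in the curl example above.

```python
# Minimal client sketch for the /v1/generate endpoint (assumes `pip install requests`).
import requests

url = "http://localhost:9994/v1/generate"
image_path = "test.jpg"  # hypothetical local image file

with open(image_path, "rb") as f:
    resp = requests.post(
        url,
        data={"prompt": "Describe this image"},
        files={"image_path": f},  # field name taken from the curl example
    )

resp.raise_for_status()
print(resp.json())  # expected shape per app/fastvlm.py: {"timecost": ..., "content": ...}
```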
app/fastvlm.py (new file, 106 lines)
@@ -0,0 +1,106 @@
#
# Modified from LLaVA/predict.py
# Please see ACKNOWLEDGEMENTS for details about LICENSE
#
import os
import torch
import time
from PIL import Image

from llava.utils import disable_torch_init
from llava.conversation import conv_templates
from llava.model.builder import load_pretrained_model
from llava.mm_utils import tokenizer_image_token, process_images, get_model_name_from_path
from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
from ahserver.webapp import webapp
from ahserver.serverenv import ServerEnv
from appPublic.jsonConfig import getConfig
from appPublic.log import debug, exception, error
from appPublic.worker import awaitify


class FastVLM:
    def __init__(self):
        self.config = getConfig()
        model_path = self.config.model_path
        """
        generation_config = None
        if os.path.exists(os.path.join(model_path, 'generation_config.json')):
            generation_config = os.path.join(model_path, '.generation_config.json')
            os.rename(os.path.join(model_path, 'generation_config.json'),
                      generation_config)
        """

        # Load model
        disable_torch_init()
        model_name = get_model_name_from_path(model_path)
        model_base = None
        device = self.config.device
        tokenizer, model, image_processor, context_len = load_pretrained_model(
            model_path, model_base, model_name, device=device)
        self.tokenizer = tokenizer
        self.model = model
        self.image_processor = image_processor
        self.context_len = context_len

    def _generate(self, image_file, prompt,
                  temperature=0.2,
                  top_p=None,
                  num_beams=1,
                  conv_mode='qwen_2'):
        t1 = time.time()
        try:
            qs = prompt
            if self.model.config.mm_use_im_start_end:
                qs = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN + '\n' + qs
            else:
                qs = DEFAULT_IMAGE_TOKEN + '\n' + qs
            conv = conv_templates[conv_mode].copy()
            conv.append_message(conv.roles[0], qs)
            conv.append_message(conv.roles[1], None)
            prompt = conv.get_prompt()

            # Set the pad token id for generation
            self.model.generation_config.pad_token_id = self.tokenizer.pad_token_id

            # Tokenize prompt
            input_ids = tokenizer_image_token(prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt') \
                .unsqueeze(0).to(self.model.device)

            # Load and preprocess image
            image = Image.open(image_file).convert('RGB')
            image_tensor = process_images([image], self.image_processor, self.model.config)[0]

            # Run inference
            with torch.inference_mode():
                output_ids = self.model.generate(
                    input_ids,
                    images=image_tensor.unsqueeze(0).half(),
                    image_sizes=[image.size],
                    do_sample=True if temperature > 0 else False,
                    temperature=temperature,
                    top_p=top_p,
                    num_beams=num_beams,
                    max_new_tokens=256,
                    use_cache=True)

            outputs = self.tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip()
            t2 = time.time()
            return {
                'timecost': t2 - t1,
                'content': outputs
            }
        except Exception as e:
            exception(f'Exception happened: {e}')
            return None

    async def generate(self, image_file, prompt):
        # Run the blocking _generate in a worker so the event loop is not blocked.
        f = awaitify(self._generate)
        return await f(image_file, prompt)


fastvlm = None

def init():
    global fastvlm
    g = ServerEnv()
    fastvlm = FastVLM()
    g.fastvlm = fastvlm
    g.generate = fastvlm.generate


if __name__ == "__main__":
    webapp(init)
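The async `generate` method relies on `appPublic.worker.awaitify` to run the blocking `_generate` call off the event loop. The sketch below is a self-contained analogue of that pattern using only the standard library; it illustrates the presumed behaviour and is not appPublic's actual implementation (`awaitify_sketch` and `slow_generate` are hypothetical names).

```python
# Self-contained analogue of the awaitify pattern used in app/fastvlm.py:
# wrap a blocking callable so it can be awaited without stalling the event loop.
import asyncio
import functools
import time


def awaitify_sketch(blocking_fn):
    async def wrapper(*args, **kwargs):
        loop = asyncio.get_running_loop()
        # Run the blocking call in the default thread-pool executor.
        return await loop.run_in_executor(None, functools.partial(blocking_fn, *args, **kwargs))
    return wrapper


def slow_generate(image_file, prompt):
    time.sleep(0.5)  # stands in for the blocking model.generate() call
    return {'timecost': 0.5, 'content': f'described {image_file} for prompt {prompt!r}'}


async def main():
    result = await awaitify_sketch(slow_generate)('test.jpg', 'Describe this image')
    print(result)


if __name__ == '__main__':
    asyncio.run(main())
```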
conf/config.json (new file, 74 lines)
@@ -0,0 +1,74 @@
{
    "language":{
        "zh":{
            "sentence_splitter":"[。?!]|\r?\n"
        },
        "en":{
            "sentence_splitter":"[.?!] |\r?\n"
        }
    },
    "model_path":"/share/models/apple/llava-fastvithd_0.5b_stage3",
    "device":"cuda:0",
    "filesroot":"$[workdir]$/files",
    "logger":{
        "name":"fvlm",
        "levelname":"info",
        "logfile":"$[workdir]$/logs/fvlm.log"
    },
    "website":{
        "paths":[
            ["$[workdir]$/wwwroot",""]
        ],
        "client_max_size":10000,
        "host":"0.0.0.0",
        "port":9994,
        "coding":"utf-8",
        "ssl_gg":{
            "crtfile":"$[workdir]$/conf/www.bsppo.com.pem",
            "keyfile":"$[workdir]$/conf/www.bsppo.com.key"
        },
        "indexes":[
            "index.html",
            "index.tmpl",
            "index.ui",
            "index.dspy",
            "index.md"
        ],
        "startswiths":[
            {
                "leading":"/idfile",
                "registerfunction":"idfile"
            }
        ],
        "processors":[
            [".ws","ws"],
            [".xterm","xterm"],
            [".proxy","proxy"],
            [".llm", "llm"],
            [".llms", "llms"],
            [".llma", "llma"],
            [".xlsxds","xlsxds"],
            [".sqlds","sqlds"],
            [".tmpl.js","tmpl"],
            [".tmpl.css","tmpl"],
            [".html.tmpl","tmpl"],
            [".bcrud", "bricks_crud"],
            [".tmpl","tmpl"],
            [".app","app"],
            [".bui","bui"],
            [".ui","bui"],
            [".dspy","dspy"],
            [".md","md"]
        ],
        "rsakey":{
            "privatekey":"$[workdir]$/conf/rsa_private_key.pem",
            "publickey":"$[workdir]$/conf/rsa_public_key.pem"
        },
        "session_max_time":3000,
        "session_issue_time":2500,
        "session_redis_notuse":{
            "url":"redis://127.0.0.1:6379"
        }
    }
}
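The keys app/fastvlm.py actually reads are `model_path` and `device` (via `getConfig()`), while the HTTP bind address and port come from the `website` block. Below is a minimal sketch that inspects those values with the standard `json` module; the running app loads this file through `appPublic.jsonConfig.getConfig()` instead, and the `$[workdir]$` placeholders are presumably expanded by that framework at startup.

```python
# Minimal sketch: inspect the config values fastvlm.py and ahserver care about.
# The real app loads this file via appPublic.jsonConfig.getConfig(); reading it
# with the standard json module here is only for illustration, and the
# "$[workdir]$" placeholders are left unexpanded.
import json

with open('conf/config.json', encoding='utf-8') as f:
    cfg = json.load(f)

print(cfg['model_path'])                               # /share/models/apple/llava-fastvithd_0.5b_stage3
print(cfg['device'])                                   # cuda:0
print(cfg['website']['host'], cfg['website']['port'])  # 0.0.0.0 9994
```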
files/README.md (new file, empty)

fvlm.service (new file, 17 lines)
@@ -0,0 +1,17 @@
[Unit]
Wants=systemd-networkd.service

[Service]
User=ymq
Group=ymq
Type=forking
WorkingDirectory=/share/ymq/run/fvlm
# ExecStart=/share/ymq/run/fvlm/fvlm.env/bin/python app/fastvlm.py -p 9994
ExecStart=/share/ymq/run/fvlm/start.sh
ExecStop=/share/ymq/run/fvlm/stop.sh
StandardOutput=append:/var/log/fvlm/fvlm.log
StandardError=append:/var/log/fvlm/fvlm.log
SyslogIdentifier=fvlm

[Install]
WantedBy=multi-user.target
logs/README.md (new file, empty)

requirements.txt (new file, 5 lines)
@@ -0,0 +1,5 @@
llava
# git+https://github.com/apple/ml-fastvlm
git+https://git.kaiyuancloud.cn/yumoqing/apppublic
git+https://git.kaiyuancloud.cn/yumoqing/sqlor
git+https://git.kaiyuancloud.cn/yumoqing/ahserver
start.sh (new executable file, 3 lines)
@@ -0,0 +1,3 @@
#!/usr/bin/bash

CUDA_VISIBLE_DEVICES=6 /share/ymq/run/fvlm/fvlm.env/bin/python app/fastvlm.py -p 9994 &
wwwroot/bricks (new symbolic link, 1 line)
@@ -0,0 +1 @@
/tmp/dist
wwwroot/index.md (new file, 11 lines)
@@ -0,0 +1,11 @@
# A FastVLM webserver

[fastvlm](https://github.com/apple/ml-fastvlm) is an open-source multimodal LLM released by Apple. It takes an image and a text prompt as input and generates text.

## Usage

```
curl https://{domain}/v1/generate \
  -F "prompt=Describe this image" \
  -F "image_path=@path_to_image"
```
wwwroot/index.ui (new file, 7 lines)
@@ -0,0 +1,7 @@
{
    "widgettype":"MdWidget",
    "options":{
        "md_url":"{{entire_url('index.md')}}",
        "width":"100%"
    }
}
wwwroot/v1/generate/index.dspy (new file, 8 lines)
@@ -0,0 +1,8 @@
debug(f'{params_kw=}')
image_path = realpath(params_kw.image_path)
prompt = params_kw.prompt
debug(f'{image_path=}, {prompt=}')
d = await generate(image_path, prompt)
debug(f'{image_path=}, {prompt=}, {d=}')
return d