fix: use cuda:0 (CUDA_VISIBLE_DEVICES handles GPU mapping), add shell scripts and README
This commit is contained in:
parent
e0009da8e7
commit
ceb905d3eb
52
README.md
Normal file
52
README.md
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
# CLIP Embedding Service
|
||||||
|
|
||||||
|
CLIP-ViT-H/14 多模态 Embedding 服务,支持文本和图片向量化。
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
- **Model**: laion/CLIP-ViT-H-14-laion2B-s32B-b79K
|
||||||
|
- **Dimension**: 1024
|
||||||
|
- **Precision**: float16
|
||||||
|
- **Port**: 9086
|
||||||
|
- **GPU**: 2 (default; override with the `CLIP_GPU_ID` environment variable)
|
||||||
|
|
||||||
|
## API
|
||||||
|
|
||||||
|
### GET /api/status
|
||||||
|
Service health and GPU info.
|
||||||
|
|
||||||
|
### POST /api/text
|
||||||
|
Text embedding.
|
||||||
|
```json
|
||||||
|
{"texts": ["hello world", "a cat"]}
|
||||||
|
```
|
||||||
|
|
||||||
|
### POST /api/image
|
||||||
|
Image embedding (file path, URL, or base64 data URI).
|
||||||
|
```json
|
||||||
|
{"images": ["/path/to/img.jpg", "https://example.com/img.png"]}
|
||||||
|
```
|
||||||
|
|
||||||
|
### POST /api/embed
|
||||||
|
Combined text + image embedding.
|
||||||
|
```json
|
||||||
|
{"texts": ["a cat"], "images": ["/path/to/cat.jpg"]}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Model Download (Offline Deploy)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install huggingface_hub
|
||||||
|
huggingface-cli download laion/CLIP-ViT-H-14-laion2B-s32B-b79K \
|
||||||
|
--local-dir /data/ymq/models/laion/CLIP-ViT-H-14-laion2B-s32B-b79K \
|
||||||
|
--local-dir-use-symlinks False
|
||||||
|
```
|
||||||
|
Size: ~15GB
|
||||||
|
|
||||||
|
## Deploy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash build.sh deploy   # stop if running, pull latest code, start, health-check
|
||||||
|
bash build.sh stop # stop
|
||||||
|
bash build.sh status # check
|
||||||
|
```
|
||||||
6
ah.py
6
ah.py
@ -2,6 +2,8 @@
|
|||||||
import os
|
import os
|
||||||
from ahserver.webapp import webapp
|
from ahserver.webapp import webapp
|
||||||
|
|
||||||
if __name__ == '__main__':
|
def init():
|
||||||
webapp()
|
pass
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
webapp(init)
|
||||||
|
|||||||
58
build.sh
Executable file
58
build.sh
Executable file
@ -0,0 +1,58 @@
|
|||||||
|
#!/bin/bash
# Deployment helper for the CLIP embedding service.
# Usage: build.sh {deploy|update|stop|start|status}   (default action: status)
set -e
cd "$(dirname "$0")"

SERVICE_NAME="clip_embedding"
PORT=9086
DEFAULT_GPU=2
STATUS_URL="http://localhost:$PORT/api/status"
action="${1:-status}"

# Succeeds when ah.pid exists and names a live process.
is_running() {
    [ -f ah.pid ] && kill -0 "$(cat ah.pid)" 2>/dev/null
}

# Probe the status endpoint. The timeout keeps a hung service from
# stalling this script indefinitely.
http_ok() {
    curl -s --max-time 3 "$STATUS_URL" > /dev/null 2>&1
}

# Pretty-print the status JSON; best-effort, never fails the script.
print_status_json() {
    curl -s --max-time 3 "$STATUS_URL" | python3 -m json.tool 2>/dev/null || true
}

case "$action" in
    deploy|update)
        echo "=== $SERVICE_NAME Deploy ==="
        if is_running; then
            bash stop.sh
            sleep 2
        fi
        if [ -d .git ]; then
            echo "Pulling latest code..."
            # Best-effort: try master, then main; a failed pull does not abort the deploy.
            git pull origin master 2>/dev/null || git pull origin main 2>/dev/null || true
        fi
        export CLIP_GPU_ID="${CLIP_GPU_ID:-$DEFAULT_GPU}"
        bash start.sh
        sleep 3
        if http_ok; then
            echo "Service is healthy on port $PORT"
            print_status_json
        else
            echo "WARNING: Service may not be ready yet. Check nohup.out"
        fi
        ;;
    stop)
        bash stop.sh
        ;;
    start)
        export CLIP_GPU_ID="${CLIP_GPU_ID:-$DEFAULT_GPU}"
        bash start.sh
        ;;
    status)
        echo "=== $SERVICE_NAME Status ==="
        if is_running; then
            echo "Process: running (PID $(cat ah.pid))"
        else
            echo "Process: not running"
        fi
        echo "Port: $PORT"
        echo "GPU: ${CLIP_GPU_ID:-$DEFAULT_GPU}"
        if http_ok; then
            echo "HTTP: OK"
            print_status_json
        else
            echo "HTTP: not responding"
        fi
        ;;
    *)
        echo "Usage: $0 {deploy|update|stop|start|status}"
        exit 1
        ;;
esac
|
||||||
@ -1,22 +1,22 @@
|
|||||||
{
|
{
|
||||||
password_key: ClipEmbedding2026Key,
|
"password_key": "ClipEmbedding2026Key",
|
||||||
databases: {},
|
"databases": {},
|
||||||
session_redis: {
|
"session_redis": {
|
||||||
host: 127.0.0.1,
|
"host": "127.0.0.1",
|
||||||
port: 6379,
|
"port": 6379,
|
||||||
db: 1
|
"db": 1
|
||||||
},
|
},
|
||||||
website: {
|
"website": {
|
||||||
paths: [
|
"paths": [
|
||||||
[0$/app, ]
|
["$[workdir]$/app", ""]
|
||||||
],
|
],
|
||||||
host: 0.0.0.0,
|
"host": "0.0.0.0",
|
||||||
port: 9086,
|
"port": 9086,
|
||||||
coding: utf-8,
|
"coding": "utf-8",
|
||||||
indexes: [index.html, index.dspy],
|
"indexes": ["index.html", "index.dspy"],
|
||||||
processors: [
|
"processors": [
|
||||||
[.dspy, dspy]
|
[".dspy", "dspy"]
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
hot_reload: false
|
"hot_reload": false
|
||||||
}
|
}
|
||||||
7
nohup.out
Normal file
7
nohup.out
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
2026-06-14 17:03:26.974[webapp][debug][/data/ymq/wan22-service/py3/lib/python3.10/site-packages/ahserver/configuredServer.py:40]client_max_size=1024000000
|
||||||
|
reuse_port= True
|
||||||
|
======== Running on http://0.0.0.0:9086 ========
|
||||||
|
(Press CTRL+C to quit)
|
||||||
|
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
||||||
|
[CLIP] Model loaded on cuda:0, dtype=float16
|
||||||
|
2026-06-14 17:03:40.976[webapp][debug][/data/ymq/wan22-service/py3/lib/python3.10/site-packages/ahserver/auth_api.py:178]timecost=client(127.0.0.1) None access /api/text cost 4.946, (0.000)
|
||||||
23
start.sh
Executable file
23
start.sh
Executable file
@ -0,0 +1,23 @@
|
|||||||
|
#!/bin/bash
# Start the CLIP embedding service in the background and record its PID in ah.pid.
# Abort if we cannot enter the script directory; otherwise ah.pid and
# nohup.out would be written somewhere unexpected.
cd "$(dirname "$0")" || exit 1

# Physical GPU index to expose. The Python side always uses cuda:0, which
# maps to this GPU through CUDA_VISIBLE_DEVICES.
export CLIP_GPU_ID="${CLIP_GPU_ID:-2}"
export CUDA_VISIBLE_DEVICES="$CLIP_GPU_ID"
export PYTHONPATH="$(pwd)"

if [ -f ah.pid ] && kill -0 "$(cat ah.pid)" 2>/dev/null; then
    echo "Service already running (PID $(cat ah.pid))"
    exit 1
fi

echo "Starting CLIP Embedding Service on GPU $CLIP_GPU_ID, port 9086..."
nohup /data/ymq/wan22-service/py3/bin/python ah.py > nohup.out 2>&1 &
PID=$!
echo "$PID" > ah.pid
echo "Started (PID $PID)"

# Give the process a moment to fail fast (bad config, import error, ...).
sleep 2
if kill -0 "$PID" 2>/dev/null; then
    echo "Service is running"
else
    echo "Service failed to start. Check nohup.out"
    tail -20 nohup.out
    exit 1
fi
|
||||||
14
stop.sh
Executable file
14
stop.sh
Executable file
@ -0,0 +1,14 @@
|
|||||||
|
#!/bin/bash
# Stop the service whose PID is recorded in ah.pid.
# Exits 0 when the process is gone (or was never running), 1 when it
# survives SIGTERM; in that case ah.pid is kept so the process stays tracked.
cd "$(dirname "$0")" || exit 1

if [ ! -f ah.pid ]; then
    echo "No ah.pid found"
    exit 0
fi

PID=$(cat ah.pid)
if kill -0 "$PID" 2>/dev/null; then
    kill "$PID"
    # Wait up to ~10s for the process to exit, so a following start does
    # not overlap with an instance that is still shutting down.
    for _ in 1 2 3 4 5 6 7 8 9 10; do
        kill -0 "$PID" 2>/dev/null || break
        sleep 1
    done
    if kill -0 "$PID" 2>/dev/null; then
        echo "Process $PID still running after SIGTERM; leaving ah.pid in place"
        exit 1
    fi
    echo "Stopped (PID $PID)"
else
    echo "Process $PID not running"
fi
rm -f ah.pid
|
||||||
BIN
workers/__pycache__/__init__.cpython-310.pyc
Normal file
BIN
workers/__pycache__/__init__.cpython-310.pyc
Normal file
Binary file not shown.
BIN
workers/__pycache__/clip_model.cpython-310.pyc
Normal file
BIN
workers/__pycache__/clip_model.cpython-310.pyc
Normal file
Binary file not shown.
@ -1,2 +1,61 @@
|
|||||||
# -*- coding:utf-8 -*-
|
# -*- coding:utf-8 -*-
|
||||||
CLIP ViT-H/14 lazy-loading wrapper.
|
"""CLIP ViT-H/14 lazy-loading wrapper."""
|
||||||
|
import os
|
||||||
|
import torch
|
||||||
|
import numpy as np
|
||||||
|
from PIL import Image
|
||||||
|
from io import BytesIO
|
||||||
|
import base64
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
MODEL_PATH = '/data/ymq/models/laion/CLIP-ViT-H-14-laion2B-s32B-b79K'
|
||||||
|
|
||||||
|
_model = None
|
||||||
|
_processor = None
|
||||||
|
_device = None
|
||||||
|
|
||||||
|
|
||||||
|
def _load():
    """Load the CLIP model and processor once and cache them in module globals.

    Returns immediately when ``_model`` is already set. Otherwise the
    processor and model are read from ``MODEL_PATH``, the model is moved to
    the selected device and put in eval mode, and only then are the globals
    ``_device``, ``_processor`` and ``_model`` assigned.

    Everything is built in locals first so that a failure part-way through
    (for example while moving the model to the GPU) does not leave ``_model``
    pointing at a half-initialised model that later calls would reuse.
    """
    global _model, _processor, _device
    if _model is not None:
        return
    # CUDA_VISIBLE_DEVICES is set in start.sh, so GPU 0 in visible devices is our target
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    # Imported lazily so importing this module does not pull in transformers.
    from transformers import CLIPModel, CLIPProcessor
    processor = CLIPProcessor.from_pretrained(MODEL_PATH)
    model = CLIPModel.from_pretrained(MODEL_PATH, torch_dtype=torch.float16)
    model = model.to(device)
    model.eval()
    # Publish the globals last; `_model` is the "loaded" flag checked above.
    _device = device
    _processor = processor
    _model = model
    print(f'[CLIP] Model loaded on {_device}, dtype=float16')
|
||||||
|
|
||||||
|
|
||||||
|
def embed_texts(texts):
    """Return L2-normalised CLIP text embeddings as nested Python lists.

    Args:
        texts: A list of strings to embed (passed to the processor as
            ``text=``).

    Returns:
        A list with one embedding (list of floats) per input text. An empty
        list or tuple yields ``[]`` without loading the model.
    """
    # Nothing to embed: skip model loading and the tokenizer call entirely.
    if isinstance(texts, (list, tuple)) and not texts:
        return []
    _load()
    # Inputs are padded to a common length and truncated to at most 77 tokens.
    inputs = _processor(text=texts, return_tensors='pt', padding=True, truncation=True, max_length=77)
    inputs = {k: v.to(_device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = _model.get_text_features(**inputs)
    # Scale each row to unit length.
    outputs = outputs / outputs.norm(dim=-1, keepdim=True)
    return outputs.cpu().float().numpy().tolist()
|
||||||
|
|
||||||
|
|
||||||
|
def _load_image(src):
|
||||||
|
if src.startswith('data:'):
|
||||||
|
_, b64 = src.split(',', 1)
|
||||||
|
return Image.open(BytesIO(base64.b64decode(b64))).convert('RGB')
|
||||||
|
elif src.startswith('http://') or src.startswith('https://'):
|
||||||
|
with urllib.request.urlopen(src, timeout=30) as resp:
|
||||||
|
return Image.open(BytesIO(resp.read())).convert('RGB')
|
||||||
|
else:
|
||||||
|
return Image.open(src).convert('RGB')
|
||||||
|
|
||||||
|
|
||||||
|
def embed_images(sources):
    """Return L2-normalised CLIP image embeddings as nested Python lists.

    Args:
        sources: A list of image sources, each accepted by ``_load_image``
            (data URI, http(s) URL, or local path). A single string is
            treated as a one-element list.

    Returns:
        A list with one embedding (list of floats) per input image. An empty
        list or tuple yields ``[]`` without loading the model.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(sources, str):
        sources = [sources]
    # Nothing to embed: skip model loading and the processor call entirely.
    if isinstance(sources, (list, tuple)) and not sources:
        return []
    _load()
    images = [_load_image(s) for s in sources]
    inputs = _processor(images=images, return_tensors='pt')
    inputs = {k: v.to(_device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = _model.get_image_features(**inputs)
    # Scale each row to unit length.
    outputs = outputs / outputs.norm(dim=-1, keepdim=True)
    return outputs.cpu().float().numpy().tolist()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user