diff --git a/README.md b/README.md new file mode 100644 index 0000000..dadaa09 --- /dev/null +++ b/README.md @@ -0,0 +1,52 @@ +# CLIP Embedding Service + +CLIP-ViT-H/14 多模态 Embedding 服务,支持文本和图片向量化。 + +## Overview + +- **Model**: laion/CLIP-ViT-H-14-laion2B-s32B-b79K +- **Dimension**: 1024 +- **Precision**: float16 +- **Port**: 9086 +- **GPU**: 2 (default) + +## API + +### GET /api/status +Service health and GPU info. + +### POST /api/text +Text embedding. +```json +{"texts": ["hello world", "a cat"]} +``` + +### POST /api/image +Image embedding (file path, URL, or base64 data URI). +```json +{"images": ["/path/to/img.jpg", "https://example.com/img.png"]} +``` + +### POST /api/embed +Combined text + image embedding. +```json +{"texts": ["a cat"], "images": ["/path/to/cat.jpg"]} +``` + +## Model Download (Offline Deploy) + +```bash +pip install huggingface_hub +huggingface-cli download laion/CLIP-ViT-H-14-laion2B-s32B-b79K \ + --local-dir /data/ymq/models/laion/CLIP-ViT-H-14-laion2B-s32B-b79K \ + --local-dir-use-symlinks False +``` +Size: ~15GB + +## Deploy + +```bash +bash build.sh deploy # start +bash build.sh stop # stop +bash build.sh status # check +``` diff --git a/ah.pid b/ah.pid new file mode 100644 index 0000000..199bb65 --- /dev/null +++ b/ah.pid @@ -0,0 +1 @@ +829715 diff --git a/ah.py b/ah.py index 5194402..a9e56ca 100644 --- a/ah.py +++ b/ah.py @@ -2,6 +2,8 @@ import os from ahserver.webapp import webapp -if __name__ == '__main__': - webapp() +def init(): + pass +if __name__ == '__main__': + webapp(init) diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..dce7380 --- /dev/null +++ b/build.sh @@ -0,0 +1,58 @@ +#!/bin/bash +set -e +cd "$(dirname "$0")" + +SERVICE_NAME="clip_embedding" +PORT=9086 +DEFAULT_GPU=2 +action="${1:-status}" + +case "$action" in + deploy|update) + echo "=== $SERVICE_NAME Deploy ===" + if [ -f ah.pid ] && kill -0 $(cat ah.pid) 2>/dev/null; then + bash stop.sh + sleep 2 + fi + if [ -d .git ]; then + echo "Pulling 
latest code..." + git pull origin master 2>/dev/null || git pull origin main 2>/dev/null || true + fi + export CLIP_GPU_ID="${CLIP_GPU_ID:-$DEFAULT_GPU}" + bash start.sh + sleep 3 + if curl -s http://localhost:$PORT/api/status > /dev/null 2>&1; then + echo "Service is healthy on port $PORT" + curl -s http://localhost:$PORT/api/status | python3 -m json.tool 2>/dev/null || true + else + echo "WARNING: Service may not be ready yet. Check nohup.out" + fi + ;; + stop) + bash stop.sh + ;; + start) + export CLIP_GPU_ID="${CLIP_GPU_ID:-$DEFAULT_GPU}" + bash start.sh + ;; + status) + echo "=== $SERVICE_NAME Status ===" + if [ -f ah.pid ] && kill -0 $(cat ah.pid) 2>/dev/null; then + echo "Process: running (PID $(cat ah.pid))" + else + echo "Process: not running" + fi + echo "Port: $PORT" + echo "GPU: ${CLIP_GPU_ID:-$DEFAULT_GPU}" + if curl -s --max-time 3 http://localhost:$PORT/api/status > /dev/null 2>&1; then + echo "HTTP: OK" + curl -s http://localhost:$PORT/api/status | python3 -m json.tool 2>/dev/null || true + else + echo "HTTP: not responding" + fi + ;; + *) + echo "Usage: $0 {deploy|update|stop|start|status}" + exit 1 + ;; +esac diff --git a/conf/config.json b/conf/config.json index 1113f6a..c0f117a 100644 --- a/conf/config.json +++ b/conf/config.json @@ -1,22 +1,22 @@ { - password_key: ClipEmbedding2026Key, - databases: {}, - session_redis: { - host: 127.0.0.1, - port: 6379, - db: 1 + "password_key": "ClipEmbedding2026Key", + "databases": {}, + "session_redis": { + "host": "127.0.0.1", + "port": 6379, + "db": 1 }, - website: { - paths: [ - [0$/app, ] + "website": { + "paths": [ + ["$[workdir]$/app", ""] ], - host: 0.0.0.0, - port: 9086, - coding: utf-8, - indexes: [index.html, index.dspy], - processors: [ - [.dspy, dspy] + "host": "0.0.0.0", + "port": 9086, + "coding": "utf-8", + "indexes": ["index.html", "index.dspy"], + "processors": [ + [".dspy", "dspy"] ] }, - hot_reload: false -} + "hot_reload": false +} \ No newline at end of file diff --git a/nohup.out 
b/nohup.out new file mode 100644 index 0000000..22bb63e --- /dev/null +++ b/nohup.out @@ -0,0 +1,7 @@ +2026-06-14 17:03:26.974[webapp][debug][/data/ymq/wan22-service/py3/lib/python3.10/site-packages/ahserver/configuredServer.py:40]client_max_size=1024000000 +reuse_port= True +======== Running on http://0.0.0.0:9086 ======== +(Press CTRL+C to quit) +Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`. +[CLIP] Model loaded on cuda:0, dtype=float16 +2026-06-14 17:03:40.976[webapp][debug][/data/ymq/wan22-service/py3/lib/python3.10/site-packages/ahserver/auth_api.py:178]timecost=client(127.0.0.1) None access /api/text cost 4.946, (0.000) diff --git a/start.sh b/start.sh new file mode 100755 index 0000000..c5afe11 --- /dev/null +++ b/start.sh @@ -0,0 +1,23 @@ +#!/bin/bash +cd "$(dirname "$0")" +export CLIP_GPU_ID="${CLIP_GPU_ID:-2}" +export CUDA_VISIBLE_DEVICES="$CLIP_GPU_ID" +export PYTHONPATH="$(pwd)" + +if [ -f ah.pid ] && kill -0 $(cat ah.pid) 2>/dev/null; then + echo "Service already running (PID $(cat ah.pid))" + exit 1 +fi + +echo "Starting CLIP Embedding Service on GPU $CLIP_GPU_ID, port 9086..." +nohup /data/ymq/wan22-service/py3/bin/python ah.py > nohup.out 2>&1 & +echo $! > ah.pid +echo "Started (PID $(cat ah.pid))" +sleep 2 +if kill -0 $(cat ah.pid) 2>/dev/null; then + echo "Service is running" +else + echo "Service failed to start. 
Check nohup.out" + tail -20 nohup.out + exit 1 +fi diff --git a/stop.sh b/stop.sh new file mode 100755 index 0000000..4e900b3 --- /dev/null +++ b/stop.sh @@ -0,0 +1,14 @@ +#!/bin/bash +cd "$(dirname "$0")" +if [ -f ah.pid ]; then + PID=$(cat ah.pid) + if kill -0 $PID 2>/dev/null; then + kill $PID + echo "Stopped (PID $PID)" + else + echo "Process $PID not running" + fi + rm -f ah.pid +else + echo "No ah.pid found" +fi diff --git a/workers/__pycache__/__init__.cpython-310.pyc b/workers/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..10b925b Binary files /dev/null and b/workers/__pycache__/__init__.cpython-310.pyc differ diff --git a/workers/__pycache__/clip_model.cpython-310.pyc b/workers/__pycache__/clip_model.cpython-310.pyc new file mode 100644 index 0000000..9fadc7a Binary files /dev/null and b/workers/__pycache__/clip_model.cpython-310.pyc differ diff --git a/workers/clip_model.py b/workers/clip_model.py index 21511b2..507c3da 100644 --- a/workers/clip_model.py +++ b/workers/clip_model.py @@ -1,2 +1,61 @@ # -*- coding:utf-8 -*- -CLIP ViT-H/14 lazy-loading wrapper. 
"""CLIP ViT-H/14 lazy-loading wrapper.

The model and processor are loaded on the first call to ``embed_texts`` or
``embed_images`` and then cached in module-level globals for the lifetime of
the process. Both embedding functions return L2-normalised vectors as plain
Python lists so the result can be serialised directly.
"""
import os
import torch
import numpy as np
from PIL import Image
from io import BytesIO
import base64
import urllib.request

# Local directory holding the pretrained weights. Can be overridden with the
# CLIP_MODEL_PATH environment variable; the default is the original path.
MODEL_PATH = os.environ.get(
    'CLIP_MODEL_PATH',
    '/data/ymq/models/laion/CLIP-ViT-H-14-laion2B-s32B-b79K',
)

# Lazily populated by _load(); all three are set together or not at all.
_model = None
_processor = None
_device = None


def _load():
    """Load the CLIP model and processor once and cache them in module globals.

    Subsequent calls return immediately. The globals are only assigned after
    every step (processor load, weight load, device move) has succeeded, so a
    failure part-way through leaves the module unloaded and the next call
    retries instead of using a half-initialised model.
    """
    global _model, _processor, _device
    if _model is not None:
        return
    # CUDA_VISIBLE_DEVICES is set in start.sh, so GPU 0 in visible devices is our target
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    # Half precision is only used on GPU. On the CPU fallback float32 is used
    # because float16 CPU kernels are incomplete or slow in many PyTorch builds.
    dtype = torch.float16 if device.type == 'cuda' else torch.float32

    # Imported here so importing this module does not pull in transformers.
    from transformers import CLIPModel, CLIPProcessor
    processor = CLIPProcessor.from_pretrained(MODEL_PATH)
    model = CLIPModel.from_pretrained(MODEL_PATH, torch_dtype=dtype)
    model = model.to(device)
    model.eval()

    # Publish the fully initialised state in one step.
    _processor, _device, _model = processor, device, model
    dtype_name = str(dtype).split('.')[-1]
    print(f'[CLIP] Model loaded on {device}, dtype={dtype_name}')


def _to_unit_vectors(features):
    """L2-normalise each row of *features* and return it as nested lists.

    Normalisation happens in the tensor's own dtype, then the result is moved
    to the CPU and widened to float32 before conversion.
    """
    features = features / features.norm(dim=-1, keepdim=True)
    return features.cpu().float().numpy().tolist()


def embed_texts(texts):
    """Return one L2-normalised embedding per input text.

    Args:
        texts: A list of strings, or a single string (treated as one text).
            Inputs are truncated to 77 tokens.

    Returns:
        A list of embedding vectors (lists of floats), one per text. Empty or
        missing input yields ``[]`` without loading the model.
    """
    if not texts:
        return []
    if isinstance(texts, str):
        texts = [texts]
    _load()
    inputs = _processor(text=texts, return_tensors='pt', padding=True, truncation=True, max_length=77)
    inputs = {k: v.to(_device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = _model.get_text_features(**inputs)
    return _to_unit_vectors(outputs)


def _load_image(src):
    """Load an image from a data URI, an http(s) URL, or a local path as RGB.

    Args:
        src: A ``data:`` URI with a base64 payload, an ``http://`` or
            ``https://`` URL, or a filesystem path.

    Returns:
        A ``PIL.Image.Image`` in RGB mode whose pixel data is fully loaded.

    Raises:
        ValueError: If a ``data:`` URI has no ``,`` separating header and payload.
    """
    # NOTE(review): the README documents that image sources come from API
    # requests. Accepting arbitrary local paths and URLs lets a caller make the
    # server read its own files or issue requests on its behalf; consider an
    # allow-list if the API is reachable by untrusted clients.
    if src.startswith('data:'):
        _, sep, payload = src.partition(',')
        if not sep:
            raise ValueError('Malformed data URI: missing "," before payload')
        raw = base64.b64decode(payload)
    elif src.startswith(('http://', 'https://')):
        with urllib.request.urlopen(src, timeout=30) as resp:
            raw = resp.read()
    else:
        # convert() returns a new, fully loaded image, so the file handle can
        # be closed as soon as it returns.
        with Image.open(src) as img:
            return img.convert('RGB')
    with Image.open(BytesIO(raw)) as img:
        return img.convert('RGB')


def embed_images(sources):
    """Return one L2-normalised embedding per input image.

    Args:
        sources: A list of image sources, or a single source string. Each
            source is a local path, an http(s) URL, or a base64 data URI.

    Returns:
        A list of embedding vectors (lists of floats), one per image. Empty or
        missing input yields ``[]`` without loading the model.
    """
    if not sources:
        return []
    if isinstance(sources, str):
        # A bare string is one source, not an iterable of characters.
        sources = [sources]
    _load()
    images = [_load_image(s) for s in sources]
    inputs = _processor(images=images, return_tensors='pt')
    inputs = {k: v.to(_device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = _model.get_image_features(**inputs)
    return _to_unit_vectors(outputs)