From ceb905d3ebb3e72d68dc0ff1175af6249d4b0731 Mon Sep 17 00:00:00 2001 From: yumoqing Date: Sun, 14 Jun 2026 17:04:22 +0800 Subject: [PATCH] fix: use cuda:0 (CUDA_VISIBLE_DEVICES handles GPU mapping), add shell scripts and README --- README.md | 52 +++++++++++++++ ah.pid | 1 + ah.py | 6 +- build.sh | 58 +++++++++++++++++ conf/config.json | 34 +++++----- nohup.out | 7 ++ start.sh | 23 +++++++ stop.sh | 14 ++++ workers/__pycache__/__init__.cpython-310.pyc | Bin 0 -> 137 bytes .../__pycache__/clip_model.cpython-310.pyc | Bin 0 -> 2619 bytes workers/clip_model.py | 61 +++++++++++++++++- 11 files changed, 236 insertions(+), 20 deletions(-) create mode 100644 README.md create mode 100644 ah.pid create mode 100755 build.sh create mode 100644 nohup.out create mode 100755 start.sh create mode 100755 stop.sh create mode 100644 workers/__pycache__/__init__.cpython-310.pyc create mode 100644 workers/__pycache__/clip_model.cpython-310.pyc diff --git a/README.md b/README.md new file mode 100644 index 0000000..dadaa09 --- /dev/null +++ b/README.md @@ -0,0 +1,52 @@ +# CLIP Embedding Service + +CLIP-ViT-H/14 多模态 Embedding 服务,支持文本和图片向量化。 + +## Overview + +- **Model**: laion/CLIP-ViT-H-14-laion2B-s32B-b79K +- **Dimension**: 1024 +- **Precision**: float16 +- **Port**: 9086 +- **GPU**: 2 (default) + +## API + +### GET /api/status +Service health and GPU info. + +### POST /api/text +Text embedding. +```json +{"texts": ["hello world", "a cat"]} +``` + +### POST /api/image +Image embedding (file path, URL, or base64 data URI). +```json +{"images": ["/path/to/img.jpg", "https://example.com/img.png"]} +``` + +### POST /api/embed +Combined text + image embedding. 
+```json +{"texts": ["a cat"], "images": ["/path/to/cat.jpg"]} +``` + +## Model Download (Offline Deploy) + +```bash +pip install huggingface_hub +huggingface-cli download laion/CLIP-ViT-H-14-laion2B-s32B-b79K \ + --local-dir /data/ymq/models/laion/CLIP-ViT-H-14-laion2B-s32B-b79K \ + --local-dir-use-symlinks False +``` +Size: ~15GB + +## Deploy + +```bash +bash build.sh deploy # start +bash build.sh stop # stop +bash build.sh status # check +``` diff --git a/ah.pid b/ah.pid new file mode 100644 index 0000000..199bb65 --- /dev/null +++ b/ah.pid @@ -0,0 +1 @@ +829715 diff --git a/ah.py b/ah.py index 5194402..a9e56ca 100644 --- a/ah.py +++ b/ah.py @@ -2,6 +2,8 @@ import os from ahserver.webapp import webapp -if __name__ == '__main__': - webapp() +def init(): + pass +if __name__ == '__main__': + webapp(init) diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..dce7380 --- /dev/null +++ b/build.sh @@ -0,0 +1,58 @@ +#!/bin/bash +set -e +cd "$(dirname "$0")" + +SERVICE_NAME="clip_embedding" +PORT=9086 +DEFAULT_GPU=2 +action="${1:-status}" + +case "$action" in + deploy|update) + echo "=== $SERVICE_NAME Deploy ===" + if [ -f ah.pid ] && kill -0 $(cat ah.pid) 2>/dev/null; then + bash stop.sh + sleep 2 + fi + if [ -d .git ]; then + echo "Pulling latest code..." + git pull origin master 2>/dev/null || git pull origin main 2>/dev/null || true + fi + export CLIP_GPU_ID="${CLIP_GPU_ID:-$DEFAULT_GPU}" + bash start.sh + sleep 3 + if curl -s http://localhost:$PORT/api/status > /dev/null 2>&1; then + echo "Service is healthy on port $PORT" + curl -s http://localhost:$PORT/api/status | python3 -m json.tool 2>/dev/null || true + else + echo "WARNING: Service may not be ready yet. 
Check nohup.out" + fi + ;; + stop) + bash stop.sh + ;; + start) + export CLIP_GPU_ID="${CLIP_GPU_ID:-$DEFAULT_GPU}" + bash start.sh + ;; + status) + echo "=== $SERVICE_NAME Status ===" + if [ -f ah.pid ] && kill -0 $(cat ah.pid) 2>/dev/null; then + echo "Process: running (PID $(cat ah.pid))" + else + echo "Process: not running" + fi + echo "Port: $PORT" + echo "GPU: ${CLIP_GPU_ID:-$DEFAULT_GPU}" + if curl -s --max-time 3 http://localhost:$PORT/api/status > /dev/null 2>&1; then + echo "HTTP: OK" + curl -s http://localhost:$PORT/api/status | python3 -m json.tool 2>/dev/null || true + else + echo "HTTP: not responding" + fi + ;; + *) + echo "Usage: $0 {deploy|update|stop|start|status}" + exit 1 + ;; +esac diff --git a/conf/config.json b/conf/config.json index 1113f6a..c0f117a 100644 --- a/conf/config.json +++ b/conf/config.json @@ -1,22 +1,22 @@ { - password_key: ClipEmbedding2026Key, - databases: {}, - session_redis: { - host: 127.0.0.1, - port: 6379, - db: 1 + "password_key": "ClipEmbedding2026Key", + "databases": {}, + "session_redis": { + "host": "127.0.0.1", + "port": 6379, + "db": 1 }, - website: { - paths: [ - [0$/app, ] + "website": { + "paths": [ + ["$[workdir]$/app", ""] ], - host: 0.0.0.0, - port: 9086, - coding: utf-8, - indexes: [index.html, index.dspy], - processors: [ - [.dspy, dspy] + "host": "0.0.0.0", + "port": 9086, + "coding": "utf-8", + "indexes": ["index.html", "index.dspy"], + "processors": [ + [".dspy", "dspy"] ] }, - hot_reload: false -} + "hot_reload": false +} \ No newline at end of file diff --git a/nohup.out b/nohup.out new file mode 100644 index 0000000..22bb63e --- /dev/null +++ b/nohup.out @@ -0,0 +1,7 @@ +2026-06-14 17:03:26.974[webapp][debug][/data/ymq/wan22-service/py3/lib/python3.10/site-packages/ahserver/configuredServer.py:40]client_max_size=1024000000 +reuse_port= True +======== Running on http://0.0.0.0:9086 ======== +(Press CTRL+C to quit) +Using a slow image processor as `use_fast` is unset and a slow processor was saved with 
this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`. +[CLIP] Model loaded on cuda:0, dtype=float16 +2026-06-14 17:03:40.976[webapp][debug][/data/ymq/wan22-service/py3/lib/python3.10/site-packages/ahserver/auth_api.py:178]timecost=client(127.0.0.1) None access /api/text cost 4.946, (0.000) diff --git a/start.sh b/start.sh new file mode 100755 index 0000000..c5afe11 --- /dev/null +++ b/start.sh @@ -0,0 +1,23 @@ +#!/bin/bash +cd "$(dirname "$0")" +export CLIP_GPU_ID="${CLIP_GPU_ID:-2}" +export CUDA_VISIBLE_DEVICES="$CLIP_GPU_ID" +export PYTHONPATH="$(pwd)" + +if [ -f ah.pid ] && kill -0 $(cat ah.pid) 2>/dev/null; then + echo "Service already running (PID $(cat ah.pid))" + exit 1 +fi + +echo "Starting CLIP Embedding Service on GPU $CLIP_GPU_ID, port 9086..." +nohup /data/ymq/wan22-service/py3/bin/python ah.py > nohup.out 2>&1 & +echo $! > ah.pid +echo "Started (PID $(cat ah.pid))" +sleep 2 +if kill -0 $(cat ah.pid) 2>/dev/null; then + echo "Service is running" +else + echo "Service failed to start. 
Check nohup.out" + tail -20 nohup.out + exit 1 +fi diff --git a/stop.sh b/stop.sh new file mode 100755 index 0000000..4e900b3 --- /dev/null +++ b/stop.sh @@ -0,0 +1,14 @@ +#!/bin/bash +cd "$(dirname "$0")" +if [ -f ah.pid ]; then + PID=$(cat ah.pid) + if kill -0 $PID 2>/dev/null; then + kill $PID + echo "Stopped (PID $PID)" + else + echo "Process $PID not running" + fi + rm -f ah.pid +else + echo "No ah.pid found" +fi diff --git a/workers/__pycache__/__init__.cpython-310.pyc b/workers/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..10b925b8d417e6d335fc7c6ea149d00c87fa9e16 GIT binary patch literal 137 zcmd1j<>g`kg6lbYS&TsXF^Gcx?9`%S{rLFIyv&mLc)fzkTO2mI`6;D2sdgY!ikW}} G3j+XNUmcVH literal 0 HcmV?d00001 diff --git a/workers/__pycache__/clip_model.cpython-310.pyc b/workers/__pycache__/clip_model.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9fadc7a061dd74b3408dac1b3aa8b18a4a199b30 GIT binary patch literal 2619 zcmZ`)&2JM&6yKTsvNui$X;eN+Tel#!u82(_1c6dSfvTiHNSmmrE@-v!Op*=j-DPGR zh-II`p%+T49;-^W#D#xpu07?}cM+eQ!SAypP{xqoaWZ--EQe@oB-b z{=uK&kHXJ2=<+ELX4Nf57)?l>n32}08ST0Qqn)@-ukI0?<0O8wP%jvsCdGQu%nRxx z%&nIgjYgr5K|jJg=0CIQy)~6&5sTBc@)Hr|ITzI(Fwt755be%4qcskUe|<}FIe)J`TVs)mYFo|6wPwb6 zB5O$$XK4+0oie*lP0vgj+4<{J@&b^ROP6mqaL2t5UEx5#0bME}3LMfR!eiEwrAUt| zvSIg}z9o(@l7gH1ByyPzuJD?E*J>8JgjgNYE2;oABOPl4I9O9+?sFQ!ZoN_Ef(E&< zzC|nr&Qb1P7W4nMw_z=wQzk+%c^Ltar`#GBm3tLaf>7e&_KQf30Y zFrqTiSPz-n%6a?PFQ|T4Hgn79Le9!rT5caXTQ)PkUIn3=o~?{)FEq~5uGv6)jBmyb zt{psImtq-4n^BxZD+$+rxT`EF5v6i96HP9p@W49Z1JOrTMb->+!9gFVoax~1y_?_O z2^YUvzNG_D?AojS-Sah7nRfVQlxQ~>ajJxiSGB}382+su1hkRFdB~e9oMAT9o@C+? 
z99=VFV}CW@(r$=Xoq(khNr5`#I2|Ve!CoMfM4SL|1HHNT*}yF4(Q6>*woPgQxhR2FAXkU`wNSAh?ZTQTG)rIHuBdbgA zLtGtb+cvS%!n7qXK7m+c`x$zN_6mKgSA-rwAL(Nb1PhwxYI)}_7*%n#qxe&$$H28M zk%o$=5O%5kJTeZ^ffB8>5h=)b9W-&j1KLHK30ML5 z?vp?1V<<-xfmc*a!aOksLOF--eu7Oshi zi6Z_TUE|7ZAFlEU9K)q{QpieyC9GYOWZF$zO-K$;Wl1bm#fD5rgR~c?c?(8AYbj*j zz0}B%^<~&kjsx+?1T@U3NpkAHNn2b3;agtDqxUoFCH!=(9^_J=h?BqpVlcZ;2R>J{ zXDdg!J+A{evJG>3K6CnHX|M)xLY^_JJy`v{Dr^)t0?11rv+BiU*B-3z*qAW~tM6NM zDBG4S$hI59*=CRoPVQI-=T(XeZ5Pq=iY8}wut2o^<9FBF{<>26m0GP`Kt=+$^9d*s zbyeKt;JHdk2U0~s$tST|*RITySP4u3?PNJm1y-2$RwBt~XLNC8c82i=K$!L$S-Q!E z(q2m>NxY(c!5_D{RN4mtF4d0U5vw@bmZG6asBKt{LYW_mm8twN#wXz`IMBojqjVDD zG(kQjXWoq4;x=gBAFJO%JE(qq#-P^s>YQ}o*#i`yUAl_`bRPxiJ17u9O;Ljab_be4 z0eJKb3fPBG;PP->AdZJoU}WJfgZG{%EMbYIxrQa|0T==JY?D6@O1DDHbt_byfoYl? zO7CC}%fZwv=CH)AC#`mQkoCrs!$rG4S)7E46$<5p&X(e1_5UkIU zLv(m2cH#?=T}JtrK;C=)KEQ|yOj{NhWwX;TREQ~*B2b7b5(5$w2olEFeUuRAP>Yo+ zrjeXSasdb=n=iALfM-=KpztD+StR%(4Tyof(y*U!mU(hKGDb>o1H=!Yt4zY%8M3Sq zhMHyrC^Ls@n&#SGoWC=u7#e4$Uc@z6q{jw@QQfTzaRY?nC=$Gv!A6731E|Cxnve@T pamZX_xUsn~2Tx@y;a9Q1Wf_PA&my)GG#niYjeyiHj1_{?$p0U~b_@Uj literal 0 HcmV?d00001 diff --git a/workers/clip_model.py b/workers/clip_model.py index 21511b2..507c3da 100644 --- a/workers/clip_model.py +++ b/workers/clip_model.py @@ -1,2 +1,61 @@ # -*- coding:utf-8 -*- -CLIP ViT-H/14 lazy-loading wrapper. 
"""CLIP ViT-H/14 lazy-loading wrapper.

The model and processor are loaded from ``MODEL_PATH`` on the first call to
:func:`embed_texts` or :func:`embed_images` and cached in module globals.
Both entry points return L2-normalised embeddings as plain Python lists.
"""
import os
import torch
import numpy as np
from PIL import Image
from io import BytesIO
import base64
import urllib.request

MODEL_PATH = '/data/ymq/models/laion/CLIP-ViT-H-14-laion2B-s32B-b79K'

# Lazily populated by _load(); all three are published together.
_model = None
_processor = None
_device = None


def _load():
    """Load the CLIP model and processor once and cache them in globals.

    Everything is built in local variables and only published to the module
    globals after the model has been moved to the target device. If any step
    fails (missing files, CUDA out of memory, ...) the globals stay ``None``
    and the next call retries, instead of leaving a half-initialised model
    behind.
    """
    global _model, _processor, _device
    if _model is not None:
        return
    # CUDA_VISIBLE_DEVICES is set in start.sh, so GPU 0 in visible devices is our target
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    # Half precision is meant for the GPU path; the CPU fallback uses float32
    # because fp16 CPU kernels are slow or unavailable in some PyTorch builds.
    dtype = torch.float16 if device.type == 'cuda' else torch.float32
    from transformers import CLIPModel, CLIPProcessor
    processor = CLIPProcessor.from_pretrained(MODEL_PATH)
    model = CLIPModel.from_pretrained(MODEL_PATH, torch_dtype=dtype)
    model = model.to(device)
    model.eval()
    # Publish last, and _model very last, since it is the "loaded" flag.
    _processor = processor
    _device = device
    _model = model
    dtype_name = str(dtype).split('.')[-1]
    print(f'[CLIP] Model loaded on {_device}, dtype={dtype_name}')


def _unit_rows(features):
    """L2-normalise each row of ``features`` and return nested Python lists."""
    features = features / features.norm(dim=-1, keepdim=True)
    return features.cpu().float().numpy().tolist()


def embed_texts(texts):
    """Return one L2-normalised embedding per input text.

    Args:
        texts: A list of strings, or a single string (treated as a batch of
            one). Inputs are truncated to 77 tokens.

    Returns:
        A list of embeddings, each a list of floats. An empty or ``None``
        input returns ``[]`` without loading the model.
    """
    if isinstance(texts, str):
        texts = [texts]
    if not texts:
        return []
    _load()
    inputs = _processor(text=texts, return_tensors='pt', padding=True, truncation=True, max_length=77)
    inputs = {k: v.to(_device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = _model.get_text_features(**inputs)
    return _unit_rows(outputs)


def _load_image(src):
    """Open ``src`` and return it as an RGB ``PIL.Image``.

    ``src`` may be a ``data:`` URI with a base64 payload, an ``http(s)://``
    URL (fetched with a 30 second timeout), or a local file path.

    Raises:
        ValueError: If a ``data:`` URI has no ``,`` separating header and payload.
    """
    # NOTE(review): src appears to come from API requests; arbitrary URLs and
    # local paths are accepted here (SSRF / local file read). Consider an
    # allow-list at the API layer if the service is reachable by untrusted clients.
    if src.startswith('data:'):
        _, sep, b64 = src.partition(',')
        if not sep:
            raise ValueError('data URI is missing the "," before its payload')
        raw = BytesIO(base64.b64decode(b64))
    elif src.startswith(('http://', 'https://')):
        with urllib.request.urlopen(src, timeout=30) as resp:
            raw = BytesIO(resp.read())
    else:
        raw = src
    # convert() returns an independent, fully loaded copy, so the source
    # image (and any file handle PIL opened for a path) can be closed here.
    with Image.open(raw) as img:
        return img.convert('RGB')


def embed_images(sources):
    """Return one L2-normalised embedding per input image.

    Args:
        sources: A list of image sources accepted by ``_load_image`` (data
            URI, http(s) URL, or file path), or a single such string
            (treated as a batch of one).

    Returns:
        A list of embeddings, each a list of floats. An empty or ``None``
        input returns ``[]`` without loading the model.
    """
    if isinstance(sources, str):
        # A bare string would otherwise be iterated character by character.
        sources = [sources]
    if not sources:
        return []
    _load()
    images = [_load_image(s) for s in sources]
    inputs = _processor(images=images, return_tensors='pt')
    inputs = {k: v.to(_device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = _model.get_image_features(**inputs)
    return _unit_rows(outputs)