From ceb905d3ebb3e72d68dc0ff1175af6249d4b0731 Mon Sep 17 00:00:00 2001
From: yumoqing <yumoqing@gmail.com>
Date: Sun, 14 Jun 2026 17:04:22 +0800
Subject: [PATCH] fix: use cuda:0 (CUDA_VISIBLE_DEVICES handles GPU mapping),
 add shell scripts and README

---
 README.md                                     |  52 +++++++++++++++
 ah.pid                                        |   1 +
 ah.py                                         |   6 +-
 build.sh                                      |  58 +++++++++++++++++
 conf/config.json                              |  34 +++++-----
 nohup.out                                     |   7 ++
 start.sh                                      |  23 +++++++
 stop.sh                                       |  14 ++++
 workers/__pycache__/__init__.cpython-310.pyc  | Bin 0 -> 137 bytes
 .../__pycache__/clip_model.cpython-310.pyc    | Bin 0 -> 2619 bytes
 workers/clip_model.py                         |  61 +++++++++++++++++-
 11 files changed, 236 insertions(+), 20 deletions(-)
 create mode 100644 README.md
 create mode 100644 ah.pid
 create mode 100755 build.sh
 create mode 100644 nohup.out
 create mode 100755 start.sh
 create mode 100755 stop.sh
 create mode 100644 workers/__pycache__/__init__.cpython-310.pyc
 create mode 100644 workers/__pycache__/clip_model.cpython-310.pyc

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..dadaa09
--- /dev/null
+++ b/README.md
@@ -0,0 +1,52 @@
+# CLIP Embedding Service
+
+CLIP-ViT-H/14 多模态 Embedding 服务，支持文本和图片向量化。
+
+## Overview
+
+- **Model**: laion/CLIP-ViT-H-14-laion2B-s32B-b79K
+- **Dimension**: 1024
+- **Precision**: float16
+- **Port**: 9086
+- **GPU**: 2 (default)
+
+## API
+
+### GET /api/status
+Service health and GPU info.
+
+### POST /api/text
+Text embedding.
+```json
+{"texts": ["hello world", "a cat"]}
+```
+
+### POST /api/image
+Image embedding (file path, URL, or base64 data URI).
+```json
+{"images": ["/path/to/img.jpg", "https://example.com/img.png"]}
+```
+
+### POST /api/embed
+Combined text + image embedding.
+```json
+{"texts": ["a cat"], "images": ["/path/to/cat.jpg"]}
+```
+
+## Model Download (Offline Deploy)
+
+```bash
+pip install huggingface_hub
+huggingface-cli download laion/CLIP-ViT-H-14-laion2B-s32B-b79K \
+  --local-dir /data/ymq/models/laion/CLIP-ViT-H-14-laion2B-s32B-b79K \
+  --local-dir-use-symlinks False
+```
+Size: ~15GB
+
+## Deploy
+
+```bash
+bash build.sh deploy    # stop if running, git pull, start, then health-check
+bash build.sh stop      # stop
+bash build.sh status    # check
+```
diff --git a/ah.pid b/ah.pid
new file mode 100644
index 0000000..199bb65
--- /dev/null
+++ b/ah.pid
@@ -0,0 +1 @@
+829715
diff --git a/ah.py b/ah.py
index 5194402..a9e56ca 100644
--- a/ah.py
+++ b/ah.py
@@ -2,6 +2,8 @@
 import os
 from ahserver.webapp import webapp
 
-if __name__ == '__main__':
-    webapp()
+def init():  # no-op hook handed to webapp() below; presumably called by ahserver at startup — confirm
+    pass
 
+if __name__ == '__main__':  # start the web app only when this file is run as a script
+    webapp(init)
diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..dce7380
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# Deployment helper for the CLIP embedding service.
+# Usage: build.sh {deploy|update|stop|start|status}   (default: status)
+set -e
+cd "$(dirname "$0")"
+
+SERVICE_NAME="clip_embedding"
+PORT=9086
+DEFAULT_GPU=2
+STATUS_URL="http://localhost:$PORT/api/status"
+action="${1:-status}"
+
+# Succeed when ah.pid exists and names a live process.
+is_running() {
+    [ -f ah.pid ] && kill -0 "$(cat ah.pid)" 2>/dev/null
+}
+
+# Query the status endpoint once (3 s timeout). On success print "$1" followed by
+# the pretty-printed response; otherwise return non-zero.
+report_http() {
+    local body
+    body=$(curl -s --max-time 3 "$STATUS_URL" 2>/dev/null) || return 1
+    echo "$1"
+    echo "$body" | python3 -m json.tool 2>/dev/null || true
+}
+
+case "$action" in
+    deploy|update)
+        echo "=== $SERVICE_NAME Deploy ==="
+        if is_running; then
+            bash stop.sh
+            sleep 2
+        fi
+        if [ -d .git ]; then
+            echo "Pulling latest code..."
+            # Still best-effort, but a failed pull is reported instead of hidden.
+            git pull origin master 2>/dev/null || git pull origin main 2>/dev/null \
+                || echo "WARNING: git pull failed; starting the code already on disk" >&2
+        fi
+        export CLIP_GPU_ID="${CLIP_GPU_ID:-$DEFAULT_GPU}"
+        bash start.sh
+        sleep 3
+        if ! report_http "Service is healthy on port $PORT"; then
+            echo "WARNING: Service may not be ready yet. Check nohup.out"
+        fi
+        ;;
+    stop)
+        bash stop.sh
+        ;;
+    start)
+        export CLIP_GPU_ID="${CLIP_GPU_ID:-$DEFAULT_GPU}"
+        bash start.sh
+        ;;
+    status)
+        echo "=== $SERVICE_NAME Status ==="
+        if is_running; then
+            echo "Process: running (PID $(cat ah.pid))"
+        else
+            echo "Process: not running"
+        fi
+        echo "Port: $PORT"
+        echo "GPU: ${CLIP_GPU_ID:-$DEFAULT_GPU}"
+        if ! report_http "HTTP: OK"; then
+            echo "HTTP: not responding"
+        fi
+        ;;
+    *)
+        echo "Usage: $0 {deploy|update|stop|start|status}"
+        exit 1
+        ;;
+esac
diff --git a/conf/config.json b/conf/config.json
index 1113f6a..c0f117a 100644
--- a/conf/config.json
+++ b/conf/config.json
@@ -1,22 +1,22 @@
 {
-    password_key: ClipEmbedding2026Key,
-    databases: {},
-    session_redis: {
-        host: 127.0.0.1,
-        port: 6379,
-        db: 1
+    "password_key": "ClipEmbedding2026Key",
+    "databases": {},
+    "session_redis": {
+        "host": "127.0.0.1",
+        "port": 6379,
+        "db": 1
     },
-    website: {
-        paths: [
-            [0$/app, ]
+    "website": {
+        "paths": [
+            ["$[workdir]$/app", ""]
         ],
-        host: 0.0.0.0,
-        port: 9086,
-        coding: utf-8,
-        indexes: [index.html, index.dspy],
-        processors: [
-            [.dspy, dspy]
+        "host": "0.0.0.0",
+        "port": 9086,
+        "coding": "utf-8",
+        "indexes": ["index.html", "index.dspy"],
+        "processors": [
+            [".dspy", "dspy"]
         ]
     },
-    hot_reload: false
-}
+    "hot_reload": false
+}
\ No newline at end of file
diff --git a/nohup.out b/nohup.out
new file mode 100644
index 0000000..22bb63e
--- /dev/null
+++ b/nohup.out
@@ -0,0 +1,7 @@
+2026-06-14 17:03:26.974[webapp][debug][/data/ymq/wan22-service/py3/lib/python3.10/site-packages/ahserver/configuredServer.py:40]client_max_size=1024000000
+reuse_port= True
+======== Running on http://0.0.0.0:9086 ========
+(Press CTRL+C to quit)
+Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
+[CLIP] Model loaded on cuda:0, dtype=float16
+2026-06-14 17:03:40.976[webapp][debug][/data/ymq/wan22-service/py3/lib/python3.10/site-packages/ahserver/auth_api.py:178]timecost=client(127.0.0.1) None access /api/text cost 4.946, (0.000)
diff --git a/start.sh b/start.sh
new file mode 100755
index 0000000..c5afe11
--- /dev/null
+++ b/start.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# Start the CLIP embedding service in the background on the selected GPU.
+cd "$(dirname "$0")" || exit 1
+export CLIP_GPU_ID="${CLIP_GPU_ID:-2}"
+# Expose only the chosen GPU to the process; workers/clip_model.py relies on this
+# and always uses cuda:0.
+export CUDA_VISIBLE_DEVICES="$CLIP_GPU_ID"
+PYTHONPATH="$(pwd)"
+export PYTHONPATH
+# The interpreter can be overridden via PYTHON_BIN; the default is the original path.
+PYTHON_BIN="${PYTHON_BIN:-/data/ymq/wan22-service/py3/bin/python}"
+
+if [ -f ah.pid ] && kill -0 "$(cat ah.pid)" 2>/dev/null; then
+    echo "Service already running (PID $(cat ah.pid))"
+    exit 1
+fi
+
+if ! command -v "$PYTHON_BIN" > /dev/null 2>&1; then
+    echo "Python interpreter not found: $PYTHON_BIN" >&2
+    exit 1
+fi
+
+echo "Starting CLIP Embedding Service on GPU $CLIP_GPU_ID, port 9086..."
+nohup "$PYTHON_BIN" ah.py > nohup.out 2>&1 &
+echo $! > ah.pid
+echo "Started (PID $(cat ah.pid))"
+sleep 2
+if kill -0 "$(cat ah.pid)" 2>/dev/null; then
+    echo "Service is running"
+else
+    echo "Service failed to start. Check nohup.out"
+    tail -20 nohup.out
+    rm -f ah.pid  # do not leave a stale PID file behind
+    exit 1
+fi
diff --git a/stop.sh b/stop.sh
new file mode 100755
index 0000000..4e900b3
--- /dev/null
+++ b/stop.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Stop the service whose PID is recorded in ah.pid and wait for it to exit.
+cd "$(dirname "$0")" || exit 1
+
+WAIT_SECS=10  # grace period after SIGTERM before giving up
+
+if [ ! -f ah.pid ]; then
+    echo "No ah.pid found"
+    exit 0
+fi
+
+PID=$(cat ah.pid)
+if [ -n "$PID" ] && kill -0 "$PID" 2>/dev/null; then
+    kill "$PID"
+    # Wait until the process is really gone, so that a start.sh run right after
+    # this script does not overlap with the old instance.
+    for ((i = 0; i < WAIT_SECS; i++)); do
+        kill -0 "$PID" 2>/dev/null || break
+        sleep 1
+    done
+    if kill -0 "$PID" 2>/dev/null; then
+        # Keep ah.pid so start.sh still sees the running instance.
+        echo "WARNING: process $PID still running after ${WAIT_SECS}s" >&2
+        exit 1
+    fi
+    echo "Stopped (PID $PID)"
+else
+    echo "Process $PID not running"
+fi
+rm -f ah.pid
diff --git a/workers/__pycache__/__init__.cpython-310.pyc b/workers/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..10b925b8d417e6d335fc7c6ea149d00c87fa9e16
GIT binary patch
literal 137
zcmd1j<>g`kg6lbYS&TsXF^Gc<K!OfHT+9L_QW%06G#UL?G8BP?5yUSY{glL#ME%O#
zLjB~N%!2sT+@#c$l+3(z{qp>x?9`%S{rLFIyv&mLc)fzkTO2mI`6;D2sdgY!ikW}}
G3j+XNUmcVH

literal 0
HcmV?d00001

diff --git a/workers/__pycache__/clip_model.cpython-310.pyc b/workers/__pycache__/clip_model.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9fadc7a061dd74b3408dac1b3aa8b18a4a199b30
GIT binary patch
literal 2619
zcmZ`)&2JM&6yKTsvNui$X;eN+Tel#!u82(_1c6dSfvTiHNSmmrE@-v!Op*=j-DPGR
zh-II`p%+T49;-^W#D#xpu07?<t%uV0#`!Qr>}cM+eQ!SAypP{xqoaWZ--EQe@oB-b
z{=uK&kHXJ2=<+ELX4Nf57)?l>n32}08ST0Qqn)@-ukI0?<0O8wP%jvsCdGQu%nRxx
z%&nIgjYgr5K|jJg=0CIQ<E+4nFdk(A8-a0xmDnhZA27OPRmR$<Z`_$*EdLZQPu;3b
z&y<s>y)~6&5sTBc@)Hr|ITzI(Fwt755be%4qcskUe|<}FIe)J`TVs)mYFo|6wPwb6
zB5O$$XK4+0oie*lP0vgj+4<{J@&b^ROP6mqaL2t5UEx5#0bME}3LMfR!eiEwrAUt|
zvSIg}z9o(@l7gH1ByyPzuJD?E*J>8JgjgNYE2;oABOPl4I9O9+?sFQ!ZoN_Ef(E&<
zzC|nr&Qb1P7W4nMw_z=wQz<O8y+(^gSI%j>k+%c^Ltar`#GBm3tLaf>7e&_KQf30Y
zFrqTiSPz-n%6a?PFQ|T4Hgn79Le9!rT5caXTQ)PkUIn3=o~?{)FEq~5uGv6)jBmyb
zt{psImtq-4n^BxZD+$+rxT`EF5v6i96HP9p@W49Z1JOrTMb->+!9gFVoax~1y_?_O
z2^YUvzNG_D?AojS-Sah7nRfVQlxQ~>ajJxiSGB}382+su1hkRFdB~e9oMAT9o@C+?
z99=VFV}CW@(r$=Xoq(khNr5`#I2|Ve!CoMfM4SL|1HHNT*}y<a4C4JOFv{A-sF02+
z0!A^m&)qIzF7uvYWNF{JX>F4(Q6>*woPgQxhR2FAXkU`wNSAh?ZTQTG)rIHuBdbgA
zLtGtb+cvS%!n7qXK7m+c`x$zN_6mKgSA-rwAL(Nb1PhwxYI)}_7*%n#qxe&$$H28M
zk%o$=5O%5kJTeZ^ffB8>5h=)b9W<k-VZzfjwSMTX7^@}duTKF1i>-&j1KLHK30ML5
z?vp?1V<<-xfmc*a!aOks<dv;y^_(V;G}$!D0XYsT9n7(~p&D5;zj_+}Ut<-Wvf7`T
zfPt}3m8{MsStCm1)#}0Aoj347UfV2gYX1@EIWT71O38#uU>LO<E8dg>F--eu7Oshi
zi6Z_TUE|7ZAFlEU9K)q{QpieyC9GYOWZF$zO-K$;Wl1bm#fD5rgR~c?c?(8AYbj*j
zz0}B%^<~&kjsx+?1T@U3NpkAHNn2b3;agtDqxUoFCH!=(9^_J=h?BqpVlcZ;2R>J{
zXDdg!J+A{evJG>3K6CnHX|M)xLY^_JJy`v{Dr^)t0?11rv+BiU*B-3z*qAW~tM6NM
zDBG4S$hI59*=CRoPVQI-=T(XeZ5Pq=iY8}wut2o^<9FBF{<>26m0GP`Kt=+$^9d*s
zbyeKt;JHdk2U0~s$tST|*RITySP4u3?PNJm1y-2$RwBt~XLNC8c82i=K$!L$S-Q!E
z(q2m>NxY(c!5_D{RN4mtF4d0U5vw@bmZG6asBKt{LYW_mm8twN#wXz`IMBojqjVDD
zG(kQjXWoq4;x=gBAFJO%JE(qq#-P^s>YQ}o*#i`yUAl_`bRPxiJ17u9O;Ljab_be4
z0eJKb3fPBG;PP->AdZJoU}WJfgZG{%EMbYIxrQa|0T==JY?D6@O1DDHbt_byfoYl?
zO7CC}%fZwv=CH)AC#`mQkoCrs!$rG4S)7E46$<5p&X(e1_<hSp4EP?*h|l0>5UkIU
zLv(m2cH#?=T}JtrK;C=)KEQ|yOj{NhWwX;TREQ~*B2b7b5(5$w2olEFeUuRAP>Yo+
zrjeXSasdb=n=iALfM-=KpztD+StR%(4Tyof(y*U!mU(hKGDb>o1H=!Yt4zY%8M3Sq
zhMHyrC^Ls@n&#SGoWC=u7#e4$Uc@z6q{jw@QQfTzaRY?nC=$Gv!A6731E|Cxnve@T
pamZX_xUsn~2Tx@y;a9Q1Wf_PA&my)GG#niYjeyiHj1_{?$p0U~b_@Uj

literal 0
HcmV?d00001

diff --git a/workers/clip_model.py b/workers/clip_model.py
index 21511b2..507c3da 100644
--- a/workers/clip_model.py
+++ b/workers/clip_model.py
@@ -1,2 +1,119 @@
 # -*- coding:utf-8 -*-
-CLIP ViT-H/14 lazy-loading wrapper.
+"""CLIP ViT-H/14 lazy-loading wrapper.
+
+The model is loaded on first use and cached in module globals; the embed_*
+functions return L2-normalised feature vectors as plain Python lists.
+"""
+import os
+import torch
+import numpy as np
+from PIL import Image
+from io import BytesIO
+import base64
+import urllib.request
+
+MODEL_PATH = '/data/ymq/models/laion/CLIP-ViT-H-14-laion2B-s32B-b79K'
+
+# Set together by _load(); all three stay None until a load has fully succeeded.
+_model = None
+_processor = None
+_device = None
+
+
+def _load():
+    """Load the CLIP processor and model once; later calls return immediately.
+
+    Weights are loaded as float16 on CUDA. On the CPU fallback float32 is used,
+    because half-precision kernels are missing or very slow in many CPU builds
+    of PyTorch.
+    """
+    global _model, _processor, _device
+    if _model is not None:
+        return
+    # CUDA_VISIBLE_DEVICES is set in start.sh, so GPU 0 in visible devices is our target
+    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+    dtype = torch.float16 if device.type == 'cuda' else torch.float32
+    dtype_name = str(dtype).replace('torch.', '')
+    from transformers import CLIPModel, CLIPProcessor
+    processor = CLIPProcessor.from_pretrained(MODEL_PATH)
+    model = CLIPModel.from_pretrained(MODEL_PATH, torch_dtype=dtype)
+    model = model.to(device)
+    model.eval()
+    # Publish the globals only now: if anything above raised, the next call retries
+    # from scratch instead of finding a model that never reached the device.
+    _processor, _device, _model = processor, device, model
+    print(f'[CLIP] Model loaded on {device}, dtype={dtype_name}')
+
+
+def _to_unit_lists(features):
+    """L2-normalise each row of *features* and return the result as nested lists."""
+    features = features / features.norm(dim=-1, keepdim=True)
+    return features.cpu().float().numpy().tolist()
+
+
+def embed_texts(texts):
+    """Embed texts with the CLIP text encoder.
+
+    Args:
+        texts: List of strings; tokenised with padding and truncation to 77 tokens.
+
+    Returns:
+        One L2-normalised embedding (list of floats) per text, or ``[]`` when
+        *texts* is empty.
+    """
+    if not texts:
+        # Nothing to encode; skip the processor and the model load entirely.
+        return []
+    _load()
+    inputs = _processor(text=texts, return_tensors='pt', padding=True, truncation=True, max_length=77)
+    inputs = {k: v.to(_device) for k, v in inputs.items()}
+    with torch.no_grad():
+        return _to_unit_lists(_model.get_text_features(**inputs))
+
+
+def _load_image(src):
+    """Open *src* as an RGB PIL image.
+
+    *src* may be a ``data:`` URI (base64 payload after the first comma), an
+    http(s) URL (fetched with a 30 s timeout) or a local file path.
+
+    Raises:
+        ValueError: If a ``data:`` URI has no comma before its payload.
+    """
+    # NOTE(review): URLs and paths are used exactly as given, so an untrusted caller
+    # could reach internal URLs or local files; restrict this at the API layer if needed.
+    if src.startswith('data:'):
+        _, comma, b64 = src.partition(',')
+        if not comma:
+            raise ValueError('invalid data URI: missing "," before the payload')
+        raw = BytesIO(base64.b64decode(b64))
+    elif src.startswith(('http://', 'https://')):
+        with urllib.request.urlopen(src, timeout=30) as resp:
+            raw = BytesIO(resp.read())
+    else:
+        raw = src
+    # convert() returns an independent image, so the underlying file can be closed here
+    # instead of staying open until garbage collection.
+    with Image.open(raw) as img:
+        return img.convert('RGB')
+
+
+def embed_images(sources):
+    """Embed images with the CLIP vision encoder.
+
+    Args:
+        sources: List of image references accepted by ``_load_image``.
+
+    Returns:
+        One L2-normalised embedding (list of floats) per image, or ``[]`` when
+        *sources* is empty.
+    """
+    if not sources:
+        # Nothing to encode; skip the processor and the model load entirely.
+        return []
+    _load()
+    images = [_load_image(s) for s in sources]
+    inputs = _processor(images=images, return_tensors='pt')
+    inputs = {k: v.to(_device) for k, v in inputs.items()}
+    with torch.no_grad():
+        return _to_unit_lists(_model.get_image_features(**inputs))