fix: use cuda:0 (CUDA_VISIBLE_DEVICES handles GPU mapping), add shell scripts and README

2026-06-14 17:04:22 +08:00 · 2026-06-14 17:04:22 +08:00 · ceb905d3eb
commit ceb905d3eb
parent e0009da8e7
11 changed files with 236 additions and 20 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,52 @@
+# CLIP Embedding Service
+
+CLIP-ViT-H/14 多模态 Embedding 服务，支持文本和图片向量化。
+
+## Overview
+
+- **Model**: laion/CLIP-ViT-H-14-laion2B-s32B-b79K
+- **Dimension**: 1024
+- **Precision**: float16
+- **Port**: 9086
+- **GPU**: device index 2 by default (override with the `CLIP_GPU_ID` environment variable)
+
+## API
+
+### GET /api/status
+Service health and GPU info.
+
+### POST /api/text
+Text embedding.
+```json
+{"texts": ["hello world", "a cat"]}
+```
+
+### POST /api/image
+Image embedding (file path, URL, or base64 data URI).
+```json
+{"images": ["/path/to/img.jpg", "https://example.com/img.png"]}
+```
+
+### POST /api/embed
+Combined text + image embedding.
+```json
+{"texts": ["a cat"], "images": ["/path/to/cat.jpg"]}
+```
+
+## Model Download (Offline Deploy)
+
+```bash
+pip install huggingface_hub
+huggingface-cli download laion/CLIP-ViT-H-14-laion2B-s32B-b79K \
+  --local-dir /data/ymq/models/laion/CLIP-ViT-H-14-laion2B-s32B-b79K \
+  --local-dir-use-symlinks False
+```
+Size: ~15GB
+
+## Deploy
+
+```bash
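+bash build.sh deploy    # stop if running, git pull, start, health check (alias: update)
+bash build.sh start     # start without pulling
+bash build.sh stop      # stop
+bash build.sh status    # process, port, GPU and HTTP check (default action)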
+```
--- a/ah.pid
+++ b/ah.pid
@ -0,0 +1 @@
+829715
--- a/ah.py
+++ b/ah.py
@ -2,6 +2,8 @@
 import os
 from ahserver.webapp import webapp

-if __name__ == '__main__':
-    webapp()
+def init():
+    """Startup hook handed to ``webapp``; currently a no-op."""
+    pass

+if __name__ == '__main__':
+    webapp(init)
--- a/build.sh
+++ b/build.sh
@ -0,0 +1,58 @@
+#!/bin/bash
+set -e
+cd "$(dirname "$0")"
+
+SERVICE_NAME="clip_embedding"
+PORT=9086
+DEFAULT_GPU=2
+action="${1:-status}"
+
+case "$action" in
+    deploy|update)
+        echo "=== $SERVICE_NAME Deploy ==="
+        if [ -f ah.pid ] && kill -0 $(cat ah.pid) 2>/dev/null; then
+            bash stop.sh
+            sleep 2
+        fi
+        if [ -d .git ]; then
+            echo "Pulling latest code..."
+            git pull origin master 2>/dev/null || git pull origin main 2>/dev/null || true
+        fi
+        export CLIP_GPU_ID="${CLIP_GPU_ID:-$DEFAULT_GPU}"
+        bash start.sh
+        sleep 3
+        if curl -s http://localhost:$PORT/api/status > /dev/null 2>&1; then
+            echo "Service is healthy on port $PORT"
+            curl -s http://localhost:$PORT/api/status | python3 -m json.tool 2>/dev/null || true
+        else
+            echo "WARNING: Service may not be ready yet. Check nohup.out"
+        fi
+        ;;
+    stop)
+        bash stop.sh
+        ;;
+    start)
+        export CLIP_GPU_ID="${CLIP_GPU_ID:-$DEFAULT_GPU}"
+        bash start.sh
+        ;;
+    status)
+        echo "=== $SERVICE_NAME Status ==="
+        if [ -f ah.pid ] && kill -0 $(cat ah.pid) 2>/dev/null; then
+            echo "Process: running (PID $(cat ah.pid))"
+        else
+            echo "Process: not running"
+        fi
+        echo "Port: $PORT"
+        echo "GPU: ${CLIP_GPU_ID:-$DEFAULT_GPU}"
+        if curl -s --max-time 3 http://localhost:$PORT/api/status > /dev/null 2>&1; then
+            echo "HTTP: OK"
+            curl -s http://localhost:$PORT/api/status | python3 -m json.tool 2>/dev/null || true
+        else
+            echo "HTTP: not responding"
+        fi
+        ;;
+    *)
+        echo "Usage: $0 {deploy|update|stop|start|status}"
+        exit 1
+        ;;
+esac
--- a/conf/config.json
+++ b/conf/config.json
@ -1,22 +1,22 @@
 {
-    password_key: ClipEmbedding2026Key,
-    databases: {},
-    session_redis: {
-        host: 127.0.0.1,
-        port: 6379,
-        db: 1
+    "password_key": "ClipEmbedding2026Key",
+    "databases": {},
+    "session_redis": {
+        "host": "127.0.0.1",
+        "port": 6379,
+        "db": 1
    },
-    website: {
-        paths: [
-            [0$/app, ]
+    "website": {
+        "paths": [
+            ["$[workdir]$/app", ""]
        ],
-        host: 0.0.0.0,
-        port: 9086,
-        coding: utf-8,
-        indexes: [index.html, index.dspy],
-        processors: [
-            [.dspy, dspy]
+        "host": "0.0.0.0",
+        "port": 9086,
+        "coding": "utf-8",
+        "indexes": ["index.html", "index.dspy"],
+        "processors": [
+            [".dspy", "dspy"]
        ]
    },
-    hot_reload: false
+    "hot_reload": false
 }
--- a/nohup.out
+++ b/nohup.out
@ -0,0 +1,7 @@
+2026-06-14 17:03:26.974[webapp][debug][/data/ymq/wan22-service/py3/lib/python3.10/site-packages/ahserver/configuredServer.py:40]client_max_size=1024000000
+reuse_port= True
+======== Running on http://0.0.0.0:9086 ========
+(Press CTRL+C to quit)
+Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
+[CLIP] Model loaded on cuda:0, dtype=float16
+2026-06-14 17:03:40.976[webapp][debug][/data/ymq/wan22-service/py3/lib/python3.10/site-packages/ahserver/auth_api.py:178]timecost=client(127.0.0.1) None access /api/text cost 4.946, (0.000)
--- a/start.sh
+++ b/start.sh
@ -0,0 +1,23 @@
+#!/bin/bash
+# Launch the CLIP embedding service in the background on a single GPU.
+# Abort if the script directory is unreachable, otherwise ah.pid and
+# nohup.out would be written to whatever directory we happen to be in.
+cd "$(dirname "$0")" || exit 1
+export CLIP_GPU_ID="${CLIP_GPU_ID:-2}"
+# Expose only the selected GPU; workers/clip_model.py then addresses it as cuda:0.
+export CUDA_VISIBLE_DEVICES="$CLIP_GPU_ID"
+export PYTHONPATH="$(pwd)"
+# Interpreter is overridable for other hosts; the default is the original path.
+PYTHON_BIN="${CLIP_PYTHON:-/data/ymq/wan22-service/py3/bin/python}"
+
+if [ -f ah.pid ] && kill -0 "$(cat ah.pid)" 2>/dev/null; then
+    echo "Service already running (PID $(cat ah.pid))"
+    exit 1
+fi
+
+echo "Starting CLIP Embedding Service on GPU $CLIP_GPU_ID, port 9086..."
+nohup "$PYTHON_BIN" ah.py > nohup.out 2>&1 &
+PID=$!
+echo "$PID" > ah.pid
+echo "Started (PID $PID)"
+sleep 2
+if kill -0 "$PID" 2>/dev/null; then
+    echo "Service is running"
+else
+    echo "Service failed to start. Check nohup.out"
+    tail -20 nohup.out
+    # Do not leave a pid file pointing at a dead (and later reusable) PID.
+    rm -f ah.pid
+    exit 1
+fi
--- a/stop.sh
+++ b/stop.sh
@ -0,0 +1,14 @@
+#!/bin/bash
+# Stop the service recorded in ah.pid and wait until it has really exited.
+cd "$(dirname "$0")" || exit 1
+
+# Seconds to wait for the process to exit after SIGTERM.
+STOP_TIMEOUT="${STOP_TIMEOUT:-10}"
+
+if [ -f ah.pid ]; then
+    PID=$(cat ah.pid)
+    if kill -0 "$PID" 2>/dev/null; then
+        kill "$PID"
+        # The server binds with reuse_port, so a new instance could start
+        # alongside a lingering old one; only report success (and drop the
+        # pid file) once the old process is gone.
+        waited=0
+        while kill -0 "$PID" 2>/dev/null; do
+            if [ "$waited" -ge "$STOP_TIMEOUT" ]; then
+                echo "Process $PID still running after ${STOP_TIMEOUT}s; ah.pid kept"
+                exit 1
+            fi
+            sleep 1
+            waited=$((waited + 1))
+        done
+        echo "Stopped (PID $PID)"
+    else
+        echo "Process $PID not running"
+    fi
+    rm -f ah.pid
+else
+    echo "No ah.pid found"
+fi
--- a/workers/pycache/init.cpython-310.pyc
+++ b/workers/pycache/init.cpython-310.pyc
--- a/workers/pycache/clip_model.cpython-310.pyc
+++ b/workers/pycache/clip_model.cpython-310.pyc
--- a/workers/clip_model.py
+++ b/workers/clip_model.py
@ -1,2 +1,61 @@
 # -*- coding:utf-8 -*-
-CLIP ViT-H/14 lazy-loading wrapper.
+"""CLIP ViT-H/14 lazy-loading wrapper."""
+import os
+import torch
+import numpy as np
+from PIL import Image
+from io import BytesIO
+import base64
+import urllib.request
+
+# Local directory the model and processor are loaded from in _load().
+MODEL_PATH = '/data/ymq/models/laion/CLIP-ViT-H-14-laion2B-s32B-b79K'
+
+# Module-level cache filled by _load(); all three stay None until then.
+_model = None
+_processor = None
+_device = None
+
+
+def _load():
+    global _model, _processor, _device
+    if _model is not None:
+        return
+    # CUDA_VISIBLE_DEVICES is set in start.sh, so GPU 0 in visible devices is our target
+    _device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+    from transformers import CLIPModel, CLIPProcessor
+    _processor = CLIPProcessor.from_pretrained(MODEL_PATH)
+    _model = CLIPModel.from_pretrained(MODEL_PATH, torch_dtype=torch.float16)
+    _model = _model.to(_device)
+    _model.eval()
+    print(f'[CLIP] Model loaded on {_device}, dtype=float16')
+
+
+def embed_texts(texts):
+    _load()
+    inputs = _processor(text=texts, return_tensors='pt', padding=True, truncation=True, max_length=77)
+    inputs = {k: v.to(_device) for k, v in inputs.items()}
+    with torch.no_grad():
+        outputs = _model.get_text_features(**inputs)
+        outputs = outputs / outputs.norm(dim=-1, keepdim=True)
+    return outputs.cpu().float().numpy().tolist()
+
+
+def _load_image(src):
+    if src.startswith('data:'):
+        _, b64 = src.split(',', 1)
+        return Image.open(BytesIO(base64.b64decode(b64))).convert('RGB')
+    elif src.startswith('http://') or src.startswith('https://'):
+        with urllib.request.urlopen(src, timeout=30) as resp:
+            return Image.open(BytesIO(resp.read())).convert('RGB')
+    else:
+        return Image.open(src).convert('RGB')
+
+
+def embed_images(sources):
+    _load()
+    images = [_load_image(s) for s in sources]
+    inputs = _processor(images=images, return_tensors='pt')
+    inputs = {k: v.to(_device) for k, v in inputs.items()}
+    with torch.no_grad():
+        outputs = _model.get_image_features(**inputs)
+        outputs = outputs / outputs.norm(dim=-1, keepdim=True)
+    return outputs.cpu().float().numpy().tolist()