feat: CLIP-ViT-H/14 embedding service (text+image, 1024-dim, GPU float16)

2026-06-14 17:00:14 +08:00 · 2026-06-14 17:00:14 +08:00 · e0009da8e7
commit e0009da8e7
8 changed files with 202 additions and 0 deletions
--- a/ah.py
+++ b/ah.py
@ -0,0 +1,7 @@
+# -*- coding:utf-8 -*-
+import os
+from ahserver.webapp import webapp
+
+# Script entry point: call the imported `webapp` function only when this
+# file is executed directly, not when it is imported as a module.
+if __name__ == '__main__':
+    webapp()
+
--- a/app/api/embed/index.dspy
+++ b/app/api/embed/index.dspy
@ -0,0 +1,56 @@
+# -*- coding:utf-8 -*-
+import json
+import sys
+import os
+sys.path.insert(0, os.getcwd())
+
+# Combined embedding endpoint: GET returns usage help; any other method embeds
+# the supplied texts and/or images and returns the vectors as one JSON document.
+# NOTE(review): `request` and `params_kw` are not defined in this script; they
+# appear to be injected by the dspy processor - confirm against ahserver.
+
+# Per-request caps, kept identical to the dedicated text and image endpoints
+# so this combined route cannot be used to submit an unbounded batch.
+MAX_TEXTS = 128
+MAX_IMAGES = 32
+
+method = request.method
+
+if method == 'GET':
+    return json.dumps({
+        'usage': 'POST with JSON body',
+        'params': {
+            'texts': 'list[str] (optional, up to 128 texts)',
+            'images': 'list[str] (optional, up to 32 images; file paths/URLs/base64)'
+        },
+        'response': {
+            'text_embeddings': 'list of vectors (if texts provided)',
+            'image_embeddings': 'list of vectors (if images provided)'
+        },
+        # The example is valid JSON so it can be pasted directly as a body.
+        'example': 'POST {"texts": ["a cat"], "images": ["/path/to/cat.jpg"]}'
+    }, ensure_ascii=False)
+
+# Missing or null parameters are treated as "not provided".
+texts = params_kw.get('texts') or []
+images = params_kw.get('images') or []
+
+# A single item may be sent as a bare string instead of a one-element list.
+if isinstance(texts, str):
+    texts = [texts]
+if isinstance(images, str):
+    images = [images]
+
+if not texts and not images:
+    return json.dumps({'error': 'at least one of texts or images is required'}, ensure_ascii=False)
+
+# The size checks below need real sequences; reject numbers/objects up front
+# with a JSON error instead of letting len() raise.
+if not isinstance(texts, (list, tuple)) or not isinstance(images, (list, tuple)):
+    return json.dumps({'error': 'texts and images must be lists of strings'}, ensure_ascii=False)
+
+if len(texts) > MAX_TEXTS:
+    return json.dumps({'error': f'max {MAX_TEXTS} texts per request'}, ensure_ascii=False)
+
+if len(images) > MAX_IMAGES:
+    return json.dumps({'error': f'max {MAX_IMAGES} images per request'}, ensure_ascii=False)
+
+try:
+    import time
+    t0 = time.time()
+    result = {'status': 'ok'}
+
+    # The worker module is imported inside the try block so that an import
+    # failure is reported as a JSON error like any other failure.
+    if texts:
+        from workers.clip_model import embed_texts
+        result['text_embeddings'] = embed_texts(texts)
+        result['text_count'] = len(texts)
+
+    if images:
+        from workers.clip_model import embed_images
+        result['image_embeddings'] = embed_images(images)
+        result['image_count'] = len(images)
+
+    # Report the width of the vectors actually returned (as the text and image
+    # endpoints do); 1024 is only the fallback when no vector came back.
+    sample = result.get('text_embeddings') or result.get('image_embeddings')
+    result['dimension'] = len(sample[0]) if sample else 1024
+    result['elapsed'] = round(time.time() - t0, 3)
+
+    return json.dumps(result, ensure_ascii=False)
+except Exception as e:
+    # NOTE(review): the traceback is returned to the caller; fine for an
+    # internal service, otherwise consider logging it server-side instead.
+    import traceback
+    return json.dumps({'error': str(e), 'traceback': traceback.format_exc()}, ensure_ascii=False)
+
--- a/app/api/image/index.dspy
+++ b/app/api/image/index.dspy
@ -0,0 +1,47 @@
+# -*- coding:utf-8 -*-
+import json
+import sys
+import os
+sys.path.insert(0, os.getcwd())
+
+# Image embedding endpoint: GET returns usage help; any other method embeds
+# the supplied images and returns the vectors as JSON.
+# NOTE(review): `request` and `params_kw` are not defined in this script; they
+# appear to be injected by the dspy processor - confirm against ahserver.
+method = request.method
+
+if method == 'GET':
+    return json.dumps({
+        'usage': 'POST with JSON body',
+        'params': {
+            'images': 'list[str] (required, up to 32 images)',
+            '  - file path': '/data/ymq/images/photo.jpg',
+            '  - URL': 'https://example.com/image.jpg',
+            '  - base64': 'data:image/jpeg;base64,/9j/4AAQ...'
+        },
+        # The example is valid JSON so it can be pasted directly as a body.
+        'example': 'POST {"images": ["/path/to/img.jpg", "https://example.com/img.png"]}'
+    }, ensure_ascii=False)
+
+images = params_kw.get('images', [])
+if isinstance(images, str):
+    # A single image may be sent as a bare string.
+    images = [images]
+
+if not images:
+    return json.dumps({'error': 'images is required (list of file paths, URLs, or base64 data URIs)'}, ensure_ascii=False)
+
+# len() below runs outside the try block, so a non-list payload (for example
+# a number) must be rejected here to get a JSON error instead of an exception.
+if not isinstance(images, (list, tuple)):
+    return json.dumps({'error': 'images must be a list of strings'}, ensure_ascii=False)
+
+if len(images) > 32:
+    return json.dumps({'error': 'max 32 images per request'}, ensure_ascii=False)
+
+try:
+    # The worker module is imported inside the try block so that an import
+    # failure is reported as a JSON error like any other failure.
+    import time
+    from workers.clip_model import embed_images
+    t0 = time.time()
+    embeddings = embed_images(images)
+    elapsed = round(time.time() - t0, 3)
+    return json.dumps({
+        'status': 'ok',
+        'count': len(embeddings),
+        'dimension': len(embeddings[0]),
+        'embeddings': embeddings,
+        'elapsed': elapsed
+    }, ensure_ascii=False)
+except Exception as e:
+    # NOTE(review): the traceback is returned to the caller; fine for an
+    # internal service, otherwise consider logging it server-side instead.
+    import traceback
+    return json.dumps({'error': str(e), 'traceback': traceback.format_exc()}, ensure_ascii=False)
+
--- a/app/api/status/index.dspy
+++ b/app/api/status/index.dspy
@ -0,0 +1,23 @@
+# -*- coding:utf-8 -*-
+import json, subprocess
+
+# Status endpoint: returns static service metadata plus a best-effort snapshot
+# of per-GPU utilisation and memory obtained from `nvidia-smi`.
+import os
+
+result = {
+    'service': 'clip-embedding',
+    'model': 'laion/CLIP-ViT-H-14-laion2B-s32B-b79K',
+    'projection_dim': 1024,
+    # GPU index taken from the CLIP_GPU_ID environment variable (default 2).
+    'gpu_id': int(os.environ.get('CLIP_GPU_ID', '2')),
+    'gpus': []
+}
+try:
+    out = subprocess.check_output(
+        ['nvidia-smi', '--query-gpu=index,utilization.gpu,memory.used,memory.total',
+         '--format=csv,noheader,nounits'], timeout=5
+    ).decode().strip()
+except Exception as e:
+    # Still best-effort: a missing, failing or slow nvidia-smi must not break
+    # the status page, but the reason for the empty GPU list is now reported.
+    result['gpu_error'] = str(e)
+else:
+    for line in out.splitlines():
+        fields = [x.strip() for x in line.split(',')]
+        try:
+            gpu_index, util, mem_used, mem_total = (int(f) for f in fields)
+        except ValueError:
+            # Blank line, wrong column count, or a non-numeric field: skip
+            # this row only instead of discarding the remaining GPUs.
+            continue
+        result['gpus'].append({'id': gpu_index, 'util': util, 'mem_used': mem_used, 'mem_total': mem_total})
+
+return json.dumps(result, ensure_ascii=False)
+
--- a/app/api/text/index.dspy
+++ b/app/api/text/index.dspy
@ -0,0 +1,44 @@
+# -*- coding:utf-8 -*-
+import json
+import sys
+import os
+sys.path.insert(0, os.getcwd())
+
+# Text embedding endpoint: GET returns usage help; any other method embeds
+# the supplied texts and returns the vectors as JSON.
+# NOTE(review): `request` and `params_kw` are not defined in this script; they
+# appear to be injected by the dspy processor - confirm against ahserver.
+method = request.method
+
+if method == 'GET':
+    return json.dumps({
+        'usage': 'POST with JSON body',
+        'params': {
+            'texts': 'list[str] (required, up to 128 texts)',
+        },
+        # The example is valid JSON so it can be pasted directly as a body.
+        'example': 'POST {"texts": ["a cat", "a dog"]}'
+    }, ensure_ascii=False)
+
+texts = params_kw.get('texts', [])
+if isinstance(texts, str):
+    # A single text may be sent as a bare string.
+    texts = [texts]
+
+if not texts:
+    return json.dumps({'error': 'texts is required (list of strings)'}, ensure_ascii=False)
+
+# len() below runs outside the try block, so a non-list payload (for example
+# a number) must be rejected here to get a JSON error instead of an exception.
+if not isinstance(texts, (list, tuple)):
+    return json.dumps({'error': 'texts must be a list of strings'}, ensure_ascii=False)
+
+if len(texts) > 128:
+    return json.dumps({'error': 'max 128 texts per request'}, ensure_ascii=False)
+
+try:
+    # The worker module is imported inside the try block so that an import
+    # failure is reported as a JSON error like any other failure.
+    import time
+    from workers.clip_model import embed_texts
+    t0 = time.time()
+    embeddings = embed_texts(texts)
+    elapsed = round(time.time() - t0, 3)
+    return json.dumps({
+        'status': 'ok',
+        'count': len(embeddings),
+        'dimension': len(embeddings[0]),
+        'embeddings': embeddings,
+        'elapsed': elapsed
+    }, ensure_ascii=False)
+except Exception as e:
+    # NOTE(review): the traceback is returned to the caller; fine for an
+    # internal service, otherwise consider logging it server-side instead.
+    import traceback
+    return json.dumps({'error': str(e), 'traceback': traceback.format_exc()}, ensure_ascii=False)
+
--- a/conf/config.json
+++ b/conf/config.json
@ -0,0 +1,22 @@
+{
+    "password_key": "ClipEmbedding2026Key",
+    "databases": {},
+    "session_redis": {
+        "host": "127.0.0.1",
+        "port": 6379,
+        "db": 1
+    },
+    "website": {
+        "paths": [
+            ["$[workdir]$/app", ""]
+        ],
+        "host": "0.0.0.0",
+        "port": 9086,
+        "coding": "utf-8",
+        "indexes": ["index.html", "index.dspy"],
+        "processors": [
+            [".dspy", "dspy"]
+        ]
+    },
+    "hot_reload": false
+}
--- a/workers/init.py
+++ b/workers/init.py
@ -0,0 +1 @@
+
--- a/workers/clip_model.py
+++ b/workers/clip_model.py
@ -0,0 +1,2 @@
+# -*- coding:utf-8 -*-
+"""CLIP ViT-H/14 lazy-loading wrapper."""