From 86589ec2e8ef7a1d8e1ec8c0c4b318ae8ea79bf9 Mon Sep 17 00:00:00 2001 From: ymq1 Date: Wed, 8 Oct 2025 22:42:37 +0800 Subject: [PATCH] bugfix --- llmengine/mm_embedding.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/llmengine/mm_embedding.py b/llmengine/mm_embedding.py index f788be6..455e9b2 100644 --- a/llmengine/mm_embedding.py +++ b/llmengine/mm_embedding.py @@ -9,8 +9,20 @@ Features: - Video frame sampling + average pooling - Audio resampling + CLAP embedding - L2 normalized output for similarity search + +model_name='/data/ymq/models/laion/CLIP-ViT-B-32-laion2B-s34B-b79K' + +input: + +text: +{ + "type":"text", + "text":"...." +} +image: """ + import os from pathlib import Path import numpy as np @@ -35,6 +47,13 @@ except Exception: DEVICE = "cuda" if torch.cuda.is_available() else "mps" if getattr(torch.backends, "mps", None) and torch.backends.mps.is_available() else "cpu" USE_FP16 = DEVICE == "cuda" +def choose_device(): + if torch.cuda.is_available(): + return "cuda" + if getattr(torch.backends, "mps", None) and torch.backends.mps.is_available(): + return "mps" + return "cpu" + # Unified model for all modalities CLIP_MODEL_NAME = "openai/clip-vit-large-patch14" FRAME_SAMPLE_RATE = 1.0 # fps for video