2026-04-20 17:10:48 +08:00
2026-04-20 17:10:48 +08:00

在GPU上跑的服务

clip.service

多媒体embedding服务,提供文本、图像、视频和音频的embedding服务。

# clip.service — multimedia embedding service (runs on GPU).
[Unit]
# NOTE(review): Wants= pulls in the network service but there is no After=
# ordering and no Description= — confirm whether start ordering matters.
Wants=systemd-networkd.service

[Service]
WorkingDirectory=/share/run/clip
ExecStart=/share/run/clip/start.sh
ExecStop=/share/run/clip/stop.sh
# stdout and stderr both append to the same log file
StandardOutput=append:/var/log/clip/clip.log
StandardError=append:/var/log/clip/clip.log
SyslogIdentifier=clip

[Install]
WantedBy=multi-user.target

http服务

https://embedding.opencomputing.net:1044/mme

模型

  • 使用的模型名:laion/CLIP-ViT-B-32-laion2B-s34B-b79K
  • 模型路径

运行脚本

#!/bin/bash
# Launch the multimedia embedding service (text/image/video/audio) on GPU 4,
# listening on port 8882. Invoked by clip.service via start.sh.
# Fixed: the command had been hard-wrapped mid-word by document extraction
# ("llmengine" / ".mm_embedding"); rejoined into a single runnable line.
CUDA_VISIBLE_DEVICES=4 /share/vllm-0.8.5/bin/python -m llmengine.mm_embedding -p 8882

entities.service

实体抽取服务

# entities.service — entity extraction service.
[Unit]
Wants=systemd-networkd.service

[Service]
WorkingDirectory=/share/run/entities
ExecStart=/share/run/entities/start.sh
ExecStop=/share/run/entities/stop.sh
# stdout and stderr share one appended log file
StandardOutput=append:/var/log/entities/entities.log
StandardError=append:/var/log/entities/entities.log
SyslogIdentifier=entities

[Install]
WantedBy=multi-user.target

http服务

https://entities.opencomputing.net:10443

模型

路径:/share/models/LTP/small

运行脚本

#!/bin/bash

# Run the entity-extraction engine on GPU 7, port 9990, serving the LTP small
# model. Runs in the foreground (the unit declares no Type=forking).
CUDA_VISIBLE_DEVICES=7 /share/vllm-0.8.5/bin/python -m llmengine.entity -p 9990 /share/models/LTP/small

f5tts.service

# f5tts.service — F5-TTS text-to-speech service, run as user ymq.
[Unit]
Wants=systemd-networkd.service

[Service]
User=ymq
Group=ymq
WorkingDirectory=/share/ymq/run/f5tts
# forking: start.sh backgrounds the workers and then exits
Type=forking
ExecStart=/share/ymq/run/f5tts/start.sh
ExecStop=/share/ymq/run/f5tts/stop.sh
StandardOutput=append:/var/log/f5tts/f5tts.log
StandardError=append:/var/log/f5tts/f5tts.log
SyslogIdentifier=f5tts

[Install]
WantedBy=multi-user.target

http服务

https://tts.opencomputing.net:10443

模型

  • 模型路径 /share/models/SWivid/F5-TTS/F5TTS_v1_Base/model_1250000.safetensors

软件仓库

https://git.opencomputing.cn/yumoqing/f5tts

执行脚本

#!/usr/bin/bash

# Launch background F5-TTS engine workers on GPU 6.
# NOTE(review): the message says 3 instances but 4 are spawned, and all four
# pass the same port 9995 — unless the app uses SO_REUSEPORT only the first
# bind can succeed. Confirm the intended instance count and per-worker ports.
echo start 3 instances for f5tts engine
rundir=/share/ymq/run/f5tts
CUDA_VISIBLE_DEVICES=6 ${rundir}/f5tts.env/bin/python ${rundir}/app/f5tts.py -w ${rundir} -p 9995 &
CUDA_VISIBLE_DEVICES=6 ${rundir}/f5tts.env/bin/python ${rundir}/app/f5tts.py -w ${rundir} -p 9995 &
CUDA_VISIBLE_DEVICES=6 ${rundir}/f5tts.env/bin/python ${rundir}/app/f5tts.py -w ${rundir} -p 9995 &
CUDA_VISIBLE_DEVICES=6 ${rundir}/f5tts.env/bin/python ${rundir}/app/f5tts.py -w ${rundir} -p 9995 &

fastwhisper.service

ASR模型

# fastwhisper.service — faster-whisper ASR service, run as user ymq.
[Unit]
Wants=systemd-networkd.service

[Service]
User=ymq
Group=ymq
Type=forking
WorkingDirectory=/d/ymq/run/fastwhisper
ExecStart=/d/ymq/run/fastwhisper/start.sh
ExecStop=/d/ymq/run/fastwhisper/stop.sh
# Logs live under the run directory, unlike the other services' /var/log paths.
StandardOutput=append:/d/ymq/run/fastwhisper/logs/fastwhisper.log
StandardError=append:/d/ymq/run/fastwhisper/logs/fastwhisper.log
SyslogIdentifier=fastwhisper

[Install]
WantedBy=multi-user.target

http服务

https://asr.opencomputing.net:10443/fw

模型

  • 路径 /data/ymq/models/deepdml/faster-whisper-large-v3-turbo-ct2

软件仓库

https://git.opencomputing.cn/yumoqing/fastwhisper

执行脚本

#!/usr/bin/bash
# Start the faster-whisper ASR HTTP service in the background and return
# immediately (the unit uses Type=forking).
# Fixed: added the missing shebang, and abort if the cd fails so the app is
# never started from an unexpected working directory.
cd /d/ymq/run/fastwhisper || exit 1
/d/ymq/run/fastwhisper/py3/bin/python /d/ymq/run/fastwhisper/app/fastwhisper.py -w /d/ymq/run/fastwhisper &
exit 0

fvlm.service

fast vlm 模型服务

# fvlm.service — FastVLM vision-language model service, run as user ymq.
[Unit]
Wants=systemd-networkd.service

[Service]
User=ymq
Group=ymq
Type=forking
WorkingDirectory=/share/ymq/run/fvlm
# previous direct-exec invocation, kept for reference
# ExecStart=/share/ymq/run/fvlm/fvlm.env/bin/python app/fastvlm.py -p 9994
ExecStart=/share/ymq/run/fvlm/start.sh
ExecStop=/share/ymq/run/fvlm/stop.sh
StandardOutput=append:/var/log/fvlm/fvlm.log
StandardError=append:/var/log/fvlm/fvlm.log
SyslogIdentifier=fvlm

[Install]
WantedBy=multi-user.target

http服务

https://fastvlm.opencomputing.net:10443

模型

  • 路径 /share/models/apple/llava-fastvithd_0.5b_stage3

软件仓库

https://git.kaiyuancloud.cn/yumoqing/fvlm

脚本

#!/usr/bin/bash

# Start FastVLM on GPU 6, port 9994, in the background.
# NOTE(review): app/fastvlm.py is a relative path — this works only because
# the unit sets WorkingDirectory=/share/ymq/run/fvlm; it will fail if run by
# hand from another directory.
CUDA_VISIBLE_DEVICES=6 /share/ymq/run/fvlm/fvlm.env/bin/python app/fastvlm.py -p 9994 &

m2m.service

多语言翻译

# m2m.service — multilingual translation service.
[Unit]
Wants=systemd-networkd.service

[Service]
WorkingDirectory=/share/run/m2m
ExecStart=/share/run/m2m/start.sh
ExecStop=/share/run/m2m/stop.sh
StandardOutput=append:/var/log/m2m/m2m.log
StandardError=append:/var/log/m2m/m2m.log
SyslogIdentifier=m2m

[Install]
WantedBy=multi-user.target

http服务

https://t2t.opencomputing.net:10443/m2m

模型

  • 路径 /share/models/facebook/m2m100_1.2B

运行脚本

#!/bin/bash

# Translation engine (facebook/m2m100_1.2B) on port 8883, foreground.
# NOTE(review): no CUDA_VISIBLE_DEVICES is set here, unlike the sibling
# scripts — confirm which GPU this is meant to run on.
/share/vllm-0.8.5/bin/python -m llmengine.m2m -p 8883 /share/models/facebook/m2m100_1.2B

neo4j.service

知识图谱数据库

# neo4j.service — knowledge-graph database service.
[Unit]
Wants=systemd-networkd.service

[Service]
Type=forking
WorkingDirectory=/share/run/neo4j
ExecStart=/share/run/neo4j/start.sh
ExecStop=/share/run/neo4j/stop.sh
StandardOutput=append:/var/log/neo4j/neo4j.log
StandardError=append:/var/log/neo4j/neo4j.log
# Fixed: SyslogIdentifier is a syslog tag, not a filesystem path; it had been
# set to the working directory (/share/run/neo4j). Every other unit in this
# document uses the bare service name.
SyslogIdentifier=neo4j
# The graph database can be slow to come up; allow 5 minutes before failing.
TimeoutStartSec=300

[Install]
WantedBy=multi-user.target

http服务

https://graphdb.opencomputing.net:10443

模型路径

  • 路径

软件仓库

https://git.opencomputing.cn/yumoqing/llmengine

运行脚本

#!/bin/bash
# Start the neo4j-backed graph service on GPU 7, port 8885, in the background.
# NOTE(review): the trailing "Neo4j" argument's meaning is not shown here —
# presumably a connection/profile name; confirm against llmengine.neo4j.
CUDA_VISIBLE_DEVICES=7 /share/vllm-0.8.5/bin/python -m llmengine.neo4j -p 8885 Neo4j &

nvidia-asr.service

英伟达的ASR模型

# nvidia-asr.service — NVIDIA Parakeet ASR service, run as user ymq.
[Unit]
Wants=systemd-networkd.service

[Service]
User=ymq
Group=ymq
Type=forking
WorkingDirectory=/share/ymq/run/nvidia-asr
ExecStart=/share/ymq/run/nvidia-asr/start.sh
ExecStop=/share/ymq/run/nvidia-asr/stop.sh
StandardOutput=append:/var/log/asr/asr.log
StandardError=append:/var/log/asr/asr.log
SyslogIdentifier=asr
[Install]
WantedBy=multi-user.target

http服务

https://asr.opencomputing.net:10443

模型

  • 路径 /share/models/nvidia/parakeet-tdt-0.6b-v2/parakeet-tdt-0.6b-v2.nemo

软件仓库

https://git.opencomputing.cn/yumoqing/nvidia-asr.git

执行脚本

#!/usr/bin/bash

# Start one ASR worker on GPU 6, port 9992, from the run directory.
rundir=/share/ymq/run/nvidia-asr
cd $rundir
# NOTE(review): a second identical instance is kept disabled below; if
# re-enabled unchanged it would collide with the first on port 9992.
CUDA_VISIBLE_DEVICES=6 $rundir/nvidia-asr.env/bin/python app/asr.py -p 9992 &
# CUDA_VISIBLE_DEVICES=6 $rundir/nvidia-asr.env/bin/python app/asr.py -p 9992 &

ollama.service

# ollama.service — Ollama model server.
[Unit]
Description=Ollama Service
After=network-online.target

[Service]
ExecStart=/d/ollama/start.sh
# previous direct invocation, kept for reference:
# CUDA_VISIBLE_DEVICES=2,3 /usr/local/bin/ollama serve
User=ollama
Group=ollama
Restart=always
RestartSec=3
# Fixed: this Environment= value had been hard-wrapped mid-path by document
# extraction ("/usr/l" / "ocal/..."); rejoined into one valid assignment.
# NOTE(review): /d/ymq/bin appears twice in the PATH — harmless but redundant.
Environment="PATH=/d/ymq/.local/bin:/d/ymq/bin:/d/ymq/bin:/usr/local/cuda-12/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin"

[Install]
WantedBy=default.target

http服务

https://ollama.opencomputing.net:10443

qwen3.service

qwen3-0.6b模型

# qwen3.service — Qwen3-0.6B model service.
[Unit]
Wants=systemd-networkd.service

[Service]
WorkingDirectory=/share/run/qwen3
Type=forking
ExecStart=/share/run/qwen3/start.sh
ExecStop=/share/run/qwen3/stop.sh
StandardOutput=append:/var/log/qwen3/qwen3.log
StandardError=append:/var/log/qwen3/qwen3.log
SyslogIdentifier=qwen3

[Install]
# Fixed: the [Install] section was empty, so `systemctl enable qwen3` would
# install no symlink and the service would never start at boot. Every other
# service in this document targets multi-user.target.
WantedBy=multi-user.target

http服务

https://t2t.opencomputing.net:10443/qwen3-6b

模型

  • 路径 /d/models/Qwen/Qwen3-0.6B

执行脚本

#!/usr/bin/bash

# Serve Qwen3-0.6B via llmengine.server on GPU 7, port 9089, backgrounded
# (the unit uses Type=forking, so this script exits immediately).
rundir=/share/run/qwen3
CUDA_VISIBLE_DEVICES=7 /share/vllm-0.8.5/bin/python -m llmengine.server -w ${rundir} -p 9089 /d/models/Qwen/Qwen3-0.6B &
exit 0

qwen3coder.service

# qwen3coder.service — Qwen3-Coder-30B-A3B-Instruct model service.
[Unit]
Wants=systemd-networkd.service

[Service]
WorkingDirectory=/share/run/qwen3coder
Type=forking
ExecStart=/share/run/qwen3coder/start.sh
ExecStop=/share/run/qwen3coder/stop.sh
StandardOutput=append:/var/log/qwen3coder/qwen3coder.log
StandardError=append:/var/log/qwen3coder/qwen3coder.log
SyslogIdentifier=qwen3coder

[Install]
WantedBy=multi-user.target

http服务

https://t2t.opencomputing.net:10443

模型

  • 路径 /d/models/Qwen/Qwen3-Coder-30B-A3B-Instruct

执行脚本

#!/usr/bin/bash

# Serve Qwen3-Coder-30B-A3B-Instruct via llmengine.server on GPU 0,
# port 9088, backgrounded (the unit uses Type=forking).
rundir=/share/run/qwen3coder
CUDA_VISIBLE_DEVICES=0 /share/vllm-0.8.5/bin/python -m llmengine.server -w ${rundir} -p 9088 /d/models/Qwen/Qwen3-Coder-30B-A3B-Instruct &
exit 0

reranker.service

[Unit]
Description=A Rerank Service using Qwen3-Reranker-0.6B
# After=network.target DeepSeek70B-kyyds671b-ray.service
# Requires=DeepSeek70B-kyyds671b-ray.service
StartLimitIntervalSec=60
StartLimitBurst=5

[Service]
# Core startup parameters (kept from the original configuration)
WorkingDirectory=/share/run/reranker
# Environment variables; start scripts and services on all nodes must agree
#Environment="NCCL_SOCKET_IFNAME=enp196s0f0np0"
#ExecStartPre=/data/kyyds671b/ray_check.sh
ExecStart=/share/run/reranker/start.sh
ExecStop=/share/run/reranker/stop.sh

# Timeout and stop control (newly added section, currently disabled)
# Extend startup timeout to 120 seconds
# TimeoutStartSec=120
# Allow 30 seconds for the service to stop
# TimeoutStopSec=30
# Send SIGINT first (better suited to Python programs)
# KillSignal=SIGINT
# Final forced-termination signal
# RestartKillSignal=SIGKILL
# Mixed kill mode
# KillMode=mixed
# Restart policy
# Restart=on-failure
# RestartSec=10s
# Service management (original configuration plus hardening, disabled)
#Restart=always
#RestartSec=10                    # restart interval raised from 5s to 10s
# append: keeps appending (like >>); file: truncates and rewrites (like >)
StandardOutput=append:/var/log/rerank/rerank.log
StandardError=append:/var/log/rerank/error.log
SyslogIdentifier=rerank
# Resource limits (optional, kept commented)
#LimitNOFILE=65536
#LimitNPROC=65536
# GPU support
#Environment=CUDA_VISIBLE_DEVICES=0,1

[Install]
WantedBy=multi-user.target

http服务

https://reranker.opencomputing.net:10443

模型

  • 路径 /share/models/BAAI/bge-reranker-v2-m3

脚本

#!/bin/bash

# Rerank service on GPU 4, port 9997, serving BAAI/bge-reranker-v2-m3.
# The commented line below is the previous Qwen3-Reranker deployment.
# CUDA_VISIBLE_DEVICES=7 /share/vllm-0.8.5/bin/python -m llmengine.rerank -p 9997 /d/ymq/models/Qwen/Qwen3-Reranker-0___6B
CUDA_VISIBLE_DEVICES=4 /share/vllm-0.8.5/bin/python -m llmengine.rerank -p 9997  /share/models/BAAI/bge-reranker-v2-m3

subtitler.service

歌曲歌词对齐服务

# subtitler.service — song/lyric alignment service, run as user ymq.
[Unit]
Wants=systemd-networkd.service

[Service]
User=ymq
Group=ymq
Type=forking
WorkingDirectory=/d/ymq/py/subtitle
ExecStart=/d/ymq/py/subtitle/start.sh
ExecStop=/d/ymq/py/subtitle/stop.sh
StandardOutput=append:/var/log/subtitler/subtitler.log
StandardError=append:/var/log/subtitler/subtitler.log
SyslogIdentifier=subtitler

[Install]
WantedBy=multi-user.target

模型

  • 路径:1. /data/ymq/models/MahmoudAshraf/mms-300m-1130-forced-aligner(对齐模型);2. /data/ymq/models/mdx_models/UVR-MDX-NET-Voc_FT.onnx(分割人声和伴奏)

仓库

https://git.opencomputing.cn/yumoqing/subtitle

脚本

#!/usr/bin/bash
# Lyric/subtitle alignment service on port 9901, backgrounded
# (the unit uses Type=forking).
cd /d/ymq/py/subtitle
/d/ymq/py/subtitle/py3/bin/python /d/ymq/py/subtitle/app/subtitler.py -p 9901 -w /d/ymq/py/subtitle &
exit 0

triples.service

获取三元组

# triples.service — triple (subject/predicate/object) extraction service.
[Unit]
Wants=systemd-networkd.service

[Service]
Type=forking
WorkingDirectory=/share/run/triples
ExecStart=/share/run/triples/start.sh
ExecStop=/share/run/triples/stop.sh
StandardOutput=append:/var/log/triples/triples.log
StandardError=append:/var/log/triples/triples.log
# Fixed: SyslogIdentifier is a syslog tag, not a filesystem path; it had been
# set to the working directory. Every other unit uses the bare service name.
SyslogIdentifier=triples

[Install]
WantedBy=multi-user.target

http服务

https://triples.opencomputing.net:10443

模型

  • 路径 /share/models/Babelscape/mrebel-large

脚本

#!/bin/bash

# Spawn three triple-extraction workers (Babelscape/mrebel-large) on GPU 7.
# NOTE(review): all three pass the same port 9991 — unless llmengine.triple
# uses SO_REUSEPORT, only the first bind can succeed. Confirm the intended
# per-worker ports.
CUDA_VISIBLE_DEVICES=7 /share/vllm-0.8.5/bin/python -m llmengine.triple -p 9991 /share/models/Babelscape/mrebel-large &
CUDA_VISIBLE_DEVICES=7 /share/vllm-0.8.5/bin/python -m llmengine.triple -p 9991 /share/models/Babelscape/mrebel-large &
CUDA_VISIBLE_DEVICES=7 /share/vllm-0.8.5/bin/python -m llmengine.triple -p 9991 /share/models/Babelscape/mrebel-large &

vdb.service

milvus向量数据库

# vdb.service — Milvus-backed vector database service, run as wangmeihua.
[Unit]
Wants=systemd-networkd.service

[Service]
User=wangmeihua
Group=wangmeihua
Type=forking
WorkingDirectory=/d/wangmeihua/vdbtest/vdb
ExecStart=/d/wangmeihua/vdbtest/vdb/start.sh
ExecStop=/d/wangmeihua/vdbtest/vdb/stop.sh
StandardOutput=append:/var/log/vdb/vdb.log
StandardError=append:/var/log/vdb/vdb.log
SyslogIdentifier=vdb

[Install]
WantedBy=multi-user.target

http服务

https://vectordb.opencomputing.net:10443/milvus

仓库

https://git.opencomputing.cn/yumoqing/vdb

脚本

#!/usr/bin/bash

# Refuse to start a second instance: look for a live vdbapp.py process.
# Fixed: use pgrep -f instead of the fragile `ps aux | grep | grep -v grep`
# pipeline (same result, and pgrep never matches itself).
PID=$(pgrep -f "vdbapp.py" | head -1)

if [ -n "$PID" ]; then
    echo "错误VDB 服务已在运行 (PID: $PID)"
    echo "请先停止现有服务kill $PID"
    exit 1
fi

# NOTE(review): the unit's WorkingDirectory and the python invocation below use
# /d/wangmeihua/..., but this cd (and the venv) use /share/wangmeihua/... —
# confirm which prefix is correct (they may be the same tree mounted twice).
# Fixed: abort if the cd fails instead of continuing in the wrong directory.
cd /share/wangmeihua/vdbtest/vdb || exit 1
/share/wangmeihua/vdbtest/vdb/vdbvenv/bin/python /d/wangmeihua/vdbtest/vdb/app/vdbapp.py -p 8887 -w /d/wangmeihua/vdbtest/vdb &
echo "VDB 服务已启动 (PID: $!)"
exit 0
Description
No description provided
Readme 37 KiB