2026-04-20 17:10:48 +08:00
2026-04-20 17:10:48 +08:00

在GPU上跑的服务

clip.service

多媒体embedding服务,提供文本、图像、视频和音频的embedding服务。

# clip.service — multimedia embedding service (runs on GPU).
[Unit]
# NOTE(review): Wants= pulls in the network service but there is no After=
# ordering and no Description= — confirm whether start ordering matters.
Wants=systemd-networkd.service

[Service]
WorkingDirectory=/share/run/clip
ExecStart=/share/run/clip/start.sh
ExecStop=/share/run/clip/stop.sh
# stdout and stderr both append to the same log file
StandardOutput=append:/var/log/clip/clip.log
StandardError=append:/var/log/clip/clip.log
SyslogIdentifier=clip

[Install]
WantedBy=multi-user.target

http服务

https://embedding.opencomputing.net:1044/mme

模型

  • 使用的模型名:laion/CLIP-ViT-B-32-laion2B-s34B-b79K
  • 模型路径

运行脚本

#!/bin/bash
# Launch the multimedia embedding service (text/image/video/audio) on GPU 4,
# listening on port 8882. Invoked by clip.service via start.sh.
# Fixed: the command had been hard-wrapped mid-word by document extraction
# ("llmengine" / ".mm_embedding"); rejoined into a single runnable line.
CUDA_VISIBLE_DEVICES=4 /share/vllm-0.8.5/bin/python -m llmengine.mm_embedding -p 8882

entities.service

实体抽取服务

# entities.service — entity extraction service.
[Unit]
Wants=systemd-networkd.service

[Service]
WorkingDirectory=/share/run/entities
ExecStart=/share/run/entities/start.sh
ExecStop=/share/run/entities/stop.sh
# stdout and stderr share one appended log file
StandardOutput=append:/var/log/entities/entities.log
StandardError=append:/var/log/entities/entities.log
SyslogIdentifier=entities

[Install]
WantedBy=multi-user.target

http服务

https://entities.opencomputing.net:10443

模型

路径:/share/models/LTP/small

运行脚本

#!/bin/bash

# Run the entity-extraction engine on GPU 7, port 9990, serving the LTP small
# model. Runs in the foreground (the unit declares no Type=forking).
CUDA_VISIBLE_DEVICES=7 /share/vllm-0.8.5/bin/python -m llmengine.entity -p 9990 /share/models/LTP/small

f5tts.service

# f5tts.service — F5-TTS text-to-speech service, run as user ymq.
[Unit]
Wants=systemd-networkd.service

[Service]
User=ymq
Group=ymq
WorkingDirectory=/share/ymq/run/f5tts
# forking: start.sh backgrounds the workers and then exits
Type=forking
ExecStart=/share/ymq/run/f5tts/start.sh
ExecStop=/share/ymq/run/f5tts/stop.sh
StandardOutput=append:/var/log/f5tts/f5tts.log
StandardError=append:/var/log/f5tts/f5tts.log
SyslogIdentifier=f5tts

[Install]
WantedBy=multi-user.target

http服务

https://tts.opencomputing.net:10443

模型

  • 模型路径 /share/models/SWivid/F5-TTS/F5TTS_v1_Base/model_1250000.safetensors

软件仓库

https://git.opencomputing.cn/yumoqing/f5tts

执行脚本

#!/usr/bin/bash

# Launch background F5-TTS engine workers on GPU 6.
# NOTE(review): the message says 3 instances but 4 are spawned, and all four
# pass the same port 9995 — unless the app uses SO_REUSEPORT only the first
# bind can succeed. Confirm the intended instance count and per-worker ports.
echo start 3 instances for f5tts engine
rundir=/share/ymq/run/f5tts
CUDA_VISIBLE_DEVICES=6 ${rundir}/f5tts.env/bin/python ${rundir}/app/f5tts.py -w ${rundir} -p 9995 &
CUDA_VISIBLE_DEVICES=6 ${rundir}/f5tts.env/bin/python ${rundir}/app/f5tts.py -w ${rundir} -p 9995 &
CUDA_VISIBLE_DEVICES=6 ${rundir}/f5tts.env/bin/python ${rundir}/app/f5tts.py -w ${rundir} -p 9995 &
CUDA_VISIBLE_DEVICES=6 ${rundir}/f5tts.env/bin/python ${rundir}/app/f5tts.py -w ${rundir} -p 9995 &

fastwhisper.service

ASR模型

# fastwhisper.service — faster-whisper ASR service, run as user ymq.
[Unit]
Wants=systemd-networkd.service

[Service]
User=ymq
Group=ymq
Type=forking
WorkingDirectory=/d/ymq/run/fastwhisper
ExecStart=/d/ymq/run/fastwhisper/start.sh
ExecStop=/d/ymq/run/fastwhisper/stop.sh
# Logs live under the run directory, unlike the other services' /var/log paths.
StandardOutput=append:/d/ymq/run/fastwhisper/logs/fastwhisper.log
StandardError=append:/d/ymq/run/fastwhisper/logs/fastwhisper.log
SyslogIdentifier=fastwhisper

[Install]
WantedBy=multi-user.target

http服务

https://asr.opencomputing.net:10443/fw

模型

  • 路径 /data/ymq/models/deepdml/faster-whisper-large-v3-turbo-ct2

软件仓库

https://git.opencomputing.cn/yumoqing/fastwhisper

执行脚本

#!/usr/bin/bash
# Start the faster-whisper ASR HTTP service in the background and return
# immediately (the unit uses Type=forking).
# Fixed: added the missing shebang, and abort if the cd fails so the app is
# never started from an unexpected working directory.
cd /d/ymq/run/fastwhisper || exit 1
/d/ymq/run/fastwhisper/py3/bin/python /d/ymq/run/fastwhisper/app/fastwhisper.py -w /d/ymq/run/fastwhisper &
exit 0

fvlm.service

fast vlm 模型服务

# fvlm.service — FastVLM vision-language model service, run as user ymq.
[Unit]
Wants=systemd-networkd.service

[Service]
User=ymq
Group=ymq
Type=forking
WorkingDirectory=/share/ymq/run/fvlm
# previous direct-exec invocation, kept for reference
# ExecStart=/share/ymq/run/fvlm/fvlm.env/bin/python app/fastvlm.py -p 9994
ExecStart=/share/ymq/run/fvlm/start.sh
ExecStop=/share/ymq/run/fvlm/stop.sh
StandardOutput=append:/var/log/fvlm/fvlm.log
StandardError=append:/var/log/fvlm/fvlm.log
SyslogIdentifier=fvlm

[Install]
WantedBy=multi-user.target

http服务

https://fastvlm.opencomputing.net:10443

模型

  • 路径 /share/models/apple/llava-fastvithd_0.5b_stage3

软件仓库

https://git.kaiyuancloud.cn/yumoqing/fvlm

脚本

#!/usr/bin/bash

# Start FastVLM on GPU 6, port 9994, in the background.
# NOTE(review): app/fastvlm.py is a relative path — this works only because
# the unit sets WorkingDirectory=/share/ymq/run/fvlm; it will fail if run by
# hand from another directory.
CUDA_VISIBLE_DEVICES=6 /share/ymq/run/fvlm/fvlm.env/bin/python app/fastvlm.py -p 9994 &

m2m.service

多语言翻译

# m2m.service — multilingual translation service.
[Unit]
Wants=systemd-networkd.service

[Service]
WorkingDirectory=/share/run/m2m
ExecStart=/share/run/m2m/start.sh
ExecStop=/share/run/m2m/stop.sh
StandardOutput=append:/var/log/m2m/m2m.log
StandardError=append:/var/log/m2m/m2m.log
SyslogIdentifier=m2m

[Install]
WantedBy=multi-user.target

http服务

https://t2t.opencomputing.net:10443/m2m

模型

  • 路径 /share/models/facebook/m2m100_1.2B

运行脚本

#!/bin/bash

# Translation engine (facebook/m2m100_1.2B) on port 8883, foreground.
# NOTE(review): no CUDA_VISIBLE_DEVICES is set here, unlike the sibling
# scripts — confirm which GPU this is meant to run on.
/share/vllm-0.8.5/bin/python -m llmengine.m2m -p 8883 /share/models/facebook/m2m100_1.2B

neo4j.service

知识图谱数据库

# neo4j.service — knowledge-graph database service.
[Unit]
Wants=systemd-networkd.service

[Service]
Type=forking
WorkingDirectory=/share/run/neo4j
ExecStart=/share/run/neo4j/start.sh
ExecStop=/share/run/neo4j/stop.sh
StandardOutput=append:/var/log/neo4j/neo4j.log
StandardError=append:/var/log/neo4j/neo4j.log
# Fixed: SyslogIdentifier is a syslog tag, not a filesystem path; it had been
# set to the working directory (/share/run/neo4j). Every other unit in this
# document uses the bare service name.
SyslogIdentifier=neo4j
# The graph database can be slow to come up; allow 5 minutes before failing.
TimeoutStartSec=300

[Install]
WantedBy=multi-user.target

http服务

https://graphdb.opencomputing.net:10443

模型路径

  • 路径

软件仓库

https://git.opencomputing.cn/yumoqing/llmengine

运行脚本

#!/bin/bash
# Start the neo4j-backed graph service on GPU 7, port 8885, in the background.
# NOTE(review): the trailing "Neo4j" argument's meaning is not shown here —
# presumably a connection/profile name; confirm against llmengine.neo4j.
CUDA_VISIBLE_DEVICES=7 /share/vllm-0.8.5/bin/python -m llmengine.neo4j -p 8885 Neo4j &

nvidia-asr.service

英伟达的ASR模型

# nvidia-asr.service — NVIDIA Parakeet ASR service, run as user ymq.
[Unit]
Wants=systemd-networkd.service

[Service]
User=ymq
Group=ymq
Type=forking
WorkingDirectory=/share/ymq/run/nvidia-asr
ExecStart=/share/ymq/run/nvidia-asr/start.sh
ExecStop=/share/ymq/run/nvidia-asr/stop.sh
StandardOutput=append:/var/log/asr/asr.log
StandardError=append:/var/log/asr/asr.log
SyslogIdentifier=asr
[Install]
WantedBy=multi-user.target

http服务

https://asr.opencomputing.net:10443

模型

  • 路径 /share/models/nvidia/parakeet-tdt-0.6b-v2/parakeet-tdt-0.6b-v2.nemo

软件仓库

https://git.opencomputing.cn/yumoqing/nvidia-asr.git

执行脚本

#!/usr/bin/bash

# Start one ASR worker on GPU 6, port 9992, from the run directory.
rundir=/share/ymq/run/nvidia-asr
cd $rundir
# NOTE(review): a second identical instance is kept disabled below; if
# re-enabled unchanged it would collide with the first on port 9992.
CUDA_VISIBLE_DEVICES=6 $rundir/nvidia-asr.env/bin/python app/asr.py -p 9992 &
# CUDA_VISIBLE_DEVICES=6 $rundir/nvidia-asr.env/bin/python app/asr.py -p 9992 &

ollama.service

# ollama.service — Ollama model server.
[Unit]
Description=Ollama Service
After=network-online.target

[Service]
ExecStart=/d/ollama/start.sh
# previous direct invocation, kept for reference:
# CUDA_VISIBLE_DEVICES=2,3 /usr/local/bin/ollama serve
User=ollama
Group=ollama
Restart=always
RestartSec=3
# Fixed: this Environment= value had been hard-wrapped mid-path by document
# extraction ("/usr/l" / "ocal/..."); rejoined into one valid assignment.
# NOTE(review): /d/ymq/bin appears twice in the PATH — harmless but redundant.
Environment="PATH=/d/ymq/.local/bin:/d/ymq/bin:/d/ymq/bin:/usr/local/cuda-12/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin"

[Install]
WantedBy=default.target

http服务

https://ollama.opencomputing.net:10443

qwen3.service

qwen3-0.6b模型

# qwen3.service — Qwen3-0.6B model service.
[Unit]
Wants=systemd-networkd.service

[Service]
WorkingDirectory=/share/run/qwen3
Type=forking
ExecStart=/share/run/qwen3/start.sh
ExecStop=/share/run/qwen3/stop.sh
StandardOutput=append:/var/log/qwen3/qwen3.log
StandardError=append:/var/log/qwen3/qwen3.log
SyslogIdentifier=qwen3

[Install]
# Fixed: the [Install] section was empty, so `systemctl enable qwen3` would
# install no symlink and the service would never start at boot. Every other
# service in this document targets multi-user.target.
WantedBy=multi-user.target

http服务

https://t2t.opencomputing.net:10443/qwen3-6b

模型

  • 路径 /d/models/Qwen/Qwen3-0.6B

执行脚本

#!/usr/bin/bash

# Serve Qwen3-0.6B via llmengine.server on GPU 7, port 9089, backgrounded
# (the unit uses Type=forking, so this script exits immediately).
rundir=/share/run/qwen3
CUDA_VISIBLE_DEVICES=7 /share/vllm-0.8.5/bin/python -m llmengine.server -w ${rundir} -p 9089 /d/models/Qwen/Qwen3-0.6B &
exit 0

qwen3coder.service

# qwen3coder.service — Qwen3-Coder-30B-A3B-Instruct model service.
[Unit]
Wants=systemd-networkd.service

[Service]
WorkingDirectory=/share/run/qwen3coder
Type=forking
ExecStart=/share/run/qwen3coder/start.sh
ExecStop=/share/run/qwen3coder/stop.sh
StandardOutput=append:/var/log/qwen3coder/qwen3coder.log
StandardError=append:/var/log/qwen3coder/qwen3coder.log
SyslogIdentifier=qwen3coder

[Install]
WantedBy=multi-user.target

http服务

https://t2t.opencomputing.net:10443

模型

  • 路径 /d/models/Qwen/Qwen3-Coder-30B-A3B-Instruct

执行脚本

#!/usr/bin/bash

# Serve Qwen3-Coder-30B-A3B-Instruct via llmengine.server on GPU 0,
# port 9088, backgrounded (the unit uses Type=forking).
rundir=/share/run/qwen3coder
CUDA_VISIBLE_DEVICES=0 /share/vllm-0.8.5/bin/python -m llmengine.server -w ${rundir} -p 9088 /d/models/Qwen/Qwen3-Coder-30B-A3B-Instruct &
exit 0

reranker.service

[Unit]
Description=A Rerank Service using Qwen3-Reranker-0.6B
# After=network.target DeepSeek70B-kyyds671b-ray.service
# Requires=DeepSeek70B-kyyds671b-ray.service
StartLimitIntervalSec=60
StartLimitBurst=5

[Service]
# Core startup parameters (kept from the original configuration)
WorkingDirectory=/share/run/reranker
# Environment variables; start scripts and services on all nodes must agree
#Environment="NCCL_SOCKET_IFNAME=enp196s0f0np0"
#ExecStartPre=/data/kyyds671b/ray_check.sh
ExecStart=/share/run/reranker/start.sh
ExecStop=/share/run/reranker/stop.sh

# Timeout and stop control (newly added section, currently disabled)
# Extend startup timeout to 120 seconds
# TimeoutStartSec=120
# Allow 30 seconds for the service to stop
# TimeoutStopSec=30
# Send SIGINT first (better suited to Python programs)
# KillSignal=SIGINT
# Final forced-termination signal
# RestartKillSignal=SIGKILL
# Mixed kill mode
# KillMode=mixed
# Restart policy
# Restart=on-failure
# RestartSec=10s
# Service management (original configuration plus hardening, disabled)
#Restart=always
#RestartSec=10                    # restart interval raised from 5s to 10s
# append: keeps appending (like >>); file: truncates and rewrites (like >)
StandardOutput=append:/var/log/rerank/rerank.log
StandardError=append:/var/log/rerank/error.log
SyslogIdentifier=rerank
# Resource limits (optional, kept commented)
#LimitNOFILE=65536
#LimitNPROC=65536
# GPU support
#Environment=CUDA_VISIBLE_DEVICES=0,1

[Install]
WantedBy=multi-user.target

http服务

https://reranker.opencomputing.net:10443

模型

  • 路径 /share/models/BAAI/bge-reranker-v2-m3

脚本

#!/bin/bash

# Rerank service on GPU 4, port 9997, serving BAAI/bge-reranker-v2-m3.
# The commented line below is the previous Qwen3-Reranker deployment.
# CUDA_VISIBLE_DEVICES=7 /share/vllm-0.8.5/bin/python -m llmengine.rerank -p 9997 /d/ymq/models/Qwen/Qwen3-Reranker-0___6B
CUDA_VISIBLE_DEVICES=4 /share/vllm-0.8.5/bin/python -m llmengine.rerank -p 9997  /share/models/BAAI/bge-reranker-v2-m3

subtitler.service

歌曲歌词对齐服务

# subtitler.service — song/lyric alignment service, run as user ymq.
[Unit]
Wants=systemd-networkd.service

[Service]
User=ymq
Group=ymq
Type=forking
WorkingDirectory=/d/ymq/py/subtitle
ExecStart=/d/ymq/py/subtitle/start.sh
ExecStop=/d/ymq/py/subtitle/stop.sh
StandardOutput=append:/var/log/subtitler/subtitler.log
StandardError=append:/var/log/subtitler/subtitler.log
SyslogIdentifier=subtitler

[Install]
WantedBy=multi-user.target

模型

  • 路径:1. /data/ymq/models/MahmoudAshraf/mms-300m-1130-forced-aligner(对齐模型);2. /data/ymq/models/mdx_models/UVR-MDX-NET-Voc_FT.onnx(分割人声和伴奏)

仓库

https://git.opencomputing.cn/yumoqing/subtitle

脚本

#!/usr/bin/bash
# Lyric/subtitle alignment service on port 9901, backgrounded
# (the unit uses Type=forking).
cd /d/ymq/py/subtitle
/d/ymq/py/subtitle/py3/bin/python /d/ymq/py/subtitle/app/subtitler.py -p 9901 -w /d/ymq/py/subtitle &
exit 0

triples.service

获取三元组

# triples.service — triple (subject/predicate/object) extraction service.
[Unit]
Wants=systemd-networkd.service

[Service]
Type=forking
WorkingDirectory=/share/run/triples
ExecStart=/share/run/triples/start.sh
ExecStop=/share/run/triples/stop.sh
StandardOutput=append:/var/log/triples/triples.log
StandardError=append:/var/log/triples/triples.log
# Fixed: SyslogIdentifier is a syslog tag, not a filesystem path; it had been
# set to the working directory. Every other unit uses the bare service name.
SyslogIdentifier=triples

[Install]
WantedBy=multi-user.target

http服务

https://triples.opencomputing.net:10443

模型

  • 路径 /share/models/Babelscape/mrebel-large

脚本

#!/bin/bash

# Spawn three triple-extraction workers (Babelscape/mrebel-large) on GPU 7.
# NOTE(review): all three pass the same port 9991 — unless llmengine.triple
# uses SO_REUSEPORT, only the first bind can succeed. Confirm the intended
# per-worker ports.
CUDA_VISIBLE_DEVICES=7 /share/vllm-0.8.5/bin/python -m llmengine.triple -p 9991 /share/models/Babelscape/mrebel-large &
CUDA_VISIBLE_DEVICES=7 /share/vllm-0.8.5/bin/python -m llmengine.triple -p 9991 /share/models/Babelscape/mrebel-large &
CUDA_VISIBLE_DEVICES=7 /share/vllm-0.8.5/bin/python -m llmengine.triple -p 9991 /share/models/Babelscape/mrebel-large &

vdb.service

milvus向量数据库

# vdb.service — Milvus-backed vector database service, run as wangmeihua.
[Unit]
Wants=systemd-networkd.service

[Service]
User=wangmeihua
Group=wangmeihua
Type=forking
WorkingDirectory=/d/wangmeihua/vdbtest/vdb
ExecStart=/d/wangmeihua/vdbtest/vdb/start.sh
ExecStop=/d/wangmeihua/vdbtest/vdb/stop.sh
StandardOutput=append:/var/log/vdb/vdb.log
StandardError=append:/var/log/vdb/vdb.log
SyslogIdentifier=vdb

[Install]
WantedBy=multi-user.target

http服务

https://vectordb.opencomputing.net:10443/milvus

仓库

https://git.opencomputing.cn/yumoqing/vdb

脚本

#!/usr/bin/bash

# Refuse to start a second instance: look for a live vdbapp.py process.
# Fixed: use pgrep -f instead of the fragile `ps aux | grep | grep -v grep`
# pipeline (same result, and pgrep never matches itself).
PID=$(pgrep -f "vdbapp.py" | head -1)

if [ -n "$PID" ]; then
    echo "错误VDB 服务已在运行 (PID: $PID)"
    echo "请先停止现有服务kill $PID"
    exit 1
fi

# NOTE(review): the unit's WorkingDirectory and the python invocation below use
# /d/wangmeihua/..., but this cd (and the venv) use /share/wangmeihua/... —
# confirm which prefix is correct (they may be the same tree mounted twice).
# Fixed: abort if the cd fails instead of continuing in the wrong directory.
cd /share/wangmeihua/vdbtest/vdb || exit 1
/share/wangmeihua/vdbtest/vdb/vdbvenv/bin/python /d/wangmeihua/vdbtest/vdb/app/vdbapp.py -p 8887 -w /d/wangmeihua/vdbtest/vdb &
echo "VDB 服务已启动 (PID: $!)"
exit 0
Description
No description provided
Readme 37 KiB