first commit
This commit is contained in:
commit
f5c325b96c
90
README.md
Normal file
90
README.md
Normal file
@ -0,0 +1,90 @@
|
||||
# 包装F5-TTS为一个服务
|
||||
|
||||
## 依赖
|
||||
分为系统级依赖和python依赖
|
||||
|
||||
### 操作系统级别
|
||||
* 安装FFMpeg
|
||||
```
|
||||
sudo apt install ffmpeg
|
||||
```
|
||||
* 安装GPU卡驱动
|
||||
英伟达驱动按照显卡类型到[英伟达网站](https://www.nvidia.com/en-us/drivers/details/241089/)下载后,执行
|
||||
```
|
||||
sudo sh 下载的驱动文件
|
||||
```
|
||||
|
||||
### python依赖
|
||||
* 安装F5-tts相关依赖
|
||||
```
|
||||
pip install torch==2.3.0+cu118 torchaudio==2.3.0+cu118 --extra-index-url https://download.pytorch.org/whl/cu118
|
||||
```
|
||||
|
||||
* 安装F5-TTS
|
||||
```
|
||||
pip install git+https://github.com/SWivid/F5-TTS.git
|
||||
```
|
||||
|
||||
* 安装其他依赖
|
||||
```
|
||||
pip install git+https://git.kaiyuancloud.cn/yumoqing/apppublic.git
|
||||
pip install git+https://git.kaiyuancloud.cn/yumoqing/sqlor.git
|
||||
pip install git+https://git.kaiyuancloud.cn/yumoqing/ahserver.git
|
||||
pip install cn2an pycld2
|
||||
```
|
||||
## 安装与部署
|
||||
执行下列步骤
|
||||
* 安装操作系统依赖
|
||||
* 添加操作系统用户f5tts
|
||||
* 登录f5tts用户
|
||||
* 安装python依赖
|
||||
* 克隆项目
|
||||
* 运行环境设置
|
||||
* 启动
|
||||
* 停止
|
||||
|
||||
* 克隆项目
|
||||
```
|
||||
cd ~
|
||||
git clone git@git.kaiyuancloud.cn:yumoqing/f5tts.git
|
||||
mv f5tts/* ~
|
||||
rm -rf f5tts
|
||||
```
|
||||
|
||||
* 运行环境设置
|
||||
```
|
||||
cd script
|
||||
bash install.sh
|
||||
```
|
||||
|
||||
* 启动
|
||||
```
|
||||
sudo systemctl start f5tts
|
||||
```
|
||||
* 停止
|
||||
```
|
||||
sudo systemctl stop f5tts
|
||||
```
|
||||
|
||||
## API
|
||||
|
||||
### 添加播音员
|
||||
|
||||
url:/api/addvoice
|
||||
方法:POST
|
||||
上传数据
|
||||
speaker:播音员名称
|
||||
ref_audio:声音文件
|
||||
ref_text: 录音相应的文字
|
||||
返回:添加成功返回UiMessage的json文件
|
||||
出错则返回UiError的json文件
|
||||
|
||||
### TTS转换
|
||||
url:/api/inference
|
||||
方法:POST
|
||||
数据:
|
||||
prompt:正文文本
|
||||
speaker:播音员名
|
||||
|
||||
### TTS流式转换
|
||||
|
||||
349
app/f5tts.py
Normal file
349
app/f5tts.py
Normal file
@ -0,0 +1,349 @@
|
||||
import os
|
||||
import io
|
||||
import base64
|
||||
import sys
|
||||
import asyncio
|
||||
import codecs
|
||||
from traceback import format_exc
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
# import tomli
|
||||
from cached_path import cached_path
|
||||
from appPublic.textsplit import split_text_with_dialog_preserved
|
||||
from appPublic.uniqueID import getID
|
||||
from ahserver.serverenv import get_serverenv
|
||||
from filetxt.loader import fileloader
|
||||
import pycld2 as cld
|
||||
import cn2an
|
||||
|
||||
from f5_tts.model import DiT, UNetT
|
||||
from f5_tts.infer.utils_infer import (
|
||||
mel_spec_type,
|
||||
target_rms,
|
||||
cross_fade_duration,
|
||||
nfe_step,
|
||||
cfg_strength,
|
||||
sway_sampling_coef,
|
||||
speed,
|
||||
fix_duration,
|
||||
infer_process,
|
||||
load_model,
|
||||
load_vocoder,
|
||||
preprocess_ref_audio_text,
|
||||
remove_silence_for_generated_wav,
|
||||
)
|
||||
|
||||
import json
|
||||
from time import time, sleep
|
||||
from appPublic.dictObject import DictObject
|
||||
from appPublic.folderUtils import temp_file
|
||||
from appPublic.jsonConfig import getConfig
|
||||
from appPublic.worker import awaitify
|
||||
from appPublic.uniqueID import getID
|
||||
from appPublic.log import debug, info
|
||||
from appPublic.background import Background
|
||||
from ahserver.webapp import webapp
|
||||
from ahserver.serverenv import ServerEnv
|
||||
from ahserver.filestorage import FileStorage
|
||||
|
||||
# Default inference parameters.
# NOTE: target_rms / nfe_step / cfg_strength / sway_sampling_coef / speed
# deliberately shadow the same names imported from f5_tts.infer.utils_infer.
n_mel_channels = 100
hop_length = 256
target_rms = 0.1
nfe_step = 32  # typical values: 16 or 32
cfg_strength = 2.0
ode_method = "euler"
sway_sampling_coef = -1.0
speed = 1.0
|
||||
|
||||
def audio_ndarray_to_base64(waveform: np.ndarray, sample_rate: int = 16000) -> str:
    """Encode a waveform ndarray as a base64 string of WAV file bytes.

    A 1-D array is treated as mono and reshaped to (samples, 1) before
    writing; the WAV container is built entirely in memory.
    """
    mono = waveform.reshape(-1, 1) if waveform.ndim == 1 else waveform
    buffer = io.BytesIO()
    sf.write(buffer, mono, samplerate=sample_rate, format='WAV')
    buffer.seek(0)
    return base64.b64encode(buffer.read()).decode('utf-8')
|
||||
|
||||
def write_wav_buffer(wav, nchannels, framerate):
    """Persist audio samples as a temporary WAV file and return its web path.

    wav: sample array accepted by soundfile.write.
    nchannels: currently unused; kept for call-site compatibility.
    framerate: sample rate in Hz.
    """
    fs = FileStorage()
    fn = fs._name2path(f'{getID()}.wav', userid='tmp')
    # fix: without exist_ok=True this raised FileExistsError on every call
    # after the first one that landed in the same directory
    os.makedirs(os.path.dirname(fn), exist_ok=True)
    debug(fn)
    # write directly by path; the original opened the file only to pass f.name
    sf.write(fn, wav, framerate)
    return fs.webpath(fn)


async_write_wav_buffer = awaitify(write_wav_buffer)
|
||||
|
||||
def detect_language(txt):
    """Return the language code cld2 detects for *txt* (e.g. 'zh', 'en')."""
    result = cld.detect(txt)
    isReliable, textBytesFound, details = result
    debug(f' detect_language():{isReliable=}, {textBytesFound=}, {details=} ')
    # details is a tuple of (name, code, percent, score); take the top code
    return details[0][1]
|
||||
|
||||
class F5TTS:
    """Wrap the F5-TTS model as a long-lived engine.

    Loads the vocoder and DiT model once at construction, keeps a table of
    reference voices ("speakers") and offers whole-prompt and streaming
    inference APIs used by the web handlers.
    """

    def __init__(self):
        self.config = getConfig()
        self.load_model()
        self.setup_voices()

    def load_model(self):
        """Load vocoder and DiT TTS model per config onto the configured device."""
        self.vocoder = load_vocoder(vocoder_name=self.config.vocoder_name,
                                    is_local=True,
                                    local_path=self.config.vocoder_local_path)
        model_cls = DiT
        model_cfg = dict(dim=1024, depth=22, heads=16,
                         ff_mult=2, text_dim=512, conv_layers=4)
        ckpt_file = self.config.ckpts_path
        self.model = load_model(model_cls, model_cfg, ckpt_file,
                                mel_spec_type=self.config.vocoder_name,
                                vocab_file=self.config.vocab_file)
        self.model = self.model.to(self.config.device)
        self.lock = asyncio.Lock()

    def f5tts_infer(self, ref_audio, ref_text, gen_text, speed_factor):
        """Run one chunk of inference.

        Returns a dict {'text', 'audio', 'sample_rate'} or None when the
        model produced no audio.
        """
        audio, final_sample_rate, spectragram = \
            infer_process(ref_audio,
                          ref_text,
                          gen_text,
                          self.model,
                          self.vocoder,
                          mel_spec_type=self.config.vocoder_name,
                          speed=self.config.speed or speed)
        if audio is None:
            return None
        audio = self.speed_convert(audio, speed_factor)
        debug(f'audio shape {audio.shape}, {gen_text=}')
        return {
            'text': gen_text,
            'audio': audio,
            'sample_rate': final_sample_rate
        }

    def speed_convert(self, output_audio_np, speed_factor):
        """Time-stretch audio by linear interpolation; factor clamped to [0.1, 5.0]."""
        original_len = len(output_audio_np)
        speed_factor = max(0.1, min(speed_factor, 5.0))
        target_len = int(original_len / speed_factor)
        # only interpolate when the length actually changes and stays valid
        if target_len != original_len and target_len > 0:
            x_original = np.arange(original_len)
            x_resampled = np.linspace(0, original_len - 1, target_len)
            output_audio_np = np.interp(x_resampled, x_original, output_audio_np)
            output_audio_np = output_audio_np.astype(np.float32)
        return output_audio_np

    def get_speakers(self):
        """Return speaker options for UI select fields; always includes 'main'."""
        t = [{'value': s, 'text': s} for s in self.speakers.keys()]
        t.append({'value': 'main', 'text': 'main'})
        return t

    async def split_text(self, text_gen, speaker):
        """Split the prompt into chunks, resolving [[speaker]] tags.

        Chinese chunks get arabic numerals converted to Chinese form
        (an2cn) before synthesis. Each returned dict carries the chunk
        text plus the reference audio/text of the resolved voice.
        """
        chunks = split_text_with_dialog_preserved(text_gen)
        debug(f'{len(chunks)=}')
        reg2 = r"\[\[(\w+)\]\]"
        ret = []
        for text in chunks:
            # fix: original compared a str chunk to the list ['\r',''] which
            # is never true; skip empty / CR-only chunks instead
            if text in ('\r', ''):
                continue
            lang = await awaitify(detect_language)(text)
            if lang == 'zh':
                text = await awaitify(cn2an.transform)(text, 'an2cn')
            voice = speaker
            match = re.match(reg2, text)
            if match:
                voice = match[1]
                if voice not in self.voices:
                    voice = speaker
                text = re.sub(reg2, "", text)
            gen_text = text.strip()
            ref_audio = self.voices[voice]["ref_audio"]
            ref_text = self.voices[voice]["ref_text"]
            ret.append({'text': gen_text, 'ref_audio': ref_audio, 'ref_text': ref_text})
        return ret

    async def infer_stream(self, prompt, speaker, speed_factor=1.0):
        """Yield a web path to a generated WAV file for each text chunk."""
        async for a in self._inference_stream(prompt, speaker, speed_factor=speed_factor):
            wavdata = a['audio']
            samplerate = a['sample_rate']
            b = await async_write_wav_buffer(wavdata, 1, samplerate)
            yield b

    async def _inference_stream(self, prompt, speaker, speed_factor=1.0):
        """Yield raw inference dicts chunk by chunk; per-chunk errors are logged and skipped."""
        chunks = await self.split_text(prompt, speaker)
        debug(f'{len(chunks)=}')
        for chunk in chunks:
            gen_text = chunk['text']
            ref_audio = chunk['ref_audio']
            ref_text = chunk['ref_text']
            infer = awaitify(self.f5tts_infer)
            try:
                d = await infer(ref_audio, ref_text, gen_text, speed_factor)
                if d is not None:
                    yield d
            except Exception:
                # best-effort streaming: log and continue with the next chunk
                debug(f'{gen_text=} inference error\n{format_exc()}')

    async def inference_stream(self, prompt, speaker, speed_factor=1.0):
        """Yield newline-delimited JSON records with base64 audio.

        Each record carries text/audio/sample_rate/duration with done=False;
        a final record with done=True reports the total duration.
        """
        total_duration = 0
        async for d in self._inference_stream(prompt, speaker, speed_factor=speed_factor):
            # fix: was misspelled 'sampels', raising NameError on next line
            samples = d['audio'].shape[0]
            duration = samples / d['sample_rate']
            # fix: total_duration was never accumulated, final record always 0
            total_duration += duration
            audio_b64 = audio_ndarray_to_base64(d['audio'], d['sample_rate'])
            d['audio'] = audio_b64
            d['duration'] = duration
            d['done'] = False
            txt = json.dumps(d, ensure_ascii=False)
            yield txt + '\n'
        d = {
            'done': True,
            'duration': total_duration
        }
        txt = json.dumps(d, ensure_ascii=False)
        yield txt + '\n'

    def setup_voices(self):
        """Load speakers.json and preprocess every reference voice plus 'main'."""
        config = getConfig()
        workdir = config.workdir
        debug(f'workdir={workdir}')
        with codecs.open(config.speakers_file, 'r', 'utf-8') as f:
            b = f.read()
        self.speakers = json.loads(b)
        fn = f'{workdir}/samples/{config.ref_audio}'
        ref_audio, ref_text = preprocess_ref_audio_text(fn, config.ref_text)
        self.voices = {
            "main": {
                'ref_text': ref_text,
                'ref_audio': ref_audio
            }
        }
        for k, v in self.speakers.items():
            fn = f'{workdir}/samples/{v["ref_audio"]}'
            ref_audio, ref_text = preprocess_ref_audio_text(fn, v['ref_text'])
            self.voices[k] = {
                'ref_text': ref_text,
                'ref_audio': ref_audio
            }

    def copyfile(self, src, dest):
        """Byte-copy src to dest (whole file in memory; samples are small)."""
        with open(src, 'rb') as f:
            b = f.read()
        with open(dest, 'wb') as f1:
            f1.write(b)

    async def add_voice(self, speaker, ref_audio, ref_text):
        """Register a new speaker: store its sample under samples/ and persist speakers.json."""
        config = getConfig()
        ref_audio = FileStorage().realPath(ref_audio)
        workdir = config.workdir
        filename = f'{getID()}.wav'
        # fix: the destination path must carry the generated filename
        fn = f'{workdir}/samples/{filename}'
        await awaitify(self.copyfile)(ref_audio, fn)
        self.speakers[speaker] = {
            'ref_text': ref_text,
            'ref_audio': filename
        }
        f = awaitify(preprocess_ref_audio_text)
        # fix: preprocess the persisted copy; the original deleted the upload
        # (via a misspelled 'ref_adio') before preprocessing it
        prep_audio, prep_text = await f(fn, ref_text)
        os.unlink(ref_audio)
        # store the preprocessed values, consistent with setup_voices()
        self.voices[speaker] = {
            'ref_text': prep_text,
            'ref_audio': prep_audio
        }
        with codecs.open(config.speakers_file, 'w', 'utf-8') as f:
            f.write(json.dumps(self.speakers, indent=4, ensure_ascii=False))
        return None

    async def _inference(self, prompt, speaker, speed_factor=1.0):
        """Generate the whole prompt, concatenate all chunks, return a WAV web path.

        Returns None when no audio was generated.
        """
        generated_audio_segments = []
        final_sample_rate = 16000
        async for d in self._inference_stream(prompt,
                                              speaker,
                                              speed_factor=speed_factor):
            audio = d.get('audio', None)
            if audio is None:
                debug(f'audio is none, {d=}')
                continue
            final_sample_rate = d['sample_rate']
            generated_audio_segments.append(audio)

        if generated_audio_segments:
            final_wave = np.concatenate(generated_audio_segments)
            debug(f'{prompt=}, {final_sample_rate=}')
            return await async_write_wav_buffer(final_wave, 1, final_sample_rate)
        debug(f'{prompt=} not audio generated')
|
||||
|
||||
def UiError(title="出错", message="出错啦", timeout=5):
    """Build the JSON descriptor of a bricks 'Error' popup for the front end."""
    options = {
        "author": "tr",
        "timeout": timeout,
        "cwidth": 15,
        "cheight": 10,
        "title": title,
        "auto_open": True,
        "auto_dismiss": True,
        "auto_destroy": True,
        "message": message
    }
    return {"widgettype": "Error", "options": options}
|
||||
|
||||
|
||||
def UiMessage(title="消息", message="后台消息", timeout=5):
    """Build the JSON descriptor of a bricks 'Message' popup for the front end."""
    options = {
        "author": "tr",
        "timeout": timeout,
        "cwidth": 15,
        "cheight": 10,
        "title": title,
        "auto_open": True,
        "auto_dismiss": True,
        "auto_destroy": True,
        "message": message
    }
    return {"widgettype": "Message", "options": options}
|
||||
|
||||
def test1():
    """Debug endpoint: block the worker for 10 hours, then return an empty dict.

    Used (wrapped with awaitify) to exercise long-running request handling.
    """
    sleep(36000)
    return {}
|
||||
|
||||
# Module-level engine instance, created by init() at server start.
f5 = None


def init():
    """Create the F5TTS engine and register its callables on the server env.

    The registered names become available to .dspy handlers under wwwroot.
    """
    global f5
    g = ServerEnv()
    f5 = F5TTS()
    g.tts_engine = f5
    g.infer_stream = f5.infer_stream
    g.inference_stream = f5.inference_stream
    g.get_speakers = f5.get_speakers
    g.infer = f5._inference
    g.test1 = awaitify(test1)
    g.add_voice = f5.add_voice
    g.UiError = UiError
    g.UiMessage = UiMessage
    # fix: 'filelaoder' was a typo; register the correct name as well and
    # keep the old one so existing handlers do not break
    g.filelaoder = fileloader
    g.fileloader = fileloader


if __name__ == '__main__':
    webapp(init)
|
||||
14
app/w4a2wav.py
Normal file
14
app/w4a2wav.py
Normal file
@ -0,0 +1,14 @@
|
||||
import os
import sys
from pydub import AudioSegment

# Convert an .m4a audio file to a .wav file with the same base name.
# Usage: python w4a2wav.py input.m4a
if len(sys.argv) < 2:
    # fix: print usage instead of exiting silently
    print(f'Usage: {sys.argv[0]} m4afile')
    exit(1)

m4afn = sys.argv[1]
# fix: splitext handles any extension length; the original sliced the last
# three characters, which breaks for names not ending in a 3-char extension
wavfn = os.path.splitext(m4afn)[0] + '.wav'
# Load the m4a file
audio = AudioSegment.from_file(m4afn, format="m4a")

# Export the audio as a wav file
audio.export(wavfn, format="wav")
|
||||
87
conf/config.json
Normal file
87
conf/config.json
Normal file
@ -0,0 +1,87 @@
|
||||
{
|
||||
"speaker_match":"\\[\\[(\\w+)\\]\\]",
|
||||
"language":{
|
||||
"zh":{
|
||||
"sentence_splitter":"[。?!]|\r?\n"
|
||||
},
|
||||
"en":{
|
||||
"sentence_splitter":"[.?!] |\r?\n"
|
||||
}
|
||||
},
|
||||
"sample_rate":16000,
|
||||
"vocab_file":"",
|
||||
"ckpts_path_bak":"/share/models/SWivid/F5-TTS/F5TTS_Base/model_1200000.pt",
|
||||
"ckpts_path":"/share/models/SWivid/F5-TTS/F5TTS_v1_Base/model_1250000.safetensors",
|
||||
"speakers_file":"$[workdir]$/conf/speakers.json",
|
||||
"vocoder_name":"vocos",
|
||||
"vocoder_local_path":"/share/models/charactr/vocos-mel-24khz",
|
||||
"remove_silence":false,
|
||||
"modelname":"F5-TTS",
|
||||
"device":"cuda:0",
|
||||
"ref_audio":"ttt.wav",
|
||||
"ref_text":"快点吃饭,上课要迟到了。",
|
||||
"cross_fade_duration":0,
|
||||
"workdir":"$[workdir]$",
|
||||
"filesroot":"$[workdir]$/files",
|
||||
"logger":{
|
||||
"name":"f5tts",
|
||||
"levelname":"info",
|
||||
"logfile":"$[workdir]$/logs/f5tts.log"
|
||||
},
|
||||
"website":{
|
||||
"paths":[
|
||||
["$[workdir]$/wwwroot",""]
|
||||
],
|
||||
"client_max_size":10000,
|
||||
"host":"0.0.0.0",
|
||||
"port":9995,
|
||||
"coding":"utf-8",
|
||||
"ssl_gg":{
|
||||
"crtfile":"$[workdir]$/conf/www.bsppo.com.pem",
|
||||
"keyfile":"$[workdir]$/conf/www.bsppo.com.key"
|
||||
},
|
||||
"indexes":[
|
||||
"index.html",
|
||||
"index.tmpl",
|
||||
"index.ui",
|
||||
"index.dspy",
|
||||
"index.md"
|
||||
],
|
||||
"startswiths":[
|
||||
{
|
||||
"leading":"/idfile",
|
||||
"registerfunction":"idfile"
|
||||
}
|
||||
],
|
||||
"processors":[
|
||||
[".ws","ws"],
|
||||
[".xterm","xterm"],
|
||||
[".proxy","proxy"],
|
||||
[".llm", "llm"],
|
||||
[".llms", "llms"],
|
||||
[".llma", "llma"],
|
||||
[".xlsxds","xlsxds"],
|
||||
[".sqlds","sqlds"],
|
||||
[".tmpl.js","tmpl"],
|
||||
[".tmpl.css","tmpl"],
|
||||
[".html.tmpl","tmpl"],
|
||||
[".bcrud", "bricks_crud"],
|
||||
[".tmpl","tmpl"],
|
||||
[".app","app"],
|
||||
[".bui","bui"],
|
||||
[".ui","bui"],
|
||||
[".dspy","dspy"],
|
||||
[".md","md"]
|
||||
],
|
||||
"rsakey":{
|
||||
"privatekey":"$[workdir]$/conf/rsa_private_key.pem",
|
||||
"publickey":"$[workdir]$/conf/rsa_public_key.pem"
|
||||
},
|
||||
"session_max_time":3000,
|
||||
"session_issue_time":2500,
|
||||
"session_redis_notuse":{
|
||||
"url":"redis://127.0.0.1:6379"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
6
conf/speakers.json
Normal file
6
conf/speakers.json
Normal file
@ -0,0 +1,6 @@
|
||||
{
|
||||
"ymq": {
|
||||
"ref_text": "\u8f7b\u91cf\u5e94\u7528\u670d\u52a1\u5668\u5907\u6848\u6761\u4ef6\uff1a\u8d2d\u4e70\u65f6\u957f\u57283\u4e2a\u6708\u53ca\u4ee5\u4e0a",
|
||||
"ref_audio": "ymq.wav"
|
||||
}
|
||||
}
|
||||
16
f5tts.service
Normal file
16
f5tts.service
Normal file
@ -0,0 +1,16 @@
|
||||
[Unit]
|
||||
Wants=systemd-networkd.service
|
||||
|
||||
[Service]
|
||||
User=ymq
|
||||
Group=ymq
|
||||
WorkingDirectory=/share/ymq/run/f5tts
|
||||
Type=forking
|
||||
ExecStart=/share/ymq/run/f5tts/start.sh
|
||||
ExecStop=/share/ymq/run/f5tts/stop.sh
|
||||
StandardOutput=append:/var/log/f5tts/f5tts.log
|
||||
StandardError=append:/var/log/f5tts/f5tts.log
|
||||
SyslogIdentifier=f5tts
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
62
install
Executable file
62
install
Executable file
@ -0,0 +1,62 @@
|
||||
#!/usr/bin/python3
"""Bootstrap this app: create a venv, generate service/start scripts and install them.

Usage: install <venvname>
Writes script/<name>.service, script/<name>.sh and script/install.sh,
drops a 'killname' helper into ~/bin, then runs install.sh.
"""

import os
import sys
import codecs

if len(sys.argv) < 2:
    print(f'Usage:\n{sys.argv[0]} venvname')
    sys.exit(1)

user = os.getlogin()
home = os.environ.get('HOME')

venv = sys.argv[1]
if not os.path.exists(f'{home}/{venv}'):
    os.system(f'python3 -m venv ~/{venv}')
pwd = os.getcwd()
# the service name is the project directory name
name = os.path.basename(pwd)

service = f"""[Unit]
Description={name} service
Wants=systemd-networkd.service
Requires=nginx.service

[Service]
Type=forking
ExecStart=su - {user} -c "{pwd}/script/{name}.sh"
ExecStop=su - {user} -c "{home}/bin/killname app/{name}.py"
[Install]
WantedBy=multi-user.target
"""
# fix above: ExecStop's su was missing the -c flag, so the command string
# was treated as a login name and the unit could never stop the app

with codecs.open(f'./script/{name}.service', 'w', 'utf-8') as f:
    f.write(service)

with codecs.open(f'./script/{name}.sh', 'w', 'utf-8') as f:
    f.write(f"""#!/usr/bin/bash

killname {pwd}/app/{name}.py
{home}/{venv}/bin/python {pwd}/app/{name}.py -w {pwd} > {pwd}/logs/stderr.log 2>&1 &
exit 0
""")

# fix: copy the unit by absolute path so install.sh works from any cwd
# (it previously referenced {name}.service relative to the caller's cwd)
with codecs.open(f'./script/install.sh', 'w', 'utf-8') as f:
    f.write(f"""#!/usr/bin/bash
sudo cp {pwd}/script/{name}.service /etc/systemd/system
sudo systemctl enable {name}.service
sudo systemctl start {name}
""")

if not os.path.exists(f'{home}/bin'):
    os.mkdir(f'{home}/bin')
if not os.path.exists(f'{home}/bin/killname'):
    with codecs.open(f'{home}/bin/killname', 'w', 'utf-8') as f:
        f.write("""#!/usr/bin/bash

ps -ef|grep "$1"|grep -v grep|awk '{print("kill -9", $2)}'|sh
""")
# fix: killname is written under $HOME/bin, not $PWD/bin — chmod the right dir
os.system(f'chmod +x {home}/bin/*')
os.system(f'chmod +x {pwd}/script/*.sh')
os.system(f'{pwd}/script/install.sh')
|
||||
|
||||
0
logs/stderr.log
Normal file
0
logs/stderr.log
Normal file
11
requirements.txt
Normal file
11
requirements.txt
Normal file
@ -0,0 +1,11 @@
|
||||
numpy
|
||||
soundfile
|
||||
cached_path
|
||||
redis
|
||||
pycld2
|
||||
cn2an
|
||||
git+https://git.kaiyuancloud.cn/yumoqing/apppublic
|
||||
git+https://git.kaiyuancloud.cn/yumoqing/sqlor
|
||||
git+https://git.kaiyuancloud.cn/yumoqing/ahserver
|
||||
git+https://git.kaiyuancloud.cn/yumoqing/filetxt
|
||||
# git+https://github.com/SWivid/F5-TTS
|
||||
BIN
samples/test2.m4a
Normal file
BIN
samples/test2.m4a
Normal file
Binary file not shown.
BIN
samples/test_en_1_ref_short.wav
Normal file
BIN
samples/test_en_1_ref_short.wav
Normal file
Binary file not shown.
BIN
samples/test_zh_1_ref_short.wav
Normal file
BIN
samples/test_zh_1_ref_short.wav
Normal file
Binary file not shown.
BIN
samples/ttt.m4a
Normal file
BIN
samples/ttt.m4a
Normal file
Binary file not shown.
BIN
samples/ttt.wav
Normal file
BIN
samples/ttt.wav
Normal file
Binary file not shown.
BIN
samples/ymq.wav
Normal file
BIN
samples/ymq.wav
Normal file
Binary file not shown.
10
script/f5tts.service
Normal file
10
script/f5tts.service
Normal file
@ -0,0 +1,10 @@
|
||||
[Unit]
|
||||
Wants=systemd-networkd.service
|
||||
|
||||
[Service]
|
||||
User=ymq
|
||||
Group=ymq
|
||||
WorkingDirectory=/share/ymq/run/f5tts
|
||||
ExecStart=/share/ymq/run/f5tts/f5tts.env/bin/python app/f5tts.py -p 9995
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
5
script/f5tts.sh
Executable file
5
script/f5tts.sh
Executable file
@ -0,0 +1,5 @@
|
||||
#!/usr/bin/bash
|
||||
|
||||
/d/f5tts/bin/killname /data/f5tts/app/f5tts.py
|
||||
/d/f5tts/py3/bin/python /data/f5tts/app/f5tts.py -w /data/f5tts > /data/f5tts/logs/stderr.log 2>&1 &
|
||||
exit 0
|
||||
4
script/install.sh
Executable file
4
script/install.sh
Executable file
@ -0,0 +1,4 @@
|
||||
#!/usr/bin/bash
|
||||
sudo cp f5tts.service /etc/systemd/system
|
||||
sudo systemctl enable f5tts.service
|
||||
sudo systemctl start f5tts
|
||||
3
script/speakers.json
Normal file
3
script/speakers.json
Normal file
@ -0,0 +1,3 @@
|
||||
{
|
||||
}
|
||||
|
||||
10
start.sh
Executable file
10
start.sh
Executable file
@ -0,0 +1,10 @@
|
||||
#!/usr/bin/bash

# Launch f5tts engine instances.
# NOTE(review): all four instances bind the same port 9995 — this only works
# if the server sets SO_REUSEPORT; confirm, or give each instance its own port.
echo start 4 instances for f5tts engine
rundir=/share/ymq/run/f5tts
CUDA_VISIBLE_DEVICES=6 ${rundir}/f5tts.env/bin/python ${rundir}/app/f5tts.py -w ${rundir} -p 9995 &
CUDA_VISIBLE_DEVICES=6 ${rundir}/f5tts.env/bin/python ${rundir}/app/f5tts.py -w ${rundir} -p 9995 &
CUDA_VISIBLE_DEVICES=6 ${rundir}/f5tts.env/bin/python ${rundir}/app/f5tts.py -w ${rundir} -p 9995 &
CUDA_VISIBLE_DEVICES=6 ${rundir}/f5tts.env/bin/python ${rundir}/app/f5tts.py -w ${rundir} -p 9995 &

exit 0
|
||||
BIN
wwwroot/.tts.ui.swp
Normal file
BIN
wwwroot/.tts.ui.swp
Normal file
Binary file not shown.
31
wwwroot/add.ui
Normal file
31
wwwroot/add.ui
Normal file
@ -0,0 +1,31 @@
|
||||
{
|
||||
"widgettype":"Form",
|
||||
"options":{
|
||||
"height":"70%",
|
||||
"title":"向知识库添加文件",
|
||||
"description":"可以添加的文件类型有:文本文件(.txt),数据文件(.csv),excel文件(.xlsx, .xls),word文件(.doc, .docx), 演示文件(.ppt, .pptx), pdf文件",
|
||||
"method":"POST",
|
||||
"submit_url":"{{entire_url('v1/add')}}",
|
||||
"fields":[
|
||||
{
|
||||
"name":"file_path",
|
||||
"uitype":"file",
|
||||
"required":true
|
||||
},
|
||||
{
|
||||
"name":"userid",
|
||||
"uitype":"str",
|
||||
"label":"用户id",
|
||||
"value":"user1",
|
||||
"required":true
|
||||
},
|
||||
{
|
||||
"name":"kdbname",
|
||||
"uitype":"str",
|
||||
"label":"知识库名",
|
||||
"required":true,
|
||||
"value":"testdb"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
41
wwwroot/addvoice.ui
Normal file
41
wwwroot/addvoice.ui
Normal file
@ -0,0 +1,41 @@
|
||||
{
|
||||
"widgettype":"VBox",
|
||||
"options":{
|
||||
"height":"100%"
|
||||
},
|
||||
"subwidgets":[
|
||||
{
|
||||
"widgettype":"Filler",
|
||||
"options":{},
|
||||
"subwidgets":[
|
||||
{
|
||||
"widgettype":"Form",
|
||||
"id":"form",
|
||||
"options":{
|
||||
"title":"添加播音员",
|
||||
"method":"POST",
|
||||
"description":"通过输入播音员id,录音和录音文字说明,来添加播音员",
|
||||
"submit_url":"{{entire_url('/v1/addvoice')}}",
|
||||
"fields":[
|
||||
{
|
||||
"name":"speaker",
|
||||
"label":"播音员id",
|
||||
"uitype":"str"
|
||||
},
|
||||
{
|
||||
"name":"ref_voice",
|
||||
"label":"语音",
|
||||
"uitype":"audiorecorder"
|
||||
},
|
||||
{
|
||||
"name":"ref_text",
|
||||
"label":"语音文字",
|
||||
"uitype":"text"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
1
wwwroot/get_speakers.dspy
Normal file
1
wwwroot/get_speakers.dspy
Normal file
@ -0,0 +1 @@
|
||||
return get_speakers()
|
||||
45
wwwroot/index.ui
Normal file
45
wwwroot/index.ui
Normal file
@ -0,0 +1,45 @@
|
||||
{
|
||||
"widgettype":"TabPanel",
|
||||
"options":{
|
||||
"tab_wide":"auto",
|
||||
"interval":"15px",
|
||||
"height":"100%",
|
||||
"width":"100%",
|
||||
"tab_pos":"top",
|
||||
"items":[
|
||||
{
|
||||
"name":"add",
|
||||
"label":"文本转语音",
|
||||
"refresh":true,
|
||||
"content":{
|
||||
"widgettype":"urlwidget",
|
||||
"options":{
|
||||
"url":"{{entire_url('tts.ui')}}"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name":"add1",
|
||||
"label":"文本转语音(stream)",
|
||||
"refresh":true,
|
||||
"content":{
|
||||
"widgettype":"urlwidget",
|
||||
"options":{
|
||||
"url":"{{entire_url('tts_stream.ui')}}"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name":"query",
|
||||
"label":"添加播音员",
|
||||
"refresh":true,
|
||||
"content":{
|
||||
"widgettype":"urlwidget",
|
||||
"options":{
|
||||
"url":"{{entire_url('addvoice.ui')}}"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
10
wwwroot/js/myapp.js
Normal file
10
wwwroot/js/myapp.js
Normal file
@ -0,0 +1,10 @@
|
||||
var set_response_text_url = function(w, resp){
|
||||
schedule_once(async_set_response_text_url.bind(w, w, resp), 0.1);
|
||||
}
|
||||
|
||||
var async_set_response_text_url = async function(w, resp){
|
||||
console.log('arguments=', arguments);
|
||||
var url = await resp.text();
|
||||
w.set_url(url);
|
||||
w.play();
|
||||
}
|
||||
28
wwwroot/query.ui
Normal file
28
wwwroot/query.ui
Normal file
@ -0,0 +1,28 @@
|
||||
{
|
||||
"widgettype":"Form",
|
||||
"options":{
|
||||
"height":"70%",
|
||||
"submit_url":"{{entire_url('v1/query')}}",
|
||||
"fields":[
|
||||
{
|
||||
"name":"prompt",
|
||||
"uitype":"text",
|
||||
"required":true
|
||||
},
|
||||
{
|
||||
"name":"userid",
|
||||
"uitype":"str",
|
||||
"label":"用户id",
|
||||
"value":"user1",
|
||||
"required":true
|
||||
},
|
||||
{
|
||||
"name":"kdbname",
|
||||
"uitype":"str",
|
||||
"label":"知识库名",
|
||||
"required":true,
|
||||
"value":"testdb"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
1
wwwroot/t.dspy
Normal file
1
wwwroot/t.dspy
Normal file
@ -0,0 +1 @@
|
||||
return entire_url('/idfile') + "?path=/trhr"
|
||||
1
wwwroot/test1.dspy
Normal file
1
wwwroot/test1.dspy
Normal file
@ -0,0 +1 @@
|
||||
return await test1()
|
||||
56
wwwroot/tts.ui
Normal file
56
wwwroot/tts.ui
Normal file
@ -0,0 +1,56 @@
|
||||
{
|
||||
"widgettype":"VBox",
|
||||
"options":{
|
||||
"height":"100%"
|
||||
},
|
||||
"subwidgets":[
|
||||
{
|
||||
"widgettype":"Filler",
|
||||
"options":{},
|
||||
"subwidgets":[
|
||||
{
|
||||
"widgettype":"Form",
|
||||
"id":"form",
|
||||
"options":{
|
||||
"submit_url":"{{entire_url('/v1/inference')}}",
|
||||
"fields":[
|
||||
{
|
||||
"name":"speaker",
|
||||
"label":"播音员",
|
||||
"uitype":"code",
|
||||
"value":"main",
|
||||
"dataurl":"{{entire_url('/get_speakers.dspy')}}"
|
||||
},
|
||||
{
|
||||
"name":"prompt",
|
||||
"label":"文本",
|
||||
"uitype":"text",
|
||||
"uiparams":{
|
||||
"rows":20,
|
||||
"cols":80
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id":"audio",
|
||||
"widgettype":"AudioPlayer",
|
||||
"options":{
|
||||
"height":"40px",
|
||||
"auto_play":true
|
||||
}
|
||||
}
|
||||
],
|
||||
"binds":[
|
||||
{
|
||||
"wid":"form",
|
||||
"event":"submited",
|
||||
"actiontype":"script",
|
||||
"target":"audio",
|
||||
"script":"set_response_text_url(this, event.params);"
|
||||
}
|
||||
]
|
||||
}
|
||||
53
wwwroot/tts_stream.ui
Normal file
53
wwwroot/tts_stream.ui
Normal file
@ -0,0 +1,53 @@
|
||||
{
|
||||
"widgettype":"HBox",
|
||||
"options":{
|
||||
"height":"100%"
|
||||
},
|
||||
"subwidgets":[
|
||||
{
|
||||
"widgettype":"Form",
|
||||
"id":"form",
|
||||
"options":{
|
||||
"width":"50%",
|
||||
"title":"流式返回",
|
||||
"submit_url":"{{entire_url('/v1/infer_stream')}}",
|
||||
"fields":[
|
||||
{
|
||||
"name":"speaker",
|
||||
"label":"播音员",
|
||||
"uitype":"code",
|
||||
"value":"main",
|
||||
"dataurl":"{{entire_url('/get_speakers.dspy')}}"
|
||||
},
|
||||
{
|
||||
"name":"prompt",
|
||||
"label":"文本",
|
||||
"uitype":"text",
|
||||
"uiparams":{
|
||||
"rows":20,
|
||||
"cols":80
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"id":"audio",
|
||||
"widgettype":"TextedAudioPlayer",
|
||||
"options":{
|
||||
"width": "50%",
|
||||
"height":"100%",
|
||||
"auto_play":true
|
||||
}
|
||||
}
|
||||
],
|
||||
"binds":[
|
||||
{
|
||||
"wid":"form",
|
||||
"event":"submited",
|
||||
"actiontype":"script",
|
||||
"target":"audio",
|
||||
"script":"console.log('this=', this, event);this.set_stream_urls(event.params)"
|
||||
}
|
||||
]
|
||||
}
|
||||
11
wwwroot/v1/addvoice/index.dspy
Normal file
11
wwwroot/v1/addvoice/index.dspy
Normal file
@ -0,0 +1,11 @@
|
||||
debug(f'{params_kw=}')
|
||||
try:
|
||||
speaker = params_kw.speaker
|
||||
ref_audio = params_kw.ref_voice
|
||||
ref_text = params_kw.ref_text
|
||||
await add_voice(speaker, ref_audio, ref_text)
|
||||
return UiMessage(title='Success', message='add voice success')
|
||||
except Exception as e:
|
||||
exception(f'{e=}')
|
||||
return UiError(title='Error', message='add voice error')
|
||||
|
||||
21
wwwroot/v1/index.md
Normal file
21
wwwroot/v1/index.md
Normal file
@ -0,0 +1,21 @@
|
||||
# API for the F5TTS wrapped web server
|
||||
we apply following apis
|
||||
|
||||
## addvoice
|
||||
|
||||
* path: /v1/addvoice
|
||||
* method: POST
|
||||
* form data:
|
||||
1 ref_text: text
|
||||
2 ref_audio: vocal audio
|
||||
3 speaker: speaker name for ref_audio voice
|
||||
|
||||
examples
|
||||
```
|
||||
curl .../v1/addvoice \
|
||||
-F "speaker=Trump" \
|
||||
-F "ref_text=today is a good day" \
|
||||
-F "ref_audio=@goodday.wav"
|
||||
```
|
||||
|
||||
|
||||
8
wwwroot/v1/index.ui
Normal file
8
wwwroot/v1/index.ui
Normal file
@ -0,0 +1,8 @@
|
||||
{
|
||||
"widgettype":"MdWidget",
|
||||
"options":{
|
||||
"height":"100%",
|
||||
"width":"100%",
|
||||
"md_url":"{{entire_url('index.md')}}"
|
||||
}
|
||||
}
|
||||
7
wwwroot/v1/infer_stream/index.dspy
Normal file
7
wwwroot/v1/infer_stream/index.dspy
Normal file
@ -0,0 +1,7 @@
|
||||
debug(f'{params_kw=}')
|
||||
async def g():
|
||||
speaker = params_kw.speaker or 'main'
|
||||
async for d in inference_stream(params_kw.prompt, speaker):
|
||||
yield d
|
||||
|
||||
return await stream_response(request, g)
|
||||
7
wwwroot/v1/inference/index.dspy
Normal file
7
wwwroot/v1/inference/index.dspy
Normal file
@ -0,0 +1,7 @@
|
||||
# normal mode
|
||||
debug(f'{params_kw=}')
|
||||
speaker = params_kw.speaker or 'main'
|
||||
path = await infer(params_kw.prompt, speaker)
|
||||
ret = entire_url(f'/idfile?path={path}')
|
||||
debug(f'inference/index.dspy:return url={ret}')
|
||||
return ret
|
||||
Loading…
x
Reference in New Issue
Block a user