first commit

This commit is contained in:
yumoqing 2025-07-16 15:07:03 +08:00
commit f5c325b96c
37 changed files with 992 additions and 0 deletions

90
README.md Normal file

@@ -0,0 +1,90 @@
# Wrapping F5-TTS as a Service
## Dependencies
Dependencies fall into two groups: operating-system level and Python level.
### Operating-system level
* Install FFmpeg
```
sudo apt install ffmpeg
```
* Install the GPU driver
For NVIDIA cards, download the driver matching your GPU model from the [NVIDIA website](https://www.nvidia.com/en-us/drivers/details/241089/), then run
```
sudo sh <the downloaded driver file>
```
### Python dependencies
* Install the PyTorch packages F5-TTS depends on
```
pip install torch==2.3.0+cu118 torchaudio==2.3.0+cu118 --extra-index-url https://download.pytorch.org/whl/cu118
```
* Install F5-TTS
```
pip install git+https://github.com/SWivid/F5-TTS.git
```
* Install the other dependencies
```
pip install git+https://git.kaiyuancloud.cn/yumoqing/apppublic.git
pip install git+https://git.kaiyuancloud.cn/yumoqing/sqlor.git
pip install git+https://git.kaiyuancloud.cn/yumoqing/ahserver.git
pip install cn2an pycld2
```
## Installation and deployment
Carry out the following steps:
* Install the operating-system dependencies
* Add an operating-system user named f5tts (see the sketch after this list)
* Log in as the f5tts user
* Install the Python dependencies
* Clone the project
* Set up the runtime environment
* Start the service
* Stop the service
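Creating the service user and logging in might look like this; a minimal sketch assuming a Debian/Ubuntu host:
```
sudo useradd -m -s /bin/bash f5tts
su - f5tts
```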
* Clone the project
```
cd ~
git clone git@git.kaiyuancloud.cn:yumoqing/f5tts.git
mv f5tts/* ~
rm -rf f5tts
```
* Set up the runtime environment
```
cd script
bash install.sh
```
* Start the service
```
sudo systemctl start f5tts
```
* Stop the service
```
killname f5tts.py
```
## API
### Add a speaker
* URL: /api/addvoice
* Method: POST
* Form data:
  * speaker: speaker name
  * ref_audio: the reference audio file
  * ref_text: transcript of the reference audio
* Returns: a UiMessage JSON document on success, or a UiError JSON document on error
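For illustration, a request might look like the following; the host and port come from conf/config.json, and all field values are placeholders. Note that the bundled form (wwwroot/addvoice.ui) posts the audio under the field name ref_voice, which is also what the server-side handler reads:
```
curl http://localhost:9995/api/addvoice \
  -F "speaker=alice" \
  -F "ref_audio=@alice_sample.wav" \
  -F "ref_text=transcript of alice_sample.wav"
```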
### TTS conversion
* URL: /api/inference
* Method: POST
* Form data:
  * prompt: the text to synthesize
  * speaker: speaker name
* Returns: the URL of the generated wav file
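A sketch of a full round trip, assuming the service listens on localhost:9995 (the port in conf/config.json). Per wwwroot/v1/inference/index.dspy the response body is a URL pointing at the generated wav, so a second request downloads the audio:
```
url=$(curl -s http://localhost:9995/api/inference \
  -d "speaker=main" \
  --data-urlencode "prompt=Hello, world")
curl -s "$url" -o output.wav
```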
### Streaming TTS conversion
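Judging from app/f5tts.py and wwwroot/tts_stream.ui, the streaming endpoint is served at /v1/infer_stream and returns one JSON object per line: each line carries the chunk text, a base64-encoded wav in audio, plus sample_rate and duration, and the final line sets done to true. A hypothetical client (field names from the handler, values are placeholders):
```
curl -sN http://localhost:9995/v1/infer_stream \
  -d "speaker=main" \
  -d "prompt=Hello, world"
# each response line looks roughly like:
# {"text": "...", "audio": "<base64 wav>", "sample_rate": 24000, "duration": 1.23, "done": false}
```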

349
app/f5tts.py Normal file

@@ -0,0 +1,349 @@
import os
import io
import base64
import sys
import asyncio
import codecs
from traceback import format_exc
import re
import numpy as np
import soundfile as sf
# import tomli
from cached_path import cached_path
from appPublic.textsplit import split_text_with_dialog_preserved
from appPublic.uniqueID import getID
from ahserver.serverenv import get_serverenv
from filetxt.loader import fileloader
import pycld2 as cld
import cn2an
from f5_tts.model import DiT, UNetT
from f5_tts.infer.utils_infer import (
mel_spec_type,
target_rms,
cross_fade_duration,
nfe_step,
cfg_strength,
sway_sampling_coef,
speed,
fix_duration,
infer_process,
load_model,
load_vocoder,
preprocess_ref_audio_text,
remove_silence_for_generated_wav,
)
import json
from time import time, sleep
from appPublic.dictObject import DictObject
from appPublic.folderUtils import temp_file
from appPublic.jsonConfig import getConfig
from appPublic.worker import awaitify
from appPublic.log import debug, info
from appPublic.background import Background
from ahserver.webapp import webapp
from ahserver.serverenv import ServerEnv
from ahserver.filestorage import FileStorage
# Local default inference parameters; these override the values imported
# from f5_tts.infer.utils_infer above.
n_mel_channels = 100
hop_length = 256
target_rms = 0.1
nfe_step = 32  # 16, 32
cfg_strength = 2.0
ode_method = "euler"
sway_sampling_coef = -1.0
speed = 1.0
def audio_ndarray_to_base64(waveform: np.ndarray, sample_rate: int = 16000) -> str:
	# for mono audio, make sure the shape is (samples, 1)
	if waveform.ndim == 1:
		waveform = waveform.reshape(-1, 1)
	# write WAV-formatted bytes into an in-memory buffer
	buffer = io.BytesIO()
	sf.write(buffer, waveform, samplerate=sample_rate, format='WAV')
	buffer.seek(0)
	# base64-encode the buffer contents
	b64_audio = base64.b64encode(buffer.read()).decode('utf-8')
	return b64_audio
def write_wav_buffer(wav, nchannels, framerate):
	fs = FileStorage()
	fn = fs._name2path(f'{getID()}.wav', userid='tmp')
	# the directory may already exist from a previous call
	os.makedirs(os.path.dirname(fn), exist_ok=True)
	debug(fn)
	sf.write(fn, wav, framerate)
	return fs.webpath(fn)
async_write_wav_buffer = awaitify(write_wav_buffer)
def detect_language(txt):
isReliable, textBytesFound, details = cld.detect(txt)
debug(f' detect_language():{isReliable=}, {textBytesFound=}, {details=} ')
return details[0][1]
class F5TTS:
def __init__(self):
self.config = getConfig()
# self.vocos = load_vocoder(is_local=True, local_path="../checkpoints/charactr/vocos-mel-24khz")
self.load_model()
self.setup_voices()
def load_model(self):
self.vocoder = load_vocoder(vocoder_name=self.config.vocoder_name,
is_local=True,
local_path=self.config.vocoder_local_path)
# load models
model_cls = DiT
model_cfg = dict(dim=1024, depth=22, heads=16,
ff_mult=2, text_dim=512, conv_layers=4)
ckpt_file = self.config.ckpts_path
self.model = load_model(model_cls, model_cfg, ckpt_file,
mel_spec_type=self.config.vocoder_name,
vocab_file=self.config.vocab_file)
self.model = self.model.to(self.config.device)
self.lock = asyncio.Lock()
def f5tts_infer(self, ref_audio, ref_text, gen_text, speed_factor):
audio, final_sample_rate, spectragram = \
infer_process(ref_audio,
ref_text,
gen_text,
self.model,
self.vocoder,
mel_spec_type=self.config.vocoder_name,
speed=self.config.speed or speed)
if audio is not None:
audio = self.speed_convert(audio, speed_factor)
else:
return None
debug(f'audio shape {audio.shape}, {gen_text=}')
return {
'text': gen_text,
'audio':audio,
'sample_rate':final_sample_rate
}
def speed_convert(self, output_audio_np, speed_factor):
original_len = len(output_audio_np)
speed_factor = max(0.1, min(speed_factor, 5.0))
target_len = int(
original_len / speed_factor
) # Target length based on speed_factor
if (
target_len != original_len and target_len > 0
): # Only interpolate if length changes and is valid
x_original = np.arange(original_len)
x_resampled = np.linspace(0, original_len - 1, target_len)
output_audio_np = np.interp(x_resampled, x_original, output_audio_np)
output_audio_np = output_audio_np.astype(np.float32)
return output_audio_np
def get_speakers(self):
t = [{'value':s, 'text':s} for s in self.speakers.keys() ]
t.append({'value':'main', 'text':'main'})
return t
async def split_text(self, text_gen, speaker):
chunks = split_text_with_dialog_preserved(text_gen)
debug(f'{len(chunks)=}')
# reg2 = self.config.speaker_match
reg2 = r"\[\[(\w+)\]\]"
ret = []
for text in chunks:
			if text in ('\r', ''):
continue
lang = await awaitify(detect_language)(text)
if lang == 'zh':
text = await awaitify(cn2an.transform)(text, 'an2cn')
voice = speaker
match = re.match(reg2, text)
if match:
voice = match[1]
if voice not in self.voices:
voice = speaker
text = re.sub(reg2, "", text)
gen_text = text.strip()
ref_audio = self.voices[voice]["ref_audio"]
ref_text = self.voices[voice]["ref_text"]
ret.append({'text':gen_text, 'ref_audio':ref_audio, 'ref_text':ref_text})
return ret
async def infer_stream(self, prompt, speaker, speed_factor=1.0):
async for a in self._inference_stream(prompt, speaker, speed_factor=speed_factor):
wavdata = a['audio']
samplerate = a['sample_rate']
b = await async_write_wav_buffer(wavdata, 1, samplerate)
yield b
async def _inference_stream(self, prompt, speaker, speed_factor=1.0):
chunks = await self.split_text(prompt, speaker)
debug(f'{len(chunks)=}')
for chunk in chunks:
gen_text = chunk['text']
ref_audio = chunk['ref_audio']
ref_text = chunk['ref_text']
infer = awaitify(self.f5tts_infer)
try:
d = await infer(ref_audio, ref_text, gen_text, speed_factor)
if d is not None:
yield d
			except Exception:
debug(f'{gen_text=} inference error\n{format_exc()}')
async def inference_stream(self, prompt, speaker, speed_factor=1.0):
total_duration = 0
async for d in self._inference_stream(prompt, speaker, speed_factor=speed_factor):
			samples = d['audio'].shape[0]
			duration = samples / d['sample_rate']
			total_duration += duration
audio_b64=audio_ndarray_to_base64(d['audio'], d['sample_rate'])
d['audio'] = audio_b64
d['duration'] = duration
d['done'] = False
txt = json.dumps(d, ensure_ascii=False)
yield txt + '\n'
d = {
'done': True,
'duration': total_duration
}
txt = json.dumps(d, ensure_ascii=False)
yield txt + '\n'
def setup_voices(self):
config = getConfig()
workdir = config.workdir
		debug(f'workdir={workdir}')
with codecs.open(config.speakers_file, 'r', 'utf-8') as f:
b = f.read()
self.speakers = json.loads(b)
fn = f'{workdir}/samples/{config.ref_audio}'
ref_audio, ref_text = preprocess_ref_audio_text(fn,
config.ref_text)
self.voices = {
"main":{
'ref_text':ref_text,
'ref_audio':ref_audio
}
}
for k,v in self.speakers.items():
fn = f'{workdir}/samples/{v["ref_audio"]}'
ref_audio, ref_text = preprocess_ref_audio_text(fn,
v['ref_text'])
self.voices[k] = {
'ref_text':ref_text,
'ref_audio':ref_audio
}
def copyfile(self, src, dest):
with open(src, 'rb') as f:
b = f.read()
with open(dest, 'wb') as f1:
f1.write(b)
async def add_voice(self, speaker, ref_audio, ref_text):
config = getConfig()
ref_audio = FileStorage().realPath(ref_audio)
workdir = config.workdir
filename = f'{getID()}.wav'
fn = f'{workdir}/samples/{filename}'
		await awaitify(self.copyfile)(ref_audio, fn)
		# the upload has been copied into samples/, remove the temporary file
		os.unlink(ref_audio)
		self.speakers[speaker] = {
			'ref_text':ref_text,
			'ref_audio':filename
		}
		f = awaitify(preprocess_ref_audio_text)
		# preprocess the copied file, since the original upload was removed above
		ref_audio, ref_text = await f(fn, ref_text)
self.voices[speaker] = {
'ref_text':ref_text,
'ref_audio':filename
}
with codecs.open(config.speakers_file, 'w', 'utf-8') as f:
f.write(json.dumps(self.speakers, indent=4, ensure_ascii=False))
return None
async def _inference(self, prompt, speaker, speed_factor=1.0):
generated_audio_segments = []
remove_silence = self.config.remove_silence or False
final_sample_rate = 16000
async for d in self._inference_stream(prompt,
speaker,
speed_factor=speed_factor):
audio = d.get('audio', None)
if audio is None:
debug(f'audio is none, {d=}')
continue
final_sample_rate = d['sample_rate']
generated_audio_segments.append(audio)
if generated_audio_segments:
final_wave = np.concatenate(generated_audio_segments)
debug(f'{prompt=}, {final_sample_rate=}')
return await async_write_wav_buffer(final_wave, 1, final_sample_rate)
else:
			debug(f'{prompt=}: no audio generated')
def UiError(title="出错", message="出错啦", timeout=5):
return {
"widgettype":"Error",
"options":{
"author":"tr",
"timeout":timeout,
"cwidth":15,
"cheight":10,
"title":title,
"auto_open":True,
"auto_dismiss":True,
"auto_destroy":True,
"message":message
}
}
def UiMessage(title="消息", message="后台消息", timeout=5):
return {
"widgettype":"Message",
"options":{
"author":"tr",
"timeout":timeout,
"cwidth":15,
"cheight":10,
"title":title,
"auto_open":True,
"auto_dismiss":True,
"auto_destroy":True,
"message":message
}
}
def test1():
sleep(36000)
return {}
f5 = None
def init():
global f5
g = ServerEnv()
f5 = F5TTS()
g.tts_engine = f5
g.infer_stream = f5.infer_stream
g.inference_stream = f5.inference_stream
g.get_speakers = f5.get_speakers
g.infer = f5._inference
g.test1 = awaitify(test1)
g.add_voice = f5.add_voice
g.UiError = UiError
	g.fileloader = fileloader
g.UiMessage = UiMessage
if __name__ == '__main__':
webapp(init)

14
app/w4a2wav.py Normal file

@@ -0,0 +1,14 @@
import os
import sys
from pydub import AudioSegment
if len(sys.argv) < 2:
	print(f'usage: {sys.argv[0]} <m4a file>')
	exit(1)
m4afn = sys.argv[1]
wavfn = m4afn[:-3] + 'wav'
# Load the m4a file
audio = AudioSegment.from_file(m4afn, format="m4a")
# Export the audio as a wav file
audio.export(wavfn, format="wav")

87
conf/config.json Normal file

@@ -0,0 +1,87 @@
{
"speaker_match":"\\[\\[(\\w+)\\]\\]",
"language":{
"zh":{
"sentence_splitter":"[。?!]|\r?\n"
},
"en":{
"sentence_splitter":"[.?!] |\r?\n"
}
},
"sample_rate":16000,
"vocab_file":"",
"ckpts_path_bak":"/share/models/SWivid/F5-TTS/F5TTS_Base/model_1200000.pt",
"ckpts_path":"/share/models/SWivid/F5-TTS/F5TTS_v1_Base/model_1250000.safetensors",
"speakers_file":"$[workdir]$/conf/speakers.json",
"vocoder_name":"vocos",
"vocoder_local_path":"/share/models/charactr/vocos-mel-24khz",
"remove_silence":false,
"modelname":"F5-TTS",
"device":"cuda:0",
"ref_audio":"ttt.wav",
"ref_text":"快点吃饭,上课要迟到了。",
"cross_fade_duration":0,
"workdir":"$[workdir]$",
"filesroot":"$[workdir]$/files",
"logger":{
"name":"f5tts",
"levelname":"info",
"logfile":"$[workdir]$/logs/f5tts.log"
},
"website":{
"paths":[
["$[workdir]$/wwwroot",""]
],
"client_max_size":10000,
"host":"0.0.0.0",
"port":9995,
"coding":"utf-8",
"ssl_gg":{
"crtfile":"$[workdir]$/conf/www.bsppo.com.pem",
"keyfile":"$[workdir]$/conf/www.bsppo.com.key"
},
"indexes":[
"index.html",
"index.tmpl",
"index.ui",
"index.dspy",
"index.md"
],
"startswiths":[
{
"leading":"/idfile",
"registerfunction":"idfile"
}
],
"processors":[
[".ws","ws"],
[".xterm","xterm"],
[".proxy","proxy"],
[".llm", "llm"],
[".llms", "llms"],
[".llma", "llma"],
[".xlsxds","xlsxds"],
[".sqlds","sqlds"],
[".tmpl.js","tmpl"],
[".tmpl.css","tmpl"],
[".html.tmpl","tmpl"],
[".bcrud", "bricks_crud"],
[".tmpl","tmpl"],
[".app","app"],
[".bui","bui"],
[".ui","bui"],
[".dspy","dspy"],
[".md","md"]
],
"rsakey":{
"privatekey":"$[workdir]$/conf/rsa_private_key.pem",
"publickey":"$[workdir]$/conf/rsa_public_key.pem"
},
"session_max_time":3000,
"session_issue_time":2500,
"session_redis_notuse":{
"url":"redis://127.0.0.1:6379"
}
}
}

6
conf/speakers.json Normal file

@@ -0,0 +1,6 @@
{
"ymq": {
"ref_text": "\u8f7b\u91cf\u5e94\u7528\u670d\u52a1\u5668\u5907\u6848\u6761\u4ef6\uff1a\u8d2d\u4e70\u65f6\u957f\u57283\u4e2a\u6708\u53ca\u4ee5\u4e0a",
"ref_audio": "ymq.wav"
}
}

16
f5tts.service Normal file

@@ -0,0 +1,16 @@
[Unit]
Wants=systemd-networkd.service
[Service]
User=ymq
Group=ymq
WorkingDirectory=/share/ymq/run/f5tts
Type=forking
ExecStart=/share/ymq/run/f5tts/start.sh
ExecStop=/share/ymq/run/f5tts/stop.sh
StandardOutput=append:/var/log/f5tts/f5tts.log
StandardError=append:/var/log/f5tts/f5tts.log
SyslogIdentifier=f5tts
[Install]
WantedBy=multi-user.target

62
install Executable file

@@ -0,0 +1,62 @@
#!/usr/bin/python3
import os
import sys
import codecs
if len(sys.argv) < 2:
print(f'Usage:\n{sys.argv[0]} venvname')
sys.exit(1)
user = os.getlogin()
home = os.environ.get('HOME')
venv = sys.argv[1]
if not os.path.exists(f'{home}/{venv}'):
os.system(f'python3 -m venv ~/{venv}')
pwd = os.getcwd()
name = os.path.basename(pwd)
service = f"""[Unit]
Description={name} service
Wants=systemd-networkd.service
Requires=nginx.service
[Service]
Type=forking
ExecStart=su - {user} -c "{pwd}/script/{name}.sh"
ExecStop=su - {user} -c "{home}/bin/killname app/{name}.py"
[Install]
WantedBy=multi-user.target
"""
with codecs.open(f'./script/{name}.service', 'w', 'utf-8') as f:
f.write(service)
with codecs.open(f'./script/{name}.sh', 'w', 'utf-8') as f:
f.write(f"""#!/usr/bin/bash
killname {pwd}/app/{name}.py
{home}/{venv}/bin/python {pwd}/app/{name}.py -w {pwd} > {pwd}/logs/stderr.log 2>&1 &
exit 0
""")
with codecs.open(f'./script/install.sh', 'w', 'utf-8') as f:
f.write(f"""#!/usr/bin/bash
sudo cp {pwd}/script/{name}.service /etc/systemd/system
sudo systemctl enable {name}.service
sudo systemctl start {name}
""")
if not os.path.exists(f'{home}/bin'):
os.mkdir(f'{home}/bin')
if not os.path.exists(f'{home}/bin/killname'):
with codecs.open(f'{home}/bin/killname', 'w', 'utf-8') as f:
f.write("""#!/usr/bin/bash
ps -ef|grep "$1"|grep -v grep|awk '{print("kill -9", $2)}'|sh
""")
os.system(f'chmod +x {home}/bin/*')
os.system(f'chmod +x {pwd}/script/*.sh')
os.system(f'{pwd}/script/install.sh')

0
logs/stderr.log Normal file

11
requirements.txt Normal file

@@ -0,0 +1,11 @@
numpy
soundfile
cached_path
redis
pycld2
cn2an
git+https://git.kaiyuancloud.cn/yumoqing/apppublic
git+https://git.kaiyuancloud.cn/yumoqing/sqlor
git+https://git.kaiyuancloud.cn/yumoqing/ahserver
git+https://git.kaiyuancloud.cn/yumoqing/filetxt
# git+https://github.com/SWivid/F5-TTS

BIN
samples/test2.m4a Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
samples/ttt.m4a Normal file

Binary file not shown.

BIN
samples/ttt.wav Normal file

Binary file not shown.

BIN
samples/ymq.wav Normal file

Binary file not shown.

10
script/f5tts.service Normal file

@@ -0,0 +1,10 @@
[Unit]
Wants=systemd-networkd.service
[Service]
User=ymq
Group=ymq
WorkingDirectory=/share/ymq/run/f5tts
ExecStart=/share/ymq/run/f5tts/f5tts.env/bin/python app/f5tts.py -p 9995
[Install]
WantedBy=multi-user.target

5
script/f5tts.sh Executable file

@@ -0,0 +1,5 @@
#!/usr/bin/bash
/d/f5tts/bin/killname /data/f5tts/app/f5tts.py
/d/f5tts/py3/bin/python /data/f5tts/app/f5tts.py -w /data/f5tts > /data/f5tts/logs/stderr.log 2>&1 &
exit 0

4
script/install.sh Executable file

@@ -0,0 +1,4 @@
#!/usr/bin/bash
sudo cp f5tts.service /etc/systemd/system
sudo systemctl enable f5tts.service
sudo systemctl start f5tts

3
script/speakers.json Normal file

@@ -0,0 +1,3 @@
{
}

10
start.sh Executable file

@@ -0,0 +1,10 @@
#!/usr/bin/bash
echo start 4 instances of the f5tts engine
rundir=/share/ymq/run/f5tts
CUDA_VISIBLE_DEVICES=6 ${rundir}/f5tts.env/bin/python ${rundir}/app/f5tts.py -w ${rundir} -p 9995 &
CUDA_VISIBLE_DEVICES=6 ${rundir}/f5tts.env/bin/python ${rundir}/app/f5tts.py -w ${rundir} -p 9995 &
CUDA_VISIBLE_DEVICES=6 ${rundir}/f5tts.env/bin/python ${rundir}/app/f5tts.py -w ${rundir} -p 9995 &
CUDA_VISIBLE_DEVICES=6 ${rundir}/f5tts.env/bin/python ${rundir}/app/f5tts.py -w ${rundir} -p 9995 &
exit 0

4
stop.sh Executable file

@@ -0,0 +1,4 @@
#!/usr/bin/bash
killname f5tts.py

BIN
wwwroot/.tts.ui.swp Normal file

Binary file not shown.

31
wwwroot/add.ui Normal file

@@ -0,0 +1,31 @@
{
"widgettype":"Form",
"options":{
"height":"70%",
"title":"向知识库添加文件",
"description":"可以添加的文件类型有:文本文件(.txt数据文件(.csv)excel文件.xlsx, .xls)word文件.doc, .docx), 演示文件(.ppt, .pptx), pdf文件",
"method":"POST",
"submit_url":"{{entire_url('v1/add')}}",
"fields":[
{
"name":"file_path",
"uitype":"file",
"required":true
},
{
"name":"userid",
"uitype":"str",
"label":"用户id",
"value":"user1",
"required":true
},
{
"name":"kdbname",
"uitype":"str",
"label":"知识库名",
"required":true,
"value":"testdb"
}
]
}
}

41
wwwroot/addvoice.ui Normal file

@@ -0,0 +1,41 @@
{
"widgettype":"VBox",
"options":{
"height":"100%"
},
"subwidgets":[
{
"widgettype":"Filler",
"options":{},
"subwidgets":[
{
"widgettype":"Form",
"id":"form",
"options":{
"title":"添加播音员",
"method":"POST",
"description":"通过输入播音员id录音和录音文字说明来添加播音员",
"submit_url":"{{entire_url('/v1/addvoice')}}",
"fields":[
{
"name":"speaker",
"label":"播音员id",
"uitype":"str"
},
{
"name":"ref_voice",
"label":"语音",
"uitype":"audiorecorder"
},
{
"name":"ref_text",
"label":"语音文字",
"uitype":"text"
}
]
}
}
]
}
]
}

1
wwwroot/get_speakers.dspy Normal file

@@ -0,0 +1 @@
return get_speakers()

45
wwwroot/index.ui Normal file

@@ -0,0 +1,45 @@
{
"widgettype":"TabPanel",
"options":{
"tab_wide":"auto",
"interval":"15px",
"height":"100%",
"width":"100%",
"tab_pos":"top",
"items":[
{
"name":"add",
"label":"文本转语音",
"refresh":true,
"content":{
"widgettype":"urlwidget",
"options":{
"url":"{{entire_url('tts.ui')}}"
}
}
},
{
"name":"add1",
"label":"文本转语音(stream)",
"refresh":true,
"content":{
"widgettype":"urlwidget",
"options":{
"url":"{{entire_url('tts_stream.ui')}}"
}
}
},
{
"name":"query",
"label":"添加播音员",
"refresh":true,
"content":{
"widgettype":"urlwidget",
"options":{
"url":"{{entire_url('addvoice.ui')}}"
}
}
}
]
}
}

10
wwwroot/js/myapp.js Normal file

@@ -0,0 +1,10 @@
var set_response_text_url = function(w, resp){
schedule_once(async_set_response_text_url.bind(w, w, resp), 0.1);
}
var async_set_response_text_url = async function(w, resp){
console.log('arguments=', arguments);
var url = await resp.text();
w.set_url(url);
w.play();
}

28
wwwroot/query.ui Normal file

@@ -0,0 +1,28 @@
{
"widgettype":"Form",
"options":{
"height":"70%",
"submit_url":"{{entire_url('v1/query')}}",
"fields":[
{
"name":"prompt",
"uitype":"text",
"required":true
},
{
"name":"userid",
"uitype":"str",
"label":"用户id",
"value":"user1",
"required":true
},
{
"name":"kdbname",
"uitype":"str",
"label":"知识库名",
"required":true,
"value":"testdb"
}
]
}
}

1
wwwroot/t.dspy Normal file

@@ -0,0 +1 @@
return entire_url('/idfile') + "?path=/trhr"

1
wwwroot/test1.dspy Normal file

@@ -0,0 +1 @@
return await test1()

56
wwwroot/tts.ui Normal file

@@ -0,0 +1,56 @@
{
"widgettype":"VBox",
"options":{
"height":"100%"
},
"subwidgets":[
{
"widgettype":"Filler",
"options":{},
"subwidgets":[
{
"widgettype":"Form",
"id":"form",
"options":{
"submit_url":"{{entire_url('/v1/inference')}}",
"fields":[
{
"name":"speaker",
"label":"播音员",
"uitype":"code",
"value":"main",
"dataurl":"{{entire_url('/get_speakers.dspy')}}"
},
{
"name":"prompt",
"label":"文本",
"uitype":"text",
"uiparams":{
"rows":20,
"cols":80
}
}
]
}
}
]
},
{
"id":"audio",
"widgettype":"AudioPlayer",
"options":{
"height":"40px",
"auto_play":true
}
}
],
"binds":[
{
"wid":"form",
"event":"submited",
"actiontype":"script",
"target":"audio",
"script":"set_response_text_url(this, event.params);"
}
]
}

53
wwwroot/tts_stream.ui Normal file

@@ -0,0 +1,53 @@
{
"widgettype":"HBox",
"options":{
"height":"100%"
},
"subwidgets":[
{
"widgettype":"Form",
"id":"form",
"options":{
"width":"50%",
"title":"流式返回",
"submit_url":"{{entire_url('/v1/infer_stream')}}",
"fields":[
{
"name":"speaker",
"label":"播音员",
"uitype":"code",
"value":"main",
"dataurl":"{{entire_url('/get_speakers.dspy')}}"
},
{
"name":"prompt",
"label":"文本",
"uitype":"text",
"uiparams":{
"rows":20,
"cols":80
}
}
]
}
},
{
"id":"audio",
"widgettype":"TextedAudioPlayer",
"options":{
"width": "50%",
"height":"100%",
"auto_play":true
}
}
],
"binds":[
{
"wid":"form",
"event":"submited",
"actiontype":"script",
"target":"audio",
"script":"console.log('this=', this, event);this.set_stream_urls(event.params)"
}
]
}

11
wwwroot/v1/addvoice/index.dspy Normal file

@@ -0,0 +1,11 @@
debug(f'{params_kw=}')
try:
speaker = params_kw.speaker
ref_audio = params_kw.ref_voice
ref_text = params_kw.ref_text
await add_voice(speaker, ref_audio, ref_text)
return UiMessage(title='Success', message='add voice success')
except Exception as e:
exception(f'{e=}')
return UiError(title='Error', message='add voice error')

21
wwwroot/v1/index.md Normal file

@@ -0,0 +1,21 @@
# API for the F5TTS wrapped web server
We provide the following APIs.
## addvoice
* path: /v1/add_voice
* method: POST
* form data:
  1. ref_text: transcript of the reference audio
  2. ref_audio: the reference (vocal) audio
  3. speaker: speaker name for the ref_audio voice
Example:
```
curl .../v1/add_voice \
-F "speaker=Trump" \
-F "ref_text=today is a good day" \
-F "ref_audio=@goodday.wav"
```

8
wwwroot/v1/index.ui Normal file

@@ -0,0 +1,8 @@
{
"widgettype":"MdWidget",
"options":{
"height":"100%",
"width":"100%",
"md_url":"{{entire_url('index.md')}}"
}
}

7
wwwroot/v1/infer_stream/index.dspy Normal file

@@ -0,0 +1,7 @@
debug(f'{params_kw=}')
async def g():
speaker = params_kw.speaker or 'main'
async for d in inference_stream(params_kw.prompt, speaker):
yield d
return await stream_response(request, g)

7
wwwroot/v1/inference/index.dspy Normal file

@@ -0,0 +1,7 @@
# normal mode
debug(f'{params_kw=}')
speaker = params_kw.speaker or 'main'
path = await infer(params_kw.prompt, speaker)
ret = entire_url(f'/idfile?path={path}')
debug(f'inference/index.dspy:return url={ret}')
return ret