commit f5c325b96c42c5d72870b4eb23117164d7e4985a
Author: yumoqing
Date:   Wed Jul 16 15:07:03 2025 +0800

    first commit

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1a162fc
--- /dev/null
+++ b/README.md
@@ -0,0 +1,90 @@
+# Wrapping F5-TTS as a service
+
+## Dependencies
+These fall into operating-system-level dependencies and Python dependencies.
+
+### Operating system level
+* Install FFmpeg
+```
+sudo apt install ffmpeg
+```
+* Install the GPU driver
+Download the NVIDIA driver matching your GPU model from the [NVIDIA website](https://www.nvidia.com/en-us/drivers/details/241089/), then run
+```
+sudo sh <the downloaded driver file>
+```
+
+### Python dependencies
+* Install the F5-TTS prerequisites
+```
+pip install torch==2.3.0+cu118 torchaudio==2.3.0+cu118 --extra-index-url https://download.pytorch.org/whl/cu118
+```
+
+* Install F5-TTS
+```
+pip install git+https://github.com/SWivid/F5-TTS.git
+```
+
+* Install the other dependencies
+```
+pip install git+https://git.kaiyuancloud.cn/yumoqing/apppublic.git
+pip install git+https://git.kaiyuancloud.cn/yumoqing/sqlor.git
+pip install git+https://git.kaiyuancloud.cn/yumoqing/ahserver.git
+pip install cn2an pycld2
+```
+## Installation and deployment
+Carry out the following steps:
+* install the operating-system dependencies
+* add an OS user named f5tts
+* log in as the f5tts user
+* install the Python dependencies
+* clone the project
+* set up the runtime environment
+* start the service
+* stop the service
+
+* Clone the project
+```
+cd ~
+git clone git@git.kaiyuancloud.cn:yumoqing/f5tts.git
+mv f5tts/* ~
+rm -rf f5tts
+```
+
+* Set up the runtime environment
+```
+cd script
+bash install.sh
+```
+
+* Start
+```
+sudo systemctl start f5tts
+```
+* Stop
+```
+killname f5tts.py
+```
+
+## API
+
+### Add a speaker
+
+url: /api/addvoice
+method: POST
+form data:
+speaker: the speaker name
+ref_audio: the voice sample file
+ref_text: the transcript of the voice sample
+Returns: a UiMessage JSON document on success,
+ or a UiError JSON document on failure.
+
+### TTS conversion
+url: /api/inference
+method: POST
+data:
+prompt: the text to synthesize
+speaker: the speaker name
+Returns: a URL from which the generated WAV file can be downloaded.
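+
+A minimal usage sketch (illustrative only): it assumes the service is reachable directly on port 9995 as set in conf/config.json, that the handler shipped in this commit is served at /v1/inference (wwwroot/v1/inference/index.dspy) while a reverse proxy may expose it as /api/inference, and that form-encoded fields are accepted the same way the built-in web UI submits them.
+```
+# request synthesis; the response body is the URL of the generated WAV
+wav_url=$(curl -s -X POST http://127.0.0.1:9995/v1/inference \
+    -d "speaker=main" \
+    --data-urlencode "prompt=快点吃饭,上课要迟到了。")
+# download the audio
+curl -s -o out.wav "$wav_url"
+```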
+
+### Streaming TTS conversion
+

diff --git a/app/f5tts.py b/app/f5tts.py
new file mode 100644
index 0000000..e0e5f0d
--- /dev/null
+++ b/app/f5tts.py
@@ -0,0 +1,349 @@
+import os
+import io
+import base64
+import sys
+import asyncio
+import codecs
+from traceback import format_exc
+import re
+
+import numpy as np
+import soundfile as sf
+# import tomli
+from cached_path import cached_path
+from appPublic.textsplit import split_text_with_dialog_preserved
+from appPublic.uniqueID import getID
+from ahserver.serverenv import get_serverenv
+from filetxt.loader import fileloader
+import pycld2 as cld
+import cn2an
+
+from f5_tts.model import DiT, UNetT
+from f5_tts.infer.utils_infer import (
+    mel_spec_type,
+    target_rms,
+    cross_fade_duration,
+    nfe_step,
+    cfg_strength,
+    sway_sampling_coef,
+    speed,
+    fix_duration,
+    infer_process,
+    load_model,
+    load_vocoder,
+    preprocess_ref_audio_text,
+    remove_silence_for_generated_wav,
+)
+
+import json
+from time import time, sleep
+from appPublic.dictObject import DictObject
+from appPublic.folderUtils import temp_file
+from appPublic.jsonConfig import getConfig
+from appPublic.worker import awaitify
+from appPublic.log import debug, info
+from appPublic.background import Background
+from ahserver.webapp import webapp
+from ahserver.serverenv import ServerEnv
+from ahserver.filestorage import FileStorage
+
+n_mel_channels = 100
+hop_length = 256
+target_rms = 0.1
+nfe_step = 32  # 16, 32
+cfg_strength = 2.0
+ode_method = "euler"
+sway_sampling_coef = -1.0
+speed = 1.0
+
+def audio_ndarray_to_base64(waveform: np.ndarray, sample_rate: int = 16000) -> str:
+    # for mono input, make sure the shape is (samples, 1)
+    if waveform.ndim == 1:
+        waveform = waveform.reshape(-1, 1)
+
+    # write into an in-memory buffer in WAV format
+    buffer = io.BytesIO()
+    sf.write(buffer, waveform, samplerate=sample_rate, format='WAV')
+    buffer.seek(0)
+
+    # base64 encode
+    b64_audio = base64.b64encode(buffer.read()).decode('utf-8')
+    return b64_audio
+
+def write_wav_buffer(wav, nchannels, framerate):
+    fs = FileStorage()
+    fn = fs._name2path(f'{getID()}.wav', userid='tmp')
+    os.makedirs(os.path.dirname(fn), exist_ok=True)
+    debug(fn)
+    with open(fn, "wb") as f:
+        sf.write(f.name, wav, framerate)
+    return fs.webpath(fn)
+
+async_write_wav_buffer = awaitify(write_wav_buffer)
+
+def detect_language(txt):
+    isReliable, textBytesFound, details = cld.detect(txt)
+    debug(f' detect_language():{isReliable=}, {textBytesFound=}, {details=} ')
+    return details[0][1]
+
+class F5TTS:
+    def __init__(self):
+        self.config = getConfig()
+        # self.vocos = load_vocoder(is_local=True, local_path="../checkpoints/charactr/vocos-mel-24khz")
+        self.load_model()
+        self.setup_voices()
+
+    def load_model(self):
+        self.vocoder = load_vocoder(vocoder_name=self.config.vocoder_name,
+                                    is_local=True,
+                                    local_path=self.config.vocoder_local_path)
+
+        # load models
+        ckpt_file = ''
+        model_cls = DiT
+        model_cfg = dict(dim=1024, depth=22, heads=16,
+                         ff_mult=2, text_dim=512, conv_layers=4)
+        ckpt_file = self.config.ckpts_path
+        self.model = load_model(model_cls, model_cfg, ckpt_file,
+                                mel_spec_type=self.config.vocoder_name,
+                                vocab_file=self.config.vocab_file)
+        self.model = self.model.to(self.config.device)
+        self.lock = asyncio.Lock()
+
+    def f5tts_infer(self, ref_audio, ref_text, gen_text, speed_factor):
+        audio, final_sample_rate, spectragram = \
+            infer_process(ref_audio,
+                          ref_text,
+                          gen_text,
+                          self.model,
+                          self.vocoder,
+                          mel_spec_type=self.config.vocoder_name,
+                          speed=self.config.speed or speed)
+        if audio is not None:
+            audio = self.speed_convert(audio, speed_factor)
+        else:
+            return None
+        debug(f'audio shape {audio.shape}, {gen_text=}')
+        return {
+            'text': gen_text,
+            'audio': audio,
+            'sample_rate': final_sample_rate
+        }
+
+    def speed_convert(self, output_audio_np, speed_factor):
+        original_len = len(output_audio_np)
+        speed_factor = max(0.1, min(speed_factor, 5.0))
+        target_len = int(
+            original_len / speed_factor
+        )  # Target length based on speed_factor
+        if (
+            target_len != original_len and target_len > 0
+        ):  # Only interpolate if length changes and is valid
+            x_original = np.arange(original_len)
+            x_resampled = np.linspace(0, original_len - 1, target_len)
+            output_audio_np = np.interp(x_resampled, x_original, output_audio_np)
+            output_audio_np = output_audio_np.astype(np.float32)
+        return output_audio_np
+
+    def get_speakers(self):
+        t = [{'value':s, 'text':s} for s in self.speakers.keys()]
+        t.append({'value':'main', 'text':'main'})
+        return t
+
+    async def split_text(self, text_gen, speaker):
+        chunks = split_text_with_dialog_preserved(text_gen)
+        debug(f'{len(chunks)=}')
+        # reg2 = self.config.speaker_match
+        reg2 = r"\[\[(\w+)\]\]"
+        ret = []
+        for text in chunks:
+            if text.strip() == '':
+                continue
+            lang = await awaitify(detect_language)(text)
+            if lang == 'zh':
+                text = await awaitify(cn2an.transform)(text, 'an2cn')
+            voice = speaker
+            match = re.match(reg2, text)
+            if match:
+                voice = match[1]
+            if voice not in self.voices:
+                voice = speaker
+            text = re.sub(reg2, "", text)
+            gen_text = text.strip()
+            ref_audio = self.voices[voice]["ref_audio"]
+            ref_text = self.voices[voice]["ref_text"]
+            ret.append({'text':gen_text, 'ref_audio':ref_audio, 'ref_text':ref_text})
+        return ret
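+
+    # Editor's note (not in the original source): split_text() recognizes an inline
+    # speaker tag of the form [[name]] at the start of a chunk, so a prompt such as
+    # "[[ymq]]今天天气不错。" is rendered with the "ymq" voice from speakers.json,
+    # while untagged chunks and unknown tags fall back to the speaker argument.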
+
+    async def infer_stream(self, prompt, speaker, speed_factor=1.0):
+        async for a in self._inference_stream(prompt, speaker, speed_factor=speed_factor):
+            wavdata = a['audio']
+            samplerate = a['sample_rate']
+            b = await async_write_wav_buffer(wavdata, 1, samplerate)
+            yield b
+
+    async def _inference_stream(self, prompt, speaker, speed_factor=1.0):
+        text_gen = prompt
+        chunks = await self.split_text(prompt, speaker)
+        debug(f'{len(chunks)=}')
+        for chunk in chunks:
+            gen_text = chunk['text']
+            ref_audio = chunk['ref_audio']
+            ref_text = chunk['ref_text']
+            infer = awaitify(self.f5tts_infer)
+            try:
+                d = await infer(ref_audio, ref_text, gen_text, speed_factor)
+                if d is not None:
+                    yield d
+            except Exception:
+                debug(f'{gen_text=} inference error\n{format_exc()}')
+
+    async def inference_stream(self, prompt, speaker, speed_factor=1.0):
+        total_duration = 0
+        async for d in self._inference_stream(prompt, speaker, speed_factor=speed_factor):
+            samples = d['audio'].shape[0]
+            duration = samples / d['sample_rate']
+            total_duration += duration
+            audio_b64 = audio_ndarray_to_base64(d['audio'], d['sample_rate'])
+            d['audio'] = audio_b64
+            d['duration'] = duration
+            d['done'] = False
+            txt = json.dumps(d, ensure_ascii=False)
+            yield txt + '\n'
+        d = {
+            'done': True,
+            'duration': total_duration
+        }
+        txt = json.dumps(d, ensure_ascii=False)
+        yield txt + '\n'
+
+    def setup_voices(self):
+        config = getConfig()
+        workdir = config.workdir
+        print('workdir=', workdir)
+        d = None
+        with codecs.open(config.speakers_file, 'r', 'utf-8') as f:
+            b = f.read()
+            self.speakers = json.loads(b)
+        fn = f'{workdir}/samples/{config.ref_audio}'
+        ref_audio, ref_text = preprocess_ref_audio_text(fn,
+                                                        config.ref_text)
+        self.voices = {
+            "main":{
+                'ref_text':ref_text,
+                'ref_audio':ref_audio
+            }
+        }
+        for k,v in self.speakers.items():
+            fn = f'{workdir}/samples/{v["ref_audio"]}'
+            ref_audio, ref_text = preprocess_ref_audio_text(fn,
+                                                            v['ref_text'])
+            self.voices[k] = {
+                'ref_text':ref_text,
+                'ref_audio':ref_audio
+            }
+
+    def copyfile(self, src, dest):
+        with open(src, 'rb') as f:
+            b = f.read()
+        with open(dest, 'wb') as f1:
+            f1.write(b)
+
+    async def add_voice(self, speaker, ref_audio, ref_text):
+        config = getConfig()
+        ref_audio = FileStorage().realPath(ref_audio)
+        workdir = config.workdir
+        filename = f'{getID()}.wav'
+        fn = f'{workdir}/samples/{filename}'
+        await awaitify(self.copyfile)(ref_audio, fn)
+        os.unlink(ref_audio)
+        self.speakers[speaker] = {
+            'ref_text':ref_text,
+            'ref_audio':filename
+        }
+        f = awaitify(preprocess_ref_audio_text)
+        ref_audio, ref_text = await f(fn, ref_text)
+        self.voices[speaker] = {
+            'ref_text':ref_text,
+            'ref_audio':ref_audio
+        }
+        with codecs.open(config.speakers_file, 'w', 'utf-8') as f:
+            f.write(json.dumps(self.speakers, indent=4, ensure_ascii=False))
+        return None
+
+    async def _inference(self, prompt, speaker, speed_factor=1.0):
+        generated_audio_segments = []
+        remove_silence = self.config.remove_silence or False
+        final_sample_rate = 16000
+        async for d in self._inference_stream(prompt,
+                                              speaker,
+                                              speed_factor=speed_factor):
+            audio = d.get('audio', None)
+            if audio is None:
+                debug(f'audio is none, {d=}')
+                continue
+            final_sample_rate = d['sample_rate']
+            generated_audio_segments.append(audio)
+
+        if generated_audio_segments:
+            final_wave = np.concatenate(generated_audio_segments)
+            debug(f'{prompt=}, {final_sample_rate=}')
+            return await async_write_wav_buffer(final_wave, 1, final_sample_rate)
+        else:
+            debug(f'{prompt=} no audio generated')
+
+def UiError(title="出错", message="出错啦", timeout=5):
+    return {
+        "widgettype":"Error",
+        "options":{
+            "author":"tr",
+            "timeout":timeout,
+            "cwidth":15,
+            "cheight":10,
+            "title":title,
+            "auto_open":True,
+            "auto_dismiss":True,
+            "auto_destroy":True,
+            "message":message
+        }
+    }
+
+def UiMessage(title="消息", message="后台消息", timeout=5):
+    return {
+        "widgettype":"Message",
+        "options":{
+            "author":"tr",
+            "timeout":timeout,
+            "cwidth":15,
+            "cheight":10,
+            "title":title,
+            "auto_open":True,
+            "auto_dismiss":True,
+            "auto_destroy":True,
+            "message":message
+        }
+    }
+
+def test1():
+    sleep(36000)
+    return {}
+
+f5 = None
+def init():
+    global f5
+    g = ServerEnv()
+    f5 = F5TTS()
+    g.tts_engine = f5
+    g.infer_stream = f5.infer_stream
+    g.inference_stream = f5.inference_stream
+    g.get_speakers = f5.get_speakers
+    g.infer = f5._inference
+    g.test1 = awaitify(test1)
+    g.add_voice = f5.add_voice
+    g.UiError = UiError
+    g.fileloader = fileloader
+    g.UiMessage = UiMessage
+
+if __name__ == '__main__':
+    webapp(init)
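
A note on the streaming protocol implemented by `inference_stream()` above: each yielded line is a JSON object whose `audio` field is a base64-encoded, self-contained WAV segment, followed by a final `{"done": true, "duration": ...}` record. A rough client-side sketch, under the assumptions that the service is reachable directly on port 9995, that `/v1/infer_stream` forwards the generator's lines verbatim, and that `jq` and `base64` are installed:
```
curl -sN -X POST http://127.0.0.1:9995/v1/infer_stream \
    -d "speaker=main" --data-urlencode "prompt=你好,今天天气不错。" |
while IFS= read -r line; do
    if [ "$(echo "$line" | jq -r '.done')" = "false" ]; then
        # every non-final record carries one complete WAV segment
        echo "$line" | jq -r '.audio' | base64 -d > "chunk_$(date +%s%N).wav"
    fi
done
```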
"options":{ + "author":"tr", + "timeout":timeout, + "cwidth":15, + "cheight":10, + "title":title, + "auto_open":True, + "auto_dismiss":True, + "auto_destroy":True, + "message":message + } + } + +def test1(): + sleep(36000) + return {} + +f5 = None +def init(): + global f5 + g = ServerEnv() + f5 = F5TTS() + g.tts_engine = f5 + g.infer_stream = f5.infer_stream + g.inference_stream = f5.inference_stream + g.get_speakers = f5.get_speakers + g.infer = f5._inference + g.test1 = awaitify(test1) + g.add_voice = f5.add_voice + g.UiError = UiError + g.filelaoder = fileloader + g.UiMessage = UiMessage + +if __name__ == '__main__': + webapp(init) diff --git a/app/w4a2wav.py b/app/w4a2wav.py new file mode 100644 index 0000000..18d26ce --- /dev/null +++ b/app/w4a2wav.py @@ -0,0 +1,14 @@ +import os +import sys +from pydub import AudioSegment + +if len(sys.argv) < 2: + exit(1) + +m4afn = sys.argv[1] +wavfn = m4afn[:-3] + 'wav' +# Load the m4a file +audio = AudioSegment.from_file(m4afn, format="m4a") + +# Export the audio as a wav file +audio.export(wavfn, format="wav") diff --git a/conf/config.json b/conf/config.json new file mode 100644 index 0000000..375b476 --- /dev/null +++ b/conf/config.json @@ -0,0 +1,87 @@ +{ + "speaker_match":"\\[\\[(\\w+)\\]\\]", + "language":{ + "zh":{ + "sentence_splitter":"[。?!]|\r?\n" + }, + "en":{ + "sentence_splitter":"[.?!] |\r?\n" + } + }, + "sample_rate":16000, + "vocab_file":"", + "ckpts_path_bak":"/share/models/SWivid/F5-TTS/F5TTS_Base/model_1200000.pt", + "ckpts_path":"/share/models/SWivid/F5-TTS/F5TTS_v1_Base/model_1250000.safetensors", + "speakers_file":"$[workdir]$/conf/speakers.json", + "vocoder_name":"vocos", + "vocoder_local_path":"/share/models/charactr/vocos-mel-24khz", + "remove_silence":false, + "modelname":"F5-TTS", + "device":"cuda:0", + "ref_audio":"ttt.wav", + "ref_text":"快点吃饭,上课要迟到了。", + "cross_fade_duration":0, + "workdir":"$[workdir]$", + "filesroot":"$[workdir]$/files", + "logger":{ + "name":"f5tts", + "levelname":"info", + "logfile":"$[workdir]$/logs/f5tts.log" + }, + "website":{ + "paths":[ + ["$[workdir]$/wwwroot",""] + ], + "client_max_size":10000, + "host":"0.0.0.0", + "port":9995, + "coding":"utf-8", + "ssl_gg":{ + "crtfile":"$[workdir]$/conf/www.bsppo.com.pem", + "keyfile":"$[workdir]$/conf/www.bsppo.com.key" + }, + "indexes":[ + "index.html", + "index.tmpl", + "index.ui", + "index.dspy", + "index.md" + ], + "startswiths":[ + { + "leading":"/idfile", + "registerfunction":"idfile" + } + ], + "processors":[ + [".ws","ws"], + [".xterm","xterm"], + [".proxy","proxy"], + [".llm", "llm"], + [".llms", "llms"], + [".llma", "llma"], + [".xlsxds","xlsxds"], + [".sqlds","sqlds"], + [".tmpl.js","tmpl"], + [".tmpl.css","tmpl"], + [".html.tmpl","tmpl"], + [".bcrud", "bricks_crud"], + [".tmpl","tmpl"], + [".app","app"], + [".bui","bui"], + [".ui","bui"], + [".dspy","dspy"], + [".md","md"] + ], + "rsakey":{ + "privatekey":"$[workdir]$/conf/rsa_private_key.pem", + "publickey":"$[workdir]$/conf/rsa_public_key.pem" + }, + "session_max_time":3000, + "session_issue_time":2500, + "session_redis_notuse":{ + "url":"redis://127.0.0.1:6379" + } + } +} + diff --git a/conf/speakers.json b/conf/speakers.json new file mode 100644 index 0000000..7aa3bee --- /dev/null +++ b/conf/speakers.json @@ -0,0 +1,6 @@ +{ + "ymq": { + "ref_text": "\u8f7b\u91cf\u5e94\u7528\u670d\u52a1\u5668\u5907\u6848\u6761\u4ef6\uff1a\u8d2d\u4e70\u65f6\u957f\u57283\u4e2a\u6708\u53ca\u4ee5\u4e0a", + "ref_audio": "ymq.wav" + } +} diff --git a/f5tts.service b/f5tts.service new file mode 100644 index 
diff --git a/conf/config.json b/conf/config.json
new file mode 100644
index 0000000..375b476
--- /dev/null
+++ b/conf/config.json
@@ -0,0 +1,87 @@
+{
+    "speaker_match":"\\[\\[(\\w+)\\]\\]",
+    "language":{
+        "zh":{
+            "sentence_splitter":"[。?!]|\r?\n"
+        },
+        "en":{
+            "sentence_splitter":"[.?!] |\r?\n"
+        }
+    },
+    "sample_rate":16000,
+    "vocab_file":"",
+    "ckpts_path_bak":"/share/models/SWivid/F5-TTS/F5TTS_Base/model_1200000.pt",
+    "ckpts_path":"/share/models/SWivid/F5-TTS/F5TTS_v1_Base/model_1250000.safetensors",
+    "speakers_file":"$[workdir]$/conf/speakers.json",
+    "vocoder_name":"vocos",
+    "vocoder_local_path":"/share/models/charactr/vocos-mel-24khz",
+    "remove_silence":false,
+    "modelname":"F5-TTS",
+    "device":"cuda:0",
+    "ref_audio":"ttt.wav",
+    "ref_text":"快点吃饭,上课要迟到了。",
+    "cross_fade_duration":0,
+    "workdir":"$[workdir]$",
+    "filesroot":"$[workdir]$/files",
+    "logger":{
+        "name":"f5tts",
+        "levelname":"info",
+        "logfile":"$[workdir]$/logs/f5tts.log"
+    },
+    "website":{
+        "paths":[
+            ["$[workdir]$/wwwroot",""]
+        ],
+        "client_max_size":10000,
+        "host":"0.0.0.0",
+        "port":9995,
+        "coding":"utf-8",
+        "ssl_gg":{
+            "crtfile":"$[workdir]$/conf/www.bsppo.com.pem",
+            "keyfile":"$[workdir]$/conf/www.bsppo.com.key"
+        },
+        "indexes":[
+            "index.html",
+            "index.tmpl",
+            "index.ui",
+            "index.dspy",
+            "index.md"
+        ],
+        "startswiths":[
+            {
+                "leading":"/idfile",
+                "registerfunction":"idfile"
+            }
+        ],
+        "processors":[
+            [".ws","ws"],
+            [".xterm","xterm"],
+            [".proxy","proxy"],
+            [".llm", "llm"],
+            [".llms", "llms"],
+            [".llma", "llma"],
+            [".xlsxds","xlsxds"],
+            [".sqlds","sqlds"],
+            [".tmpl.js","tmpl"],
+            [".tmpl.css","tmpl"],
+            [".html.tmpl","tmpl"],
+            [".bcrud", "bricks_crud"],
+            [".tmpl","tmpl"],
+            [".app","app"],
+            [".bui","bui"],
+            [".ui","bui"],
+            [".dspy","dspy"],
+            [".md","md"]
+        ],
+        "rsakey":{
+            "privatekey":"$[workdir]$/conf/rsa_private_key.pem",
+            "publickey":"$[workdir]$/conf/rsa_public_key.pem"
+        },
+        "session_max_time":3000,
+        "session_issue_time":2500,
+        "session_redis_notuse":{
+            "url":"redis://127.0.0.1:6379"
+        }
+    }
+}
+
diff --git a/conf/speakers.json b/conf/speakers.json
new file mode 100644
index 0000000..7aa3bee
--- /dev/null
+++ b/conf/speakers.json
@@ -0,0 +1,6 @@
+{
+    "ymq": {
+        "ref_text": "\u8f7b\u91cf\u5e94\u7528\u670d\u52a1\u5668\u5907\u6848\u6761\u4ef6\uff1a\u8d2d\u4e70\u65f6\u957f\u57283\u4e2a\u6708\u53ca\u4ee5\u4e0a",
+        "ref_audio": "ymq.wav"
+    }
+}
diff --git a/f5tts.service b/f5tts.service
new file mode 100644
index 0000000..eefb6e7
--- /dev/null
+++ b/f5tts.service
@@ -0,0 +1,16 @@
+[Unit]
+Wants=systemd-networkd.service
+
+[Service]
+User=ymq
+Group=ymq
+WorkingDirectory=/share/ymq/run/f5tts
+Type=forking
+ExecStart=/share/ymq/run/f5tts/start.sh
+ExecStop=/share/ymq/run/f5tts/stop.sh
+StandardOutput=append:/var/log/f5tts/f5tts.log
+StandardError=append:/var/log/f5tts/f5tts.log
+SyslogIdentifier=f5tts
+
+[Install]
+WantedBy=multi-user.target
diff --git a/install b/install
new file mode 100755
index 0000000..ac5f5d3
--- /dev/null
+++ b/install
@@ -0,0 +1,62 @@
+#!/usr/bin/python3
+
+import os
+import sys
+import codecs
+
+if len(sys.argv) < 2:
+    print(f'Usage:\n{sys.argv[0]} venvname')
+    sys.exit(1)
+
+user = os.getlogin()
+home = os.environ.get('HOME')
+
+venv = sys.argv[1]
+if not os.path.exists(f'{home}/{venv}'):
+    os.system(f'python3 -m venv ~/{venv}')
+pwd = os.getcwd()
+name = os.path.basename(pwd)
+
+service = f"""[Unit]
+Description={name} service
+Wants=systemd-networkd.service
+Requires=nginx.service
+
+[Service]
+Type=forking
+ExecStart=su - {user} -c "{pwd}/script/{name}.sh"
+ExecStop=su - {user} -c "{home}/bin/killname app/{name}.py"
+[Install]
+WantedBy=multi-user.target
+"""
+
+with codecs.open(f'./script/{name}.service', 'w', 'utf-8') as f:
+    f.write(service)
+
+with codecs.open(f'./script/{name}.sh', 'w', 'utf-8') as f:
+    f.write(f"""#!/usr/bin/bash
+
+killname {pwd}/app/{name}.py
+{home}/{venv}/bin/python {pwd}/app/{name}.py -w {pwd} > {pwd}/logs/stderr.log 2>&1 &
+exit 0
+""")
+
+with codecs.open(f'./script/install.sh', 'w', 'utf-8') as f:
+    f.write(f"""#!/usr/bin/bash
+sudo cp {name}.service /etc/systemd/system
+sudo systemctl enable {name}.service
+sudo systemctl start {name}
+""")
+
+if not os.path.exists(f'{home}/bin'):
+    os.mkdir(f'{home}/bin')
+if not os.path.exists(f'{home}/bin/killname'):
+    with codecs.open(f'{home}/bin/killname', 'w', 'utf-8') as f:
+        f.write("""#!/usr/bin/bash
+
+ps -ef|grep "$1"|grep -v grep|awk '{print("kill -9", $2)}'|sh
+""")
+os.system(f'chmod +x {home}/bin/*')
+os.system(f'chmod +x {pwd}/script/*.sh')
+os.system(f'{pwd}/script/install.sh')
+
diff --git a/logs/stderr.log b/logs/stderr.log
new file mode 100644
index 0000000..e69de29
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..8030106
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,11 @@
+numpy
+soundfile
+cached_path
+redis
+pycld2
+cn2an
+git+https://git.kaiyuancloud.cn/yumoqing/apppublic
+git+https://git.kaiyuancloud.cn/yumoqing/sqlor
+git+https://git.kaiyuancloud.cn/yumoqing/ahserver
+git+https://git.kaiyuancloud.cn/yumoqing/filetxt
+# git+https://github.com/SWivid/F5-TTS
diff --git a/samples/test2.m4a b/samples/test2.m4a
new file mode 100644
index 0000000..2115810
Binary files /dev/null and b/samples/test2.m4a differ
diff --git a/samples/test_en_1_ref_short.wav b/samples/test_en_1_ref_short.wav
new file mode 100644
index 0000000..3c593c3
Binary files /dev/null and b/samples/test_en_1_ref_short.wav differ
diff --git a/samples/test_zh_1_ref_short.wav b/samples/test_zh_1_ref_short.wav
new file mode 100644
index 0000000..8cc055e
Binary files /dev/null and b/samples/test_zh_1_ref_short.wav differ
diff --git a/samples/ttt.m4a b/samples/ttt.m4a
new file mode 100644
index 0000000..2115810
Binary files /dev/null and b/samples/ttt.m4a differ
diff --git a/samples/ttt.wav b/samples/ttt.wav
new file mode 100644
index 0000000..0261f02
Binary files /dev/null and b/samples/ttt.wav differ
diff --git a/samples/ymq.wav b/samples/ymq.wav
new file mode 100644
index 0000000..4027401
Binary files /dev/null and b/samples/ymq.wav differ
diff --git a/script/f5tts.service b/script/f5tts.service
new file mode 100644
index 0000000..dd82c51
--- /dev/null
+++ b/script/f5tts.service
@@ -0,0 +1,10 @@
+[Unit]
+Wants=systemd-networkd.service
+
+[Service]
+User=ymq
+Group=ymq
+WorkingDirectory=/share/ymq/run/f5tts
+ExecStart=/share/ymq/run/f5tts/f5tts.env/bin/python app/f5tts.py -p 9995
+[Install]
+WantedBy=multi-user.target
diff --git a/script/f5tts.sh b/script/f5tts.sh
new file mode 100755
index 0000000..82fcac5
--- /dev/null
+++ b/script/f5tts.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/bash
+
+/d/f5tts/bin/killname /data/f5tts/app/f5tts.py
+/d/f5tts/py3/bin/python /data/f5tts/app/f5tts.py -w /data/f5tts > /data/f5tts/logs/stderr.log 2>&1 &
+exit 0
diff --git a/script/install.sh b/script/install.sh
new file mode 100755
index 0000000..3042c68
--- /dev/null
+++ b/script/install.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/bash
+sudo cp f5tts.service /etc/systemd/system
+sudo systemctl enable f5tts.service
+sudo systemctl start f5tts
diff --git a/script/speakers.json b/script/speakers.json
new file mode 100644
index 0000000..bfd870e
--- /dev/null
+++ b/script/speakers.json
@@ -0,0 +1,3 @@
+{
+}
+
diff --git a/start.sh b/start.sh
new file mode 100755
index 0000000..64ea755
--- /dev/null
+++ b/start.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/bash
+
+echo start 3 instances for f5tts engine
+rundir=/share/ymq/run/f5tts
+CUDA_VISIBLE_DEVICES=6 ${rundir}/f5tts.env/bin/python ${rundir}/app/f5tts.py -w ${rundir} -p 9995 &
+CUDA_VISIBLE_DEVICES=6 ${rundir}/f5tts.env/bin/python ${rundir}/app/f5tts.py -w ${rundir} -p 9995 &
+CUDA_VISIBLE_DEVICES=6 ${rundir}/f5tts.env/bin/python ${rundir}/app/f5tts.py -w ${rundir} -p 9995 &
+CUDA_VISIBLE_DEVICES=6 ${rundir}/f5tts.env/bin/python ${rundir}/app/f5tts.py -w ${rundir} -p 9995 &
+
+exit 0
diff --git a/stop.sh b/stop.sh
new file mode 100755
index 0000000..5478bee
--- /dev/null
+++ b/stop.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/bash
+
+killname f5tts.py
+
diff --git a/wwwroot/.tts.ui.swp b/wwwroot/.tts.ui.swp
new file mode 100644
index 0000000..1e5abb4
Binary files /dev/null and b/wwwroot/.tts.ui.swp differ
diff --git a/wwwroot/add.ui b/wwwroot/add.ui
new file mode 100644
index 0000000..e403bae
--- /dev/null
+++ b/wwwroot/add.ui
@@ -0,0 +1,31 @@
+{
+    "widgettype":"Form",
+    "options":{
+        "height":"70%",
+        "title":"向知识库添加文件",
+        "description":"可以添加的文件类型有:文本文件(.txt),数据文件(.csv),excel文件(.xlsx, .xls),word文件(.doc, .docx), 演示文件(.ppt, .pptx), pdf文件",
+        "method":"POST",
+        "submit_url":"{{entire_url('v1/add')}}",
+        "fields":[
+            {
+                "name":"file_path",
+                "uitype":"file",
+                "required":true
+            },
+            {
+                "name":"userid",
+                "uitype":"str",
+                "label":"用户id",
+                "value":"user1",
+                "required":true
+            },
+            {
+                "name":"kdbname",
+                "uitype":"str",
+                "label":"知识库名",
+                "required":true,
+                "value":"testdb"
+            }
+        ]
+    }
+}
diff --git a/wwwroot/addvoice.ui b/wwwroot/addvoice.ui
new file mode 100644
index 0000000..ac16a56
--- /dev/null
+++ b/wwwroot/addvoice.ui
@@ -0,0 +1,41 @@
+{
+    "widgettype":"VBox",
+    "options":{
+        "height":"100%"
+    },
+    "subwidgets":[
+        {
+            "widgettype":"Filler",
+            "options":{},
+            "subwidgets":[
+                {
+                    "widgettype":"Form",
+                    "id":"form",
+                    "options":{
+                        "title":"添加播音员",
+                        "method":"POST",
+                        "description":"通过输入播音员id,录音和录音文字说明,来添加播音员",
+                        "submit_url":"{{entire_url('/v1/addvoice')}}",
+                        "fields":[
+                            {
+                                "name":"speaker",
+                                "label":"播音员id",
+                                "uitype":"str"
+                            },
+                            {
+                                "name":"ref_voice",
+                                "label":"语音",
+                                "uitype":"audiorecorder"
+                            },
+                            {
+                                "name":"ref_text",
+                                "label":"语音文字",
+                                "uitype":"text"
+                            }
+                        ]
+                    }
+                }
+            ]
+        }
+    ]
+}
diff --git a/wwwroot/get_speakers.dspy b/wwwroot/get_speakers.dspy
new file mode 100644
index 0000000..23f0413
--- /dev/null
+++ b/wwwroot/get_speakers.dspy
@@ -0,0 +1 @@
+return get_speakers()
diff --git a/wwwroot/index.ui b/wwwroot/index.ui
new file mode 100644
index 0000000..a2ab19b
--- /dev/null
+++ b/wwwroot/index.ui
@@ -0,0 +1,45 @@
+{
+    "widgettype":"TabPanel",
+    "options":{
+        "tab_wide":"auto",
+        "interval":"15px",
+        "height":"100%",
+        "width":"100%",
+        "tab_pos":"top",
+        "items":[
+            {
+                "name":"add",
+                "label":"文本转语音",
+                "refresh":true,
+                "content":{
+                    "widgettype":"urlwidget",
+                    "options":{
+                        "url":"{{entire_url('tts.ui')}}"
+                    }
+                }
+            },
+            {
+                "name":"add1",
+                "label":"文本转语音(stream)",
+                "refresh":true,
+                "content":{
+                    "widgettype":"urlwidget",
+                    "options":{
+                        "url":"{{entire_url('tts_stream.ui')}}"
+                    }
+                }
+            },
+            {
+                "name":"query",
+                "label":"添加播音员",
+                "refresh":true,
+                "content":{
+                    "widgettype":"urlwidget",
+                    "options":{
+                        "url":"{{entire_url('addvoice.ui')}}"
+                    }
+                }
+            }
+        ]
+    }
+}
diff --git a/wwwroot/js/myapp.js b/wwwroot/js/myapp.js
new file mode 100644
index 0000000..d34702e
--- /dev/null
+++ b/wwwroot/js/myapp.js
@@ -0,0 +1,10 @@
+var set_response_text_url = function(w, resp){
+    schedule_once(async_set_response_text_url.bind(w, w, resp), 0.1);
+}
+
+var async_set_response_text_url = async function(w, resp){
+    console.log('arguments=', arguments);
+    var url = await resp.text();
+    w.set_url(url);
+    w.play();
+}
diff --git a/wwwroot/query.ui b/wwwroot/query.ui
new file mode 100644
index 0000000..b383006
--- /dev/null
+++ b/wwwroot/query.ui
@@ -0,0 +1,28 @@
+{
+    "widgettype":"Form",
+    "options":{
+        "height":"70%",
+        "submit_url":"{{entire_url('v1/query')}}",
+        "fields":[
+            {
+                "name":"prompt",
+                "uitype":"text",
+                "required":true
+            },
+            {
+                "name":"userid",
+                "uitype":"str",
+                "label":"用户id",
+                "value":"user1",
+                "required":true
+            },
+            {
+                "name":"kdbname",
+                "uitype":"str",
+                "label":"知识库名",
+                "required":true,
+                "value":"testdb"
+            }
+        ]
+    }
+}
diff --git a/wwwroot/t.dspy b/wwwroot/t.dspy
new file mode 100644
index 0000000..05d96f3
--- /dev/null
+++ b/wwwroot/t.dspy
@@ -0,0 +1 @@
+return entire_url('/idfile') + "?path=/trhr"
diff --git a/wwwroot/test1.dspy b/wwwroot/test1.dspy
new file mode 100644
index 0000000..4347fba
--- /dev/null
+++ b/wwwroot/test1.dspy
@@ -0,0 +1 @@
+return await test1()
diff --git a/wwwroot/tts.ui b/wwwroot/tts.ui
new file mode 100644
index 0000000..7cc296e
--- /dev/null
+++ b/wwwroot/tts.ui
@@ -0,0 +1,56 @@
+{
+    "widgettype":"VBox",
+    "options":{
+        "height":"100%"
+    },
+    "subwidgets":[
+        {
+            "widgettype":"Filler",
+            "options":{},
+            "subwidgets":[
+                {
+                    "widgettype":"Form",
+                    "id":"form",
+                    "options":{
+                        "submit_url":"{{entire_url('/v1/inference')}}",
+                        "fields":[
+                            {
+                                "name":"speaker",
+                                "label":"播音员",
+                                "uitype":"code",
+                                "value":"main",
+                                "dataurl":"{{entire_url('/get_speakers.dspy')}}"
+                            },
+                            {
+                                "name":"prompt",
+                                "label":"文本",
+                                "uitype":"text",
+                                "uiparams":{
+                                    "rows":20,
+                                    "cols":80
+                                }
+                            }
+                        ]
+                    }
+                }
+            ]
+        },
+        {
+            "id":"audio",
+            "widgettype":"AudioPlayer",
+            "options":{
+                "height":"40px",
+                "auto_play":true
+            }
+        }
+    ],
+    "binds":[
+        {
+            "wid":"form",
+            "event":"submited",
+            "actiontype":"script",
+            "target":"audio",
+            "script":"set_response_text_url(this, event.params);"
+        }
+    ]
+}
diff --git a/wwwroot/tts_stream.ui b/wwwroot/tts_stream.ui
new file mode 100644
index 0000000..337fc5a
--- /dev/null
+++ b/wwwroot/tts_stream.ui
@@ -0,0 +1,53 @@
+{
+    "widgettype":"HBox",
+    "options":{
+        "height":"100%"
+    },
+    "subwidgets":[
+        {
+            "widgettype":"Form",
+            "id":"form",
+            "options":{
+                "width":"50%",
+                "title":"流式返回",
+                "submit_url":"{{entire_url('/v1/infer_stream')}}",
+                "fields":[
+                    {
+                        "name":"speaker",
+                        "label":"播音员",
+                        "uitype":"code",
+                        "value":"main",
+                        "dataurl":"{{entire_url('/get_speakers.dspy')}}"
+                    },
+                    {
+                        "name":"prompt",
+                        "label":"文本",
+                        "uitype":"text",
+                        "uiparams":{
+                            "rows":20,
+                            "cols":80
+                        }
+                    }
+                ]
+            }
+        },
+        {
+            "id":"audio",
+            "widgettype":"TextedAudioPlayer",
+            "options":{
+                "width": "50%",
+                "height":"100%",
+                "auto_play":true
+            }
+        }
+    ],
+    "binds":[
+        {
+            "wid":"form",
+            "event":"submited",
+            "actiontype":"script",
+            "target":"audio",
+            "script":"console.log('this=', this, event);this.set_stream_urls(event.params)"
+        }
+    ]
+}
"widgettype":"Form", + "id":"form", + "options":{ + "width":"50%", + "title":"流式返回", + "submit_url":"{{entire_url('/v1/infer_stream')}}", + "fields":[ + { + "name":"speaker", + "label":"播音员", + "uitype":"code", + "value":"main", + "dataurl":"{{entire_url('/get_speakers.dspy')}}" + }, + { + "name":"prompt", + "label":"文本", + "uitype":"text", + "uiparams":{ + "rows":20, + "cols":80 + } + } + ] + } + }, + { + "id":"audio", + "widgettype":"TextedAudioPlayer", + "options":{ + "width": "50%", + "height":"100%", + "auto_play":true + } + } + ], + "binds":[ + { + "wid":"form", + "event":"submited", + "actiontype":"script", + "target":"audio", +"script":"console.log('this=', this, event);this.set_stream_urls(event.params)" + } + ] +} diff --git a/wwwroot/v1/addvoice/index.dspy b/wwwroot/v1/addvoice/index.dspy new file mode 100644 index 0000000..3db8f8e --- /dev/null +++ b/wwwroot/v1/addvoice/index.dspy @@ -0,0 +1,11 @@ +debug(f'{params_kw=}') +try: + speaker = params_kw.speaker + ref_audio = params_kw.ref_voice + ref_text = params_kw.ref_text + await add_voice(speaker, ref_audio, ref_text) + return UiMessage(title='Success', message='add voice success') +except Exception as e: + exception(f'{e=}') + return UiError(title='Error', message='add voice error') + diff --git a/wwwroot/v1/index.md b/wwwroot/v1/index.md new file mode 100644 index 0000000..73278d6 --- /dev/null +++ b/wwwroot/v1/index.md @@ -0,0 +1,21 @@ +# API for F5TTS wraped web server +we apply following apis + +## addvoice + +* path: /v1/add_voice +* method: POST +* form data: + 1 ref_text: text + 2 ref_audio: vocal audio + 3 speaker: speaker name for ref_audio voice + +examples +``` +curl .../v1/add_voice \ + -F "speaker=Trump" \ + -F "ref_text=today is a good day" \ + -F "ref_audio=@goodday.wav" +``` + + diff --git a/wwwroot/v1/index.ui b/wwwroot/v1/index.ui new file mode 100644 index 0000000..144734a --- /dev/null +++ b/wwwroot/v1/index.ui @@ -0,0 +1,8 @@ +{ + "widgettype":"MdWidget", + "options":{ + "height":"100%", + "width":"100%", + "md_url":"{{entire_url('index.md')}}" + } +} diff --git a/wwwroot/v1/infer_stream/index.dspy b/wwwroot/v1/infer_stream/index.dspy new file mode 100644 index 0000000..4828d34 --- /dev/null +++ b/wwwroot/v1/infer_stream/index.dspy @@ -0,0 +1,7 @@ +debug(f'{params_kw=}') +async def g(): + speaker = params_kw.speaker or 'main' + async for d in inference_stream(params_kw.prompt, speaker): + yield d + +return await stream_response(request, g) diff --git a/wwwroot/v1/inference/index.dspy b/wwwroot/v1/inference/index.dspy new file mode 100644 index 0000000..190a9f8 --- /dev/null +++ b/wwwroot/v1/inference/index.dspy @@ -0,0 +1,7 @@ +# normal mode +debug(f'{params_kw=}') +speaker = params_kw.speaker or 'main' +path = await infer(params_kw.prompt, speaker) +ret = entire_url(f'/idfile?path={path}') +debug(f'inference/index.dspy:return url={ret}') +return ret