first commit

This commit is contained in:
yumoqing 2025-07-16 15:07:03 +08:00
commit f5c325b96c
37 changed files with 992 additions and 0 deletions

90
README.md Normal file

@@ -0,0 +1,90 @@
# Wrapping F5-TTS as a Service
## Dependencies
Dependencies fall into two groups: operating-system level and Python level.
### Operating-system level
* Install FFmpeg
```
sudo apt install ffmpeg
```
* Install the GPU driver
For NVIDIA cards, download the driver matching your GPU model from the [NVIDIA website](https://www.nvidia.com/en-us/drivers/details/241089/), then run
```
sudo sh <the downloaded driver file>
```
### Python dependencies
* Install the PyTorch packages F5-TTS depends on
```
pip install torch==2.3.0+cu118 torchaudio==2.3.0+cu118 --extra-index-url https://download.pytorch.org/whl/cu118
```
* Install F5-TTS
```
pip install git+https://github.com/SWivid/F5-TTS.git
```
* Install the other dependencies
```
pip install git+https://git.kaiyuancloud.cn/yumoqing/apppublic.git
pip install git+https://git.kaiyuancloud.cn/yumoqing/sqlor.git
pip install git+https://git.kaiyuancloud.cn/yumoqing/ahserver.git
pip install cn2an pycld2
```
## Installation and deployment
Carry out the following steps:
* Install the operating-system dependencies
* Add an operating-system user named f5tts (see the sketch after this list)
* Log in as the f5tts user
* Install the Python dependencies
* Clone the project
* Set up the runtime environment
* Start the service
* Stop the service
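Creating the service user and logging in might look like this; a minimal sketch assuming a Debian/Ubuntu host:
```
sudo useradd -m -s /bin/bash f5tts
su - f5tts
```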
* Clone the project
```
cd ~
git clone git@git.kaiyuancloud.cn:yumoqing/f5tts.git
mv f5tts/* ~
rm -rf f5tts
```
* Set up the runtime environment
```
cd script
bash install.sh
```
* Start the service
```
sudo systemctl start f5tts
```
* Stop the service
```
killname f5tts.py
```
## API
### Add a speaker
* URL: /api/addvoice
* Method: POST
* Form data:
  * speaker: speaker name
  * ref_audio: the reference audio file
  * ref_text: transcript of the reference audio
* Returns: a UiMessage JSON document on success, or a UiError JSON document on error
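For illustration, a request might look like the following; the host and port come from conf/config.json, and all field values are placeholders. Note that the bundled form (wwwroot/addvoice.ui) posts the audio under the field name ref_voice, which is also what the server-side handler reads:
```
curl http://localhost:9995/api/addvoice \
  -F "speaker=alice" \
  -F "ref_audio=@alice_sample.wav" \
  -F "ref_text=transcript of alice_sample.wav"
```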
### TTS conversion
* URL: /api/inference
* Method: POST
* Form data:
  * prompt: the text to synthesize
  * speaker: speaker name
* Returns: the URL of the generated wav file
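A sketch of a full round trip, assuming the service listens on localhost:9995 (the port in conf/config.json). Per wwwroot/v1/inference/index.dspy the response body is a URL pointing at the generated wav, so a second request downloads the audio:
```
url=$(curl -s http://localhost:9995/api/inference \
  -d "speaker=main" \
  --data-urlencode "prompt=Hello, world")
curl -s "$url" -o output.wav
```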
### Streaming TTS conversion
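Judging from app/f5tts.py and wwwroot/tts_stream.ui, the streaming endpoint is served at /v1/infer_stream and returns one JSON object per line: each line carries the chunk text, a base64-encoded wav in audio, plus sample_rate and duration, and the final line sets done to true. A hypothetical client (field names from the handler, values are placeholders):
```
curl -sN http://localhost:9995/v1/infer_stream \
  -d "speaker=main" \
  -d "prompt=Hello, world"
# each response line looks roughly like:
# {"text": "...", "audio": "<base64 wav>", "sample_rate": 24000, "duration": 1.23, "done": false}
```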

349
app/f5tts.py Normal file

@@ -0,0 +1,349 @@
import os
import io
import base64
import sys
import asyncio
import codecs
from traceback import format_exc
import re
import numpy as np
import soundfile as sf
# import tomli
from cached_path import cached_path
from appPublic.textsplit import split_text_with_dialog_preserved
from appPublic.uniqueID import getID
from ahserver.serverenv import get_serverenv
from filetxt.loader import fileloader
import pycld2 as cld
import cn2an
from f5_tts.model import DiT, UNetT
from f5_tts.infer.utils_infer import (
mel_spec_type,
target_rms,
cross_fade_duration,
nfe_step,
cfg_strength,
sway_sampling_coef,
speed,
fix_duration,
infer_process,
load_model,
load_vocoder,
preprocess_ref_audio_text,
remove_silence_for_generated_wav,
)
import json
from time import time, sleep
from appPublic.dictObject import DictObject
from appPublic.folderUtils import temp_file
from appPublic.jsonConfig import getConfig
from appPublic.worker import awaitify
from appPublic.log import debug, info
from appPublic.background import Background
from ahserver.webapp import webapp
from ahserver.serverenv import ServerEnv
from ahserver.filestorage import FileStorage
# Local default inference parameters; these override the values imported
# from f5_tts.infer.utils_infer above.
n_mel_channels = 100
hop_length = 256
target_rms = 0.1
nfe_step = 32  # 16, 32
cfg_strength = 2.0
ode_method = "euler"
sway_sampling_coef = -1.0
speed = 1.0
def audio_ndarray_to_base64(waveform: np.ndarray, sample_rate: int = 16000) -> str:
	# for mono audio, make sure the shape is (samples, 1)
	if waveform.ndim == 1:
		waveform = waveform.reshape(-1, 1)
	# write WAV-formatted bytes into an in-memory buffer
	buffer = io.BytesIO()
	sf.write(buffer, waveform, samplerate=sample_rate, format='WAV')
	buffer.seek(0)
	# base64-encode the buffer contents
	b64_audio = base64.b64encode(buffer.read()).decode('utf-8')
	return b64_audio
def write_wav_buffer(wav, nchannels, framerate):
	fs = FileStorage()
	fn = fs._name2path(f'{getID()}.wav', userid='tmp')
	# the directory may already exist from a previous call
	os.makedirs(os.path.dirname(fn), exist_ok=True)
	debug(fn)
	sf.write(fn, wav, framerate)
	return fs.webpath(fn)
async_write_wav_buffer = awaitify(write_wav_buffer)
def detect_language(txt):
isReliable, textBytesFound, details = cld.detect(txt)
debug(f' detect_language():{isReliable=}, {textBytesFound=}, {details=} ')
return details[0][1]
class F5TTS:
def __init__(self):
self.config = getConfig()
# self.vocos = load_vocoder(is_local=True, local_path="../checkpoints/charactr/vocos-mel-24khz")
self.load_model()
self.setup_voices()
def load_model(self):
self.vocoder = load_vocoder(vocoder_name=self.config.vocoder_name,
is_local=True,
local_path=self.config.vocoder_local_path)
# load models
model_cls = DiT
model_cfg = dict(dim=1024, depth=22, heads=16,
ff_mult=2, text_dim=512, conv_layers=4)
ckpt_file = self.config.ckpts_path
self.model = load_model(model_cls, model_cfg, ckpt_file,
mel_spec_type=self.config.vocoder_name,
vocab_file=self.config.vocab_file)
self.model = self.model.to(self.config.device)
self.lock = asyncio.Lock()
def f5tts_infer(self, ref_audio, ref_text, gen_text, speed_factor):
audio, final_sample_rate, spectragram = \
infer_process(ref_audio,
ref_text,
gen_text,
self.model,
self.vocoder,
mel_spec_type=self.config.vocoder_name,
speed=self.config.speed or speed)
if audio is not None:
audio = self.speed_convert(audio, speed_factor)
else:
return None
debug(f'audio shape {audio.shape}, {gen_text=}')
return {
'text': gen_text,
'audio':audio,
'sample_rate':final_sample_rate
}
def speed_convert(self, output_audio_np, speed_factor):
original_len = len(output_audio_np)
speed_factor = max(0.1, min(speed_factor, 5.0))
target_len = int(
original_len / speed_factor
) # Target length based on speed_factor
if (
target_len != original_len and target_len > 0
): # Only interpolate if length changes and is valid
x_original = np.arange(original_len)
x_resampled = np.linspace(0, original_len - 1, target_len)
output_audio_np = np.interp(x_resampled, x_original, output_audio_np)
output_audio_np = output_audio_np.astype(np.float32)
return output_audio_np
def get_speakers(self):
t = [{'value':s, 'text':s} for s in self.speakers.keys() ]
t.append({'value':'main', 'text':'main'})
return t
async def split_text(self, text_gen, speaker):
chunks = split_text_with_dialog_preserved(text_gen)
debug(f'{len(chunks)=}')
# reg2 = self.config.speaker_match
reg2 = r"\[\[(\w+)\]\]"
ret = []
for text in chunks:
			if text in ('\r', ''):
continue
lang = await awaitify(detect_language)(text)
if lang == 'zh':
text = await awaitify(cn2an.transform)(text, 'an2cn')
voice = speaker
match = re.match(reg2, text)
if match:
voice = match[1]
if voice not in self.voices:
voice = speaker
text = re.sub(reg2, "", text)
gen_text = text.strip()
ref_audio = self.voices[voice]["ref_audio"]
ref_text = self.voices[voice]["ref_text"]
ret.append({'text':gen_text, 'ref_audio':ref_audio, 'ref_text':ref_text})
return ret
async def infer_stream(self, prompt, speaker, speed_factor=1.0):
async for a in self._inference_stream(prompt, speaker, speed_factor=speed_factor):
wavdata = a['audio']
samplerate = a['sample_rate']
b = await async_write_wav_buffer(wavdata, 1, samplerate)
yield b
async def _inference_stream(self, prompt, speaker, speed_factor=1.0):
chunks = await self.split_text(prompt, speaker)
debug(f'{len(chunks)=}')
for chunk in chunks:
gen_text = chunk['text']
ref_audio = chunk['ref_audio']
ref_text = chunk['ref_text']
infer = awaitify(self.f5tts_infer)
try:
d = await infer(ref_audio, ref_text, gen_text, speed_factor)
if d is not None:
yield d
			except Exception:
debug(f'{gen_text=} inference error\n{format_exc()}')
async def inference_stream(self, prompt, speaker, speed_factor=1.0):
total_duration = 0
async for d in self._inference_stream(prompt, speaker, speed_factor=speed_factor):
			samples = d['audio'].shape[0]
			duration = samples / d['sample_rate']
			total_duration += duration
audio_b64=audio_ndarray_to_base64(d['audio'], d['sample_rate'])
d['audio'] = audio_b64
d['duration'] = duration
d['done'] = False
txt = json.dumps(d, ensure_ascii=False)
yield txt + '\n'
d = {
'done': True,
'duration': total_duration
}
txt = json.dumps(d, ensure_ascii=False)
yield txt + '\n'
def setup_voices(self):
config = getConfig()
workdir = config.workdir
		debug(f'workdir={workdir}')
with codecs.open(config.speakers_file, 'r', 'utf-8') as f:
b = f.read()
self.speakers = json.loads(b)
fn = f'{workdir}/samples/{config.ref_audio}'
ref_audio, ref_text = preprocess_ref_audio_text(fn,
config.ref_text)
self.voices = {
"main":{
'ref_text':ref_text,
'ref_audio':ref_audio
}
}
for k,v in self.speakers.items():
fn = f'{workdir}/samples/{v["ref_audio"]}'
ref_audio, ref_text = preprocess_ref_audio_text(fn,
v['ref_text'])
self.voices[k] = {
'ref_text':ref_text,
'ref_audio':ref_audio
}
def copyfile(self, src, dest):
with open(src, 'rb') as f:
b = f.read()
with open(dest, 'wb') as f1:
f1.write(b)
async def add_voice(self, speaker, ref_audio, ref_text):
config = getConfig()
ref_audio = FileStorage().realPath(ref_audio)
workdir = config.workdir
filename = f'{getID()}.wav'
fn = f'{workdir}/samples/{filename}'
		await awaitify(self.copyfile)(ref_audio, fn)
		# the upload has been copied into samples/, remove the temporary file
		os.unlink(ref_audio)
		self.speakers[speaker] = {
			'ref_text':ref_text,
			'ref_audio':filename
		}
		f = awaitify(preprocess_ref_audio_text)
		# preprocess the copied file, since the original upload was removed above
		ref_audio, ref_text = await f(fn, ref_text)
self.voices[speaker] = {
'ref_text':ref_text,
'ref_audio':filename
}
with codecs.open(config.speakers_file, 'w', 'utf-8') as f:
f.write(json.dumps(self.speakers, indent=4, ensure_ascii=False))
return None
async def _inference(self, prompt, speaker, speed_factor=1.0):
generated_audio_segments = []
remove_silence = self.config.remove_silence or False
final_sample_rate = 16000
async for d in self._inference_stream(prompt,
speaker,
speed_factor=speed_factor):
audio = d.get('audio', None)
if audio is None:
debug(f'audio is none, {d=}')
continue
final_sample_rate = d['sample_rate']
generated_audio_segments.append(audio)
if generated_audio_segments:
final_wave = np.concatenate(generated_audio_segments)
debug(f'{prompt=}, {final_sample_rate=}')
return await async_write_wav_buffer(final_wave, 1, final_sample_rate)
else:
			debug(f'{prompt=}: no audio generated')
def UiError(title="出错", message="出错啦", timeout=5):
return {
"widgettype":"Error",
"options":{
"author":"tr",
"timeout":timeout,
"cwidth":15,
"cheight":10,
"title":title,
"auto_open":True,
"auto_dismiss":True,
"auto_destroy":True,
"message":message
}
}
def UiMessage(title="消息", message="后台消息", timeout=5):
return {
"widgettype":"Message",
"options":{
"author":"tr",
"timeout":timeout,
"cwidth":15,
"cheight":10,
"title":title,
"auto_open":True,
"auto_dismiss":True,
"auto_destroy":True,
"message":message
}
}
def test1():
sleep(36000)
return {}
f5 = None
def init():
global f5
g = ServerEnv()
f5 = F5TTS()
g.tts_engine = f5
g.infer_stream = f5.infer_stream
g.inference_stream = f5.inference_stream
g.get_speakers = f5.get_speakers
g.infer = f5._inference
g.test1 = awaitify(test1)
g.add_voice = f5.add_voice
g.UiError = UiError
	g.fileloader = fileloader
g.UiMessage = UiMessage
if __name__ == '__main__':
webapp(init)

14
app/w4a2wav.py Normal file

@@ -0,0 +1,14 @@
import os
import sys
from pydub import AudioSegment
if len(sys.argv) < 2:
	print(f'usage: {sys.argv[0]} <m4a file>')
	exit(1)
m4afn = sys.argv[1]
wavfn = m4afn[:-3] + 'wav'
# Load the m4a file
audio = AudioSegment.from_file(m4afn, format="m4a")
# Export the audio as a wav file
audio.export(wavfn, format="wav")

87
conf/config.json Normal file

@@ -0,0 +1,87 @@
{
"speaker_match":"\\[\\[(\\w+)\\]\\]",
"language":{
"zh":{
"sentence_splitter":"[。?!]|\r?\n"
},
"en":{
"sentence_splitter":"[.?!] |\r?\n"
}
},
"sample_rate":16000,
"vocab_file":"",
"ckpts_path_bak":"/share/models/SWivid/F5-TTS/F5TTS_Base/model_1200000.pt",
"ckpts_path":"/share/models/SWivid/F5-TTS/F5TTS_v1_Base/model_1250000.safetensors",
"speakers_file":"$[workdir]$/conf/speakers.json",
"vocoder_name":"vocos",
"vocoder_local_path":"/share/models/charactr/vocos-mel-24khz",
"remove_silence":false,
"modelname":"F5-TTS",
"device":"cuda:0",
"ref_audio":"ttt.wav",
"ref_text":"快点吃饭,上课要迟到了。",
"cross_fade_duration":0,
"workdir":"$[workdir]$",
"filesroot":"$[workdir]$/files",
"logger":{
"name":"f5tts",
"levelname":"info",
"logfile":"$[workdir]$/logs/f5tts.log"
},
"website":{
"paths":[
["$[workdir]$/wwwroot",""]
],
"client_max_size":10000,
"host":"0.0.0.0",
"port":9995,
"coding":"utf-8",
"ssl_gg":{
"crtfile":"$[workdir]$/conf/www.bsppo.com.pem",
"keyfile":"$[workdir]$/conf/www.bsppo.com.key"
},
"indexes":[
"index.html",
"index.tmpl",
"index.ui",
"index.dspy",
"index.md"
],
"startswiths":[
{
"leading":"/idfile",
"registerfunction":"idfile"
}
],
"processors":[
[".ws","ws"],
[".xterm","xterm"],
[".proxy","proxy"],
[".llm", "llm"],
[".llms", "llms"],
[".llma", "llma"],
[".xlsxds","xlsxds"],
[".sqlds","sqlds"],
[".tmpl.js","tmpl"],
[".tmpl.css","tmpl"],
[".html.tmpl","tmpl"],
[".bcrud", "bricks_crud"],
[".tmpl","tmpl"],
[".app","app"],
[".bui","bui"],
[".ui","bui"],
[".dspy","dspy"],
[".md","md"]
],
"rsakey":{
"privatekey":"$[workdir]$/conf/rsa_private_key.pem",
"publickey":"$[workdir]$/conf/rsa_public_key.pem"
},
"session_max_time":3000,
"session_issue_time":2500,
"session_redis_notuse":{
"url":"redis://127.0.0.1:6379"
}
}
}

6
conf/speakers.json Normal file

@@ -0,0 +1,6 @@
{
"ymq": {
"ref_text": "\u8f7b\u91cf\u5e94\u7528\u670d\u52a1\u5668\u5907\u6848\u6761\u4ef6\uff1a\u8d2d\u4e70\u65f6\u957f\u57283\u4e2a\u6708\u53ca\u4ee5\u4e0a",
"ref_audio": "ymq.wav"
}
}

16
f5tts.service Normal file

@@ -0,0 +1,16 @@
[Unit]
Wants=systemd-networkd.service
[Service]
User=ymq
Group=ymq
WorkingDirectory=/share/ymq/run/f5tts
Type=forking
ExecStart=/share/ymq/run/f5tts/start.sh
ExecStop=/share/ymq/run/f5tts/stop.sh
StandardOutput=append:/var/log/f5tts/f5tts.log
StandardError=append:/var/log/f5tts/f5tts.log
SyslogIdentifier=f5tts
[Install]
WantedBy=multi-user.target

62
install Executable file

@@ -0,0 +1,62 @@
#!/usr/bin/python3
import os
import sys
import codecs
if len(sys.argv) < 2:
print(f'Usage:\n{sys.argv[0]} venvname')
sys.exit(1)
user = os.getlogin()
home = os.environ.get('HOME')
venv = sys.argv[1]
if not os.path.exists(f'{home}/{venv}'):
os.system(f'python3 -m venv ~/{venv}')
pwd = os.getcwd()
name = os.path.basename(pwd)
service = f"""[Unit]
Description={name} service
Wants=systemd-networkd.service
Requires=nginx.service
[Service]
Type=forking
ExecStart=su - {user} -c "{pwd}/script/{name}.sh"
ExecStop=su - {user} -c "{home}/bin/killname app/{name}.py"
[Install]
WantedBy=multi-user.target
"""
with codecs.open(f'./script/{name}.service', 'w', 'utf-8') as f:
f.write(service)
with codecs.open(f'./script/{name}.sh', 'w', 'utf-8') as f:
f.write(f"""#!/usr/bin/bash
killname {pwd}/app/{name}.py
{home}/{venv}/bin/python {pwd}/app/{name}.py -w {pwd} > {pwd}/logs/stderr.log 2>&1 &
exit 0
""")
with codecs.open(f'./script/install.sh', 'w', 'utf-8') as f:
f.write(f"""#!/usr/bin/bash
sudo cp {pwd}/script/{name}.service /etc/systemd/system
sudo systemctl enable {name}.service
sudo systemctl start {name}
""")
if not os.path.exists(f'{home}/bin'):
os.mkdir(f'{home}/bin')
if not os.path.exists(f'{home}/bin/killname'):
with codecs.open(f'{home}/bin/killname', 'w', 'utf-8') as f:
f.write("""#!/usr/bin/bash
ps -ef|grep "$1"|grep -v grep|awk '{print("kill -9", $2)}'|sh
""")
os.system(f'chmod +x {home}/bin/*')
os.system(f'chmod +x {pwd}/script/*.sh')
os.system(f'{pwd}/script/install.sh')

0
logs/stderr.log Normal file

11
requirements.txt Normal file

@@ -0,0 +1,11 @@
numpy
soundfile
cached_path
redis
pycld2
cn2an
git+https://git.kaiyuancloud.cn/yumoqing/apppublic
git+https://git.kaiyuancloud.cn/yumoqing/sqlor
git+https://git.kaiyuancloud.cn/yumoqing/ahserver
git+https://git.kaiyuancloud.cn/yumoqing/filetxt
# git+https://github.com/SWivid/F5-TTS

BIN
samples/test2.m4a Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
samples/ttt.m4a Normal file

Binary file not shown.

BIN
samples/ttt.wav Normal file

Binary file not shown.

BIN
samples/ymq.wav Normal file

Binary file not shown.

10
script/f5tts.service Normal file

@@ -0,0 +1,10 @@
[Unit]
Wants=systemd-networkd.service
[Service]
User=ymq
Group=ymq
WorkingDirectory=/share/ymq/run/f5tts
ExecStart=/share/ymq/run/f5tts/f5tts.env/bin/python app/f5tts.py -p 9995
[Install]
WantedBy=multi-user.target

5
script/f5tts.sh Executable file

@@ -0,0 +1,5 @@
#!/usr/bin/bash
/d/f5tts/bin/killname /data/f5tts/app/f5tts.py
/d/f5tts/py3/bin/python /data/f5tts/app/f5tts.py -w /data/f5tts > /data/f5tts/logs/stderr.log 2>&1 &
exit 0

4
script/install.sh Executable file

@@ -0,0 +1,4 @@
#!/usr/bin/bash
sudo cp f5tts.service /etc/systemd/system
sudo systemctl enable f5tts.service
sudo systemctl start f5tts

3
script/speakers.json Normal file

@@ -0,0 +1,3 @@
{
}

10
start.sh Executable file

@@ -0,0 +1,10 @@
#!/usr/bin/bash
echo start 4 instances of the f5tts engine
rundir=/share/ymq/run/f5tts
CUDA_VISIBLE_DEVICES=6 ${rundir}/f5tts.env/bin/python ${rundir}/app/f5tts.py -w ${rundir} -p 9995 &
CUDA_VISIBLE_DEVICES=6 ${rundir}/f5tts.env/bin/python ${rundir}/app/f5tts.py -w ${rundir} -p 9995 &
CUDA_VISIBLE_DEVICES=6 ${rundir}/f5tts.env/bin/python ${rundir}/app/f5tts.py -w ${rundir} -p 9995 &
CUDA_VISIBLE_DEVICES=6 ${rundir}/f5tts.env/bin/python ${rundir}/app/f5tts.py -w ${rundir} -p 9995 &
exit 0

4
stop.sh Executable file

@@ -0,0 +1,4 @@
#!/usr/bin/bash
killname f5tts.py

BIN
wwwroot/.tts.ui.swp Normal file

Binary file not shown.

31
wwwroot/add.ui Normal file

@@ -0,0 +1,31 @@
{
"widgettype":"Form",
"options":{
"height":"70%",
"title":"向知识库添加文件",
"description":"可以添加的文件类型有:文本文件(.txt数据文件(.csv)excel文件.xlsx, .xls)word文件.doc, .docx), 演示文件(.ppt, .pptx), pdf文件",
"method":"POST",
"submit_url":"{{entire_url('v1/add')}}",
"fields":[
{
"name":"file_path",
"uitype":"file",
"required":true
},
{
"name":"userid",
"uitype":"str",
"label":"用户id",
"value":"user1",
"required":true
},
{
"name":"kdbname",
"uitype":"str",
"label":"知识库名",
"required":true,
"value":"testdb"
}
]
}
}

41
wwwroot/addvoice.ui Normal file

@@ -0,0 +1,41 @@
{
"widgettype":"VBox",
"options":{
"height":"100%"
},
"subwidgets":[
{
"widgettype":"Filler",
"options":{},
"subwidgets":[
{
"widgettype":"Form",
"id":"form",
"options":{
"title":"添加播音员",
"method":"POST",
"description":"通过输入播音员id录音和录音文字说明来添加播音员",
"submit_url":"{{entire_url('/v1/addvoice')}}",
"fields":[
{
"name":"speaker",
"label":"播音员id",
"uitype":"str"
},
{
"name":"ref_voice",
"label":"语音",
"uitype":"audiorecorder"
},
{
"name":"ref_text",
"label":"语音文字",
"uitype":"text"
}
]
}
}
]
}
]
}

1
wwwroot/get_speakers.dspy Normal file

@@ -0,0 +1 @@
return get_speakers()

45
wwwroot/index.ui Normal file

@@ -0,0 +1,45 @@
{
"widgettype":"TabPanel",
"options":{
"tab_wide":"auto",
"interval":"15px",
"height":"100%",
"width":"100%",
"tab_pos":"top",
"items":[
{
"name":"add",
"label":"文本转语音",
"refresh":true,
"content":{
"widgettype":"urlwidget",
"options":{
"url":"{{entire_url('tts.ui')}}"
}
}
},
{
"name":"add1",
"label":"文本转语音(stream)",
"refresh":true,
"content":{
"widgettype":"urlwidget",
"options":{
"url":"{{entire_url('tts_stream.ui')}}"
}
}
},
{
"name":"query",
"label":"添加播音员",
"refresh":true,
"content":{
"widgettype":"urlwidget",
"options":{
"url":"{{entire_url('addvoice.ui')}}"
}
}
}
]
}
}

10
wwwroot/js/myapp.js Normal file

@@ -0,0 +1,10 @@
var set_response_text_url = function(w, resp){
schedule_once(async_set_response_text_url.bind(w, w, resp), 0.1);
}
var async_set_response_text_url = async function(w, resp){
console.log('arguments=', arguments);
var url = await resp.text();
w.set_url(url);
w.play();
}

28
wwwroot/query.ui Normal file

@@ -0,0 +1,28 @@
{
"widgettype":"Form",
"options":{
"height":"70%",
"submit_url":"{{entire_url('v1/query')}}",
"fields":[
{
"name":"prompt",
"uitype":"text",
"required":true
},
{
"name":"userid",
"uitype":"str",
"label":"用户id",
"value":"user1",
"required":true
},
{
"name":"kdbname",
"uitype":"str",
"label":"知识库名",
"required":true,
"value":"testdb"
}
]
}
}

1
wwwroot/t.dspy Normal file

@@ -0,0 +1 @@
return entire_url('/idfile') + "?path=/trhr"

1
wwwroot/test1.dspy Normal file

@@ -0,0 +1 @@
return await test1()

56
wwwroot/tts.ui Normal file

@@ -0,0 +1,56 @@
{
"widgettype":"VBox",
"options":{
"height":"100%"
},
"subwidgets":[
{
"widgettype":"Filler",
"options":{},
"subwidgets":[
{
"widgettype":"Form",
"id":"form",
"options":{
"submit_url":"{{entire_url('/v1/inference')}}",
"fields":[
{
"name":"speaker",
"label":"播音员",
"uitype":"code",
"value":"main",
"dataurl":"{{entire_url('/get_speakers.dspy')}}"
},
{
"name":"prompt",
"label":"文本",
"uitype":"text",
"uiparams":{
"rows":20,
"cols":80
}
}
]
}
}
]
},
{
"id":"audio",
"widgettype":"AudioPlayer",
"options":{
"height":"40px",
"auto_play":true
}
}
],
"binds":[
{
"wid":"form",
"event":"submited",
"actiontype":"script",
"target":"audio",
"script":"set_response_text_url(this, event.params);"
}
]
}

53
wwwroot/tts_stream.ui Normal file

@@ -0,0 +1,53 @@
{
"widgettype":"HBox",
"options":{
"height":"100%"
},
"subwidgets":[
{
"widgettype":"Form",
"id":"form",
"options":{
"width":"50%",
"title":"流式返回",
"submit_url":"{{entire_url('/v1/infer_stream')}}",
"fields":[
{
"name":"speaker",
"label":"播音员",
"uitype":"code",
"value":"main",
"dataurl":"{{entire_url('/get_speakers.dspy')}}"
},
{
"name":"prompt",
"label":"文本",
"uitype":"text",
"uiparams":{
"rows":20,
"cols":80
}
}
]
}
},
{
"id":"audio",
"widgettype":"TextedAudioPlayer",
"options":{
"width": "50%",
"height":"100%",
"auto_play":true
}
}
],
"binds":[
{
"wid":"form",
"event":"submited",
"actiontype":"script",
"target":"audio",
"script":"console.log('this=', this, event);this.set_stream_urls(event.params)"
}
]
}

11
wwwroot/v1/addvoice/index.dspy Normal file

@@ -0,0 +1,11 @@
debug(f'{params_kw=}')
try:
speaker = params_kw.speaker
ref_audio = params_kw.ref_voice
ref_text = params_kw.ref_text
await add_voice(speaker, ref_audio, ref_text)
return UiMessage(title='Success', message='add voice success')
except Exception as e:
exception(f'{e=}')
return UiError(title='Error', message='add voice error')

21
wwwroot/v1/index.md Normal file

@@ -0,0 +1,21 @@
# API for the F5TTS wrapped web server
We provide the following APIs.
## addvoice
* path: /v1/add_voice
* method: POST
* form data:
  1. ref_text: transcript of the reference audio
  2. ref_audio: the reference (vocal) audio
  3. speaker: speaker name for the ref_audio voice
Example:
```
curl .../v1/add_voice \
-F "speaker=Trump" \
-F "ref_text=today is a good day" \
-F "ref_audio=@goodday.wav"
```

8
wwwroot/v1/index.ui Normal file

@@ -0,0 +1,8 @@
{
"widgettype":"MdWidget",
"options":{
"height":"100%",
"width":"100%",
"md_url":"{{entire_url('index.md')}}"
}
}

7
wwwroot/v1/infer_stream/index.dspy Normal file

@@ -0,0 +1,7 @@
debug(f'{params_kw=}')
async def g():
speaker = params_kw.speaker or 'main'
async for d in inference_stream(params_kw.prompt, speaker):
yield d
return await stream_response(request, g)

7
wwwroot/v1/inference/index.dspy Normal file

@@ -0,0 +1,7 @@
# normal mode
debug(f'{params_kw=}')
speaker = params_kw.speaker or 'main'
path = await infer(params_kw.prompt, speaker)
ret = entire_url(f'/idfile?path={path}')
debug(f'inference/index.dspy:return url={ret}')
return ret