yumoqing df8aafe1d8 feat: add TTS and ASR audio API endpoints
- POST /v1/audio/speech (TTS): MiniMax Speech 2.6 Turbo/HD, 2.5 HD, F5-TTS local
- POST /v1/audio/transcriptions (ASR): qwen3-asr-flash, Nvidia parakeet
- Add comprehensive docs for both endpoints in API.md
- Update load_path.py RBAC (logined + customer roles)
2026-06-04 13:58:26 +08:00

75 lines
2.1 KiB
Plaintext

# OpenAI-compatible Text-to-Speech API
# POST /v1/audio/speech
# Required params: model, catelogid (catalog id or name, e.g. "tts"), prompt (text to synthesize)
# Optional params: speaker (voice_id), speed, emotion, transno (task ID; generated when omitted)
#
# Example request:
# {
# "model": "speech-2.6-turbo",
# "catelogid": "tts",
# "prompt": "你好,欢迎使用语音合成服务",
# "speaker": "female-tianmei",
# "speed": 1.0,
# "emotion": "happy"
# }
#
# Response (stream; NOTE(review): described here as hex audio chunks, but the example below shows base64 — confirm the actual encoding):
# {
# "status": "SUCCEEDED",
# "audio": "base64_encoded_audio_data"
# }
userid = await get_user()
userorgid = await get_userorgid()
if userid is None:
debug('need login')
return openai_403()
# Validate required parameters
if not params_kw.model:
d = return_error('Missing required parameter: model')
return json_response(d, status=400)
if not params_kw.catelogid:
d = return_error('Missing required parameter: catelogid')
return json_response(d, status=400)
if not params_kw.prompt:
d = return_error('Missing required parameter: prompt (text to synthesize)')
return json_response(d, status=400)
lctype = params_kw.catelogid
env = request._run_ns
async with get_sor_context(env, 'llmage') as sor:
# Look up llm by model name and catalog type through llm_api_map
sql = """select distinct a.* from llm a
join llm_api_map m on a.id = m.llmid
join llmcatelog b on m.llmcatelogid = b.id
where (b.id = ${lctype}$ OR b.name = ${lctype}$)
and a.model=${model}$
and a.status = 'published'"""
recs = await sor.sqlExe(sql, {
'lctype': lctype,
'model': params_kw.model
})
if len(recs) == 0:
debug(f'{params_kw.model=} not found for catalog {lctype}')
return openai_400()
params_kw.llmid = recs[0].id
debug(f'{params_kw.llmid=}')
# Check balance
f = await checkCustomerBalance(params_kw.llmid, userid, userorgid)
if not f:
debug(f'{userid=} balance not enough')
return openai_429()
# Generate task ID and attach to params
if not params_kw.transno:
params_kw.transno = getID()
# Call inference (TTS can be stream or sync depending on model)
return await inference(request, env=env)