2026-04-01 13:00:15 +08:00

51 lines
1.3 KiB
Python

async def gen():
    """Async generator yielding the LLM inference output stream.

    When ``params_kw.stream`` is truthy, each chunk from
    ``inference_generator`` is wrapped in an SSE-style ``data: ...``
    frame and a final ``data: [DONE]`` sentinel is emitted; otherwise
    chunks are passed through unchanged.
    """
    # FIX: removed dead local `env = request._run_ns.copy()` — it was
    # assigned but never read anywhere inside this generator.
    f = partial(inference_generator, request, params_kw=params_kw)
    if params_kw.stream:
        # NOTE(review): SSE events conventionally terminate with a blank
        # line (b'\n\n'); confirm the consumer accepts single-'\n' framing.
        async for chunk in f():
            yield b'data: ' + chunk + b'\n'
        yield b'data: [DONE]'
    else:
        async for chunk in f():
            yield chunk
debug(f'{params_kw=}')
# Catalog name used to look the model up below ('文生文' = text-to-text).
lctype = '文生文'
if params_kw.off_peak:
    # Normalize the client-supplied off_peak flag to a strict bool.
    # BUGFIX: the original membership list was [True, "Y" "y", 1, "1"] —
    # the missing comma made Python concatenate the adjacent string
    # literals into "Yy", so the values "Y" and "y" were never matched
    # and were wrongly normalized to False.
    params_kw.off_peak = params_kw.off_peak in (True, "Y", "y", 1, "1")
userid = await get_user()
userorgid = await get_userorgid()
if userid is None:
return openai_403()
if not params_kw.prompt and not params_kw.messages:
d = return_error('Missing need data(prompt or messages)')
return json_response(d, status=400)
# Look up the requested model in the `llmage` data source, check the
# caller's balance, then hand the request to the streaming generator.
env = request._run_ns
async with get_sor_context(env, 'llmage') as sor:
# Project SQL template: ${name}$ markers are sor.sqlExe bind parameters.
sql = """select a.* from llm a, llmcatelog b
where a.llmcatelogid=b.id
and a.model=${model}$
and b.name = ${lctype}$"""
recs = await sor.sqlExe(sql, {
'lctype': lctype,
'model': params_kw.model or 'qwen3-max'
})
# Unknown model (or model not in this catalog) -> 400-style error.
if len(recs) == 0:
return openai_400()
params_kw.llmid = recs[0].id
# Balance gate: falsy result means the org's quota is exhausted -> 429.
f = await checkCustomerBalance(params_kw.llmid, userorgid)
if not f:
return openai_429()
# Stream the inference output (the nested `gen` generator) to the client.
return await env.stream_response(request, gen)
# NOTE(review): these two lines appear to belong to a different (outer)
# scope than the handler code above — if they shared that scope they
# would be unreachable after the preceding `return`. Indentation was
# lost in this paste; confirm scoping against the original file.
env = DictObject(**globals())
return await inference(request, env=env)