56 lines
1.6 KiB
Plaintext
56 lines
1.6 KiB
Plaintext
async def gen():
    """Yield the model output produced by ``inference_generator``.

    Reads ``request`` and ``params_kw`` from the enclosing script scope.
    When ``params_kw.stream`` is truthy, every chunk is emitted as
    ``data: <chunk>\\n`` and the stream is closed with ``data: [DONE]\\n\\n``;
    otherwise chunks are passed through untouched.
    """
    # Evaluate the flag once, before iteration starts, so the framing cannot
    # change halfway through a response.
    streaming = params_kw.stream
    async for chunk in inference_generator(request, params_kw=params_kw):
        # NOTE(review): each framed chunk ends with a single '\n'; this
        # presumably relies on the chunk itself carrying a trailing newline
        # to form a complete event -- confirm against inference_generator.
        yield f'data: {chunk}\n' if streaming else chunk
    if streaming:
        yield 'data: [DONE]\n\n'
|
|
|
|
debug_params('params_kw', params_kw)
|
|
lctype='文生文'
|
|
if params_kw.off_peak:
|
|
off_peak = params_kw.off_peak
|
|
if off_peak in [True, "Y" "y", 1, "1"]:
|
|
off_peak = True
|
|
else:
|
|
off_peak = False
|
|
params_kw.off_peak = off_peak
|
|
userid = await get_user()
|
|
userorgid = await get_userorgid()
|
|
if userid is None:
|
|
debug(f'need login')
|
|
return openai_403()
|
|
|
|
if not params_kw.prompt and not params_kw.messages:
|
|
debug(f'missing prompt and messages, model={params_kw.model}')
|
|
d = return_error('Missing need data(prompt or messages)')
|
|
return json_response(d, status=400)
|
|
env = request._run_ns
|
|
async with get_sor_context(env, 'llmage') as sor:
|
|
sql = """select distinct a.* from llm a
|
|
join llm_api_map m on a.id = m.llmid
|
|
join llmcatelog b on m.llmcatelogid = b.id
|
|
where b.name = ${lctype}$
|
|
and a.model=${model}$
|
|
and a.status = 'published'"""
|
|
recs = await sor.sqlExe(sql, {
|
|
'lctype': lctype,
|
|
'model': params_kw.model or 'qwen3-max'
|
|
})
|
|
if len(recs) == 0:
|
|
debug(f'{params_kw.model=} not found')
|
|
return openai_400()
|
|
params_kw.llmid = recs[0].id
|
|
|
|
debug(f'{params_kw.llmid=}')
|
|
f = await checkCustomerBalance(params_kw.llmid, userid, userorgid)
|
|
if not f:
|
|
debug(f'{userid=} balance not enough')
|
|
return openai_429()
|
|
# debug(f'{tools=}, {request._run_ns.tools=}')
|
|
return await env.stream_response(request, gen, content_type='application/json')
|
|
|