async def gen(): env = request._run_ns.copy() f = partial(inference_generator, request, params_kw=params_kw) if params_kw.stream: async for l in f(): yield f'data: {l}\n' yield 'data: [DONE]\n\n' else: async for l in f(): yield l debug_params('params_kw', params_kw) catelogid = params_kw.catelogid or 't2t' if params_kw.off_peak: off_peak = params_kw.off_peak if off_peak in [True, "Y" "y", 1, "1"]: off_peak = True else: off_peak = False params_kw.off_peak = off_peak userid = await get_user() userorgid = await get_userorgid() if userid is None: debug(f'need login') return openai_403() if not params_kw.prompt and not params_kw.messages: debug(f'missing prompt and messages, model={params_kw.model}') d = return_error('Missing need data(prompt or messages)') return json_response(d, status=400) env = request._run_ns async with get_sor_context(env, 'llmage') as sor: sql = """select distinct a.* from llm a join llm_api_map m on a.id = m.llmid join llmcatelog b on m.llmcatelogid = b.id where (b.id = ${catelogid}$ OR b.name = ${catelogid}$) and a.model=${model}$ and a.status = 'published'""" recs = await sor.sqlExe(sql, { 'catelogid': catelogid, 'model': params_kw.model or 'qwen3-max' }) if len(recs) == 0: debug(f'{params_kw.model=} not found') return openai_400() params_kw.llmid = recs[0].id debug(f'{params_kw.llmid=}') f = await checkCustomerBalance(params_kw.llmid, userid, userorgid) if not f: debug(f'{userid=} balance not enough') return openai_429() # debug(f'{tools=}, {request._run_ns.tools=}') return await env.stream_response(request, gen, content_type='application/json')