From 86030862f03ded6231d15c5e20fcb59828d830cc Mon Sep 17 00:00:00 2001
From: yumoqing
Date: Wed, 11 Feb 2026 17:24:58 +0800
Subject: [PATCH] bugfix

---
 llmage/init.py      |  2 ++
 llmage/llmclient.py | 21 ++++++++++++++-------
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/llmage/init.py b/llmage/init.py
index 43eef82..4c004cb 100644
--- a/llmage/init.py
+++ b/llmage/init.py
@@ -6,6 +6,7 @@ from llmage.keling import keling_token
 from llmage.llmclient import (
 	b64media2url,
 	get_llm,
+	inference_generator,
 	inference,
 	get_llmproviders,
 	get_llms_sort_by_provider,
@@ -21,6 +22,7 @@ def load_llmage():
 	env.b64media2url = b64media2url
 	env.hex2base64 = hex2base64
 	env.inference = inference
+	env.inference_generator = inference_generator
 	env.get_llms_by_catelog = get_llms_by_catelog
 	env.get_llmcatelogs = get_llmcatelogs
 	env.checkCustomerBalance = checkCustomerBalance
diff --git a/llmage/llmclient.py b/llmage/llmclient.py
index a5370ee..576b173 100644
--- a/llmage/llmclient.py
+++ b/llmage/llmclient.py
@@ -380,7 +380,7 @@ def b64media2url(request, mediafile):
 	url = entire_url('/idfile?path=') + env.quote(mediafile)
 	return url
 
-async def inference(request, *args, params_kw=None, **kw):
+async def inference_generator(request, *args, params_kw=None, **kw):
 	env = request._run_ns.copy()
 	if not params_kw:
 		params_kw = env.params_kw
@@ -390,6 +390,7 @@ async def inference(request, *args, params_kw=None, **kw):
 	dbname = env.get_module_dbname('llmage')
 	db = env.DBPools()
 	async with db.sqlorContext(dbname) as sor:
+		f = None
 		llm = await get_llm(llmid)
 		if not params_kw.model:
 			params_kw.model = llm.model
@@ -397,11 +398,17 @@ async def inference(request, *args, params_kw=None, **kw):
 			llm.stream = 'sync'
 		if llm.stream == 'async':
 			f = partial(async_uapi_request, request, llm, sor, params_kw=params_kw)
-			return await env.stream_response(request, f)
-		if llm.stream == 'sync':
+		elif llm.stream == 'sync':
 			f = partial(sync_uapi_request, request, llm, sor, params_kw=params_kw)
-			return await env.stream_response(request, f)
 		# env.update(llm)
-		uapi = UAPI(request, sor=sor)
-		f = partial(uapi_request, request, llm, sor, params_kw=params_kw)
-		return await env.stream_response(request, f)
+		else:
+			uapi = UAPI(request, sor=sor)
+			f = partial(uapi_request, request, llm, sor, params_kw=params_kw)
+		async for d in f():
+			yield d
+
+async def inference(request, *args, params_kw=None, **kw):
+	env = request._run_ns.copy()
+	f = partial(inference_generator, request, *args, params_kw=params_kw, **kw)
+	return await env.stream_response(request, f)
+