bugfix
This commit is contained in:
parent
e88b4f61c6
commit
86030862f0
@ -6,6 +6,7 @@ from llmage.keling import keling_token
|
||||
from llmage.llmclient import (
|
||||
b64media2url,
|
||||
get_llm,
|
||||
inference_generator,
|
||||
inference,
|
||||
get_llmproviders,
|
||||
get_llms_sort_by_provider,
|
||||
@ -21,6 +22,7 @@ def load_llmage():
|
||||
env.b64media2url = b64media2url
|
||||
env.hex2base64 = hex2base64
|
||||
env.inference = inference
|
||||
env.inference_generator = inference_generator
|
||||
env.get_llms_by_catelog = get_llms_by_catelog
|
||||
env.get_llmcatelogs = get_llmcatelogs
|
||||
env.checkCustomerBalance = checkCustomerBalance
|
||||
|
||||
@ -380,7 +380,7 @@ def b64media2url(request, mediafile):
|
||||
url = entire_url('/idfile?path=') + env.quote(mediafile)
|
||||
return url
|
||||
|
||||
async def inference(request, *args, params_kw=None, **kw):
|
||||
async def inference_generator(request, *args, params_kw=None, **kw):
|
||||
env = request._run_ns.copy()
|
||||
if not params_kw:
|
||||
params_kw = env.params_kw
|
||||
@ -390,6 +390,7 @@ async def inference(request, *args, params_kw=None, **kw):
|
||||
dbname = env.get_module_dbname('llmage')
|
||||
db = env.DBPools()
|
||||
async with db.sqlorContext(dbname) as sor:
|
||||
f == None
|
||||
llm = await get_llm(llmid)
|
||||
if not params_kw.model:
|
||||
params_kw.model = llm.model
|
||||
@ -397,11 +398,17 @@ async def inference(request, *args, params_kw=None, **kw):
|
||||
llm.stream = 'sync'
|
||||
if llm.stream == 'async':
|
||||
f = partial(async_uapi_request, request, llm, sor, params_kw=params_kw)
|
||||
return await env.stream_response(request, f)
|
||||
if llm.stream == 'sync':
|
||||
elif llm.stream == 'sync':
|
||||
f = partial(sync_uapi_request, request, llm, sor, params_kw=params_kw)
|
||||
return await env.stream_response(request, f)
|
||||
# env.update(llm)
|
||||
else:
|
||||
uapi = UAPI(request, sor=sor)
|
||||
f = partial(uapi_request, request, llm, sor, params_kw=params_kw)
|
||||
async for d in f():
|
||||
yield d
|
||||
|
||||
async def inference(request, *args, params_kw=None, **kw):
    """Stream the output of ``inference_generator`` through ``stream_response``.

    Binds the request and the caller's arguments to ``inference_generator``
    and hands the resulting callable to ``env.stream_response``.

    Args:
        request: The incoming request object; its ``_run_ns`` namespace
            provides ``stream_response``.
        *args: Extra positional arguments forwarded to
            ``inference_generator`` after the request.
        params_kw: Optional parameter object forwarded unchanged to
            ``inference_generator``.
        **kw: Extra keyword arguments forwarded to ``inference_generator``.

    Returns:
        Whatever ``env.stream_response(request, f)`` returns.
    """
    env = request._run_ns.copy()
    # inference_generator expects the request as its first positional
    # argument. It has to be bound here, as the other partials in this
    # module do, otherwise the generator is started without a request
    # (or with the first element of *args mistaken for it).
    f = partial(inference_generator, request, *args, params_kw=params_kw, **kw)
    return await env.stream_response(request, f)
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user