bugfix
This commit is contained in:
parent
e88b4f61c6
commit
86030862f0
@ -6,6 +6,7 @@ from llmage.keling import keling_token
|
|||||||
from llmage.llmclient import (
|
from llmage.llmclient import (
|
||||||
b64media2url,
|
b64media2url,
|
||||||
get_llm,
|
get_llm,
|
||||||
|
inference_generator,
|
||||||
inference,
|
inference,
|
||||||
get_llmproviders,
|
get_llmproviders,
|
||||||
get_llms_sort_by_provider,
|
get_llms_sort_by_provider,
|
||||||
@ -21,6 +22,7 @@ def load_llmage():
|
|||||||
env.b64media2url = b64media2url
|
env.b64media2url = b64media2url
|
||||||
env.hex2base64 = hex2base64
|
env.hex2base64 = hex2base64
|
||||||
env.inference = inference
|
env.inference = inference
|
||||||
|
env.inference_generator = inference_generator
|
||||||
env.get_llms_by_catelog = get_llms_by_catelog
|
env.get_llms_by_catelog = get_llms_by_catelog
|
||||||
env.get_llmcatelogs = get_llmcatelogs
|
env.get_llmcatelogs = get_llmcatelogs
|
||||||
env.checkCustomerBalance = checkCustomerBalance
|
env.checkCustomerBalance = checkCustomerBalance
|
||||||
|
|||||||
@ -380,7 +380,7 @@ def b64media2url(request, mediafile):
|
|||||||
url = entire_url('/idfile?path=') + env.quote(mediafile)
|
url = entire_url('/idfile?path=') + env.quote(mediafile)
|
||||||
return url
|
return url
|
||||||
|
|
||||||
async def inference(request, *args, params_kw=None, **kw):
|
async def inference_generator(request, *args, params_kw=None, **kw):
|
||||||
env = request._run_ns.copy()
|
env = request._run_ns.copy()
|
||||||
if not params_kw:
|
if not params_kw:
|
||||||
params_kw = env.params_kw
|
params_kw = env.params_kw
|
||||||
@ -390,6 +390,7 @@ async def inference(request, *args, params_kw=None, **kw):
|
|||||||
dbname = env.get_module_dbname('llmage')
|
dbname = env.get_module_dbname('llmage')
|
||||||
db = env.DBPools()
|
db = env.DBPools()
|
||||||
async with db.sqlorContext(dbname) as sor:
|
async with db.sqlorContext(dbname) as sor:
|
||||||
|
f == None
|
||||||
llm = await get_llm(llmid)
|
llm = await get_llm(llmid)
|
||||||
if not params_kw.model:
|
if not params_kw.model:
|
||||||
params_kw.model = llm.model
|
params_kw.model = llm.model
|
||||||
@ -397,11 +398,17 @@ async def inference(request, *args, params_kw=None, **kw):
|
|||||||
llm.stream = 'sync'
|
llm.stream = 'sync'
|
||||||
if llm.stream == 'async':
|
if llm.stream == 'async':
|
||||||
f = partial(async_uapi_request, request, llm, sor, params_kw=params_kw)
|
f = partial(async_uapi_request, request, llm, sor, params_kw=params_kw)
|
||||||
return await env.stream_response(request, f)
|
elif llm.stream == 'sync':
|
||||||
if llm.stream == 'sync':
|
|
||||||
f = partial(sync_uapi_request, request, llm, sor, params_kw=params_kw)
|
f = partial(sync_uapi_request, request, llm, sor, params_kw=params_kw)
|
||||||
return await env.stream_response(request, f)
|
|
||||||
# env.update(llm)
|
# env.update(llm)
|
||||||
uapi = UAPI(request, sor=sor)
|
else:
|
||||||
f = partial(uapi_request, request, llm, sor, params_kw=params_kw)
|
uapi = UAPI(request, sor=sor)
|
||||||
return await env.stream_response(request, f)
|
f = partial(uapi_request, request, llm, sor, params_kw=params_kw)
|
||||||
|
async for d in f():
|
||||||
|
yield d
|
||||||
|
|
||||||
|
async def inference(request, *args, params_kw=None, **kw):
    """Run an inference and pass its output to ``env.stream_response``.

    Wraps ``inference_generator`` in a zero-argument callable and hands
    that callable, together with ``request``, to ``stream_response`` taken
    from a copy of the request's run namespace.

    Args:
        request: The incoming request object; must expose ``_run_ns``.
        *args: Extra positional arguments forwarded to
            ``inference_generator`` after ``request``.
        params_kw: Optional parameter container forwarded unchanged to
            ``inference_generator``.
        **kw: Extra keyword arguments forwarded to ``inference_generator``.

    Returns:
        Whatever ``env.stream_response(request, f)`` returns.
    """
    env = request._run_ns.copy()
    # ``inference_generator`` takes ``request`` as its first positional
    # parameter, so it has to be bound here; without it the generator is
    # invoked with ``request`` missing (or with the first extra positional
    # argument mistaken for it).
    f = partial(inference_generator, request, *args, params_kw=params_kw, **kw)
    return await env.stream_response(request, f)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user