From 2aa3185f2e73a928094d7dbc90c15d50279c33c3 Mon Sep 17 00:00:00 2001 From: yumoqing Date: Mon, 30 Mar 2026 16:21:38 +0800 Subject: [PATCH] bugfix --- llmage/asyncinference.py | 13 ++++++++----- llmage/llmclient.py | 4 +++- llmage/syncinference.py | 7 ++++--- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/llmage/asyncinference.py b/llmage/asyncinference.py index e5844df..5c4f51f 100644 --- a/llmage/asyncinference.py +++ b/llmage/asyncinference.py @@ -78,9 +78,6 @@ async def async_uapi_request(request, llm, sor, b = b.decode('utf-8') debug(f'task submited:{b}') d = DictObject(**json.loads(b)) - if d.status == 'FAILED': - e = Exception(f'resp={d} FFAILED') - raise e responsed_seconds = time.time() - start_timestamp finish_seconds = responsed_seconds llmusage = DictObject() @@ -90,22 +87,28 @@ async def async_uapi_request(request, llm, sor, llmusage.use_time = timestampstr() llmusage.userid = callerid llmusage.ioinfo = json.dumps({ - "input": params_kw + "input": params_kw, + 'output': [d] }) llmusage.taskid = d.taskid llmusage.transno = params_kw.transno llmusage.responsed_seconds = responsed_seconds llmusage.finish_seconds = finish_seconds - llmusage.status = 'CREATED' + llmusage.status = d.status llmusage.userorgid = callerorgid llmusage.ownerid = llm.orgid + llmusage.accounting_status = 'created' b = json.dumps(d, ensure_ascii=False) yield b await write_llmusage(llmusage) # if llm.callbackurl: # return + if d.status == 'FAILED': + e = Exception(f'resp={d} FFAILED') + raise e asyncio.create_task(query_task_status(request, llm.upappid, llm.query_apiname, luid, userid, d.taskid)) + yield d except Exception as e: exception(f'{e=},{format_exc()}') diff --git a/llmage/llmclient.py b/llmage/llmclient.py index 0a4f445..6be5ff4 100644 --- a/llmage/llmclient.py +++ b/llmage/llmclient.py @@ -105,6 +105,7 @@ async def uapi_request(request, llm, sor, callerid, callerorgid, params_kw=None) except Exception as e: e = Exception(f'{llm.pid} charging error{e}') exception(f'{e}') + llmusage.amount = llmusage.cost = 0 else: llmusage.amount = 0 llmusage.cost = 0 @@ -112,7 +113,8 @@ async def uapi_request(request, llm, sor, callerid, callerorgid, params_kw=None) llmusage.ownerid = llm.orgid llmusage.accounting_status = 'created' await write_llmusage(llmusage) - await llm_accounting(request, llmusage) + if llmusage.amount > 0.0001: + await llm_accounting(request, llmusage) except Exception as e: exception(f'{e=},{format_exc()}') diff --git a/llmage/syncinference.py b/llmage/syncinference.py index 119f96d..83ed9d1 100644 --- a/llmage/syncinference.py +++ b/llmage/syncinference.py @@ -37,7 +37,7 @@ async def sync_uapi_request(request, llm, sor, callerid, callerorgid, params_kw= b = b.decode('utf-8') d = json.loads(b) status = d.get('status') - usage = d.get('usage', {}) + usage = d.get('usage') if status and status != 'SUCCEEDED': raise Exception(d['error']) responsed_seconds = time.time() - start_timestamp @@ -51,7 +51,7 @@ async def sync_uapi_request(request, llm, sor, callerid, callerorgid, params_kw= llmusage.usage = json.dumps(usage) llmusage.ioinfo = json.dumps({ "input": params_kw, - "output": d + "output": [d] }) llmusage.transno = params_kw.transno llmusage.responsed_seconds = responsed_seconds @@ -77,7 +77,8 @@ async def sync_uapi_request(request, llm, sor, callerid, callerorgid, params_kw= b = json.dumps(d, ensure_ascii=False) yield b await write_llmusage(llmusage) - await llm_accounting(request, llmusage) + if llmusage.amount > 0.0001: + await llm_accounting(request, llmusage) except Exception as e: exception(f'{e=},{format_exc()}') estr = erase_apikey(e)