From 93d5786b15a035b756d13cc86cfdb964d0f20ef4 Mon Sep 17 00:00:00 2001 From: wangmeihua <13383952685@163.com> Date: Fri, 15 Aug 2025 10:45:48 +0800 Subject: [PATCH] rag --- rag/uapi_service.py | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/rag/uapi_service.py b/rag/uapi_service.py index 21f2454..0c5ddcc 100644 --- a/rag/uapi_service.py +++ b/rag/uapi_service.py @@ -1,3 +1,4 @@ +from traceback import format_exc from appPublic.log import debug, error from typing import Dict, Any, List import uuid @@ -15,23 +16,12 @@ class APIService: log_prefix = f"request #{request_id} " if request_id else "" if not b: error(f"{log_prefix}{service_name} 返回空响应: upappid={upappid}, apiname={apiname}") - raise RuntimeError(f"{service_name} 返回空响应") + raise RuntimeError(f"{service_name} 返回空响应\n{format_exc()}") try: - response_text = b.decode('utf-8') - except UnicodeDecodeError as decode_err: - error(f"{log_prefix}{service_name} 响应解码失败: {str(decode_err)}, 原始响应: {b[:100]}") - raise RuntimeError(f"响应解码失败: {str(decode_err)}") - # 清理响应中的控制字符 - response_text = re.sub(r'[\x00-\x1F\x7F]', '', response_text) - debug(f"{log_prefix}{service_name} 原始响应: {response_text[:500]}") - try: - d = json.loads(response_text) - except json.JSONDecodeError as json_err: - error(f"{log_prefix}{service_name} JSON 解析失败: {str(json_err)}, 响应内容: {response_text[:500]}") - raise RuntimeError(f"JSON 解析失败: {str(json_err)}") - if not isinstance(d, dict): - error(f"{log_prefix}{service_name} 响应不是有效字典: {d}") - raise RuntimeError(f"{service_name} 响应格式错误") + d = json.loads(b.decode('utf-8')) + except Exception as e: + error( + f"request #{request_id} JSON 解析失败: {str(e)}, upappid={upappid}, apiname={apiname}\n{format_exc()}") return d # 嵌入服务 (BAAI/bge-m3) @@ -40,8 +30,6 @@ class APIService: request_id = str(uuid.uuid4()) debug(f"Request #{request_id} started for embeddings, texts={texts[:2]}") try: - # 清理输入文本 - texts = [re.sub(r'[\x00-\x1F\x7F]', '', text) for text in texts] uapi = UAPI(request, DictObject(**globals())) params_kw = {"input": texts} b = await uapi.call(upappid, apiname, user, params_kw) @@ -64,7 +52,6 @@ class APIService: try: if not query: raise ValueError("查询文本不能为空") - query = re.sub(r'[\x00-\x1F\x7F]', '', query) # 清理输入 uapi = UAPI(request, DictObject(**globals())) params_kw = {"query": query} b = await uapi.call(upappid, apiname, user, params_kw) @@ -86,7 +73,6 @@ class APIService: request_id = str(uuid.uuid4()) debug(f"Request #{request_id} started for triples extraction, text={text[:100]}") try: - text = re.sub(r'[\x00-\x1F\x7F]', '', text) # 清理输入 uapi = UAPI(request, DictObject(**globals())) params_kw = {"text": text} b = await uapi.call(upappid, apiname, user, params_kw) @@ -115,9 +101,8 @@ class APIService: top_n = len(results) else: top_n = min(top_n, len(results)) - documents = [re.sub(r'[\x00-\x1F\x7F]', '', result.get("text", str(result))) for result in results] - query = re.sub(r'[\x00-\x1F\x7F]', '', query) uapi = UAPI(request, DictObject(**globals())) + documents = [result.get("text", str(result)) for result in results] params_kw = { "model": "rerank-001", "query": query, @@ -232,7 +217,6 @@ class APIService: request_id = str(uuid.uuid4()) debug(f"Request #{request_id} started for Neo4j triplets matching, query={query[:100]}") try: - query = re.sub(r'[\x00-\x1F\x7F]', '', query) uapi = UAPI(request, DictObject(**globals())) params_kw = { "query": query,