bugfix

2025-09-11 13:46:48 +08:00 · 2025-09-11 13:46:48 +08:00 · 9e7ff6c71f
commit 9e7ff6c71f
parent 2e81611868
1 changed file with 425 additions and 438 deletions
--- a/rag/folderinfo.py
+++ b/rag/folderinfo.py
@@ -20,487 +20,474 @@ from typing import List, Dict, Any
 import json
 class RagFileMgr(FileMgr):
-    async def get_folder_ownerid(self, sor):
+	async def get_folder_ownerid(self, sor):
-       fiid = self.fiid
+		fiid = self.fiid
-       recs = await sor.R('kdb', {'id': self.fiid})
+		recs = await sor.R('kdb', {'id': self.fiid})
-       if len(recs) > 0:
+		if len(recs) > 0:
-          return recs[0].orgid
+			return recs[0].orgid
-       return None
+		return None
-    async def get_organization_quota(self, sor, orgid):
+	async def get_organization_quota(self, sor, orgid):
-       sql = """select a.* from ragquota a, kdb b
+		sql = """select a.* from ragquota a, kdb b
 where a.orgid = b.orgid
-    and b.id = ${id}$
+	and b.id = ${id}$
-    and ${today}$ >= a.enabled_date
+	and ${today}$ >= a.enabled_date
-    and ${today}$ < a.expired_date
+	and ${today}$ < a.expired_date
 """
-       recs = await sor.sqlExe(sql, {
+		recs = await sor.sqlExe(sql, {
-             'id': self.fiid,
+			 'id': self.fiid,
-             'today': curDateString()
+			 'today': curDateString()
-       })
+		})
-       if len(recs) > 0:
+		if len(recs) > 0:
-          r = recs[0]
+			r = recs[0]
-          return r.quota, r.expired_date
+			return r.quota, r.expired_date
-       return None, None
+		return None, None
-    async def get_service_params(self,orgid):
+	async def get_service_params(self,sor, orgid):
-       """ 根据 orgid 从数据库获取服务参数 (仅 upappid)，假设 service_opts 表返回单条记录。 """
+		""" 根据 orgid 从数据库获取服务参数 (仅 upappid)，假设 service_opts 表返回单条记录。 """
-       db = DBPools()
+		sql_opts = """
-       dbname = "kyrag"
+		SELECT embedding_id, vdb_id, reranker_id, triples_id, gdb_id, entities_id 
 		FROM service_opts 
 		WHERE orgid = ${orgid}$
 		"""
 		opts_result = await sor.sqlExe(sql_opts, {"orgid": orgid})
 		if not opts_result:
 			error(f"未找到 orgid={orgid} 的服务配置")
 			return None
 		opts = opts_result[0]
-       sql_opts = """
+		# 收集服务 ID
-        SELECT embedding_id, vdb_id, reranker_id, triples_id, gdb_id, entities_id 
+		service_ids = set()
-        FROM service_opts 
+		for key in ['embedding_id', 'vdb_id', 'reranker_id', 'triples_id', 'gdb_id', 'entities_id']:
-        WHERE orgid = ${orgid}$
+		if opts[key]:
-        """
+			service_ids.add(opts[key])
       try:
          async with db.sqlorContext(dbname) as sor:
             opts_result = await sor.sqlExe(sql_opts, {"orgid": orgid})
             if not opts_result:
                error(f"未找到 orgid={orgid} 的服务配置")
                return None
             opts = opts_result[0]
       except Exception as e:
          error(f"查询 service_opts 失败: {str(e)}, 堆栈: {traceback.format_exc()}")
          return None
-       # 收集服务 ID
+		# 检查 service_ids 是否为空
-       service_ids = set()
+		if not service_ids:
-       for key in ['embedding_id', 'vdb_id', 'reranker_id', 'triples_id', 'gdb_id', 'entities_id']:
+			error(f"未找到任何服务 ID for orgid={orgid}")
-          if opts[key]:
+			return None
             service_ids.add(opts[key])
-       # 检查 service_ids 是否为空
+		# 手动构造 IN 子句的 ID 列表
-       if not service_ids:
+		id_list = ','.join([f"'{id}'" for id in service_ids])	# 确保每个 ID 被单引号包裹
-          error(f"未找到任何服务 ID for orgid={orgid}")
+		sql_services = """
-          return None
+		SELECT id, name, upappid 
 		FROM ragservices 
 		WHERE id IN ${id_list}$
 		"""
 		services_result = await sor.sqlExe(sql_services, {'id_list': id_list})
 		if not services_result:
 			error(f"未找到服务 ID {service_ids} 的 ragservices 配置")
 			return None
-       # 手动构造 IN 子句的 ID 列表
+		# 构建服务参数字典，基于 name 字段匹配，仅存储 upappid
-       id_list = ','.join([f"'{id}'" for id in service_ids])  # 确保每个 ID 被单引号包裹
+		service_params = {
-       sql_services = f"""
+			'embedding': None,
-        SELECT id, name, upappid 
+			'vdb': None,
-        FROM ragservices 
+			'reranker': None,
-        WHERE id IN ({id_list})
+			'triples': None,
-        """
+			'gdb': None,
-       try:
+			'entities': None
-          async with db.sqlorContext(dbname) as sor:
+		}
-             services_result = await sor.sqlExe(sql_services, {})
+		for service in services_result:
-             if not services_result:
+		name = service['name']
-                error(f"未找到服务 ID {service_ids} 的 ragservices 配置")
+		if name == 'bgem3嵌入':
-                return None
+			service_params['embedding'] = service['upappid']
 		elif name == 'milvus向量检索':
 			service_params['vdb'] = service['upappid']
 		elif name == 'bgem2v3重排':
 			service_params['reranker'] = service['upappid']
 		elif name == 'mrebel三元组抽取':
 			service_params['triples'] = service['upappid']
 		elif name == 'neo4j删除知识库':
 			service_params['gdb'] = service['upappid']
 		elif name == 'small实体抽取':
 			service_params['entities'] = service['upappid']
-             # 构建服务参数字典，基于 name 字段匹配，仅存储 upappid
+		# 检查是否所有服务参数都已填充
-             service_params = {
+		missing_services = [k for k, v in service_params.items() if v is None]
-                'embedding': None,
+		if missing_services:
-                'vdb': None,
+			error(f"未找到以下服务的配置: {missing_services}")
-                'reranker': None,
+			return None
-                'triples': None,
+		return service_params
                'gdb': None,
                'entities': None
             }
             for service in services_result:
                name = service['name']
                if name == 'bgem3嵌入':
                   service_params['embedding'] = service['upappid']
                elif name == 'milvus向量检索':
                   service_params['vdb'] = service['upappid']
                elif name == 'bgem2v3重排':
                   service_params['reranker'] = service['upappid']
                elif name == 'mrebel三元组抽取':
                   service_params['triples'] = service['upappid']
                elif name == 'neo4j删除知识库':
                   service_params['gdb'] = service['upappid']
                elif name == 'small实体抽取':
                   service_params['entities'] = service['upappid']
-             # 检查是否所有服务参数都已填充
+	async def file_uploaded(self, request, ns, userid):
-             missing_services = [k for k, v in service_params.items() if v is None]
+		"""将文档插入 Milvus 并抽取三元组到 Neo4j"""
-             if missing_services:
+		debug(f'Received ns: {ns=}')
-                error(f"未找到以下服务的配置: {missing_services}")
+		env = request._run_ns
-                return None
+		realpath = ns.get('realpath', '')
 		fiid = ns.get('fiid', '')
 		id = ns.get('id', '')
 		orgid = ns.get('ownerid', '')
 		hashvalue = ns.get('hashvalue', '')
 		db_type = ''
-             return service_params
+		api_service = APIService()
-       except Exception as e:
+		debug(
-          error(f"查询 ragservices 失败: {str(e)}, 堆栈: {traceback.format_exc()}")
+			f'Inserting document: file_path={realpath}, userid={orgid}, db_type={db_type}, knowledge_base_id={fiid}, document_id={id}')
          return None
-    async def file_uploaded(self, request, ns, userid):
+		timings = {}
-       """将文档插入 Milvus 并抽取三元组到 Neo4j"""
+		start_total = time.time()
       debug(f'Received ns: {ns=}')
       realpath = ns.get('realpath', '')
       fiid = ns.get('fiid', '')
       id = ns.get('id', '')
       orgid = ns.get('ownerid', '')
       hashvalue = ns.get('hashvalue', '')
       db_type = ''
-       api_service = APIService()
+		try:
-       debug(
+			if not orgid or not fiid or not id:
-          f'Inserting document: file_path={realpath}, userid={orgid}, db_type={db_type}, knowledge_base_id={fiid}, document_id={id}')
+			 raise ValueError("orgid、fiid 和 id 不能为空")
 			if len(orgid) > 32 or len(fiid) > 255:
 			 raise ValueError("orgid 或 fiid 的长度超出限制")
 			if not os.path.exists(realpath):
 			 raise ValueError(f"文件 {realpath} 不存在")
-       timings = {}
+			# 检查 hashvalue 是否已存在
-       start_total = time.time()
+			db = DBPools()
 			dbname = env.get_module_dbname('rag')
 			sql_check_hash = """
 				SELECT hashvalue 
 				FROM file 
 				WHERE hashvalue = ${hashvalue}$
 			"""
 			async with db.sqlorContext(dbname) as sor:
 			 hash_result = await sor.sqlExe(sql_check_hash, {"hashvalue": hashvalue})
 			 if hash_result:
 				debug(f"文件已存在: hashvalue={hashvalue}")
 				timings["total"] = time.time() - start_total
 				return {
 					"status": "error",
 					"document_id": id,
 					"collection_name": "ragdb",
 					"timings": timings,
 					"message": f"文件已存在: hashvalue={hashvalue}",
 					"status_code": 400
 				}
-       try:
+			# 获取服务参数
-          if not orgid or not fiid or not id:
+			service_params = await self.get_service_params(sor, orgid)
-             raise ValueError("orgid、fiid 和 id 不能为空")
+			if not service_params:
-          if len(orgid) > 32 or len(fiid) > 255:
+			 raise ValueError("无法获取服务参数")
             raise ValueError("orgid 或 fiid 的长度超出限制")
          if not os.path.exists(realpath):
             raise ValueError(f"文件 {realpath} 不存在")
-          # 检查 hashvalue 是否已存在
+			supported_formats = {'pdf', 'docx', 'xlsx', 'pptx', 'csv', 'txt'}
-          db = DBPools()
+			ext = realpath.rsplit('.', 1)[1].lower() if '.' in realpath else ''
-          dbname = "kyrag"
+			if ext not in supported_formats:
-          sql_check_hash = """
+			 raise ValueError(f"不支持的文件格式: {ext}, 支持的格式: {', '.join(supported_formats)}")
                SELECT hashvalue 
                FROM file 
                WHERE hashvalue = ${hashvalue}$
            """
          async with db.sqlorContext(dbname) as sor:
             hash_result = await sor.sqlExe(sql_check_hash, {"hashvalue": hashvalue})
             if hash_result:
                debug(f"文件已存在: hashvalue={hashvalue}")
                timings["total"] = time.time() - start_total
                return {
                   "status": "error",
                   "document_id": id,
                   "collection_name": "ragdb",
                   "timings": timings,
                   "message": f"文件已存在: hashvalue={hashvalue}",
                   "status_code": 400
                }
-          # 获取服务参数
+			debug(f"加载文件: {realpath}")
-          service_params = await self.get_service_params(orgid)
+			start_load = time.time()
-          if not service_params:
+			text = fileloader(realpath)
-             raise ValueError("无法获取服务参数")
+			# debug(f"处理后的文件内容是：{text=}")
 			text = re.sub(r'[^\u4e00-\u9fa5a-zA-Z0-9\s.;,\n/]', '', text)
 			timings["load_file"] = time.time() - start_load
 			debug(f"加载文件耗时: {timings['load_file']:.2f} 秒, 文本长度: {len(text)}")
 			if not text or not text.strip():
 			 raise ValueError(f"文件 {realpath} 加载为空")
-          supported_formats = {'pdf', 'docx', 'xlsx', 'pptx', 'csv', 'txt'}
+			document = Document(page_content=text)
-          ext = realpath.rsplit('.', 1)[1].lower() if '.' in realpath else ''
+			text_splitter = RecursiveCharacterTextSplitter(
-          if ext not in supported_formats:
+			 chunk_size=500,
-             raise ValueError(f"不支持的文件格式: {ext}, 支持的格式: {', '.join(supported_formats)}")
+			 chunk_overlap=100,
 			 length_function=len)
 			debug("开始分片文件内容")
 			start_split = time.time()
 			chunks = text_splitter.split_documents([document])
 			timings["split_text"] = time.time() - start_split
 			debug(
 			 f"文本分片耗时: {timings['split_text']:.2f} 秒, 分片数量: {len(chunks)}, 分片内容: {[chunk.page_content[:50] for chunk in chunks[:5]]}")
 			debug(f"分片内容: {[chunk.page_content[:100] + '...' for chunk in chunks]}")
 			if not chunks:
 			 raise ValueError(f"文件 {realpath} 未生成任何文档块")
-          debug(f"加载文件: {realpath}")
+			filename = os.path.basename(realpath).rsplit('.', 1)[0]
-          start_load = time.time()
+			upload_time = datetime.now().isoformat()
          text = fileloader(realpath)
          # debug(f"处理后的文件内容是：{text=}")
          text = re.sub(r'[^\u4e00-\u9fa5a-zA-Z0-9\s.;,\n/]', '', text)
          timings["load_file"] = time.time() - start_load
          debug(f"加载文件耗时: {timings['load_file']:.2f} 秒, 文本长度: {len(text)}")
          if not text or not text.strip():
             raise ValueError(f"文件 {realpath} 加载为空")
-          document = Document(page_content=text)
+			debug("调用嵌入服务生成向量")
-          text_splitter = RecursiveCharacterTextSplitter(
+			start_embedding = time.time()
-             chunk_size=500,
+			texts = [chunk.page_content for chunk in chunks]
-             chunk_overlap=100,
+			embeddings = []
-             length_function=len)
+			for i in range(0, len(texts), 10):	# 每次处理 10 个文本块
-          debug("开始分片文件内容")
+			 batch_texts = texts[i:i + 10]
-          start_split = time.time()
+			 batch_embeddings = await api_service.get_embeddings(
-          chunks = text_splitter.split_documents([document])
+				request=request,
-          timings["split_text"] = time.time() - start_split
+				texts=batch_texts,
-          debug(
+				upappid=service_params['embedding'],
-             f"文本分片耗时: {timings['split_text']:.2f} 秒, 分片数量: {len(chunks)}, 分片内容: {[chunk.page_content[:50] for chunk in chunks[:5]]}")
+				apiname="BAAI/bge-m3",
-          debug(f"分片内容: {[chunk.page_content[:100] + '...' for chunk in chunks]}")
+				user=userid
-          if not chunks:
+			 )
-             raise ValueError(f"文件 {realpath} 未生成任何文档块")
+			 embeddings.extend(batch_embeddings)
 			if not embeddings or not all(len(vec) == 1024 for vec in embeddings):
 			 raise ValueError("所有嵌入向量必须是长度为 1024 的浮点数列表")
 			timings["generate_embeddings"] = time.time() - start_embedding
 			debug(f"生成嵌入向量耗时: {timings['generate_embeddings']:.2f} 秒, 嵌入数量: {len(embeddings)}")
-          filename = os.path.basename(realpath).rsplit('.', 1)[0]
+			chunks_data = []
-          upload_time = datetime.now().isoformat()
+			for i, chunk in enumerate(chunks):
 			 chunks_data.append({
 				"userid": orgid,
 				"knowledge_base_id": fiid,
 				"text": chunk.page_content,
 				"vector": embeddings[i],
 				"document_id": id,
 				"filename": filename + '.' + ext,
 				"file_path": realpath,
 				"upload_time": upload_time,
 				"file_type": ext,
 			 })
-          debug("调用嵌入服务生成向量")
+			debug(f"调用插入文件端点: {realpath}")
-          start_embedding = time.time()
+			start_milvus = time.time()
-          texts = [chunk.page_content for chunk in chunks]
+			for i in range(0, len(chunks_data), 10):	# 每次处理 10 条数据
-          embeddings = []
+			 batch_chunks = chunks_data[i:i + 10]
-          for i in range(0, len(texts), 10):  # 每次处理 10 个文本块
+			 result = await api_service.milvus_insert_document(
-             batch_texts = texts[i:i + 10]
+				request=request,
-             batch_embeddings = await api_service.get_embeddings(
+				chunks=batch_chunks,
-                request=request,
+				db_type=db_type,
-                texts=batch_texts,
+				upappid=service_params['vdb'],
-                upappid=service_params['embedding'],
+				apiname="milvus/insertdocument",
-                apiname="BAAI/bge-m3",
+				user=userid
-                user=userid
+			 )
-             )
+			 if result.get("status") != "success":
-             embeddings.extend(batch_embeddings)
+				raise ValueError(result.get("message", "Milvus 插入失败"))
-          if not embeddings or not all(len(vec) == 1024 for vec in embeddings):
+			timings["insert_milvus"] = time.time() - start_milvus
-             raise ValueError("所有嵌入向量必须是长度为 1024 的浮点数列表")
+			debug(f"Milvus 插入耗时: {timings['insert_milvus']:.2f} 秒")
          timings["generate_embeddings"] = time.time() - start_embedding
          debug(f"生成嵌入向量耗时: {timings['generate_embeddings']:.2f} 秒, 嵌入数量: {len(embeddings)}")
-          chunks_data = []
+			if result.get("status") != "success":
-          for i, chunk in enumerate(chunks):
+			 timings["total"] = time.time() - start_total
-             chunks_data.append({
+			 return {"status": "error", "document_id": id, "timings": timings,
-                "userid": orgid,
+					"message": result.get("message", "未知错误"), "status_code": 400}
                "knowledge_base_id": fiid,
                "text": chunk.page_content,
                "vector": embeddings[i],
                "document_id": id,
                "filename": filename + '.' + ext,
                "file_path": realpath,
                "upload_time": upload_time,
                "file_type": ext,
             })
-          debug(f"调用插入文件端点: {realpath}")
+			debug("调用三元组抽取服务")
-          start_milvus = time.time()
+			start_triples = time.time()
-          for i in range(0, len(chunks_data), 10):  # 每次处理 10 条数据
+			unique_triples = []
-             batch_chunks = chunks_data[i:i + 10]
+			try:
-             result = await api_service.milvus_insert_document(
+			 chunk_texts = [doc.page_content for doc in chunks]
-                request=request,
+			 debug(f"处理 {len(chunk_texts)} 个分片进行三元组抽取")
-                chunks=batch_chunks,
+			 triples = []
-                db_type=db_type,
+			 for i, chunk in enumerate(chunk_texts):
-                upappid=service_params['vdb'],
+				result = await api_service.extract_triples(
-                apiname="milvus/insertdocument",
+					request=request,
-                user=userid
+					text=chunk,
-             )
+					upappid=service_params['triples'],
-             if result.get("status") != "success":
+					apiname="Babelscape/mrebel-large",
-                raise ValueError(result.get("message", "Milvus 插入失败"))
+					user=userid
-          timings["insert_milvus"] = time.time() - start_milvus
+				)
-          debug(f"Milvus 插入耗时: {timings['insert_milvus']:.2f} 秒")
+				if isinstance(result, list):
 					triples.extend(result)
 					debug(f"分片 {i + 1} 抽取到 {len(result)} 个三元组: {result[:5]}")
 				else:
 					error(f"分片 {i + 1} 处理失败: {str(result)}")
-          if result.get("status") != "success":
+			 seen = set()
-             timings["total"] = time.time() - start_total
+			 for t in triples:
-             return {"status": "error", "document_id": id, "timings": timings,
+				identifier = (t['head'].lower(), t['tail'].lower(), t['type'].lower())
-                   "message": result.get("message", "未知错误"), "status_code": 400}
+				if identifier not in seen:
 					seen.add(identifier)
 					unique_triples.append(t)
 				else:
 					for existing in unique_triples:
 						if (existing['head'].lower() == t['head'].lower() and
 							existing['tail'].lower() == t['tail'].lower() and
 							len(t['type']) > len(existing['type'])):
 						 unique_triples.remove(existing)
 						 unique_triples.append(t)
 						 debug(f"替换三元组为更具体类型: {t}")
 						 break
-          debug("调用三元组抽取服务")
+			 timings["extract_triples"] = time.time() - start_triples
-          start_triples = time.time()
+			 debug(
-          unique_triples = []
+				f"三元组抽取耗时: {timings['extract_triples']:.2f} 秒, 抽取到 {len(unique_triples)} 个三元组: {unique_triples[:5]}")
          try:
             chunk_texts = [doc.page_content for doc in chunks]
             debug(f"处理 {len(chunk_texts)} 个分片进行三元组抽取")
             triples = []
             for i, chunk in enumerate(chunk_texts):
                result = await api_service.extract_triples(
                   request=request,
                   text=chunk,
                   upappid=service_params['triples'],
                   apiname="Babelscape/mrebel-large",
                   user=userid
                )
                if isinstance(result, list):
                   triples.extend(result)
                   debug(f"分片 {i + 1} 抽取到 {len(result)} 个三元组: {result[:5]}")
                else:
                   error(f"分片 {i + 1} 处理失败: {str(result)}")
-             seen = set()
+			 if unique_triples:
-             for t in triples:
+				debug(f"抽取到 {len(unique_triples)} 个三元组，调用 Neo4j 服务插入")
-                identifier = (t['head'].lower(), t['tail'].lower(), t['type'].lower())
+				start_neo4j = time.time()
-                if identifier not in seen:
+				for i in range(0, len(unique_triples), 30):	# 每次插入 30 个三元组
-                   seen.add(identifier)
+					batch_triples = unique_triples[i:i + 30]
-                   unique_triples.append(t)
+					neo4j_result = await api_service.neo4j_insert_triples(
-                else:
+						request=request,
-                   for existing in unique_triples:
+						triples=batch_triples,
-                      if (existing['head'].lower() == t['head'].lower() and
+						document_id=id,
-                            existing['tail'].lower() == t['tail'].lower() and
+						knowledge_base_id=fiid,
-                            len(t['type']) > len(existing['type'])):
+						userid=orgid,
-                         unique_triples.remove(existing)
+						upappid=service_params['gdb'],
-                         unique_triples.append(t)
+						apiname="neo4j/inserttriples",
-                         debug(f"替换三元组为更具体类型: {t}")
+						user=userid
-                         break
+					)
 					debug(f"Neo4j 服务响应: {neo4j_result}")
 					if neo4j_result.get("status") != "success":
 						timings["insert_neo4j"] = time.time() - start_neo4j
 						timings["total"] = time.time() - start_total
 						return {
 						 "status": "error",
 						 "document_id": id,
 						 "collection_name": "ragdb",
 						 "timings": timings,
 						 "message": f"Neo4j 三元组插入失败: {neo4j_result.get('message', '未知错误')}",
 						 "status_code": 400
 						}
 					info(f"文件 {realpath} 三元组成功插入 Neo4j: {neo4j_result.get('message')}")
 				timings["insert_neo4j"] = time.time() - start_neo4j
 				debug(f"Neo4j 插入耗时: {timings['insert_neo4j']:.2f} 秒")
 			 else:
 				debug(f"文件 {realpath} 未抽取到三元组")
 				timings["insert_neo4j"] = 0.0
-             timings["extract_triples"] = time.time() - start_triples
+			except Exception as e:
-             debug(
+			 timings["extract_triples"] = time.time() - start_triples if "extract_triples" not in timings else \
-                f"三元组抽取耗时: {timings['extract_triples']:.2f} 秒, 抽取到 {len(unique_triples)} 个三元组: {unique_triples[:5]}")
+			 timings["extract_triples"]
 			 timings["insert_neo4j"] = time.time() - start_neo4j if "insert_neo4j" not in timings else timings[
 				"insert_neo4j"]
 			 debug(f"处理三元组或 Neo4j 插入失败: {str(e)}, 堆栈: {traceback.format_exc()}")
 			 timings["total"] = time.time() - start_total
 			 return {
 				"status": "success",
 				"document_id": id,
 				"collection_name": "ragdb",
 				"timings": timings,
 				"unique_triples": unique_triples,
 				"message": f"文件 {realpath} 成功嵌入，但三元组处理或 Neo4j 插入失败: {str(e)}",
 				"status_code": 200
 			 }
-             if unique_triples:
+			timings["total"] = time.time() - start_total
-                debug(f"抽取到 {len(unique_triples)} 个三元组，调用 Neo4j 服务插入")
+			debug(f"总耗时: {timings['total']:.2f} 秒")
-                start_neo4j = time.time()
+			return {
-                for i in range(0, len(unique_triples), 30):  # 每次插入 30 个三元组
+			 "status": "success",
-                   batch_triples = unique_triples[i:i + 30]
+			 "userid": orgid,
-                   neo4j_result = await api_service.neo4j_insert_triples(
+			 "document_id": id,
-                      request=request,
+			 "collection_name": "ragdb",
-                      triples=batch_triples,
+			 "timings": timings,
-                      document_id=id,
+			 "unique_triples": unique_triples,
-                      knowledge_base_id=fiid,
+			 "message": f"文件 {realpath} 成功嵌入并处理三元组",
-                      userid=orgid,
+			 "status_code": 200
-                      upappid=service_params['gdb'],
+			}
                      apiname="neo4j/inserttriples",
                      user=userid
                   )
                   debug(f"Neo4j 服务响应: {neo4j_result}")
                   if neo4j_result.get("status") != "success":
                      timings["insert_neo4j"] = time.time() - start_neo4j
                      timings["total"] = time.time() - start_total
                      return {
                         "status": "error",
                         "document_id": id,
                         "collection_name": "ragdb",
                         "timings": timings,
                         "message": f"Neo4j 三元组插入失败: {neo4j_result.get('message', '未知错误')}",
                         "status_code": 400
                      }
                   info(f"文件 {realpath} 三元组成功插入 Neo4j: {neo4j_result.get('message')}")
                timings["insert_neo4j"] = time.time() - start_neo4j
                debug(f"Neo4j 插入耗时: {timings['insert_neo4j']:.2f} 秒")
             else:
                debug(f"文件 {realpath} 未抽取到三元组")
                timings["insert_neo4j"] = 0.0
-          except Exception as e:
+		except Exception as e:
-             timings["extract_triples"] = time.time() - start_triples if "extract_triples" not in timings else \
+			error(f"插入文档失败: {str(e)}, 堆栈: {traceback.format_exc()}")
-             timings["extract_triples"]
+			timings["total"] = time.time() - start_total
-             timings["insert_neo4j"] = time.time() - start_neo4j if "insert_neo4j" not in timings else timings[
+			return {
-                "insert_neo4j"]
+			 "status": "error",
-             debug(f"处理三元组或 Neo4j 插入失败: {str(e)}, 堆栈: {traceback.format_exc()}")
+			 "document_id": id,
-             timings["total"] = time.time() - start_total
+			 "collection_name": "ragdb",
-             return {
+			 "timings": timings,
-                "status": "success",
+			 "message": f"插入文档失败: {str(e)}",
-                "document_id": id,
+			 "status_code": 400
-                "collection_name": "ragdb",
+			}
                "timings": timings,
                "unique_triples": unique_triples,
                "message": f"文件 {realpath} 成功嵌入，但三元组处理或 Neo4j 插入失败: {str(e)}",
                "status_code": 200
             }
-          timings["total"] = time.time() - start_total
+	async def file_deleted(self, request, recs, userid):
-          debug(f"总耗时: {timings['total']:.2f} 秒")
+		"""删除用户指定文件数据，包括 Milvus 和 Neo4j 中的记录"""
-          return {
+		if not isinstance(recs, list):
-             "status": "success",
+			recs = [recs]	# 确保 recs 是列表，即使传入单个记录
-             "userid": orgid,
+		results = []
-             "document_id": id,
+		api_service = APIService()
-             "collection_name": "ragdb",
+		total_nodes_deleted = 0
-             "timings": timings,
+		total_rels_deleted = 0
             "unique_triples": unique_triples,
             "message": f"文件 {realpath} 成功嵌入并处理三元组",
             "status_code": 200
          }
-       except Exception as e:
+		for rec in recs:
-          error(f"插入文档失败: {str(e)}, 堆栈: {traceback.format_exc()}")
+			id = rec.get('id', '')
-          timings["total"] = time.time() - start_total
+			realpath = rec.get('realpath', '')
-          return {
+			fiid = rec.get('fiid', '')
-             "status": "error",
+			orgid = rec.get('ownerid', '')
-             "document_id": id,
+			db_type = ''
-             "collection_name": "ragdb",
+			collection_name = "ragdb" if not db_type else f"ragdb_{db_type}"
             "timings": timings,
             "message": f"插入文档失败: {str(e)}",
             "status_code": 400
          }
-    async def file_deleted(self, request, recs, userid):
+			try:
-       """删除用户指定文件数据，包括 Milvus 和 Neo4j 中的记录"""
+			 required_fields = ['id', 'realpath', 'fiid', 'ownerid']
-       if not isinstance(recs, list):
+			 missing_fields = [field for field in required_fields if not rec.get(field, '')]
-          recs = [recs]  # 确保 recs 是列表，即使传入单个记录
+			 if missing_fields:
-       results = []
+				raise ValueError(f"缺少必填字段: {', '.join(missing_fields)}")
       api_service = APIService()
       total_nodes_deleted = 0
       total_rels_deleted = 0
-       for rec in recs:
+			 # 获取服务参数
-          id = rec.get('id', '')
+			 service_params = await self.get_service_params(sor, orgid)
-          realpath = rec.get('realpath', '')
+			 if not service_params:
-          fiid = rec.get('fiid', '')
+				raise ValueError("无法获取服务参数")
          orgid = rec.get('ownerid', '')
          db_type = ''
          collection_name = "ragdb" if not db_type else f"ragdb_{db_type}"
-          try:
+			 debug(
-             required_fields = ['id', 'realpath', 'fiid', 'ownerid']
+				f"调用删除文件端点: userid={orgid}, file_path={realpath}, knowledge_base_id={fiid}, document_id={id}")
-             missing_fields = [field for field in required_fields if not rec.get(field, '')]
+			 milvus_result = await api_service.milvus_delete_document(
-             if missing_fields:
+				request=request,
-                raise ValueError(f"缺少必填字段: {', '.join(missing_fields)}")
+				userid=orgid,
 				file_path=realpath,
 				knowledge_base_id=fiid,
 				document_id=id,
 				db_type=db_type,
 				upappid=service_params['vdb'],
 				apiname="milvus/deletedocument",
 				user=userid
 			 )
-             # 获取服务参数
+			 if milvus_result.get("status") != "success":
-             service_params = await self.get_service_params(orgid)
+				raise ValueError(milvus_result.get("message", "Milvus 删除失败"))
             if not service_params:
                raise ValueError("无法获取服务参数")
-             debug(
+			 neo4j_deleted_nodes = 0
-                f"调用删除文件端点: userid={orgid}, file_path={realpath}, knowledge_base_id={fiid}, document_id={id}")
+			 neo4j_deleted_rels = 0
-             milvus_result = await api_service.milvus_delete_document(
+			 try:
-                request=request,
+				debug(f"调用 Neo4j 删除文档端点: document_id={id}")
-                userid=orgid,
+				neo4j_result = await api_service.neo4j_delete_document(
-                file_path=realpath,
+					request=request,
-                knowledge_base_id=fiid,
+					document_id=id,
-                document_id=id,
+					upappid=service_params['gdb'],
-                db_type=db_type,
+					apiname="neo4j/deletedocument",
-                upappid=service_params['vdb'],
+					user=userid
-                apiname="milvus/deletedocument",
+				)
-                user=userid
+				if neo4j_result.get("status") != "success":
-             )
+					raise ValueError(neo4j_result.get("message", "Neo4j 删除失败"))
 				nodes_deleted = neo4j_result.get("nodes_deleted", 0)
 				rels_deleted = neo4j_result.get("rels_deleted", 0)
 				neo4j_deleted_nodes += nodes_deleted
 				neo4j_deleted_rels += rels_deleted
 				total_nodes_deleted += nodes_deleted
 				total_rels_deleted += rels_deleted
 				info(f"成功删除 document_id={id} 的 {nodes_deleted} 个 Neo4j 节点和 {rels_deleted} 个关系")
 			 except Exception as e:
 				error(f"删除 document_id={id} 的 Neo4j 数据失败: {str(e)}")
-             if milvus_result.get("status") != "success":
+			 results.append({
-                raise ValueError(milvus_result.get("message", "Milvus 删除失败"))
+				"status": "success",
 				"collection_name": collection_name,
 				"document_id": id,
 				"message": f"成功删除文件 {realpath} 的 Milvus 记录，{neo4j_deleted_nodes} 个 Neo4j 节点，{neo4j_deleted_rels} 个 Neo4j 关系",
 				"status_code": 200
 			 })
-             neo4j_deleted_nodes = 0
+			except Exception as e:
-             neo4j_deleted_rels = 0
+			 error(f"删除文档 {realpath} 失败: {str(e)}, 堆栈: {traceback.format_exc()}")
-             try:
+			 results.append({
-                debug(f"调用 Neo4j 删除文档端点: document_id={id}")
+				"status": "error",
-                neo4j_result = await api_service.neo4j_delete_document(
+				"collection_name": collection_name,
-                   request=request,
+				"document_id": id,
-                   document_id=id,
+				"message": f"删除文档 {realpath} 失败: {str(e)}",
-                   upappid=service_params['gdb'],
+				"status_code": 400
-                   apiname="neo4j/deletedocument",
+			 })
                   user=userid
                )
                if neo4j_result.get("status") != "success":
                   raise ValueError(neo4j_result.get("message", "Neo4j 删除失败"))
                nodes_deleted = neo4j_result.get("nodes_deleted", 0)
                rels_deleted = neo4j_result.get("rels_deleted", 0)
                neo4j_deleted_nodes += nodes_deleted
                neo4j_deleted_rels += rels_deleted
                total_nodes_deleted += nodes_deleted
                total_rels_deleted += rels_deleted
                info(f"成功删除 document_id={id} 的 {nodes_deleted} 个 Neo4j 节点和 {rels_deleted} 个关系")
             except Exception as e:
                error(f"删除 document_id={id} 的 Neo4j 数据失败: {str(e)}")
-             results.append({
+		return {
-                "status": "success",
+			"status": "success" if all(r["status"] == "success" for r in results) else "partial",
-                "collection_name": collection_name,
+			"results": results,
-                "document_id": id,
+			"total_nodes_deleted": total_nodes_deleted,
-                "message": f"成功删除文件 {realpath} 的 Milvus 记录，{neo4j_deleted_nodes} 个 Neo4j 节点，{neo4j_deleted_rels} 个 Neo4j 关系",
+			"total_rels_deleted": total_rels_deleted,
-                "status_code": 200
+			"message": f"处理 {len(recs)} 个文件，成功删除 {sum(1 for r in results if r['status'] == 'success')} 个",
-             })
+			"status_code": 200 if all(r["status"] == "success" for r in results) else 207
-
+		}
          except Exception as e:
             error(f"删除文档 {realpath} 失败: {str(e)}, 堆栈: {traceback.format_exc()}")
             results.append({
                "status": "error",
                "collection_name": collection_name,
                "document_id": id,
                "message": f"删除文档 {realpath} 失败: {str(e)}",
                "status_code": 400
             })
       return {
          "status": "success" if all(r["status"] == "success" for r in results) else "partial",
          "results": results,
          "total_nodes_deleted": total_nodes_deleted,
          "total_rels_deleted": total_rels_deleted,
          "message": f"处理 {len(recs)} 个文件，成功删除 {sum(1 for r in results if r['status'] == 'success')} 个",
          "status_code": 200 if all(r["status"] == "success" for r in results) else 207
       }
 async def test_ragfilemgr():
-    """测试 RagFileMgr 类的 get_service_params"""
+	"""测试 RagFileMgr 类的 get_service_params"""
-    print("初始化数据库连接池...")
+	print("初始化数据库连接池...")
-    dbs = {
+	dbs = {
-        "kyrag": {
+		"kyrag": {
-            "driver": "aiomysql",
+			"driver": "aiomysql",
-            "async_mode": True,
+			"async_mode": True,
-            "coding": "utf8",
+			"coding": "utf8",
-            "maxconn": 100,
+			"maxconn": 100,
-            "dbname": "kyrag",
+			"dbname": "kyrag",
-            "kwargs": {
+			"kwargs": {
-                "user": "test",
+				"user": "test",
-                "db": "kyrag",
+				"db": "kyrag",
-                "password": "QUZVcXg5V1p1STMybG5Ia6mX9D0v7+g=",
+				"password": "QUZVcXg5V1p1STMybG5Ia6mX9D0v7+g=",
-                "host": "db"
+				"host": "db"
-            }
+			}
-        }
+		}
-    }
+	}
-    DBPools(dbs)
+	DBPools(dbs)
-    ragfilemgr = RagFileMgr()
+	ragfilemgr = RagFileMgr()
-    orgid = "04J6VbxLqB_9RPMcgOv_8"
+	orgid = "04J6VbxLqB_9RPMcgOv_8"
-    result = await ragfilemgr.get_service_params(orgid)
+	result = await ragfilemgr.get_service_params(orgid)
-    print(f"get_service_params 结果: {result}")
+	print(f"get_service_params 结果: {result}")
 if __name__ == "__main__":
-    asyncio.run(test_ragfilemgr())
+	asyncio.run(test_ragfilemgr())