This commit is contained in:
wangmeihua 2025-09-09 13:18:54 +08:00
parent 0399ca60dd
commit 6b0a9e9cd0
3 changed files with 29 additions and 50 deletions

View File

@ -112,9 +112,6 @@ class MilvusConnection:
if not file_path or not userid or not knowledge_base_id or not document_id: if not file_path or not userid or not knowledge_base_id or not document_id:
return {"status": "error", "message": "file_path、userid document_id和 knowledge_base_id 不能为空", return {"status": "error", "message": "file_path、userid document_id和 knowledge_base_id 不能为空",
"collection_name": collection_name, "document_id": "", "status_code": 400} "collection_name": collection_name, "document_id": "", "status_code": 400}
if "_" in userid or "_" in knowledge_base_id:
return {"status": "error", "message": "userid 和 knowledge_base_id 不能包含下划线",
"collection_name": collection_name, "document_id": document_id, "status_code": 400}
if len(knowledge_base_id) > 100: if len(knowledge_base_id) > 100:
return {"status": "error", "message": "knowledge_base_id 的长度应小于 100", return {"status": "error", "message": "knowledge_base_id 的长度应小于 100",
"collection_name": collection_name, "document_id": "", "status_code": 400} "collection_name": collection_name, "document_id": "", "status_code": 400}
@ -127,9 +124,6 @@ class MilvusConnection:
if not userid or not file_path or not knowledge_base_id or not document_id: if not userid or not file_path or not knowledge_base_id or not document_id:
return {"status": "error", "message": "userid、file_path document_id和 knowledge_base_id 不能为空", return {"status": "error", "message": "userid、file_path document_id和 knowledge_base_id 不能为空",
"collection_name": collection_name, "document_id": "", "status_code": 400} "collection_name": collection_name, "document_id": "", "status_code": 400}
if "_" in userid or "_" in knowledge_base_id:
return {"status": "error", "message": "userid 和 knowledge_base_id 不能包含下划线",
"collection_name": collection_name, "document_id": "", "status_code": 400}
if len(userid) > 100 or len(file_path) > 255 or len(knowledge_base_id) > 100: if len(userid) > 100 or len(file_path) > 255 or len(knowledge_base_id) > 100:
return {"status": "error", "message": "userid、file_path 或 knowledge_base_id 的长度超出限制", return {"status": "error", "message": "userid、file_path 或 knowledge_base_id 的长度超出限制",
"collection_name": collection_name, "document_id": "", "status_code": 400} "collection_name": collection_name, "document_id": "", "status_code": 400}
@ -140,9 +134,6 @@ class MilvusConnection:
if not userid or not knowledge_base_id: if not userid or not knowledge_base_id:
return {"status": "error", "message": "userid 和 knowledge_base_id 不能为空", return {"status": "error", "message": "userid 和 knowledge_base_id 不能为空",
"collection_name": collection_name, "document_id": "", "status_code": 400} "collection_name": collection_name, "document_id": "", "status_code": 400}
if "_" in userid or "_" in knowledge_base_id:
return {"status": "error", "message": "userid 和 knowledge_base_id 不能包含下划线",
"collection_name": collection_name, "document_id": "", "status_code": 400}
if len(userid) > 100 or len(knowledge_base_id) > 100: if len(userid) > 100 or len(knowledge_base_id) > 100:
return {"status": "error", "message": "userid 或 knowledge_base_id 的长度超出限制", return {"status": "error", "message": "userid 或 knowledge_base_id 的长度超出限制",
"collection_name": collection_name, "document_id": "", "status_code": 400} "collection_name": collection_name, "document_id": "", "status_code": 400}
@ -249,8 +240,6 @@ class MilvusConnection:
# 验证参数 # 验证参数
if not userid or not knowledge_base_id: if not userid or not knowledge_base_id:
raise ValueError("userid 和 knowledge_base_id 不能为空") raise ValueError("userid 和 knowledge_base_id 不能为空")
if "_" in userid or "_" in knowledge_base_id:
raise ValueError("userid 和 knowledge_base_id 不能包含下划线")
if len(userid) > 100 or len(knowledge_base_id) > 100: if len(userid) > 100 or len(knowledge_base_id) > 100:
raise ValueError("userid 或 knowledge_base_id 的长度超出限制") raise ValueError("userid 或 knowledge_base_id 的长度超出限制")
if not os.path.exists(file_path): if not os.path.exists(file_path):
@ -736,8 +725,6 @@ class MilvusConnection:
raise ValueError("查询文本不能为空") raise ValueError("查询文本不能为空")
if not userid: if not userid:
raise ValueError("userid 不能为空") raise ValueError("userid 不能为空")
if "_" in userid or (db_type and "_" in db_type):
raise ValueError("userid 和 db_type 不能包含下划线")
if (db_type and len(db_type) > 100) or len(userid) > 100: if (db_type and len(db_type) > 100) or len(userid) > 100:
raise ValueError("userid 或 db_type 的长度超出限制") raise ValueError("userid 或 db_type 的长度超出限制")
if limit <= 0 or limit > 16384: if limit <= 0 or limit > 16384:
@ -753,8 +740,6 @@ class MilvusConnection:
raise ValueError(f"knowledge_base_id 必须是字符串: {kb_id}") raise ValueError(f"knowledge_base_id 必须是字符串: {kb_id}")
if len(kb_id) > 100: if len(kb_id) > 100:
raise ValueError(f"knowledge_base_id 长度超出 100 个字符: {kb_id}") raise ValueError(f"knowledge_base_id 长度超出 100 个字符: {kb_id}")
if "_" in kb_id:
raise ValueError(f"knowledge_base_id 不能包含下划线: {kb_id}")
# 将查询文本转换为向量 # 将查询文本转换为向量
vector_start = time.time() vector_start = time.time()
@ -814,8 +799,6 @@ class MilvusConnection:
if not query or not userid or not knowledge_base_ids: if not query or not userid or not knowledge_base_ids:
raise ValueError("query、userid 和 knowledge_base_ids 不能为空") raise ValueError("query、userid 和 knowledge_base_ids 不能为空")
if "_" in userid or (db_type and "_" in db_type):
raise ValueError("userid 和 db_type 不能包含下划线")
if (db_type and len(db_type) > 100) or len(userid) > 100: if (db_type and len(db_type) > 100) or len(userid) > 100:
raise ValueError("db_type 或 userid 的长度超出限制") raise ValueError("db_type 或 userid 的长度超出限制")
if limit < 1 or limit > 16384 or offset < 0: if limit < 1 or limit > 16384 or offset < 0:
@ -925,8 +908,6 @@ class MilvusConnection:
if not userid: if not userid:
raise ValueError("userid 不能为空") raise ValueError("userid 不能为空")
if "_" in userid or (db_type and "_" in db_type):
raise ValueError("userid 和 db_type 不能包含下划线")
if (db_type and len(db_type) > 100) or len(userid) > 100: if (db_type and len(db_type) > 100) or len(userid) > 100:
raise ValueError("userid 或 db_type 的长度超出限制") raise ValueError("userid 或 db_type 的长度超出限制")

View File

@ -8,6 +8,7 @@ import uuid
from datetime import datetime from datetime import datetime
from llmengine.base_db import connection_register, BaseDBConnection from llmengine.base_db import connection_register, BaseDBConnection
import time import time
import traceback
class MilvusDBConnection(BaseDBConnection): class MilvusDBConnection(BaseDBConnection):
_instance = None _instance = None
@ -26,6 +27,7 @@ class MilvusDBConnection(BaseDBConnection):
try: try:
config = getConfig() config = getConfig()
self.db_path = config['milvus_db'] self.db_path = config['milvus_db']
debug(f"dbpath: {self.db_path}")
except KeyError as e: except KeyError as e:
error(f"配置文件缺少必要字段: {str(e)}") error(f"配置文件缺少必要字段: {str(e)}")
raise RuntimeError(f"配置文件缺少必要字段: {str(e)}") raise RuntimeError(f"配置文件缺少必要字段: {str(e)}")
@ -37,11 +39,13 @@ class MilvusDBConnection(BaseDBConnection):
"""初始化 Milvus 连接,确保单一连接""" """初始化 Milvus 连接,确保单一连接"""
try: try:
db_dir = os.path.dirname(self.db_path) db_dir = os.path.dirname(self.db_path)
debug(f"db_dir: {db_dir}")
if not os.path.exists(db_dir): if not os.path.exists(db_dir):
os.makedirs(db_dir, exist_ok=True) os.makedirs(db_dir, exist_ok=True)
debug(f"创建 Milvus 目录: {db_dir}") debug(f"创建 Milvus 目录: {db_dir}")
if not os.access(db_dir, os.W_OK): if not os.access(db_dir, os.W_OK):
raise RuntimeError(f"Milvus 目录 {db_dir} 不可写") raise RuntimeError(f"Milvus 目录 {db_dir} 不可写")
debug(f"不可写")
if not connections.has_connection("default"): if not connections.has_connection("default"):
connections.connect("default", uri=self.db_path) connections.connect("default", uri=self.db_path)
debug(f"已连接到 Milvus Lite路径: {self.db_path}") debug(f"已连接到 Milvus Lite路径: {self.db_path}")
@ -314,19 +318,19 @@ class MilvusDBConnection(BaseDBConnection):
# 检查是否已存在相同的 userid、knowledge_base_id 和 filename # 检查是否已存在相同的 userid、knowledge_base_id 和 filename
collection = Collection(collection_name) collection = Collection(collection_name)
collection.load() collection.load()
expr = f"userid == '{chunks[0]['userid']}' and knowledge_base_id == '{chunks[0]['knowledge_base_id']}' and filename == '{chunks[0]['filename']}'" # expr = f"userid == '{chunks[0]['userid']}' and knowledge_base_id == '{chunks[0]['knowledge_base_id']}' and filename == '{chunks[0]['filename']}'"
debug(f"检查重复文档: {expr}") # debug(f"检查重复文档: {expr}")
results = collection.query(expr=expr, output_fields=["document_id"], limit=1) # results = collection.query(expr=expr, output_fields=["document_id"], limit=1)
if results: # if results:
debug( # debug(
f"找到重复文档: userid={chunks[0]['userid']}, knowledge_base_id={chunks[0]['knowledge_base_id']}, filename={chunks[0]['filename']}") # f"找到重复文档: userid={chunks[0]['userid']}, knowledge_base_id={chunks[0]['knowledge_base_id']}, filename={chunks[0]['filename']}")
return { # return {
"status": "error", # "status": "error",
"document_id": document_id, # "document_id": document_id,
"collection_name": collection_name, # "collection_name": collection_name,
"message": f"文档已存在: userid={chunks[0]['userid']}, knowledge_base_id={chunks[0]['knowledge_base_id']}, filename={chunks[0]['filename']}", # "message": f"文档已存在: userid={chunks[0]['userid']}, knowledge_base_id={chunks[0]['knowledge_base_id']}, filename={chunks[0]['filename']}",
"status_code": 400 # "status_code": 400
} # }
# 提取数据 # 提取数据
userids = [chunk["userid"] for chunk in chunks] userids = [chunk["userid"] for chunk in chunks]
@ -368,15 +372,15 @@ class MilvusDBConnection(BaseDBConnection):
"message": f"成功插入 {len(texts)} 个文档到 {collection_name}", "message": f"成功插入 {len(texts)} 个文档到 {collection_name}",
"status_code": 200 "status_code": 200
} }
except MilvusException as e: # except MilvusException as e:
error(f"Milvus 插入失败: {str(e)}, 堆栈: {traceback.format_exc()}") # error(f"Milvus 插入失败: {str(e)}, 堆栈: {traceback.format_exc()}")
return { # return {
"status": "error", # "status": "error",
"document_id": document_id, # "document_id": document_id,
"collection_name": collection_name, # "collection_name": collection_name,
"message": f"Milvus 插入失败: {str(e)}", # "message": f"Milvus 插入失败: {str(e)}",
"status_code": 400 # "status_code": 400
} # }
except Exception as e: except Exception as e:
error(f"插入文档失败: {str(e)}, 堆栈: {traceback.format_exc()}") error(f"插入文档失败: {str(e)}, 堆栈: {traceback.format_exc()}")
return { return {
@ -443,7 +447,7 @@ class MilvusDBConnection(BaseDBConnection):
return { return {
"status": "success", "status": "success",
"collection_name": collection_name, "collection_name": collection_name,
"document_id": ",".join(document_id), "document_id":document_id,
"message": f"成功删除 {total_deleted} 条 Milvus 记录userid={userid}, file_path={file_path}, knowledge_base_id={knowledge_base_id}, document_id={document_id}", "message": f"成功删除 {total_deleted} 条 Milvus 记录userid={userid}, file_path={file_path}, knowledge_base_id={knowledge_base_id}, document_id={document_id}",
"status_code": 200 "status_code": 200
} }
@ -569,8 +573,6 @@ class MilvusDBConnection(BaseDBConnection):
raise ValueError("userid 不能为空") raise ValueError("userid 不能为空")
if not knowledge_base_ids: if not knowledge_base_ids:
raise ValueError("knowledge_base_ids 不能为空") raise ValueError("knowledge_base_ids 不能为空")
if "_" in userid:
raise ValueError("userid 不能包含下划线")
if len(userid) > 100: if len(userid) > 100:
raise ValueError("userid 的长度超出限制") raise ValueError("userid 的长度超出限制")
if limit <= 0 or limit > 16384: if limit <= 0 or limit > 16384:
@ -584,8 +586,6 @@ class MilvusDBConnection(BaseDBConnection):
raise ValueError(f"knowledge_base_id 必须是字符串: {kb_id}") raise ValueError(f"knowledge_base_id 必须是字符串: {kb_id}")
if len(kb_id) > 100: if len(kb_id) > 100:
raise ValueError(f"knowledge_base_id 长度超出 100 个字符: {kb_id}") raise ValueError(f"knowledge_base_id 长度超出 100 个字符: {kb_id}")
if "_" in kb_id:
raise ValueError(f"knowledge_base_id 不能包含下划线: {kb_id}")
if not utility.has_collection(collection_name): if not utility.has_collection(collection_name):
debug(f"集合 {collection_name} 不存在") debug(f"集合 {collection_name} 不存在")
@ -672,8 +672,6 @@ class MilvusDBConnection(BaseDBConnection):
if not userid: if not userid:
raise ValueError("userid 不能为空") raise ValueError("userid 不能为空")
if "_" in userid or (db_type and "_" in db_type):
raise ValueError("userid 和 db_type 不能包含下划线")
if (db_type and len(db_type) > 100) or len(userid) > 100: if (db_type and len(db_type) > 100) or len(userid) > 100:
raise ValueError("userid 或 db_type 的长度超出限制") raise ValueError("userid 或 db_type 的长度超出限制")

2
test/milvus/conf/config.json Normal file → Executable file
View File

@ -1,6 +1,6 @@
{ {
"filesroot": "$[workdir]$/files", "filesroot": "$[workdir]$/files",
"milvus_db": "$[workdir]$/milvus.db", "milvus_db": "/share/wangmeihua/llmengine/test/milvus/milvus.db",
"logger": { "logger": {
"name": "llmengine", "name": "llmengine",
"levelname": "info", "levelname": "info",