This commit is contained in:
wangmeihua 2025-09-09 13:18:54 +08:00
parent 0399ca60dd
commit 6b0a9e9cd0
3 changed files with 29 additions and 50 deletions

View File

@@ -112,9 +112,6 @@ class MilvusConnection:
if not file_path or not userid or not knowledge_base_id or not document_id:
return {"status": "error", "message": "file_path、userid document_id和 knowledge_base_id 不能为空",
"collection_name": collection_name, "document_id": "", "status_code": 400}
if "_" in userid or "_" in knowledge_base_id:
return {"status": "error", "message": "userid 和 knowledge_base_id 不能包含下划线",
"collection_name": collection_name, "document_id": document_id, "status_code": 400}
if len(knowledge_base_id) > 100:
return {"status": "error", "message": "knowledge_base_id 的长度应小于 100",
"collection_name": collection_name, "document_id": "", "status_code": 400}
@@ -127,9 +124,6 @@ class MilvusConnection:
if not userid or not file_path or not knowledge_base_id or not document_id:
return {"status": "error", "message": "userid、file_path document_id和 knowledge_base_id 不能为空",
"collection_name": collection_name, "document_id": "", "status_code": 400}
if "_" in userid or "_" in knowledge_base_id:
return {"status": "error", "message": "userid 和 knowledge_base_id 不能包含下划线",
"collection_name": collection_name, "document_id": "", "status_code": 400}
if len(userid) > 100 or len(file_path) > 255 or len(knowledge_base_id) > 100:
return {"status": "error", "message": "userid、file_path 或 knowledge_base_id 的长度超出限制",
"collection_name": collection_name, "document_id": "", "status_code": 400}
@@ -140,9 +134,6 @@ class MilvusConnection:
if not userid or not knowledge_base_id:
return {"status": "error", "message": "userid 和 knowledge_base_id 不能为空",
"collection_name": collection_name, "document_id": "", "status_code": 400}
if "_" in userid or "_" in knowledge_base_id:
return {"status": "error", "message": "userid 和 knowledge_base_id 不能包含下划线",
"collection_name": collection_name, "document_id": "", "status_code": 400}
if len(userid) > 100 or len(knowledge_base_id) > 100:
return {"status": "error", "message": "userid 或 knowledge_base_id 的长度超出限制",
"collection_name": collection_name, "document_id": "", "status_code": 400}
@@ -249,8 +240,6 @@ class MilvusConnection:
# 验证参数
if not userid or not knowledge_base_id:
raise ValueError("userid 和 knowledge_base_id 不能为空")
if "_" in userid or "_" in knowledge_base_id:
raise ValueError("userid 和 knowledge_base_id 不能包含下划线")
if len(userid) > 100 or len(knowledge_base_id) > 100:
raise ValueError("userid 或 knowledge_base_id 的长度超出限制")
if not os.path.exists(file_path):
@@ -736,8 +725,6 @@ class MilvusConnection:
raise ValueError("查询文本不能为空")
if not userid:
raise ValueError("userid 不能为空")
if "_" in userid or (db_type and "_" in db_type):
raise ValueError("userid 和 db_type 不能包含下划线")
if (db_type and len(db_type) > 100) or len(userid) > 100:
raise ValueError("userid 或 db_type 的长度超出限制")
if limit <= 0 or limit > 16384:
@@ -753,8 +740,6 @@ class MilvusConnection:
raise ValueError(f"knowledge_base_id 必须是字符串: {kb_id}")
if len(kb_id) > 100:
raise ValueError(f"knowledge_base_id 长度超出 100 个字符: {kb_id}")
if "_" in kb_id:
raise ValueError(f"knowledge_base_id 不能包含下划线: {kb_id}")
# 将查询文本转换为向量
vector_start = time.time()
@@ -814,8 +799,6 @@ class MilvusConnection:
if not query or not userid or not knowledge_base_ids:
raise ValueError("query、userid 和 knowledge_base_ids 不能为空")
if "_" in userid or (db_type and "_" in db_type):
raise ValueError("userid 和 db_type 不能包含下划线")
if (db_type and len(db_type) > 100) or len(userid) > 100:
raise ValueError("db_type 或 userid 的长度超出限制")
if limit < 1 or limit > 16384 or offset < 0:
@@ -925,8 +908,6 @@ class MilvusConnection:
if not userid:
raise ValueError("userid 不能为空")
if "_" in userid or (db_type and "_" in db_type):
raise ValueError("userid 和 db_type 不能包含下划线")
if (db_type and len(db_type) > 100) or len(userid) > 100:
raise ValueError("userid 或 db_type 的长度超出限制")

View File

@@ -8,6 +8,7 @@ import uuid
from datetime import datetime
from llmengine.base_db import connection_register, BaseDBConnection
import time
import traceback
class MilvusDBConnection(BaseDBConnection):
_instance = None
@@ -26,6 +27,7 @@ class MilvusDBConnection(BaseDBConnection):
try:
config = getConfig()
self.db_path = config['milvus_db']
debug(f"dbpath: {self.db_path}")
except KeyError as e:
error(f"配置文件缺少必要字段: {str(e)}")
raise RuntimeError(f"配置文件缺少必要字段: {str(e)}")
@@ -37,11 +39,13 @@ class MilvusDBConnection(BaseDBConnection):
"""初始化 Milvus 连接,确保单一连接"""
try:
db_dir = os.path.dirname(self.db_path)
debug(f"db_dir: {db_dir}")
if not os.path.exists(db_dir):
os.makedirs(db_dir, exist_ok=True)
debug(f"创建 Milvus 目录: {db_dir}")
if not os.access(db_dir, os.W_OK):
raise RuntimeError(f"Milvus 目录 {db_dir} 不可写")
debug(f"不可写")
if not connections.has_connection("default"):
connections.connect("default", uri=self.db_path)
debug(f"已连接到 Milvus Lite路径: {self.db_path}")
@@ -314,19 +318,19 @@ class MilvusDBConnection(BaseDBConnection):
# 检查是否已存在相同的 userid、knowledge_base_id 和 filename
collection = Collection(collection_name)
collection.load()
expr = f"userid == '{chunks[0]['userid']}' and knowledge_base_id == '{chunks[0]['knowledge_base_id']}' and filename == '{chunks[0]['filename']}'"
debug(f"检查重复文档: {expr}")
results = collection.query(expr=expr, output_fields=["document_id"], limit=1)
if results:
debug(
f"找到重复文档: userid={chunks[0]['userid']}, knowledge_base_id={chunks[0]['knowledge_base_id']}, filename={chunks[0]['filename']}")
return {
"status": "error",
"document_id": document_id,
"collection_name": collection_name,
"message": f"文档已存在: userid={chunks[0]['userid']}, knowledge_base_id={chunks[0]['knowledge_base_id']}, filename={chunks[0]['filename']}",
"status_code": 400
}
# expr = f"userid == '{chunks[0]['userid']}' and knowledge_base_id == '{chunks[0]['knowledge_base_id']}' and filename == '{chunks[0]['filename']}'"
# debug(f"检查重复文档: {expr}")
# results = collection.query(expr=expr, output_fields=["document_id"], limit=1)
# if results:
# debug(
# f"找到重复文档: userid={chunks[0]['userid']}, knowledge_base_id={chunks[0]['knowledge_base_id']}, filename={chunks[0]['filename']}")
# return {
# "status": "error",
# "document_id": document_id,
# "collection_name": collection_name,
# "message": f"文档已存在: userid={chunks[0]['userid']}, knowledge_base_id={chunks[0]['knowledge_base_id']}, filename={chunks[0]['filename']}",
# "status_code": 400
# }
# 提取数据
userids = [chunk["userid"] for chunk in chunks]
@@ -368,15 +372,15 @@ class MilvusDBConnection(BaseDBConnection):
"message": f"成功插入 {len(texts)} 个文档到 {collection_name}",
"status_code": 200
}
except MilvusException as e:
error(f"Milvus 插入失败: {str(e)}, 堆栈: {traceback.format_exc()}")
return {
"status": "error",
"document_id": document_id,
"collection_name": collection_name,
"message": f"Milvus 插入失败: {str(e)}",
"status_code": 400
}
# except MilvusException as e:
# error(f"Milvus 插入失败: {str(e)}, 堆栈: {traceback.format_exc()}")
# return {
# "status": "error",
# "document_id": document_id,
# "collection_name": collection_name,
# "message": f"Milvus 插入失败: {str(e)}",
# "status_code": 400
# }
except Exception as e:
error(f"插入文档失败: {str(e)}, 堆栈: {traceback.format_exc()}")
return {
@@ -443,7 +447,7 @@ class MilvusDBConnection(BaseDBConnection):
return {
"status": "success",
"collection_name": collection_name,
"document_id": ",".join(document_id),
"document_id":document_id,
"message": f"成功删除 {total_deleted} 条 Milvus 记录userid={userid}, file_path={file_path}, knowledge_base_id={knowledge_base_id}, document_id={document_id}",
"status_code": 200
}
@@ -569,8 +573,6 @@ class MilvusDBConnection(BaseDBConnection):
raise ValueError("userid 不能为空")
if not knowledge_base_ids:
raise ValueError("knowledge_base_ids 不能为空")
if "_" in userid:
raise ValueError("userid 不能包含下划线")
if len(userid) > 100:
raise ValueError("userid 的长度超出限制")
if limit <= 0 or limit > 16384:
@@ -584,8 +586,6 @@ class MilvusDBConnection(BaseDBConnection):
raise ValueError(f"knowledge_base_id 必须是字符串: {kb_id}")
if len(kb_id) > 100:
raise ValueError(f"knowledge_base_id 长度超出 100 个字符: {kb_id}")
if "_" in kb_id:
raise ValueError(f"knowledge_base_id 不能包含下划线: {kb_id}")
if not utility.has_collection(collection_name):
debug(f"集合 {collection_name} 不存在")
@@ -672,8 +672,6 @@ class MilvusDBConnection(BaseDBConnection):
if not userid:
raise ValueError("userid 不能为空")
if "_" in userid or (db_type and "_" in db_type):
raise ValueError("userid 和 db_type 不能包含下划线")
if (db_type and len(db_type) > 100) or len(userid) > 100:
raise ValueError("userid 或 db_type 的长度超出限制")
@@ -827,4 +825,4 @@ class MilvusDBConnection(BaseDBConnection):
}
connection_register('Milvus', MilvusDBConnection)
info("MilvusDBConnection registered")
info("MilvusDBConnection registered")

2
test/milvus/conf/config.json Normal file → Executable file
View File

@@ -1,6 +1,6 @@
{
"filesroot": "$[workdir]$/files",
"milvus_db": "$[workdir]$/milvus.db",
"milvus_db": "/share/wangmeihua/llmengine/test/milvus/milvus.db",
"logger": {
"name": "llmengine",
"levelname": "info",