Merge branch 'main' of https://git.opencomputing.cn/yumoqing/llmengine
This commit is contained in:
commit
b2e4c0befb
@ -112,9 +112,6 @@ class MilvusConnection:
|
|||||||
if not file_path or not userid or not knowledge_base_id or not document_id:
|
if not file_path or not userid or not knowledge_base_id or not document_id:
|
||||||
return {"status": "error", "message": "file_path、userid document_id和 knowledge_base_id 不能为空",
|
return {"status": "error", "message": "file_path、userid document_id和 knowledge_base_id 不能为空",
|
||||||
"collection_name": collection_name, "document_id": "", "status_code": 400}
|
"collection_name": collection_name, "document_id": "", "status_code": 400}
|
||||||
if "_" in userid or "_" in knowledge_base_id:
|
|
||||||
return {"status": "error", "message": "userid 和 knowledge_base_id 不能包含下划线",
|
|
||||||
"collection_name": collection_name, "document_id": document_id, "status_code": 400}
|
|
||||||
if len(knowledge_base_id) > 100:
|
if len(knowledge_base_id) > 100:
|
||||||
return {"status": "error", "message": "knowledge_base_id 的长度应小于 100",
|
return {"status": "error", "message": "knowledge_base_id 的长度应小于 100",
|
||||||
"collection_name": collection_name, "document_id": "", "status_code": 400}
|
"collection_name": collection_name, "document_id": "", "status_code": 400}
|
||||||
@ -127,9 +124,6 @@ class MilvusConnection:
|
|||||||
if not userid or not file_path or not knowledge_base_id or not document_id:
|
if not userid or not file_path or not knowledge_base_id or not document_id:
|
||||||
return {"status": "error", "message": "userid、file_path document_id和 knowledge_base_id 不能为空",
|
return {"status": "error", "message": "userid、file_path document_id和 knowledge_base_id 不能为空",
|
||||||
"collection_name": collection_name, "document_id": "", "status_code": 400}
|
"collection_name": collection_name, "document_id": "", "status_code": 400}
|
||||||
if "_" in userid or "_" in knowledge_base_id:
|
|
||||||
return {"status": "error", "message": "userid 和 knowledge_base_id 不能包含下划线",
|
|
||||||
"collection_name": collection_name, "document_id": "", "status_code": 400}
|
|
||||||
if len(userid) > 100 or len(file_path) > 255 or len(knowledge_base_id) > 100:
|
if len(userid) > 100 or len(file_path) > 255 or len(knowledge_base_id) > 100:
|
||||||
return {"status": "error", "message": "userid、file_path 或 knowledge_base_id 的长度超出限制",
|
return {"status": "error", "message": "userid、file_path 或 knowledge_base_id 的长度超出限制",
|
||||||
"collection_name": collection_name, "document_id": "", "status_code": 400}
|
"collection_name": collection_name, "document_id": "", "status_code": 400}
|
||||||
@ -140,9 +134,6 @@ class MilvusConnection:
|
|||||||
if not userid or not knowledge_base_id:
|
if not userid or not knowledge_base_id:
|
||||||
return {"status": "error", "message": "userid 和 knowledge_base_id 不能为空",
|
return {"status": "error", "message": "userid 和 knowledge_base_id 不能为空",
|
||||||
"collection_name": collection_name, "document_id": "", "status_code": 400}
|
"collection_name": collection_name, "document_id": "", "status_code": 400}
|
||||||
if "_" in userid or "_" in knowledge_base_id:
|
|
||||||
return {"status": "error", "message": "userid 和 knowledge_base_id 不能包含下划线",
|
|
||||||
"collection_name": collection_name, "document_id": "", "status_code": 400}
|
|
||||||
if len(userid) > 100 or len(knowledge_base_id) > 100:
|
if len(userid) > 100 or len(knowledge_base_id) > 100:
|
||||||
return {"status": "error", "message": "userid 或 knowledge_base_id 的长度超出限制",
|
return {"status": "error", "message": "userid 或 knowledge_base_id 的长度超出限制",
|
||||||
"collection_name": collection_name, "document_id": "", "status_code": 400}
|
"collection_name": collection_name, "document_id": "", "status_code": 400}
|
||||||
@ -249,8 +240,6 @@ class MilvusConnection:
|
|||||||
# 验证参数
|
# 验证参数
|
||||||
if not userid or not knowledge_base_id:
|
if not userid or not knowledge_base_id:
|
||||||
raise ValueError("userid 和 knowledge_base_id 不能为空")
|
raise ValueError("userid 和 knowledge_base_id 不能为空")
|
||||||
if "_" in userid or "_" in knowledge_base_id:
|
|
||||||
raise ValueError("userid 和 knowledge_base_id 不能包含下划线")
|
|
||||||
if len(userid) > 100 or len(knowledge_base_id) > 100:
|
if len(userid) > 100 or len(knowledge_base_id) > 100:
|
||||||
raise ValueError("userid 或 knowledge_base_id 的长度超出限制")
|
raise ValueError("userid 或 knowledge_base_id 的长度超出限制")
|
||||||
if not os.path.exists(file_path):
|
if not os.path.exists(file_path):
|
||||||
@ -736,8 +725,6 @@ class MilvusConnection:
|
|||||||
raise ValueError("查询文本不能为空")
|
raise ValueError("查询文本不能为空")
|
||||||
if not userid:
|
if not userid:
|
||||||
raise ValueError("userid 不能为空")
|
raise ValueError("userid 不能为空")
|
||||||
if "_" in userid or (db_type and "_" in db_type):
|
|
||||||
raise ValueError("userid 和 db_type 不能包含下划线")
|
|
||||||
if (db_type and len(db_type) > 100) or len(userid) > 100:
|
if (db_type and len(db_type) > 100) or len(userid) > 100:
|
||||||
raise ValueError("userid 或 db_type 的长度超出限制")
|
raise ValueError("userid 或 db_type 的长度超出限制")
|
||||||
if limit <= 0 or limit > 16384:
|
if limit <= 0 or limit > 16384:
|
||||||
@ -753,8 +740,6 @@ class MilvusConnection:
|
|||||||
raise ValueError(f"knowledge_base_id 必须是字符串: {kb_id}")
|
raise ValueError(f"knowledge_base_id 必须是字符串: {kb_id}")
|
||||||
if len(kb_id) > 100:
|
if len(kb_id) > 100:
|
||||||
raise ValueError(f"knowledge_base_id 长度超出 100 个字符: {kb_id}")
|
raise ValueError(f"knowledge_base_id 长度超出 100 个字符: {kb_id}")
|
||||||
if "_" in kb_id:
|
|
||||||
raise ValueError(f"knowledge_base_id 不能包含下划线: {kb_id}")
|
|
||||||
|
|
||||||
# 将查询文本转换为向量
|
# 将查询文本转换为向量
|
||||||
vector_start = time.time()
|
vector_start = time.time()
|
||||||
@ -814,8 +799,6 @@ class MilvusConnection:
|
|||||||
|
|
||||||
if not query or not userid or not knowledge_base_ids:
|
if not query or not userid or not knowledge_base_ids:
|
||||||
raise ValueError("query、userid 和 knowledge_base_ids 不能为空")
|
raise ValueError("query、userid 和 knowledge_base_ids 不能为空")
|
||||||
if "_" in userid or (db_type and "_" in db_type):
|
|
||||||
raise ValueError("userid 和 db_type 不能包含下划线")
|
|
||||||
if (db_type and len(db_type) > 100) or len(userid) > 100:
|
if (db_type and len(db_type) > 100) or len(userid) > 100:
|
||||||
raise ValueError("db_type 或 userid 的长度超出限制")
|
raise ValueError("db_type 或 userid 的长度超出限制")
|
||||||
if limit < 1 or limit > 16384 or offset < 0:
|
if limit < 1 or limit > 16384 or offset < 0:
|
||||||
@ -925,8 +908,6 @@ class MilvusConnection:
|
|||||||
|
|
||||||
if not userid:
|
if not userid:
|
||||||
raise ValueError("userid 不能为空")
|
raise ValueError("userid 不能为空")
|
||||||
if "_" in userid or (db_type and "_" in db_type):
|
|
||||||
raise ValueError("userid 和 db_type 不能包含下划线")
|
|
||||||
if (db_type and len(db_type) > 100) or len(userid) > 100:
|
if (db_type and len(db_type) > 100) or len(userid) > 100:
|
||||||
raise ValueError("userid 或 db_type 的长度超出限制")
|
raise ValueError("userid 或 db_type 的长度超出限制")
|
||||||
|
|
||||||
|
|||||||
@ -8,6 +8,7 @@ import uuid
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from llmengine.base_db import connection_register, BaseDBConnection
|
from llmengine.base_db import connection_register, BaseDBConnection
|
||||||
import time
|
import time
|
||||||
|
import traceback
|
||||||
|
|
||||||
class MilvusDBConnection(BaseDBConnection):
|
class MilvusDBConnection(BaseDBConnection):
|
||||||
_instance = None
|
_instance = None
|
||||||
@ -26,6 +27,7 @@ class MilvusDBConnection(BaseDBConnection):
|
|||||||
try:
|
try:
|
||||||
config = getConfig()
|
config = getConfig()
|
||||||
self.db_path = config['milvus_db']
|
self.db_path = config['milvus_db']
|
||||||
|
debug(f"dbpath: {self.db_path}")
|
||||||
except KeyError as e:
|
except KeyError as e:
|
||||||
error(f"配置文件缺少必要字段: {str(e)}")
|
error(f"配置文件缺少必要字段: {str(e)}")
|
||||||
raise RuntimeError(f"配置文件缺少必要字段: {str(e)}")
|
raise RuntimeError(f"配置文件缺少必要字段: {str(e)}")
|
||||||
@ -37,11 +39,13 @@ class MilvusDBConnection(BaseDBConnection):
|
|||||||
"""初始化 Milvus 连接,确保单一连接"""
|
"""初始化 Milvus 连接,确保单一连接"""
|
||||||
try:
|
try:
|
||||||
db_dir = os.path.dirname(self.db_path)
|
db_dir = os.path.dirname(self.db_path)
|
||||||
|
debug(f"db_dir: {db_dir}")
|
||||||
if not os.path.exists(db_dir):
|
if not os.path.exists(db_dir):
|
||||||
os.makedirs(db_dir, exist_ok=True)
|
os.makedirs(db_dir, exist_ok=True)
|
||||||
debug(f"创建 Milvus 目录: {db_dir}")
|
debug(f"创建 Milvus 目录: {db_dir}")
|
||||||
if not os.access(db_dir, os.W_OK):
|
if not os.access(db_dir, os.W_OK):
|
||||||
raise RuntimeError(f"Milvus 目录 {db_dir} 不可写")
|
raise RuntimeError(f"Milvus 目录 {db_dir} 不可写")
|
||||||
|
debug(f"不可写")
|
||||||
if not connections.has_connection("default"):
|
if not connections.has_connection("default"):
|
||||||
connections.connect("default", uri=self.db_path)
|
connections.connect("default", uri=self.db_path)
|
||||||
debug(f"已连接到 Milvus Lite,路径: {self.db_path}")
|
debug(f"已连接到 Milvus Lite,路径: {self.db_path}")
|
||||||
@ -314,19 +318,19 @@ class MilvusDBConnection(BaseDBConnection):
|
|||||||
# 检查是否已存在相同的 userid、knowledge_base_id 和 filename
|
# 检查是否已存在相同的 userid、knowledge_base_id 和 filename
|
||||||
collection = Collection(collection_name)
|
collection = Collection(collection_name)
|
||||||
collection.load()
|
collection.load()
|
||||||
expr = f"userid == '{chunks[0]['userid']}' and knowledge_base_id == '{chunks[0]['knowledge_base_id']}' and filename == '{chunks[0]['filename']}'"
|
# expr = f"userid == '{chunks[0]['userid']}' and knowledge_base_id == '{chunks[0]['knowledge_base_id']}' and filename == '{chunks[0]['filename']}'"
|
||||||
debug(f"检查重复文档: {expr}")
|
# debug(f"检查重复文档: {expr}")
|
||||||
results = collection.query(expr=expr, output_fields=["document_id"], limit=1)
|
# results = collection.query(expr=expr, output_fields=["document_id"], limit=1)
|
||||||
if results:
|
# if results:
|
||||||
debug(
|
# debug(
|
||||||
f"找到重复文档: userid={chunks[0]['userid']}, knowledge_base_id={chunks[0]['knowledge_base_id']}, filename={chunks[0]['filename']}")
|
# f"找到重复文档: userid={chunks[0]['userid']}, knowledge_base_id={chunks[0]['knowledge_base_id']}, filename={chunks[0]['filename']}")
|
||||||
return {
|
# return {
|
||||||
"status": "error",
|
# "status": "error",
|
||||||
"document_id": document_id,
|
# "document_id": document_id,
|
||||||
"collection_name": collection_name,
|
# "collection_name": collection_name,
|
||||||
"message": f"文档已存在: userid={chunks[0]['userid']}, knowledge_base_id={chunks[0]['knowledge_base_id']}, filename={chunks[0]['filename']}",
|
# "message": f"文档已存在: userid={chunks[0]['userid']}, knowledge_base_id={chunks[0]['knowledge_base_id']}, filename={chunks[0]['filename']}",
|
||||||
"status_code": 400
|
# "status_code": 400
|
||||||
}
|
# }
|
||||||
|
|
||||||
# 提取数据
|
# 提取数据
|
||||||
userids = [chunk["userid"] for chunk in chunks]
|
userids = [chunk["userid"] for chunk in chunks]
|
||||||
@ -368,15 +372,15 @@ class MilvusDBConnection(BaseDBConnection):
|
|||||||
"message": f"成功插入 {len(texts)} 个文档到 {collection_name}",
|
"message": f"成功插入 {len(texts)} 个文档到 {collection_name}",
|
||||||
"status_code": 200
|
"status_code": 200
|
||||||
}
|
}
|
||||||
except MilvusException as e:
|
# except MilvusException as e:
|
||||||
error(f"Milvus 插入失败: {str(e)}, 堆栈: {traceback.format_exc()}")
|
# error(f"Milvus 插入失败: {str(e)}, 堆栈: {traceback.format_exc()}")
|
||||||
return {
|
# return {
|
||||||
"status": "error",
|
# "status": "error",
|
||||||
"document_id": document_id,
|
# "document_id": document_id,
|
||||||
"collection_name": collection_name,
|
# "collection_name": collection_name,
|
||||||
"message": f"Milvus 插入失败: {str(e)}",
|
# "message": f"Milvus 插入失败: {str(e)}",
|
||||||
"status_code": 400
|
# "status_code": 400
|
||||||
}
|
# }
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error(f"插入文档失败: {str(e)}, 堆栈: {traceback.format_exc()}")
|
error(f"插入文档失败: {str(e)}, 堆栈: {traceback.format_exc()}")
|
||||||
return {
|
return {
|
||||||
@ -443,7 +447,7 @@ class MilvusDBConnection(BaseDBConnection):
|
|||||||
return {
|
return {
|
||||||
"status": "success",
|
"status": "success",
|
||||||
"collection_name": collection_name,
|
"collection_name": collection_name,
|
||||||
"document_id": ",".join(document_id),
|
"document_id":document_id,
|
||||||
"message": f"成功删除 {total_deleted} 条 Milvus 记录,userid={userid}, file_path={file_path}, knowledge_base_id={knowledge_base_id}, document_id={document_id}",
|
"message": f"成功删除 {total_deleted} 条 Milvus 记录,userid={userid}, file_path={file_path}, knowledge_base_id={knowledge_base_id}, document_id={document_id}",
|
||||||
"status_code": 200
|
"status_code": 200
|
||||||
}
|
}
|
||||||
@ -569,8 +573,6 @@ class MilvusDBConnection(BaseDBConnection):
|
|||||||
raise ValueError("userid 不能为空")
|
raise ValueError("userid 不能为空")
|
||||||
if not knowledge_base_ids:
|
if not knowledge_base_ids:
|
||||||
raise ValueError("knowledge_base_ids 不能为空")
|
raise ValueError("knowledge_base_ids 不能为空")
|
||||||
if "_" in userid:
|
|
||||||
raise ValueError("userid 不能包含下划线")
|
|
||||||
if len(userid) > 100:
|
if len(userid) > 100:
|
||||||
raise ValueError("userid 的长度超出限制")
|
raise ValueError("userid 的长度超出限制")
|
||||||
if limit <= 0 or limit > 16384:
|
if limit <= 0 or limit > 16384:
|
||||||
@ -584,8 +586,6 @@ class MilvusDBConnection(BaseDBConnection):
|
|||||||
raise ValueError(f"knowledge_base_id 必须是字符串: {kb_id}")
|
raise ValueError(f"knowledge_base_id 必须是字符串: {kb_id}")
|
||||||
if len(kb_id) > 100:
|
if len(kb_id) > 100:
|
||||||
raise ValueError(f"knowledge_base_id 长度超出 100 个字符: {kb_id}")
|
raise ValueError(f"knowledge_base_id 长度超出 100 个字符: {kb_id}")
|
||||||
if "_" in kb_id:
|
|
||||||
raise ValueError(f"knowledge_base_id 不能包含下划线: {kb_id}")
|
|
||||||
|
|
||||||
if not utility.has_collection(collection_name):
|
if not utility.has_collection(collection_name):
|
||||||
debug(f"集合 {collection_name} 不存在")
|
debug(f"集合 {collection_name} 不存在")
|
||||||
@ -672,8 +672,6 @@ class MilvusDBConnection(BaseDBConnection):
|
|||||||
|
|
||||||
if not userid:
|
if not userid:
|
||||||
raise ValueError("userid 不能为空")
|
raise ValueError("userid 不能为空")
|
||||||
if "_" in userid or (db_type and "_" in db_type):
|
|
||||||
raise ValueError("userid 和 db_type 不能包含下划线")
|
|
||||||
if (db_type and len(db_type) > 100) or len(userid) > 100:
|
if (db_type and len(db_type) > 100) or len(userid) > 100:
|
||||||
raise ValueError("userid 或 db_type 的长度超出限制")
|
raise ValueError("userid 或 db_type 的长度超出限制")
|
||||||
|
|
||||||
@ -827,4 +825,4 @@ class MilvusDBConnection(BaseDBConnection):
|
|||||||
}
|
}
|
||||||
|
|
||||||
connection_register('Milvus', MilvusDBConnection)
|
connection_register('Milvus', MilvusDBConnection)
|
||||||
info("MilvusDBConnection registered")
|
info("MilvusDBConnection registered")
|
||||||
|
|||||||
2
test/milvus/conf/config.json
Normal file → Executable file
2
test/milvus/conf/config.json
Normal file → Executable file
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"filesroot": "$[workdir]$/files",
|
"filesroot": "$[workdir]$/files",
|
||||||
"milvus_db": "$[workdir]$/milvus.db",
|
"milvus_db": "/share/wangmeihua/llmengine/test/milvus/milvus.db",
|
||||||
"logger": {
|
"logger": {
|
||||||
"name": "llmengine",
|
"name": "llmengine",
|
||||||
"levelname": "info",
|
"levelname": "info",
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user