bugfix
This commit is contained in:
parent
47a4a2f606
commit
cd8e02687d
BIN
Milvus/milvus.db
BIN
Milvus/milvus.db
Binary file not shown.
Binary file not shown.
Binary file not shown.
57
app/embed.py
57
app/embed.py
@ -1,57 +0,0 @@
|
|||||||
import os
|
|
||||||
from datetime import datetime
|
|
||||||
from langchain_community.document_loaders.csv_loader import CSVLoader
|
|
||||||
from langchain_community.document_loaders.text import TextLoader
|
|
||||||
from langchain_community.document_loaders import UnstructuredPDFLoader
|
|
||||||
from langchain_community.document_loaders import UnstructuredWordDocumentLoader
|
|
||||||
from langchain_community.document_loaders import UnstructuredExcelLoader
|
|
||||||
from langchain_community.document_loaders import UnstructuredPowerPointLoader
|
|
||||||
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
|
||||||
from appPublic.log import debug
|
|
||||||
from appPublic.uniqueID import getID
|
|
||||||
|
|
||||||
from get_vector_db import get_vector_db
|
|
||||||
|
|
||||||
TEMP_FOLDER = os.getenv('TEMP_FOLDER', './_temp')
|
|
||||||
|
|
||||||
# File extensions (lower-case, without the leading dot) accepted for embedding.
_ALLOWED_FILE_SUFFIXES = frozenset(
    ('pdf', 'doc', 'docx', 'xlsx', 'xls', 'ppt', 'pptx', 'csv', 'txt')
)


def allowed_file(filename):
    """Return True when *filename* ends in a supported document extension.

    The extension is the text after the last '.' and is compared
    case-insensitively. A name without any '.' is rejected.
    """
    # rpartition yields an empty separator when there is no '.' at all.
    _, dot, suffix = filename.rpartition('.')
    return bool(dot) and suffix.lower() in _ALLOWED_FILE_SUFFIXES
|
|
||||||
|
|
||||||
# Load a document with the loader matching its extension and split it into chunks
def load_and_split_data(file_path):
    """Load *file_path* and split its content into overlapping text chunks.

    The loader is chosen from the file extension (case-insensitive); any
    extension not handled explicitly falls back to ``TextLoader``.

    Returns whatever ``RecursiveCharacterTextSplitter.split_documents``
    produces for the loaded documents (chunk_size=7500, chunk_overlap=100).
    """
    lowered = file_path.lower()
    if lowered.endswith('.pdf'):
        loader = UnstructuredPDFLoader(file_path=file_path)
    elif lowered.endswith(('.docx', '.doc')):
        loader = UnstructuredWordDocumentLoader(file_path=file_path)
    elif lowered.endswith(('.pptx', '.ppt')):
        # The previous condition tested '.pptx' twice, so '.ppt' files
        # (which allowed_file accepts) were sent to TextLoader instead.
        loader = UnstructuredPowerPointLoader(file_path=file_path)
    elif lowered.endswith(('.xlsx', '.xls')):
        loader = UnstructuredExcelLoader(file_path=file_path)
    elif lowered.endswith('.csv'):
        loader = CSVLoader(file_path=file_path)
    else:
        loader = TextLoader(file_path=file_path)
    data = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=7500, chunk_overlap=100)
    chunks = text_splitter.split_documents(data)

    return chunks
|
|
||||||
|
|
||||||
# Main function to handle the embedding process
|
|
||||||
def embed(file_path, userid, kdbname):
    """Chunk *file_path* and add the chunks to the vector DB of (userid, kdbname).

    Returns False, without touching the database, when ``allowed_file``
    rejects the path; returns True once the chunks were passed to ``db.add``.
    """
    if not allowed_file(file_path):
        return False

    chunks = load_and_split_data(file_path)
    debug(f'{chunks=}')
    db = get_vector_db(userid, kdbname)

    # One entry per chunk in each list; every chunk gets a freshly generated id.
    documents = [chunk.page_content for chunk in chunks]
    metadatas = [chunk.metadata for chunk in chunks]
    ids = [getID() for _ in chunks]
    db.add(documents=documents, metadatas=metadatas, ids=ids)
    return True
|
|
||||||
@ -1,22 +0,0 @@
|
|||||||
from ahserver.serverenv import ServerEnv
|
|
||||||
from ahserver.configuredServer import ConfiguredServer
|
|
||||||
from ahserver.webapp import webapp
|
|
||||||
from appPublic.worker import awaitify
|
|
||||||
from filemgr.init import load_filemgr
|
|
||||||
from rbac.init import load_rbac
|
|
||||||
from appbase.init import load_appbase
|
|
||||||
from rag.init import load_rag
|
|
||||||
|
|
||||||
def get_module_dbname(name):
    """Return the database name for module *name*.

    The argument is ignored: every module is mapped to 'sage'.
    """
    dbname = 'sage'
    return dbname
|
|
||||||
|
|
||||||
def init():
    """Call the rag, appbase and filemgr loaders, then register
    ``get_module_dbname`` on the server environment."""
    # Loaders run in the same order as before: rag, appbase, filemgr.
    for load_module in (load_rag, load_appbase, load_filemgr):
        load_module()
    server_env = ServerEnv()
    server_env.get_module_dbname = get_module_dbname
|
|
||||||
|
|
||||||
# Script entry point: pass init to webapp only when run directly.
if __name__ == '__main__':
    webapp(init)
|
|
||||||
|
|
||||||
BIN
data/jishu.pdf
BIN
data/jishu.pdf
Binary file not shown.
File diff suppressed because one or more lines are too long
BIN
data/qianru.pdf
BIN
data/qianru.pdf
Binary file not shown.
BIN
data/test.docx
BIN
data/test.docx
Binary file not shown.
@ -1 +0,0 @@
|
|||||||
开元云(北京)科技有限公司,是一家注册于2020年的高科技企业,在上海、南京、深圳、济南等地设有分支机构,创始团队核心成员来自一流的云计算公司及电信运营商,拥有云计算、超算、智算和网络运营专业经验,在企业市场均拥有超过十年以上行业经验,服务客户超过2万家。公司以自主研发的业务操作支撑系统(KBoss)为底座,打造开放算力应用服务平台(open-computing),将云计算、算力资源和算力应用进行整合,为高校、科研、大模型、AI等政企客户提供专业算力云服务,形成“云+网+算+应用”的一体化解决方案。在2021年,我们荣幸地成为阿里云计算的合作伙伴,致力于提供算力应用、算力网络、算网一体的产品和服务,同时为芯片、教育科研等企业提供优质的算力服务。2022年,我们与国家超级计算济南中心以及中信网络有限公司签署了战略合作协议,并成功推出了“Kboss”算网平台。在2023年,我们的平台进一步发展,成功引入火山引擎、百度智能云。目前,我们已成为阿里云、江苏未来网络集团的战略合作伙伴。同时,我们深耕“算力+教育”赛道,持续推进高校算力平台项目,积极建设学校算力网络节点,目前已经成功开拓了27所高校。公司提供新一代算力云应用服务模式,通过自主研发的开元算力云应用服务平台,整合算力资源和算法应用,利用创新算力调度化和确定性网络技术,针对现代社会对智能化和数字化需求,形成包括算力云服务、算力网络和算力应用的全场景解决方案。旨在为政府和企业提供"技术+资源+场景+运营”的产业互联网算力云应用服务平台,实现以算力云服务推动数字经济的发展。开元云科技自成立以来得到了包括工信部、教育部、全国高校学会、国家超算中心以及南京未来网络研究院等政府机构、科研机构的大力支持,合作领域包括“东数西算、大科学计算、存算分离、芯算一体及国产工业软件SaaS化”,覆盖人工智能、芯片仿真、生物制药、工业仿真、材料研发、精尖制造、海洋勘探以及气象监测等高科技领域。
|
|
||||||
BIN
data/zongshu.pdf
BIN
data/zongshu.pdf
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
data/聚类结果1.xlsx
BIN
data/聚类结果1.xlsx
Binary file not shown.
@ -1,129 +0,0 @@
|
|||||||
Metadata-Version: 2.4
|
|
||||||
Name: rag
|
|
||||||
Version: 0.0.1
|
|
||||||
Summary: rag
|
|
||||||
Home-page: https://github.com/yumoqing/rag
|
|
||||||
Author: yumoqing
|
|
||||||
Author-email: yumoqing@gmail.com
|
|
||||||
Platform: any
|
|
||||||
Classifier: Operating System :: OS Independent
|
|
||||||
Classifier: Programming Language :: Python :: 3
|
|
||||||
Classifier: License :: OSI Approved :: MIT License
|
|
||||||
Description-Content-Type: text/markdown
|
|
||||||
Requires-Dist: chromadb
|
|
||||||
Requires-Dist: langchain
|
|
||||||
Requires-Dist: langchain_community
|
|
||||||
Requires-Dist: unstructured
|
|
||||||
Requires-Dist: langchain-text-splitters
|
|
||||||
Requires-Dist: unstructured[all-docs]
|
|
||||||
Requires-Dist: langchain_milvus
|
|
||||||
Requires-Dist: langchain_huggingface
|
|
||||||
Requires-Dist: transformers
|
|
||||||
Requires-Dist: openai
|
|
||||||
Requires-Dist: torch
|
|
||||||
Requires-Dist: torchvision
|
|
||||||
Requires-Dist: pymilvus
|
|
||||||
Dynamic: author
|
|
||||||
Dynamic: author-email
|
|
||||||
Dynamic: classifier
|
|
||||||
Dynamic: description
|
|
||||||
Dynamic: description-content-type
|
|
||||||
Dynamic: home-page
|
|
||||||
Dynamic: platform
|
|
||||||
Dynamic: requires-dist
|
|
||||||
Dynamic: summary
|
|
||||||
|
|
||||||
# 知识库服务器
|
|
||||||
本系统为不同的客户提供自我管理的知识库,并在知识库基础上提供知识检索
|
|
||||||
|
|
||||||
本系统以API形式为注册的服务器提供知识服务支持,不面向最终客户
|
|
||||||
|
|
||||||
## 依赖
|
|
||||||
依赖[这些模块](requirements.txt)
|
|
||||||
|
|
||||||
## 安装部署
|
|
||||||
1. 创建rag用户
|
|
||||||
2. 登录rag用户
|
|
||||||
3. 执行以下命令
|
|
||||||
```
|
|
||||||
git clone git@git.kaiyuancloud.cn:yumoqing/rag
|
|
||||||
cd rag/script
|
|
||||||
./install.sh
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
将项目在用户根目录checkout
|
|
||||||
3.
|
|
||||||
## 功能
|
|
||||||
管理client系统的客户知识库,并提供知识查询
|
|
||||||
|
|
||||||
每个客户可以创建一到多个独立的知识库,为不同的业务场景提供知识库知识
|
|
||||||
|
|
||||||
知识库之间数据相互独立,互不干扰。
|
|
||||||
|
|
||||||
## http API
|
|
||||||
|
|
||||||
### add
|
|
||||||
增加知识库文档
|
|
||||||
|
|
||||||
#### path
|
|
||||||
/api/add
|
|
||||||
#### method
|
|
||||||
POST
|
|
||||||
#### 输入
|
|
||||||
name: authentication
|
|
||||||
value: Bearer ${apikey}
|
|
||||||
score: headers
|
|
||||||
|
|
||||||
name: file_name
|
|
||||||
value: path of uploaded file
|
|
||||||
score: data
|
|
||||||
|
|
||||||
name: userid
|
|
||||||
value: userid of client system
|
|
||||||
score: data
|
|
||||||
|
|
||||||
name: kdbname
|
|
||||||
value: rag kdb name
|
|
||||||
score: data
|
|
||||||
|
|
||||||
#### 输出
|
|
||||||
|
|
||||||
### query
|
|
||||||
查询知识库
|
|
||||||
|
|
||||||
#### path
|
|
||||||
/api/query
|
|
||||||
#### method
|
|
||||||
POST
|
|
||||||
|
|
||||||
#### 输入
|
|
||||||
name: authentication
|
|
||||||
value: Bearer ${apikey}
|
|
||||||
score: headers
|
|
||||||
|
|
||||||
name: prompt
|
|
||||||
value: ${prompt}
|
|
||||||
score: data
|
|
||||||
|
|
||||||
name: userid
|
|
||||||
value: ${userid}
|
|
||||||
score: data
|
|
||||||
|
|
||||||
name: kdbname
|
|
||||||
value: ${kdbname}
|
|
||||||
score: data
|
|
||||||
|
|
||||||
|
|
||||||
#### 输出
|
|
||||||
```
|
|
||||||
{
|
|
||||||
total:返回记录条数,
|
|
||||||
rows:返回记录内容
|
|
||||||
}
|
|
||||||
rows有以下属性
|
|
||||||
content:文本内容
|
|
||||||
distances:距离
|
|
||||||
source:文档path
|
|
||||||
```
|
|
||||||
|
|
||||||
@ -1,16 +0,0 @@
|
|||||||
README.md
|
|
||||||
setup.py
|
|
||||||
rag/__init__.py
|
|
||||||
rag/deletefile.py
|
|
||||||
rag/embed.py
|
|
||||||
rag/init.py
|
|
||||||
rag/kdb.py
|
|
||||||
rag/query.py
|
|
||||||
rag/rag.bak.py
|
|
||||||
rag/vector.py
|
|
||||||
rag/version.py
|
|
||||||
rag.egg-info/PKG-INFO
|
|
||||||
rag.egg-info/SOURCES.txt
|
|
||||||
rag.egg-info/dependency_links.txt
|
|
||||||
rag.egg-info/requires.txt
|
|
||||||
rag.egg-info/top_level.txt
|
|
||||||
@ -1 +0,0 @@
|
|||||||
|
|
||||||
@ -1,13 +0,0 @@
|
|||||||
chromadb
|
|
||||||
langchain
|
|
||||||
langchain_community
|
|
||||||
unstructured
|
|
||||||
langchain-text-splitters
|
|
||||||
unstructured[all-docs]
|
|
||||||
langchain_milvus
|
|
||||||
langchain_huggingface
|
|
||||||
transformers
|
|
||||||
openai
|
|
||||||
torch
|
|
||||||
torchvision
|
|
||||||
pymilvus
|
|
||||||
@ -1 +0,0 @@
|
|||||||
rag
|
|
||||||
10
rag/init.py
10
rag/init.py
@ -1,15 +1,7 @@
|
|||||||
from appPublic.worker import awaitify
|
from appPublic.worker import awaitify
|
||||||
from ahserver.serverenv import ServerEnv
|
from ahserver.serverenv import ServerEnv
|
||||||
from .kdb import add_kdb, add_dir, add_doc, get_all_docs
|
|
||||||
from .query import search_query
|
|
||||||
from .embed import embed
|
|
||||||
def load_rag():
|
def load_rag():
|
||||||
env = ServerEnv()
|
env = ServerEnv()
|
||||||
env.add_kdb = add_kdb
|
|
||||||
env.query = awaitify(search_query)
|
|
||||||
env.embed = awaitify(embed)
|
|
||||||
env.add_dir = add_dir
|
|
||||||
env.add_doc = add_doc
|
|
||||||
env.get_all_docs = get_all_docs
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,52 +0,0 @@
|
|||||||
#!/bin/bash
#
# Install the RAG service definition and its Nginx site configuration.
# Run from the directory that contains rag.service and rag.nginx.

# Check the operating system
OS=$(uname -s)
if [[ "$OS" != "Darwin" && "$OS" != "Linux" ]]; then
    echo "错误:此脚本仅支持 macOS 和 Linux!"
    exit 1
fi

# Check that the files to install are present
SERVICE_FILE="rag.service"
NGINX_FILE="rag.nginx"
if [[ ! -f "$SERVICE_FILE" || ! -f "$NGINX_FILE" ]]; then
    echo "错误:缺少 $SERVICE_FILE 或 $NGINX_FILE 文件"
    exit 1
fi

# Run a command through sudo on Linux and directly on macOS.
# The Linux branch below already relies on sudo for the systemd steps; the
# Nginx steps write to /etc/nginx and need the same privileges.
as_admin() {
    if [[ "$OS" == "Linux" ]]; then
        sudo "$@"
    else
        "$@"
    fi
}

# 1. Configure the service
if [[ "$OS" == "Darwin" ]]; then
    # macOS: use launchd
    # NOTE(review): launchd normally loads property-list job definitions;
    # confirm that rag.service is in a format launchctl accepts.
    mkdir -p ~/Library/LaunchAgents
    cp "$SERVICE_FILE" ~/Library/LaunchAgents/
    launchctl load ~/Library/LaunchAgents/"$SERVICE_FILE"
    launchctl start "$SERVICE_FILE"
elif [[ "$OS" == "Linux" ]]; then
    # Linux: use systemd
    sudo cp "$SERVICE_FILE" /etc/systemd/system/
    sudo systemctl daemon-reload
    sudo systemctl enable "$SERVICE_FILE"
    sudo systemctl start "$SERVICE_FILE"
fi

# 2. Configure Nginx
if ! command -v nginx &> /dev/null; then
    echo "安装 Nginx..."
    if [[ "$OS" == "Darwin" ]]; then
        brew install nginx
    elif [[ "$OS" == "Linux" ]]; then
        sudo apt-get update && sudo apt-get install -y nginx
    fi
fi

# Pick the Nginx site directory for this operating system
NGINX_CONF_DIR="/etc/nginx/sites-enabled"
if [[ "$OS" == "Darwin" ]]; then
    NGINX_CONF_DIR="/usr/local/etc/nginx/sites-enabled"
fi
as_admin mkdir -p "$NGINX_CONF_DIR"
as_admin cp "$NGINX_FILE" "$NGINX_CONF_DIR/"

# Validate the configuration before reloading; stop with a failure status
# instead of reporting a successful installation when either step fails.
if ! { as_admin nginx -t && as_admin nginx -s reload; }; then
    echo "错误:Nginx 配置重载失败"
    exit 1
fi

echo "安装完成!"
|
|
||||||
@ -1,20 +0,0 @@
|
|||||||
#!/bin/sh
#
# Kill every process whose `ps -ef` line matches the given pattern.
# Usage: killname <pattern>

if [ -z "$1" ]; then
    echo "错误:请提供进程名称"
    exit 1
fi

# Find the matching processes.
# This script's own command line contains the pattern as its argument, so
# without filtering it would list (and kill -9) itself. Column 2 of
# `ps -ef` is the PID and column 3 the parent PID: skip this shell ($$)
# and the short-lived children it forks to run this pipeline.
PIDS=$(ps -ef | grep -- "$1" | grep -v grep \
    | awk -v self="$$" '$2 != self && $3 != self {print $2}')
if [ -z "$PIDS" ]; then
    echo "未找到匹配的进程:$1"
    exit 0
fi

for PID in $PIDS; do
    echo "终止进程 $PID"
    kill -9 "$PID"
done

exit 0
|
|
||||||
@ -1,31 +0,0 @@
|
|||||||
# Reverse proxy for the RAG application listening on localhost:10098.
server {
    listen 80;
    server_name rag.opencomputing.cn;
    autoindex on;
    client_max_body_size 20m;
    proxy_set_header X-Forwarded-Host $host;
    proxy_set_header X-Forwarded-server $host;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Scheme $scheme;
    proxy_set_header X-Forwarded-Port $server_port;
    proxy_set_header X-Forwarded-Url "$scheme://$host:$server_port$request_uri";

    index index.html index.htm;

    # Echo the caller's address; handy for checking what the proxy sees.
    location ~^/ip$ {
        return 200 "$remote_addr";
    }
    location / {
        # Emit the CORS header once. It was declared twice, which sends two
        # Access-Control-Allow-Origin headers in the same response.
        add_header Access-Control-Allow-Origin *;
        # NOTE(review): proxy_set_header directives declared here replace the
        # server-level set, so X-Forwarded-Url is not forwarded for this
        # location. Confirm whether the backend needs it.
        proxy_set_header X-Forwarded-Host $host;
        proxy_set_header X-Forwarded-server $host;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Scheme $scheme;
        proxy_set_header X-Forwarded-Port $server_port;
        proxy_set_header X-real-ip $remote_addr;
        proxy_send_timeout 600s;
        proxy_read_timeout 600s;
        proxy_pass http://localhost:10098/;
    }
}
|
|
||||||
@ -1,19 +0,0 @@
|
|||||||
# systemd unit that runs the RAG application through script/rag.sh.
[Unit]
Description=RAG Service
# RAG service to control RAG application
# (kept as a comment: Documentation= takes a list of URIs, not free text)
After=network.target nginx.service
Requires=nginx.service

[Service]
# User= was declared twice with the same value; one declaration is enough.
User=wangmeihua
Group=wangmeihua
# Type=forking
WorkingDirectory=/share/wangmeihua/rag
ExecStart=/bin/bash /share/wangmeihua/rag/script/rag.sh
# Stop by killing every process whose command line matches app/ragapp.py.
ExecStop=/bin/bash /share/wangmeihua/rag/script/killname app/ragapp.py
Restart=on-failure
StandardOutput=append:/var/log/rag/rag.log
StandardError=append:/var/log/rag/error.log

[Install]
WantedBy=multi-user.target
|
|
||||||
@ -1,18 +0,0 @@
|
|||||||
#!/bin/bash
#
# Start the RAG application after killing any instance already running.

# NOTE(review): User and Group are plain shell variables and are not read
# anywhere in this script; LOG_DIR is not used either. Confirm whether they
# are still needed.
User=wangmeihua
Group=wangmeihua
PYTHON=python3
# Single definition of the install directory; the paths below derive from it.
RAG_DIR="/d/wangmeihua/rag"
RAG_PY="${RAG_DIR}/app/ragapp.py"
LOG_DIR="${RAG_DIR}/logs"

# Verify that the application file exists
if [[ ! -f "$RAG_PY" ]]; then
    echo "错误:$RAG_PY 不存在"
    exit 1
fi

# Stop the old process (argument quoted so the path is passed as one word)
"${RAG_DIR}/script/killname" "$RAG_PY"

# Start the new process
"$PYTHON" "$RAG_PY" -w "$RAG_DIR"
|
|
||||||
@ -1,46 +0,0 @@
|
|||||||
#!/bin/bash
#
# Prepare the Python environment for RAG: create a virtualenv, register it
# in .bashrc, install the requirements and install the killname helper.
# Run from the directory that contains the killname script.

HOME_DIR="/share/wangmeihua"
RAG_DIR="/share/wangmeihua/rag"
PYTHON_VERSION="python3"

# Check that Python is available
if ! command -v "$PYTHON_VERSION" &> /dev/null; then
    echo "错误:未找到 Python3"
    exit 1
fi

# Check requirements.txt
if [[ ! -f "${RAG_DIR}/requirements.txt" ]]; then
    echo "错误:${RAG_DIR}/requirements.txt 不存在"
    exit 1
fi

# Create and activate the virtual environment. Stop if either step fails so
# that pip below never installs outside the virtualenv.
mkdir -p "${HOME_DIR}/bin"
"$PYTHON_VERSION" -m venv "${HOME_DIR}/py3" || exit 1
source "${HOME_DIR}/py3/bin/activate" || exit 1

# Configure environment variables in .bashrc, only once: re-running the
# script must not append the same lines again or overwrite the backup with
# an already-modified file.
ACTIVATE_LINE="source \"${HOME_DIR}/py3/bin/activate\""
if ! grep -qF -- "$ACTIVATE_LINE" "${HOME_DIR}/.bashrc" 2> /dev/null; then
    # Back up .bashrc before modifying it
    if [[ -f "${HOME_DIR}/.bashrc" ]]; then
        cp "${HOME_DIR}/.bashrc" "${HOME_DIR}/.bashrc.bak"
    fi
    cat >> "${HOME_DIR}/.bashrc" << EOF
export PATH="${HOME_DIR}/bin:${HOME_DIR}/py3/bin:\$PATH"
source "${HOME_DIR}/py3/bin/activate"
EOF
fi

# Install dependencies
if ! pip install -r "${RAG_DIR}/requirements.txt"; then
    echo "错误:依赖安装失败"
    exit 1
fi

# Copy killname into the user's bin directory and make it executable
cp killname "${HOME_DIR}/bin"
chmod +x "${HOME_DIR}/bin/killname"

echo "环境配置完成!"
|
|
||||||
@ -1,41 +0,0 @@
|
|||||||
谷歌 industry 搜索引擎 org concept
|
|
||||||
知识图谱 Web 3.0 万维网 concept media
|
|
||||||
Web is a list of 网的 unk time
|
|
||||||
自顶向下 百科类网站 结构化数据源 concept media
|
|
||||||
结构化数据 <org> 关系数据库 concept media
|
|
||||||
非结构化数据 subclass of XML concept org
|
|
||||||
模式层 subclass of 知识图谱 concept media
|
|
||||||
结构化知识库 subclass of 知识图谱 concept misc
|
|
||||||
比尔盖茨 employer 微软 per org
|
|
||||||
5 信息抽取 facet of 数据层 media concept
|
|
||||||
信息抽取 part of 知识图谱 concept media
|
|
||||||
实体识别 subclass of 信息抽取 concept media
|
|
||||||
实体分类体系 part of 112种实体类别 concept misc
|
|
||||||
分类研究 实体类别 面向开放域的实体识别 concept media
|
|
||||||
服务器日志 特征建模 搜索引擎 concept org
|
|
||||||
关系抽取 subclass of Relation Extraction concept unk
|
|
||||||
模式匹配 实体 语料 concept media
|
|
||||||
属性抽取 <misc> 统计机器学习 concept media
|
|
||||||
属性 subclass of 实体 concept misc
|
|
||||||
数据挖掘 subclass of 结构化数据 concept media
|
|
||||||
拼图碎片 非结构化 信息抽取 concept media
|
|
||||||
歧义 used by 实体消歧 concept media
|
|
||||||
共指消解 自然语言处理 信息检索 concept misc
|
|
||||||
外部知识库 结构化数据 知识图谱 concept media
|
|
||||||
数据层的融合 模式层 关系数据库 concept media
|
|
||||||
资源描述框架 <media> 本体构建本体 concept org
|
|
||||||
DB2RDF subclass of 结构化的历史数据 cel date
|
|
||||||
自动化本体构建过程 本体库 数据驱动的自动化方式 concept media
|
|
||||||
阿里 owned by 阿里巴巴 org media
|
|
||||||
上下位关系 阿里巴巴 图谱 concept media
|
|
||||||
腾讯 owned by 阿里巴巴 org concept
|
|
||||||
知识图谱 location 城市 concept loc
|
|
||||||
串联 规则 推理策略的一环 concept media
|
|
||||||
算法 part of 知识库 concept media
|
|
||||||
知识库的更新 subclass of 概念层 concept media
|
|
||||||
知识图谱 part of 数据层 concept media
|
|
||||||
总结 part of 知识图谱 concept media
|
|
||||||
知识图谱 移动个人助理(Siri 智能语义搜索 concept media
|
|
||||||
(Sri) subclass of 的知识 eve unk
|
|
||||||
病毒 知识图谱 埃博拉病毒的症状有哪些 concept media
|
|
||||||
症状 part of 三元组 concept misc
|
|
||||||
@ -1,41 +0,0 @@
|
|||||||
谷歌 industry 搜索引擎 org concept
|
|
||||||
知识图谱 Web 3.0 万维网 concept media
|
|
||||||
Web is a list of 网的 unk time
|
|
||||||
自顶向下 百科类网站 结构化数据源 concept media
|
|
||||||
结构化数据 <org> 关系数据库 concept media
|
|
||||||
非结构化数据 subclass of XML concept org
|
|
||||||
模式层 subclass of 知识图谱 concept media
|
|
||||||
结构化知识库 subclass of 知识图谱 concept misc
|
|
||||||
比尔盖茨 employer 微软 per org
|
|
||||||
5 信息抽取 facet of 数据层 media concept
|
|
||||||
信息抽取 part of 知识图谱 concept media
|
|
||||||
实体识别 subclass of 信息抽取 concept media
|
|
||||||
实体分类体系 part of 112种实体类别 concept misc
|
|
||||||
分类研究 实体类别 面向开放域的实体识别 concept media
|
|
||||||
服务器日志 特征建模 搜索引擎 concept org
|
|
||||||
关系抽取 subclass of Relation Extraction concept unk
|
|
||||||
模式匹配 实体 语料 concept media
|
|
||||||
属性抽取 <misc> 统计机器学习 concept media
|
|
||||||
属性 subclass of 实体 concept misc
|
|
||||||
数据挖掘 subclass of 结构化数据 concept media
|
|
||||||
拼图碎片 非结构化 信息抽取 concept media
|
|
||||||
歧义 used by 实体消歧 concept media
|
|
||||||
共指消解 自然语言处理 信息检索 concept misc
|
|
||||||
外部知识库 结构化数据 知识图谱 concept media
|
|
||||||
数据层的融合 模式层 关系数据库 concept media
|
|
||||||
资源描述框架 <media> 本体构建本体 concept org
|
|
||||||
DB2RDF subclass of 结构化的历史数据 cel date
|
|
||||||
自动化本体构建过程 本体库 数据驱动的自动化方式 concept media
|
|
||||||
阿里 owned by 阿里巴巴 org media
|
|
||||||
上下位关系 阿里巴巴 图谱 concept media
|
|
||||||
腾讯 owned by 阿里巴巴 org concept
|
|
||||||
知识图谱 location 城市 concept loc
|
|
||||||
串联 规则 推理策略的一环 concept media
|
|
||||||
算法 part of 知识库 concept media
|
|
||||||
知识库的更新 subclass of 概念层 concept media
|
|
||||||
知识图谱 part of 数据层 concept media
|
|
||||||
总结 part of 知识图谱 concept media
|
|
||||||
知识图谱 移动个人助理(Siri 智能语义搜索 concept media
|
|
||||||
(Sri) subclass of 的知识 eve unk
|
|
||||||
病毒 知识图谱 埃博拉病毒的症状有哪些 concept media
|
|
||||||
症状 part of 三元组 concept misc
|
|
||||||
Loading…
x
Reference in New Issue
Block a user