- CLIP embedding (9086) + Milvus VDB (8886) + NetworkX graph (9092) - BGE-Reranker (9090) for result reranking - Hybrid retrieval: vector search + graph expansion + RRF fusion - API: /api/ingest, /api/search, /api/pipelines, /api/plugins, /api/status - Two pipelines: kg-rag-standard (full) and kg-rag-lite (vector only) - Tested E2E: ingest + search with rerank_score=0.99
80 lines
2.6 KiB
Python
80 lines
2.6 KiB
Python
# -*- coding:utf-8 -*-
|
||
"""Entity and relation extraction via LLM."""
|
||
import json
|
||
import re
|
||
from typing import List, Dict
|
||
|
||
|
||
# Prompt template sent to the LLM for knowledge-graph extraction.
# `{text}` is the only str.format() placeholder; the doubled braces
# (`{{` / `}}`) are escapes that render as literal JSON braces.
# The template asks the model to reply with a single JSON object holding
# an "entities" list and a "relations" list, and nothing else.
EXTRACTION_PROMPT = """你是一个知识图谱抽取专家。请从以下文本中抽取实体和关系。

文本:
{text}

请按以下JSON格式返回:
{{
  "entities": [
    {{"name": "实体名", "type": "实体类型(person/company/product/concept/event/location)", "description": "简要描述"}}
  ],
  "relations": [
    {{"source": "源实体名", "target": "目标实体名", "relation": "关系类型", "description": "关系描述"}}
  ]
}}

只返回JSON,不要其他内容。"""
|
||
|
||
|
||
def _parse_llm_json(response: str):
    """Decode the JSON payload contained in a raw LLM reply.

    A leading Markdown code fence (``` or ```json) and a trailing fence are
    stripped first. If the remaining text is still not valid JSON, the span
    from the first ``{`` to the last ``}`` is parsed instead, which covers
    replies where the model added prose around the object.

    Raises:
        json.JSONDecodeError: If no parseable JSON can be found.
    """
    payload = response.strip()
    if payload.startswith('```'):
        payload = re.sub(r'^```(?:json)?\s*', '', payload)
        payload = re.sub(r'\s*```$', '', payload)

    try:
        return json.loads(payload)
    except json.JSONDecodeError:
        start, end = payload.find('{'), payload.rfind('}')
        if start == -1 or end <= start:
            raise
        return json.loads(payload[start:end + 1])


def extract_entities_relations(text: str, llm_func=None, max_chars: int = 2000) -> Dict:
    """Extract entities and relations from text using an LLM.

    Args:
        text: Input text.
        llm_func: Function that takes a prompt string and returns the LLM
            response text. If None (or otherwise falsy), no extraction is
            attempted and an empty result is returned.
        max_chars: Maximum number of leading characters of ``text`` that
            are embedded in the prompt. Defaults to 2000.

    Returns:
        A dict with two keys:

        * ``"entities"``: list of dicts with ``name``, ``type`` (default
          ``"unknown"``) and ``description`` (default ``""``).
        * ``"relations"``: list of dicts with ``source``, ``target``,
          ``relation`` (default ``"related_to"``) and ``description``
          (default ``""``).

        Items that are not dicts, or that lack the required ``name`` /
        ``source`` / ``target`` keys, are dropped. If calling the LLM or
        parsing its reply fails, both lists are empty and an extra
        ``"error"`` key holds the exception message.
    """
    if not llm_func:
        return {"entities": [], "relations": []}

    prompt = EXTRACTION_PROMPT.format(text=text[:max_chars])  # Limit text length

    # Extraction is best-effort: any failure in the LLM call or in parsing
    # is reported through the "error" key instead of propagating.
    try:
        result = _parse_llm_json(llm_func(prompt))
        if not isinstance(result, dict):
            raise ValueError("LLM response JSON is not an object")

        # A null or otherwise non-list section counts as empty, so one
        # malformed section does not discard the other.
        entities = result.get("entities")
        if not isinstance(entities, list):
            entities = []
        relations = result.get("relations")
        if not isinstance(relations, list):
            relations = []

        valid_entities = [
            {
                "name": e["name"],
                "type": e.get("type", "unknown"),
                "description": e.get("description", ""),
            }
            for e in entities
            if isinstance(e, dict) and "name" in e
        ]

        # Relations are kept even when an endpoint is not among the
        # extracted entities; only the presence of both keys is required.
        valid_relations = [
            {
                "source": r["source"],
                "target": r["target"],
                "relation": r.get("relation", "related_to"),
                "description": r.get("description", ""),
            }
            for r in relations
            if isinstance(r, dict) and "source" in r and "target" in r
        ]

        return {"entities": valid_entities, "relations": valid_relations}

    except Exception as e:
        return {"entities": [], "relations": [], "error": str(e)}
|