# -*- coding:utf-8 -*-
"""Entity and relation extraction via LLM."""
import json
import re
from typing import List, Dict

# Prompt template sent to the LLM. ``{text}`` is the only substitution field;
# the doubled braces are literal braces of the JSON example.
EXTRACTION_PROMPT = """你是一个知识图谱抽取专家。请从以下文本中抽取实体和关系。

文本:
{text}

请按以下JSON格式返回:
{{
  "entities": [
    {{"name": "实体名", "type": "实体类型(person/company/product/concept/event/location)", "description": "简要描述"}}
  ],
  "relations": [
    {{"source": "源实体名", "target": "目标实体名", "relation": "关系类型", "description": "关系描述"}}
  ]
}}

只返回JSON,不要其他内容。"""


def _parse_json_payload(response: str):
    """Parse the JSON value contained in an LLM response string.

    A leading/trailing markdown code fence is stripped first. If the cleaned
    text is still not valid JSON (e.g. the model added prose around the
    object), the span from the first ``{`` to the last ``}`` is parsed instead.

    Raises:
        json.JSONDecodeError: If no parseable JSON could be found.
    """
    cleaned = response.strip()
    if cleaned.startswith('```'):
        cleaned = re.sub(r'^```(?:json)?\s*', '', cleaned)
        cleaned = re.sub(r'\s*```$', '', cleaned)
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        start = cleaned.find('{')
        end = cleaned.rfind('}')
        if start == -1 or end <= start:
            # Nothing that looks like an object: surface the original error.
            raise
        return json.loads(cleaned[start:end + 1])


def _as_list(value) -> List:
    """Return *value* if it is a list, otherwise an empty list."""
    return value if isinstance(value, list) else []


def extract_entities_relations(text: str, llm_func=None, *, max_chars: int = 2000) -> Dict:
    """Extract entities and relations from text using an LLM.

    Args:
        text: Input text. Only the first ``max_chars`` characters are
            inserted into the prompt.
        llm_func: Callable that takes the prompt string and returns the LLM
            response text. If falsy, an empty result is returned without
            building a prompt.
        max_chars: Maximum number of characters of ``text`` sent to the LLM
            (``None`` disables truncation). Defaults to 2000.

    Returns:
        A dict with keys ``"entities"`` and ``"relations"``.
        Each entity is ``{"name", "type", "description"}`` (``type`` defaults
        to ``"unknown"``, ``description`` to ``""``); each relation is
        ``{"source", "target", "relation", "description"}`` (``relation``
        defaults to ``"related_to"``, ``description`` to ``""``). Items that
        are not dicts or lack the required keys are dropped.
        If calling the LLM or parsing its response fails, both lists are
        empty and an ``"error"`` key carries the exception message; the
        function does not raise for those failures.
    """
    if not llm_func:
        return {"entities": [], "relations": []}

    prompt = EXTRACTION_PROMPT.format(text=text[:max_chars])

    try:
        result = _parse_json_payload(llm_func(prompt))
        if not isinstance(result, dict):
            raise ValueError(
                "expected a JSON object, got %s" % type(result).__name__
            )

        # Non-list values (e.g. null) are treated as "nothing extracted".
        valid_entities = [
            {
                "name": item["name"],
                "type": item.get("type", "unknown"),
                "description": item.get("description", ""),
            }
            for item in _as_list(result.get("entities"))
            if isinstance(item, dict) and "name" in item
        ]
        valid_relations = [
            {
                "source": item["source"],
                "target": item["target"],
                "relation": item.get("relation", "related_to"),
                "description": item.get("description", ""),
            }
            for item in _as_list(result.get("relations"))
            if isinstance(item, dict) and "source" in item and "target" in item
        ]
        return {"entities": valid_entities, "relations": valid_relations}
    except Exception as e:
        # Deliberate best-effort boundary: llm_func is caller-supplied and may
        # fail in arbitrary ways; report the failure instead of raising.
        return {"entities": [], "relations": [], "error": str(e)}