From a05d2f474eee90011717eae044498534bf6a9d89 Mon Sep 17 00:00:00 2001 From: sal Date: Thu, 4 Jun 2026 10:08:39 +0900 Subject: [PATCH] =?UTF-8?q?IDEA-8:=20GraphRAG=20=E2=80=94=20NetworkX=20?= =?UTF-8?q?=EA=B8=B0=EB=B0=98=20=EC=A7=80=EC=8B=9D=20=EA=B7=B8=EB=9E=98?= =?UTF-8?q?=ED=94=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - td_knowledge_graph 테이블 (user_id, subject, relation, object 트리플) - GraphService: MultiDiGraph 인메모리 캐시 + MySQL 영속화 - add_relation / query_entity LangChain 도구 - call_model에 그래프 요약 자동 주입 (시스템 프롬프트) - GRAPH_ENABLED=true 환경변수로 활성화 - requirements.txt에 networkx>=3.0 추가 Co-Authored-By: Claude Sonnet 4.6 --- api.py | 1 + config.py | 3 ++ container.py | 6 +++ docs/ROADMAP.md | 25 ++++++--- requirements.txt | 2 + services/agent/agent_service.py | 22 ++++++++ services/agent/graph_tools.py | 24 +++++++++ services/db/mysql_service.py | 11 ++++ services/knowledge/__init__.py | 0 services/knowledge/graph_service.py | 82 +++++++++++++++++++++++++++++ 10 files changed, 170 insertions(+), 6 deletions(-) create mode 100644 services/agent/graph_tools.py create mode 100644 services/knowledge/__init__.py create mode 100644 services/knowledge/graph_service.py diff --git a/api.py b/api.py index 563af63..dd4210c 100644 --- a/api.py +++ b/api.py @@ -72,6 +72,7 @@ def _get_agent(user_id: str) -> AgentService: ingestion_service=_container.ingestion_service() if _cfg.conv_rag_enabled else None, crag_enabled=_cfg.crag_enabled, conv_rag_enabled=_cfg.conv_rag_enabled, + graph_service=_container.graph_service() if _cfg.graph_enabled else None, user_id=user_id, ) if _vision_model: diff --git a/config.py b/config.py index d1fed91..8414ca4 100644 --- a/config.py +++ b/config.py @@ -64,6 +64,9 @@ class Config(BaseSettings): vision_model_id: str = "mlx-community/Qwen2.5-VL-7B-Instruct-4bit" vision_max_tokens: int = 512 + # 지식 그래프 (IDEA-8) — GraphRAG + graph_enabled: bool = False + # CRAG — 검색 결과 없을 때 web_search 자동 fallback (IDEA-5) 
crag_enabled: bool = False diff --git a/container.py b/container.py index eb6f52d..9ee3c6d 100644 --- a/container.py +++ b/container.py @@ -12,6 +12,7 @@ from services.db.user_profile_repository import UserProfileRepository from services.db.feedback_repository import FeedbackRepository from services.db.reminder_repository import ReminderRepository from services.scheduler_service import SchedulerService +from services.knowledge.graph_service import GraphService from services.ui.cli_service import CliUiService from services.events.event_bus import EventBus from services.events.handlers import StreamTokenHandler, StreamEndHandler @@ -83,6 +84,11 @@ class Container(containers.DeclarativeContainer): user_map_json=providers.Callable(lambda c: c.telegram_user_map, config), ) + graph_service = providers.Singleton( + GraphService, + db=db_service, + ) + history_service = providers.Factory( HistoryService, system_prompt=providers.Callable(lambda c: c.system_prompt, config), diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md index 1cba601..8fdd28e 100644 --- a/docs/ROADMAP.md +++ b/docs/ROADMAP.md @@ -656,14 +656,27 @@ Phase 20 RAGAS 평가 → Phase 15 (모델선택) → Phase 16 (Docke ### 장기 — 구조적 확장 -#### IDEA-8. GraphRAG / 지식 그래프 +#### ✅ IDEA-8. GraphRAG / 지식 그래프 **배경**: `td_user_profile`이 flat key-value라 엔티티 간 관계 추론이 불가능하다. -**구현 방향**: -- `(도율) -[알레르기]→ (복숭아)`, `(아록) -[부모]→ (도율)` 형태 그래프 -- NetworkX 기반 로컬 그래프 + 그래프 쿼리 도구 -- 복잡한 추론 질문 ("도율이 먹으면 안 되는 음식은?") 대응 가능 +**구현 내용**: +- `td_knowledge_graph` 테이블 — (user_id, subject, relation, object) 트리플 영구 저장 +- `GraphService` — NetworkX `MultiDiGraph` 인메모리 캐시 + MySQL 영속화 +- `add_relation(subject, relation, obj)` 도구 — 관계 저장 + 예: `도율 -[알레르기]→ 복숭아`, `아록 -[자녀]→ 도율` +- `query_entity(entity)` 도구 — 출발/도착 방향 모든 관계 조회 +- `call_model`에 저장된 그래프 요약을 시스템 프롬프트에 자동 주입 +- `.env` `GRAPH_ENABLED=true`로 활성화 (기본 비활성) + +**사용 예시**: +``` +사용자: "도율이 복숭아 알레르기가 있어" +→ add_relation("도율", "알레르기", "복숭아") + +사용자: "도율이 먹으면 안 되는 음식은?" 
+→ query_entity("도율") → "도율 -[알레르기]→ 복숭아" +``` **난이도**: 높음 | **임팩트**: 높음 (메모리 추론 능력 대폭 향상) @@ -694,5 +707,5 @@ Phase 20 RAGAS 평가 → Phase 15 (모델선택) → Phase 16 (Docke | IDEA-5 CRAG | ✅ crag_check LangGraph 노드 | 중간 | 높음 | — | | IDEA-7 Auto-Eval | ✅ eval/auto_tune.py | 중간 | 중간 | — | | IDEA-6 영수증 OCR | analyze_image 재활용 | 중간 | 높음 | 1순위 | -| IDEA-8 GraphRAG | 새 데이터 구조 | 높음 | 높음 | 7순위 | +| IDEA-8 GraphRAG | ✅ NetworkX + MySQL + 2개 도구 | 높음 | 높음 | — | | IDEA-9 PWA WebUI | 프론트엔드 재작성 | 높음 | 높음 | 8순위 | diff --git a/requirements.txt b/requirements.txt index a8131d9..88980fd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,3 +26,5 @@ gradio>=4.0.0 duckduckgo-search>=6.0.0 # Phase 14 — 음성 인터페이스 (STT) openai-whisper>=20231117 +# IDEA-8 — 지식 그래프 (GraphRAG) +networkx>=3.0 diff --git a/services/agent/agent_service.py b/services/agent/agent_service.py index 0cd1143..e87b42c 100644 --- a/services/agent/agent_service.py +++ b/services/agent/agent_service.py @@ -17,6 +17,7 @@ class AgentState(TypedDict): crag_fallback_used: bool from services.agent.tools import get_current_date, make_memory_tools, make_reminder_tools, make_retriever_tool, make_search_tool, make_vision_tool, web_search +from services.agent.graph_tools import make_graph_tools class AgentService: @@ -41,6 +42,7 @@ class AgentService: ingestion_service=None, crag_enabled: bool = False, conv_rag_enabled: bool = False, + graph_service=None, user_id: str = "default", ): self._system_prompt = system_prompt @@ -60,6 +62,7 @@ class AgentService: self._ingestion_service = ingestion_service self._crag_enabled = crag_enabled self._conv_rag_enabled = conv_rag_enabled + self._graph_service = graph_service if conversation_repository: try: @@ -91,6 +94,9 @@ class AgentService: if reminder_repository is not None: set_reminder_tool, list_reminders_tool = make_reminder_tools(reminder_repository, user_id) self._base_tools += [set_reminder_tool, list_reminders_tool] + if graph_service is not None: + add_relation_tool, 
query_entity_tool = make_graph_tools(graph_service, user_id) + self._base_tools += [add_relation_tool, query_entity_tool] self._vision_model = None # set via set_vision_model() self._llm_with_tools = chat_model.bind_tools(self._base_tools) self._chat_model = chat_model @@ -103,6 +109,22 @@ class AgentService: f"오늘 날짜: {date.today().isoformat()}\n\n" + self._system_prompt ) + if self._graph_service: + graph_summary = self._graph_service.get_summary(self._user_id) + if graph_summary: + system_content += ( + "\n\n## 지식 그래프 (저장된 관계 정보)\n" + + graph_summary + + "\n\n**지식 그래프 사용 규칙**: 가족·사물 간 관계 정보(알레르기, " + "가족 관계, 선호도, 질환 등)는 add_relation으로 저장하고, " + "특정 인물 정보 조회 시 query_entity를 먼저 호출하세요." + ) + else: + system_content += ( + "\n\n**지식 그래프 사용 규칙**: 가족·사물 간 관계 정보(알레르기, " + "가족 관계, 선호도, 질환 등)를 언급하면 add_relation으로 저장하세요." + ) + if self._profile_repo: profile = self._profile_repo.get_all(self._user_id) if profile: diff --git a/services/agent/graph_tools.py b/services/agent/graph_tools.py new file mode 100644 index 0000000..53c83f6 --- /dev/null +++ b/services/agent/graph_tools.py @@ -0,0 +1,24 @@ +from langchain_core.tools import tool + + +def make_graph_tools(graph_service, user_id: str = "default"): + """지식 그래프 저장/조회 Tool 쌍을 반환한다.""" + + @tool + def add_relation(subject: str, relation: str, obj: str) -> str: + """가족 구성원이나 사물 사이의 관계를 지식 그래프에 저장합니다. + 알레르기·가족 관계·선호도·질환·특기 등 관계형 정보를 저장할 때 사용하세요. + 예: + subject='도율', relation='알레르기', obj='복숭아' + subject='아록', relation='자녀', obj='도율' + subject='근혜', relation='직업', obj='간호사' + subject='하율', relation='좋아하는음식', obj='바나나'""" + return graph_service.add_relation(subject, relation, obj, user_id) + + @tool + def query_entity(entity: str) -> str: + """특정 인물이나 사물에 대해 저장된 모든 관계 정보를 조회합니다. 
+ 예: entity='도율' → 도율의 알레르기, 나이, 부모, 좋아하는 것 등 모든 알려진 관계""" + return graph_service.query_entity(entity, user_id) + + return add_relation, query_entity diff --git a/services/db/mysql_service.py b/services/db/mysql_service.py index e625017..be036e4 100644 --- a/services/db/mysql_service.py +++ b/services/db/mysql_service.py @@ -110,6 +110,17 @@ class DatabaseService: created_at DATETIME DEFAULT CURRENT_TIMESTAMP ) """) + cursor.execute(""" + CREATE TABLE IF NOT EXISTS td_knowledge_graph ( + id INT AUTO_INCREMENT PRIMARY KEY, + user_id VARCHAR(50) NOT NULL, + subject VARCHAR(200) NOT NULL, + relation VARCHAR(100) NOT NULL, + object VARCHAR(200) NOT NULL, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + INDEX idx_user_subject (user_id, subject(80)) + ) + """) cursor.execute(""" CREATE TABLE IF NOT EXISTS td_reminders ( id INT AUTO_INCREMENT PRIMARY KEY, diff --git a/services/knowledge/__init__.py b/services/knowledge/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/services/knowledge/graph_service.py b/services/knowledge/graph_service.py new file mode 100644 index 0000000..3a3d9da --- /dev/null +++ b/services/knowledge/graph_service.py @@ -0,0 +1,82 @@ +from __future__ import annotations + +import networkx as nx + +from services.db.mysql_service import DatabaseService + + +class GraphService: + """NetworkX 기반 지식 그래프. + + 관계 트리플(subject, relation, object)을 MySQL에 영구 저장하고 + 메모리에 로드해 빠른 그래프 쿼리를 제공한다. 
+    """
+
+    def __init__(self, db: DatabaseService):
+        self._db = db
+        self._graphs: dict[str, nx.MultiDiGraph] = {}  # user_id -> graph, filled lazily by _graph()
+
+    def _load(self, user_id: str) -> nx.MultiDiGraph:
+        """Build a fresh graph from every triple stored for ``user_id``."""
+        g = nx.MultiDiGraph()
+        rows = self._db.execute(
+            "SELECT subject, relation, object FROM td_knowledge_graph WHERE user_id = %s",
+            (user_id,),
+        )
+        for row in rows:
+            g.add_edge(row["subject"], row["object"], relation=row["relation"])
+        return g
+
+    def _graph(self, user_id: str) -> nx.MultiDiGraph:
+        """Return the cached graph for ``user_id``, loading it on first access."""
+        if user_id not in self._graphs:
+            self._graphs[user_id] = self._load(user_id)
+        return self._graphs[user_id]
+
+    def _edge_exists(self, g: nx.MultiDiGraph, subject: str, relation: str, obj: str) -> bool:
+        """Return True if the exact (subject, relation, obj) triple is already in ``g``."""
+        # out_edges(subject) would treat an unknown string as an iterable of nodes and
+        # scan the edges of its single characters, so look up the exact node pair instead.
+        parallel = g.get_edge_data(subject, obj, default={})
+        return any(d.get("relation") == relation for d in parallel.values())
+
+    def add_relation(self, subject: str, relation: str, obj: str, user_id: str) -> str:
+        """Store a relation triple; skip it if the same triple already exists."""
+        g = self._graph(user_id)
+        if self._edge_exists(g, subject, relation, obj):
+            return f"이미 저장된 관계입니다: {subject} -[{relation}]→ {obj}"
+        # Re-check the table (presumably for rows written outside this cache) before INSERT.
+        rows = self._db.execute(
+            "SELECT id FROM td_knowledge_graph "
+            "WHERE user_id=%s AND subject=%s AND relation=%s AND object=%s",
+            (user_id, subject, relation, obj),
+        )
+        if not rows:
+            self._db.execute_write(
+                "INSERT INTO td_knowledge_graph (user_id, subject, relation, object) "
+                "VALUES (%s, %s, %s, %s)",
+                (user_id, subject, relation, obj),
+            )
+        g.add_edge(subject, obj, relation=relation)
+        return f"'{subject} -[{relation}]→ {obj}' 관계를 저장했습니다."
+
+    def query_entity(self, entity: str, user_id: str) -> str:
+        """Return every relation attached to ``entity`` (outgoing first, then incoming)."""
+        g = self._graph(user_id)
+        if entity not in g:
+            return f"'{entity}'에 대해 저장된 정보가 없습니다."
+        lines = []
+        for _, target, data in g.out_edges(entity, data=True):
+            lines.append(f" {entity} -[{data['relation']}]→ {target}")
+        for source, _, data in g.in_edges(entity, data=True):
+            lines.append(f" {source} -[{data['relation']}]→ {entity}")
+        if not lines:
+            return f"'{entity}'에 대해 저장된 정보가 없습니다."
+        return f"'{entity}' 관련 정보:\n" + "\n".join(lines)
+
+    def get_summary(self, user_id: str) -> str:
+        """Return all relations, one per line, for the system prompt ('' if none)."""
+        g = self._graph(user_id)
+        if not g.edges:
+            return ""
+        return "\n".join(f" {s} -[{d['relation']}]→ {t}" for s, t, d in g.edges(data=True))