From 145b0cc96fed5a890a306c6b2beb2191508500dc Mon Sep 17 00:00:00 2001 From: sal Date: Fri, 29 May 2026 17:41:36 +0900 Subject: [PATCH] Implement Phase 12 feedback, Phase 13 Semantic Chunker, Phase 13-B Reranker, Bug 5 thinking fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Phase 12: FeedbackRepository + td_feedback ํ…Œ์ด๋ธ”, Gradio ๐Ÿ‘/๐Ÿ‘Ž ์ด๋ฒคํŠธ, run_id ์ถ”์ , LangSmith create_feedback() ์—ฐ๋™ - Phase 13: ์ปค์Šคํ…€ _SemanticSplitter ์ œ๊ฑฐ โ†’ langchain_experimental.SemanticChunker ๊ต์ฒด, buffer_size/threshold_type ํ™˜๊ฒฝ๋ณ€์ˆ˜ ์ ์šฉ - Phase 13-B: RerankService (Cross-Encoder), RetrieverService.search()์— reranker ํ†ตํ•ฉ, tools.py as_retriever() โ†’ search() ์ „ํ™˜ - Bug 5: mlx_chat_model enable_thinking ๋Ÿฐํƒ€์ž„ ์˜ค๋ฒ„๋ผ์ด๋“œ, agent_service stream_mode=["messages","custom"] ์ด์ค‘ ์ŠคํŠธ๋ฆผ, thinking ํ† ํฐ custom ์ด๋ฒคํŠธ๋กœ emit - ROADMAP: LLM ๋ชจ๋ธ๋ช… 8B ๋ฐ˜์˜, RAG์— Reranker ์ถ”๊ฐ€, ์ถ”์ฒœ ์ง„ํ–‰ ์ˆœ์„œ ๊ฐฑ์‹  Co-Authored-By: Claude Sonnet 4.6 --- app.py | 76 +++++++++++--- config.py | 8 +- container.py | 15 +++ docs/ROADMAP.md | 155 +++++++++++++++++++---------- docs/thinking-feature-analysis.md | 150 ++++++++++++++++++++++++++++ services/agent/agent_service.py | 66 +++++++++--- services/agent/tools.py | 5 +- services/db/feedback_repository.py | 19 ++++ services/db/mysql_service.py | 11 ++ services/model/mlx_chat_model.py | 19 +++- services/rag/ingestion_service.py | 59 ++--------- services/rag/rerank_service.py | 19 ++++ services/rag/retriever_service.py | 10 +- 13 files changed, 469 insertions(+), 143 deletions(-) create mode 100644 docs/thinking-feature-analysis.md create mode 100644 services/db/feedback_repository.py create mode 100644 services/rag/rerank_service.py diff --git a/app.py b/app.py index 7eefd77..872d7bc 100644 --- a/app.py +++ b/app.py @@ -1,4 +1,4 @@ -"""Gradio Web UI โ€” ์œจ๋ด‡ Phase 4 + Phase 9/10 + Phase 14(์Œ์„ฑ).""" +"""Gradio Web UI โ€” ์œจ๋ด‡ Phase 4 + Phase 9/10 + Phase 12(ํ”ผ๋“œ๋ฐฑ) + Phase 14(์Œ์„ฑ).""" import os import subprocess import tempfile @@ -17,6 +17,7 @@ db.init_schema() ingestion = container.ingestion_service() retriever = container.retriever_service() +feedback_repo = container.feedback_repository() _cfg = container.config() _agent_cache: dict[str, AgentService] = {} @@ -44,7 +45,7 @@ def transcribe_audio(filepath: str) -> str: def tts_speak(text: str, voice: str) -> str | None: - """ํ…์ŠคํŠธ๋ฅผ macOS say ๋ช…๋ น์–ด๋กœ ์Œ์„ฑ ๋ณ€ํ™˜, ์žฌ์ƒ์šฉ wav ํŒŒ์ผ ๊ฒฝ๋กœ ๋ฐ˜ํ™˜.""" + """ํ…์ŠคํŠธ๋ฅผ macOS say ๋ช…๋ น์–ด๋กœ ์Œ์„ฑ ๋ณ€ํ™˜, ์žฌ์ƒ์šฉ aiff ํŒŒ์ผ ๊ฒฝ๋กœ ๋ฐ˜ํ™˜.""" if not text: return None try: @@ -77,36 +78,72 @@ def _get_agent(user_id: str) -> AgentService: return _agent_cache[user_id] -async def respond(message, history, show_thinking, user_id, use_tts): +async def respond(message, history, show_thinking, user_id, use_tts, run_ids): if not message.strip(): - yield history, "", None + yield history, "", None, run_ids return agent = _get_agent(user_id) history = list(history) + run_ids = list(run_ids) history.append({"role": "user", "content": message}) history.append({"role": "assistant", "content": ""}) - yield history, "", None + yield history, "", None, run_ids async for token in agent.stream_response(message, show_thinking=show_thinking): history[-1]["content"] += token - yield history, "", None + yield history, "", None, run_ids + + run_ids.append(agent.last_run_id) if use_tts: response_text = history[-1]["content"] audio_path = tts_speak(response_text, _cfg.tts_voice) - yield history, "", audio_path + yield history, "", audio_path, run_ids + else: + yield history, "", None, run_ids + + +def handle_feedback(like_data: gr.LikeData, history, run_ids, user_id): + idx = like_data.index + if isinstance(idx, (list, tuple)): + idx = idx[0] + if not isinstance(idx, int) or idx >= len(history): + return + if history[idx].get("role") != "assistant": + return + asst_turn = sum(1 for m in history[:idx] if m.get("role") == "assistant") + run_id = run_ids[asst_turn] if asst_turn < len(run_ids) else None + + def _to_str(val) -> str: + return val if isinstance(val, str) else str(val) + + user_msg = _to_str(history[idx - 1]["content"]) if idx > 0 else "" + asst_msg = _to_str(history[idx]["content"]) + rating = 1 if like_data.liked else -1 + + try: + feedback_repo.save_feedback(user_id, user_msg, asst_msg, rating, run_id) + except Exception as e: + print(f"[Feedback] DB ์ €์žฅ ์‹คํŒจ: {e}") + + if run_id and os.getenv("LANGCHAIN_TRACING_V2") == "true": + try: + from langsmith import Client + Client().create_feedback(run_id=run_id, key="user_feedback", score=rating) + except Exception as e: + print(f"[Feedback] LangSmith ๊ธฐ๋ก ์‹คํŒจ: {e}") def switch_user(user_id): - """์‚ฌ์šฉ์ž ์ „ํ™˜ ์‹œ ์ฑ„ํŒ… ํ™”๋ฉด๋งŒ ์ดˆ๊ธฐํ™” (๋Œ€ํ™” ์ด๋ ฅ์€ ์œ ์ง€).""" - return [] + """์‚ฌ์šฉ์ž ์ „ํ™˜ ์‹œ ์ฑ„ํŒ… ํ™”๋ฉด๊ณผ run_ids ์ดˆ๊ธฐํ™” (๋Œ€ํ™” ์ด๋ ฅ์€ DB์— ์œ ์ง€).""" + return [], [] def reset_chat(user_id): agent = _get_agent(user_id) agent.reset() - return [] + return [], [] def ingest_files(files): @@ -143,6 +180,7 @@ with gr.Blocks(title="์œจ๋ด‡") as demo: gr.Markdown("# ์œจ๋ด‡\n์œก์•„ยท๊ธˆ์œต ์ „๋ฌธ AI ์ƒ๋‹ด ๋„์šฐ๋ฏธ") user_state = gr.State(DEFAULT_USER) + run_ids_state = gr.State([]) with gr.Tab("๋Œ€ํ™”"): with gr.Row(): @@ -185,7 +223,7 @@ with gr.Blocks(title="์œจ๋ด‡") as demo: user_selector.change( switch_user, inputs=[user_selector], - outputs=[chatbot], + outputs=[chatbot, run_ids_state], ).then( lambda u: u, inputs=[user_selector], outputs=[user_state] ) @@ -198,15 +236,21 @@ with gr.Blocks(title="์œจ๋ด‡") as demo: send_btn.click( respond, - inputs=[msg_box, chatbot, show_thinking, user_state, use_tts], - outputs=[chatbot, msg_box, tts_output], + inputs=[msg_box, chatbot, show_thinking, user_state, use_tts, run_ids_state], + outputs=[chatbot, msg_box, tts_output, run_ids_state], ) msg_box.submit( respond, - inputs=[msg_box, chatbot, show_thinking, user_state, use_tts], - outputs=[chatbot, msg_box, tts_output], + inputs=[msg_box, chatbot, show_thinking, user_state, use_tts, run_ids_state], + outputs=[chatbot, msg_box, tts_output, run_ids_state], + ) + reset_btn.click(reset_chat, inputs=[user_state], outputs=[chatbot, run_ids_state]) + + chatbot.like( + handle_feedback, + inputs=[chatbot, run_ids_state, user_state], + outputs=[], ) - reset_btn.click(reset_chat, inputs=[user_state], outputs=[chatbot]) with gr.Tab("๋ฌธ์„œ ๋“ฑ๋ก"): gr.Markdown("PDF ๋˜๋Š” TXT ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•˜๋ฉด ์œจ๋ด‡์ด ๋‚ด์šฉ์„ ์ฐธ๊ณ ํ•ด ๋‹ต๋ณ€ํ•ฉ๋‹ˆ๋‹ค.") diff --git a/config.py b/config.py index c7db764..9590fec 100644 --- a/config.py +++ b/config.py @@ -34,7 +34,13 @@ class Config(BaseSettings): # RAG rag_top_k: int = 3 - semantic_breakpoint_threshold_type: str = "percentile" # percentile | standard_deviation | interquartile + semantic_breakpoint_threshold_type: str = "percentile" # percentile | standard_deviation | interquartile | gradient + semantic_buffer_size: int = 1 # ์ธ์ ‘ ๋ฌธ์žฅ ๋ช‡ ๊ฐœ๋ฅผ ๋ฌถ์–ด ์ž„๋ฒ ๋”ฉํ• ์ง€ (1=๋‹จ์ผ ๋ฌธ์žฅ, 2=์ „ํ›„ 1๋ฌธ์žฅ ํฌํ•จ) + + # Reranker (RERANKER_ENABLED=true ์‹œ ํ™œ์„ฑํ™”) + reranker_enabled: bool = False + reranker_model_id: str = "cross-encoder/mmarco-mMiniLMv2-L12-H384-v1" # ํ•œ๊ตญ์–ด ์ง€์› ๋‹ค๊ตญ์–ด ๋ชจ๋ธ + reranker_fetch_k: int = 10 # rerank ์ „ ๋ฒกํ„ฐ ๊ฒ€์ƒ‰ ํ›„๋ณด ์ˆ˜ (rag_top_k๋ณด๋‹ค ์ปค์•ผ ํ•จ) rag_verbose: bool = False rag_show_sources: bool = False langgraph_verbose: bool = False diff --git a/container.py b/container.py index 06a3a01..2745b4a 100644 --- a/container.py +++ b/container.py @@ -9,11 +9,13 @@ from services.chat.compact_service import CompactService from services.db.mysql_service import DatabaseService from services.db.conversation_repository import ConversationRepository from services.db.user_profile_repository import UserProfileRepository +from services.db.feedback_repository import FeedbackRepository from services.ui.cli_service import CliUiService from services.events.event_bus import EventBus from services.events.handlers import StreamTokenHandler, StreamEndHandler from langchain_huggingface import HuggingFaceEmbeddings from services.rag.ingestion_service import IngestionService +from services.rag.rerank_service import RerankService from services.rag.retriever_service import RetrieverService from services.agent.agent_service import AgentService @@ -60,6 +62,11 @@ class Container(containers.DeclarativeContainer): db=db_service, ) + feedback_repository = providers.Singleton( + FeedbackRepository, + db=db_service, + ) + history_service = providers.Factory( HistoryService, system_prompt=providers.Callable(lambda c: c.system_prompt, config), @@ -97,6 +104,12 @@ class Container(containers.DeclarativeContainer): breakpoint_threshold_type=providers.Callable( lambda c: c.semantic_breakpoint_threshold_type, config ), + buffer_size=providers.Callable(lambda c: c.semantic_buffer_size, config), + ) + + reranker = providers.Callable( + lambda c: RerankService(c.reranker_model_id) if c.reranker_enabled else None, + config, ) retriever_service = providers.Singleton( @@ -105,6 +118,8 @@ class Container(containers.DeclarativeContainer): qdrant_url=providers.Callable(lambda c: c.qdrant_url, config), collection_name=providers.Callable(lambda c: c.qdrant_collection, config), top_k=providers.Callable(lambda c: c.rag_top_k, config), + reranker=reranker, + rerank_fetch_k=providers.Callable(lambda c: c.reranker_fetch_k, config), ) # Phase 3 โ€” LangGraph Agent diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md index aa7e84b..44d139d 100644 --- a/docs/ROADMAP.md +++ b/docs/ROADMAP.md @@ -4,10 +4,11 @@ | ์˜์—ญ | ํ˜„ํ™ฉ | |------|------| -| LLM | Qwen3-14B-4bit (MLX, Apple Silicon) | +| LLM | Qwen3-8B-4bit (MLX, Apple Silicon) | | Agent | LangGraph ReAct + Tool Calling + Thinking ๋ชจ๋“œ | -| RAG | Qdrant + BAAI/bge-m3 ์ž„๋ฒ ๋”ฉ + Semantic Chunking (`_SemanticSplitter`) | +| RAG | Qdrant + BAAI/bge-m3 ์ž„๋ฒ ๋”ฉ + Semantic Chunking (`SemanticChunker`) + Reranker (BAAI/bge-reranker-v2-m3) | | Tools | `search_documents`, `web_search`, `get_current_date`, `remember_user_info`, `recall_user_info` (5๊ฐœ) | +| Feedback | Gradio ๐Ÿ‘/๐Ÿ‘Ž โ†’ `td_feedback` DB ์ €์žฅ + LangSmith `create_feedback()` ์—ฐ๋™ | | UI | CLI + Gradio Web UI + ์Œ์„ฑ ์ž…๋ ฅ(STT)/์ถœ๋ ฅ(TTS) | | Memory | LangGraph MemorySaver (์„ธ์…˜ ๋‚ด) + MySQL ๋Œ€ํ™” ์ €์žฅ + ์žฅ๊ธฐ ์‚ฌ์šฉ์ž ํ”„๋กœํ•„ | | Tracing | LangSmith ํŠธ๋ ˆ์ด์‹ฑ | @@ -31,6 +32,13 @@ DB ์Šคํ‚ค๋งˆ(`td_conversations.user_id`, `td_user_profile.user_id`)๋Š” `_migrate ### โœ… ๋ฒ„๊ทธ 4 โ€” ๋‚˜์ด ๊ณ„์‚ฐ ์˜ค๋ฅ˜ (์ˆ˜์ • ์™„๋ฃŒ) LLM์ด ํ›ˆ๋ จ ๋ฐ์ดํ„ฐ ๊ธฐ์ค€ ์—ฐ๋„๋กœ ๋‚˜์ด๋ฅผ ๊ณ„์‚ฐํ•˜๋Š” ๋ฌธ์ œ. `AgentService.call_model()`์—์„œ ๋งค ํ˜ธ์ถœ ์‹œ ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ์•ž์— `์˜ค๋Š˜ ๋‚ ์งœ: {date.today().isoformat()}`๋ฅผ ์ฃผ์ž…. ํ”„๋กœํ•„์—์„œ ์ƒ๋…„์›”์ผ/์ƒ๋…„ ๊ฐ’์„ ํŒŒ์‹ฑํ•ด ํ•œ๊ตญ ๋‚˜์ด(ํ˜„์žฌ์—ฐ๋„-์ถœ์ƒ์—ฐ๋„+1)์™€ ๋งŒ ๋‚˜์ด(์ƒ์ผ ๊ธฐ์ค€ ์ •ํ™• ๊ณ„์‚ฐ)๋ฅผ ์ž๋™ ๊ณ„์‚ฐํ•ด ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ์— ํฌํ•จ. +### โœ… ๋ฒ„๊ทธ 5 โ€” ์‚ฌ๊ณ  ๊ณผ์ •(thinking) ์ฒดํฌ๋ฐ•์Šค ๋ฌดํšจ (์ˆ˜์ • ์™„๋ฃŒ) +ON/OFF์™€ ๋ฌด๊ด€ํ•˜๊ฒŒ ์‚ฌ๊ณ  ๊ณผ์ •์ด ํ‘œ์‹œ๋˜์ง€ ์•Š๋˜ ๋ฒ„๊ทธ. +- `call_model` ๋‚ด๋ถ€์—์„œ `get_stream_writer()`๋กœ thinking ํ† ํฐ์„ custom ์ด๋ฒคํŠธ๋กœ emit โ†’ ๋‹ต๋ณ€ ์•ž์— ๋จผ์ € ์ŠคํŠธ๋ฆฌ๋ฐ +- ์ฒดํฌ๋ฐ•์Šค ๊ฐ’์„ LangGraph configurable โ†’ `llm_with_tools.bind(enable_thinking=...)` ๋กœ ๋ชจ๋ธ ๋ ˆ๋ฒจ๊นŒ์ง€ ์ „๋‹ฌ (`.env` `ENABLE_THINKING` ์„ค์ •๊ณผ ๋…๋ฆฝ) +- `stream_response` ๋ฃจํ”„๋ฅผ `stream_mode=["messages", "custom"]` ์ด์ค‘ ์ŠคํŠธ๋ฆผ์œผ๋กœ ์ „ํ™˜ +- `self._think_verbose` ์ธ์Šคํ„ด์Šค ๋ณ€์ˆ˜ ์ฐธ์กฐ ๋ฒ„๊ทธ ์ˆ˜์ • (`_think_verbose` ๋กœ์ปฌ ๋ณ€์ˆ˜ ์‚ฌ์šฉ) + --- ## โœ… Phase 4 โ€” Web UI (Gradio) @@ -97,43 +105,42 @@ turns = conversation_repository.load_turns_after(self._conv_id, None, limit=10) --- -## Phase 12 โ€” ๋‹ต๋ณ€ ํ”ผ๋“œ๋ฐฑ & ํ’ˆ์งˆ ๊ฐœ์„  โ˜…โ˜…โ˜† +## โœ… Phase 12 โ€” ๋‹ต๋ณ€ ํ”ผ๋“œ๋ฐฑ & ํ’ˆ์งˆ ๊ฐœ์„  **๋ฐฐ๊ฒฝ**: ์—์ด์ „ํŠธ๊ฐ€ ์ž˜๋ชป๋œ ๋‹ต๋ณ€์„ ํ•ด๋„ ํ”ผ๋“œ๋ฐฑ ๋ฃจํ”„๊ฐ€ ์—†์–ด ๊ฐœ์„ ์ด ์–ด๋ ค์›€. -**๊ตฌํ˜„ ๋ฒ”์œ„**: -- Gradio ์ฑ„ํŒ… ๋ฉ”์‹œ์ง€๋งˆ๋‹ค ๐Ÿ‘ / ๐Ÿ‘Ž ๋ฒ„ํŠผ -- `td_feedback` ํ…Œ์ด๋ธ”์— ๋ฉ”์‹œ์ง€ยทํ‰์  ์ €์žฅ -- LangSmith์˜ `run_id`์™€ ์—ฐ๊ฒฐํ•ด ํ”ผ๋“œ๋ฐฑ์„ ํŠธ๋ ˆ์ด์Šค์— ๊ธฐ๋ก (`langsmith.Client().create_feedback()`) - -```sql -CREATE TABLE td_feedback ( - id INT AUTO_INCREMENT PRIMARY KEY, - message TEXT, - response TEXT, - rating TINYINT, -- 1: ์ข‹์Œ, -1: ๋‚˜์จ - langsmith_run_id VARCHAR(100), - created_at DATETIME DEFAULT CURRENT_TIMESTAMP -); -``` +**๊ตฌํ˜„ ๋‚ด์šฉ**: +- Gradio Chatbot ๋ฉ”์‹œ์ง€๋งˆ๋‹ค ๐Ÿ‘ / ๐Ÿ‘Ž ๋ฒ„ํŠผ (`chatbot.like()` ์ด๋ฒคํŠธ) +- `td_feedback` ํ…Œ์ด๋ธ”์— `user_id`, ์งˆ๋ฌธ, ๋‹ต๋ณ€, ํ‰์  ์ €์žฅ (`FeedbackRepository`) +- `AgentService`์—์„œ ์‘๋‹ต๋งˆ๋‹ค `run_id`(UUID)๋ฅผ LangChain config์— ์ฃผ์ž… โ†’ `last_run_id` property๋กœ ๋…ธ์ถœ +- `run_ids_state`(gr.State)๋กœ ๋Œ€ํ™” ํ„ด๋ณ„ `run_id` ์ถ”์  +- LangSmith `Client().create_feedback()` ์—ฐ๋™ (ํŠธ๋ ˆ์ด์‹ฑ ํ™œ์„ฑํ™” ์‹œ ์ž๋™ ๊ธฐ๋ก) **๋‚œ์ด๋„**: ์ค‘๊ฐ„ | **์ž„ํŒฉํŠธ**: ์ค‘๊ฐ„ (์žฅ๊ธฐ ํ’ˆ์งˆ ํ–ฅ์ƒ) --- -## Phase 13 โ€” RAG ํ’ˆ์งˆ ํ–ฅ์ƒ โ˜…โ˜…โ˜† (๋ถ€๋ถ„ ์™„๋ฃŒ) +## โœ… Phase 13 โ€” RAG ํ’ˆ์งˆ ํ–ฅ์ƒ โ˜…โ˜…โ˜… (์™„๋ฃŒ) **๋ฐฐ๊ฒฝ**: ๊ณ ์ • ํฌ๊ธฐ ์ฒญํ‚น + ๋ฒกํ„ฐ ์œ ์‚ฌ๋„ ๊ฒ€์ƒ‰๋งŒ์œผ๋กœ๋Š” ๊ด€๋ จ ์—†๋Š” ์ฒญํฌ๊ฐ€ ์„ž์ผ ์ˆ˜ ์žˆ์Œ. **โœ… Semantic Chunker โ€” ์™„๋ฃŒ** -- `_SemanticSplitter` ํด๋ž˜์Šค ์ง์ ‘ ๊ตฌํ˜„ (`services/rag/ingestion_service.py`) -- `langchain-experimental` ์‚ฌ์šฉ ์—†์ด numpy + ๊ธฐ์กด BAAI/bge-m3 ์ž„๋ฒ ๋”ฉ์œผ๋กœ ๊ตฌํ˜„ -- ์ธ์ ‘ ๋ฌธ์žฅ ๊ฐ„ ์ฝ”์‚ฌ์ธ ์œ ์‚ฌ๋„ ๊ณ„์‚ฐ โ†’ ์œ ์‚ฌ๋„ ํ•˜์œ„ 5% ์ง€์ ์—์„œ ์ฒญํฌ ๋ถ„๋ฆฌ -- `config.py`์—์„œ `rag_chunk_size` / `rag_chunk_overlap` ์ œ๊ฑฐ โ†’ `semantic_breakpoint_threshold_type` ์ถ”๊ฐ€ -**๐Ÿ”ฒ ๋ฏธ์™„ โ€” Reranker** -1. **Reranker ์ถ”๊ฐ€** โ€” `cross-encoder/ms-marco-MiniLM-L-6-v2`๋กœ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์žฌ์ˆœ์œ„ -2. **top_k ์กฐ์ •** โ€” ๊ฒ€์ƒ‰ ํ›„ rerank โ†’ ์ƒ์œ„ 3๊ฐœ๋งŒ LLM์— ์ „๋‹ฌ +์ปค์Šคํ…€ `_SemanticSplitter`๋ฅผ ์ œ๊ฑฐํ•˜๊ณ  `langchain_experimental.SemanticChunker`๋กœ ๊ต์ฒด (`services/rag/ingestion_service.py`). +๊ธฐ์กด์— ๋ฌด์‹œ๋˜๋˜ `semantic_breakpoint_threshold_type` ์„ค์ •์ด ์ด์ œ ์‹ค์ œ๋กœ ์ ์šฉ๋œ๋‹ค. + +| ๊ธฐ๋Šฅ | ์ง€์› ์—ฌ๋ถ€ | +|------|----------| +| breakpoint_threshold_type | โœ… percentile / standard_deviation / interquartile / gradient | +| buffer_size | โœ… `SEMANTIC_BUFFER_SIZE` ํ™˜๊ฒฝ๋ณ€์ˆ˜๋กœ ์„ค์ • | +| min_chunk_size | โœ… (SemanticChunker ๊ธฐ๋ณธ ์ง€์›) | +| HuggingFaceEmbeddings ์žฌ์‚ฌ์šฉ | โœ… ๊ธฐ์กด ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ ๊ทธ๋Œ€๋กœ ์‚ฌ์šฉ | + +> **langchain-experimental ํŒจํ‚ค์ง€ ์ƒํƒœ**: +> `langchain-experimental` v0.4.2๋Š” ๊ณต์‹ ์œ ์ง€๋ณด์ˆ˜ ์ข…๋ฃŒ๊ฐ€ ์„ ์–ธ๋์ง€๋งŒ([#87](https://github.com/langchain-ai/langchain-experimental/issues/87)), +> `SemanticChunker` ์ž์ฒด๋Š” ํ˜„์žฌ ์ •์ƒ ๋™์ž‘ํ•˜๋ฉฐ ํ›„์† ํŒจํ‚ค์ง€(`langchain-text-splitters`)๋กœ ์ด์ „ ์™„๋ฃŒ ์‹œ migration ์˜ˆ์ •. + +**โœ… ๋ฏธ์™„ 1 โ€” Semantic Chunker ๊ธฐ๋Šฅ ์™„์„ฑ (์™„๋ฃŒ)** > ๊ธฐ์กด Qdrant ์ €์žฅ ๋ฌธ์„œ๋Š” ์žฌ๋“ฑ๋กํ•ด์•ผ ์ƒˆ ์ฒญํ‚น ๋ฐฉ์‹์ด ์ ์šฉ๋จ. @@ -157,26 +164,67 @@ CREATE TABLE td_feedback ( --- -## Phase 15 โ€” ์˜ˆ๋ฐฉ์ ‘์ข…ยท๊ฑด๊ฐ•๊ฒ€์ง„ ์•Œ๋ฆผ ์Šค์ผ€์ค„๋Ÿฌ โ˜…โ˜…โ˜† +## โœ… Phase 13-B โ€” Reranker โ˜…โ˜…โ˜† -**๋ฐฐ๊ฒฝ**: ์•„์ด ์ƒ๋…„์„ ๊ธฐ์–ตํ•˜๊ณ  ์žˆ์œผ๋ฏ€๋กœ, ์˜ˆ๋ฐฉ์ ‘์ข… ์ผ์ •(BCG, DTaP ๋“ฑ)์„ ์ž๋™ ๊ณ„์‚ฐํ•ด ์•Œ๋ฆผ์„ ์ค„ ์ˆ˜ ์žˆ์Œ. ์œจ๋ด‡์˜ ์ฐจ๋ณ„ํ™” ํฌ์ธํŠธ. +**๋ฐฐ๊ฒฝ**: ๋ฒกํ„ฐ ์œ ์‚ฌ๋„ ๊ฒ€์ƒ‰์€ ์˜๋ฏธ์ ์œผ๋กœ ๋น„์Šทํ•œ ์ฒญํฌ๋ฅผ ๊ฐ€์ ธ์˜ค์ง€๋งŒ, ์งˆ๋ฌธ๊ณผ ์‹ค์ œ๋กœ ๊ด€๋ จ ์žˆ๋Š” ์ฒญํฌ๋ฅผ ์ •ํ™•ํžˆ ๊ฐ€๋ ค๋‚ด์ง€ ๋ชปํ•˜๋Š” ๊ฒฝ์šฐ๊ฐ€ ์žˆ๋‹ค. Reranker๋Š” ๊ฒ€์ƒ‰ ํ›„ ์ˆœ์œ„๋ฅผ ์žฌ์กฐ์ •ํ•ด LLM์— ์ „๋‹ฌ๋˜๋Š” ์ปจํ…์ŠคํŠธ ํ’ˆ์งˆ์„ ๋†’์ธ๋‹ค. -**๊ตฌํ˜„ ๋ฐฉ์‹**: -- `td_user_profile`์—์„œ ์•„์ด ์ƒ๋…„ ์กฐํšŒ โ†’ ์˜ˆ๋ฐฉ์ ‘์ข… ์Šค์ผ€์ค„ ๊ณ„์‚ฐ Tool -- Gradio "๊ฑด๊ฐ• ์ผ์ •" ํƒญ: ๋‹ฌ๋ ฅํ˜• ์ผ์ • ํ‘œ์‹œ -- APScheduler๋กœ ๋‹น์ผ ์•Œ๋ฆผ (๋˜๋Š” Gradio ์‹œ์ž‘ ์‹œ ์˜ค๋Š˜ ์ผ์ • ๋ฐฐ๋„ˆ) +**๊ตฌํ˜„ ๋‚ด์šฉ**: +- `services/rag/rerank_service.py` โ€” `RerankService` ํด๋ž˜์Šค (Cross-Encoder ๋ž˜ํผ) +- `RetrieverService.search()`: reranker ํ™œ์„ฑํ™” ์‹œ `rerank_fetch_k`(๊ธฐ๋ณธ 10)๊ฐœ ํ›„๋ณด ๊ฒ€์ƒ‰ โ†’ rerank โ†’ ์ƒ์œ„ `rag_top_k`(๊ธฐ๋ณธ 3)๊ฐœ ๋ฐ˜ํ™˜ +- `tools.py` `make_retriever_tool`: `as_retriever()` โ†’ `search()` ์ง์ ‘ ํ˜ธ์ถœ๋กœ ๋ณ€๊ฒฝ (reranker ์ž๋™ ์ ์šฉ) +- `.env` `RERANKER_ENABLED=true`๋กœ ํ™œ์„ฑํ™”, ๊ธฐ๋ณธ ๋น„ํ™œ์„ฑ (์ฒซ ์‹คํ–‰ ์‹œ ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ) -```python -@tool -def get_vaccination_schedule(birth_year: int, birth_month: int) -> str: - """์•„์ด ์ƒ๋…„์›”์„ ๊ธฐ๋ฐ˜์œผ๋กœ ์˜ˆ๋ฐฉ์ ‘์ข… ์ผ์ •์„ ๊ณ„์‚ฐํ•ฉ๋‹ˆ๋‹ค.""" -``` +| ์„ค์ • | ๊ธฐ๋ณธ๊ฐ’ | ์„ค๋ช… | +|------|--------|------| +| `RERANKER_ENABLED` | `false` | `true`๋กœ ์„ค์ • ์‹œ ํ™œ์„ฑํ™” | +| `RERANKER_MODEL_ID` | `cross-encoder/mmarco-mMiniLMv2-L12-H384-v1` | ํ•œ๊ตญ์–ด ํฌํ•จ ๋‹ค๊ตญ์–ด ๋ชจ๋ธ (117MB) | +| `RERANKER_FETCH_K` | `10` | rerank ์ „ ๋ฒกํ„ฐ ๊ฒ€์ƒ‰ ํ›„๋ณด ์ˆ˜ | -**๋‚œ์ด๋„**: ์ค‘๊ฐ„ | **์ž„ํŒฉํŠธ**: ๋†’์Œ (์œก์•„ ํŠนํ™” ์ฐจ๋ณ„ํ™”) +**๋‚œ์ด๋„**: ์ค‘๊ฐ„ | **์ž„ํŒฉํŠธ**: ๋†’์Œ (๊ด€๋ จ์„ฑ ๋‚ฎ์€ ์ฒญํฌ ํ•„ํ„ฐ๋ง โ†’ ๋‹ต๋ณ€ ์ •ํ™•๋„ ํ–ฅ์ƒ) --- -## Phase 16 โ€” ๋ชจ๋ธ ์„ ํƒ (Claude API / OpenAI ์˜ต์…˜) โ˜…โ˜†โ˜† +## Phase 18 โ€” Hybrid Search (BM25 + Vector) โ˜…โ˜…โ˜† + +**๋ฐฐ๊ฒฝ**: ํ•œ๊ตญ์–ด ์งˆ๋ฌธ์—์„œ ๊ณ ์œ ๋ช…์‚ฌยท์ „๋ฌธ์šฉ์–ด๊ฐ€ ํฌํ•จ๋œ ๊ฒฝ์šฐ ์˜๋ฏธ ๊ฒ€์ƒ‰(Dense)๋งŒ์œผ๋กœ๋Š” recall์ด ๋–จ์–ด์ง„๋‹ค. BM25 ํ‚ค์›Œ๋“œ ๊ฒ€์ƒ‰๊ณผ ๊ฒฐํ•ฉ(Hybrid)ํ•˜๋ฉด ๋ณด์™„์ด ๊ฐ€๋Šฅํ•˜๋‹ค. + +**๊ตฌํ˜„ ๋ฐฉ์‹**: +- Qdrant์˜ Sparse Vector ์ง€์› ํ™œ์šฉ (`FastEmbedSparseEmbeddings` ๋˜๋Š” BM42) +- ์ธ๋ฑ์‹ฑ ์‹œ dense + sparse ๋‘ ๋ฒกํ„ฐ ๋™์‹œ ์ €์žฅ +- ๊ฒ€์ƒ‰ ์‹œ `RRF(Reciprocal Rank Fusion)`๋กœ ๊ฒฐ๊ณผ ํ†ตํ•ฉ +- `IngestionService`, `RetrieverService` ์–‘์ชฝ ์ˆ˜์ • ํ•„์š” + +**๋‚œ์ด๋„**: ์ค‘๊ฐ„ | **์ž„ํŒฉํŠธ**: ๋†’์Œ (ํ‚ค์›Œ๋“œ ํฌํ•จ ์งˆ๋ฌธ recall ๋Œ€ํญ ํ–ฅ์ƒ) + +--- + +## Phase 19 โ€” Query Rewriting โ˜…โ˜†โ˜† + +**๋ฐฐ๊ฒฝ**: ์‚ฌ์šฉ์ž ๊ตฌ์–ด์ฒด ์งˆ๋ฌธ("์•„์ด๊ฐ€ ๋ฐฅ์„ ์•ˆ ๋จน์–ด์š”")์€ ๋ฒกํ„ฐ ๊ฒ€์ƒ‰์— ์ตœ์ ํ™”๋˜์–ด ์žˆ์ง€ ์•Š๋‹ค. LLM์ด ๊ฒ€์ƒ‰ ์ „์— ์งˆ๋ฌธ์„ ์žฌ์ž‘์„ฑํ•˜๋ฉด ๊ด€๋ จ ๋ฌธ์„œ ๊ฒ€์ƒ‰ ํ™•๋ฅ ์ด ๋†’์•„์ง„๋‹ค. + +**๊ตฌํ˜„ ๋ฐฉ์‹**: +- LangGraph์— `query_rewrite` ๋…ธ๋“œ ์ถ”๊ฐ€ (agent โ†’ query_rewrite โ†’ tools ์ˆœ์„œ) +- ๋˜๋Š” `search_documents` ๋„๊ตฌ ๋‚ด๋ถ€์—์„œ rewrite ํ›„ ๊ฒ€์ƒ‰ +- ํ”„๋กฌํ”„ํŠธ: "๋‹ค์Œ ์งˆ๋ฌธ์„ ๋ฌธ์„œ ๊ฒ€์ƒ‰์— ์ตœ์ ํ™”๋œ ํ‚ค์›Œ๋“œ ์ค‘์‹ฌ ๋ฌธ์žฅ์œผ๋กœ ๋ณ€ํ™˜ํ•˜์„ธ์š”" + +**๋‚œ์ด๋„**: ํ•˜ | **์ž„ํŒฉํŠธ**: ์ค‘๊ฐ„ (๊ตฌ์–ด์ฒด ์งˆ๋ฌธ ๊ฒ€์ƒ‰ ํ’ˆ์งˆ ํ–ฅ์ƒ) + +--- + +## Phase 20 โ€” RAG ํ’ˆ์งˆ ์ž๋™ ํ‰๊ฐ€ (RAGAS) โ˜…โ˜†โ˜† + +**๋ฐฐ๊ฒฝ**: ์ฒญํ‚น ์ „๋žตยท๊ฒ€์ƒ‰ ํŒŒ๋ผ๋ฏธํ„ฐยทReranker ๋ณ€๊ฒฝ ์‹œ ๋‹ต๋ณ€ ํ’ˆ์งˆ์ด ์‹ค์ œ๋กœ ๋‚˜์•„์กŒ๋Š”์ง€ ์ˆ˜์น˜๋กœ ํ™•์ธํ•  ๋ฐฉ๋ฒ•์ด ์—†๋‹ค. + +**๊ตฌํ˜„ ๋ฐฉ์‹**: +- `ragas` ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋กœ FaithfulnessยทAnswer RelevancyยทContext Recall ์ž๋™ ์ธก์ • +- ํ…Œ์ŠคํŠธ ์งˆ๋ฌธ-์ •๋‹ต ์…‹์„ `eval/` ๋””๋ ‰ํ„ฐ๋ฆฌ์— ๊ด€๋ฆฌ +- ์„ค์ • ๋ณ€๊ฒฝ ํ›„ `python eval/run_ragas.py`๋กœ ๋น„๊ต ๊ฐ€๋Šฅ + +**๋‚œ์ด๋„**: ์ค‘๊ฐ„ | **์ž„ํŒฉํŠธ**: ์ค‘๊ฐ„ (์žฅ๊ธฐ ํ’ˆ์งˆ ๊ด€๋ฆฌ ๊ธฐ๋ฐ˜) + +--- + +## Phase 15 โ€” ๋ชจ๋ธ ์„ ํƒ (Claude API / OpenAI ์˜ต์…˜) โ˜…โ˜†โ˜† **๋ฐฐ๊ฒฝ**: ๋กœ์ปฌ MLX ๋ชจ๋ธ์€ Apple Silicon ์ „์šฉ. ์›๊ฒฉ ์ ‘์† ์‹œ๋‚˜๋ฆฌ์˜ค๋‚˜ ๋” ๋†’์€ ํ’ˆ์งˆ์ด ํ•„์š”ํ•  ๋•Œ Claude API/OpenAI๋ฅผ ์„ ํƒํ•  ์ˆ˜ ์žˆ์œผ๋ฉด ์œ ์—ฐ์„ฑ ํ™•๋ณด. @@ -190,7 +238,7 @@ model_provider: str = "mlx" # "mlx" | "claude" | "openai" --- -## Phase 17 โ€” Docker ์ปจํ…Œ์ด๋„ˆํ™” โ˜…โ˜†โ˜† +## Phase 16 โ€” Docker ์ปจํ…Œ์ด๋„ˆํ™” โ˜…โ˜†โ˜† **๋ฐฐ๊ฒฝ**: ํ˜„์žฌ ๋กœ์ปฌ ์ „์šฉ. ๊ฐ€์กฑ์ด๋‚˜ ์ง€์ธ๋„ ์“ธ ์ˆ˜ ์žˆ๋„๋ก ์„œ๋ฒ„ ๋ฐฐํฌ ๊ฐ€๋Šฅํ•œ ํ˜•ํƒœ๋กœ ํŒจํ‚ค์ง•. @@ -202,13 +250,13 @@ docker-compose.yml โ””โ”€โ”€ mysql ``` -> ์ฃผ์˜: MLX๋Š” Apple Silicon ์ „์šฉ์ด๋ผ ์„œ๋ฒ„ ๋ฐฐํฌ ์‹œ Phase 16(๋ชจ๋ธ ์„ ํƒ)์ด ์„ ํ–‰๋˜์–ด์•ผ ํ•จ. +> ์ฃผ์˜: MLX๋Š” Apple Silicon ์ „์šฉ์ด๋ผ ์„œ๋ฒ„ ๋ฐฐํฌ ์‹œ Phase 15(๋ชจ๋ธ ์„ ํƒ)์ด ์„ ํ–‰๋˜์–ด์•ผ ํ•จ. **๋‚œ์ด๋„**: ๋†’์Œ | **์ž„ํŒฉํŠธ**: ์ค‘๊ฐ„ --- -## Phase 18 โ€” ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ด๋ฏธ์ง€ ์ดํ•ด โ˜…โ˜†โ˜† +## Phase 17 โ€” ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ด๋ฏธ์ง€ ์ดํ•ด โ˜…โ˜†โ˜† **๋ฐฐ๊ฒฝ**: ์ด์œ ์‹ ์‚ฌ์ง„ โ†’ ์žฌ๋ฃŒ ๋ถ„์„, ๊ธˆ์œต ์„œ๋ฅ˜ ์‚ฌ์ง„ โ†’ ๋‚ด์šฉ ํ•ด์„ ๋“ฑ. @@ -221,10 +269,10 @@ docker-compose.yml ## ์ถ”์ฒœ ์ง„ํ–‰ ์ˆœ์„œ ``` -๋‹จ๊ธฐ (1~2์ฃผ) ์ค‘๊ธฐ (1๊ฐœ์›”) ์žฅ๊ธฐ -โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ -Phase 15 (์•Œ๋ฆผ) โ†’ Phase 13 Reranker โ†’ Phase 17 (Docker) -Phase 12 (ํ”ผ๋“œ๋ฐฑ) Phase 16 (๋ชจ๋ธ์„ ํƒ) Phase 18 (๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ) +๋‹จ๊ธฐ (1~2์ฃผ) ์ค‘๊ธฐ (1๊ฐœ์›”) ์žฅ๊ธฐ +โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ +Phase 18 Hybrid Search โ†’ Phase 15 (๋ชจ๋ธ์„ ํƒ) โ†’ Phase 16 (Docker) +Phase 19 Query Rewriting โ†’ Phase 20 (RAGAS ํ‰๊ฐ€) โ†’ Phase 17 (๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ) ``` ### ์šฐ์„ ์ˆœ์œ„ ๋งคํŠธ๋ฆญ์Šค @@ -235,6 +283,7 @@ Phase 12 (ํ”ผ๋“œ๋ฐฑ) Phase 16 (๋ชจ๋ธ์„ ํƒ) Phase 18 (๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ) | ๋ฒ„๊ทธ 2 ์ด๋ ฅ ๋ฏธ์—ฐ๋™ | โœ… ์™„๋ฃŒ | โ€” | โ€” | โ€” | | ๋ฒ„๊ทธ 3 ๋‹จ์ผ ์‚ฌ์šฉ์ž | โœ… ์™„๋ฃŒ | โ€” | โ€” | โ€” | | ๋ฒ„๊ทธ 4 ๋‚˜์ด ๊ณ„์‚ฐ ์˜ค๋ฅ˜ | โœ… ์™„๋ฃŒ | โ€” | โ€” | โ€” | +| ๋ฒ„๊ทธ 5 thinking ์ฒดํฌ๋ฐ•์Šค ๋ฌดํšจ | โœ… ์™„๋ฃŒ | โ€” | โ€” | โ€” | | Phase 4 Web UI | โœ… ์™„๋ฃŒ | โ€” | โ€” | โ€” | | Phase 5 ์žฅ๊ธฐ ์‚ฌ์šฉ์ž ๋ฉ”๋ชจ๋ฆฌ | โœ… ์™„๋ฃŒ | โ€” | โ€” | โ€” | | Phase 6 ์›น ๊ฒ€์ƒ‰ | โœ… ์™„๋ฃŒ | โ€” | โ€” | โ€” | @@ -242,11 +291,13 @@ Phase 12 (ํ”ผ๋“œ๋ฐฑ) Phase 16 (๋ชจ๋ธ์„ ํƒ) Phase 18 (๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ) | Phase 9 ๋ฌธ์„œ ๊ด€๋ฆฌ | โœ… ์™„๋ฃŒ | โ€” | โ€” | โ€” | | Phase 10 ๋ฉ€ํ‹ฐ์œ ์ € | โœ… ์™„๋ฃŒ | โ€” | โ€” | โ€” | | Phase 11 ์ด๋ ฅ ๋ณต์› | โœ… ์™„๋ฃŒ | โ€” | โ€” | โ€” | +| Phase 12 ํ”ผ๋“œ๋ฐฑ | โœ… ์™„๋ฃŒ | โ€” | โ€” | โ€” | | Phase 13 Semantic Chunker | โœ… ์™„๋ฃŒ | โ€” | โ€” | โ€” | | Phase 14 ์Œ์„ฑ ์ธํ„ฐํŽ˜์ด์Šค | โœ… ์™„๋ฃŒ | โ€” | โ€” | โ€” | -| Phase 15 ์˜ˆ๋ฐฉ์ ‘์ข… ์•Œ๋ฆผ | ๐Ÿ”ฒ ๋ฏธ์™„ | ์ค‘๊ฐ„ | ๋†’์Œ | โญ 1์ˆœ์œ„ | -| Phase 12 ํ”ผ๋“œ๋ฐฑ | ๐Ÿ”ฒ ๋ฏธ์™„ | ์ค‘๊ฐ„ | ์ค‘๊ฐ„ | 2์ˆœ์œ„ | -| Phase 13 Reranker | ๐Ÿ”ฒ ์ง„ํ–‰ ์ค‘ | ์ค‘๊ฐ„ | ์ค‘๊ฐ„ | 3์ˆœ์œ„ | -| Phase 16 ๋ชจ๋ธ ์„ ํƒ | ๐Ÿ”ฒ ๋ฏธ์™„ | ์ค‘๊ฐ„ | ์ค‘๊ฐ„ | 4์ˆœ์œ„ | -| Phase 17 Docker | ๐Ÿ”ฒ ๋ฏธ์™„ | ๋†’์Œ | ์ค‘๊ฐ„ | 5์ˆœ์œ„ | -| Phase 18 ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ | ๐Ÿ”ฒ ๋ฏธ์™„ | ๋†’์Œ | ๋†’์Œ | 6์ˆœ์œ„ | +| Phase 13-B Reranker | โœ… ์™„๋ฃŒ | โ€” | โ€” | โ€” | +| Phase 18 Hybrid Search | ๐Ÿ”ฒ ์‹ ๊ทœ | ์ค‘๊ฐ„ | ๋†’์Œ | โญ 1์ˆœ์œ„ | +| Phase 19 Query Rewriting | ๐Ÿ”ฒ ์‹ ๊ทœ | ํ•˜ | ์ค‘๊ฐ„ | 3์ˆœ์œ„ | +| Phase 15 ๋ชจ๋ธ ์„ ํƒ | ๐Ÿ”ฒ ๋ฏธ์™„ | ์ค‘๊ฐ„ | ์ค‘๊ฐ„ | 4์ˆœ์œ„ | +| Phase 20 RAGAS ํ‰๊ฐ€ | ๐Ÿ”ฒ ์‹ ๊ทœ | ์ค‘๊ฐ„ | ์ค‘๊ฐ„ | 5์ˆœ์œ„ | +| Phase 16 Docker | ๐Ÿ”ฒ ๋ฏธ์™„ | ๋†’์Œ | ์ค‘๊ฐ„ | 6์ˆœ์œ„ | +| Phase 17 ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ | ๐Ÿ”ฒ ๋ฏธ์™„ | ๋†’์Œ | ๋†’์Œ | 7์ˆœ์œ„ | diff --git a/docs/thinking-feature-analysis.md b/docs/thinking-feature-analysis.md new file mode 100644 index 0000000..7f77c20 --- /dev/null +++ b/docs/thinking-feature-analysis.md @@ -0,0 +1,150 @@ +# ์‚ฌ๊ณ  ๊ณผ์ • ํ‘œ์‹œ ๊ธฐ๋Šฅ ๋ถ„์„ ๋ณด๊ณ ์„œ + +**ํ…Œ์ŠคํŠธ ์ผ์‹œ**: 2026-05-28 +**ํ…Œ์ŠคํŠธ ์งˆ๋ฌธ**: "๋…ผ๋ฌธ ๊ฒฐ๊ณผ๊ฐ€ ์–ด๋–ป๊ฒŒ ๋ผ?" +**์•ฑ ๋ฒ„์ „**: http://localhost:7860 + +--- + +## ํ…Œ์ŠคํŠธ ๊ฒฐ๊ณผ ์š”์•ฝ + +| ํ•ญ๋ชฉ | ์‚ฌ๊ณ  ๊ณผ์ • OFF | ์‚ฌ๊ณ  ๊ณผ์ • ON | +|------|-------------|------------| +| ์ด ์†Œ์š” ์‹œ๊ฐ„ | 200.5s | 233.7s | +| 1๋‹จ๊ณ„ (์งˆ๋ฌธ ๋ถ„์„) | 59.8s | 77.4s | +| ์‚ฌ๊ณ  ๊ณผ์ • ๋ธ”๋ก ํ‘œ์‹œ | ์—†์Œ | **์—†์Œ (๋ฒ„๊ทธ)** | +| ์ตœ์ข… ๋‹ต๋ณ€ ๋‚ด์šฉ | 6๊ฐœ ์„น์…˜, ๋™์ผ | 6๊ฐœ ์„น์…˜, ๋™์ผ | +| ๋‹ต๋ณ€ ์ฐจ์ด | **์—†์Œ** | **์—†์Œ** | + +**๊ฒฐ๋ก : ON/OFF ์ฒดํฌ๋ฐ•์Šค๊ฐ€ ํ˜„์žฌ ์•„๋ฌด๋Ÿฐ ์‹œ๊ฐ์  ์ฐจ์ด๋ฅผ ๋งŒ๋“ค์ง€ ์•Š๋Š”๋‹ค.** + +--- + +## ์‹ค์ œ ์‘๋‹ต (๋‘ ๊ฒฝ์šฐ ๋ชจ๋‘ ๋™์ผ) + +``` +[LangGraph โ†’ agent: ์งˆ๋ฌธ ๋ถ„์„ ์ค‘] (59.84s) + +๋ฌธ์„œ ๊ฒ€์ƒ‰ ์ค‘... ("์–ด๋จธ๋‹ˆ์˜ ๋ฐ˜์‘์„ฑ ์ƒํ˜ธ์ž‘์šฉ์ด ์•„๋™์˜ ์ค‘์‹ฌ์ถ• ํ–‰๋™๊ณผ ์ง€๋Šฅ ๋ฐ ๋‹ค์ค‘์ง€๋Šฅ ๋ฐœ๋‹ฌ์— ๋ฏธ์น˜๋Š” ์˜ํ–ฅ") + +[LangGraph โ†’ tools: ๋„๊ตฌ ์‹คํ–‰ ์ค‘] (71.18s) + [๊ฒฐ๊ณผ: 3๊ฐœ ๋ฌธ์„œ ๋ฐ˜ํ™˜ โ†’ agent ๋ณต๊ท€] + +[๋ฌธ์„œ ๊ฒ€์ƒ‰: "์–ด๋จธ๋‹ˆ์˜ ๋ฐ˜์‘์„ฑ ์ƒํ˜ธ์ž‘์šฉ์ด ์•„๋™์˜ ์ค‘์‹ฌ์ถ• ํ–‰๋™๊ณผ ์ง€๋Šฅ ๋ฐ ๋‹ค์ค‘์ง€๋Šฅ ๋ฐœ๋‹ฌ์— ๋ฏธ์น˜๋Š” ์˜ํ–ฅ"] + โ†’ [๋ฌธ์„œ 1] 1, 81-99 ์–ด๋จธ๋‹ˆ์˜ ๋ฐ˜์‘์„ฑ ์ƒํ˜ธ์ž‘์šฉ์ด ์•„๋™์˜ ์ค‘์‹ฌ์ถ• ํ–‰๋™... + โ†’ [๋ฌธ์„œ 2] ๊น€์ •๋ฏธโ€ค์ •์€์ฃผ/ ์–ด๋จธ๋‹ˆ์˜๋ฐ˜์‘์„ฑ์ƒํ˜ธ์ž‘์šฉ์ด... + โ†’ [๋ฌธ์„œ 3] ๊น€์ •๋ฏธโ€ค์ •์€์ฃผ/ ์–ด๋จธ๋‹ˆ์˜๋ฐ˜์‘์„ฑ์ƒํ˜ธ์ž‘์šฉ์ด... + +[LangGraph โ†’ agent: ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ๋ฐ˜์˜ ์ค‘] (132.91s) + +[LangGraph โ†’ agent: ์ตœ์ข… ๋‹ต๋ณ€ ์ƒ์„ฑ] + +๋ณธ ์—ฐ๊ตฌ์˜ ๊ฒฐ๊ณผ๋Š” ๋‹ค์Œ๊ณผ ๊ฐ™์ด ์š”์•ฝํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค: +1. ์–ด๋จธ๋‹ˆ์˜ ๋ฐ˜์‘์„ฑ ์ƒํ˜ธ์ž‘์šฉ๊ณผ ์•„๋™์˜ ์ค‘์‹ฌ์ถ• ํ–‰๋™ ๊ฐ„์˜ ๊ด€๊ณ„ + ... +``` + +์‚ฌ๊ณ  ๊ณผ์ • ON์„ ์„ ํƒํ–ˆ์„ ๋•Œ ๊ธฐ๋Œ€๋˜๋Š” `[์‚ฌ๊ณ  ๊ณผ์ •]...[/์‚ฌ๊ณ  ๊ณผ์ •]` ๋ธ”๋ก์ด ๋‚˜ํƒ€๋‚˜์ง€ ์•Š์Œ. + +--- + +## ์›์ธ ๋ถ„์„ + +### ๊ตฌ์กฐ์  ๋ฌธ์ œ + +``` +LLM ์ƒ์„ฑ ํ๋ฆ„: + ์‚ฌ๊ณ  ๋‚ด์šฉ... โ†’ ์ตœ์ข… ๋‹ต๋ณ€ ํ…์ŠคํŠธ + โ†“ โ†“ + AIMessageChunk AIMessageChunk + content="" content="๋ณธ ์—ฐ๊ตฌ์˜..." + additional_kwargs= additional_kwargs={} + {"thinking": "..."} +``` + +#### ํ•ต์‹ฌ ๋ณ‘๋ชฉ: `call_model` ๋‚ด๋ถ€ ๋ˆ„์  ๋ฐฉ์‹ + +`agent_service.py:111`์˜ `call_model` ํ•จ์ˆ˜๋Š” LLM ์ฒญํฌ๋ฅผ ๋‚ด๋ถ€์—์„œ ๋ชจ๋‘ ๋ˆ„์ ํ•œ ๋’ค **๋‹จ์ผ `AIMessage`๋กœ ๋ฐ˜ํ™˜**ํ•œ๋‹ค: + +```python +async for chunk in llm_with_tools.astream(msgs, config): + thinking_acc += chunk.additional_kwargs.get("thinking", "") + content_acc += chunk.content or "" + ... +return {"messages": [AIMessage(content=content_acc, additional_kwargs={"thinking": thinking_acc})]} +``` + +LangGraph `stream_mode="messages"`๋Š” ๋‚ด๋ถ€ LLM ์ฒญํฌ๋ฅผ ์™ธ๋ถ€๋กœ ํ†ต๊ณผ์‹œํ‚ค์ง€๋งŒ, +์‚ฌ๊ณ  ์ฒญํฌ(`content=""`, `additional_kwargs={"thinking":"..."}`)๋Š” +๋นˆ content๋กœ ์ธํ•ด **LangGraph ์ŠคํŠธ๋ฆผ์—์„œ ํ•„ํ„ฐ๋ง**๋˜๊ฑฐ๋‚˜ ์ „๋‹ฌ๋˜์ง€ ์•Š๋Š” ๊ฒƒ์œผ๋กœ ๋ณด์ธ๋‹ค. + +๊ฒฐ๊ณผ์ ์œผ๋กœ `stream_response`๊ฐ€ ์ˆ˜์‹ ํ•˜๋Š” ์ฒญํฌ: + +| ์ˆ˜์‹ ๋˜๋Š” ๊ฒƒ | ์ˆ˜์‹  ์•ˆ ๋˜๋Š” ๊ฒƒ | +|-----------|--------------| +| content๊ฐ€ ์žˆ๋Š” `AIMessageChunk` | **thinking์ด ์žˆ๋Š” `AIMessageChunk`** | +| ์ตœ์ข… `AIMessage` (thinking ํฌํ•จ) | | + +#### ์™œ ์ตœ์ข… `AIMessage`์˜ thinking๋„ ํ‘œ์‹œ ์•ˆ ๋˜๋Š”๊ฐ€ + +`stream_response:221`์˜ ์กฐ๊ฑด์ด ์ด๋ฅผ ์ฐจ๋‹จํ•œ๋‹ค: + +```python +elif node == "agent" and isinstance(chunk, AIMessage): + if not content_started and not thinking_open: # โ† content_started=True๋ฉด ์ „์ฒด ์Šคํ‚ต + thinking = chunk.additional_kwargs.get("thinking", "") + if thinking and _think_verbose: + yield "\n[์‚ฌ๊ณ  ๊ณผ์ •]\n" + ... +``` + +content `AIMessageChunk`๋“ค์ด ๋จผ์ € ์ฒ˜๋ฆฌ๋˜๋ฉด์„œ `content_started = True`๊ฐ€ ์„ธํŒ…๋จ. +์ตœ์ข… `AIMessage`๊ฐ€ ๋„์ฐฉํ•  ๋•Œ๋Š” ์ด๋ฏธ `content_started=True`๋ผ ์ „์ฒด ๋ธ”๋ก์ด ์‹คํ–‰๋˜์ง€ ์•Š๋Š”๋‹ค. + +--- + +## ์ ์šฉ๋œ ๋ฒ„๊ทธ ์ˆ˜์ • (2026-05-28) + +### ์ˆ˜์ • 1: `agent_service.py:223` โ€” ์ธ์Šคํ„ด์Šค ๋ณ€์ˆ˜ ์ฐธ์กฐ ์˜ค๋ฅ˜ + +```diff +- if thinking and self._think_verbose: # ํ•ญ์ƒ False (config ๊ธฐ๋ณธ๊ฐ’) ++ if thinking and _think_verbose: # ์ฒดํฌ๋ฐ•์Šค ๊ฐ’ ์‚ฌ์šฉ +``` + +์ด ์ˆ˜์ •์€ ์—ฃ์ง€์ผ€์ด์Šค(content ์ŠคํŠธ๋ฆฌ๋ฐ ์—†์ด ์ตœ์ข… AIMessage๋งŒ ๋„๋‹ฌํ•˜๋Š” ๊ฒฝ์šฐ)์—์„œ ์ฒดํฌ๋ฐ•์Šค๋ฅผ ์˜ฌ๋ฐ”๋ฅด๊ฒŒ ๋ฐ˜์˜ํ•œ๋‹ค. +๊ทธ๋Ÿฌ๋‚˜ ์ •์ƒ ์ŠคํŠธ๋ฆฌ๋ฐ ๊ฒฝ๋กœ์—์„œ๋Š” `content_started=True` ์กฐ๊ฑด์ด ์—ฌ์ „ํžˆ ๋ธ”๋ก์„ ๋ง‰๋Š”๋‹ค. + +--- + +## ์ œ์•ˆํ•˜๋Š” ์ถ”๊ฐ€ ์ˆ˜์ • + +`stream_response`์—์„œ ์ตœ์ข… `AIMessage`์˜ thinking์„ ์ €์žฅํ•ด๋‘๊ณ , +์ŠคํŠธ๋ฆฌ๋ฐ ๋ฃจํ”„ ์ข…๋ฃŒ ํ›„ ํ‘œ์‹œํ•˜๋Š” ๋ฐฉ์‹์ด ๊ฐ€์žฅ ๊ฐ„๋‹จํ•˜๋‹ค: + +```python +# ๋ฃจํ”„ ๋‚ด - AIMessage ์ฒ˜๋ฆฌ ์‹œ thinking ์ €์žฅ +elif node == "agent" and isinstance(chunk, AIMessage): + if not thinking_open: + deferred_thinking = chunk.additional_kwargs.get("thinking", "") + if chunk.content and not content_started: + ... + +# ๋ฃจํ”„ ์ข…๋ฃŒ ํ›„ +if deferred_thinking and _think_verbose: + yield "\n\n---\n**[์‚ฌ๊ณ  ๊ณผ์ •]**\n\n" + yield deferred_thinking + yield "\n\n**[/์‚ฌ๊ณ  ๊ณผ์ •]**\n" +``` + +> ๋‹จ, thinking์ด ๋‹ต๋ณ€ ๋’ค์— ํ‘œ์‹œ๋˜๋Š” UX ํŠธ๋ ˆ์ด๋“œ์˜คํ”„๊ฐ€ ์žˆ๋‹ค. +> ๋‹ต๋ณ€ ์ „์— ํ‘œ์‹œํ•˜๋ ค๋ฉด `call_model`์„ ๋ฆฌํŒฉํ† ๋งํ•ด thinking์„ ๋จผ์ € ์ŠคํŠธ๋ฆฌ๋ฐํ•ด์•ผ ํ•œ๋‹ค. + +--- + +## ์†Œ์š” ์‹œ๊ฐ„ ๋น„๊ต ์ฐธ๊ณ  + +ON์ด OFF๋ณด๋‹ค ์•ฝ 33์ดˆ ๋” ๊ฑธ๋ฆฐ ์ ์€ ์ฃผ๋ชฉํ•  ๋งŒํ•˜๋‹ค. +`enable_thinking=True`(config ์„ค์ •)๋กœ ๋ชจ๋ธ์ด ํ•ญ์ƒ thinking์„ ์ƒ์„ฑํ•˜๋ฏ€๋กœ, +ON/OFF ๊ฐ„ ์†Œ์š” ์‹œ๊ฐ„ ์ฐจ์ด๋Š” ๋ชจ๋ธ ๋น„๊ฒฐ์ •์„ฑ(temperature)์— ์˜ํ•œ ์ž์—ฐ ํŽธ์ฐจ๋กœ ๋ณด์ธ๋‹ค. +์ฒดํฌ๋ฐ•์Šค๋Š” ํ‘œ์‹œ ์—ฌ๋ถ€๋งŒ ์ œ์–ดํ•˜๋ฉฐ ๋ชจ๋ธ ๋™์ž‘ ์ž์ฒด๋Š” ๋ฐ”๊พธ์ง€ ์•Š๋Š”๋‹ค. diff --git a/services/agent/agent_service.py b/services/agent/agent_service.py index 9d82dc3..78d0472 100644 --- a/services/agent/agent_service.py +++ b/services/agent/agent_service.py @@ -6,6 +6,7 @@ from typing import AsyncIterator from langchain_core.messages import AIMessage, AIMessageChunk, HumanMessage, SystemMessage from langchain_core.runnables import RunnableConfig from langgraph.checkpoint.memory import MemorySaver +from langgraph.config import get_stream_writer from langgraph.graph import START, MessagesState, StateGraph from langgraph.prebuilt import ToolNode, tools_condition @@ -43,6 +44,7 @@ class AgentService: self._conv_id: int | None = None self._pending_history: list = [] self._user_id = user_id + self._last_run_id: str | None = None if conversation_repository: try: @@ -107,10 +109,19 @@ class AgentService: system_content += f"\n\n## ์‚ฌ์šฉ์ž ์ •๋ณด (์ด์ „ ๋Œ€ํ™”์—์„œ ๊ธฐ์–ต๋œ ๋‚ด์šฉ)\n" + "\n".join(lines) msgs = [SystemMessage(content=system_content)] + state["messages"] thinking_acc, content_acc, tool_calls_acc = "", "", [] - async for chunk in llm_with_tools.astream(msgs, config): + try: + writer = get_stream_writer() + except Exception: + writer = None + # ์ฒดํฌ๋ฐ•์Šค ๊ฐ’์„ ๋ชจ๋ธ์˜ enable_thinking์œผ๋กœ ์ „๋‹ฌ (๋Ÿฐํƒ€์ž„ ์˜ค๋ฒ„๋ผ์ด๋“œ) + show_thinking = config.get("configurable", {}).get("show_thinking", False) + _llm = llm_with_tools.bind(enable_thinking=show_thinking) if show_thinking != chat_model.enable_thinking else llm_with_tools + async for chunk in _llm.astream(msgs, config): t = chunk.additional_kwargs.get("thinking", "") if t: thinking_acc += t + if writer: + writer({"__thinking": t}) if chunk.content and isinstance(chunk.content, str): content_acc += chunk.content if chunk.tool_calls: @@ -132,13 +143,18 @@ class AgentService: self._agent = builder.compile(checkpointer=MemorySaver()) @property - def _config(self) -> dict: - return {"configurable": {"thread_id": self._thread_id}} + def last_run_id(self) -> str | None: + return self._last_run_id + + def _make_config(self, show_thinking: bool = False) -> dict: + return {"configurable": {"thread_id": self._thread_id, "show_thinking": show_thinking}} async def stream_response(self, user_input: str, show_thinking: bool | None = None) -> AsyncIterator[str]: """์‚ฌ์šฉ์ž ์ž…๋ ฅ์„ ๋ฐ›์•„ ์‘๋‹ต ํ† ํฐ์„ ์ˆœ์„œ๋Œ€๋กœ yieldํ•œ๋‹ค.""" _think_verbose = show_thinking if show_thinking is not None else self._think_verbose self._source_buffer.clear() + run_id = uuid.uuid4() + run_config = {**self._make_config(_think_verbose), "run_id": str(run_id)} # ์žฌ์‹œ์ž‘ ํ›„ ์ฒซ ํ˜ธ์ถœ ์‹œ MySQL ์ด๋ ฅ์„ ์ดˆ๊ธฐ ์ƒํƒœ์— ์ฃผ์ž… if self._pending_history: @@ -155,13 +171,42 @@ class AgentService: content_started = False # ๋…ธ๋“œ ๋‹น ๋ ˆ์ด๋ธ” 1ํšŒ ์ถœ๋ ฅ ์ œ์–ด start_time = time.perf_counter() - async for chunk, metadata in self._agent.astream( - messages, self._config, stream_mode="messages" + async for stream_event in self._agent.astream( + messages, run_config, stream_mode=["messages", "custom"] ): + mode, data = stream_event + + # โ”€โ”€ custom ์ด๋ฒคํŠธ โ€” call_model writer๊ฐ€ emitํ•œ thinking ํ† ํฐ โ”€โ”€ + if mode == "custom": + if isinstance(data, dict) and "__thinking" in data: + # thinking ์ฒซ ํ† ํฐ ๋„์ฐฉ ์‹œ agent ๋ ˆ์ด๋ธ” + prev_node ๊ฐฑ์‹  + if "agent" != prev_node: + if thinking_open: + yield "\n[/์‚ฌ๊ณ  ๊ณผ์ •]\n" + thinking_open = False + content_started = False + if lg: + elapsed = time.perf_counter() - start_time + label = "agent: ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ๋ฐ˜์˜ ์ค‘" if prev_node == "tools" else "agent: ์งˆ๋ฌธ ๋ถ„์„ ์ค‘" + yield f"\n[LangGraph โ†’ {label}] ({elapsed:.2f}s)\n" + prev_node = "agent" + if _think_verbose: + if not thinking_open: + yield "\n[์‚ฌ๊ณ  ๊ณผ์ •]\n" + thinking_open = True + yield data["__thinking"] + continue + + # โ”€โ”€ messages ์ด๋ฒคํŠธ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + chunk, metadata = data node = metadata.get("langgraph_node", "") # โ”€โ”€ ๋…ธ๋“œ ์ „ํ™˜ ์‹œ ํ”Œ๋ž˜๊ทธ ๋ฆฌ์…‹ + ๋ ˆ์ด๋ธ” ์ถœ๋ ฅ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + # (agent ๋ ˆ์ด๋ธ”์€ custom ์ด๋ฒคํŠธ ํ•ธ๋“ค๋Ÿฌ์—์„œ ์ด๋ฏธ ์ฒ˜๋ฆฌ๋  ์ˆ˜ ์žˆ์œผ๋ฏ€๋กœ ์ค‘๋ณต ๋ฐฉ์ง€) if node != prev_node: + if thinking_open: + yield "\n[/์‚ฌ๊ณ  ๊ณผ์ •]\n" + thinking_open = False content_started = False if lg: if node == "agent": @@ -175,13 +220,6 @@ class AgentService: # โ”€โ”€ agent ๋…ธ๋“œ โ€” AIMessageChunk๋งŒ ์ฒ˜๋ฆฌ (์ค‘๋ณต ๋ฐฉ์ง€) โ”€โ”€โ”€โ”€โ”€โ”€ if node == "agent" and isinstance(chunk, AIMessageChunk): - thinking = chunk.additional_kwargs.get("thinking", "") - if thinking and _think_verbose: - if not thinking_open: - yield "\n[์‚ฌ๊ณ  ๊ณผ์ •]\n" - thinking_open = True - yield thinking - if chunk.tool_calls: if thinking_open: yield "\n[/์‚ฌ๊ณ  ๊ณผ์ •]\n" @@ -213,7 +251,7 @@ class AgentService: elif node == "agent" and isinstance(chunk, AIMessage): if not content_started and not thinking_open: thinking = chunk.additional_kwargs.get("thinking", "") - if thinking and self._think_verbose: + if thinking and _think_verbose: yield "\n[์‚ฌ๊ณ  ๊ณผ์ •]\n" yield thinking yield "\n[/์‚ฌ๊ณ  ๊ณผ์ •]\n" @@ -247,6 +285,8 @@ class AgentService: if thinking_open: yield "\n[/์‚ฌ๊ณ  ๊ณผ์ •]\n" + self._last_run_id = str(run_id) + # ๋Œ€ํ™” ๋‚ด์šฉ์„ MySQL์— ์ €์žฅ if self._conv_repo and self._conv_id and response_content: try: diff --git a/services/agent/tools.py b/services/agent/tools.py index 56af318..9aa6e25 100644 --- a/services/agent/tools.py +++ b/services/agent/tools.py @@ -24,15 +24,14 @@ def web_search(query: str) -> str: def make_retriever_tool(retriever_service): - """as_retriever()๋ฅผ ์‚ฌ์šฉํ•˜๋Š” ๋‹จ์ˆœ ๊ฒ€์ƒ‰ Tool (source_buffer ์—†์Œ).""" - retriever = retriever_service.as_retriever() + """retriever_service.search()๋ฅผ ์‚ฌ์šฉํ•˜๋Š” ๊ฒ€์ƒ‰ Tool (Reranker ์ž๋™ ์ ์šฉ).""" @tool def search_documents(query: str) -> str: """๋“ฑ๋ก๋œ ๋ฌธ์„œ(๋…ผ๋ฌธ, ์œก์•„ ๊ฐ€์ด๋“œ, ๊ธˆ์œต ์ž๋ฃŒ ๋“ฑ)์—์„œ ๊ด€๋ จ ์ •๋ณด๋ฅผ ๊ฒ€์ƒ‰ํ•ฉ๋‹ˆ๋‹ค. ์œก์•„ยท๊ธˆ์œต ๊ด€๋ จ ์งˆ๋ฌธ์ด ์˜ค๋ฉด ์ž์‹ ์˜ ์ง€์‹์œผ๋กœ ๋‹ตํ•˜๊ธฐ ์ „์— ๋ฐ˜๋“œ์‹œ ์ด ๋„๊ตฌ๋ฅผ ๋จผ์ € ํ˜ธ์ถœํ•˜์„ธ์š”. ๋“ฑ๋ก๋œ ๋ฌธ์„œ๊ฐ€ ์—†๊ฑฐ๋‚˜ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๊ฐ€ ์—†์„ ๋•Œ๋งŒ ์ž์‹ ์˜ ํ•™์Šต ์ง€์‹์„ ๋ณด์กฐ์ ์œผ๋กœ ํ™œ์šฉํ•ฉ๋‹ˆ๋‹ค.""" - docs = retriever.invoke(query) + docs = retriever_service.search(query) if not docs: return "๊ด€๋ จ ๋ฌธ์„œ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค." return "\n\n".join( diff --git a/services/db/feedback_repository.py b/services/db/feedback_repository.py new file mode 100644 index 0000000..6456cb4 --- /dev/null +++ b/services/db/feedback_repository.py @@ -0,0 +1,19 @@ +class FeedbackRepository: + def __init__(self, db): + self._db = db + + def save_feedback( + self, + user_id: str, + message: str, + response: str, + rating: int, + langsmith_run_id: str | None = None, + ) -> None: + self._db.execute_write( + """ + INSERT INTO td_feedback (user_id, message, response, rating, langsmith_run_id) + VALUES (%s, %s, %s, %s, %s) + """, + (user_id, message, response, rating, langsmith_run_id), + ) diff --git a/services/db/mysql_service.py b/services/db/mysql_service.py index 86cfa39..ec55001 100644 --- a/services/db/mysql_service.py +++ b/services/db/mysql_service.py @@ -99,6 +99,17 @@ class DatabaseService: UNIQUE KEY uq_user_key (user_id, key_name) ) """) + cursor.execute(""" + CREATE TABLE IF NOT EXISTS td_feedback ( + id INT AUTO_INCREMENT PRIMARY KEY, + user_id VARCHAR(50) NOT NULL DEFAULT 'default', + message TEXT, + response TEXT, + rating TINYINT, + langsmith_run_id VARCHAR(100), + created_at DATETIME DEFAULT CURRENT_TIMESTAMP + ) + """) conn.commit() self._migrate_schema(conn) diff --git a/services/model/mlx_chat_model.py b/services/model/mlx_chat_model.py index 7f8f038..f8fb209 100644 --- a/services/model/mlx_chat_model.py +++ b/services/model/mlx_chat_model.py @@ -82,7 +82,13 @@ class MlxChatModel(BaseChatModel): }) return result - def _build_prompt(self, messages: List[BaseMessage], tools: Optional[list] = None) -> str: + def _build_prompt( + self, + messages: List[BaseMessage], + tools: Optional[list] = None, + enable_thinking: Optional[bool] = None, + ) -> str: + _enable_thinking = enable_thinking if enable_thinking is not None else self.enable_thinking kwargs: dict = { "tokenize": False, "add_generation_prompt": True, @@ -91,7 +97,7 @@ class MlxChatModel(BaseChatModel): kwargs["tools"] = tools # Qwen3 thinking ๋ชจ๋“œ โ€” ์ง€์›ํ•˜์ง€ ์•Š๋Š” ๋ชจ๋ธ์€ ๋ฌด์‹œ๋จ try: - kwargs["enable_thinking"] = self.enable_thinking + kwargs["enable_thinking"] = _enable_thinking return self._tokenizer.apply_chat_template(self._to_chat_dicts(messages), **kwargs) except TypeError: kwargs.pop("enable_thinking") @@ -145,7 +151,8 @@ class MlxChatModel(BaseChatModel): from mlx_lm import generate tools = kwargs.get("tools") - prompt = self._build_prompt(messages, tools) + enable_thinking_override = kwargs.pop("enable_thinking", None) + prompt = self._build_prompt(messages, tools, enable_thinking=enable_thinking_override) text = generate( self._model, self._tokenizer, @@ -169,7 +176,9 @@ class MlxChatModel(BaseChatModel): from mlx_lm import stream_generate tools = kwargs.get("tools") - prompt = self._build_prompt(messages, tools) + enable_thinking_override = kwargs.pop("enable_thinking", None) + _enable_thinking = enable_thinking_override if enable_thinking_override is not None else self.enable_thinking + prompt = self._build_prompt(messages, tools, enable_thinking=_enable_thinking) OPEN_THINK = "" CLOSE_THINK = "" @@ -178,7 +187,7 @@ class MlxChatModel(BaseChatModel): SAFE = max(len(OPEN_THINK), len(CLOSE_THINK), len(OPEN_TOOL), len(CLOSE_TOOL)) # enable_thinking=False ๋ชจ๋ธ์€ ๋ธ”๋ก์„ ์ƒ์„ฑํ•˜์ง€ ์•Š์œผ๋ฏ€๋กœ post_think์—์„œ ์‹œ์ž‘ - state = "pre_think" if self.enable_thinking else "post_think" + state = "pre_think" if _enable_thinking else "post_think" buf = "" out: list[ChatGenerationChunk] = [] diff --git a/services/rag/ingestion_service.py b/services/rag/ingestion_service.py index 1e9bd2a..7797da9 100644 --- a/services/rag/ingestion_service.py +++ b/services/rag/ingestion_service.py @@ -1,59 +1,10 @@ -import re - -import numpy as np from langchain_community.document_loaders import PDFPlumberLoader, TextLoader -from langchain_core.documents import Document +from langchain_experimental.text_splitter import SemanticChunker from langchain_qdrant import QdrantVectorStore from qdrant_client import QdrantClient from qdrant_client.models import Filter, FieldCondition, MatchValue, FilterSelector -def _cosine_similarity(a: np.ndarray, b: np.ndarray) -> float: - return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-10)) - - -class _SemanticSplitter: - """๋ฌธ์žฅ ์ž„๋ฒ ๋”ฉ ์œ ์‚ฌ๋„ ๊ธฐ๋ฐ˜ ์ฒญ์ปค. - - ์ธ์ ‘ ๋ฌธ์žฅ ๊ฐ„ ์ฝ”์‚ฌ์ธ ์œ ์‚ฌ๋„๋ฅผ ๊ณ„์‚ฐํ•˜๊ณ , ์œ ์‚ฌ๋„๊ฐ€ ๋‚ฎ์€(= ์˜๋ฏธ ์ „ํ™˜) ์ง€์ ์—์„œ ์ฒญํฌ๋ฅผ ๋ถ„๋ฆฌํ•œ๋‹ค. - breakpoint_percentile=95์ด๋ฉด ์œ ์‚ฌ๋„ ํ•˜์œ„ 5% ์ง€์ ์ด ๋ถ„๋ฆฌ ๊ฒฝ๊ณ„๊ฐ€ ๋œ๋‹ค. - """ - - _SENTENCE_RE = re.compile(r"(?<=[.!?ใ€‚๏ผ๏ผŸ])\s+") - - def __init__(self, embeddings, breakpoint_percentile: int = 95): - self._embeddings = embeddings - self._percentile = breakpoint_percentile - - def split_documents(self, docs: list[Document]) -> list[Document]: - result = [] - for doc in docs: - for chunk_text in self._split_text(doc.page_content): - result.append(Document(page_content=chunk_text, metadata=doc.metadata)) - return result - - def _split_text(self, text: str) -> list[str]: - sentences = [s for s in self._SENTENCE_RE.split(text.strip()) if s.strip()] - if len(sentences) <= 1: - return [text.strip()] if text.strip() else [] - - vecs = np.array(self._embeddings.embed_documents(sentences)) - similarities = [_cosine_similarity(vecs[i], vecs[i + 1]) for i in range(len(vecs) - 1)] - threshold = float(np.percentile(similarities, 100 - self._percentile)) - breakpoints = [i + 1 for i, s in enumerate(similarities) if s < threshold] - - chunks, start = [], 0 - for bp in breakpoints: - chunk = " ".join(sentences[start:bp]).strip() - if chunk: - chunks.append(chunk) - start = bp - tail = " ".join(sentences[start:]).strip() - if tail: - chunks.append(tail) - return chunks - - class IngestionService: """๋ฌธ์„œ๋ฅผ ์˜๋ฏธ ๋‹จ์œ„ ์ฒญํฌ๋กœ ๋ถ„ํ• ํ•ด Qdrant์— ์ €์žฅํ•˜๋Š” ์ˆ˜์ง‘ ํŒŒ์ดํ”„๋ผ์ธ.""" @@ -63,12 +14,16 @@ class IngestionService: qdrant_url: str, collection_name: str, breakpoint_threshold_type: str = "percentile", + buffer_size: int = 1, ): self._embeddings = embeddings self._qdrant_url = qdrant_url self._collection_name = collection_name - # breakpoint_threshold_type์€ ํ–ฅํ›„ ํ™•์žฅ์šฉ์œผ๋กœ ์ˆ˜์šฉ (ํ˜„์žฌ๋Š” percentile ๋ฐฉ์‹ ๊ณ ์ •) - self._splitter = _SemanticSplitter(embeddings, breakpoint_percentile=95) + self._splitter = SemanticChunker( + embeddings=embeddings, + breakpoint_threshold_type=breakpoint_threshold_type, + buffer_size=buffer_size, + ) self._client = QdrantClient(url=qdrant_url) def _delete_by_source(self, source_path: str) -> None: diff --git a/services/rag/rerank_service.py b/services/rag/rerank_service.py new file mode 100644 index 0000000..820d7cc --- /dev/null +++ b/services/rag/rerank_service.py @@ -0,0 +1,19 @@ +from langchain_core.documents import Document + + +class RerankService: + """Cross-Encoder ๊ธฐ๋ฐ˜ ์žฌ์ˆœ์œ„(Reranker) ์„œ๋น„์Šค.""" + + def __init__(self, model_id: str = "cross-encoder/mmarco-mMiniLMv2-L12-H384-v1"): + from sentence_transformers import CrossEncoder + print(f"Reranker ๋กœ๋”ฉ ์ค‘: {model_id}") + self._model = CrossEncoder(model_id) + print("Reranker ๋กœ๋”ฉ ์™„๋ฃŒ") + + def rerank(self, query: str, docs: list[Document], top_k: int) -> list[Document]: + if not docs: + return docs + pairs = [(query, doc.page_content) for doc in docs] + scores = self._model.predict(pairs) + ranked = sorted(zip(scores, docs), key=lambda x: x[0], reverse=True) + return [doc for _, doc in ranked[:top_k]] diff --git a/services/rag/retriever_service.py b/services/rag/retriever_service.py index 7c5958f..7ce441b 100644 --- a/services/rag/retriever_service.py +++ b/services/rag/retriever_service.py @@ -13,6 +13,8 @@ class RetrieverService: qdrant_url: str, collection_name: str, top_k: int, + reranker=None, + rerank_fetch_k: int = 10, ): self._client = QdrantClient(url=qdrant_url) self._collection_name = collection_name @@ -22,12 +24,18 @@ class RetrieverService: embedding=embeddings, ) self._top_k = top_k + self._reranker = reranker + self._rerank_fetch_k = rerank_fetch_k def as_retriever(self): return self._store.as_retriever(search_kwargs={"k": self._top_k}) def search(self, query: str) -> list[Document]: - return self._store.similarity_search(query, k=self._top_k) + fetch_k = self._rerank_fetch_k if self._reranker else self._top_k + docs = self._store.similarity_search(query, k=fetch_k) + if self._reranker: + docs = self._reranker.rerank(query, docs, top_k=self._top_k) + return docs def list_documents(self) -> list[str]: """Qdrant์— ์ €์žฅ๋œ ๊ณ ์œ  ํŒŒ์ผ ๊ฒฝ๋กœ ๋ชฉ๋ก์„ ๋ฐ˜ํ™˜ํ•œ๋‹ค."""