Implement Phase 4~14: LangGraph Agent, RAG pipeline, Gradio Web UI, voice interface

- Upgrade LLM to Qwen3-14B-4bit with Thinking mode (MlxChatModel as LangChain BaseChatModel)
- Add LangGraph ReAct agent with tool calling loop (search_documents, web_search, get_current_date, remember/recall_user_info)
- Add RAG pipeline: BAAI/bge-m3 embeddings + Qdrant vector store + semantic chunking (SemanticSplitter via cosine similarity)
- Replace fixed-size RecursiveCharacterTextSplitter with meaning-based SemanticSplitter (numpy only, no extra deps)
- Add Gradio Web UI (app.py): chat, document ingestion, document management tabs
- Add multi-user support (user_id isolation in DB + per-user agent cache + dropdown selector)
- Add conversation history restore from MySQL on agent init (Phase 11)
- Add UserProfileRepository for persistent user profile (remember/recall tools)
- Add thread-local DB connections to fix pymysql thread-safety with LangGraph ToolNode
- Add Phase 14 voice interface: Whisper STT (microphone → text) + macOS TTS (say -v Yuna)
- Enforce search_documents-first policy in system prompt and tool descriptions
- Update ROADMAP2.md: Phase 14 complete, Phase 13 chunking partially complete

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
sal
2026-05-27 14:06:22 +09:00
parent cd41e9e33e
commit 06bcdb03ac
20 changed files with 1934 additions and 47 deletions
+58
View File
@@ -2,14 +2,20 @@ from dependency_injector import containers, providers
from config import Config
from services.model.mlx_model import MlxModelService
from services.model.mlx_chat_model import MlxChatModel
from services.chat.history_service import HistoryService
from services.chat.chat_service import ChatService
from services.chat.compact_service import CompactService
from services.db.mysql_service import DatabaseService
from services.db.conversation_repository import ConversationRepository
from services.db.user_profile_repository import UserProfileRepository
from services.ui.cli_service import CliUiService
from services.events.event_bus import EventBus
from services.events.handlers import StreamTokenHandler, StreamEndHandler
from langchain_huggingface import HuggingFaceEmbeddings
from services.rag.ingestion_service import IngestionService
from services.rag.retriever_service import RetrieverService
from services.agent.agent_service import AgentService
class Container(containers.DeclarativeContainer):
@@ -22,6 +28,14 @@ class Container(containers.DeclarativeContainer):
model_id=providers.Callable(lambda c: c.model_id, config),
)
# BaseChatModel for the LangGraph agent (Phase 1).
# Every setting is pulled off `config` through a Callable provider, so the
# attribute lookup happens when the provider is resolved rather than when
# this class body is executed.
chat_model = providers.Singleton(
    MlxChatModel,
    model_id=providers.Callable(lambda cfg: cfg.model_id, config),
    max_tokens=providers.Callable(lambda cfg: cfg.max_tokens, config),
    enable_thinking=providers.Callable(lambda cfg: cfg.enable_thinking, config),
)
compact_service = providers.Singleton(
CompactService,
model=model_service,
@@ -41,6 +55,11 @@ class Container(containers.DeclarativeContainer):
db=db_service,
)
# Repository for persisted user profiles, built on the db_service provider.
user_profile_repository = providers.Singleton(UserProfileRepository, db=db_service)
history_service = providers.Factory(
HistoryService,
system_prompt=providers.Callable(lambda c: c.system_prompt, config),
@@ -62,3 +81,42 @@ class Container(containers.DeclarativeContainer):
# Singleton providers for the stream event handlers; neither one receives
# any injected arguments.
stream_token_handler = providers.Singleton(
    StreamTokenHandler,
)
stream_end_handler = providers.Singleton(
    StreamEndHandler,
)
# Phase 2 — RAG pipeline
# Single embeddings provider; the model id and the target device are both
# read from config when the provider is resolved.
embeddings = providers.Singleton(
    HuggingFaceEmbeddings,
    model_name=providers.Callable(lambda cfg: cfg.embedding_model_id, config),
    model_kwargs=providers.Callable(
        lambda cfg: {"device": cfg.embedding_device},
        config,
    ),
)
# Document ingestion: receives the embeddings provider plus the Qdrant
# endpoint, the collection name and the semantic breakpoint threshold type,
# all taken from config.
ingestion_service = providers.Singleton(
    IngestionService,
    embeddings=embeddings,
    qdrant_url=providers.Callable(lambda cfg: cfg.qdrant_url, config),
    collection_name=providers.Callable(lambda cfg: cfg.qdrant_collection, config),
    breakpoint_threshold_type=providers.Callable(
        lambda cfg: cfg.semantic_breakpoint_threshold_type,
        config,
    ),
)
# Document retrieval: receives the embeddings provider, the Qdrant endpoint
# and collection name, and the number of hits to return (rag_top_k), all
# taken from config.
retriever_service = providers.Singleton(
    RetrieverService,
    embeddings=embeddings,
    qdrant_url=providers.Callable(lambda cfg: cfg.qdrant_url, config),
    collection_name=providers.Callable(lambda cfg: cfg.qdrant_collection, config),
    top_k=providers.Callable(lambda cfg: cfg.rag_top_k, config),
)
# Phase 3 — LangGraph Agent
# The agent is assembled from the chat model, the retriever and the two
# repositories; the prompt and the verbosity/display flags come from config.
agent_service = providers.Singleton(
    AgentService,
    chat_model=chat_model,
    retriever_service=retriever_service,
    system_prompt=providers.Callable(lambda cfg: cfg.system_prompt, config),
    # Output / logging switches read from config.
    rag_verbose=providers.Callable(lambda cfg: cfg.rag_verbose, config),
    rag_show_sources=providers.Callable(lambda cfg: cfg.rag_show_sources, config),
    langgraph_verbose=providers.Callable(lambda cfg: cfg.langgraph_verbose, config),
    think_verbose=providers.Callable(lambda cfg: cfg.think_verbose, config),
    # Persistence collaborators.
    user_profile_repository=user_profile_repository,
    conversation_repository=conversation_repository,
)