Files
youlbot/container.py
T
shinalok 68f741af72 Phase 17: Multimodal image understanding via analyze_image tool
Dual-model approach (C): Qwen3-8B handles conversation, Qwen2.5-VL-7B
analyzes images on demand via analyze_image LangChain tool.

- services/model/mlx_vision_model.py: MlxVisionModel (mlx-vlm wrapper, lazy load)
- services/agent/tools.py: make_vision_tool(vision_model, image_path)
- agent_service.py: stream_response(image_path=None), dynamic tool binding
  via config["image_path"] — thread-safe per-request rebinding
- container.py: vision_model Singleton provider
- config.py: vision_enabled, vision_model_id, vision_max_tokens
- api.py: image_base64 in ChatRequest, decode to temp file, cleanup after stream

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-02 13:52:10 +09:00

155 lines
5.9 KiB
Python

from dependency_injector import containers, providers
from config import Config
from services.model.mlx_model import MlxModelService
from services.model.mlx_chat_model import MlxChatModel
from services.chat.history_service import HistoryService
from services.chat.chat_service import ChatService
from services.chat.compact_service import CompactService
from services.db.mysql_service import DatabaseService
from services.db.conversation_repository import ConversationRepository
from services.db.user_profile_repository import UserProfileRepository
from services.db.feedback_repository import FeedbackRepository
from services.ui.cli_service import CliUiService
from services.events.event_bus import EventBus
from services.events.handlers import StreamTokenHandler, StreamEndHandler
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_qdrant import FastEmbedSparse
from services.rag.ingestion_service import IngestionService
from services.rag.rerank_service import RerankService
from services.rag.retriever_service import RetrieverService
from services.agent.agent_service import AgentService
from services.model.mlx_vision_model import MlxVisionModel
class Container(containers.DeclarativeContainer):
    """Dependency-injection container that wires the application's services.

    Every attribute is a ``dependency_injector`` provider. ``Singleton``
    providers hand out one shared instance per container, ``Factory``
    providers build a new instance on each resolution, and the small
    ``Callable`` providers read a single attribute from the shared ``Config``
    instance at resolution time so configuration values are not captured at
    import time.
    """

    # --- Core infrastructure -------------------------------------------------
    config = providers.Singleton(Config)
    event_bus = providers.Singleton(EventBus)

    model_service = providers.Singleton(
        MlxModelService,
        model_id=providers.Callable(lambda c: c.model_id, config),
    )

    # BaseChatModel for the LangGraph agent (Phase 1)
    chat_model = providers.Singleton(
        MlxChatModel,
        model_id=providers.Callable(lambda c: c.model_id, config),
        max_tokens=providers.Callable(lambda c: c.max_tokens, config),
        enable_thinking=providers.Callable(lambda c: c.enable_thinking, config),
    )

    compact_service = providers.Singleton(
        CompactService,
        model=model_service,
    )

    # --- Persistence ---------------------------------------------------------
    db_service = providers.Singleton(
        DatabaseService,
        host=providers.Callable(lambda c: c.db_host, config),
        port=providers.Callable(lambda c: c.db_port, config),
        db=providers.Callable(lambda c: c.db_name, config),
        user=providers.Callable(lambda c: c.db_user, config),
        password=providers.Callable(lambda c: c.db_password, config),
    )

    # All repositories share the single DatabaseService instance.
    conversation_repository = providers.Singleton(
        ConversationRepository,
        db=db_service,
    )
    user_profile_repository = providers.Singleton(
        UserProfileRepository,
        db=db_service,
    )
    feedback_repository = providers.Singleton(
        FeedbackRepository,
        db=db_service,
    )

    # --- Chat ----------------------------------------------------------------
    # Factory: each resolution builds a fresh HistoryService.
    history_service = providers.Factory(
        HistoryService,
        system_prompt=providers.Callable(lambda c: c.system_prompt, config),
        max_turns=providers.Callable(lambda c: c.max_history_turns, config),
        compact_threshold=providers.Callable(lambda c: c.compact_threshold, config),
        repository=conversation_repository,
        compact_service=compact_service,
    )

    # Factory: each ChatService receives its own HistoryService from the
    # factory above, while the model and event bus are shared singletons.
    chat_service = providers.Factory(
        ChatService,
        model=model_service,
        history=history_service,
        event_bus=event_bus,
        max_tokens=providers.Callable(lambda c: c.max_tokens, config),
    )

    # --- UI / event handlers -------------------------------------------------
    ui_service = providers.Singleton(CliUiService)
    stream_token_handler = providers.Singleton(StreamTokenHandler)
    stream_end_handler = providers.Singleton(StreamEndHandler)

    # --- Phase 2 — RAG pipeline ----------------------------------------------
    embeddings = providers.Singleton(
        HuggingFaceEmbeddings,
        model_name=providers.Callable(lambda c: c.embedding_model_id, config),
        model_kwargs=providers.Callable(lambda c: {"device": c.embedding_device}, config),
    )

    # Optional reranker: resolves to None when config.reranker_enabled is
    # falsy. Declared as a Singleton (matching sparse_embeddings below) so an
    # enabled RerankService is constructed once and shared, instead of being
    # rebuilt on every resolution as a plain Callable would do.
    reranker = providers.Singleton(
        lambda c: RerankService(c.reranker_model_id) if c.reranker_enabled else None,
        config,
    )

    # Optional sparse embeddings: resolves to None when
    # config.hybrid_search_enabled is falsy.
    sparse_embeddings = providers.Singleton(
        lambda c: FastEmbedSparse(model_name=c.sparse_model_id) if c.hybrid_search_enabled else None,
        config,
    )

    ingestion_service = providers.Singleton(
        IngestionService,
        embeddings=embeddings,
        qdrant_url=providers.Callable(lambda c: c.qdrant_url, config),
        collection_name=providers.Callable(lambda c: c.qdrant_collection, config),
        breakpoint_threshold_type=providers.Callable(
            lambda c: c.semantic_breakpoint_threshold_type, config
        ),
        buffer_size=providers.Callable(lambda c: c.semantic_buffer_size, config),
        sparse_embeddings=sparse_embeddings,
    )

    # Uses the same embeddings, Qdrant URL and collection as ingestion_service.
    retriever_service = providers.Singleton(
        RetrieverService,
        embeddings=embeddings,
        qdrant_url=providers.Callable(lambda c: c.qdrant_url, config),
        collection_name=providers.Callable(lambda c: c.qdrant_collection, config),
        top_k=providers.Callable(lambda c: c.rag_top_k, config),
        reranker=reranker,
        rerank_fetch_k=providers.Callable(lambda c: c.reranker_fetch_k, config),
        sparse_embeddings=sparse_embeddings,
    )

    # --- Phase 17 — Vision Model (lazy load) ---------------------------------
    # NOTE(review): this provider is not injected into agent_service below and
    # no vision_enabled flag is consulted here; presumably the caller resolves
    # it and applies the flag — confirm.
    vision_model = providers.Singleton(
        MlxVisionModel,
        model_id=providers.Callable(lambda c: c.vision_model_id, config),
        max_tokens=providers.Callable(lambda c: c.vision_max_tokens, config),
    )

    # --- Phase 3 — LangGraph Agent -------------------------------------------
    agent_service = providers.Singleton(
        AgentService,
        chat_model=chat_model,
        retriever_service=retriever_service,
        system_prompt=providers.Callable(lambda c: c.system_prompt, config),
        rag_verbose=providers.Callable(lambda c: c.rag_verbose, config),
        rag_show_sources=providers.Callable(lambda c: c.rag_show_sources, config),
        langgraph_verbose=providers.Callable(lambda c: c.langgraph_verbose, config),
        think_verbose=providers.Callable(lambda c: c.think_verbose, config),
        query_rewrite_enabled=providers.Callable(lambda c: c.query_rewrite_enabled, config),
        user_profile_repository=user_profile_repository,
        conversation_repository=conversation_repository,
    )