68f741af72
Dual-model approach (C): Qwen3-8B handles conversation, Qwen2.5-VL-7B analyzes images on demand via analyze_image LangChain tool. - services/model/mlx_vision_model.py: MlxVisionModel (mlx-vlm wrapper, lazy load) - services/agent/tools.py: make_vision_tool(vision_model, image_path) - agent_service.py: stream_response(image_path=None), dynamic tool binding via config["image_path"] — thread-safe per-request rebinding - container.py: vision_model Singleton provider - config.py: vision_enabled, vision_model_id, vision_max_tokens - api.py: image_base64 in ChatRequest, decode to temp file, cleanup after stream Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
49 lines
1.6 KiB
Python
49 lines
1.6 KiB
Python
"""Image-analysis service built on Qwen2.5-VL (mlx-vlm).

The model is lazy-loaded on the first analyze() call to save memory.
"""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
|
|
logger = logging.getLogger(__name__)

# Default instruction sent along with the image. The text is Korean and asks
# for a detailed Korean description of everything visible (people, food,
# documents, ...). It is runtime text passed to the model, so it stays as-is.
_DEFAULT_PROMPT = "이 이미지를 한국어로 자세히 설명해줘. 사람, 음식, 문서 등 보이는 것을 빠짐없이 설명해."


class MlxVisionModel:
    """Lazy-loading wrapper around an mlx-vlm vision-language model.

    Nothing is imported from ``mlx_vlm`` and no weights are loaded until the
    first ``analyze()`` call, so constructing an instance is cheap.
    """

    def __init__(self, model_id: str, max_tokens: int = 512) -> None:
        """Store the settings; the model itself is loaded on first use.

        Args:
            model_id: Identifier handed to ``mlx_vlm.load`` and
                ``mlx_vlm.utils.load_config``.
            max_tokens: Generation limit forwarded to ``mlx_vlm.generate``.
        """
        self._model_id = model_id
        self._max_tokens = max_tokens
        self._model = None
        self._processor = None
        # Cached result of load_config(); filled once by _load() so that
        # analyze() does not re-read the config on every request.
        self._config = None

    def _load(self) -> None:
        """Load model, processor and config on first use; no-op afterwards."""
        if self._model is None:
            logger.info("Vision 모델 로딩 중: %s", self._model_id)
            # Imported here (not at module top) to keep mlx_vlm off the
            # import path until a vision request actually arrives.
            from mlx_vlm import load

            self._model, self._processor = load(self._model_id)
            logger.info("Vision 모델 로딩 완료")

        if self._config is None:
            from mlx_vlm.utils import load_config

            self._config = load_config(self._model_id)

    def analyze(self, image_path: str, prompt: str = _DEFAULT_PROMPT) -> str:
        """Run the vision model on one image and return the generated text.

        Args:
            image_path: Image location, passed straight through to
                ``mlx_vlm.generate`` as ``image``.
            prompt: Instruction for the model; it is wrapped with the model's
                chat template for a single image before generation.

        Returns:
            The generated text as a plain string.
        """
        self._load()

        from mlx_vlm import generate
        from mlx_vlm.prompt_utils import apply_chat_template

        formatted_prompt = apply_chat_template(
            self._processor, self._config, prompt, num_images=1
        )
        result = generate(
            self._model,
            self._processor,
            image=image_path,
            prompt=formatted_prompt,
            max_tokens=self._max_tokens,
            verbose=False,
        )
        return self._to_text(result)

    @staticmethod
    def _to_text(result: object) -> str:
        """Normalize the return value of ``mlx_vlm.generate`` to a string."""
        if isinstance(result, str):
            return result
        # NOTE(review): newer mlx-vlm releases appear to return a result
        # object exposing the generated text as `.text`; str() on such an
        # object would yield its repr instead of the text. Confirm against
        # the pinned mlx-vlm version.
        text = getattr(result, "text", None)
        if isinstance(text, str):
            return text
        return str(result)
|