from typing import Iterator

from services.model.base import AbstractModelService


class MlxModelService(AbstractModelService):
    """Strategy implementation of a local LLM backed by MLX.

    Wraps ``mlx_lm`` model loading, chat-template prompt construction,
    and streaming token generation behind the ``AbstractModelService``
    interface. ``load()`` must be called before ``build_prompt()`` or
    ``stream()``.
    """

    def __init__(self, model_id: str):
        """Store the model identifier; no weights are loaded yet.

        Args:
            model_id: Hugging Face / local identifier passed to
                ``mlx_lm.load``.
        """
        self._model_id = model_id
        # Populated by load(); None means "not loaded yet".
        self._model = None
        self._tokenizer = None

    def _ensure_loaded(self) -> None:
        """Raise a clear error if load() has not been called.

        Without this guard, build_prompt()/stream() would fail with an
        opaque ``AttributeError`` on ``None``.
        """
        if self._model is None or self._tokenizer is None:
            raise RuntimeError(
                "Model is not loaded; call load() before using this service."
            )

    def load(self) -> None:
        """Load the MLX model and tokenizer into memory.

        Imported lazily so that merely constructing the service does not
        require ``mlx_lm`` to be installed.
        """
        from mlx_lm import load

        print(f"모델 로딩 중: {self._model_id}")
        self._model, self._tokenizer = load(self._model_id)

    def build_prompt(self, history: list[dict]) -> str:
        """Render a chat history into a single prompt string.

        Args:
            history: Chat messages in the usual ``{"role": ..., "content": ...}``
                form accepted by the tokenizer's chat template.

        Returns:
            The untokenized prompt text, with the generation prompt
            appended so the model continues as the assistant.

        Raises:
            RuntimeError: If ``load()`` has not been called yet.
        """
        self._ensure_loaded()
        return self._tokenizer.apply_chat_template(
            history,
            tokenize=False,
            add_generation_prompt=True,
        )

    def stream(self, prompt: str, max_tokens: int) -> Iterator[str]:
        """Yield generated text chunks for *prompt* as they are produced.

        Args:
            prompt: Prompt text, typically from ``build_prompt``.
            max_tokens: Upper bound on generated tokens.

        Yields:
            Text fragments (``chunk.text``) from ``mlx_lm.stream_generate``.

        Raises:
            RuntimeError: If ``load()`` has not been called yet.
        """
        self._ensure_loaded()
        from mlx_lm import stream_generate

        for chunk in stream_generate(
            self._model,
            self._tokenizer,
            prompt=prompt,
            max_tokens=max_tokens,
        ):
            yield chunk.text