cd41e9e33e
- **Implement `MlxModelService` for local LLM backend.**
- **Introduce `DatabaseService` for MySQL integration.**
- **Add `HistoryService` to manage conversation context.**
- **Set up CLI interface via `CliUiService`.**
- **Establish EventBus for token streaming.**
- **Include conversation repository for data persistence.**
- **Add environment-based configuration management.**
- **Draft IoC architectural plan.**
30 lines · 944 B · Python
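The file below implements the `AbstractModelService` Strategy interface imported from `services.model.base`. That base class is not part of this commit; the following is a minimal sketch of it, inferred from the three methods `MlxModelService` overrides, not the project's actual definition.

```python
# Sketch of the Strategy interface (assumed shape; the real
# services/model/base.py is not shown in this commit).
from abc import ABC, abstractmethod
from typing import Iterator


class AbstractModelService(ABC):
    """Strategy interface for pluggable LLM backends."""

    @abstractmethod
    def load(self) -> None:
        """Load model weights and tokenizer into memory."""

    @abstractmethod
    def build_prompt(self, history: list[dict]) -> str:
        """Render a chat history into a single prompt string."""

    @abstractmethod
    def stream(self, prompt: str, max_tokens: int) -> Iterator[str]:
        """Yield generated text incrementally."""
```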
```python
from typing import Iterator

from services.model.base import AbstractModelService


class MlxModelService(AbstractModelService):
    """MLX-based local LLM Strategy implementation."""

    def __init__(self, model_id: str):
        self._model_id = model_id
        self._model = None
        self._tokenizer = None

    def load(self) -> None:
        # Imported lazily so mlx_lm is only required once a model is loaded.
        from mlx_lm import load

        print(f"Loading model: {self._model_id}")
        self._model, self._tokenizer = load(self._model_id)

    def build_prompt(self, history: list[dict]) -> str:
        # Render the conversation history with the model's chat template.
        return self._tokenizer.apply_chat_template(
            history,
            tokenize=False,
            add_generation_prompt=True,
        )

    def stream(self, prompt: str, max_tokens: int) -> Iterator[str]:
        from mlx_lm import stream_generate

        # Yield the generated text incrementally for token streaming.
        for chunk in stream_generate(
            self._model, self._tokenizer, prompt=prompt, max_tokens=max_tokens
        ):
            yield chunk.text
```
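For context, a hedged sketch of how this service might be driven. The module path and model id are illustrative assumptions, and the direct `print` loop stands in for the real consumer: per the commit message, the CLI layer would receive tokens via the EventBus rather than printing here.

```python
# Usage sketch, not the project's actual entry point.
from services.model.mlx_model import MlxModelService  # hypothetical module path

# Any MLX-compatible model id would do; this one is an example.
service = MlxModelService("mlx-community/Mistral-7B-Instruct-v0.3-4bit")
service.load()

# In the full design, HistoryService would supply this list.
history = [{"role": "user", "content": "Hello!"}]
prompt = service.build_prompt(history)

# Stand-in consumer; the real CliUiService would subscribe to an
# EventBus topic and render chunks as they arrive.
for token in service.stream(prompt, max_tokens=256):
    print(token, end="", flush=True)
```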