diff --git a/api_client.py b/api_client.py index e429be2..b29d909 100644 --- a/api_client.py +++ b/api_client.py @@ -57,11 +57,17 @@ class HTTPAPIClient: message: str, user_id: str = "default", show_thinking: bool = False, + image_path: str | None = None, ) -> AsyncIterator[tuple[str, str | None]]: + payload: dict = {"message": message, "user_id": user_id, "show_thinking": show_thinking} + if image_path: + import base64 + with open(image_path, "rb") as f: + payload["image_base64"] = base64.b64encode(f.read()).decode() async with self._client.stream( "POST", f"{self._url}/chat", - json={"message": message, "user_id": user_id, "show_thinking": show_thinking}, + json=payload, timeout=self._timeout, ) as response: response.raise_for_status() diff --git a/app.py b/app.py index 789eaee..c8015b9 100644 --- a/app.py +++ b/app.py @@ -52,25 +52,31 @@ def transcribe_audio(filepath: str) -> str: # ── 채팅 ───────────────────────────────────────────────────────── -async def respond(message, history, show_thinking, user_id, use_tts, run_ids): - if not message.strip(): - yield history, "", None, run_ids, "", "" +async def respond(message, history, show_thinking, user_id, use_tts, run_ids, image_path): + if not message.strip() and not image_path: + yield history, "", None, run_ids, "", "", None return history = list(history) run_ids = list(run_ids) - history.append({"role": "user", "content": message}) + display_msg = message + if image_path: + display_msg = f"🖼️ [이미지 첨부]\n{message}" if message.strip() else "🖼️ [이미지 첨부]" + history.append({"role": "user", "content": display_msg}) history.append({"role": "assistant", "content": ""}) - yield history, "", None, run_ids, "", "" # thinking_box + source_box 초기화 + yield history, "", None, run_ids, "", "", None # boxes 초기화 + 이미지 초기화 collected_run_id: str | None = None - tts_text = "" # 순수 답변만 누적 (TTS용) - thinking_acc = "" # 전체 누적 (완료 후 details용) - thinking_text = "" # __thinking 토큰만 (줄 감지용) + tts_text = "" + thinking_acc = "" + thinking_text = "" thinking_finalized = False + source_box_html = "" try: - async for token, run_id in container.chat_service().chat(message, user_id, show_thinking): + async for token, run_id in container.chat_service().chat( + message or "이 이미지를 분석해줘.", user_id, show_thinking, image_path=image_path + ): if run_id is not None: collected_run_id = run_id break @@ -78,14 +84,14 @@ async def respond(message, history, show_thinking, user_id, use_tts, run_ids): # 즉시 상태 — thinking_acc에 누적 안 함 if isinstance(token, dict) and "__status" in token: if not thinking_acc: - yield history, "", None, run_ids, _status_html(token["__status"]), gr.update() + yield history, "", None, run_ids, _status_html(token["__status"]), gr.update(), gr.update() continue # 사고 과정(LLM thinking) — 현재 줄만 live_html로 표시 if isinstance(token, dict) and "__thinking" in token: thinking_text += token["__thinking"] thinking_acc += token["__thinking"] - yield history, "", None, run_ids, _live_html(_last_line(thinking_text)), gr.update() + yield history, "", None, run_ids, _live_html(_last_line(thinking_text)), gr.update(), gr.update() continue # 진행 로그(LangGraph, 검색 등) — 메시지 전체를 live_html로 표시 @@ -93,36 +99,36 @@ async def respond(message, history, show_thinking, user_id, use_tts, run_ids): thinking_acc += token["__meta"] live = token["__meta"].strip() if live: - yield history, "", None, run_ids, _live_html(live), gr.update() + yield history, "", None, run_ids, _live_html(live), gr.update(), gr.update() continue # RAG 출처 — 별도 source_box로 표시 if isinstance(token, dict) and "__sources" in token: source_box_html = _sources_html(token["__sources"]) - yield history, "", None, run_ids, gr.update(), source_box_html + yield history, "", None, run_ids, gr.update(), source_box_html, gr.update() continue # 첫 답변 토큰 도착 — 전체를 details로 전환 (접힌 상태) if thinking_acc and not thinking_finalized: thinking_finalized = True - yield history, "", None, run_ids, _thinking_html(thinking_acc), gr.update() + yield history, "", None, run_ids, _thinking_html(thinking_acc), gr.update(), gr.update() tts_text += token history[-1]["content"] += token - yield history, "", None, run_ids, gr.update(), gr.update() + yield history, "", None, run_ids, gr.update(), gr.update(), gr.update() except Exception as e: history[-1]["content"] += f"\n\n[오류: {e}]" - yield history, "", None, run_ids, gr.update(), gr.update() + yield history, "", None, run_ids, gr.update(), gr.update(), gr.update() return run_ids.append(collected_run_id) if use_tts: audio_path = await container.tts_service().speak(tts_text) - yield history, "", audio_path, run_ids, gr.update(), gr.update() + yield history, "", audio_path, run_ids, gr.update(), gr.update(), gr.update() else: - yield history, "", None, run_ids, gr.update(), gr.update() + yield history, "", None, run_ids, gr.update(), gr.update(), gr.update() async def handle_feedback(like_data: gr.LikeData, history, run_ids, user_id): @@ -282,6 +288,15 @@ with gr.Blocks(title="율봇") as demo: autofocus=True, ) send_btn = gr.Button("전송", variant="primary", scale=1) + with gr.Row(): + image_input = gr.Image( + type="filepath", + label="📷 이미지 첨부 (선택)", + sources=["upload", "clipboard"], + height=160, + scale=1, + ) + gr.HTML("
", visible=False) # spacer with gr.Row(): audio_input = gr.Audio( @@ -310,16 +325,11 @@ with gr.Blocks(title="율봇") as demo: transcribe_btn.click(transcribe_audio, inputs=[audio_input], outputs=[msg_box]) - send_btn.click( - respond, - inputs=[msg_box, chatbot, show_thinking, user_state, use_tts, run_ids_state], - outputs=[chatbot, msg_box, tts_output, run_ids_state, thinking_box, source_box], - ) - msg_box.submit( - respond, - inputs=[msg_box, chatbot, show_thinking, user_state, use_tts, run_ids_state], - outputs=[chatbot, msg_box, tts_output, run_ids_state, thinking_box, source_box], - ) + _respond_inputs = [msg_box, chatbot, show_thinking, user_state, use_tts, run_ids_state, image_input] + _respond_outputs = [chatbot, msg_box, tts_output, run_ids_state, thinking_box, source_box, image_input] + + send_btn.click(respond, inputs=_respond_inputs, outputs=_respond_outputs) + msg_box.submit(respond, inputs=_respond_inputs, outputs=_respond_outputs) reset_btn.click(reset_chat, inputs=[user_state], outputs=[chatbot, run_ids_state]) chatbot.like( diff --git a/services/chat.py b/services/chat.py index 2e58ae8..7c976f7 100644 --- a/services/chat.py +++ b/services/chat.py @@ -8,9 +8,13 @@ class ChatService: self._api = api_client def chat( - self, message: str, user_id: str, show_thinking: bool + self, + message: str, + user_id: str, + show_thinking: bool, + image_path: str | None = None, ) -> AsyncIterator[tuple[str, str | None]]: - return self._api.chat(message, user_id, show_thinking) + return self._api.chat(message, user_id, show_thinking, image_path=image_path) async def reset(self, user_id: str) -> None: await self._api.reset(user_id)