Phase 17: Add image upload to chat UI
- app.py: add the image_input gr.Image component; respond() accepts image_path; all yields updated to 7 outputs
- api_client.py: chat(image_path=None) base64-encodes the image for the API
- services/chat.py: chat(image_path=None) passes image_path through to api_client

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+7
-1
@@ -57,11 +57,17 @@ class HTTPAPIClient:
|
|||||||
message: str,
|
message: str,
|
||||||
user_id: str = "default",
|
user_id: str = "default",
|
||||||
show_thinking: bool = False,
|
show_thinking: bool = False,
|
||||||
|
image_path: str | None = None,
|
||||||
) -> AsyncIterator[tuple[str, str | None]]:
|
) -> AsyncIterator[tuple[str, str | None]]:
|
||||||
|
payload: dict = {"message": message, "user_id": user_id, "show_thinking": show_thinking}
|
||||||
|
if image_path:
|
||||||
|
import base64
|
||||||
|
with open(image_path, "rb") as f:
|
||||||
|
payload["image_base64"] = base64.b64encode(f.read()).decode()
|
||||||
async with self._client.stream(
|
async with self._client.stream(
|
||||||
"POST",
|
"POST",
|
||||||
f"{self._url}/chat",
|
f"{self._url}/chat",
|
||||||
json={"message": message, "user_id": user_id, "show_thinking": show_thinking},
|
json=payload,
|
||||||
timeout=self._timeout,
|
timeout=self._timeout,
|
||||||
) as response:
|
) as response:
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|||||||
@@ -52,25 +52,31 @@ def transcribe_audio(filepath: str) -> str:
|
|||||||
|
|
||||||
# ── 채팅 ─────────────────────────────────────────────────────────
|
# ── 채팅 ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
async def respond(message, history, show_thinking, user_id, use_tts, run_ids):
|
async def respond(message, history, show_thinking, user_id, use_tts, run_ids, image_path):
|
||||||
if not message.strip():
|
if not message.strip() and not image_path:
|
||||||
yield history, "", None, run_ids, "", ""
|
yield history, "", None, run_ids, "", "", None
|
||||||
return
|
return
|
||||||
|
|
||||||
history = list(history)
|
history = list(history)
|
||||||
run_ids = list(run_ids)
|
run_ids = list(run_ids)
|
||||||
history.append({"role": "user", "content": message})
|
display_msg = message
|
||||||
|
if image_path:
|
||||||
|
display_msg = f"🖼️ [이미지 첨부]\n{message}" if message.strip() else "🖼️ [이미지 첨부]"
|
||||||
|
history.append({"role": "user", "content": display_msg})
|
||||||
history.append({"role": "assistant", "content": ""})
|
history.append({"role": "assistant", "content": ""})
|
||||||
yield history, "", None, run_ids, "", "" # thinking_box + source_box 초기화
|
yield history, "", None, run_ids, "", "", None # boxes 초기화 + 이미지 초기화
|
||||||
|
|
||||||
collected_run_id: str | None = None
|
collected_run_id: str | None = None
|
||||||
tts_text = "" # 순수 답변만 누적 (TTS용)
|
tts_text = ""
|
||||||
thinking_acc = "" # 전체 누적 (완료 후 details용)
|
thinking_acc = ""
|
||||||
thinking_text = "" # __thinking 토큰만 (줄 감지용)
|
thinking_text = ""
|
||||||
thinking_finalized = False
|
thinking_finalized = False
|
||||||
|
source_box_html = ""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
async for token, run_id in container.chat_service().chat(message, user_id, show_thinking):
|
async for token, run_id in container.chat_service().chat(
|
||||||
|
message or "이 이미지를 분석해줘.", user_id, show_thinking, image_path=image_path
|
||||||
|
):
|
||||||
if run_id is not None:
|
if run_id is not None:
|
||||||
collected_run_id = run_id
|
collected_run_id = run_id
|
||||||
break
|
break
|
||||||
@@ -78,14 +84,14 @@ async def respond(message, history, show_thinking, user_id, use_tts, run_ids):
|
|||||||
# 즉시 상태 — thinking_acc에 누적 안 함
|
# 즉시 상태 — thinking_acc에 누적 안 함
|
||||||
if isinstance(token, dict) and "__status" in token:
|
if isinstance(token, dict) and "__status" in token:
|
||||||
if not thinking_acc:
|
if not thinking_acc:
|
||||||
yield history, "", None, run_ids, _status_html(token["__status"]), gr.update()
|
yield history, "", None, run_ids, _status_html(token["__status"]), gr.update(), gr.update()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 사고 과정(LLM thinking) — 현재 줄만 live_html로 표시
|
# 사고 과정(LLM thinking) — 현재 줄만 live_html로 표시
|
||||||
if isinstance(token, dict) and "__thinking" in token:
|
if isinstance(token, dict) and "__thinking" in token:
|
||||||
thinking_text += token["__thinking"]
|
thinking_text += token["__thinking"]
|
||||||
thinking_acc += token["__thinking"]
|
thinking_acc += token["__thinking"]
|
||||||
yield history, "", None, run_ids, _live_html(_last_line(thinking_text)), gr.update()
|
yield history, "", None, run_ids, _live_html(_last_line(thinking_text)), gr.update(), gr.update()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 진행 로그(LangGraph, 검색 등) — 메시지 전체를 live_html로 표시
|
# 진행 로그(LangGraph, 검색 등) — 메시지 전체를 live_html로 표시
|
||||||
@@ -93,36 +99,36 @@ async def respond(message, history, show_thinking, user_id, use_tts, run_ids):
|
|||||||
thinking_acc += token["__meta"]
|
thinking_acc += token["__meta"]
|
||||||
live = token["__meta"].strip()
|
live = token["__meta"].strip()
|
||||||
if live:
|
if live:
|
||||||
yield history, "", None, run_ids, _live_html(live), gr.update()
|
yield history, "", None, run_ids, _live_html(live), gr.update(), gr.update()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# RAG 출처 — 별도 source_box로 표시
|
# RAG 출처 — 별도 source_box로 표시
|
||||||
if isinstance(token, dict) and "__sources" in token:
|
if isinstance(token, dict) and "__sources" in token:
|
||||||
source_box_html = _sources_html(token["__sources"])
|
source_box_html = _sources_html(token["__sources"])
|
||||||
yield history, "", None, run_ids, gr.update(), source_box_html
|
yield history, "", None, run_ids, gr.update(), source_box_html, gr.update()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 첫 답변 토큰 도착 — 전체를 details로 전환 (접힌 상태)
|
# 첫 답변 토큰 도착 — 전체를 details로 전환 (접힌 상태)
|
||||||
if thinking_acc and not thinking_finalized:
|
if thinking_acc and not thinking_finalized:
|
||||||
thinking_finalized = True
|
thinking_finalized = True
|
||||||
yield history, "", None, run_ids, _thinking_html(thinking_acc), gr.update()
|
yield history, "", None, run_ids, _thinking_html(thinking_acc), gr.update(), gr.update()
|
||||||
|
|
||||||
tts_text += token
|
tts_text += token
|
||||||
history[-1]["content"] += token
|
history[-1]["content"] += token
|
||||||
yield history, "", None, run_ids, gr.update(), gr.update()
|
yield history, "", None, run_ids, gr.update(), gr.update(), gr.update()
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
history[-1]["content"] += f"\n\n[오류: {e}]"
|
history[-1]["content"] += f"\n\n[오류: {e}]"
|
||||||
yield history, "", None, run_ids, gr.update(), gr.update()
|
yield history, "", None, run_ids, gr.update(), gr.update(), gr.update()
|
||||||
return
|
return
|
||||||
|
|
||||||
run_ids.append(collected_run_id)
|
run_ids.append(collected_run_id)
|
||||||
|
|
||||||
if use_tts:
|
if use_tts:
|
||||||
audio_path = await container.tts_service().speak(tts_text)
|
audio_path = await container.tts_service().speak(tts_text)
|
||||||
yield history, "", audio_path, run_ids, gr.update(), gr.update()
|
yield history, "", audio_path, run_ids, gr.update(), gr.update(), gr.update()
|
||||||
else:
|
else:
|
||||||
yield history, "", None, run_ids, gr.update(), gr.update()
|
yield history, "", None, run_ids, gr.update(), gr.update(), gr.update()
|
||||||
|
|
||||||
|
|
||||||
async def handle_feedback(like_data: gr.LikeData, history, run_ids, user_id):
|
async def handle_feedback(like_data: gr.LikeData, history, run_ids, user_id):
|
||||||
@@ -274,14 +280,22 @@ with gr.Blocks(title="율봇") as demo:
|
|||||||
thinking_box = gr.HTML(value="")
|
thinking_box = gr.HTML(value="")
|
||||||
chatbot = gr.Chatbot(label="율봇", height=500)
|
chatbot = gr.Chatbot(label="율봇", height=500)
|
||||||
source_box = gr.HTML(value="")
|
source_box = gr.HTML(value="")
|
||||||
with gr.Row():
|
with gr.Row(equal_height=True):
|
||||||
|
image_input = gr.Image(
|
||||||
|
type="filepath",
|
||||||
|
label="이미지 첨부 (선택)",
|
||||||
|
sources=["upload", "clipboard"],
|
||||||
|
scale=1,
|
||||||
|
height=120,
|
||||||
|
)
|
||||||
|
with gr.Column(scale=5):
|
||||||
msg_box = gr.Textbox(
|
msg_box = gr.Textbox(
|
||||||
placeholder="질문을 입력하세요... (Enter로 전송)",
|
placeholder="질문을 입력하세요... (Enter로 전송)",
|
||||||
label="",
|
label="",
|
||||||
scale=5,
|
lines=2,
|
||||||
autofocus=True,
|
autofocus=True,
|
||||||
)
|
)
|
||||||
send_btn = gr.Button("전송", variant="primary", scale=1)
|
send_btn = gr.Button("전송", variant="primary")
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
audio_input = gr.Audio(
|
audio_input = gr.Audio(
|
||||||
@@ -310,16 +324,11 @@ with gr.Blocks(title="율봇") as demo:
|
|||||||
|
|
||||||
transcribe_btn.click(transcribe_audio, inputs=[audio_input], outputs=[msg_box])
|
transcribe_btn.click(transcribe_audio, inputs=[audio_input], outputs=[msg_box])
|
||||||
|
|
||||||
send_btn.click(
|
_respond_inputs = [msg_box, chatbot, show_thinking, user_state, use_tts, run_ids_state, image_input]
|
||||||
respond,
|
_respond_outputs = [chatbot, msg_box, tts_output, run_ids_state, thinking_box, source_box, image_input]
|
||||||
inputs=[msg_box, chatbot, show_thinking, user_state, use_tts, run_ids_state],
|
|
||||||
outputs=[chatbot, msg_box, tts_output, run_ids_state, thinking_box, source_box],
|
send_btn.click(respond, inputs=_respond_inputs, outputs=_respond_outputs)
|
||||||
)
|
msg_box.submit(respond, inputs=_respond_inputs, outputs=_respond_outputs)
|
||||||
msg_box.submit(
|
|
||||||
respond,
|
|
||||||
inputs=[msg_box, chatbot, show_thinking, user_state, use_tts, run_ids_state],
|
|
||||||
outputs=[chatbot, msg_box, tts_output, run_ids_state, thinking_box, source_box],
|
|
||||||
)
|
|
||||||
reset_btn.click(reset_chat, inputs=[user_state], outputs=[chatbot, run_ids_state])
|
reset_btn.click(reset_chat, inputs=[user_state], outputs=[chatbot, run_ids_state])
|
||||||
|
|
||||||
chatbot.like(
|
chatbot.like(
|
||||||
|
|||||||
+6
-2
@@ -8,9 +8,13 @@ class ChatService:
|
|||||||
self._api = api_client
|
self._api = api_client
|
||||||
|
|
||||||
def chat(
|
def chat(
|
||||||
self, message: str, user_id: str, show_thinking: bool
|
self,
|
||||||
|
message: str,
|
||||||
|
user_id: str,
|
||||||
|
show_thinking: bool,
|
||||||
|
image_path: str | None = None,
|
||||||
) -> AsyncIterator[tuple[str, str | None]]:
|
) -> AsyncIterator[tuple[str, str | None]]:
|
||||||
return self._api.chat(message, user_id, show_thinking)
|
return self._api.chat(message, user_id, show_thinking, image_path=image_path)
|
||||||
|
|
||||||
async def reset(self, user_id: str) -> None:
|
async def reset(self, user_id: str) -> None:
|
||||||
await self._api.reset(user_id)
|
await self._api.reset(user_id)
|
||||||
|
|||||||
Reference in New Issue
Block a user