From 2348f177917c0d62a53afbb7f8cc21b9a8359c22 Mon Sep 17 00:00:00 2001 From: sal Date: Mon, 1 Jun 2026 10:56:01 +0900 Subject: [PATCH 1/6] Move progress logs into thinking box alongside LLM reasoning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both __meta (LangGraph/search progress) and __thinking (LLM reasoning) tokens now stream into the thinking box instead of the chatbot. Chatbot shows only the final answer. Thinking box shows the full analysis pipeline: [LangGraph → ...], 문서 검색 중, thinking content. Co-Authored-By: Claude Sonnet 4.6 --- app.py | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/app.py b/app.py index 103de3d..a4b24e9 100644 --- a/app.py +++ b/app.py @@ -105,9 +105,9 @@ async def respond(message, history, show_thinking, user_id, use_tts, run_ids): yield history, "", None, run_ids, gr.update(value="", visible=False) collected_run_id: str | None = None - tts_text = "" # 순수 답변만 누적 (TTS용) - thinking_acc = "" # 사고 과정 누적 - thinking_active = False + tts_text = "" # 순수 답변만 누적 (TTS용) + thinking_acc = "" # 사고 과정 + 진행 로그 누적 + thinking_finalized = False # 첫 답변 토큰 도착 시 박스 완료 처리 try: async for token, run_id in api_client.chat(message, user_id, show_thinking): @@ -115,26 +115,31 @@ async def respond(message, history, show_thinking, user_id, use_tts, run_ids): collected_run_id = run_id break + # 사고 과정(LLM thinking) — 박스에 추가 if isinstance(token, dict) and "__thinking" in token: - thinking_active = True thinking_acc += token["__thinking"] - thinking_md = f"🤔 **사고 중...**\n\n{thinking_acc}▌" - yield history, "", None, run_ids, gr.update(value=thinking_md, visible=True) + yield history, "", None, run_ids, gr.update( + value=f"🤔 **분석 중...**\n\n{thinking_acc}▌", visible=True + ) continue - if thinking_active: - # 첫 답변 토큰 도착 — 사고 완료 표시 - thinking_active = False + # 진행 로그(LangGraph, 검색 등) — 박스에 추가 (챗봇에는 표시 안 함) + if isinstance(token, dict) and "__meta" in token: + 
thinking_acc += token["__meta"] yield history, "", None, run_ids, gr.update( - value=f"💭 **사고 완료**\n\n{thinking_acc}", visible=True + value=f"🤔 **분석 중...**\n\n{thinking_acc}▌", visible=True + ) + continue + + # 첫 답변 토큰 도착 — 박스를 완료 상태로 전환 + if thinking_acc and not thinking_finalized: + thinking_finalized = True + yield history, "", None, run_ids, gr.update( + value=f"💭 **분석 완료**\n\n{thinking_acc}", visible=True ) - if isinstance(token, dict) and "__meta" in token: - display_token = token["__meta"] - else: - display_token = token - tts_text += display_token - history[-1]["content"] += display_token + tts_text += token + history[-1]["content"] += token yield history, "", None, run_ids, gr.update() except Exception as e: From 4956ab70858f33db4a657b3b0e0e2df369c5e503 Mon Sep 17 00:00:00 2001 From: sal Date: Mon, 1 Jun 2026 11:30:57 +0900 Subject: [PATCH 2/6] Replace gr.Markdown thinking box with gr.HTML for reliable streaming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gr.Markdown visible toggling is unreliable in Gradio streaming generators. Switched to gr.HTML with inline styles — empty string hides the element, HTML string shows the styled box. No visibility state needed. 
Co-Authored-By: Claude Sonnet 4.6 --- app.py | 58 ++++++++++++++++++++++++++-------------------------------- 1 file changed, 26 insertions(+), 32 deletions(-) diff --git a/app.py b/app.py index a4b24e9..082e8bb 100644 --- a/app.py +++ b/app.py @@ -8,6 +8,7 @@ YOULBOT_API_TOKEN= ← api.py에 API_TOKEN 설정 시 동일 값 """ import asyncio +import html as _html import os import platform import subprocess @@ -95,14 +96,14 @@ async def tts_speak(text: str) -> str | None: async def respond(message, history, show_thinking, user_id, use_tts, run_ids): if not message.strip(): - yield history, "", None, run_ids, gr.update() + yield history, "", None, run_ids, "" return history = list(history) run_ids = list(run_ids) history.append({"role": "user", "content": message}) history.append({"role": "assistant", "content": ""}) - yield history, "", None, run_ids, gr.update(value="", visible=False) + yield history, "", None, run_ids, "" # thinking_box 초기화 collected_run_id: str | None = None tts_text = "" # 순수 답변만 누적 (TTS용) @@ -118,29 +119,23 @@ async def respond(message, history, show_thinking, user_id, use_tts, run_ids): # 사고 과정(LLM thinking) — 박스에 추가 if isinstance(token, dict) and "__thinking" in token: thinking_acc += token["__thinking"] - yield history, "", None, run_ids, gr.update( - value=f"🤔 **분석 중...**\n\n{thinking_acc}▌", visible=True - ) + yield history, "", None, run_ids, _thinking_html(thinking_acc) continue # 진행 로그(LangGraph, 검색 등) — 박스에 추가 (챗봇에는 표시 안 함) if isinstance(token, dict) and "__meta" in token: thinking_acc += token["__meta"] - yield history, "", None, run_ids, gr.update( - value=f"🤔 **분석 중...**\n\n{thinking_acc}▌", visible=True - ) + yield history, "", None, run_ids, _thinking_html(thinking_acc) continue # 첫 답변 토큰 도착 — 박스를 완료 상태로 전환 if thinking_acc and not thinking_finalized: thinking_finalized = True - yield history, "", None, run_ids, gr.update( - value=f"💭 **분석 완료**\n\n{thinking_acc}", visible=True - ) + yield history, "", None, run_ids, _thinking_html(thinking_acc, 
done=True) tts_text += token history[-1]["content"] += token - yield history, "", None, run_ids, gr.update() + yield history, "", None, run_ids, gr.update() # thinking_box 유지 except Exception as e: history[-1]["content"] += f"\n\n[오류: {e}]" @@ -227,22 +222,25 @@ def delete_doc(source): # ── UI 구성 ────────────────────────────────────────────────────── -_THINKING_CSS = """ -.thinking-box { - background: #f9f9f9; - border-left: 3px solid #bbb; - border-radius: 6px; - padding: 10px 14px; - margin-bottom: 6px; - max-height: 220px; - overflow-y: auto; - font-size: 0.85em; - color: #555; - white-space: pre-wrap; -} -""" +_THINKING_STYLE = ( + "background:#f9f9f9;border-left:3px solid #bbb;border-radius:6px;" + "padding:10px 14px;max-height:220px;overflow-y:auto;" + "font-size:0.85em;color:#555;white-space:pre-wrap;margin-bottom:6px;" +) -with gr.Blocks(title="율봇", css=_THINKING_CSS) as demo: + +def _thinking_html(text: str, done: bool = False) -> str: + icon = "💭" if done else "🤔" + label = "분석 완료" if done else "분석 중..." + cursor = "" if done else " ▌" + return ( + f'
<div style="{_THINKING_STYLE}">' + f"<b>{icon} {label}</b><br><br>" + f"{_html.escape(text)}{cursor}</div>
" + ) + + +with gr.Blocks(title="율봇") as demo: gr.Markdown("# 율봇\n육아·금융 전문 AI 상담 도우미") user_state = gr.State(DEFAULT_USER) @@ -257,11 +255,7 @@ with gr.Blocks(title="율봇", css=_THINKING_CSS) as demo: scale=1, ) - thinking_box = gr.Markdown( - value="", - visible=False, - elem_classes=["thinking-box"], - ) + thinking_box = gr.HTML(value="") chatbot = gr.Chatbot(label="율봇", height=500) with gr.Row(): msg_box = gr.Textbox( From 5cf8bdabfd60f0e057dc8e60976541035351b96f Mon Sep 17 00:00:00 2001 From: sal Date: Mon, 1 Jun 2026 13:08:30 +0900 Subject: [PATCH 3/6] Handle __status tokens for instant thinking box feedback __status tokens show immediately in the thinking box but do not accumulate in thinking_acc. When real content (__meta/__thinking) arrives it overwrites the status message naturally. Co-Authored-By: Claude Sonnet 4.6 --- app.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/app.py b/app.py index 082e8bb..c361b86 100644 --- a/app.py +++ b/app.py @@ -116,6 +116,12 @@ async def respond(message, history, show_thinking, user_id, use_tts, run_ids): collected_run_id = run_id break + # 즉시 상태 표시 — thinking_acc에 누적하지 않음 (임시 메시지) + if isinstance(token, dict) and "__status" in token: + status_text = (thinking_acc + "\n\n" + token["__status"]) if thinking_acc else token["__status"] + yield history, "", None, run_ids, _thinking_html(status_text) + continue + # 사고 과정(LLM thinking) — 박스에 추가 if isinstance(token, dict) and "__thinking" in token: thinking_acc += token["__thinking"] From 18609a4f7d82569e505aba929e5e482fb8c7f176 Mon Sep 17 00:00:00 2001 From: sal Date: Mon, 1 Jun 2026 13:34:47 +0900 Subject: [PATCH 4/6] Collapsible thinking box with details/summary, thinking on by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Thinking box uses
<details>/<summary> — collapsed by default, expands on click - Simple __status header shown before content arrives (no expand needed) - show_thinking checkbox default changed to True Co-Authored-By: Claude Sonnet 4.6 --- app.py | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/app.py b/app.py index c361b86..c1dfb15 100644 --- a/app.py +++ b/app.py @@ -118,8 +118,9 @@ async def respond(message, history, show_thinking, user_id, use_tts, run_ids): # 즉시 상태 표시 — thinking_acc에 누적하지 않음 (임시 메시지) if isinstance(token, dict) and "__status" in token: - status_text = (thinking_acc + "\n\n" + token["__status"]) if thinking_acc else token["__status"] - yield history, "", None, run_ids, _thinking_html(status_text) + if not thinking_acc: + yield history, "", None, run_ids, _status_html(token["__status"]) + # thinking_acc에 내용 있으면 기존 표시 유지 continue # 사고 과정(LLM thinking) — 박스에 추가 @@ -228,21 +229,35 @@ def delete_doc(source): # ── UI 구성 ────────────────────────────────────────────────────── -_THINKING_STYLE = ( +_BOX_STYLE = ( "background:#f9f9f9;border-left:3px solid #bbb;border-radius:6px;" - "padding:10px 14px;max-height:220px;overflow-y:auto;" - "font-size:0.85em;color:#555;white-space:pre-wrap;margin-bottom:6px;" + "padding:8px 14px;margin-bottom:6px;" +) +_CONTENT_STYLE = ( + "margin-top:8px;white-space:pre-wrap;font-size:0.85em;" + "color:#555;max-height:200px;overflow-y:auto;" ) def _thinking_html(text: str, done: bool = False) -> str: + """접기/펼치기 가능한 사고 과정 박스.""" icon = "💭" if done else "🤔" label = "분석 완료" if done else "분석 중..." cursor = "" if done else " ▌" return ( - f'
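<div style="{_THINKING_STYLE}">' - f"<b>{icon} {label}</b><br><br>" - f"{_html.escape(text)}{cursor}</div>" + f'<details style="{_BOX_STYLE}">' + f'<summary>{icon} {label}</summary>' + f'<div style="{_CONTENT_STYLE}">{_html.escape(text)}{cursor}</div>' + f'</details>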
' + ) + + +def _status_html(status: str) -> str: + """내용 없이 상태만 표시하는 단순 헤더.""" + return ( + f'
<div style="{_BOX_STYLE}">' + f'🤔 {_html.escape(status)}' + f'</div>
' ) @@ -282,7 +297,7 @@ with gr.Blocks(title="율봇") as demo: transcribe_btn = gr.Button("음성 → 텍스트 변환", scale=1) with gr.Row(): - show_thinking = gr.Checkbox(label="사고 과정 표시", value=False) + show_thinking = gr.Checkbox(label="사고 과정 표시", value=True) use_tts = gr.Checkbox(label="음성으로 답변 읽기 (TTS)", value=False) reset_btn = gr.Button("대화 초기화", size="sm") From 55ea69d902f072d5ac18d2b205da98b766a154b9 Mon Sep 17 00:00:00 2001 From: sal Date: Mon, 1 Jun 2026 13:50:21 +0900 Subject: [PATCH 5/6] Fix details close-on-update: use div during streaming, details on complete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During streaming: _live_html (plain div) shows only the current line — no DOM reset, no closing issue. __thinking shows last non-empty line, __meta shows the full trimmed message. On completion: _thinking_html (
<details>) shows all accumulated content collapsed, expands on click. Co-Authored-By: Claude Sonnet 4.6 --- app.py | 56 ++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/app.py b/app.py index c1dfb15..66f3740 100644 --- a/app.py +++ b/app.py @@ -107,8 +107,9 @@ async def respond(message, history, show_thinking, user_id, use_tts, run_ids): collected_run_id: str | None = None tts_text = "" # 순수 답변만 누적 (TTS용) - thinking_acc = "" # 사고 과정 + 진행 로그 누적 - thinking_finalized = False # 첫 답변 토큰 도착 시 박스 완료 처리 + thinking_acc = "" # 전체 누적 (완료 후 details용) + thinking_text = "" # __thinking 토큰만 (줄 감지용) + thinking_finalized = False try: async for token, run_id in api_client.chat(message, user_id, show_thinking): @@ -116,33 +117,35 @@ async def respond(message, history, show_thinking, user_id, use_tts, run_ids): collected_run_id = run_id break - # 즉시 상태 표시 — thinking_acc에 누적하지 않음 (임시 메시지) + # 즉시 상태 — thinking_acc에 누적 안 함 if isinstance(token, dict) and "__status" in token: if not thinking_acc: yield history, "", None, run_ids, _status_html(token["__status"]) - # thinking_acc에 내용 있으면 기존 표시 유지 continue - # 사고 과정(LLM thinking) — 박스에 추가 + # 사고 과정(LLM thinking) — 현재 줄만 live_html로 표시 if isinstance(token, dict) and "__thinking" in token: + thinking_text += token["__thinking"] thinking_acc += token["__thinking"] - yield history, "", None, run_ids, _thinking_html(thinking_acc) + yield history, "", None, run_ids, _live_html(_last_line(thinking_text)) continue - # 진행 로그(LangGraph, 검색 등) — 박스에 추가 (챗봇에는 표시 안 함) + # 진행 로그(LangGraph, 검색 등) — 메시지 전체를 live_html로 표시 if isinstance(token, dict) and "__meta" in token: thinking_acc += token["__meta"] - yield history, "", None, run_ids, _thinking_html(thinking_acc) + live = token["__meta"].strip() + if live: + yield history, "", None, run_ids, _live_html(live) continue - # 첫 답변 토큰 도착 — 박스를 완료 상태로 전환 + # 첫 답변 토큰 도착 — 전체를 details로 전환 (접힌 상태) if thinking_acc and not thinking_finalized: 
thinking_finalized = True - yield history, "", None, run_ids, _thinking_html(thinking_acc, done=True) + yield history, "", None, run_ids, _thinking_html(thinking_acc) tts_text += token history[-1]["content"] += token - yield history, "", None, run_ids, gr.update() # thinking_box 유지 + yield history, "", None, run_ids, gr.update() except Exception as e: history[-1]["content"] += f"\n\n[오류: {e}]" @@ -234,20 +237,33 @@ _BOX_STYLE = ( "padding:8px 14px;margin-bottom:6px;" ) _CONTENT_STYLE = ( - "margin-top:8px;white-space:pre-wrap;font-size:0.85em;" - "color:#555;max-height:200px;overflow-y:auto;" + "margin-top:6px;white-space:pre-wrap;font-size:0.85em;" + "color:#555;max-height:160px;overflow-y:auto;" ) -def _thinking_html(text: str, done: bool = False) -> str: - """접기/펼치기 가능한 사고 과정 박스.""" - icon = "💭" if done else "🤔" - label = "분석 완료" if done else "분석 중..." - cursor = "" if done else " ▌" +def _last_line(text: str) -> str: + """현재 진행 중인 마지막 비어있지 않은 줄 반환.""" + lines = [l for l in text.split("\n") if l.strip()] + return lines[-1] if lines else text.strip() + + +def _live_html(text: str) -> str: + """스트리밍 중 현재 줄만 보여주는 단순 div (details 미사용 → 닫힘 현상 없음).""" + return ( + f'
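<div style="{_BOX_STYLE}">' + f'🤔 분석 중...' + f'<div style="{_CONTENT_STYLE}">{_html.escape(text)} ▌</div>' + f'</div>' + ) + + +def _thinking_html(text: str) -> str: + """완료 후 전체 내용을 접기/펼치기로 표시.""" return ( f'<details style="{_BOX_STYLE}">' - f'<summary>{icon} {label}</summary>' - f'<div style="{_CONTENT_STYLE}">{_html.escape(text)}{cursor}</div>' + f'<summary>💭 분석 완료</summary>' + f'<div style="{_CONTENT_STYLE}">{_html.escape(text)}</div>' f'</details>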
' ) From 38d2edeeec565a3e66caab02715d4fa5b55a100b Mon Sep 17 00:00:00 2001 From: sal Date: Mon, 1 Jun 2026 16:15:06 +0900 Subject: [PATCH 6/6] Phase 25: Separate RAG sources into collapsible box below chatbot - Add source_box gr.HTML component below chatbot - Add _sources_html() helper rendering
<details> expand/collapse - Handle __sources token in respond(): update source_box independently of thinking_box - Reset both thinking_box and source_box on each new message Co-Authored-By: Claude Sonnet 4.6 --- app.py | 48 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/app.py b/app.py index 66f3740..e9e04fb 100644 --- a/app.py +++ b/app.py @@ -96,20 +96,21 @@ async def tts_speak(text: str) -> str | None: async def respond(message, history, show_thinking, user_id, use_tts, run_ids): if not message.strip(): - yield history, "", None, run_ids, "" + yield history, "", None, run_ids, "", "" return history = list(history) run_ids = list(run_ids) history.append({"role": "user", "content": message}) history.append({"role": "assistant", "content": ""}) - yield history, "", None, run_ids, "" # thinking_box 초기화 + yield history, "", None, run_ids, "", "" # thinking_box + source_box 초기화 collected_run_id: str | None = None tts_text = "" # 순수 답변만 누적 (TTS용) thinking_acc = "" # 전체 누적 (완료 후 details용) thinking_text = "" # __thinking 토큰만 (줄 감지용) thinking_finalized = False + source_box_html = "" try: async for token, run_id in api_client.chat(message, user_id, show_thinking): @@ -120,14 +121,14 @@ async def respond(message, history, show_thinking, user_id, use_tts, run_ids): # 즉시 상태 — thinking_acc에 누적 안 함 if isinstance(token, dict) and "__status" in token: if not thinking_acc: - yield history, "", None, run_ids, _status_html(token["__status"]) + yield history, "", None, run_ids, _status_html(token["__status"]), gr.update() continue # 사고 과정(LLM thinking) — 현재 줄만 live_html로 표시 if isinstance(token, dict) and "__thinking" in token: thinking_text += token["__thinking"] thinking_acc += token["__thinking"] - yield history, "", None, run_ids, _live_html(_last_line(thinking_text)) + yield history, "", None, run_ids, _live_html(_last_line(thinking_text)), gr.update() continue # 진행 로그(LangGraph, 검색 등) — 메시지 전체를 live_html로 표시 @@ -135,30 
+136,36 @@ async def respond(message, history, show_thinking, user_id, use_tts, run_ids): thinking_acc += token["__meta"] live = token["__meta"].strip() if live: - yield history, "", None, run_ids, _live_html(live) + yield history, "", None, run_ids, _live_html(live), gr.update() + continue + + # RAG 출처 — 별도 source_box로 표시 + if isinstance(token, dict) and "__sources" in token: + source_box_html = _sources_html(token["__sources"]) + yield history, "", None, run_ids, gr.update(), source_box_html continue # 첫 답변 토큰 도착 — 전체를 details로 전환 (접힌 상태) if thinking_acc and not thinking_finalized: thinking_finalized = True - yield history, "", None, run_ids, _thinking_html(thinking_acc) + yield history, "", None, run_ids, _thinking_html(thinking_acc), gr.update() tts_text += token history[-1]["content"] += token - yield history, "", None, run_ids, gr.update() + yield history, "", None, run_ids, gr.update(), gr.update() except Exception as e: history[-1]["content"] += f"\n\n[오류: {e}]" - yield history, "", None, run_ids, gr.update() + yield history, "", None, run_ids, gr.update(), gr.update() return run_ids.append(collected_run_id) if use_tts: audio_path = await tts_speak(tts_text) - yield history, "", audio_path, run_ids, gr.update() + yield history, "", audio_path, run_ids, gr.update(), gr.update() else: - yield history, "", None, run_ids, gr.update() + yield history, "", None, run_ids, gr.update(), gr.update() def handle_feedback(like_data: gr.LikeData, history, run_ids, user_id): @@ -277,6 +284,22 @@ def _status_html(status: str) -> str: ) +def _sources_html(sources: list) -> str: + """RAG 출처 목록을 접기/펼치기로 표시.""" + items = "".join( + f"
<li>{_html.escape(s['filename'])}" + + (f" — {s['page']}페이지" if "page" in s else "") + + "</li>" + for s in sources + ) + return ( + f'<details style="{_BOX_STYLE}">' + f'<summary>📄 출처 ({len(sources)}개)</summary>' + f'<ul>{items}</ul>' + f'</details>
    ' + ) + + with gr.Blocks(title="율봇") as demo: gr.Markdown("# 율봇\n육아·금융 전문 AI 상담 도우미") @@ -294,6 +317,7 @@ with gr.Blocks(title="율봇") as demo: thinking_box = gr.HTML(value="") chatbot = gr.Chatbot(label="율봇", height=500) + source_box = gr.HTML(value="") with gr.Row(): msg_box = gr.Textbox( placeholder="질문을 입력하세요... (Enter로 전송)", @@ -333,12 +357,12 @@ with gr.Blocks(title="율봇") as demo: send_btn.click( respond, inputs=[msg_box, chatbot, show_thinking, user_state, use_tts, run_ids_state], - outputs=[chatbot, msg_box, tts_output, run_ids_state, thinking_box], + outputs=[chatbot, msg_box, tts_output, run_ids_state, thinking_box, source_box], ) msg_box.submit( respond, inputs=[msg_box, chatbot, show_thinking, user_state, use_tts, run_ids_state], - outputs=[chatbot, msg_box, tts_output, run_ids_state, thinking_box], + outputs=[chatbot, msg_box, tts_output, run_ids_state, thinking_box, source_box], ) reset_btn.click(reset_chat, inputs=[user_state], outputs=[chatbot, run_ids_state])