Tag metadata tokens as {"__meta"} to separate TTS from progress messages

stream_response() now yields plain str for actual answer tokens and
{"__meta": str} dicts for progress/thinking/source metadata.
Consumers (WebUI, Telegram) can filter __meta tokens for TTS/display.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
sal
2026-05-31 23:08:14 +09:00
parent e9a6d00059
commit 67821250fd
3 changed files with 58 additions and 47 deletions
+33 -29
View File
@@ -216,8 +216,12 @@ class AgentService:
def _make_config(self, show_thinking: bool = False) -> dict:
"""Build the config dict for a run on this service's thread.

Returns a dict whose "configurable" entry carries this service's
``_thread_id`` and the given ``show_thinking`` flag.
"""
return {"configurable": {"thread_id": self._thread_id, "show_thinking": show_thinking}}
async def stream_response(self, user_input: str, show_thinking: bool | None = None) -> AsyncIterator[str]:
"""사용자 입력을 받아 응답 토큰을 순서대로 yield한다."""
async def stream_response(self, user_input: str, show_thinking: bool | None = None) -> AsyncIterator[str | dict]:
"""사용자 입력을 받아 응답 토큰을 순서대로 yield한다.
실제 답변: plain str
진행/thinking/출처 메타데이터: {"__meta": str} ← 소비자가 TTS 등에서 필터링 가능
"""
_think_verbose = show_thinking if show_thinking is not None else self._think_verbose
self._source_buffer.clear()
run_id = uuid.uuid4()
@@ -248,25 +252,25 @@ class AgentService:
if isinstance(data, dict) and "__query_rewrite" in data:
info = data["__query_rewrite"]
if lg or self._rag_verbose:
yield f'\n쿼리 최적화: "{info["original"]}""{info["rewritten"]}"\n'
yield {"__meta": f'\n쿼리 최적화: "{info["original"]}""{info["rewritten"]}"\n'}
continue
if isinstance(data, dict) and "__thinking" in data:
# thinking 첫 토큰 도착 시 agent 레이블 + prev_node 갱신
if "agent" != prev_node:
if thinking_open:
yield "\n[/사고 과정]\n"
yield {"__meta": "\n[/사고 과정]\n"}
thinking_open = False
content_started = False
if lg:
elapsed = time.perf_counter() - start_time
label = "agent: 검색 결과 반영 중" if prev_node == "tools" else "agent: 질문 분석 중"
yield f"\n[LangGraph → {label}] ({elapsed:.2f}s)\n"
yield {"__meta": f"\n[LangGraph → {label}] ({elapsed:.2f}s)\n"}
prev_node = "agent"
if _think_verbose:
if not thinking_open:
yield "\n[사고 과정]\n"
yield {"__meta": "\n[사고 과정]\n"}
thinking_open = True
yield data["__thinking"]
yield {"__meta": data["__thinking"]}
continue
# ── messages 이벤트 ──────────────────────────────────────
@@ -277,44 +281,44 @@ class AgentService:
# (agent 레이블은 custom 이벤트 핸들러에서 이미 처리될 수 있으므로 중복 방지)
if node != prev_node:
if thinking_open:
yield "\n[/사고 과정]\n"
yield {"__meta": "\n[/사고 과정]\n"}
thinking_open = False
content_started = False
if lg:
elapsed = time.perf_counter() - start_time
if node == "agent":
label = "agent: 검색 결과 반영 중" if prev_node == "tools" else "agent: 질문 분석 중"
yield f"\n[LangGraph → {label}] ({elapsed:.2f}s)\n"
yield {"__meta": f"\n[LangGraph → {label}] ({elapsed:.2f}s)\n"}
elif node == "query_rewrite":
yield f"\n[LangGraph → query_rewrite: 쿼리 최적화 중] ({elapsed:.2f}s)\n"
yield {"__meta": f"\n[LangGraph → query_rewrite: 쿼리 최적화 중] ({elapsed:.2f}s)\n"}
elif node == "tools":
yield f"\n[LangGraph → tools: 도구 실행 중] ({elapsed:.2f}s)\n"
yield {"__meta": f"\n[LangGraph → tools: 도구 실행 중] ({elapsed:.2f}s)\n"}
prev_node = node
# ── agent 노드 — AIMessageChunk만 처리 (중복 방지) ──────
if node == "agent" and isinstance(chunk, AIMessageChunk):
if chunk.tool_calls:
if thinking_open:
yield "\n[/사고 과정]\n"
yield {"__meta": "\n[/사고 과정]\n"}
thinking_open = False
for tc in chunk.tool_calls:
pending_tool_calls[tc["id"]] = tc
if tc.get("name") == "search_documents":
query = tc.get("args", {}).get("query", "")
yield f'\n문서 검색 중... ("{query}")\n' if query else "\n문서 검색 중...\n"
yield {"__meta": f'\n문서 검색 중... ("{query}")\n'} if query else {"__meta": "\n문서 검색 중...\n"}
elif tc.get("name") == "web_search":
query = tc.get("args", {}).get("query", "")
yield f'\n웹 검색 중... ("{query}")\n' if query else "\n웹 검색 중...\n"
yield {"__meta": f'\n웹 검색 중... ("{query}")\n'} if query else {"__meta": "\n웹 검색 중...\n"}
elif lg:
args_str = ", ".join(f'{k}="{v}"' for k, v in tc["args"].items())
yield f" [tool_call: {tc['name']}({args_str})]\n"
yield {"__meta": f" [tool_call: {tc['name']}({args_str})]\n"}
elif chunk.content:
if thinking_open:
yield "\n[/사고 과정]\n"
yield {"__meta": "\n[/사고 과정]\n"}
thinking_open = False
if lg and not content_started:
yield "\n[LangGraph → agent: 최종 답변 생성]\n\n"
yield {"__meta": "\n[LangGraph → agent: 최종 답변 생성]\n\n"}
content_started = True
response_content += chunk.content
yield chunk.content
@@ -325,12 +329,12 @@ class AgentService:
if not content_started and not thinking_open:
thinking = chunk.additional_kwargs.get("thinking", "")
if thinking and _think_verbose:
yield "\n[사고 과정]\n"
yield thinking
yield "\n[/사고 과정]\n"
yield {"__meta": "\n[사고 과정]\n"}
yield {"__meta": thinking}
yield {"__meta": "\n[/사고 과정]\n"}
if chunk.content:
if lg:
yield "\n[LangGraph → agent: 최종 답변 생성]\n\n"
yield {"__meta": "\n[LangGraph → agent: 최종 답변 생성]\n\n"}
response_content += chunk.content
yield chunk.content
@@ -338,25 +342,25 @@ class AgentService:
elif node == "tools" and hasattr(chunk, "name") and chunk.name == "search_documents":
if lg:
result_lines = [b for b in chunk.content.split("\n\n") if b.strip()]
yield f" [결과: {len(result_lines)}개 문서 반환 → agent 복귀]\n"
yield {"__meta": f" [결과: {len(result_lines)}개 문서 반환 → agent 복귀]\n"}
if self._rag_verbose:
tc = pending_tool_calls.get(chunk.tool_call_id, {})
query = tc.get("args", {}).get("query", "")
yield f'\n[문서 검색: "{query}"]\n'
yield {"__meta": f'\n[문서 검색: "{query}"]\n'}
for block in chunk.content.split("\n\n"):
if block.strip():
preview = block.strip().replace("\n", " ")[:80]
yield f"{preview}\n"
yield "\n"
yield {"__meta": f"{preview}\n"}
yield {"__meta": "\n"}
elif node == "tools" and hasattr(chunk, "name") and chunk.name == "web_search":
if lg:
result_lines = [b for b in chunk.content.split("\n\n") if b.strip()]
yield f" [웹 검색 결과: {len(result_lines)}건 → agent 복귀]\n"
yield {"__meta": f" [웹 검색 결과: {len(result_lines)}건 → agent 복귀]\n"}
if thinking_open:
yield "\n[/사고 과정]\n"
yield {"__meta": "\n[/사고 과정]\n"}
self._last_run_id = str(run_id)
@@ -369,11 +373,11 @@ class AgentService:
print(f"[Agent] 대화 저장 실패: {e}")
if self._rag_show_sources and self._source_buffer:
yield "\n\n[참고 문서]\n"
yield {"__meta": "\n\n[참고 문서]\n"}
for src in self._source_buffer:
filename = os.path.basename(src["source"])
page = f" {src['page']}페이지" if "page" in src else ""
yield f"- {filename}{page}\n"
yield {"__meta": f"- {filename}{page}\n"}
def reset(self) -> None:
"""새 thread_id로 대화 히스토리를 초기화한다."""