From cf68e19f3866cea12762e733bc76f01ef61d651a Mon Sep 17 00:00:00 2001
From: shinalok <shinalok357@gmail.com>
Date: Sat, 30 May 2026 23:54:41 +0900
Subject: [PATCH] Add cross-platform TTS support and update dependencies

---
 api_client.py    |  2 +-
 app.py           | 55 +++++++++++++++++++++++++++++++++++++-----------
 requirements.txt |  4 +++-
 3 files changed, 47 insertions(+), 14 deletions(-)

diff --git a/api_client.py b/api_client.py
index 9e4f74e..37c61ed 100644
--- a/api_client.py
+++ b/api_client.py
@@ -43,7 +43,7 @@ async def chat(
                 try:
                     payload = json.loads(raw)
                 except json.JSONDecodeError:
-                    yield raw, None
+                    yield str(raw), None
                     continue
                 if isinstance(payload, dict) and payload.get("__done"):
                     yield "", payload.get("run_id")
diff --git a/app.py b/app.py
index 4a3a3fd..a2fd99b 100644
--- a/app.py
+++ b/app.py
@@ -9,6 +9,7 @@
 """
 import asyncio
 import os
+import platform
 import subprocess
 import tempfile
 
@@ -25,7 +26,8 @@ DEFAULT_USER = "아록"
 # ── STT (Whisper) — 로컬 실행 유지 ──────────────────────────────
 _whisper_model = None
 _WHISPER_SIZE = os.getenv("WHISPER_MODEL_SIZE", "small")
-_TTS_VOICE = os.getenv("TTS_VOICE", "Yuna")
+_TTS_VOICE = os.getenv("TTS_VOICE", "Yuna")                        # voice name for macOS `say`
+_TTS_EDGE_VOICE = os.getenv("TTS_EDGE_VOICE", "ko-KR-SunHiNeural") # voice name for edge-tts
 
 
 def _get_whisper():
@@ -44,18 +46,69 @@ def transcribe_audio(filepath: str) -> str:
     return result["text"].strip()
 
 
-def tts_speak(text: str) -> str | None:
-    """macOS say 명령어로 TTS, 재생용 aiff 파일 경로 반환."""
+def _discard_temp(tmp) -> None:
+    """Delete the file behind *tmp*, a closed NamedTemporaryFile or None."""
+    if tmp is None:
+        return
+    try:
+        os.remove(tmp.name)
+    except OSError:
+        pass  # already gone or not removable; nothing more to do
+
+
+async def tts_speak(text: str) -> str | None:
+    """Synthesize *text* to a temporary audio file and return its path.
+
+    Backends are tried in order until one succeeds:
+    macOS: say -> edge-tts -> pyttsx3; other platforms: edge-tts -> pyttsx3.
+    The temp file of a failed backend is deleted before the next one runs.
+
+    Returns None when *text* is empty or every backend fails.
+    """
     if not text:
         return None
+
+    # macOS: prefer the built-in `say` (offline, built-in Korean voice).
+    if platform.system() == "Darwin":
+        tmp = None
+        try:
+            tmp = tempfile.NamedTemporaryFile(suffix=".aiff", delete=False)
+            tmp.close()
+            await asyncio.to_thread(
+                subprocess.run,
+                ["say", "-v", _TTS_VOICE, "-o", tmp.name, text],
+                check=True,
+                capture_output=True,
+            )
+            return tmp.name
+        except Exception:
+            # Best effort: any failure falls through to the next backend.
+            _discard_temp(tmp)
+
+    # First choice off macOS, or `say` failed: edge-tts (online).
+    tmp = None
     try:
-        tmp = tempfile.NamedTemporaryFile(suffix=".aiff", delete=False)
+        import edge_tts
+        tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
         tmp.close()
-        subprocess.run(
-            ["say", "-v", _TTS_VOICE, "-o", tmp.name, text],
-            check=True,
-            capture_output=True,
-        )
+        await edge_tts.Communicate(text, _TTS_EDGE_VOICE).save(tmp.name)
+        return tmp.name
+    except Exception:
+        # Covers a missing package as well as network/service errors.
+        _discard_temp(tmp)
+
+    # Last resort: pyttsx3 (offline).
+    tmp = None
+    try:
+        import pyttsx3
+        tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
+        tmp.close()
+        def _save():
+            engine = pyttsx3.init()
+            engine.save_to_file(text, tmp.name)
+            engine.runAndWait()
+        await asyncio.to_thread(_save)
         return tmp.name
     except Exception:
+        _discard_temp(tmp)
         return None
@@ -90,7 +120,7 @@ async def respond(message, history, show_thinking, user_id, use_tts, run_ids):
     run_ids.append(collected_run_id)
 
     if use_tts:
-        audio_path = tts_speak(history[-1]["content"])
+        audio_path = await tts_speak(history[-1]["content"])
         yield history, "", audio_path, run_ids
     else:
         yield history, "", None, run_ids
@@ -100,12 +130,13 @@ def handle_feedback(like_data: gr.LikeData, history, run_ids, user_id):
     idx = like_data.index
     if isinstance(idx, (list, tuple)):
         idx = idx[0]
-    if not isinstance(idx, int) or idx >= len(history):
+    if not isinstance(idx, int) or idx < 0 or idx >= len(history):
         return
     if history[idx].get("role") != "assistant":
         return
+    # Count of assistant messages before idx = this message's 0-based turn number
     asst_turn = sum(1 for m in history[:idx] if m.get("role") == "assistant")
-    run_id = run_ids[asst_turn] if asst_turn < len(run_ids) else None
+    run_id = run_ids[asst_turn] if run_ids and asst_turn < len(run_ids) else None
 
     user_msg = str(history[idx - 1]["content"]) if idx > 0 else ""
     asst_msg = str(history[idx]["content"])
diff --git a/requirements.txt b/requirements.txt
index 16bcfc0..5168873 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,6 @@
-gradio>=4.0.0
+gradio>=6.0.0
 httpx>=0.27.0
 python-dotenv>=1.0.0
 openai-whisper>=20231117
+edge-tts>=6.1.9
+pyttsx3>=2.90