Add cross-platform TTS support and update dependencies
This commit is contained in:
+1
-1
@@ -43,7 +43,7 @@ async def chat(
|
|||||||
try:
|
try:
|
||||||
payload = json.loads(raw)
|
payload = json.loads(raw)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
yield raw, None
|
yield str(raw), None
|
||||||
continue
|
continue
|
||||||
if isinstance(payload, dict) and payload.get("__done"):
|
if isinstance(payload, dict) and payload.get("__done"):
|
||||||
yield "", payload.get("run_id")
|
yield "", payload.get("run_id")
|
||||||
|
|||||||
@@ -9,6 +9,7 @@
|
|||||||
"""
|
"""
|
||||||
import asyncio
|
import asyncio
|
||||||
import os
|
import os
|
||||||
|
import platform
|
||||||
import subprocess
|
import subprocess
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
@@ -25,7 +26,8 @@ DEFAULT_USER = "아록"
|
|||||||
# ── STT (Whisper) — 로컬 실행 유지 ──────────────────────────────
|
# ── STT (Whisper) — 로컬 실행 유지 ──────────────────────────────
|
||||||
_whisper_model = None
|
_whisper_model = None
|
||||||
_WHISPER_SIZE = os.getenv("WHISPER_MODEL_SIZE", "small")
|
_WHISPER_SIZE = os.getenv("WHISPER_MODEL_SIZE", "small")
|
||||||
_TTS_VOICE = os.getenv("TTS_VOICE", "Yuna")
|
_TTS_VOICE = os.getenv("TTS_VOICE", "Yuna") # macOS say 보이스
|
||||||
|
_TTS_EDGE_VOICE = os.getenv("TTS_EDGE_VOICE", "ko-KR-SunHiNeural") # edge-tts 보이스
|
||||||
|
|
||||||
|
|
||||||
def _get_whisper():
|
def _get_whisper():
|
||||||
@@ -44,19 +46,47 @@ def transcribe_audio(filepath: str) -> str:
|
|||||||
return result["text"].strip()
|
return result["text"].strip()
|
||||||
|
|
||||||
|
|
||||||
def tts_speak(text: str) -> str | None:
|
async def tts_speak(text: str) -> str | None:
|
||||||
"""macOS say 명령어로 TTS, 재생용 aiff 파일 경로 반환."""
|
"""크로스플랫폼 TTS. macOS: say→edge-tts→pyttsx3 / Windows: edge-tts→pyttsx3"""
|
||||||
if not text:
|
if not text:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
# macOS: say 우선 (오프라인, 내장 한국어)
|
||||||
|
if platform.system() == "Darwin":
|
||||||
try:
|
try:
|
||||||
tmp = tempfile.NamedTemporaryFile(suffix=".aiff", delete=False)
|
tmp = tempfile.NamedTemporaryFile(suffix=".aiff", delete=False)
|
||||||
tmp.close()
|
tmp.close()
|
||||||
subprocess.run(
|
await asyncio.to_thread(
|
||||||
|
subprocess.run,
|
||||||
["say", "-v", _TTS_VOICE, "-o", tmp.name, text],
|
["say", "-v", _TTS_VOICE, "-o", tmp.name, text],
|
||||||
check=True,
|
check=True,
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
)
|
)
|
||||||
return tmp.name
|
return tmp.name
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Windows 1순위 / macOS say 실패 시: edge-tts (온라인)
|
||||||
|
try:
|
||||||
|
import edge_tts
|
||||||
|
tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
|
||||||
|
tmp.close()
|
||||||
|
await edge_tts.Communicate(text, _TTS_EDGE_VOICE).save(tmp.name)
|
||||||
|
return tmp.name
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# 최종 폴백: pyttsx3 (오프라인)
|
||||||
|
try:
|
||||||
|
import pyttsx3
|
||||||
|
tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
|
||||||
|
tmp.close()
|
||||||
|
def _save():
|
||||||
|
engine = pyttsx3.init()
|
||||||
|
engine.save_to_file(text, tmp.name)
|
||||||
|
engine.runAndWait()
|
||||||
|
await asyncio.to_thread(_save)
|
||||||
|
return tmp.name
|
||||||
except Exception:
|
except Exception:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -90,7 +120,7 @@ async def respond(message, history, show_thinking, user_id, use_tts, run_ids):
|
|||||||
run_ids.append(collected_run_id)
|
run_ids.append(collected_run_id)
|
||||||
|
|
||||||
if use_tts:
|
if use_tts:
|
||||||
audio_path = tts_speak(history[-1]["content"])
|
audio_path = await tts_speak(history[-1]["content"])
|
||||||
yield history, "", audio_path, run_ids
|
yield history, "", audio_path, run_ids
|
||||||
else:
|
else:
|
||||||
yield history, "", None, run_ids
|
yield history, "", None, run_ids
|
||||||
@@ -100,12 +130,13 @@ def handle_feedback(like_data: gr.LikeData, history, run_ids, user_id):
|
|||||||
idx = like_data.index
|
idx = like_data.index
|
||||||
if isinstance(idx, (list, tuple)):
|
if isinstance(idx, (list, tuple)):
|
||||||
idx = idx[0]
|
idx = idx[0]
|
||||||
if not isinstance(idx, int) or idx >= len(history):
|
if not isinstance(idx, int) or idx < 0 or idx >= len(history):
|
||||||
return
|
return
|
||||||
if history[idx].get("role") != "assistant":
|
if history[idx].get("role") != "assistant":
|
||||||
return
|
return
|
||||||
|
# idx 위치까지 등장한 assistant 메시지 수 = 이 메시지의 0-based 턴 번호
|
||||||
asst_turn = sum(1 for m in history[:idx] if m.get("role") == "assistant")
|
asst_turn = sum(1 for m in history[:idx] if m.get("role") == "assistant")
|
||||||
run_id = run_ids[asst_turn] if asst_turn < len(run_ids) else None
|
run_id = run_ids[asst_turn] if run_ids and asst_turn < len(run_ids) else None
|
||||||
|
|
||||||
user_msg = str(history[idx - 1]["content"]) if idx > 0 else ""
|
user_msg = str(history[idx - 1]["content"]) if idx > 0 else ""
|
||||||
asst_msg = str(history[idx]["content"])
|
asst_msg = str(history[idx]["content"])
|
||||||
|
|||||||
+3
-1
@@ -1,4 +1,6 @@
|
|||||||
gradio>=4.0.0
|
gradio>=6.0.0
|
||||||
httpx>=0.27.0
|
httpx>=0.27.0
|
||||||
python-dotenv>=1.0.0
|
python-dotenv>=1.0.0
|
||||||
openai-whisper>=20231117
|
openai-whisper>=20231117
|
||||||
|
edge-tts>=6.1.9
|
||||||
|
pyttsx3>=2.90
|
||||||
|
|||||||
Reference in New Issue
Block a user