# gwoe-antragspruefer/app/tour_audio.py

"""Tour-Audio-Generator über ElevenLabs (#185 Phase 2).

Architektur:
- Pro Tour-Station ein Text-String. Wir hashen (voice_id, model_id, text)
  und cachen die fertige MP3 im ``data/tour_audio/<hash>.mp3``-Cache.
- Beim ersten Abruf wird die ElevenLabs-Text-to-Speech-API aufgerufen,
  die MP3 gespeichert, dann ausgeliefert.
- Folgeabrufe gehen direkt aus dem Cache. Kein Re-API-Call solange der
  Text identisch bleibt.

ENV:
- ``ELEVENLABS_API_KEY``    — Pflicht; ohne fällt die Tour auf
                              browser-internes ``speechSynthesis`` zurück.
- ``ELEVENLABS_VOICE_ID``   — optional; Default Domi (AZnzlk1XvdvUeBnXmlld).
- ``ELEVENLABS_MODEL_ID``   — optional; Default ``eleven_multilingual_v2``.

Caching-Strategie: die ersten 32 Hex-Zeichen von
SHA-256(``voice_id|model_id|text``) als Dateiname.
Damit:
- Text-Edit → neuer Hash → frische Generierung.
- Voice-Wechsel → neuer Hash, alte Voice bleibt im Cache (kann manuell
  weggeräumt werden).
"""
import hashlib
import logging
import os
from pathlib import Path
from typing import Optional

import httpx

from .config import settings

# Module-level logger; all diagnostics of the tour-audio generator go here.
logger = logging.getLogger(__name__)

# Voice used when ELEVENLABS_VOICE_ID is unset or empty.
DEFAULT_VOICE_ID = "AZnzlk1XvdvUeBnXmlld"  # Domi
# Model used when ELEVENLABS_MODEL_ID is unset or empty.
DEFAULT_MODEL_ID = "eleven_multilingual_v2"
# Text-to-speech endpoint template; ``{voice_id}`` is filled in per request.
ELEVENLABS_TTS_URL = "https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"

# Directory for the cached MP3 files, below the configured data directory.
CACHE_DIR = settings.data_dir / "tour_audio"


def _config() -> tuple[Optional[str], str, str]:
    """Read the ElevenLabs settings from the process environment.

    Returns:
        A ``(api_key, voice_id, model_id)`` tuple. ``api_key`` is ``None``
        when ``ELEVENLABS_API_KEY`` is unset or empty; ``voice_id`` and
        ``model_id`` fall back to the module defaults when their variable
        is unset or empty.
    """
    env = os.environ
    key = env.get("ELEVENLABS_API_KEY")
    voice = env.get("ELEVENLABS_VOICE_ID")
    model = env.get("ELEVENLABS_MODEL_ID")
    return (
        key if key else None,
        voice if voice else DEFAULT_VOICE_ID,
        model if model else DEFAULT_MODEL_ID,
    )


def is_available() -> bool:
    """Report whether an ElevenLabs API key is configured in the environment."""
    api_key, _voice_id, _model_id = _config()
    return api_key is not None


def _cache_key(text: str, voice_id: str, model_id: str) -> str:
    """Derive the cache file stem for a (voice, model, text) combination.

    The three values are joined as ``voice_id|model_id|text``, encoded as
    UTF-8 and hashed with SHA-256.

    Returns:
        The first 32 hex characters of the digest.
    """
    material = "{}|{}|{}".format(voice_id, model_id, text)
    hasher = hashlib.sha256()
    hasher.update(material.encode("utf-8"))
    digest = hasher.hexdigest()
    return digest[:32]


def _cache_path(text: str, voice_id: str, model_id: str) -> Path:
    """Return the MP3 cache file path for the given text/voice/model.

    Creates ``CACHE_DIR`` (including parents) as a side effect if it does
    not exist yet. The file itself is not created here.
    """
    directory = CACHE_DIR
    directory.mkdir(parents=True, exist_ok=True)
    file_name = _cache_key(text, voice_id, model_id) + ".mp3"
    return directory / file_name


def _write_cache_atomically(cache_file: Path, audio: bytes) -> None:
    """Store ``audio`` at ``cache_file`` without exposing a partial file.

    The bytes go to a sibling temp file first and are then moved into place
    with ``os.replace``, so a reader finds either no cache entry or the
    complete MP3.

    Raises:
        OSError: if writing or renaming fails. The temp file is removed
            before the error propagates.
    """
    # PID in the name keeps several worker processes from sharing a temp file.
    tmp_file = cache_file.with_name(f"{cache_file.name}.{os.getpid()}.tmp")
    try:
        tmp_file.write_bytes(audio)
        os.replace(tmp_file, cache_file)
    except OSError:
        tmp_file.unlink(missing_ok=True)
        raise


async def get_or_generate(text: str) -> Optional[bytes]:
    """Return the MP3 bytes for ``text`` (from cache or freshly generated).

    On a cache miss the ElevenLabs text-to-speech API is called and the
    result is stored in the cache before it is returned.

    Args:
        text: The text to speak; must be non-empty and at most 5000
            characters long.

    Returns:
        The MP3 bytes, or ``None`` when no API key is configured, the text
        is empty or too long, the API call fails, the API answers with a
        non-200 status, or the API returns an empty body. The tour frontend
        is expected to fall back to the browser's ``speechSynthesis`` then.
    """
    api_key, voice_id, model_id = _config()
    if not api_key:
        return None

    if not text or len(text) > 5000:
        # ``text or ""`` keeps the log call from raising when text is None.
        logger.warning(
            "Tour-Audio: Text leer oder zu lang (%d chars)", len(text or "")
        )
        return None

    # NOTE: the cache file I/O below is synchronous.
    cache_file = _cache_path(text, voice_id, model_id)
    try:
        cached = cache_file.read_bytes()
    except FileNotFoundError:
        cached = b""
    # An empty cache file is never a valid MP3; treat it as a miss so it
    # gets regenerated instead of being served forever.
    if cached:
        return cached

    url = ELEVENLABS_TTS_URL.format(voice_id=voice_id)
    payload = {
        "text": text,
        "model_id": model_id,
        "voice_settings": {
            # For a warm, clear narrator voice: high stability (= calm),
            # medium similarity_boost (= natural, not over-polished).
            "stability": 0.55,
            "similarity_boost": 0.7,
            "style": 0.0,
            "use_speaker_boost": True,
        },
    }
    headers = {
        "xi-api-key": api_key,
        "Content-Type": "application/json",
        "Accept": "audio/mpeg",
    }

    # Deliberately broad: any failure of the remote call must degrade to
    # "no audio" rather than break the tour.
    try:
        async with httpx.AsyncClient(timeout=httpx.Timeout(30.0)) as client:
            response = await client.post(url, headers=headers, json=payload)
    except Exception:
        logger.exception("ElevenLabs-TTS-Aufruf fehlgeschlagen")
        return None

    if response.status_code != 200:
        logger.warning(
            "ElevenLabs-TTS Status %d für voice=%s: %s",
            response.status_code, voice_id, response.text[:200],
        )
        return None

    audio = response.content
    if not audio:
        # Do not cache an empty body; it would shadow every later request.
        logger.warning(
            "ElevenLabs-TTS lieferte leere Antwort für voice=%s", voice_id
        )
        return None

    # A failed cache write must not discard audio we already received.
    try:
        _write_cache_atomically(cache_file, audio)
    except OSError:
        logger.exception(
            "Tour-Audio: Cache-Datei %s konnte nicht geschrieben werden",
            cache_file.name,
        )
    else:
        logger.info(
            "Tour-Audio cached: %s (%d bytes, voice=%s)",
            cache_file.name, len(audio), voice_id,
        )
    return audio