Phase 1+2: FastAPI backend, SQLite, embeddings, semantic search

Phase 1:
- FastAPI backend (backend/app.py) with a REST API
- SQLite database for podcasts, episodes, paragraphs, and quotes
- Auto-import from mindmap_data.json + srt_index.json on startup (expected volume layout sketched after this list)
- Webapp as an SPA: API-first with a static-file fallback
- Audio served from a mounted volume instead of being baked into the Docker image
- Docker Compose setup with Traefik labels
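
The auto-import and the audio mount assume one sub-directory per podcast inside the mounted volumes. A minimal sketch of that layout check, mirroring the startup scan in backend/app.py; the directory name "neu-denken" is an illustrative assumption, not taken from this commit:

    # Which sub-directories of the mounted data volume would be imported on startup.
    # Matching audio lives under ./audio/<podcast_id>/ and stays outside the image.
    from pathlib import Path

    def importable_podcasts(data_dir: str = "./data") -> list[str]:
        """Podcast IDs the backend would import: one sub-directory per podcast."""
        root = Path(data_dir)
        if not root.is_dir():
            return []
        return [
            d.name for d in root.iterdir()
            if d.is_dir()
            and (d / "mindmap_data.json").exists()
            and (d / "srt_index.json").exists()
        ]

    print(importable_podcasts())   # e.g. ['neu-denken']  (hypothetical podcast ID)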

Phase 2:
- Qwen text-embedding-v3 via DashScope (1024-dim vectors)
- Embeddings for all transcript paragraphs (728 for NEU DENKEN)
- Semantic search: /api/semantic-search?q=...
- Similarity API: /api/similar/{podcast}/{episode}/{paragraph}
- Cosine similarity on normalized vectors, <100 ms (see the sketch after this list)
- Finds thematically related passages across episodes, even when the
  wording is completely different
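
A minimal numpy sketch of the ranking step behind /api/similar and /api/semantic-search: on L2-normalized vectors, cosine similarity reduces to a single matrix-vector product. The toy corpus below is a stand-in for the real 1024-dim DashScope embeddings:

    import numpy as np

    def top_k(query_vec: np.ndarray, vectors: np.ndarray, k: int = 10):
        """Return (row index, cosine score) of the k most similar rows."""
        q = query_vec / np.linalg.norm(query_vec)
        m = vectors / np.linalg.norm(vectors, axis=1, keepdims=True)
        scores = m @ q                         # one score per paragraph
        order = np.argsort(scores)[::-1][:k]   # highest similarity first
        return [(int(i), float(scores[i])) for i in order]

    # Toy corpus: 728 random vectors instead of the embedded paragraphs.
    rng = np.random.default_rng(0)
    corpus = rng.normal(size=(728, 1024)).astype(np.float32)
    print(top_k(corpus[0], corpus, k=3))       # row 0 ranks itself first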

Prepared for multi-podcast support (#10): the data model supports multiple
podcasts, and cross-podcast similarity is a query parameter (usage sketched below).
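
A hedged usage sketch of the two endpoints; the base URL, the podcast ID "neu-denken", and the paragraph index are placeholders, while the routes, query parameters, and response fields come from backend/app.py:

    import requests

    BASE = "http://localhost:8000"   # assumption: container running locally

    # Free-text semantic search, scoped to one podcast
    hits = requests.get(
        f"{BASE}/api/semantic-search",
        params={"q": "Wachstum und Nachhaltigkeit", "podcast_id": "neu-denken", "limit": 5},
    ).json()

    # Paragraphs similar to episode S1E1, paragraph 42, optionally across podcasts
    similar = requests.get(
        f"{BASE}/api/similar/neu-denken/S1E1/42",
        params={"limit": 10, "cross_podcast": "true"},
    ).json()
    for r in similar:
        print(r["podcast_id"], r["episode_id"], round(r["score"], 2), r["text_preview"][:60])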

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Dotty Dotter 2026-04-20 10:24:53 +02:00
parent 8d7c16f8f6
commit b0649cea49
8 changed files with 781 additions and 19 deletions

.gitignore

@@ -1,3 +1,8 @@
 __pycache__/
 *.pyc
 .DS_Store
+data/
+audio/
+*.sqlite
+*.sqlite-shm
+*.sqlite-wal

Dockerfile

@@ -0,0 +1,17 @@
FROM python:3.12-slim
WORKDIR /app
# Install dependencies
COPY backend/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy backend code
COPY backend/ .
# Copy webapp as static files
COPY webapp/index.html webapp/d3.v7.min.js /static/
EXPOSE 8000
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]

backend/app.py

@@ -0,0 +1,274 @@

"""FastAPI backend for podcast-mindmap."""
import json
import os
import numpy as np
from pathlib import Path
from typing import Optional
from fastapi import FastAPI, Query, HTTPException
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from database import get_db, init_db, get_all_embeddings

app = FastAPI(title="Podcast Mindmap API")

DATA_DIR = os.environ.get("DATA_DIR", "/data")
AUDIO_DIR = os.environ.get("AUDIO_DIR", "/audio")
STATIC_DIR = os.environ.get("STATIC_DIR", "/static")

# Cache embeddings in memory
_embeddings_cache = {}

def _load_embeddings(podcast_id: Optional[str] = None):
    """Load and cache embeddings."""
    key = podcast_id or "__all__"
    if key not in _embeddings_cache:
        vectors, meta = get_all_embeddings(podcast_id)
        _embeddings_cache[key] = (vectors, meta)
    return _embeddings_cache[key]

def _invalidate_cache():
    _embeddings_cache.clear()

# ── API Routes ──

@app.get("/api/podcasts")
def list_podcasts():
    db = get_db()
    rows = db.execute("SELECT * FROM podcasts").fetchall()
    db.close()
    return [dict(r) for r in rows]

@app.get("/api/podcasts/{podcast_id}")
def get_podcast(podcast_id: str):
    db = get_db()
    podcast = db.execute("SELECT * FROM podcasts WHERE id = ?", (podcast_id,)).fetchone()
    if not podcast:
        raise HTTPException(404, "Podcast not found")
    staffeln = db.execute("SELECT * FROM staffeln WHERE podcast_id = ? ORDER BY id", (podcast_id,)).fetchall()
    themes = db.execute("SELECT * FROM themes WHERE podcast_id = ?", (podcast_id,)).fetchall()
    episodes = db.execute("SELECT * FROM episodes WHERE podcast_id = ? ORDER BY id", (podcast_id,)).fetchall()
    quotes = db.execute("SELECT * FROM quotes WHERE podcast_id = ?", (podcast_id,)).fetchall()
    db.close()
    # Build mindmap_data compatible format
    return {
        "name": podcast["name"],
        "host": podcast["host"],
        "description": podcast["description"],
        "staffeln": [dict(s) for s in staffeln],
        "themes": [{**dict(t), "episodes": json.loads(t["episodes_json"])} for t in themes],
        "episodes": [{"id": e["id"], "title": e["title"], "guest": e["guest"],
                      "staffel": e["staffel"], "audioFile": e["audio_file"]} for e in episodes],
        "quotes": [{
            "id": q["id"], "text": q["text"], "verbatim": q["verbatim"],
            "speaker": q["speaker"], "episode": q["episode_id"],
            "startTime": q["start_time"], "endTime": q["end_time"],
            "isTopQuote": bool(q["is_top_quote"]),
            "themes": json.loads(q["themes_json"]),
            "audioFile": next((e["audio_file"] for e in episodes if e["id"] == q["episode_id"]), None)
        } for q in quotes],
    }

@app.get("/api/podcasts/{podcast_id}/transcript/{episode_id}")
def get_transcript(podcast_id: str, episode_id: str):
    db = get_db()
    paras = db.execute(
        "SELECT idx, start_time, end_time, text FROM paragraphs WHERE podcast_id = ? AND episode_id = ? ORDER BY idx",
        (podcast_id, episode_id)
    ).fetchall()
    db.close()
    return {"paragraphs": [{"start": p["start_time"], "end": p["end_time"], "text": p["text"]} for p in paras]}

@app.get("/api/search")
def search(q: str = Query(..., min_length=2), podcast_id: Optional[str] = None, limit: int = 50):
    """Full-text search across all transcripts."""
    db = get_db()
    q_like = f"%{q}%"
    if podcast_id:
        rows = db.execute(
            "SELECT p.podcast_id, p.episode_id, p.idx, p.start_time, p.text, e.title, e.guest "
            "FROM paragraphs p JOIN episodes e ON p.podcast_id = e.podcast_id AND p.episode_id = e.id "
            "WHERE p.podcast_id = ? AND p.text LIKE ? LIMIT ?",
            (podcast_id, q_like, limit)
        ).fetchall()
    else:
        rows = db.execute(
            "SELECT p.podcast_id, p.episode_id, p.idx, p.start_time, p.text, e.title, e.guest "
            "FROM paragraphs p JOIN episodes e ON p.podcast_id = e.podcast_id AND p.episode_id = e.id "
            "WHERE p.text LIKE ? LIMIT ?",
            (q_like, limit)
        ).fetchall()
    db.close()
    return [dict(r) for r in rows]

@app.get("/api/similar/{podcast_id}/{episode_id}/{para_idx}")
def find_similar(podcast_id: str, episode_id: str, para_idx: int,
                 limit: int = 10, cross_podcast: bool = False):
    """Find semantically similar paragraphs using embeddings."""
    db = get_db()
    row = db.execute(
        "SELECT id, embedding FROM paragraphs WHERE podcast_id = ? AND episode_id = ? AND idx = ?",
        (podcast_id, episode_id, para_idx)
    ).fetchone()
    db.close()
    if not row or not row["embedding"]:
        raise HTTPException(404, "Paragraph not found or not embedded")
    query_vec = np.frombuffer(row["embedding"], dtype=np.float32)
    query_vec = query_vec / np.linalg.norm(query_vec)
    # Load all embeddings
    search_podcast = None if cross_podcast else podcast_id
    vectors, meta = _load_embeddings(search_podcast)
    if vectors is None or len(meta) == 0:
        return []
    # Cosine similarity (vectors are already normalized)
    scores = vectors @ query_vec
    # Get top results (skip self)
    indices = np.argsort(scores)[::-1]
    results = []
    for idx in indices:
        m = meta[idx]
        # Skip self
        if m["podcast_id"] == podcast_id and m["episode_id"] == episode_id and m["idx"] == para_idx:
            continue
        # Skip same episode unless cross_podcast
        if not cross_podcast and m["episode_id"] == episode_id:
            continue
        results.append({
            "podcast_id": m["podcast_id"],
            "episode_id": m["episode_id"],
            "paragraph_idx": m["idx"],
            "score": float(scores[idx])
        })
        if len(results) >= limit:
            break
    # Enrich with text previews
    db = get_db()
    for r in results:
        p = db.execute(
            "SELECT text, start_time FROM paragraphs WHERE podcast_id = ? AND episode_id = ? AND idx = ?",
            (r["podcast_id"], r["episode_id"], r["paragraph_idx"])
        ).fetchone()
        if p:
            r["text_preview"] = p["text"][:150]
            r["start_time"] = p["start_time"]
        ep = db.execute(
            "SELECT title, guest FROM episodes WHERE podcast_id = ? AND id = ?",
            (r["podcast_id"], r["episode_id"])
        ).fetchone()
        if ep:
            r["episode_title"] = ep["title"]
            r["guest"] = ep["guest"]
    db.close()
    return results

@app.get("/api/semantic-search")
def semantic_search(q: str = Query(..., min_length=3), podcast_id: Optional[str] = None, limit: int = 20):
    """Semantic search using query embedding."""
    from embeddings import embed_texts
    try:
        query_vec = np.array(embed_texts([q])[0], dtype=np.float32)
        query_vec = query_vec / np.linalg.norm(query_vec)
    except Exception as e:
        raise HTTPException(500, f"Embedding failed: {e}")
    vectors, meta = _load_embeddings(podcast_id)
    if vectors is None:
        return []
    scores = vectors @ query_vec
    indices = np.argsort(scores)[::-1][:limit]
    db = get_db()
    results = []
    for idx in indices:
        m = meta[idx]
        score = float(scores[idx])
        if score < 0.3:
            break
        p = db.execute(
            "SELECT text, start_time FROM paragraphs WHERE id = ?", (m["id"],)
        ).fetchone()
        ep = db.execute(
            "SELECT title, guest FROM episodes WHERE podcast_id = ? AND id = ?",
            (m["podcast_id"], m["episode_id"])
        ).fetchone()
        results.append({
            "podcast_id": m["podcast_id"],
            "episode_id": m["episode_id"],
            "paragraph_idx": m["idx"],
            "score": score,
            "text_preview": p["text"][:200] if p else "",
            "start_time": p["start_time"] if p else None,
            "episode_title": ep["title"] if ep else "",
            "guest": ep["guest"] if ep else "",
        })
    db.close()
    return results

# ── Startup ──

@app.on_event("startup")
def startup():
    init_db()
    # Auto-import podcasts from data directory
    data_path = Path(DATA_DIR)
    if data_path.exists():
        for podcast_dir in data_path.iterdir():
            if not podcast_dir.is_dir():
                continue
            mindmap_file = podcast_dir / "mindmap_data.json"
            srt_file = podcast_dir / "srt_index.json"
            if mindmap_file.exists() and srt_file.exists():
                podcast_id = podcast_dir.name
                db = get_db()
                existing = db.execute("SELECT id FROM podcasts WHERE id = ?", (podcast_id,)).fetchone()
                db.close()
                if not existing:
                    print(f"Importing podcast: {podcast_id}")
                    with open(mindmap_file) as f:
                        mindmap_data = json.load(f)
                    with open(srt_file) as f:
                        srt_index = json.load(f)
                    from database import import_podcast
                    import_podcast(podcast_id, mindmap_data, srt_index)

# ── Static Files + Audio ──
# Mount audio directory (per-podcast subdirs)
if os.path.isdir(AUDIO_DIR):
    app.mount("/audio", StaticFiles(directory=AUDIO_DIR), name="audio")
# Serve webapp as static files (fallback)
if os.path.isdir(STATIC_DIR):
    app.mount("/", StaticFiles(directory=STATIC_DIR, html=True), name="static")

backend/database.py

@@ -0,0 +1,197 @@

"""SQLite database for podcast-mindmap: paragraphs, quotes, embeddings."""
import json
import sqlite3
import os
import numpy as np
from typing import Optional

DB_PATH = os.environ.get("DB_PATH", "/data/db.sqlite")

def get_db():
    db = sqlite3.connect(DB_PATH)
    db.row_factory = sqlite3.Row
    db.execute("PRAGMA journal_mode=WAL")
    return db

def init_db():
    db = get_db()
    db.executescript("""
        CREATE TABLE IF NOT EXISTS podcasts (
            id TEXT PRIMARY KEY,
            name TEXT NOT NULL,
            host TEXT,
            description TEXT,
            config_json TEXT
        );
        CREATE TABLE IF NOT EXISTS episodes (
            id TEXT NOT NULL,
            podcast_id TEXT NOT NULL,
            title TEXT NOT NULL,
            guest TEXT,
            staffel INTEGER,
            youtube_id TEXT,
            audio_file TEXT,
            PRIMARY KEY (podcast_id, id),
            FOREIGN KEY (podcast_id) REFERENCES podcasts(id)
        );
        CREATE TABLE IF NOT EXISTS paragraphs (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            podcast_id TEXT NOT NULL,
            episode_id TEXT NOT NULL,
            idx INTEGER NOT NULL,
            start_time REAL,
            end_time REAL,
            text TEXT NOT NULL,
            embedding BLOB,
            UNIQUE(podcast_id, episode_id, idx),
            FOREIGN KEY (podcast_id) REFERENCES podcasts(id)
        );
        CREATE TABLE IF NOT EXISTS topics (
            paragraph_id INTEGER NOT NULL,
            tag TEXT NOT NULL,
            score REAL DEFAULT 1.0,
            PRIMARY KEY (paragraph_id, tag),
            FOREIGN KEY (paragraph_id) REFERENCES paragraphs(id)
        );
        CREATE TABLE IF NOT EXISTS quotes (
            id TEXT NOT NULL,
            podcast_id TEXT NOT NULL,
            episode_id TEXT NOT NULL,
            text TEXT NOT NULL,
            verbatim TEXT,
            speaker TEXT,
            start_time REAL,
            end_time REAL,
            is_top_quote BOOLEAN DEFAULT 0,
            themes_json TEXT DEFAULT '[]',
            PRIMARY KEY (podcast_id, id),
            FOREIGN KEY (podcast_id) REFERENCES podcasts(id)
        );
        CREATE TABLE IF NOT EXISTS staffeln (
            id INTEGER NOT NULL,
            podcast_id TEXT NOT NULL,
            name TEXT NOT NULL,
            color TEXT DEFAULT '#666',
            PRIMARY KEY (podcast_id, id),
            FOREIGN KEY (podcast_id) REFERENCES podcasts(id)
        );
        CREATE TABLE IF NOT EXISTS themes (
            id TEXT NOT NULL,
            podcast_id TEXT NOT NULL,
            label TEXT NOT NULL,
            description TEXT,
            color TEXT DEFAULT '#666',
            episodes_json TEXT DEFAULT '[]',
            PRIMARY KEY (podcast_id, id),
            FOREIGN KEY (podcast_id) REFERENCES podcasts(id)
        );
        CREATE INDEX IF NOT EXISTS idx_paragraphs_podcast ON paragraphs(podcast_id, episode_id);
        CREATE INDEX IF NOT EXISTS idx_quotes_podcast ON quotes(podcast_id, episode_id);
        CREATE INDEX IF NOT EXISTS idx_topics_tag ON topics(tag);
    """)
    db.commit()
    db.close()

def import_podcast(podcast_id: str, mindmap_data: dict, srt_index: dict):
    """Import a podcast's data from mindmap_data.json + srt_index.json into the DB."""
    db = get_db()
    # Podcast
    db.execute(
        "INSERT OR REPLACE INTO podcasts (id, name, host, description) VALUES (?, ?, ?, ?)",
        (podcast_id, mindmap_data.get("name", ""), mindmap_data.get("host", ""),
         mindmap_data.get("description", ""))
    )
    # Staffeln
    for s in mindmap_data.get("staffeln", []):
        db.execute(
            "INSERT OR REPLACE INTO staffeln (id, podcast_id, name, color) VALUES (?, ?, ?, ?)",
            (s["id"], podcast_id, s["name"], s.get("color", "#666"))
        )
    # Themes
    for t in mindmap_data.get("themes", []):
        db.execute(
            "INSERT OR REPLACE INTO themes (id, podcast_id, label, description, color, episodes_json) VALUES (?, ?, ?, ?, ?, ?)",
            (t["id"], podcast_id, t["label"], t.get("description", ""),
             t.get("color", "#666"), json.dumps(t.get("episodes", [])))
        )
    # Episodes
    for ep in mindmap_data.get("episodes", []):
        db.execute(
            "INSERT OR REPLACE INTO episodes (id, podcast_id, title, guest, staffel, audio_file) VALUES (?, ?, ?, ?, ?, ?)",
            (ep["id"], podcast_id, ep["title"], ep.get("guest", ""),
             ep.get("staffel"), ep.get("audioFile"))
        )
    # Quotes
    for q in mindmap_data.get("quotes", []):
        db.execute(
            "INSERT OR REPLACE INTO quotes (id, podcast_id, episode_id, text, verbatim, speaker, start_time, end_time, is_top_quote, themes_json) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
            (q["id"], podcast_id, q["episode"], q["text"], q.get("verbatim"),
             q.get("speaker", ""), q.get("startTime"), q.get("endTime"),
             q.get("isTopQuote", False), json.dumps(q.get("themes", [])))
        )
    # Paragraphs from srt_index
    for ep_key, ep_data in srt_index.items():
        ep_id = ep_key.split("-")[0]  # S1E1-Wachstum → S1E1
        for i, p in enumerate(ep_data.get("paragraphs", [])):
            db.execute(
                "INSERT OR REPLACE INTO paragraphs (podcast_id, episode_id, idx, start_time, end_time, text) VALUES (?, ?, ?, ?, ?, ?)",
                (podcast_id, ep_id, i, p["start"], p["end"], p["text"])
            )
    db.commit()
    db.close()

def get_all_embeddings(podcast_id: Optional[str] = None):
    """Load all embeddings as numpy array + metadata."""
    db = get_db()
    if podcast_id:
        rows = db.execute(
            "SELECT id, podcast_id, episode_id, idx, embedding FROM paragraphs WHERE podcast_id = ? AND embedding IS NOT NULL",
            (podcast_id,)
        ).fetchall()
    else:
        rows = db.execute(
            "SELECT id, podcast_id, episode_id, idx, embedding FROM paragraphs WHERE embedding IS NOT NULL"
        ).fetchall()
    db.close()
    if not rows:
        return None, []
    meta = [{"id": r["id"], "podcast_id": r["podcast_id"],
             "episode_id": r["episode_id"], "idx": r["idx"]} for r in rows]
    vectors = np.array([np.frombuffer(r["embedding"], dtype=np.float32) for r in rows])
    # Normalize for cosine similarity
    norms = np.linalg.norm(vectors, axis=1, keepdims=True)
    norms[norms == 0] = 1
    vectors = vectors / norms
    return vectors, meta

def store_embedding(paragraph_id: int, embedding: list[float]):
    """Store embedding as binary blob."""
    db = get_db()
    blob = np.array(embedding, dtype=np.float32).tobytes()
    db.execute("UPDATE paragraphs SET embedding = ? WHERE id = ?", (blob, paragraph_id))
    db.commit()
    db.close()

backend/embeddings.py

@@ -0,0 +1,73 @@

"""Generate embeddings via DashScope (Qwen text-embedding-v3)."""
import os
import time
from openai import OpenAI
from database import get_db, store_embedding

DASHSCOPE_API_KEY = os.environ.get("DASHSCOPE_API_KEY", "")
EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "text-embedding-v3")
BATCH_SIZE = 6  # DashScope text-embedding-v3 limit: 10 texts, but long texts need smaller batches

def get_client():
    return OpenAI(
        api_key=DASHSCOPE_API_KEY,
        base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
    )

def embed_texts(texts: list[str]) -> list[list[float]]:
    """Embed a batch of texts."""
    client = get_client()
    response = client.embeddings.create(
        model=EMBEDDING_MODEL,
        input=texts,
        dimensions=1024
    )
    return [item.embedding for item in response.data]

def embed_all_paragraphs(podcast_id: str = None):
    """Embed all paragraphs that don't have embeddings yet."""
    db = get_db()
    if podcast_id:
        rows = db.execute(
            "SELECT id, text FROM paragraphs WHERE podcast_id = ? AND embedding IS NULL",
            (podcast_id,)
        ).fetchall()
    else:
        rows = db.execute(
            "SELECT id, text FROM paragraphs WHERE embedding IS NULL"
        ).fetchall()
    db.close()
    if not rows:
        print("No paragraphs to embed.")
        return
    print(f"Embedding {len(rows)} paragraphs...")
    for i in range(0, len(rows), BATCH_SIZE):
        batch = rows[i:i + BATCH_SIZE]
        texts = [r["text"][:2000] for r in batch]  # Truncate long texts
        try:
            embeddings = embed_texts(texts)
            for row, emb in zip(batch, embeddings):
                store_embedding(row["id"], emb)
            print(f" Batch {i // BATCH_SIZE + 1}/{(len(rows) + BATCH_SIZE - 1) // BATCH_SIZE}: {len(batch)} paragraphs")
        except Exception as e:
            print(f" Error at batch {i // BATCH_SIZE + 1}: {e}")
            time.sleep(2)
            continue
    print("Done.")

if __name__ == "__main__":
    import sys
    podcast_id = sys.argv[1] if len(sys.argv) > 1 else None
    embed_all_paragraphs(podcast_id)

backend/requirements.txt

@@ -0,0 +1,6 @@
fastapi>=0.115.0
uvicorn>=0.30.0
aiosqlite>=0.20.0
numpy>=1.26.0
openai>=1.50.0
pyyaml>=6.0

docker-compose.yml

@@ -0,0 +1,27 @@

services:
  podcast-mindmap:
    build: .
    container_name: podcast-mindmap
    restart: unless-stopped
    environment:
      - DATA_DIR=/data
      - AUDIO_DIR=/audio
      - STATIC_DIR=/static
      - DB_PATH=/data/db.sqlite
      - DASHSCOPE_API_KEY=${DASHSCOPE_API_KEY}
    volumes:
      - ./data:/data
      - ./audio:/audio
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.podcast.rule=Host(`podcast.toppyr.de`)"
      - "traefik.http.routers.podcast.entrypoints=websecure"
      - "traefik.http.routers.podcast.tls=true"
      - "traefik.http.routers.podcast.tls.certresolver=letsencrypt"
      - "traefik.http.services.podcast.loadbalancer.server.port=8000"
    networks:
      - collaboration_collaboration

networks:
  collaboration_collaboration:
    external: true

webapp/index.html

@@ -262,6 +262,26 @@
 .view-tab:hover { border-color: var(--accent); }
 .view-tab.active { background: var(--accent); color: var(--bg); border-color: var(--accent); }
+/* ── Podcast Selector ── */
+.podcast-selector {
+  display: flex; gap: 8px; flex-wrap: wrap; justify-content: center;
+  padding: 40px 20px;
+}
+.podcast-card {
+  background: var(--surface2); border: 1px solid var(--border);
+  border-radius: 12px; padding: 20px; width: 260px; cursor: pointer;
+  transition: all 0.2s;
+}
+.podcast-card:hover { border-color: var(--accent); transform: translateY(-2px); }
+.podcast-card h3 { font-size: 15px; margin-bottom: 4px; }
+.podcast-card p { font-size: 12px; color: var(--text-muted); }
+/* ── Semantic results ── */
+.semantic-badge {
+  display: inline-block; background: #2a9d8f33; color: #2a9d8f;
+  font-size: 9px; font-weight: 600; padding: 1px 6px; border-radius: 3px;
+}
 .welcome { text-align: center; padding: 40px 20px; color: var(--text-muted); }
 .welcome h2 { color: var(--text); margin-bottom: 8px; }
 .welcome p { font-size: 13px; line-height: 1.6; }
@@ -333,7 +353,7 @@ const AudioPlayer = {
     this.currentQuote = q;
     const sameFile = this.el.src && this.el.src.endsWith(q.audioFile);
     if (!sameFile) {
-      this.el.src = `audio/${q.audioFile}`;
+      this.el.src = `audio/${CURRENT_PODCAST ? CURRENT_PODCAST + '/' : ''}${q.audioFile}`;
     }
     this.nowPlaying.innerHTML = `<strong>"${q.text.substring(0, 70)}…"</strong> — ${q.speaker} (${q.episode})`;
     this.bar.classList.add('visible');
@@ -436,12 +456,18 @@ const TranscriptView = {
   activeIdx: -1,
   async show(episodeId, seekTime) {
-    if (!TRANSCRIPTS) await this.loadTranscripts();
-    const key = Object.keys(TRANSCRIPTS).find(k => k.startsWith(episodeId.replace('E', 'E')));
-    if (!key || !TRANSCRIPTS[key]) return;
+    const epData = await this.loadEpisodeTranscript(episodeId);
+    if (!epData || !epData.paragraphs) {
+      // Fallback: try old method
+      if (!TRANSCRIPTS) await this.loadTranscripts();
+      const key = Object.keys(TRANSCRIPTS).find(k => k.startsWith(episodeId));
+      if (!key || !TRANSCRIPTS[key]) return;
+      this.paragraphs = TRANSCRIPTS[key].paragraphs;
+    } else {
+      this.paragraphs = epData.paragraphs;
+    }
     this.episodeId = episodeId;
-    this.paragraphs = TRANSCRIPTS[key].paragraphs;
     this.visible = true;
     this.userScrolled = false;
@@ -515,18 +541,35 @@
   },
   async loadTranscripts() {
+    if (TRANSCRIPTS && Object.keys(TRANSCRIPTS).length > 0) return;
+    TRANSCRIPTS = {};
     try {
       const resp = await fetch('srt_index.json');
-      TRANSCRIPTS = await resp.json();
-    } catch (e) {
-      // Try from data subdir
-      try {
-        const resp = await fetch('data/srt_index.json');
-        TRANSCRIPTS = await resp.json();
-      } catch (e2) {
-        TRANSCRIPTS = {};
-      }
-    }
+      if (resp.ok) { TRANSCRIPTS = await resp.json(); return; }
+    } catch (e) {}
+    // Try from data subdir
+    try {
+      const resp = await fetch('data/srt_index.json');
+      if (resp.ok) { TRANSCRIPTS = await resp.json(); }
+    } catch (e2) {}
   },
+  async loadEpisodeTranscript(episodeId) {
+    // Try API first
+    if (CURRENT_PODCAST) {
+      try {
+        const resp = await fetch(`${API_BASE}/api/podcasts/${CURRENT_PODCAST}/transcript/${episodeId}`);
+        if (resp.ok) {
+          const data = await resp.json();
+          if (!TRANSCRIPTS) TRANSCRIPTS = {};
+          TRANSCRIPTS[episodeId] = data;
+          return data;
+        }
+      } catch (e) {}
+    }
+    // Fallback
+    await this.loadTranscripts();
+    const key = Object.keys(TRANSCRIPTS).find(k => k.startsWith(episodeId));
+    return key ? TRANSCRIPTS[key] : null;
+  }
 };
@@ -546,7 +589,34 @@ const Search = {
   async run(query) {
     if (query.length < 3) { this.clear(); return; }
-    if (!TRANSCRIPTS) await TranscriptView.loadTranscripts();
+    // Try semantic search via API first
+    if (CURRENT_PODCAST) {
+      try {
+        const resp = await fetch(`${API_BASE}/api/semantic-search?q=${encodeURIComponent(query)}&podcast_id=${CURRENT_PODCAST}`);
+        if (resp.ok) {
+          const apiResults = await resp.json();
+          if (apiResults.length > 0) {
+            this.showSemanticResults(apiResults, query);
+            return;
+          }
+        }
+      } catch (e) {}
+      // Fallback to text search via API
+      try {
+        const resp = await fetch(`${API_BASE}/api/search?q=${encodeURIComponent(query)}&podcast_id=${CURRENT_PODCAST}`);
+        if (resp.ok) {
+          const apiResults = await resp.json();
+          if (apiResults.length > 0) {
+            this.showApiResults(apiResults, query);
+            return;
+          }
+        }
+      } catch (e) {}
+    }
+    // Client-side fallback
+    if (!TRANSCRIPTS || Object.keys(TRANSCRIPTS).length === 0) await TranscriptView.loadTranscripts();
     const results = [];
     const qLower = query.toLowerCase();
@@ -623,6 +693,33 @@ const Search = {
     panel.innerHTML = html;
   },
+  showSemanticResults(results, query) {
+    TranscriptView.hide();
+    const panel = document.getElementById('panel');
+    let html = `<h2>${results.length} semantische Treffer für "${escHtml(query)}" <span class="semantic-badge">KI</span></h2>`;
+    results.forEach(r => {
+      html += `<div class="search-result" onclick="Search.goTo('${r.episode_id}', ${r.start_time})">`;
+      html += `<div class="sr-episode">${r.podcast_id}/${r.episode_id}: ${r.episode_title || ''} — ${r.guest || ''} · ${r.start_time !== null ? fmtTime(r.start_time) : ''} <span class="semantic-badge">${(r.score * 100).toFixed(0)}%</span></div>`;
+      html += `<div class="sr-text">${escHtml(r.text_preview || '')}</div>`;
+      html += `</div>`;
+    });
+    panel.innerHTML = html;
+  },
+  showApiResults(results, query) {
+    TranscriptView.hide();
+    const panel = document.getElementById('panel');
+    let html = `<h2>${results.length} Treffer für "${escHtml(query)}"</h2>`;
+    results.forEach(r => {
+      const highlighted = this.highlight(r.text.substring(0, 200), query);
+      html += `<div class="search-result" onclick="Search.goTo('${r.episode_id}', ${r.start_time})">`;
+      html += `<div class="sr-episode">${r.episode_id}: ${r.title || ''} — ${r.guest || ''} · ${r.start_time !== null ? fmtTime(r.start_time) : ''}</div>`;
+      html += `<div class="sr-text">${highlighted}</div>`;
+      html += `</div>`;
+    });
+    panel.innerHTML = html;
+  },
   highlight(text, query) {
     const escaped = escHtml(text);
     const re = new RegExp(`(${query.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')})`, 'gi');
@@ -651,10 +748,76 @@
 };

 // ── Data Loading ──
-fetch('mindmap_data.json')
-  .then(r => r.json())
-  .then(data => { DATA = data; init(); })
-  .catch(e => console.error('Failed to load data:', e));
+// Detect mode: API backend or static files
+const API_BASE = ''; // Same origin
+let CURRENT_PODCAST = null;
+
+async function loadApp() {
+  try {
+    // Try API first
+    const resp = await fetch(`${API_BASE}/api/podcasts`);
+    if (resp.ok) {
+      const podcasts = await resp.json();
+      if (podcasts.length === 1) {
+        // Single podcast → load directly
+        await selectPodcast(podcasts[0].id);
+      } else if (podcasts.length > 1) {
+        // Multiple podcasts → show selector
+        showPodcastSelector(podcasts);
+      } else {
+        throw new Error('No podcasts found');
+      }
+      return;
+    }
+  } catch (e) {
+    // Fallback: static files
+    console.log('API not available, falling back to static files');
+  }
+  // Static file fallback
+  try {
+    const resp = await fetch('mindmap_data.json');
+    DATA = await resp.json();
+    CURRENT_PODCAST = 'default';
+    init();
+  } catch (e) {
+    console.error('Failed to load data:', e);
+  }
+}
+
+async function selectPodcast(podcastId) {
+  try {
+    const resp = await fetch(`${API_BASE}/api/podcasts/${podcastId}`);
+    DATA = await resp.json();
+    CURRENT_PODCAST = podcastId;
+    // Clear existing graph if switching
+    document.getElementById('svg').innerHTML = '';
+    init();
+  } catch (e) {
+    console.error('Failed to load podcast:', e);
+  }
+}
+
+function showPodcastSelector(podcasts) {
+  const panel = document.getElementById('panel');
+  const mindmap = document.getElementById('mindmap');
+  let html = '<div class="podcast-selector">';
+  podcasts.forEach(p => {
+    html += `<div class="podcast-card" onclick="selectPodcast('${p.id}')">`;
+    html += `<h3>${escHtml(p.name)}</h3>`;
+    html += `<p>${escHtml(p.description || '')}</p>`;
+    html += `</div>`;
+  });
+  html += '</div>';
+  panel.innerHTML = html;
+  // Also set welcome
+  document.getElementById('app-title').textContent = 'Podcast Mindmap';
+  document.title = 'Podcast Mindmap';
+}
+
+loadApp();

 function init() {
   const name = DATA.name || 'Podcast';
@@ -1041,7 +1204,7 @@ async function exportSoundbite(quoteId) {
   if (btn) btn.textContent = 'Lädt…';
   try {
-    const response = await fetch(`audio/${q.audioFile}`);
+    const response = await fetch(`audio/${CURRENT_PODCAST ? CURRENT_PODCAST + '/' : ''}${q.audioFile}`);
     const arrayBuffer = await response.arrayBuffer();
     const audioCtx = new (window.AudioContext || window.webkitAudioContext)();