#8 Multi-Podcast-Dashboard, #9 PWA, #10 Cross-Podcast-Links, #12 Wort-Timestamps

- Backend: /api/compare Endpoint für Podcast-Vergleich (Stats, gemeinsame Topics, Top-Querverbindungen), /api/.../words Endpoint für Wort-Timestamps - Frontend: Podcast-Vergleichsansicht mit Statistiken und Cross-Links, Cross-Podcast-Suche-Toggle, semantische Links im Transkript (lazy-loaded), Podcast-Switcher mit Zurück-Navigation - PWA: manifest.json, Service Worker (stale-while-revalidate für Assets, network-first für API, cache-on-success für Audio), Icons - Scripts: transcribe_words.py (mlx-whisper Batch-Transkription mit Wort-Timestamps), import_words.py (Wort-Timestamps in DB importieren) - Dockerfile: PWA-Assets in Container kopieren Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-23 20:53:06 +02:00 · 2026-04-23 20:53:06 +02:00 · e678f75ee1
commit e678f75ee1
parent cb5978132c
9 changed files with 751 additions and 20 deletions
--- a/2
+++ b/2
@ -11,6 +11,8 @@ COPY backend/ .
 # Copy webapp as static files
 COPY webapp/index.html webapp/d3.v7.min.js /static/
 # PWA assets: web app manifest, service worker script and the two icon sizes
 # referenced by manifest.json. They are copied next to index.html because the
 # page links them with relative URLs ("manifest.json", "sw.js").
 COPY webapp/manifest.json webapp/sw.js /static/
 COPY webapp/icon-192.png webapp/icon-512.png /static/
 EXPOSE 8000
--- a/backend/app.py
+++ b/backend/app.py
@ -88,6 +88,32 @@ def get_transcript(podcast_id: str, episode_id: str):
    return {"paragraphs": [{"start": p["start_time"], "end": p["end_time"], "text": p["text"]} for p in paras]}
@app.get("/api/podcasts/{podcast_id}/transcript/{episode_id}/words")
def get_words(podcast_id: str, episode_id: str):
    """Return word-level timestamps for one episode.

    Args:
        podcast_id: Podcast identifier from the URL path.
        episode_id: Episode identifier from the URL path.

    Returns:
        ``{"available": False, "words": []}`` when the ``words`` table does
        not exist or holds no rows for this episode; otherwise
        ``{"available": True, "words": [...]}`` where each entry carries
        ``seg``, ``idx``, ``word``, ``start`` and ``end``, ordered by
        segment and word index.
    """
    # Local import so this handler does not depend on a module-level import.
    import sqlite3

    db = get_db()
    try:
        rows = db.execute(
            "SELECT segment_idx, word_idx, word, start_time, end_time FROM words "
            "WHERE podcast_id = ? AND episode_id = ? ORDER BY segment_idx, word_idx",
            (podcast_id, episode_id)
        ).fetchall()
    except sqlite3.OperationalError:
        # The `words` table is optional (it only exists after word timestamps
        # were imported). Only this "schema not there" class of error is
        # treated as "not available"; anything else should surface as an error
        # instead of being silently reported as missing data.
        return {"words": [], "available": False}
    finally:
        # Close the connection on every path, including unexpected errors.
        db.close()

    if not rows:
        return {"words": [], "available": False}
    return {
        "available": True,
        "words": [
            {
                "seg": row["segment_idx"],
                "idx": row["word_idx"],
                "word": row["word"],
                "start": row["start_time"],
                "end": row["end_time"],
            }
            for row in rows
        ],
    }
@app.get("/api/search")
 def search(q: str = Query(..., min_length=2), podcast_id: Optional[str] = None, limit: int = 50):
    """Full-text search across all transcripts."""
@ -213,6 +239,78 @@ def get_precomputed_similar(podcast_id: str, episode_id: str, para_idx: int, lim
    } for r in rows]
@app.get("/api/compare")
def compare_podcasts(a: str = Query(...), b: str = Query(...)):
    """Compare two podcasts: basic stats, shared topics and cross-links.

    Args:
        a: ID of the first podcast.
        b: ID of the second podcast.

    Returns:
        A dict with
        ``stats`` (per podcast: name plus episode/quote/paragraph counts),
        ``shared_topics`` (sorted tags present in both podcasts),
        ``only_in`` (sorted tags exclusive to each podcast),
        ``cross_links_count`` (semantic links between the two, either
        direction) and ``top_cross_links`` (up to 20 highest-scoring links
        with 150-character text previews).

    Raises:
        HTTPException: 404 if either podcast ID is unknown.
    """
    # Local import so this handler does not depend on a module-level import.
    import sqlite3

    topic_sql = (
        "SELECT DISTINCT t.tag FROM topics t JOIN paragraphs p ON t.paragraph_id = p.id "
        "WHERE p.podcast_id = ?"
    )
    # Links are directional in the table; match both a->b and b->a.
    pair_params = (a, b, b, a)

    db = get_db()
    try:
        # Basic stats
        stats = {}
        for pid in (a, b):
            podcast = db.execute("SELECT * FROM podcasts WHERE id = ?", (pid,)).fetchone()
            if not podcast:
                # The surrounding try/finally guarantees the connection is
                # released even though we leave via an exception here.
                raise HTTPException(404, f"Podcast '{pid}' not found")
            ep_count = db.execute(
                "SELECT COUNT(*) as c FROM episodes WHERE podcast_id = ?", (pid,)
            ).fetchone()["c"]
            q_count = db.execute(
                "SELECT COUNT(*) as c FROM quotes WHERE podcast_id = ?", (pid,)
            ).fetchone()["c"]
            p_count = db.execute(
                "SELECT COUNT(*) as c FROM paragraphs WHERE podcast_id = ?", (pid,)
            ).fetchone()["c"]
            stats[pid] = {
                "name": podcast["name"],
                "episodes": ep_count,
                "quotes": q_count,
                "paragraphs": p_count,
            }

        # Shared topics via topic tags
        set_a = {r["tag"] for r in db.execute(topic_sql, (a,)).fetchall()}
        set_b = {r["tag"] for r in db.execute(topic_sql, (b,)).fetchall()}

        # Cross-podcast semantic links (count + strongest examples)
        cross_links = 0
        top_links = []
        try:
            cross_links = db.execute(
                "SELECT COUNT(*) as c FROM semantic_links WHERE "
                "(podcast_id = ? AND target_podcast = ?) OR (podcast_id = ? AND target_podcast = ?)",
                pair_params
            ).fetchone()["c"]
            top_links = db.execute(
                "SELECT sl.*, p1.text as source_text, p2.text as target_text, "
                "e1.title as source_title, e2.title as target_title "
                "FROM semantic_links sl "
                "JOIN paragraphs p1 ON sl.podcast_id = p1.podcast_id AND sl.source_episode = p1.episode_id AND sl.source_idx = p1.idx "
                "JOIN paragraphs p2 ON sl.target_podcast = p2.podcast_id AND sl.target_episode = p2.episode_id AND sl.target_idx = p2.idx "
                "JOIN episodes e1 ON sl.podcast_id = e1.podcast_id AND sl.source_episode = e1.id "
                "JOIN episodes e2 ON sl.target_podcast = e2.podcast_id AND sl.target_episode = e2.id "
                "WHERE (sl.podcast_id = ? AND sl.target_podcast = ?) OR (sl.podcast_id = ? AND sl.target_podcast = ?) "
                "ORDER BY sl.score DESC LIMIT 20",
                pair_params
            ).fetchall()
        except sqlite3.OperationalError:
            # semantic_links table may not exist yet: report zero links rather
            # than failing the whole comparison. Other errors propagate.
            pass
    finally:
        db.close()

    return {
        "stats": stats,
        "shared_topics": sorted(set_a & set_b),
        "only_in": {a: sorted(set_a - set_b), b: sorted(set_b - set_a)},
        "cross_links_count": cross_links,
        "top_cross_links": [{
            "source_podcast": r["podcast_id"], "source_episode": r["source_episode"],
            # `or ""` keeps the preview slicing safe if a paragraph text is NULL.
            "source_text": (r["source_text"] or "")[:150], "source_title": r["source_title"],
            "target_podcast": r["target_podcast"], "target_episode": r["target_episode"],
            "target_text": (r["target_text"] or "")[:150], "target_title": r["target_title"],
            "score": r["score"]
        } for r in top_links]
    }
@app.get("/api/semantic-search")
 def semantic_search(q: str = Query(..., min_length=3), podcast_id: Optional[str] = None, limit: int = 20):
    """Semantic search using query embedding."""
--- a/scripts/import_words.py
+++ b/scripts/import_words.py
@ -0,0 +1,95 @@
 #!/usr/bin/env python3
 """Importiert Wort-Level-Timestamps in die SQLite-Datenbank.
 Liest *.words.json-Dateien und schreibt in die Tabelle `words`.
 Nutzung:
    python3 import_words.py <podcast_id> <words-json-verzeichnis> [db-pfad]
 Beispiel:
    python3 import_words.py neu-denken ../data/neu-denken/words/ ../data/db.sqlite
 """
 import json
 import os
 import sys
 import sqlite3
 from pathlib import Path
 def init_words_table(db):
     """Create the `words` table and its two lookup indexes if they are missing.

     Every statement uses IF NOT EXISTS, so calling this repeatedly on the
     same database is harmless. `db` must offer `executescript` (a
     `sqlite3.Connection` in this script).
     """
     schema_sql = """
    CREATE TABLE IF NOT EXISTS words (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        podcast_id TEXT NOT NULL,
        episode_id TEXT NOT NULL,
        segment_idx INTEGER NOT NULL,
        word_idx INTEGER NOT NULL,
        word TEXT NOT NULL,
        start_time REAL NOT NULL,
        end_time REAL NOT NULL,
        UNIQUE(podcast_id, episode_id, segment_idx, word_idx)
    );
    CREATE INDEX IF NOT EXISTS idx_words_episode ON words(podcast_id, episode_id);
    CREATE INDEX IF NOT EXISTS idx_words_time ON words(podcast_id, episode_id, start_time);
    """
     # One script call runs the table and both index statements together.
     db.executescript(schema_sql)
 def import_words_file(db, podcast_id: str, words_file: Path):
     """Import one *.words.json file into the `words` table.

     Existing rows for the same podcast/episode are replaced. The caller is
     responsible for committing the transaction.

     Args:
         db: Open database connection that already has the `words` table.
         podcast_id: Podcast the episode belongs to.
         words_file: Path to a JSON file with an "episode" name and a
             "segments" list whose items may carry a "words" list of
             {"word", "start", "end"} objects.

     Returns:
         Number of words inserted.

     Raises:
         KeyError: If "episode" or a word's "word"/"start"/"end" is missing.
     """
     # The transcripts contain non-ASCII text (written with ensure_ascii=False),
     # so decode explicitly instead of relying on the platform default encoding.
     data = json.loads(words_file.read_text(encoding="utf-8"))
     # Episode ID is the part before the first dash: S1E1-Wachstum -> S1E1
     episode_id = data["episode"].split("-")[0]
     # Build all rows first: a malformed file then fails before the DELETE
     # below has removed the previously imported words.
     rows = [
         (podcast_id, episode_id, seg_idx, word_idx, w["word"], w["start"], w["end"])
         for seg_idx, segment in enumerate(data.get("segments", []))
         for word_idx, w in enumerate(segment.get("words", []))
     ]
     # Drop old entries so a re-import does not hit the UNIQUE constraint.
     db.execute("DELETE FROM words WHERE podcast_id = ? AND episode_id = ?", (podcast_id, episode_id))
     db.executemany(
         "INSERT INTO words (podcast_id, episode_id, segment_idx, word_idx, word, start_time, end_time) "
         "VALUES (?, ?, ?, ?, ?, ?, ?)",
         rows,
     )
     return len(rows)
 def main():
     """Command-line entry point: import every *.words.json of a directory.

     Arguments (from sys.argv): <podcast_id> <words-dir> [db-path]. The
     database path falls back to the DB_PATH environment variable and then to
     "data/db.sqlite". Exits with status 1 on wrong usage or when the
     directory contains no matching files.
     """
     if len(sys.argv) < 3:
         print(f"Nutzung: {sys.argv[0]} <podcast_id> <words-verzeichnis> [db-pfad]")
         sys.exit(1)
     podcast_id = sys.argv[1]
     words_dir = Path(sys.argv[2])
     db_path = sys.argv[3] if len(sys.argv) > 3 else os.environ.get("DB_PATH", "data/db.sqlite")
     # Look for input before touching the database, so a wrong directory does
     # not create or alter the database file.
     files = sorted(words_dir.glob("*.words.json"))
     if not files:
         print(f"Keine *.words.json-Dateien in {words_dir} gefunden.")
         sys.exit(1)
     print(f"Importiere {len(files)} Dateien für Podcast '{podcast_id}'")
     total_words = 0
     db = sqlite3.connect(db_path)
     try:
         init_words_table(db)
         for f in files:
             count = import_words_file(db, podcast_id, f)
             print(f"  {f.stem}: {count} Wörter")
             total_words += count
         # Single commit at the end: if any file fails, nothing is persisted.
         db.commit()
     finally:
         # Always release the connection, also when an import raises.
         db.close()
     print(f"Fertig: {total_words} Wörter importiert.")
 if __name__ == "__main__":
    main()
--- a/scripts/transcribe_words.py
+++ b/scripts/transcribe_words.py
@ -0,0 +1,138 @@
 #!/usr/bin/env python3
 """Batch-Transkription mit wortgenauen Timestamps via mlx-whisper.
 Erzeugt pro Episode eine JSON-Datei mit Wort-Level-Timing.
 Läuft auf Apple Silicon (mlx-metal).
 Nutzung:
    python3 transcribe_words.py /pfad/zu/audio/ /pfad/zu/output/
    python3 transcribe_words.py /pfad/zu/audio/S1E1-Wachstum.m4a  # einzelne Datei
 Modell: whisper-large-v3-turbo (schnell + genau, ~1.5 GB VRAM)
 """
 import json
 import os
 import sys
 import time
 from pathlib import Path
 # ── Config ──
 MODEL = "mlx-community/whisper-large-v3-turbo"
 LANGUAGE = "de"
 AUDIO_EXTENSIONS = {".m4a", ".mp3", ".wav", ".flac", ".ogg", ".opus"}
 def transcribe_episode(
     audio_path: str,
     output_dir: str,
     initial_prompt: str = "NEU DENKEN Podcast mit Maja Göpel. Themen: Wirtschaft, Demokratie, Sicherheit, Freiheit.",
 ) -> dict:
     """Transcribe one episode with word-level timestamps.

     The result is written to ``<output_dir>/<audio stem>.words.json``. If
     that file already exists it is loaded and returned without transcribing.

     Args:
         audio_path: Path of the audio file to transcribe.
         output_dir: Directory that receives the JSON output.
         initial_prompt: Prompt text passed to the transcriber. Defaults to
             the prompt this script has always used; override it for other
             podcasts.

     Returns:
         The output dict: episode name, model, language, elapsed seconds,
         word/segment counts and the segments with their words.
     """
     name = Path(audio_path).stem
     output_file = Path(output_dir) / f"{name}.words.json"
     # Skip if already present. This check runs before importing mlx_whisper,
     # so existing results can be reused on machines without the package.
     if output_file.exists():
         print(f"  ⏭ {name} — bereits vorhanden, überspringe")
         return json.loads(output_file.read_text(encoding="utf-8"))

     import mlx_whisper

     print(f"  ▶ {name} — transkribiere…")
     t0 = time.time()
     result = mlx_whisper.transcribe(
         audio_path,
         path_or_hf_repo=MODEL,
         language=LANGUAGE,
         word_timestamps=True,
         verbose=False,
         condition_on_previous_text=True,
         initial_prompt=initial_prompt,
     )
     elapsed = time.time() - t0
     # Keep the segment level (used for paragraph mapping) with words nested.
     segments = [
         {
             "start": round(seg["start"], 3),
             "end": round(seg["end"], 3),
             "text": seg["text"].strip(),
             "words": [
                 {
                     "word": w["word"].strip(),
                     "start": round(w["start"], 3),
                     "end": round(w["end"], 3),
                 }
                 for w in seg.get("words", [])
             ],
         }
         for seg in result.get("segments", [])
     ]
     word_count = sum(len(seg["words"]) for seg in segments)
     output = {
         "episode": name,
         "model": MODEL,
         "language": LANGUAGE,
         # NOTE: despite its name this is the wall-clock transcription time,
         # not the audio length; the key is kept for existing output files.
         "duration_seconds": round(elapsed, 1),
         "word_count": word_count,
         "segment_count": len(segments),
         "segments": segments,
     }
     # ensure_ascii=False emits raw umlauts, so the file encoding must be
     # pinned instead of depending on the platform default.
     output_file.write_text(json.dumps(output, ensure_ascii=False, indent=2), encoding="utf-8")
     print(f"  ✓ {name} — {word_count} Wörter, {len(segments)} Segmente, {elapsed:.0f}s")
     return output
 def main():
     """Command-line entry point: transcribe one audio file or a directory.

     Arguments (from sys.argv): <audio-file-or-dir> [output-dir]. Without an
     output directory, results go next to the audio (the directory itself, or
     the file's parent). Exits with status 1 on wrong usage, a nonexistent
     input path, or when no audio files are found. Per-episode failures are
     reported and skipped so one bad file does not stop the batch.
     """
     if len(sys.argv) < 2:
         print(f"Nutzung: {sys.argv[0]} <audio-pfad-oder-verzeichnis> [output-verzeichnis]")
         sys.exit(1)
     input_path = Path(sys.argv[1])
     # Single file or directory? Validate the input before creating anything
     # on disk, so a mistyped path does not leave empty directories behind.
     if input_path.is_file():
         files = [input_path]
         default_output = input_path.parent
     elif input_path.is_dir():
         files = sorted(f for f in input_path.iterdir() if f.suffix.lower() in AUDIO_EXTENSIONS)
         default_output = input_path
     else:
         print(f"Fehler: {input_path} existiert nicht.")
         sys.exit(1)
     if not files:
         print("Keine Audio-Dateien gefunden.")
         sys.exit(1)
     output_dir = Path(sys.argv[2]) if len(sys.argv) > 2 else default_output
     output_dir.mkdir(parents=True, exist_ok=True)
     print(f"Transkribiere {len(files)} Dateien → {output_dir}/")
     print(f"Modell: {MODEL}")
     print()
     total_t0 = time.time()
     results = []
     for i, f in enumerate(files, 1):
         print(f"[{i}/{len(files)}] {f.name}")
         try:
             results.append(transcribe_episode(str(f), str(output_dir)))
         except Exception as e:
             # Best effort per episode: report and continue with the next one.
             print(f"  ✗ FEHLER: {e}")
     total_elapsed = time.time() - total_t0
     total_words = sum(r.get("word_count", 0) for r in results)
     print()
     print(f"Fertig: {len(results)}/{len(files)} Episoden, {total_words} Wörter, {total_elapsed:.0f}s gesamt")
 if __name__ == "__main__":
    main()
--- a/webapp/icon-192.png
+++ b/webapp/icon-192.png
--- a/webapp/icon-512.png
+++ b/webapp/icon-512.png
--- a/webapp/index.html
+++ b/webapp/index.html
@ -3,6 +3,10 @@
 <head>
 <meta charset="UTF-8">
 <meta name="viewport" content="width=device-width, initial-scale=1.0">
 <meta name="theme-color" content="#0f1117">
 <meta name="apple-mobile-web-app-capable" content="yes">
 <meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
 <link rel="manifest" href="manifest.json">
 <title>Podcast Mindmap</title>
 <style>
  :root {
@ -273,8 +277,63 @@
    transition: all 0.2s;
  }
  .podcast-card:hover { border-color: var(--accent); transform: translateY(-2px); }
  /* Selected card: accent border plus a faint accent tint. A hex alpha suffix
     cannot be appended to var() (the result is two tokens, which makes the
     declaration invalid), so the tint is mixed explicitly; 7% ≈ alpha 0x11. */
  .podcast-card.selected { border-color: var(--accent); background: color-mix(in srgb, var(--accent) 7%, transparent); }
  .podcast-card h3 { font-size: 15px; margin-bottom: 4px; }
  .podcast-card p { font-size: 12px; color: var(--text-muted); }
  .podcast-card .pc-stats { font-size: 11px; color: var(--text-muted); margin-top: 6px; }
  /* ── Compare View ── */
  .compare-section { margin-top: 16px; }
  .compare-section h3 { font-size: 13px; color: var(--text); margin-bottom: 8px; }
  .compare-stats { display: flex; gap: 12px; justify-content: center; margin-bottom: 16px; }
  .compare-stat-card {
    background: var(--surface2); border-radius: 8px; padding: 12px 16px;
    text-align: center; flex: 1; max-width: 200px;
  }
  .compare-stat-card .stat-val { font-size: 20px; font-weight: 700; color: var(--accent); }
  .compare-stat-card .stat-label { font-size: 10px; color: var(--text-muted); }
  .shared-topics { display: flex; flex-wrap: wrap; gap: 4px; margin: 8px 0; }
  /* Topic chips use translucent accent tints. var(--accent) followed by hex
     digits is not a valid colour, so the tints are built with color-mix():
     13% ≈ alpha 0x22, 27% ≈ alpha 0x44. */
  .shared-topic {
    background: color-mix(in srgb, var(--accent) 13%, transparent);
    color: var(--accent);
    border: 1px solid color-mix(in srgb, var(--accent) 27%, transparent);
    padding: 2px 8px; border-radius: 10px; font-size: 10px; cursor: pointer;
  }
  .shared-topic:hover { background: color-mix(in srgb, var(--accent) 27%, transparent); }
  .cross-link-card {
    background: var(--surface2); border-radius: 8px; padding: 10px;
    margin-bottom: 6px; border-left: 3px solid #2a9d8f;
  }
  .cross-link-card .cl-source, .cross-link-card .cl-target {
    font-size: 11px; line-height: 1.4;
  }
  .cross-link-card .cl-label { font-size: 9px; color: var(--text-muted); text-transform: uppercase; }
  .cross-link-card .cl-arrow { text-align: center; color: #2a9d8f; font-size: 10px; margin: 4px 0; }
  .cross-link-card .cl-score { font-size: 9px; color: #2a9d8f; float: right; }
  .compare-actions { display: flex; gap: 8px; justify-content: center; margin: 12px 0; }
  .compare-btn {
    background: var(--surface2); border: 1px solid var(--border); color: var(--text-muted);
    padding: 6px 14px; border-radius: 6px; font-size: 11px; cursor: pointer; transition: all 0.2s;
  }
  .compare-btn:hover { border-color: var(--accent); color: var(--text); }
  .compare-btn.active { background: var(--accent); color: var(--bg); border-color: var(--accent); }
  /* ── Cross-podcast toggle ── */
  .cross-toggle {
    display: flex; align-items: center; gap: 6px; font-size: 11px; color: var(--text-muted);
  }
  .cross-toggle input[type="checkbox"] { accent-color: var(--accent); }
  /* ── Semantic link in transcript ── */
  .semantic-link-card {
    background: #2a9d8f11; border: 1px solid #2a9d8f33; border-radius: 6px;
    padding: 6px 10px; margin: 4px 0; cursor: pointer; transition: background 0.15s;
  }
  .semantic-link-card:hover { background: #2a9d8f22; }
  .semantic-link-card .sl-podcast { font-size: 9px; color: #2a9d8f; text-transform: uppercase; }
  .semantic-link-card .sl-episode { font-size: 10px; color: var(--accent); font-weight: 600; }
  .semantic-link-card .sl-text { font-size: 11px; color: var(--text-muted); font-style: italic; }
  /* ── Semantic results ── */
  .semantic-badge {
@ -301,6 +360,9 @@
  <header>
    <h1><span id="app-title">Podcast</span> Mindmap</h1>
    <input type="search" class="search-box" id="search-input" placeholder="Transkripte durchsuchen…">
    <label class="cross-toggle" id="cross-toggle" style="display:none" title="Suche über alle Podcasts">
      <input type="checkbox" id="cross-search-cb"> Alle Podcasts
    </label>
    <div class="view-tabs">
      <button class="view-tab active" id="tab-mindmap" onclick="switchView('mindmap')">Mindmap</button>
      <button class="view-tab" id="tab-timeline" onclick="switchView('timeline')">Timeline</button>
@ -590,10 +652,13 @@ const Search = {
  async run(query) {
    if (query.length < 3) { this.clear(); return; }
    const crossSearch = document.getElementById('cross-search-cb')?.checked;
    const pidParam = crossSearch ? '' : (CURRENT_PODCAST ? `&podcast_id=${CURRENT_PODCAST}` : '');
    // Try semantic search via API first
-    if (CURRENT_PODCAST) {
+    if (CURRENT_PODCAST || crossSearch) {
      try {
-        const resp = await fetch(`${API_BASE}/api/semantic-search?q=${encodeURIComponent(query)}&podcast_id=${CURRENT_PODCAST}`);
+        const resp = await fetch(`${API_BASE}/api/semantic-search?q=${encodeURIComponent(query)}${pidParam}`);
        if (resp.ok) {
          const apiResults = await resp.json();
          if (apiResults.length > 0) {
@ -604,7 +669,7 @@ const Search = {
      } catch (e) {}
      // Fallback to text search via API
      try {
-        const resp = await fetch(`${API_BASE}/api/search?q=${encodeURIComponent(query)}&podcast_id=${CURRENT_PODCAST}`);
+        const resp = await fetch(`${API_BASE}/api/search?q=${encodeURIComponent(query)}${pidParam}`);
        if (resp.ok) {
          const apiResults = await resp.json();
          if (apiResults.length > 0) {
@ -790,42 +855,193 @@ async function selectPodcast(podcastId) {
    const resp = await fetch(`${API_BASE}/api/podcasts/${podcastId}`);
    DATA = await resp.json();
    CURRENT_PODCAST = podcastId;
-    // Clear existing graph if switching
+    // Clear existing graph + timeline if switching
    document.getElementById('svg').innerHTML = '';
    document.getElementById('staffel-filters').innerHTML = '';
    const tl = document.getElementById('timeline-container');
    if (tl) { tl.remove(); timelineBuilt = false; }
    // Reset to mindmap view
    document.getElementById('mindmap').style.display = '';
    document.querySelectorAll('.view-tab').forEach(t => t.classList.remove('active'));
    document.getElementById('tab-mindmap')?.classList.add('active');
    TRANSCRIPTS = null;
    init();
  } catch (e) {
    console.error('Failed to load podcast:', e);
  }
 }
 let ALL_PODCASTS = [];
 function showPodcastSelector(podcasts) {
  ALL_PODCASTS = podcasts;
  const panel = document.getElementById('panel');
  const mindmap = document.getElementById('mindmap');
-  let html = '<div class="podcast-selector">';
+  // Show cross-search toggle if multiple podcasts
  if (podcasts.length > 1) {
    document.getElementById('cross-toggle').style.display = '';
  }
  let html = '<div class="welcome"><h2>Podcast Mindmap</h2><p>Wähle einen Podcast oder vergleiche zwei.</p></div>';
  html += '<div class="podcast-selector" id="podcast-selector">';
  podcasts.forEach(p => {
-    html += `<div class="podcast-card" onclick="selectPodcast('${p.id}')">`;
+    html += `<div class="podcast-card" id="pc-${p.id}" onclick="selectPodcast('${p.id}')">`;
    html += `<h3>${escHtml(p.name)}</h3>`;
    html += `<p>${escHtml(p.description || '')}</p>`;
    html += `</div>`;
  });
  html += '</div>';
  if (podcasts.length > 1) {
    html += '<div class="compare-actions">';
    html += '<button class="compare-btn" onclick="startCompare()">Podcasts vergleichen</button>';
    html += '</div>';
  }
  html += '<div id="compare-result"></div>';
  panel.innerHTML = html;
-  // Also set welcome
+  document.getElementById('app-title').textContent = 'Podcast';
  document.getElementById('app-title').textContent = 'Podcast Mindmap';
  document.title = 'Podcast Mindmap';
 }
 // ── #8: Compare Podcasts ──
 let compareMode = false;
 let compareSelection = [];
 /**
  * Enter compare mode: clear any previous selection, show the hint text and
  * make every podcast card toggle its selection instead of opening the podcast.
  * Does nothing when fewer than two podcasts are known.
  */
 function startCompare() {
   if (ALL_PODCASTS.length < 2) {
     return;
   }
   compareMode = true;
   compareSelection = [];
   for (const cardEl of document.querySelectorAll('.podcast-card')) {
     cardEl.classList.remove('selected');
   }
   const hintBox = document.getElementById('compare-result');
   if (hintBox) {
     hintBox.innerHTML = '<p class="subtitle" style="text-align:center">Wähle zwei Podcasts zum Vergleichen.</p>';
   }
   // Temporarily replace the cards' click handlers with the selection toggle.
   for (const podcast of ALL_PODCASTS) {
     const cardEl = document.getElementById(`pc-${podcast.id}`);
     if (cardEl) {
       cardEl.onclick = () => toggleCompareSelect(podcast.id);
     }
   }
 }
 /**
  * Toggle one podcast card in the compare selection. At most two podcasts can
  * be selected; a third click on an unselected card is ignored. As soon as two
  * are selected the comparison is started.
  */
 function toggleCompareSelect(id) {
   const cardEl = document.getElementById(`pc-${id}`);
   const pos = compareSelection.indexOf(id);
   const alreadyPicked = pos >= 0;
   // Selection is full and this card is not part of it: nothing to do.
   if (!alreadyPicked && compareSelection.length >= 2) return;
   if (alreadyPicked) {
     compareSelection.splice(pos, 1);
     cardEl.classList.remove('selected');
   } else {
     compareSelection.push(id);
     cardEl.classList.add('selected');
   }
   if (compareSelection.length === 2) {
     const [first, second] = compareSelection;
     runCompare(first, second);
   }
 }
 /**
  * Fetch the comparison of podcasts `a` and `b` from /api/compare and render
  * it into #compare-result. Shows a fallback message if the request fails or
  * the response is not OK. Does nothing when the result container is missing.
  */
 async function runCompare(a, b) {
   const result = document.getElementById('compare-result');
   if (!result) return;
   result.innerHTML = '<p class="subtitle" style="text-align:center">Vergleiche…</p>';
   try {
     // URLSearchParams percent-encodes the IDs, so characters such as
     // '&', '#' or spaces cannot corrupt the query string.
     const query = new URLSearchParams({ a, b });
     const resp = await fetch(`${API_BASE}/api/compare?${query}`);
     if (!resp.ok) throw new Error('API error');
     const data = await resp.json();
     showCompareResult(data, a, b);
   } catch (e) {
     result.innerHTML = '<p class="subtitle" style="text-align:center">Vergleich nicht verfügbar.</p>';
   }
 }
 /**
  * Render the /api/compare response for podcasts `a` and `b` into
  * #compare-result: summary numbers, shared topics (clickable, they start a
  * cross-podcast search), the ten strongest cross-links and buttons to open
  * either podcast. Afterwards compare mode is left and the podcast cards open
  * their podcast on click again.
  */
 function showCompareResult(data, a, b) {
   const result = document.getElementById('compare-result');
   if (!result) return;
   const sa = data.stats[a], sb = data.stats[b];
   let html = '<div class="compare-section">';
   html += '<h3 style="text-align:center">Vergleich</h3>';
   // Stats
   html += '<div class="compare-stats">';
   html += `<div class="compare-stat-card"><div class="stat-val">${sa.episodes + sb.episodes}</div><div class="stat-label">Episoden gesamt</div></div>`;
   html += `<div class="compare-stat-card"><div class="stat-val">${data.shared_topics.length}</div><div class="stat-label">Gemeinsame Themen</div></div>`;
   html += `<div class="compare-stat-card"><div class="stat-val">${data.cross_links_count}</div><div class="stat-label">Semantische Querverbindungen</div></div>`;
   html += '</div>';
   // Shared topics. Only the array index goes into the markup; the click
   // handler is bound after rendering. Interpolating the tag into an inline
   // onclick would place API text inside a JavaScript string literal, where
   // HTML-escaping offers no protection (entities are decoded before the
   // handler source is evaluated).
   if (data.shared_topics.length > 0) {
     html += '<h3>Gemeinsame Themen</h3>';
     html += '<div class="shared-topics">';
     data.shared_topics.forEach((t, i) => {
       html += `<span class="shared-topic" data-topic-idx="${i}">${escHtml(t.replace(/_/g, ' '))}</span>`;
     });
     html += '</div>';
   }
   // Top cross-links
   if (data.top_cross_links.length > 0) {
     html += '<h3 style="margin-top:12px">Stärkste Querverbindungen</h3>';
     data.top_cross_links.slice(0, 10).forEach(link => {
       html += '<div class="cross-link-card">';
       html += `<div class="cl-score">${(link.score * 100).toFixed(0)}%</div>`;
       html += `<div class="cl-label">${escHtml(link.source_podcast)}</div>`;
       html += `<div class="cl-source"><strong>${escHtml(link.source_title)}</strong>: ${escHtml(link.source_text)}</div>`;
       html += '<div class="cl-arrow">↕</div>';
       html += `<div class="cl-label">${escHtml(link.target_podcast)}</div>`;
       html += `<div class="cl-target"><strong>${escHtml(link.target_title)}</strong>: ${escHtml(link.target_text)}</div>`;
       html += '</div>';
     });
   }
   // Open buttons: the markup only says which side ("a"/"b") a button stands
   // for; the real podcast ID stays in this closure.
   html += '<div class="compare-actions" style="margin-top:16px">';
   html += `<button class="compare-btn" data-open="a">${escHtml(sa.name)} öffnen</button>`;
   html += `<button class="compare-btn" data-open="b">${escHtml(sb.name)} öffnen</button>`;
   html += '</div>';
   html += '</div>';
   result.innerHTML = html;
   // Bind the handlers for the elements rendered above.
   result.querySelectorAll('.shared-topic').forEach(el => {
     el.onclick = () => searchTopic(data.shared_topics[Number(el.dataset.topicIdx)]);
   });
   result.querySelectorAll('[data-open]').forEach(btn => {
     btn.onclick = () => selectPodcast(btn.dataset.open === 'a' ? a : b);
   });
   // Reset compare mode
   compareMode = false;
   ALL_PODCASTS.forEach(p => {
     const card = document.getElementById(`pc-${p.id}`);
     if (card) card.onclick = () => selectPodcast(p.id);
   });
 }
 /**
  * Put a topic tag into the search box (underscores shown as spaces), switch
  * on the "all podcasts" checkbox and run the search.
  */
 function searchTopic(topic) {
   const searchBox = document.getElementById('search-input');
   const readable = topic.replace(/_/g, ' ');
   searchBox.value = readable;
   // Topic searches always span every podcast.
   const crossBox = document.getElementById('cross-search-cb');
   crossBox.checked = true;
   Search.run(searchBox.value);
 }
 // Back to podcast list
 /**
  * Leave the current podcast: forget the selection, empty the graph SVG and
  * the season filters, then show the podcast selector again.
  */
 function showPodcastList() {
   CURRENT_PODCAST = null;
   for (const elementId of ['svg', 'staffel-filters']) {
     document.getElementById(elementId).innerHTML = '';
   }
   showPodcastSelector(ALL_PODCASTS);
 }
 loadApp();
 function init() {
  const name = DATA.name || 'Podcast';
  document.title = name + ' — Mindmap';
-  document.getElementById('app-title').textContent = name;
+  document.getElementById('app-title').innerHTML = ALL_PODCASTS.length > 1
    ? `<span style="cursor:pointer" onclick="showPodcastList()" title="Zurück zur Übersicht">←</span> <span>${escHtml(name)}</span>`
    : `<span>${escHtml(name)}</span>`;
  document.getElementById('welcome-panel').innerHTML = `
-    <h2>${name}</h2>
+    <h2>${escHtml(name)}</h2>
-    <p>${DATA.description || ''}<br>
+    <p>${escHtml(DATA.description || '')}<br>
    ${DATA.episodes.length} Folgen, ${DATA.staffeln.length} Staffeln, ${DATA.quotes.length} Zitate</p>
    <p style="margin-top:16px">Klicke auf einen Themenknoten oder eine Episode.</p>`;
@ -1172,7 +1388,7 @@ function buildTopicTags(episodeKey, paraIdx) {
    '</div>';
 }
-// Patch TranscriptView.show to include backlinks
+// Patch TranscriptView.show to include backlinks + semantic cross-links
 const _origTranscriptShow = TranscriptView.show.bind(TranscriptView);
 TranscriptView.show = async function(episodeId, seekTime) {
  await loadTopics();
@ -1180,19 +1396,95 @@ TranscriptView.show = async function(episodeId, seekTime) {
  // Find the episode key in TOPICS
  const epKey = Object.keys(TOPICS.tagged_paragraphs || {}).find(k => k.startsWith(episodeId));
  if (!epKey) return;
-  // Add topic tags and backlinks to each paragraph
+  // Add topic tags, backlinks, and semantic cross-links to each paragraph
-  document.querySelectorAll('.transcript-para').forEach(el => {
+  const paraEls = document.querySelectorAll('.transcript-para');
  for (const el of paraEls) {
    const idx = parseInt(el.dataset.idx);
-    const tags = buildTopicTags(epKey, idx);
+
-    const links = buildBacklinks(epKey, idx);
+    // Topic tags + keyword backlinks
-    if (tags || links) {
+    if (epKey) {
-      el.insertAdjacentHTML('beforeend', tags + links);
+      const tags = buildTopicTags(epKey, idx);
      const links = buildBacklinks(epKey, idx);
      if (tags || links) {
        el.insertAdjacentHTML('beforeend', tags + links);
      }
    }
-  });
+
    // #10: Semantic cross-podcast links (lazy-load on expand)
    if (CURRENT_PODCAST) {
      const expandBtn = document.createElement('span');
      expandBtn.className = 'backlink';
      expandBtn.style.fontSize = '10px';
      expandBtn.style.color = '#2a9d8f';
      expandBtn.textContent = '↔ Verwandte Stellen…';
      expandBtn.onclick = (e) => { e.stopPropagation(); loadSemanticLinks(expandBtn, episodeId, idx); };
      el.appendChild(expandBtn);
    }
  }
 };
 // ============================================================
 // #10: Cross-Podcast Semantic Links
 // ============================================================
 /**
  * Load semantically related passages for one transcript paragraph and replace
  * the clicked button with the result cards.
  *
  * Precomputed links are tried first; if that yields nothing, the live
  * similarity endpoint is queried with cross_podcast=true. Clicking a card
  * opens the target transcript, switching podcast first when the hit belongs
  * to another podcast.
  *
  * @param {HTMLElement} btn       the "related passages" button that was clicked
  * @param {string}      episodeId episode the paragraph belongs to
  * @param {number}      paraIdx   paragraph index within the episode
  */
 async function loadSemanticLinks(btn, episodeId, paraIdx) {
   btn.textContent = 'Lade…';
   // Disable the button while loading so repeated clicks do not stack requests.
   btn.onclick = null;
   // Encode each path segment so IDs with reserved characters stay intact.
   const path = [CURRENT_PODCAST, episodeId, paraIdx].map(part => encodeURIComponent(part)).join('/');
   try {
     // Try precomputed first, then live
     let results = [];
     try {
       const resp = await fetch(`${API_BASE}/api/similar-precomputed/${path}?limit=5`);
       if (resp.ok) results = await resp.json();
     } catch (e) {}
     if (results.length === 0) {
       const resp = await fetch(`${API_BASE}/api/similar/${path}?limit=5&cross_podcast=true`);
       if (resp.ok) results = await resp.json();
     }
     if (results.length === 0) {
       btn.textContent = 'Keine verwandten Stellen gefunden.';
       btn.style.cursor = 'default';
       return;
     }
     // Replace button with results
     const container = document.createElement('div');
     container.className = 'backlinks';
     container.innerHTML = '<div class="backlinks-title">Semantisch verwandte Stellen</div>';
     results.forEach(r => {
       const isCross = r.podcast_id !== CURRENT_PODCAST;
       const card = document.createElement('div');
       card.className = 'semantic-link-card';
       card.onclick = () => {
         if (isCross) {
           // Switch podcast and navigate
           selectPodcast(r.podcast_id).then(() => {
             TranscriptView.show(r.episode_id, r.start_time);
           });
         } else {
           TranscriptView.show(r.episode_id, r.start_time);
         }
       };
       let inner = '';
       if (isCross) inner += `<div class="sl-podcast">${escHtml(r.podcast_id)}</div>`;
       inner += `<div class="sl-episode">${escHtml(r.episode_id)}: ${escHtml(r.episode_title || '')} — ${escHtml(r.guest || '')}`;
       inner += ` <span class="semantic-badge">${(r.score * 100).toFixed(0)}%</span></div>`;
       inner += `<div class="sl-text">"${escHtml(r.text_preview || '')}"</div>`;
       card.innerHTML = inner;
       container.appendChild(card);
     });
     btn.replaceWith(container);
   } catch (e) {
     btn.textContent = 'Fehler beim Laden.';
     // Re-arm the button so a transient failure (e.g. offline) can be retried
     // instead of leaving a dead element behind.
     btn.onclick = (ev) => { ev.stopPropagation(); loadSemanticLinks(btn, episodeId, paraIdx); };
   }
 }
 // ============================================================
 // #6: Soundbite Export
 // ============================================================
@ -1387,6 +1679,13 @@ function buildTimeline() {
  timelineBuilt = true;
 }
 // ============================================================
 // #9: PWA — Service Worker Registration
 // ============================================================
 // Register the service worker only where the API exists. The script URL is
 // relative ('sw.js'), so it is resolved against the page's own location.
 if ('serviceWorker' in navigator) {
  // Registration failures are deliberately ignored: the empty catch keeps the
  // rejected promise from surfacing as an unhandled error.
  navigator.serviceWorker.register('sw.js').catch(() => {});
 }
 function showQuoteInPanel(quoteId) {
  const q = DATA.quotes.find(q => q.id === quoteId);
  if (!q) return;
--- a/webapp/manifest.json
+++ b/webapp/manifest.json
@ -0,0 +1,22 @@
 {
  "name": "Podcast Mindmap",
  "short_name": "Podcast Map",
  "description": "Interaktive Podcast-Mindmaps mit Audio-Playback und semantischer Suche",
  "start_url": "/",
  "display": "standalone",
  "background_color": "#0f1117",
  "theme_color": "#0f1117",
  "orientation": "any",
  "icons": [
    {
      "src": "icon-192.png",
      "sizes": "192x192",
      "type": "image/png"
    },
    {
      "src": "icon-512.png",
      "sizes": "512x512",
      "type": "image/png"
    }
  ]
 }
--- a/webapp/sw.js
+++ b/webapp/sw.js
@ -0,0 +1,77 @@
 // Name of the single cache this worker uses. The activate handler deletes
 // every cache whose name differs, so changing this string discards all
 // previously cached entries.
 const CACHE_NAME = 'podcast-mindmap-v1';
 // Core assets to cache immediately (added to the cache in the install handler)
 const CORE_ASSETS = [
  '/',
  '/index.html',
  '/d3.v7.min.js',
  '/manifest.json'
 ];
 // Install: pre-cache the core assets, then activate without waiting for
 // pages controlled by an older worker to close.
 self.addEventListener('install', event => {
   const precache = async () => {
     const cache = await caches.open(CACHE_NAME);
     await cache.addAll(CORE_ASSETS);
     return self.skipWaiting();
   };
   event.waitUntil(precache());
 });
 // Activate: delete every cache except the current one, then take control of
 // already-open pages.
 self.addEventListener('activate', event => {
   const cleanUp = async () => {
     const names = await caches.keys();
     const stale = names.filter(name => name !== CACHE_NAME);
     await Promise.all(stale.map(name => caches.delete(name)));
     return self.clients.claim();
   };
   event.waitUntil(cleanUp());
 });
 // Fetch: network-only for API (JSON error when offline), network-first for
 // audio, stale-while-revalidate for everything else.
 self.addEventListener('fetch', event => {
   const request = event.request;
   // Skip non-GET requests
   if (request.method !== 'GET') return;
   const url = new URL(request.url);

   // Store a copy of `resp` in the cache. The clone is taken synchronously,
   // before the body can be consumed by the page. Write errors (e.g. quota)
   // are swallowed: caching is best effort and must not affect the response.
   const keepCopy = resp => {
     const copy = resp.clone();
     return caches.open(CACHE_NAME)
       .then(cache => cache.put(request, copy))
       .catch(() => {});
   };

   // API calls: always go to the network, never cached. When offline, answer
   // with a 503 so callers checking `resp.ok` see a failure instead of
   // mistaking the error object for real data.
   if (url.pathname.startsWith('/api/')) {
     event.respondWith(
       fetch(request).catch(() =>
         new Response(JSON.stringify({ error: 'offline' }), {
           status: 503,
           statusText: 'Service Unavailable',
           headers: { 'Content-Type': 'application/json' }
         })
       )
     );
     return;
   }

   // Audio files: network-first, cache on success (optional offline playback)
   if (url.pathname.startsWith('/audio/')) {
     event.respondWith(
       fetch(request).then(resp => {
         // Only cache complete responses (not range requests)
         if (resp.status === 200) {
           // Keep the worker alive until the cache write has finished.
           event.waitUntil(keepCopy(resp));
         }
         return resp;
       }).catch(() =>
         // Offline: serve the cached copy, or fail like a network error.
         caches.match(request).then(hit => hit || Response.error())
       )
     );
     return;
   }

   // Static assets + JSON data: stale-while-revalidate
   event.respondWith(
     caches.match(request).then(cached => {
       const network = fetch(request);
       // Refresh the cache in the background. Registered while respondWith is
       // still pending, so the worker stays alive until the write completes
       // even when the cached copy is returned immediately.
       event.waitUntil(
         network.then(resp => (resp.ok ? keepCopy(resp) : undefined), () => undefined)
       );
       // Serve the cached copy right away if there is one; otherwise wait for
       // the network and fail like a network error when that is unavailable.
       return cached || network.catch(() => Response.error());
     })
   );
 });