#8 Multi-Podcast-Dashboard, #9 PWA, #10 Cross-Podcast-Links, #12 Wort-Timestamps

- Backend: /api/compare Endpoint für Podcast-Vergleich (Stats, gemeinsame Topics,
  Top-Querverbindungen), /api/.../words Endpoint für Wort-Timestamps
- Frontend: Podcast-Vergleichsansicht mit Statistiken und Cross-Links,
  Cross-Podcast-Suche-Toggle, semantische Links im Transkript (lazy-loaded),
  Podcast-Switcher mit Zurück-Navigation
- PWA: manifest.json, Service Worker (stale-while-revalidate für Assets,
  network-first für API, cache-on-success für Audio), Icons
- Scripts: transcribe_words.py (mlx-whisper Batch-Transkription mit Wort-Timestamps),
  import_words.py (Wort-Timestamps in DB importieren)
- Dockerfile: PWA-Assets in Container kopieren

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dotty Dotter 2026-04-23 20:53:06 +02:00
parent cb5978132c
commit e678f75ee1
9 changed files with 751 additions and 20 deletions

View File

@ -11,6 +11,8 @@ COPY backend/ .
# Copy webapp as static files # Copy webapp as static files
COPY webapp/index.html webapp/d3.v7.min.js /static/ COPY webapp/index.html webapp/d3.v7.min.js /static/
COPY webapp/manifest.json webapp/sw.js /static/
COPY webapp/icon-192.png webapp/icon-512.png /static/
EXPOSE 8000 EXPOSE 8000

View File

@ -88,6 +88,32 @@ def get_transcript(podcast_id: str, episode_id: str):
return {"paragraphs": [{"start": p["start_time"], "end": p["end_time"], "text": p["text"]} for p in paras]} return {"paragraphs": [{"start": p["start_time"], "end": p["end_time"], "text": p["text"]} for p in paras]}
@app.get("/api/podcasts/{podcast_id}/transcript/{episode_id}/words")
def get_words(podcast_id: str, episode_id: str):
    """Return word-level timestamps for one episode.

    Args:
        podcast_id: ID of the podcast the episode belongs to.
        episode_id: ID of the episode whose words are requested.

    Returns:
        ``{"available": False, "words": []}`` when the ``words`` table cannot
        be queried or holds no rows for this episode; otherwise
        ``{"available": True, "words": [...]}`` with one entry per word
        (``seg``, ``idx``, ``word``, ``start``, ``end``), ordered by segment
        and position within the segment.
    """
    # Imported locally so this handler does not depend on a module-level import.
    # NOTE(review): assumes get_db() hands out a sqlite3 connection — confirm.
    import sqlite3

    db = get_db()
    try:
        rows = db.execute(
            "SELECT segment_idx, word_idx, word, start_time, end_time FROM words "
            "WHERE podcast_id = ? AND episode_id = ? ORDER BY segment_idx, word_idx",
            (podcast_id, episode_id),
        ).fetchall()
    except sqlite3.OperationalError:
        # The words table is optional (it is created by a separate import
        # script); a missing table means "no word data", not a server error.
        return {"words": [], "available": False}
    finally:
        # Single exit point for the connection, whatever happened above.
        db.close()

    if not rows:
        return {"words": [], "available": False}
    return {
        "available": True,
        "words": [
            {
                "seg": w["segment_idx"],
                "idx": w["word_idx"],
                "word": w["word"],
                "start": w["start_time"],
                "end": w["end_time"],
            }
            for w in rows
        ],
    }
@app.get("/api/search") @app.get("/api/search")
def search(q: str = Query(..., min_length=2), podcast_id: Optional[str] = None, limit: int = 50): def search(q: str = Query(..., min_length=2), podcast_id: Optional[str] = None, limit: int = 50):
"""Full-text search across all transcripts.""" """Full-text search across all transcripts."""
@ -213,6 +239,78 @@ def get_precomputed_similar(podcast_id: str, episode_id: str, para_idx: int, lim
} for r in rows] } for r in rows]
@app.get("/api/compare")
def compare_podcasts(a: str = Query(...), b: str = Query(...)):
    """Compare two podcasts: per-podcast stats, topic overlap and cross-links.

    Args:
        a: ID of the first podcast.
        b: ID of the second podcast.

    Returns:
        A dict with ``stats`` (name plus episode/quote/paragraph counts keyed
        by podcast ID), ``shared_topics``, ``only_in`` (topic tags exclusive to
        each podcast), ``cross_links_count`` and ``top_cross_links`` (at most
        20 links ordered by descending score, text previews cut to 150
        characters).

    Raises:
        HTTPException: 404 if either podcast ID is unknown.
    """
    # Imported locally so this handler does not depend on a module-level import.
    # NOTE(review): assumes get_db() hands out a sqlite3 connection — confirm.
    import sqlite3

    topic_sql = (
        "SELECT DISTINCT t.tag FROM topics t JOIN paragraphs p ON t.paragraph_id = p.id "
        "WHERE p.podcast_id = ?"
    )
    # Links are stored directionally, so match both a->b and b->a.
    pair_params = (a, b, b, a)

    db = get_db()
    try:
        # Basic stats
        stats = {}
        for pid in (a, b):
            podcast = db.execute("SELECT * FROM podcasts WHERE id = ?", (pid,)).fetchone()
            if not podcast:
                # The finally clause below closes the connection on this path too.
                raise HTTPException(404, f"Podcast '{pid}' not found")
            ep_count = db.execute(
                "SELECT COUNT(*) as c FROM episodes WHERE podcast_id = ?", (pid,)
            ).fetchone()["c"]
            q_count = db.execute(
                "SELECT COUNT(*) as c FROM quotes WHERE podcast_id = ?", (pid,)
            ).fetchone()["c"]
            p_count = db.execute(
                "SELECT COUNT(*) as c FROM paragraphs WHERE podcast_id = ?", (pid,)
            ).fetchone()["c"]
            stats[pid] = {
                "name": podcast["name"],
                "episodes": ep_count,
                "quotes": q_count,
                "paragraphs": p_count,
            }

        # Shared topics via topic tags
        set_a = {r["tag"] for r in db.execute(topic_sql, (a,)).fetchall()}
        set_b = {r["tag"] for r in db.execute(topic_sql, (b,)).fetchall()}

        # Cross-podcast semantic links (optional feature)
        cross_links = 0
        top_links = []
        try:
            cross_links = db.execute(
                "SELECT COUNT(*) as c FROM semantic_links WHERE "
                "(podcast_id = ? AND target_podcast = ?) OR (podcast_id = ? AND target_podcast = ?)",
                pair_params,
            ).fetchone()["c"]
            top_links = db.execute(
                "SELECT sl.*, p1.text as source_text, p2.text as target_text, "
                "e1.title as source_title, e2.title as target_title "
                "FROM semantic_links sl "
                "JOIN paragraphs p1 ON sl.podcast_id = p1.podcast_id AND sl.source_episode = p1.episode_id AND sl.source_idx = p1.idx "
                "JOIN paragraphs p2 ON sl.target_podcast = p2.podcast_id AND sl.target_episode = p2.episode_id AND sl.target_idx = p2.idx "
                "JOIN episodes e1 ON sl.podcast_id = e1.podcast_id AND sl.source_episode = e1.id "
                "JOIN episodes e2 ON sl.target_podcast = e2.podcast_id AND sl.target_episode = e2.id "
                "WHERE (sl.podcast_id = ? AND sl.target_podcast = ?) OR (sl.podcast_id = ? AND sl.target_podcast = ?) "
                "ORDER BY sl.score DESC LIMIT 20",
                pair_params,
            ).fetchall()
        except sqlite3.OperationalError:
            # semantic_links table may not exist yet: report zero links rather
            # than failing the whole comparison.
            pass
    finally:
        db.close()

    return {
        "stats": stats,
        "shared_topics": sorted(set_a & set_b),
        "only_in": {a: sorted(set_a - set_b), b: sorted(set_b - set_a)},
        "cross_links_count": cross_links,
        "top_cross_links": [
            {
                "source_podcast": r["podcast_id"],
                "source_episode": r["source_episode"],
                # "or ''" keeps a NULL paragraph text from breaking the slice.
                "source_text": (r["source_text"] or "")[:150],
                "source_title": r["source_title"],
                "target_podcast": r["target_podcast"],
                "target_episode": r["target_episode"],
                "target_text": (r["target_text"] or "")[:150],
                "target_title": r["target_title"],
                "score": r["score"],
            }
            for r in top_links
        ],
    }
@app.get("/api/semantic-search") @app.get("/api/semantic-search")
def semantic_search(q: str = Query(..., min_length=3), podcast_id: Optional[str] = None, limit: int = 20): def semantic_search(q: str = Query(..., min_length=3), podcast_id: Optional[str] = None, limit: int = 20):
"""Semantic search using query embedding.""" """Semantic search using query embedding."""

95
scripts/import_words.py Normal file
View File

@ -0,0 +1,95 @@
#!/usr/bin/env python3
"""Importiert Wort-Level-Timestamps in die SQLite-Datenbank.
Liest *.words.json-Dateien und schreibt in die Tabelle `words`.
Nutzung:
python3 import_words.py <podcast_id> <words-json-verzeichnis> [db-pfad]
Beispiel:
python3 import_words.py neu-denken ../data/neu-denken/words/ ../data/db.sqlite
"""
import json
import os
import sys
import sqlite3
from pathlib import Path
def init_words_table(db):
    """Create the ``words`` table and its two lookup indexes if missing.

    Args:
        db: Open SQLite connection; the statements run via ``executescript``.
    """
    schema = """
        CREATE TABLE IF NOT EXISTS words (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            podcast_id TEXT NOT NULL,
            episode_id TEXT NOT NULL,
            segment_idx INTEGER NOT NULL,
            word_idx INTEGER NOT NULL,
            word TEXT NOT NULL,
            start_time REAL NOT NULL,
            end_time REAL NOT NULL,
            UNIQUE(podcast_id, episode_id, segment_idx, word_idx)
        );
        CREATE INDEX IF NOT EXISTS idx_words_episode ON words(podcast_id, episode_id);
        CREATE INDEX IF NOT EXISTS idx_words_time ON words(podcast_id, episode_id, start_time);
    """
    # IF NOT EXISTS on every statement makes repeated calls harmless.
    db.executescript(schema)
def import_words_file(db, podcast_id: str, words_file: Path):
    """Import one ``*.words.json`` file into the ``words`` table.

    Existing rows for the same podcast/episode are replaced, so re-running the
    import is safe. Nothing is committed here; the caller owns the transaction.

    Args:
        db: Open SQLite connection with the ``words`` table present.
        podcast_id: Podcast the words belong to.
        words_file: JSON file with an ``episode`` name and a ``segments`` list
            whose items may carry a ``words`` list of ``word``/``start``/``end``.

    Returns:
        Number of word rows inserted.

    Raises:
        KeyError: If ``episode`` or a word's ``word``/``start``/``end`` is missing.
    """
    # The transcripts contain non-ASCII text; decode as UTF-8 explicitly
    # instead of relying on the platform's locale encoding.
    data = json.loads(words_file.read_text(encoding="utf-8"))

    # Episode ID is the part before the first dash: S1E1-Wachstum -> S1E1
    episode_id = data["episode"].split("-")[0]

    # Build all rows first so a malformed file fails before anything is deleted.
    rows = [
        (podcast_id, episode_id, seg_idx, word_idx, w["word"], w["start"], w["end"])
        for seg_idx, segment in enumerate(data.get("segments", []))
        for word_idx, w in enumerate(segment.get("words", []))
    ]

    # Drop previous rows for this episode, then insert the fresh set in one batch.
    db.execute("DELETE FROM words WHERE podcast_id = ? AND episode_id = ?", (podcast_id, episode_id))
    db.executemany(
        "INSERT INTO words (podcast_id, episode_id, segment_idx, word_idx, word, start_time, end_time) "
        "VALUES (?, ?, ?, ?, ?, ?, ?)",
        rows,
    )
    return len(rows)
def main():
    """Command-line entry point: import every ``*.words.json`` of a directory.

    Reads ``<podcast_id> <words-dir> [db-path]`` from ``sys.argv``; the database
    path falls back to the ``DB_PATH`` environment variable and then to
    ``data/db.sqlite``. Exits with status 1 on missing arguments or when the
    directory contains no matching files. All files are imported in a single
    transaction that is committed only after the last file succeeded.
    """
    if len(sys.argv) < 3:
        print(f"Nutzung: {sys.argv[0]} <podcast_id> <words-verzeichnis> [db-pfad]")
        sys.exit(1)

    podcast_id = sys.argv[1]
    words_dir = Path(sys.argv[2])
    db_path = sys.argv[3] if len(sys.argv) > 3 else os.environ.get("DB_PATH", "data/db.sqlite")

    # Validate the input before touching the database: sqlite3.connect() would
    # otherwise create an empty database file even when there is nothing to do.
    files = sorted(words_dir.glob("*.words.json"))
    if not files:
        print(f"Keine *.words.json-Dateien in {words_dir} gefunden.")
        sys.exit(1)

    print(f"Importiere {len(files)} Dateien für Podcast '{podcast_id}'")

    db = sqlite3.connect(db_path)
    try:
        init_words_table(db)
        total_words = 0
        for f in files:
            count = import_words_file(db, podcast_id, f)
            print(f"  {f.stem}: {count} Wörter")
            total_words += count
        db.commit()
    finally:
        # Closing without a commit discards a partially imported batch.
        db.close()

    print(f"Fertig: {total_words} Wörter importiert.")


if __name__ == "__main__":
    main()

138
scripts/transcribe_words.py Normal file
View File

@ -0,0 +1,138 @@
#!/usr/bin/env python3
"""Batch-Transkription mit wortgenauen Timestamps via mlx-whisper.
Erzeugt pro Episode eine JSON-Datei mit Wort-Level-Timing.
Läuft auf Apple Silicon (mlx-metal).
Nutzung:
python3 transcribe_words.py /pfad/zu/audio/ /pfad/zu/output/
python3 transcribe_words.py /pfad/zu/audio/S1E1-Wachstum.m4a # einzelne Datei
Modell: whisper-large-v3-turbo (schnell + genau, ~1.5 GB VRAM)
"""
import json
import os
import sys
import time
from pathlib import Path
# ── Config ──
MODEL = "mlx-community/whisper-large-v3-turbo"
LANGUAGE = "de"
AUDIO_EXTENSIONS = {".m4a", ".mp3", ".wav", ".flac", ".ogg", ".opus"}
def transcribe_episode(audio_path: str, output_dir: str) -> dict:
    """Transcribe one episode with word-level timestamps.

    Writes ``<stem>.words.json`` into ``output_dir``. If that file already
    exists the audio is not transcribed again and the stored JSON is returned.

    Args:
        audio_path: Path to the audio file; its stem names the episode.
        output_dir: Directory for the resulting ``*.words.json`` file.

    Returns:
        The transcript dict: ``episode``, ``model``, ``language``,
        ``duration_seconds`` (wall-clock time the transcription took, not the
        audio length), ``word_count``, ``segment_count`` and ``segments``
        (each with ``start``, ``end``, ``text`` and its ``words``).
    """
    name = Path(audio_path).stem
    output_file = Path(output_dir) / f"{name}.words.json"

    # Skip if already present
    if output_file.exists():
        print(f"{name} — bereits vorhanden, überspringe")
        return json.loads(output_file.read_text(encoding="utf-8"))

    # Imported only when real work is needed, so cached results can be read
    # without mlx-whisper being importable.
    import mlx_whisper

    print(f"{name} — transkribiere…")
    t0 = time.time()
    result = mlx_whisper.transcribe(
        audio_path,
        path_or_hf_repo=MODEL,
        language=LANGUAGE,
        word_timestamps=True,
        verbose=False,
        condition_on_previous_text=True,
        initial_prompt="NEU DENKEN Podcast mit Maja Göpel. Themen: Wirtschaft, Demokratie, Sicherheit, Freiheit.",
    )
    elapsed = time.time() - t0

    # Keep the segment level (for paragraph mapping) with the words nested
    # inside; timestamps are rounded to milliseconds.
    segments = [
        {
            "start": round(seg["start"], 3),
            "end": round(seg["end"], 3),
            "text": seg["text"].strip(),
            "words": [
                {
                    "word": w["word"].strip(),
                    "start": round(w["start"], 3),
                    "end": round(w["end"], 3),
                }
                for w in seg.get("words", [])
            ],
        }
        for seg in result.get("segments", [])
    ]
    word_count = sum(len(seg["words"]) for seg in segments)

    output = {
        "episode": name,
        "model": MODEL,
        "language": LANGUAGE,
        "duration_seconds": round(elapsed, 1),
        "word_count": word_count,
        "segment_count": len(segments),
        "segments": segments,
    }

    # ensure_ascii=False emits raw non-ASCII characters, so the file encoding
    # must be pinned rather than left to the locale.
    output_file.write_text(json.dumps(output, ensure_ascii=False, indent=2), encoding="utf-8")
    print(f"{name} — {word_count} Wörter, {len(segments)} Segmente, {elapsed:.0f}s")
    return output
def main():
    """Command-line entry point: transcribe one audio file or a directory.

    Reads ``<audio-path-or-dir> [output-dir]`` from ``sys.argv``. Without an
    explicit output directory, results go next to the audio (the directory
    itself, or the file's parent). Exits with status 1 on missing arguments, a
    non-existent input, no audio files, or when at least one episode failed;
    a failing episode does not stop the remaining ones.
    """
    if len(sys.argv) < 2:
        print(f"Nutzung: {sys.argv[0]} <audio-pfad-oder-verzeichnis> [output-verzeichnis]")
        sys.exit(1)

    input_path = Path(sys.argv[1])

    # Single file or directory?
    if input_path.is_file():
        files = [input_path]
    elif input_path.is_dir():
        files = sorted(f for f in input_path.iterdir() if f.suffix.lower() in AUDIO_EXTENSIONS)
    else:
        print(f"Fehler: {input_path} existiert nicht.")
        sys.exit(1)

    if not files:
        print("Keine Audio-Dateien gefunden.")
        sys.exit(1)

    # Resolve and create the output directory only once the input is known to
    # be valid, so a mistyped path does not leave empty directories behind.
    if len(sys.argv) > 2:
        output_dir = Path(sys.argv[2])
    elif input_path.is_dir():
        output_dir = input_path
    else:
        output_dir = input_path.parent
    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"Transkribiere {len(files)} Dateien → {output_dir}/")
    print(f"Modell: {MODEL}")
    print()

    total_t0 = time.time()
    results = []
    for i, f in enumerate(files, 1):
        print(f"[{i}/{len(files)}] {f.name}")
        try:
            results.append(transcribe_episode(str(f), str(output_dir)))
        except Exception as e:
            # Batch boundary: report the failure and continue with the next file.
            print(f" ✗ FEHLER: {e}")

    total_elapsed = time.time() - total_t0
    total_words = sum(r.get("word_count", 0) for r in results)
    print()
    print(f"Fertig: {len(results)}/{len(files)} Episoden, {total_words} Wörter, {total_elapsed:.0f}s gesamt")

    # Signal partial failure to calling shells and schedulers.
    if len(results) < len(files):
        sys.exit(1)


if __name__ == "__main__":
    main()

BIN
webapp/icon-192.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 KiB

BIN
webapp/icon-512.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.8 KiB

View File

@ -3,6 +3,10 @@
<head> <head>
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="theme-color" content="#0f1117">
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
<link rel="manifest" href="manifest.json">
<title>Podcast Mindmap</title> <title>Podcast Mindmap</title>
<style> <style>
:root { :root {
@ -273,8 +277,63 @@
transition: all 0.2s; transition: all 0.2s;
} }
.podcast-card:hover { border-color: var(--accent); transform: translateY(-2px); } .podcast-card:hover { border-color: var(--accent); transform: translateY(-2px); }
/* A hex alpha suffix cannot be appended to var(): the substituted value becomes
   two tokens and the declaration is dropped. color-mix() produces the tint
   (hex 11 is roughly 7% opacity). */
.podcast-card.selected { border-color: var(--accent); background: color-mix(in srgb, var(--accent) 7%, transparent); }
.podcast-card h3 { font-size: 15px; margin-bottom: 4px; } .podcast-card h3 { font-size: 15px; margin-bottom: 4px; }
.podcast-card p { font-size: 12px; color: var(--text-muted); } .podcast-card p { font-size: 12px; color: var(--text-muted); }
.podcast-card .pc-stats { font-size: 11px; color: var(--text-muted); margin-top: 6px; }
/* ── Compare View ── */
.compare-section { margin-top: 16px; }
.compare-section h3 { font-size: 13px; color: var(--text); margin-bottom: 8px; }
.compare-stats { display: flex; gap: 12px; justify-content: center; margin-bottom: 16px; }
.compare-stat-card {
background: var(--surface2); border-radius: 8px; padding: 12px 16px;
text-align: center; flex: 1; max-width: 200px;
}
.compare-stat-card .stat-val { font-size: 20px; font-weight: 700; color: var(--accent); }
.compare-stat-card .stat-label { font-size: 10px; color: var(--text-muted); }
.shared-topics { display: flex; flex-wrap: wrap; gap: 4px; margin: 8px 0; }
/* Translucent accent tints via color-mix(); "var(--accent)22" is not a valid
   colour after substitution (hex 22 is roughly 13%, hex 44 roughly 27%). */
.shared-topic {
  background: color-mix(in srgb, var(--accent) 13%, transparent);
  color: var(--accent);
  border: 1px solid color-mix(in srgb, var(--accent) 27%, transparent);
  padding: 2px 8px; border-radius: 10px; font-size: 10px; cursor: pointer;
}
.shared-topic:hover { background: color-mix(in srgb, var(--accent) 27%, transparent); }
.cross-link-card {
background: var(--surface2); border-radius: 8px; padding: 10px;
margin-bottom: 6px; border-left: 3px solid #2a9d8f;
}
.cross-link-card .cl-source, .cross-link-card .cl-target {
font-size: 11px; line-height: 1.4;
}
.cross-link-card .cl-label { font-size: 9px; color: var(--text-muted); text-transform: uppercase; }
.cross-link-card .cl-arrow { text-align: center; color: #2a9d8f; font-size: 10px; margin: 4px 0; }
.cross-link-card .cl-score { font-size: 9px; color: #2a9d8f; float: right; }
.compare-actions { display: flex; gap: 8px; justify-content: center; margin: 12px 0; }
.compare-btn {
background: var(--surface2); border: 1px solid var(--border); color: var(--text-muted);
padding: 6px 14px; border-radius: 6px; font-size: 11px; cursor: pointer; transition: all 0.2s;
}
.compare-btn:hover { border-color: var(--accent); color: var(--text); }
.compare-btn.active { background: var(--accent); color: var(--bg); border-color: var(--accent); }
/* ── Cross-podcast toggle ── */
.cross-toggle {
display: flex; align-items: center; gap: 6px; font-size: 11px; color: var(--text-muted);
}
.cross-toggle input[type="checkbox"] { accent-color: var(--accent); }
/* ── Semantic link in transcript ── */
.semantic-link-card {
background: #2a9d8f11; border: 1px solid #2a9d8f33; border-radius: 6px;
padding: 6px 10px; margin: 4px 0; cursor: pointer; transition: background 0.15s;
}
.semantic-link-card:hover { background: #2a9d8f22; }
.semantic-link-card .sl-podcast { font-size: 9px; color: #2a9d8f; text-transform: uppercase; }
.semantic-link-card .sl-episode { font-size: 10px; color: var(--accent); font-weight: 600; }
.semantic-link-card .sl-text { font-size: 11px; color: var(--text-muted); font-style: italic; }
/* ── Semantic results ── */ /* ── Semantic results ── */
.semantic-badge { .semantic-badge {
@ -301,6 +360,9 @@
<header> <header>
<h1><span id="app-title">Podcast</span> Mindmap</h1> <h1><span id="app-title">Podcast</span> Mindmap</h1>
<input type="search" class="search-box" id="search-input" placeholder="Transkripte durchsuchen…"> <input type="search" class="search-box" id="search-input" placeholder="Transkripte durchsuchen…">
<label class="cross-toggle" id="cross-toggle" style="display:none" title="Suche über alle Podcasts">
<input type="checkbox" id="cross-search-cb"> Alle Podcasts
</label>
<div class="view-tabs"> <div class="view-tabs">
<button class="view-tab active" id="tab-mindmap" onclick="switchView('mindmap')">Mindmap</button> <button class="view-tab active" id="tab-mindmap" onclick="switchView('mindmap')">Mindmap</button>
<button class="view-tab" id="tab-timeline" onclick="switchView('timeline')">Timeline</button> <button class="view-tab" id="tab-timeline" onclick="switchView('timeline')">Timeline</button>
@ -590,10 +652,13 @@ const Search = {
async run(query) { async run(query) {
if (query.length < 3) { this.clear(); return; } if (query.length < 3) { this.clear(); return; }
const crossSearch = document.getElementById('cross-search-cb')?.checked;
const pidParam = crossSearch ? '' : (CURRENT_PODCAST ? `&podcast_id=${CURRENT_PODCAST}` : '');
// Try semantic search via API first // Try semantic search via API first
if (CURRENT_PODCAST) { if (CURRENT_PODCAST || crossSearch) {
try { try {
const resp = await fetch(`${API_BASE}/api/semantic-search?q=${encodeURIComponent(query)}&podcast_id=${CURRENT_PODCAST}`); const resp = await fetch(`${API_BASE}/api/semantic-search?q=${encodeURIComponent(query)}${pidParam}`);
if (resp.ok) { if (resp.ok) {
const apiResults = await resp.json(); const apiResults = await resp.json();
if (apiResults.length > 0) { if (apiResults.length > 0) {
@ -604,7 +669,7 @@ const Search = {
} catch (e) {} } catch (e) {}
// Fallback to text search via API // Fallback to text search via API
try { try {
const resp = await fetch(`${API_BASE}/api/search?q=${encodeURIComponent(query)}&podcast_id=${CURRENT_PODCAST}`); const resp = await fetch(`${API_BASE}/api/search?q=${encodeURIComponent(query)}${pidParam}`);
if (resp.ok) { if (resp.ok) {
const apiResults = await resp.json(); const apiResults = await resp.json();
if (apiResults.length > 0) { if (apiResults.length > 0) {
@ -790,42 +855,193 @@ async function selectPodcast(podcastId) {
const resp = await fetch(`${API_BASE}/api/podcasts/${podcastId}`); const resp = await fetch(`${API_BASE}/api/podcasts/${podcastId}`);
DATA = await resp.json(); DATA = await resp.json();
CURRENT_PODCAST = podcastId; CURRENT_PODCAST = podcastId;
// Clear existing graph if switching // Clear existing graph + timeline if switching
document.getElementById('svg').innerHTML = ''; document.getElementById('svg').innerHTML = '';
document.getElementById('staffel-filters').innerHTML = '';
const tl = document.getElementById('timeline-container');
if (tl) { tl.remove(); timelineBuilt = false; }
// Reset to mindmap view
document.getElementById('mindmap').style.display = '';
document.querySelectorAll('.view-tab').forEach(t => t.classList.remove('active'));
document.getElementById('tab-mindmap')?.classList.add('active');
TRANSCRIPTS = null;
init(); init();
} catch (e) { } catch (e) {
console.error('Failed to load podcast:', e); console.error('Failed to load podcast:', e);
} }
} }
let ALL_PODCASTS = [];
function showPodcastSelector(podcasts) { function showPodcastSelector(podcasts) {
ALL_PODCASTS = podcasts;
const panel = document.getElementById('panel'); const panel = document.getElementById('panel');
const mindmap = document.getElementById('mindmap'); const mindmap = document.getElementById('mindmap');
let html = '<div class="podcast-selector">'; // Show cross-search toggle if multiple podcasts
if (podcasts.length > 1) {
document.getElementById('cross-toggle').style.display = '';
}
let html = '<div class="welcome"><h2>Podcast Mindmap</h2><p>Wähle einen Podcast oder vergleiche zwei.</p></div>';
html += '<div class="podcast-selector" id="podcast-selector">';
podcasts.forEach(p => { podcasts.forEach(p => {
html += `<div class="podcast-card" onclick="selectPodcast('${p.id}')">`; html += `<div class="podcast-card" id="pc-${p.id}" onclick="selectPodcast('${p.id}')">`;
html += `<h3>${escHtml(p.name)}</h3>`; html += `<h3>${escHtml(p.name)}</h3>`;
html += `<p>${escHtml(p.description || '')}</p>`; html += `<p>${escHtml(p.description || '')}</p>`;
html += `</div>`; html += `</div>`;
}); });
html += '</div>'; html += '</div>';
if (podcasts.length > 1) {
html += '<div class="compare-actions">';
html += '<button class="compare-btn" onclick="startCompare()">Podcasts vergleichen</button>';
html += '</div>';
}
html += '<div id="compare-result"></div>';
panel.innerHTML = html; panel.innerHTML = html;
// Also set welcome document.getElementById('app-title').textContent = 'Podcast';
document.getElementById('app-title').textContent = 'Podcast Mindmap';
document.title = 'Podcast Mindmap'; document.title = 'Podcast Mindmap';
} }
// ── #8: Compare Podcasts ──
let compareMode = false;
let compareSelection = [];
/**
 * Enter compare mode: clear any previous selection, show a hint and make the
 * podcast cards toggle their selection instead of opening the podcast.
 */
function startCompare() {
  // Comparing needs at least two podcasts to pick from.
  if (ALL_PODCASTS.length < 2) return;

  compareMode = true;
  compareSelection = [];

  for (const cardEl of document.querySelectorAll('.podcast-card')) {
    cardEl.classList.remove('selected');
  }

  const resultEl = document.getElementById('compare-result');
  if (resultEl) {
    resultEl.innerHTML = '<p class="subtitle" style="text-align:center">Wähle zwei Podcasts zum Vergleichen.</p>';
  }

  // Temporarily replace the cards' click handlers with the selection toggle.
  for (const podcast of ALL_PODCASTS) {
    const cardEl = document.getElementById(`pc-${podcast.id}`);
    if (cardEl) cardEl.onclick = () => toggleCompareSelect(podcast.id);
  }
}
/**
 * Toggle one podcast card in the compare selection. At most two podcasts can
 * be selected; once two are selected the comparison is started.
 */
function toggleCompareSelect(id) {
  const cardEl = document.getElementById(`pc-${id}`);
  const pos = compareSelection.indexOf(id);

  if (pos < 0) {
    // Not selected yet: ignore the click when two are already chosen.
    if (compareSelection.length >= 2) return;
    compareSelection.push(id);
    cardEl.classList.add('selected');
  } else {
    // Already selected: clicking again deselects.
    compareSelection.splice(pos, 1);
    cardEl.classList.remove('selected');
  }

  if (compareSelection.length === 2) {
    const [first, second] = compareSelection;
    runCompare(first, second);
  }
}
/**
 * Fetch the comparison of podcasts `a` and `b` from /api/compare and render
 * it into #compare-result. Shows a fallback message if the request fails.
 */
async function runCompare(a, b) {
  const result = document.getElementById('compare-result');
  if (!result) return;
  result.innerHTML = '<p class="subtitle" style="text-align:center">Vergleiche…</p>';
  try {
    // IDs go into the query string, so they must be URL-encoded.
    const query = `a=${encodeURIComponent(a)}&b=${encodeURIComponent(b)}`;
    const resp = await fetch(`${API_BASE}/api/compare?${query}`);
    if (!resp.ok) throw new Error('API error');
    const data = await resp.json();
    showCompareResult(data, a, b);
  } catch (e) {
    // Keep the user-facing message, but leave a trace for debugging.
    console.error('Failed to compare podcasts:', e);
    result.innerHTML = '<p class="subtitle" style="text-align:center">Vergleich nicht verfügbar.</p>';
  }
}
/**
 * Render the /api/compare response for podcasts `a` and `b` into
 * #compare-result: summary stats, shared topics (clickable), the ten strongest
 * cross-links and buttons to open either podcast. Leaves compare mode
 * afterwards and restores the normal card click behaviour.
 */
function showCompareResult(data, a, b) {
  const result = document.getElementById('compare-result');
  if (!result) return;
  const sa = data.stats[a], sb = data.stats[b];

  let html = '<div class="compare-section">';
  html += '<h3 style="text-align:center">Vergleich</h3>';

  // Stats
  html += '<div class="compare-stats">';
  html += `<div class="compare-stat-card"><div class="stat-val">${sa.episodes + sb.episodes}</div><div class="stat-label">Episoden gesamt</div></div>`;
  html += `<div class="compare-stat-card"><div class="stat-val">${data.shared_topics.length}</div><div class="stat-label">Gemeinsame Themen</div></div>`;
  html += `<div class="compare-stat-card"><div class="stat-val">${data.cross_links_count}</div><div class="stat-label">Semantische Querverbindungen</div></div>`;
  html += '</div>';

  // Shared topics. Click handlers are attached after rendering (below) so
  // topic names never end up inside inline JavaScript.
  if (data.shared_topics.length > 0) {
    html += '<h3>Gemeinsame Themen</h3>';
    html += '<div class="shared-topics">';
    data.shared_topics.forEach(t => {
      html += `<span class="shared-topic">${escHtml(t.replace(/_/g, ' '))}</span>`;
    });
    html += '</div>';
  }

  // Top cross-links
  if (data.top_cross_links.length > 0) {
    html += '<h3 style="margin-top:12px">Stärkste Querverbindungen</h3>';
    data.top_cross_links.slice(0, 10).forEach(link => {
      html += '<div class="cross-link-card">';
      html += `<div class="cl-score">${(link.score * 100).toFixed(0)}%</div>`;
      html += `<div class="cl-label">${escHtml(link.source_podcast)}</div>`;
      html += `<div class="cl-source"><strong>${escHtml(link.source_title)}</strong>: ${escHtml(link.source_text)}</div>`;
      html += '<div class="cl-arrow"></div>';
      html += `<div class="cl-label">${escHtml(link.target_podcast)}</div>`;
      html += `<div class="cl-target"><strong>${escHtml(link.target_title)}</strong>: ${escHtml(link.target_text)}</div>`;
      html += '</div>';
    });
  }

  // Buttons to open either podcast; data-open only carries the fixed keys
  // "a"/"b", the real IDs stay in JavaScript.
  html += '<div class="compare-actions" style="margin-top:16px">';
  html += `<button class="compare-btn" data-open="a">${escHtml(sa.name)} öffnen</button>`;
  html += `<button class="compare-btn" data-open="b">${escHtml(sb.name)} öffnen</button>`;
  html += '</div>';

  html += '</div>';
  result.innerHTML = html;

  // Wire up interactions. The topic spans were rendered in array order, so the
  // index maps each element back to its unmodified topic name.
  result.querySelectorAll('.shared-topic').forEach((el, i) => {
    el.addEventListener('click', () => searchTopic(data.shared_topics[i]));
  });
  const openTargets = { a, b };
  result.querySelectorAll('[data-open]').forEach(btn => {
    btn.addEventListener('click', () => selectPodcast(openTargets[btn.dataset.open]));
  });

  // Reset compare mode
  compareMode = false;
  ALL_PODCASTS.forEach(p => {
    const card = document.getElementById(`pc-${p.id}`);
    if (card) card.onclick = () => selectPodcast(p.id);
  });
}
/**
 * Run a search for a topic tag across all podcasts: underscores in the tag
 * become spaces and the cross-podcast checkbox is switched on.
 */
function searchTopic(topic) {
  const searchBox = document.getElementById('search-input');
  const crossCb = document.getElementById('cross-search-cb');

  searchBox.value = topic.replace(/_/g, ' ');
  // Topic searches always span every podcast.
  crossCb.checked = true;
  Search.run(searchBox.value);
}
// Back to podcast list
/**
 * Leave the current podcast: clear the graph and the season filters, then
 * show the podcast selector again.
 */
function showPodcastList() {
  CURRENT_PODCAST = null;
  for (const elementId of ['svg', 'staffel-filters']) {
    document.getElementById(elementId).innerHTML = '';
  }
  showPodcastSelector(ALL_PODCASTS);
}
loadApp(); loadApp();
function init() { function init() {
const name = DATA.name || 'Podcast'; const name = DATA.name || 'Podcast';
document.title = name + ' — Mindmap'; document.title = name + ' — Mindmap';
document.getElementById('app-title').textContent = name; document.getElementById('app-title').innerHTML = ALL_PODCASTS.length > 1
? `<span style="cursor:pointer" onclick="showPodcastList()" title="Zurück zur Übersicht"></span> <span>${escHtml(name)}</span>`
: `<span>${escHtml(name)}</span>`;
document.getElementById('welcome-panel').innerHTML = ` document.getElementById('welcome-panel').innerHTML = `
<h2>${name}</h2> <h2>${escHtml(name)}</h2>
<p>${DATA.description || ''}<br> <p>${escHtml(DATA.description || '')}<br>
${DATA.episodes.length} Folgen, ${DATA.staffeln.length} Staffeln, ${DATA.quotes.length} Zitate</p> ${DATA.episodes.length} Folgen, ${DATA.staffeln.length} Staffeln, ${DATA.quotes.length} Zitate</p>
<p style="margin-top:16px">Klicke auf einen Themenknoten oder eine Episode.</p>`; <p style="margin-top:16px">Klicke auf einen Themenknoten oder eine Episode.</p>`;
@ -1172,7 +1388,7 @@ function buildTopicTags(episodeKey, paraIdx) {
'</div>'; '</div>';
} }
// Patch TranscriptView.show to include backlinks // Patch TranscriptView.show to include backlinks + semantic cross-links
const _origTranscriptShow = TranscriptView.show.bind(TranscriptView); const _origTranscriptShow = TranscriptView.show.bind(TranscriptView);
TranscriptView.show = async function(episodeId, seekTime) { TranscriptView.show = async function(episodeId, seekTime) {
await loadTopics(); await loadTopics();
@ -1180,19 +1396,95 @@ TranscriptView.show = async function(episodeId, seekTime) {
// Find the episode key in TOPICS // Find the episode key in TOPICS
const epKey = Object.keys(TOPICS.tagged_paragraphs || {}).find(k => k.startsWith(episodeId)); const epKey = Object.keys(TOPICS.tagged_paragraphs || {}).find(k => k.startsWith(episodeId));
if (!epKey) return;
// Add topic tags and backlinks to each paragraph // Add topic tags, backlinks, and semantic cross-links to each paragraph
document.querySelectorAll('.transcript-para').forEach(el => { const paraEls = document.querySelectorAll('.transcript-para');
for (const el of paraEls) {
const idx = parseInt(el.dataset.idx); const idx = parseInt(el.dataset.idx);
const tags = buildTopicTags(epKey, idx);
const links = buildBacklinks(epKey, idx); // Topic tags + keyword backlinks
if (tags || links) { if (epKey) {
el.insertAdjacentHTML('beforeend', tags + links); const tags = buildTopicTags(epKey, idx);
const links = buildBacklinks(epKey, idx);
if (tags || links) {
el.insertAdjacentHTML('beforeend', tags + links);
}
} }
});
// #10: Semantic cross-podcast links (lazy-load on expand)
if (CURRENT_PODCAST) {
const expandBtn = document.createElement('span');
expandBtn.className = 'backlink';
expandBtn.style.fontSize = '10px';
expandBtn.style.color = '#2a9d8f';
expandBtn.textContent = '↔ Verwandte Stellen…';
expandBtn.onclick = (e) => { e.stopPropagation(); loadSemanticLinks(expandBtn, episodeId, idx); };
el.appendChild(expandBtn);
}
}
}; };
// ============================================================
// #10: Cross-Podcast Semantic Links
// ============================================================
/**
 * Load semantically related passages for one transcript paragraph and replace
 * the trigger element `btn` with the result cards.
 *
 * Tries the precomputed links first and falls back to the live similarity
 * endpoint with cross_podcast=true. Clicking a card opens the passage,
 * switching podcasts first when it belongs to another one. After a failure the
 * trigger can be clicked again to retry.
 */
async function loadSemanticLinks(btn, episodeId, paraIdx) {
  // Pin the podcast this request belongs to; the global may change while the
  // requests are in flight.
  const podcastId = CURRENT_PODCAST;
  btn.textContent = 'Lade…';
  btn.onclick = null; // no duplicate requests while loading

  try {
    // Try precomputed first, then live
    let results = [];
    try {
      const resp = await fetch(`${API_BASE}/api/similar-precomputed/${podcastId}/${episodeId}/${paraIdx}?limit=5`);
      if (resp.ok) results = await resp.json();
    } catch (e) {}
    // Anything that is not a list (e.g. an error object) counts as "no results".
    if (!Array.isArray(results)) results = [];

    if (results.length === 0) {
      const resp = await fetch(`${API_BASE}/api/similar/${podcastId}/${episodeId}/${paraIdx}?limit=5&cross_podcast=true`);
      if (resp.ok) results = await resp.json();
      if (!Array.isArray(results)) results = [];
    }

    if (results.length === 0) {
      btn.textContent = 'Keine verwandten Stellen gefunden.';
      btn.style.cursor = 'default';
      return;
    }

    // Replace button with results
    const container = document.createElement('div');
    container.className = 'backlinks';
    container.innerHTML = '<div class="backlinks-title">Semantisch verwandte Stellen</div>';

    results.forEach(r => {
      const isCross = r.podcast_id !== podcastId;
      const card = document.createElement('div');
      card.className = 'semantic-link-card';
      card.onclick = () => {
        if (isCross) {
          // Switch podcast and navigate
          selectPodcast(r.podcast_id).then(() => {
            TranscriptView.show(r.episode_id, r.start_time);
          });
        } else {
          TranscriptView.show(r.episode_id, r.start_time);
        }
      };

      let inner = '';
      if (isCross) inner += `<div class="sl-podcast">${escHtml(r.podcast_id)}</div>`;
      inner += `<div class="sl-episode">${escHtml(r.episode_id)}: ${escHtml(r.episode_title || '')} — ${escHtml(r.guest || '')}`;
      inner += ` <span class="semantic-badge">${(r.score * 100).toFixed(0)}%</span></div>`;
      inner += `<div class="sl-text">"${escHtml(r.text_preview || '')}"</div>`;
      card.innerHTML = inner;
      container.appendChild(card);
    });

    btn.replaceWith(container);
  } catch (e) {
    console.error('Failed to load semantic links:', e);
    btn.textContent = 'Fehler beim Laden.';
    // Re-arm the trigger so a transient failure can be retried.
    btn.onclick = (ev) => { ev.stopPropagation(); loadSemanticLinks(btn, episodeId, paraIdx); };
  }
}
// ============================================================ // ============================================================
// #6: Soundbite Export // #6: Soundbite Export
// ============================================================ // ============================================================
@ -1387,6 +1679,13 @@ function buildTimeline() {
timelineBuilt = true; timelineBuilt = true;
} }
// ============================================================
// #9: PWA — Service Worker Registration
// ============================================================
if ('serviceWorker' in navigator) {
  // Offline support is optional: a failed registration must not break the app,
  // but it is logged so a broken or missing sw.js gets noticed.
  navigator.serviceWorker.register('sw.js').catch(err => {
    console.warn('Service worker registration failed:', err);
  });
}
function showQuoteInPanel(quoteId) { function showQuoteInPanel(quoteId) {
const q = DATA.quotes.find(q => q.id === quoteId); const q = DATA.quotes.find(q => q.id === quoteId);
if (!q) return; if (!q) return;

22
webapp/manifest.json Normal file
View File

@ -0,0 +1,22 @@
{
"name": "Podcast Mindmap",
"short_name": "Podcast Map",
"description": "Interaktive Podcast-Mindmaps mit Audio-Playback und semantischer Suche",
"start_url": "/",
"display": "standalone",
"background_color": "#0f1117",
"theme_color": "#0f1117",
"orientation": "any",
"icons": [
{
"src": "icon-192.png",
"sizes": "192x192",
"type": "image/png"
},
{
"src": "icon-512.png",
"sizes": "512x512",
"type": "image/png"
}
]
}

77
webapp/sw.js Normal file
View File

@ -0,0 +1,77 @@
const CACHE_NAME = 'podcast-mindmap-v1';

// Core assets to cache immediately
const CORE_ASSETS = [
  '/',
  '/index.html',
  '/d3.v7.min.js',
  '/manifest.json'
];

// Store a copy of `response` for `event.request` without delaying the reply.
// waitUntil keeps the worker alive until the write finished; a failed write
// (quota, unsupported response type) is ignored because caching is best-effort.
function cacheCopy(event, response) {
  const copy = response.clone();
  event.waitUntil(
    caches.open(CACHE_NAME)
      .then(cache => cache.put(event.request, copy))
      .catch(() => {})
  );
}

// Install: cache core assets
self.addEventListener('install', event => {
  event.waitUntil(
    caches.open(CACHE_NAME)
      .then(cache => cache.addAll(CORE_ASSETS))
      .then(() => self.skipWaiting())
  );
});

// Activate: clean old caches
self.addEventListener('activate', event => {
  event.waitUntil(
    caches.keys().then(keys =>
      Promise.all(keys.filter(k => k !== CACHE_NAME).map(k => caches.delete(k)))
    ).then(() => self.clients.claim())
  );
});

// Fetch: network-only for API, network-first for audio,
// stale-while-revalidate for everything else
self.addEventListener('fetch', event => {
  const url = new URL(event.request.url);

  // Skip non-GET requests
  if (event.request.method !== 'GET') return;

  // API calls: always go to the network, never cached. When offline, answer
  // with an explicit 503 so callers checking `resp.ok` do not mistake the
  // error object for real data.
  if (url.pathname.startsWith('/api/')) {
    event.respondWith(
      fetch(event.request).catch(() =>
        new Response(JSON.stringify({ error: 'offline' }), {
          status: 503,
          statusText: 'Service Unavailable',
          headers: { 'Content-Type': 'application/json' }
        })
      )
    );
    return;
  }

  // Audio files: network-first, cache on success (optional offline playback)
  if (url.pathname.startsWith('/audio/')) {
    event.respondWith(
      fetch(event.request).then(resp => {
        // Only cache complete responses (not range requests)
        if (resp.status === 200) cacheCopy(event, resp);
        return resp;
      }).catch(() =>
        // Offline: fall back to a cached copy, or fail as a network error.
        caches.match(event.request).then(hit => hit || Response.error())
      )
    );
    return;
  }

  // Static assets + JSON data: stale-while-revalidate
  event.respondWith(
    caches.match(event.request).then(cached => {
      const fresh = fetch(event.request).then(resp => {
        // Partial (206) responses cannot be stored in the Cache API.
        if (resp.ok && resp.status !== 206) cacheCopy(event, resp);
        return resp;
      });
      if (cached) {
        // Serve the cached copy now and refresh it in the background; a failed
        // refresh simply keeps the current copy.
        event.waitUntil(fresh.catch(() => {}));
        return cached;
      }
      // Nothing cached yet: the network result (or its failure) is the answer.
      return fresh;
    })
  );
});