diff --git a/.gitignore b/.gitignore
index b908d4c..b00a39a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,8 @@
 __pycache__/
 *.pyc
 .DS_Store
+data/
+audio/
+*.sqlite
+*.sqlite-shm
+*.sqlite-wal
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..8c67c2b
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,17 @@
+FROM python:3.12-slim
+
+WORKDIR /app
+
+# Install dependencies
+COPY backend/requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy backend code
+COPY backend/ .
+
+# Copy webapp as static files
+COPY webapp/index.html webapp/d3.v7.min.js /static/
+
+EXPOSE 8000
+
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/backend/app.py b/backend/app.py
new file mode 100644
index 0000000..2591681
--- /dev/null
+++ b/backend/app.py
@@ -0,0 +1,274 @@
+"""FastAPI backend for podcast-mindmap."""
+
+import json
+import os
+import numpy as np
+from pathlib import Path
+from typing import Optional
+from fastapi import FastAPI, Query, HTTPException
+from fastapi.staticfiles import StaticFiles
+from fastapi.responses import FileResponse
+
+from database import get_db, init_db, get_all_embeddings
+
+app = FastAPI(title="Podcast Mindmap API")
+
+DATA_DIR = os.environ.get("DATA_DIR", "/data")
+AUDIO_DIR = os.environ.get("AUDIO_DIR", "/audio")
+STATIC_DIR = os.environ.get("STATIC_DIR", "/static")
+
+# Cache embeddings in memory
+_embeddings_cache = {}
+
+
+def _load_embeddings(podcast_id: Optional[str] = None):
+    """Load and cache embeddings."""
+    key = podcast_id or "__all__"
+    if key not in _embeddings_cache:
+        vectors, meta = get_all_embeddings(podcast_id)
+        _embeddings_cache[key] = (vectors, meta)
+    return _embeddings_cache[key]
+
+
+def _invalidate_cache():
+    _embeddings_cache.clear()
+
+
+# ── API Routes ──
+
+@app.get("/api/podcasts")
+def list_podcasts():
+    db = get_db()
+    rows = db.execute("SELECT * FROM podcasts").fetchall()
+    db.close()
+    return [dict(r) for r in rows]
+
+
+@app.get("/api/podcasts/{podcast_id}")
+def get_podcast(podcast_id: str):
+    db = get_db()
+    podcast = db.execute("SELECT * FROM podcasts WHERE id = ?", (podcast_id,)).fetchone()
+    if not podcast:
+        raise HTTPException(404, "Podcast not found")
+
+    staffeln = db.execute("SELECT * FROM staffeln WHERE podcast_id = ? ORDER BY id", (podcast_id,)).fetchall()
+    themes = db.execute("SELECT * FROM themes WHERE podcast_id = ?", (podcast_id,)).fetchall()
+    episodes = db.execute("SELECT * FROM episodes WHERE podcast_id = ? ORDER BY id", (podcast_id,)).fetchall()
+    quotes = db.execute("SELECT * FROM quotes WHERE podcast_id = ?", (podcast_id,)).fetchall()
+    db.close()
+
+    # Build mindmap_data compatible format
+    return {
+        "name": podcast["name"],
+        "host": podcast["host"],
+        "description": podcast["description"],
+        "staffeln": [dict(s) for s in staffeln],
+        "themes": [{**dict(t), "episodes": json.loads(t["episodes_json"])} for t in themes],
+        "episodes": [{"id": e["id"], "title": e["title"], "guest": e["guest"],
+                      "staffel": e["staffel"], "audioFile": e["audio_file"]} for e in episodes],
+        "quotes": [{
+            "id": q["id"], "text": q["text"], "verbatim": q["verbatim"],
+            "speaker": q["speaker"], "episode": q["episode_id"],
+            "startTime": q["start_time"], "endTime": q["end_time"],
+            "isTopQuote": bool(q["is_top_quote"]),
+            "themes": json.loads(q["themes_json"]),
+            "audioFile": next((e["audio_file"] for e in episodes if e["id"] == q["episode_id"]), None)
+        } for q in quotes],
+    }
+
+
+@app.get("/api/podcasts/{podcast_id}/transcript/{episode_id}")
+def get_transcript(podcast_id: str, episode_id: str):
+    db = get_db()
+    paras = db.execute(
+        "SELECT idx, start_time, end_time, text FROM paragraphs WHERE podcast_id = ? AND episode_id = ? ORDER BY idx",
+        (podcast_id, episode_id)
+    ).fetchall()
+    db.close()
+    return {"paragraphs": [{"start": p["start_time"], "end": p["end_time"], "text": p["text"]} for p in paras]}
+
+
+@app.get("/api/search")
+def search(q: str = Query(..., min_length=2), podcast_id: Optional[str] = None, limit: int = 50):
+    """Full-text search across all transcripts."""
+    db = get_db()
+    q_like = f"%{q}%"
+
+    if podcast_id:
+        rows = db.execute(
+            "SELECT p.podcast_id, p.episode_id, p.idx, p.start_time, p.text, e.title, e.guest "
+            "FROM paragraphs p JOIN episodes e ON p.podcast_id = e.podcast_id AND p.episode_id = e.id "
+            "WHERE p.podcast_id = ? AND p.text LIKE ? LIMIT ?",
+            (podcast_id, q_like, limit)
+        ).fetchall()
+    else:
+        rows = db.execute(
+            "SELECT p.podcast_id, p.episode_id, p.idx, p.start_time, p.text, e.title, e.guest "
+            "FROM paragraphs p JOIN episodes e ON p.podcast_id = e.podcast_id AND p.episode_id = e.id "
+            "WHERE p.text LIKE ? LIMIT ?",
+            (q_like, limit)
+        ).fetchall()
+
+    db.close()
+    return [dict(r) for r in rows]
+
+
+@app.get("/api/similar/{podcast_id}/{episode_id}/{para_idx}")
+def find_similar(podcast_id: str, episode_id: str, para_idx: int,
+                 limit: int = 10, cross_podcast: bool = False):
+    """Find semantically similar paragraphs using embeddings."""
+    db = get_db()
+    row = db.execute(
+        "SELECT id, embedding FROM paragraphs WHERE podcast_id = ? AND episode_id = ? AND idx = ?",
AND idx = ?", + (podcast_id, episode_id, para_idx) + ).fetchone() + db.close() + + if not row or not row["embedding"]: + raise HTTPException(404, "Paragraph not found or not embedded") + + query_vec = np.frombuffer(row["embedding"], dtype=np.float32) + query_vec = query_vec / np.linalg.norm(query_vec) + + # Load all embeddings + search_podcast = None if cross_podcast else podcast_id + vectors, meta = _load_embeddings(search_podcast) + + if vectors is None or len(meta) == 0: + return [] + + # Cosine similarity (vectors are already normalized) + scores = vectors @ query_vec + + # Get top results (skip self) + indices = np.argsort(scores)[::-1] + results = [] + for idx in indices: + m = meta[idx] + # Skip self + if m["podcast_id"] == podcast_id and m["episode_id"] == episode_id and m["idx"] == para_idx: + continue + # Skip same episode unless cross_podcast + if not cross_podcast and m["episode_id"] == episode_id: + continue + + results.append({ + "podcast_id": m["podcast_id"], + "episode_id": m["episode_id"], + "paragraph_idx": m["idx"], + "score": float(scores[idx]) + }) + + if len(results) >= limit: + break + + # Enrich with text previews + db = get_db() + for r in results: + p = db.execute( + "SELECT text, start_time FROM paragraphs WHERE podcast_id = ? AND episode_id = ? AND idx = ?", + (r["podcast_id"], r["episode_id"], r["paragraph_idx"]) + ).fetchone() + if p: + r["text_preview"] = p["text"][:150] + r["start_time"] = p["start_time"] + + ep = db.execute( + "SELECT title, guest FROM episodes WHERE podcast_id = ? AND id = ?", + (r["podcast_id"], r["episode_id"]) + ).fetchone() + if ep: + r["episode_title"] = ep["title"] + r["guest"] = ep["guest"] + db.close() + + return results + + +@app.get("/api/semantic-search") +def semantic_search(q: str = Query(..., min_length=3), podcast_id: Optional[str] = None, limit: int = 20): + """Semantic search using query embedding.""" + from embeddings import embed_texts + + try: + query_vec = np.array(embed_texts([q])[0], dtype=np.float32) + query_vec = query_vec / np.linalg.norm(query_vec) + except Exception as e: + raise HTTPException(500, f"Embedding failed: {e}") + + vectors, meta = _load_embeddings(podcast_id) + if vectors is None: + return [] + + scores = vectors @ query_vec + indices = np.argsort(scores)[::-1][:limit] + + db = get_db() + results = [] + for idx in indices: + m = meta[idx] + score = float(scores[idx]) + if score < 0.3: + break + + p = db.execute( + "SELECT text, start_time FROM paragraphs WHERE id = ?", (m["id"],) + ).fetchone() + ep = db.execute( + "SELECT title, guest FROM episodes WHERE podcast_id = ? 
AND id = ?", + (m["podcast_id"], m["episode_id"]) + ).fetchone() + + results.append({ + "podcast_id": m["podcast_id"], + "episode_id": m["episode_id"], + "paragraph_idx": m["idx"], + "score": score, + "text_preview": p["text"][:200] if p else "", + "start_time": p["start_time"] if p else None, + "episode_title": ep["title"] if ep else "", + "guest": ep["guest"] if ep else "", + }) + db.close() + + return results + + +# ── Startup ── + +@app.on_event("startup") +def startup(): + init_db() + + # Auto-import podcasts from data directory + data_path = Path(DATA_DIR) + if data_path.exists(): + for podcast_dir in data_path.iterdir(): + if not podcast_dir.is_dir(): + continue + mindmap_file = podcast_dir / "mindmap_data.json" + srt_file = podcast_dir / "srt_index.json" + if mindmap_file.exists() and srt_file.exists(): + podcast_id = podcast_dir.name + db = get_db() + existing = db.execute("SELECT id FROM podcasts WHERE id = ?", (podcast_id,)).fetchone() + db.close() + if not existing: + print(f"Importing podcast: {podcast_id}") + with open(mindmap_file) as f: + mindmap_data = json.load(f) + with open(srt_file) as f: + srt_index = json.load(f) + from database import import_podcast + import_podcast(podcast_id, mindmap_data, srt_index) + + +# ── Static Files + Audio ── + +# Mount audio directory (per-podcast subdirs) +if os.path.isdir(AUDIO_DIR): + app.mount("/audio", StaticFiles(directory=AUDIO_DIR), name="audio") + +# Serve webapp as static files (fallback) +if os.path.isdir(STATIC_DIR): + app.mount("/", StaticFiles(directory=STATIC_DIR, html=True), name="static") diff --git a/backend/database.py b/backend/database.py new file mode 100644 index 0000000..7143fce --- /dev/null +++ b/backend/database.py @@ -0,0 +1,197 @@ +"""SQLite database for podcast-mindmap: paragraphs, quotes, embeddings.""" + +import json +import sqlite3 +import os +import numpy as np +from typing import Optional + +DB_PATH = os.environ.get("DB_PATH", "/data/db.sqlite") + + +def get_db(): + db = sqlite3.connect(DB_PATH) + db.row_factory = sqlite3.Row + db.execute("PRAGMA journal_mode=WAL") + return db + + +def init_db(): + db = get_db() + db.executescript(""" + CREATE TABLE IF NOT EXISTS podcasts ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL, + host TEXT, + description TEXT, + config_json TEXT + ); + + CREATE TABLE IF NOT EXISTS episodes ( + id TEXT NOT NULL, + podcast_id TEXT NOT NULL, + title TEXT NOT NULL, + guest TEXT, + staffel INTEGER, + youtube_id TEXT, + audio_file TEXT, + PRIMARY KEY (podcast_id, id), + FOREIGN KEY (podcast_id) REFERENCES podcasts(id) + ); + + CREATE TABLE IF NOT EXISTS paragraphs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + podcast_id TEXT NOT NULL, + episode_id TEXT NOT NULL, + idx INTEGER NOT NULL, + start_time REAL, + end_time REAL, + text TEXT NOT NULL, + embedding BLOB, + UNIQUE(podcast_id, episode_id, idx), + FOREIGN KEY (podcast_id) REFERENCES podcasts(id) + ); + + CREATE TABLE IF NOT EXISTS topics ( + paragraph_id INTEGER NOT NULL, + tag TEXT NOT NULL, + score REAL DEFAULT 1.0, + PRIMARY KEY (paragraph_id, tag), + FOREIGN KEY (paragraph_id) REFERENCES paragraphs(id) + ); + + CREATE TABLE IF NOT EXISTS quotes ( + id TEXT NOT NULL, + podcast_id TEXT NOT NULL, + episode_id TEXT NOT NULL, + text TEXT NOT NULL, + verbatim TEXT, + speaker TEXT, + start_time REAL, + end_time REAL, + is_top_quote BOOLEAN DEFAULT 0, + themes_json TEXT DEFAULT '[]', + PRIMARY KEY (podcast_id, id), + FOREIGN KEY (podcast_id) REFERENCES podcasts(id) + ); + + CREATE TABLE IF NOT EXISTS staffeln ( + id INTEGER NOT NULL, + 
+            podcast_id TEXT NOT NULL,
+            name TEXT NOT NULL,
+            color TEXT DEFAULT '#666',
+            PRIMARY KEY (podcast_id, id),
+            FOREIGN KEY (podcast_id) REFERENCES podcasts(id)
+        );
+
+        CREATE TABLE IF NOT EXISTS themes (
+            id TEXT NOT NULL,
+            podcast_id TEXT NOT NULL,
+            label TEXT NOT NULL,
+            description TEXT,
+            color TEXT DEFAULT '#666',
+            episodes_json TEXT DEFAULT '[]',
+            PRIMARY KEY (podcast_id, id),
+            FOREIGN KEY (podcast_id) REFERENCES podcasts(id)
+        );
+
+        CREATE INDEX IF NOT EXISTS idx_paragraphs_podcast ON paragraphs(podcast_id, episode_id);
+        CREATE INDEX IF NOT EXISTS idx_quotes_podcast ON quotes(podcast_id, episode_id);
+        CREATE INDEX IF NOT EXISTS idx_topics_tag ON topics(tag);
+    """)
+    db.commit()
+    db.close()
+
+
+def import_podcast(podcast_id: str, mindmap_data: dict, srt_index: dict):
+    """Import a podcast's data from mindmap_data.json + srt_index.json into the DB."""
+    db = get_db()
+
+    # Podcast
+    db.execute(
+        "INSERT OR REPLACE INTO podcasts (id, name, host, description) VALUES (?, ?, ?, ?)",
+        (podcast_id, mindmap_data.get("name", ""), mindmap_data.get("host", ""),
+         mindmap_data.get("description", ""))
+    )
+
+    # Staffeln
+    for s in mindmap_data.get("staffeln", []):
+        db.execute(
+            "INSERT OR REPLACE INTO staffeln (id, podcast_id, name, color) VALUES (?, ?, ?, ?)",
+            (s["id"], podcast_id, s["name"], s.get("color", "#666"))
+        )
+
+    # Themes
+    for t in mindmap_data.get("themes", []):
+        db.execute(
+            "INSERT OR REPLACE INTO themes (id, podcast_id, label, description, color, episodes_json) VALUES (?, ?, ?, ?, ?, ?)",
+            (t["id"], podcast_id, t["label"], t.get("description", ""),
+             t.get("color", "#666"), json.dumps(t.get("episodes", [])))
+        )
+
+    # Episodes
+    for ep in mindmap_data.get("episodes", []):
+        db.execute(
+            "INSERT OR REPLACE INTO episodes (id, podcast_id, title, guest, staffel, audio_file) VALUES (?, ?, ?, ?, ?, ?)",
+            (ep["id"], podcast_id, ep["title"], ep.get("guest", ""),
+             ep.get("staffel"), ep.get("audioFile"))
+        )
+
+    # Quotes
+    for q in mindmap_data.get("quotes", []):
+        db.execute(
+            "INSERT OR REPLACE INTO quotes (id, podcast_id, episode_id, text, verbatim, speaker, start_time, end_time, is_top_quote, themes_json) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+            (q["id"], podcast_id, q["episode"], q["text"], q.get("verbatim"),
+             q.get("speaker", ""), q.get("startTime"), q.get("endTime"),
+             q.get("isTopQuote", False), json.dumps(q.get("themes", [])))
+        )
+
+    # Paragraphs from srt_index
+    for ep_key, ep_data in srt_index.items():
+        ep_id = ep_key.split("-")[0]  # S1E1-Wachstum → S1E1
+        for i, p in enumerate(ep_data.get("paragraphs", [])):
+            db.execute(
+                "INSERT OR REPLACE INTO paragraphs (podcast_id, episode_id, idx, start_time, end_time, text) VALUES (?, ?, ?, ?, ?, ?)",
+                (podcast_id, ep_id, i, p["start"], p["end"], p["text"])
+            )
+
+    db.commit()
+    db.close()
+
+
+def get_all_embeddings(podcast_id: Optional[str] = None):
+    """Load all embeddings as numpy array + metadata."""
+    db = get_db()
+    if podcast_id:
+        rows = db.execute(
+            "SELECT id, podcast_id, episode_id, idx, embedding FROM paragraphs WHERE podcast_id = ? AND embedding IS NOT NULL",
+            (podcast_id,)
+        ).fetchall()
+    else:
+        rows = db.execute(
+            "SELECT id, podcast_id, episode_id, idx, embedding FROM paragraphs WHERE embedding IS NOT NULL"
+        ).fetchall()
+    db.close()
+
+    if not rows:
+        return None, []
+
+    meta = [{"id": r["id"], "podcast_id": r["podcast_id"],
+             "episode_id": r["episode_id"], "idx": r["idx"]} for r in rows]
+    vectors = np.array([np.frombuffer(r["embedding"], dtype=np.float32) for r in rows])
+
+    # Normalize for cosine similarity
+    norms = np.linalg.norm(vectors, axis=1, keepdims=True)
+    norms[norms == 0] = 1
+    vectors = vectors / norms
+
+    return vectors, meta
+
+
+def store_embedding(paragraph_id: int, embedding: list[float]):
+    """Store embedding as binary blob."""
+    db = get_db()
+    blob = np.array(embedding, dtype=np.float32).tobytes()
+    db.execute("UPDATE paragraphs SET embedding = ? WHERE id = ?", (blob, paragraph_id))
+    db.commit()
+    db.close()
diff --git a/backend/embeddings.py b/backend/embeddings.py
new file mode 100644
index 0000000..8ade771
--- /dev/null
+++ b/backend/embeddings.py
@@ -0,0 +1,73 @@
+"""Generate embeddings via DashScope (Qwen text-embedding-v3)."""
+
+import os
+import time
+from openai import OpenAI
+from database import get_db, store_embedding
+
+DASHSCOPE_API_KEY = os.environ.get("DASHSCOPE_API_KEY", "")
+EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "text-embedding-v3")
+BATCH_SIZE = 6  # DashScope text-embedding-v3 limit: 10 texts, but long texts need smaller batches
+
+
+def get_client():
+    return OpenAI(
+        api_key=DASHSCOPE_API_KEY,
+        base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
+    )
+
+
+def embed_texts(texts: list[str]) -> list[list[float]]:
+    """Embed a batch of texts."""
+    client = get_client()
+    response = client.embeddings.create(
+        model=EMBEDDING_MODEL,
+        input=texts,
+        dimensions=1024
+    )
+    return [item.embedding for item in response.data]
+
+
+def embed_all_paragraphs(podcast_id: str = None):
+    """Embed all paragraphs that don't have embeddings yet."""
+    db = get_db()
+
+    if podcast_id:
+        rows = db.execute(
+            "SELECT id, text FROM paragraphs WHERE podcast_id = ? AND embedding IS NULL",
+            (podcast_id,)
+        ).fetchall()
+    else:
+        rows = db.execute(
+            "SELECT id, text FROM paragraphs WHERE embedding IS NULL"
+        ).fetchall()
+
+    db.close()
+
+    if not rows:
+        print("No paragraphs to embed.")
+        return
+
+    print(f"Embedding {len(rows)} paragraphs...")
+
+    for i in range(0, len(rows), BATCH_SIZE):
+        batch = rows[i:i + BATCH_SIZE]
+        texts = [r["text"][:2000] for r in batch]  # Truncate long texts
+
+        try:
+            embeddings = embed_texts(texts)
+            for row, emb in zip(batch, embeddings):
+                store_embedding(row["id"], emb)
+            print(f"  Batch {i // BATCH_SIZE + 1}/{(len(rows) + BATCH_SIZE - 1) // BATCH_SIZE}: {len(batch)} paragraphs")
+        except Exception as e:
+            print(f"  Error at batch {i // BATCH_SIZE + 1}: {e}")
+            time.sleep(2)
+            continue
+
+    print("Done.")
+
+
+if __name__ == "__main__":
+    import sys
+    podcast_id = sys.argv[1] if len(sys.argv) > 1 else None
+    embed_all_paragraphs(podcast_id)
diff --git a/backend/requirements.txt b/backend/requirements.txt
new file mode 100644
index 0000000..f44e9a0
--- /dev/null
+++ b/backend/requirements.txt
@@ -0,0 +1,6 @@
+fastapi>=0.115.0
+uvicorn>=0.30.0
+aiosqlite>=0.20.0
+numpy>=1.26.0
+openai>=1.50.0
+pyyaml>=6.0
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..b68a437
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,27 @@
+services:
+  podcast-mindmap:
+    build: .
+    container_name: podcast-mindmap
+    restart: unless-stopped
+    environment:
+      - DATA_DIR=/data
+      - AUDIO_DIR=/audio
+      - STATIC_DIR=/static
+      - DB_PATH=/data/db.sqlite
+      - DASHSCOPE_API_KEY=${DASHSCOPE_API_KEY}
+    volumes:
+      - ./data:/data
+      - ./audio:/audio
+    labels:
+      - "traefik.enable=true"
+      - "traefik.http.routers.podcast.rule=Host(`podcast.toppyr.de`)"
+      - "traefik.http.routers.podcast.entrypoints=websecure"
+      - "traefik.http.routers.podcast.tls=true"
+      - "traefik.http.routers.podcast.tls.certresolver=letsencrypt"
+      - "traefik.http.services.podcast.loadbalancer.server.port=8000"
+    networks:
+      - collaboration_collaboration
+
+networks:
+  collaboration_collaboration:
+    external: true
diff --git a/webapp/index.html b/webapp/index.html
index 03e7919..8d8c368 100644
--- a/webapp/index.html
+++ b/webapp/index.html
@@ -262,6 +262,26 @@
     .view-tab:hover { border-color: var(--accent); }
     .view-tab.active { background: var(--accent); color: var(--bg); border-color: var(--accent); }
 
+    /* ── Podcast Selector ── */
+    .podcast-selector {
+      display: flex; gap: 8px; flex-wrap: wrap; justify-content: center;
+      padding: 40px 20px;
+    }
+    .podcast-card {
+      background: var(--surface2); border: 1px solid var(--border);
+      border-radius: 12px; padding: 20px; width: 260px; cursor: pointer;
+      transition: all 0.2s;
+    }
+    .podcast-card:hover { border-color: var(--accent); transform: translateY(-2px); }
+    .podcast-card h3 { font-size: 15px; margin-bottom: 4px; }
+    .podcast-card p { font-size: 12px; color: var(--text-muted); }
+
+    /* ── Semantic results ── */
+    .semantic-badge {
+      display: inline-block; background: #2a9d8f33; color: #2a9d8f;
+      font-size: 9px; font-weight: 600; padding: 1px 6px; border-radius: 3px;
+    }
+
     .welcome { text-align: center; padding: 40px 20px; color: var(--text-muted); }
     .welcome h2 { color: var(--text); margin-bottom: 8px; }
     .welcome p { font-size: 13px; line-height: 1.6; }
@@ -333,7 +353,7 @@ const AudioPlayer = {
     this.currentQuote = q;
     const sameFile = this.el.src && this.el.src.endsWith(q.audioFile);
     if (!sameFile) {
-      this.el.src = `audio/${q.audioFile}`;
+      this.el.src = `audio/${CURRENT_PODCAST ? CURRENT_PODCAST + '/' : ''}${q.audioFile}`;
     }
     this.nowPlaying.innerHTML = `"${q.text.substring(0, 70)}…" — ${q.speaker} (${q.episode})`;
     this.bar.classList.add('visible');
@@ -436,12 +456,18 @@ const TranscriptView = {
   activeIdx: -1,
 
   async show(episodeId, seekTime) {
-    if (!TRANSCRIPTS) await this.loadTranscripts();
-    const key = Object.keys(TRANSCRIPTS).find(k => k.startsWith(episodeId.replace('E', 'E')));
-    if (!key || !TRANSCRIPTS[key]) return;
+    const epData = await this.loadEpisodeTranscript(episodeId);
+    if (!epData || !epData.paragraphs) {
+      // Fallback: try old method
+      if (!TRANSCRIPTS) await this.loadTranscripts();
+      const key = Object.keys(TRANSCRIPTS).find(k => k.startsWith(episodeId));
+      if (!key || !TRANSCRIPTS[key]) return;
+      this.paragraphs = TRANSCRIPTS[key].paragraphs;
+    } else {
+      this.paragraphs = epData.paragraphs;
+    }
 
     this.episodeId = episodeId;
-    this.paragraphs = TRANSCRIPTS[key].paragraphs;
     this.visible = true;
     this.userScrolled = false;
 
@@ -515,18 +541,35 @@ const TranscriptView = {
   },
 
   async loadTranscripts() {
+    if (TRANSCRIPTS && Object.keys(TRANSCRIPTS).length > 0) return;
+    TRANSCRIPTS = {};
     try {
       const resp = await fetch('srt_index.json');
-      TRANSCRIPTS = await resp.json();
-    } catch (e) {
-      // Try from data subdir
+      if (resp.ok) { TRANSCRIPTS = await resp.json(); return; }
+    } catch (e) {}
+    try {
+      const resp = await fetch('data/srt_index.json');
+      if (resp.ok) { TRANSCRIPTS = await resp.json(); }
+    } catch (e2) {}
+  },
+
+  async loadEpisodeTranscript(episodeId) {
+    // Try API first
+    if (CURRENT_PODCAST) {
       try {
-        const resp = await fetch('data/srt_index.json');
-        TRANSCRIPTS = await resp.json();
-      } catch (e2) {
-        TRANSCRIPTS = {};
-      }
+        const resp = await fetch(`${API_BASE}/api/podcasts/${CURRENT_PODCAST}/transcript/${episodeId}`);
+        if (resp.ok) {
+          const data = await resp.json();
+          if (!TRANSCRIPTS) TRANSCRIPTS = {};
+          TRANSCRIPTS[episodeId] = data;
+          return data;
+        }
+      } catch (e) {}
     }
+    // Fallback
+    await this.loadTranscripts();
+    const key = Object.keys(TRANSCRIPTS).find(k => k.startsWith(episodeId));
+    return key ? TRANSCRIPTS[key] : null;
   }
 };
@@ -546,7 +589,34 @@ const Search = {
 
   async run(query) {
     if (query.length < 3) { this.clear(); return; }
-    if (!TRANSCRIPTS) await TranscriptView.loadTranscripts();
+
+    // Try semantic search via API first
+    if (CURRENT_PODCAST) {
+      try {
+        const resp = await fetch(`${API_BASE}/api/semantic-search?q=${encodeURIComponent(query)}&podcast_id=${CURRENT_PODCAST}`);
+        if (resp.ok) {
+          const apiResults = await resp.json();
+          if (apiResults.length > 0) {
+            this.showSemanticResults(apiResults, query);
+            return;
+          }
+        }
+      } catch (e) {}
+      // Fallback to text search via API
+      try {
+        const resp = await fetch(`${API_BASE}/api/search?q=${encodeURIComponent(query)}&podcast_id=${CURRENT_PODCAST}`);
+        if (resp.ok) {
+          const apiResults = await resp.json();
+          if (apiResults.length > 0) {
+            this.showApiResults(apiResults, query);
+            return;
+          }
+        }
+      } catch (e) {}
+    }
+
+    // Client-side fallback
+    if (!TRANSCRIPTS || Object.keys(TRANSCRIPTS).length === 0) await TranscriptView.loadTranscripts();
 
     const results = [];
     const qLower = query.toLowerCase();
@@ -623,6 +693,33 @@ const Search = {
     panel.innerHTML = html;
   },
 
+  showSemanticResults(results, query) {
+    TranscriptView.hide();
+    const panel = document.getElementById('panel');
+    let html = `
+${escHtml(p.description || '')}
+`; html += `