#8/#10 Cross-Mindmap: Combined-Visualisierung mit Theme-Clustering ueber Podcast-Grenzen

scripts/cluster_themes.py:
- Berechnet Embeddings je Theme (label + description), single-linkage Clustering
  ueber Cosinus-Schwelle (default 0.55) ueber Podcast-Grenzen hinweg.
- Output: data/theme_clusters.json mit Cluster-Liste, je Cluster Mitglieder
  (podcast_id, theme_id, label) und Cross-Flag.

Backend:
- /api/analyses/cross-themes: liefert die Cluster-Datei aus.

Frontend (CrossMindmapView):
- Force-Graph mit zwei Podcast-Hubs (links/rechts), je Themen radial drumherum,
  Cross-Cluster-Member als gestrichelte gelbe Verbindungslinien mit Cluster-Label.
- Panel: Cross-Cluster oben (Mitglieder klickbar, oeffnet das jeweilige Theme), Solo-Cluster darunter.
- Klick auf Theme oeffnet den jeweiligen Podcast.
- 'Cross-Mindmap'-Button im Selector.

Initiales Cluster-Ergebnis: 13 Cluster aus 14 Themen, 1 Cross-Cluster
('Trump-USA' x 'Plattformmacht/Debattenkultur').

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dotty Dotter 2026-04-28 08:37:55 +02:00
parent b8c808cd87
commit d7a0ed2715
3 changed files with 355 additions and 8 deletions

View File

@ -271,6 +271,21 @@ def get_shifts_analysis(podcast: Optional[str] = None, theme: Optional[str] = No
}
@app.get("/api/analyses/cross-themes")
def get_cross_themes():
    """Serve the cross-podcast theme clusters (#8/#10).

    Reads ``theme_clusters.json`` from ``DATA_DIR`` and returns its top-level
    keys together with an ``available`` flag.

    Returns:
        ``{"available": False}`` when the file is missing, unreadable, not
        valid UTF-8/JSON, or when its top level is not a JSON object;
        otherwise the file content merged with ``"available": True``.
    """
    path = Path(DATA_DIR) / "theme_clusters.json"
    try:
        # scripts/cluster_themes.py writes this file as UTF-8 with
        # ensure_ascii=False, so decode it explicitly instead of relying on
        # the platform's default encoding.
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: file missing or unreadable.
        # ValueError: invalid JSON (JSONDecodeError) or invalid UTF-8
        # (UnicodeDecodeError).
        return {"available": False}
    if not isinstance(data, dict):
        # ``**data`` below would raise for a list/scalar top-level value.
        return {"available": False}
    # Put the flag last so a stray "available" key in the file cannot
    # override it.
    return {**data, "available": True}
@app.get("/api/analyses/density")
def get_density(podcast_id: Optional[str] = None, bins: int = 20):
"""Faktendichte (#16): claims-Verteilung je Episode in N Bins ueber die Paragraph-Achse."""

150
scripts/cluster_themes.py Normal file
View File

@ -0,0 +1,150 @@
#!/usr/bin/env python3
"""#8 Standardisiertes Themen-Tagging: cluster Themes ueber Podcasts hinweg.
Berechnet Embeddings fuer alle Themes (label + description) und ordnet aehnliche
Themes ueber Podcast-Grenzen hinweg einem gemeinsamen Cluster zu.
Output: data/theme_clusters.json
{
"clusters": [
{
"id": "klima",
"label": "Klima",
"members": [
{"podcast_id": "ldn", "theme_id": "klima-verkehr", "label": "..."},
{"podcast_id": "neu-denken", "theme_id": "klimakrise", "label": "..."}
]
}
]
}
"""
import json
import os
import sqlite3
import sys
from pathlib import Path
import numpy as np
from openai import OpenAI
# Positional CLI arguments: [1] path of the SQLite database to read,
# [2] path of the JSON file to write. Both fall back to files under data/.
DB_PATH = sys.argv[1] if len(sys.argv) > 1 else "data/db.sqlite"
OUT_PATH = sys.argv[2] if len(sys.argv) > 2 else "data/theme_clusters.json"
# Minimum cosine similarity for linking two themes; can be overridden with the
# THEME_CLUSTER_THRESHOLD environment variable.
# NOTE(review): the commit message documents a default of 0.55, the code uses
# 0.65 - confirm which value is intended.
THRESHOLD = float(os.environ.get("THEME_CLUSTER_THRESHOLD", "0.65"))
# API key, base URL and model name handed to the OpenAI client for the
# embedding requests.
API_KEY = os.environ.get("DASHSCOPE_API_KEY", "")
BASE_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
EMBED_MODEL = "text-embedding-v3"
def embed(client, texts):
    """Return one embedding vector per entry of ``texts``.

    Sends all ``texts`` in a single ``client.embeddings.create`` request using
    ``EMBED_MODEL`` with 1024 dimensions and collects the ``embedding``
    attribute of every returned item, in the order of the response.
    """
    response = client.embeddings.create(
        model=EMBED_MODEL,
        input=texts,
        dimensions=1024,
    )
    vectors = []
    for entry in response.data:
        vectors.append(entry.embedding)
    return vectors
def normalize(v):
    """Scale ``v`` to unit Euclidean length.

    A vector whose norm is zero is returned unchanged so that no division by
    zero takes place.
    """
    length = np.linalg.norm(v)
    if not length:
        return v
    return v / length
# Number of theme texts sent per embedding request. The original comment states
# that the DashScope API accepts at most 10 texts per call; 8 stays below that.
_EMBED_BATCH_SIZE = 8
# Maximum number of characters of a theme text sent to the embedding model.
_MAX_TEXT_CHARS = 500


def _theme_text(row):
    """Build the text that is embedded for one theme row."""
    label = row["label"]
    description = row["description"]
    # Separate label and description explicitly; plain concatenation would
    # fuse the last word of the label with the first word of the description.
    text = f"{label}. {description}" if description else label
    return text[:_MAX_TEXT_CHARS]


def _cross_podcast_groups(similarity, podcast_ids, threshold):
    """Group theme indices by single-linkage over cross-podcast pairs.

    Two themes are linked when they belong to different podcasts and their
    similarity is at least ``threshold``. Themes of the same podcast are never
    linked directly; they only share a group transitively through a common
    partner in another podcast.

    Returns a list of index lists, ordered by the smallest index of each
    group, with the indices inside a group in ascending order.
    """
    n = len(podcast_ids)
    parent = list(range(n))

    def find(x):
        # Walk up to the root and shorten the chain on the way.
        while parent[x] != x:
            parent[x] = parent[parent[x]]
            x = parent[x]
        return x

    for i in range(n):
        for j in range(i + 1, n):
            if podcast_ids[i] != podcast_ids[j] and similarity[i, j] >= threshold:
                root_i, root_j = find(i), find(j)
                if root_i != root_j:
                    parent[root_j] = root_i

    groups = {}
    for i in range(n):
        groups.setdefault(find(i), []).append(i)
    return list(groups.values())


def _cluster_record(rows, indices):
    """Turn the theme rows at ``indices`` into one output cluster dict."""
    members = [
        {
            "podcast_id": rows[i]["podcast_id"],
            "theme_id": rows[i]["id"],
            "label": rows[i]["label"],
            "color": rows[i]["color"],
        }
        for i in indices
    ]
    return {
        # The cluster id is the theme id of the first member.
        "id": members[0]["theme_id"],
        # The cluster label is the shortest member label.
        "label": min((m["label"] for m in members), key=len),
        "n_members": len(members),
        "is_cross": len({m["podcast_id"] for m in members}) > 1,
        "members": members,
    }


def main():
    """Cluster all themes across podcasts and write the result as JSON.

    Reads every row of the ``themes`` table from ``DB_PATH``, embeds
    ``label`` plus ``description`` of each theme, links themes of different
    podcasts whose cosine similarity reaches ``THRESHOLD`` and writes the
    resulting clusters to ``OUT_PATH``. Progress and the list of cross-podcast
    clusters are printed to stdout.

    Exits with status 1 when ``DASHSCOPE_API_KEY`` is not set.
    """
    if not API_KEY:
        print("DASHSCOPE_API_KEY nicht gesetzt.")
        sys.exit(1)
    client = OpenAI(api_key=API_KEY, base_url=BASE_URL)

    # Everything needed from the database is fetched up front, so the
    # connection can be closed right away - also on the early return below and
    # when a later step raises.
    db = sqlite3.connect(DB_PATH)
    try:
        db.row_factory = sqlite3.Row
        rows = db.execute("""
            SELECT podcast_id, id, label, description, color
            FROM themes ORDER BY podcast_id, id
        """).fetchall()
    finally:
        db.close()

    print(f"Themes gefunden: {len(rows)}")
    if not rows:
        print("Nichts zu tun.")
        return

    texts = [_theme_text(r) for r in rows]
    embs = []
    for start in range(0, len(texts), _EMBED_BATCH_SIZE):
        embs.extend(embed(client, texts[start:start + _EMBED_BATCH_SIZE]))

    # With unit-length vectors the dot product equals the cosine similarity.
    vectors = np.array([normalize(np.array(e, dtype=np.float32)) for e in embs])
    sim = vectors @ vectors.T

    podcast_ids = [r["podcast_id"] for r in rows]
    clusters = [
        _cluster_record(rows, idxs)
        for idxs in _cross_podcast_groups(sim, podcast_ids, THRESHOLD)
    ]
    # Cross-podcast clusters first, then by descending member count; the sort
    # is stable, so ties keep their original order.
    clusters.sort(key=lambda c: (-c["is_cross"], -c["n_members"]))

    out = {"threshold": THRESHOLD, "clusters": clusters}
    Path(OUT_PATH).parent.mkdir(parents=True, exist_ok=True)
    with open(OUT_PATH, "w", encoding="utf-8") as f:
        json.dump(out, f, ensure_ascii=False, indent=2)

    print(f"\n{len(clusters)} Cluster geschrieben nach {OUT_PATH}")
    cross = [c for c in clusters if c["is_cross"]]
    print(f" davon cross-podcast: {len(cross)}")
    for c in cross:
        members = ", ".join(f"{m['podcast_id']}/{m['theme_id']}" for m in c["members"])
        print(f" [{c['label']}] {members}")
# Run the clustering only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()

View File

@ -746,7 +746,7 @@ const AnalysisView = {
async show(episodeId, mode) {
if (!CURRENT_PODCAST) return;
TranscriptView.hide(); AnalysisView.hide(); GapsView.hide(); ShiftsView.hide(); DebatesView.hide(); ArgumentsView.hide(); DensityView.hide();
TranscriptView.hide(); AnalysisView.hide(); GapsView.hide(); ShiftsView.hide(); DebatesView.hide(); ArgumentsView.hide(); DensityView.hide(); CrossMindmapView.hide();
this.episodeId = episodeId;
this.mode = mode;
this.visible = true;
@ -919,7 +919,7 @@ const GapsView = {
minSize: 0,
async show() {
TranscriptView.hide(); AnalysisView.hide(); GapsView.hide(); ShiftsView.hide(); DebatesView.hide(); ArgumentsView.hide(); DensityView.hide();
TranscriptView.hide(); AnalysisView.hide(); GapsView.hide(); ShiftsView.hide(); DebatesView.hide(); ArgumentsView.hide(); DensityView.hide(); CrossMindmapView.hide();
this.visible = true;
const panel = document.getElementById('panel');
panel.innerHTML = `<h2>Leerstellen</h2><p class="subtitle">Lädt …</p>`;
@ -1041,7 +1041,7 @@ const ShiftsView = {
expanded: {},
async show() {
TranscriptView.hide(); AnalysisView.hide(); GapsView.hide(); ShiftsView.hide(); DebatesView.hide(); ArgumentsView.hide(); DensityView.hide();
TranscriptView.hide(); AnalysisView.hide(); GapsView.hide(); ShiftsView.hide(); DebatesView.hide(); ArgumentsView.hide(); DensityView.hide(); CrossMindmapView.hide();
this.visible = true;
const panel = document.getElementById('panel');
panel.innerHTML = `<h2>Narrative Shifts</h2><p class="subtitle">Lädt …</p>`;
@ -1425,7 +1425,7 @@ const DensityView = {
sort: 'density', // 'density' | 'order'
async show() {
TranscriptView.hide(); AnalysisView.hide(); GapsView.hide(); ShiftsView.hide(); DebatesView.hide(); ArgumentsView.hide(); DensityView.hide();
TranscriptView.hide(); AnalysisView.hide(); GapsView.hide(); ShiftsView.hide(); DebatesView.hide(); ArgumentsView.hide(); DensityView.hide(); CrossMindmapView.hide();
this.visible = true;
const panel = document.getElementById('panel');
panel.innerHTML = `<h2>Faktendichte</h2><p class="subtitle">Lädt …</p>`;
@ -1526,6 +1526,187 @@ const DensityView = {
hide() { this.visible = false; }
};
// ── Cross Mindmap (#8/#10 Combined) ──
/**
 * Combined mindmap of all podcasts.
 *
 * Draws one hub per podcast with its themes around it and connects themes
 * that share a cross-podcast cluster (as delivered by
 * /api/analyses/cross-themes) with dashed yellow links. The side panel lists
 * cross clusters first and single-podcast clusters below.
 */
const CrossMindmapView = {
  visible: false,
  data: null,         // response of /api/analyses/cross-themes
  podcastsData: null, // podcast id -> response of /api/podcasts/<id>

  /** Fetch `url` and parse it as JSON; rejects on a non-2xx HTTP status. */
  async _fetchJson(url) {
    const res = await fetch(url);
    if (!res.ok) throw new Error(`HTTP ${res.status} (${url})`);
    return res.json();
  },

  /** Prepare the containers, load cluster and podcast data, then render. */
  async show() {
    TranscriptView.hide(); AnalysisView.hide(); GapsView.hide(); ShiftsView.hide(); DebatesView.hide(); ArgumentsView.hide(); DensityView.hide(); CrossMindmapView.hide();
    this.visible = true;

    // Reset the mindmap container so it holds a fresh SVG.
    const mindmap = document.getElementById('mindmap');
    mindmap.style.overflow = 'hidden';
    mindmap.style.display = '';
    mindmap.style.padding = '';
    mindmap.innerHTML = '<svg id="svg"></svg>';
    document.querySelectorAll('.view-tab').forEach(t => t.classList.remove('active'));
    document.getElementById('tab-mindmap')?.classList.add('active');
    document.getElementById('staffel-filters').innerHTML = '';
    document.getElementById('app-title').innerHTML = `<span style="cursor:pointer" onclick="showPodcastList()" title="Zurück zur Übersicht"></span> <span>Cross-Mindmap</span>`;
    document.title = 'Cross-Mindmap — Podcast Mindmap';

    const panel = document.getElementById('panel');
    panel.innerHTML = `<div class="welcome"><h2>Cross-Mindmap</h2><p>Themen aus verschiedenen Podcasts in einer Visualisierung. Cross-Cluster verbinden Themen, die semantisch zusammengehören.</p><p class="subtitle">Lädt …</p></div>`;

    try {
      // Cluster data and podcast list are independent, so load them together.
      const [clusterData, podcasts] = await Promise.all([
        this._fetchJson(`${API_BASE}/api/analyses/cross-themes`),
        this._fetchJson(`${API_BASE}/api/podcasts`),
      ]);
      // Load the details of every podcast in parallel.
      const details = await Promise.all(
        podcasts.map(p => this._fetchJson(`${API_BASE}/api/podcasts/${p.id}`))
      );
      this.data = clusterData;
      this.podcastsData = {};
      podcasts.forEach((p, i) => { this.podcastsData[p.id] = details[i]; });
    } catch (e) {
      panel.innerHTML = `<h2>Cross-Mindmap</h2><p style="color:var(--accent-warm)">Fehler: ${escHtml(e.message)}</p>`;
      return;
    }
    // Wait two frames before measuring the container in render().
    requestAnimationFrame(() => requestAnimationFrame(() => this.render()));
  },

  /** Build the force graph and the cluster list from the loaded data. */
  render() {
    const svg = d3.select('#svg');
    svg.selectAll('*').remove();

    const container = document.getElementById('mindmap');
    let W = container.clientWidth, H = container.clientHeight;
    if (!W || W < 100) W = window.innerWidth - 400;
    if (!H || H < 100) H = window.innerHeight - 52;
    if (W < 200) W = 800;
    if (H < 200) H = 600;
    const isMobile = W < 600;
    const sc = isMobile ? 0.6 : 1;
    svg.attr('viewBox', `0 0 ${W} ${H}`).attr('preserveAspectRatio', 'xMidYMid meet');

    // The backend answers {"available": false} without a cluster list when
    // the cluster file is missing, so never dereference `clusters` directly.
    const clusters = Array.isArray(this.data?.clusters) ? this.data.clusters : [];
    const podcastsData = this.podcastsData || {};
    const podcastIds = Object.keys(podcastsData);

    const nodes = [], links = [];
    const colors = { 0: '#60a5fa', 1: '#dc7850' }; // blue / orange hub colours
    // Fallback must be a 6-digit hex colour because a 2-digit alpha suffix is
    // appended to it further down.
    const fallbackColor = '#888888';

    // Hubs are pinned on a horizontal line between 28 % and 72 % of the
    // width; with two podcasts this gives the left/right layout.
    podcastIds.forEach((pid, i) => {
      const data = podcastsData[pid];
      const t = podcastIds.length > 1 ? i / (podcastIds.length - 1) : 0;
      nodes.push({
        id: `hub-${pid}`, type: 'hub', pid,
        label: data.name || pid,
        r: 36 * sc, fx: W * (0.28 + 0.44 * t), fy: H * 0.5,
        color: colors[i] || fallbackColor,
      });
    });

    // Themes of every podcast, each linked to its hub.
    podcastIds.forEach(pid => {
      const data = podcastsData[pid];
      (data.themes || []).forEach(t => {
        nodes.push({
          id: `t-${pid}-${t.id}`, type: 'theme', pid,
          themeId: t.id, label: t.label.length > 22 ? t.label.slice(0, 20) + '…' : t.label,
          fullLabel: t.label, r: 22 * sc, color: t.color || fallbackColor,
        });
        links.push({ source: `hub-${pid}`, target: `t-${pid}-${t.id}`, type: 'hub-theme' });
      });
    });

    // Cross links between themes of different podcasts.
    const nodeIds = new Set(nodes.map(n => n.id));
    const crossClusters = clusters.filter(c => c.is_cross);
    crossClusters.forEach(c => {
      // Skip members without a matching theme node (e.g. a cluster file that
      // is older than the podcast data); d3.forceLink throws on unknown ids.
      const memberIds = (c.members || [])
        .map(m => `t-${m.podcast_id}-${m.theme_id}`)
        .filter(id => nodeIds.has(id));
      // Fully connect all members of a cross cluster.
      for (let i = 0; i < memberIds.length; i++) {
        for (let j = i + 1; j < memberIds.length; j++) {
          links.push({ source: memberIds[i], target: memberIds[j], type: 'cross', cluster: c.label });
        }
      }
    });

    const sim = d3.forceSimulation(nodes)
      .force('link', d3.forceLink(links).id(d => d.id).distance(d => d.type === 'hub-theme' ? 130 * sc : 220 * sc).strength(d => d.type === 'cross' ? 0.06 : 0.6))
      .force('charge', d3.forceManyBody().strength(d => d.type === 'hub' ? -1200 * sc : -250 * sc))
      .force('collision', d3.forceCollide().radius(d => d.r + 6))
      .alphaDecay(0.02);

    const g = svg.append('g');
    const zoom = d3.zoom().scaleExtent([0.3, 3]).on('zoom', e => g.attr('transform', e.transform));
    svg.call(zoom);

    const linkEls = g.append('g').selectAll('line').data(links).join('line')
      .attr('stroke', d => d.type === 'cross' ? '#fcd34d' : '#374151')
      .attr('stroke-width', d => d.type === 'cross' ? 2.4 : 1)
      .attr('stroke-dasharray', d => d.type === 'cross' ? '6,3' : null)
      .attr('opacity', d => d.type === 'cross' ? 0.85 : 0.5);

    // Cluster label on every cross link.
    const crossLabels = g.append('g').selectAll('text').data(links.filter(l => l.type === 'cross')).join('text')
      .attr('font-size', '10px').attr('fill', '#fcd34d').attr('text-anchor', 'middle')
      .text(d => d.cluster);

    const nodeG = g.append('g');
    const themeNodes = nodeG.selectAll('.cross-theme').data(nodes.filter(n => n.type === 'theme')).join('g')
      .attr('class', 'cross-theme').style('cursor', 'pointer')
      .on('click', (e, d) => CrossMindmapView.openTheme(d.pid, d.themeId));
    themeNodes.append('circle').attr('r', d => d.r).attr('fill', d => d.color + '55').attr('stroke', d => d.color).attr('stroke-width', 1.6);
    themeNodes.append('text').attr('dy', d => -d.r - 6).attr('text-anchor', 'middle').attr('font-size', '11px').attr('fill', 'var(--text)').text(d => d.label);
    themeNodes.append('title').text(d => d.fullLabel);

    const hubNodes = nodeG.selectAll('.cross-hub').data(nodes.filter(n => n.type === 'hub')).join('g')
      .attr('class', 'cross-hub').style('cursor', 'pointer')
      .on('click', (e, d) => selectPodcast(d.pid));
    hubNodes.append('circle').attr('r', d => d.r).attr('fill', d => d.color + '22').attr('stroke', d => d.color).attr('stroke-width', 2.5);
    // One <tspan> per word so that long podcast names wrap inside the hub.
    hubNodes.append('text').attr('text-anchor', 'middle').attr('font-size', '12px').attr('font-weight', '700').attr('fill', d => d.color)
      .selectAll('tspan').data(d => d.label.split(/\s+/)).join('tspan').attr('x', 0).attr('dy', (d, i) => i === 0 ? '-0.3em' : '1.2em').text(d => d);

    sim.on('tick', () => {
      linkEls.attr('x1', d => d.source.x).attr('y1', d => d.source.y).attr('x2', d => d.target.x).attr('y2', d => d.target.y);
      crossLabels.attr('x', d => (d.source.x + d.target.x) / 2).attr('y', d => (d.source.y + d.target.y) / 2 - 4);
      themeNodes.attr('transform', d => `translate(${d.x},${d.y})`);
      hubNodes.attr('transform', d => `translate(${d.x},${d.y})`);
    });

    // Panel: cluster list.
    const panel = document.getElementById('panel');
    const threshold = Number(this.data?.threshold) || 0;
    let html = `<h2>Cross-Mindmap</h2>`;
    html += `<p class="subtitle">${clusters.length} Themen-Cluster · ${crossClusters.length} davon cross-podcast (Cosinus-Schwelle ${threshold.toFixed(2)})</p>`;
    if (this.data?.available === false) {
      html += `<p class="subtitle">Keine Cluster-Daten vorhanden (data/theme_clusters.json fehlt oder ist nicht lesbar).</p>`;
    }
    if (crossClusters.length) {
      html += `<h3 style="margin-top:14px;font-size:14px">Cross-Cluster</h3>`;
      crossClusters.forEach(c => {
        html += `<div class="transcript-para" style="cursor:default;border-left:3px solid #fcd34d">`;
        html += `<strong>${escHtml(c.label)}</strong>`;
        html += `<div class="subtitle" style="margin-top:4px">`;
        html += (c.members || []).map(m => `<span class="theme-tag" style="cursor:pointer" onclick="CrossMindmapView.openTheme('${escAttr(m.podcast_id)}','${escAttr(m.theme_id)}')">${escHtml(m.podcast_id)} / ${escHtml(m.label)}</span>`).join(' ');
        html += `</div></div>`;
      });
    }
    html += `<h3 style="margin-top:14px;font-size:14px">Solo-Cluster (kein Cross-Match)</h3>`;
    clusters.filter(c => !c.is_cross).forEach(c => {
      const m = c.members?.[0];
      if (!m) return; // nothing to link to for an empty cluster
      html += `<div class="transcript-para" style="cursor:pointer" onclick="CrossMindmapView.openTheme('${escAttr(m.podcast_id)}','${escAttr(m.theme_id)}')">`;
      html += `<span class="theme-tag" style="font-size:10px">${escHtml(m.podcast_id)}</span> ${escHtml(c.label)}`;
      html += `</div>`;
    });
    panel.innerHTML = html;
  },

  /** Switch to the podcast `podcastId` and open its theme `themeId`. */
  openTheme(podcastId, themeId) {
    // Promise.resolve() keeps this working whether or not selectPodcast
    // returns a promise.
    Promise.resolve(selectPodcast(podcastId)).then(() => {
      // Open the theme after a short delay following the podcast switch.
      setTimeout(() => {
        const t = (DATA?.themes || []).find(x => x.id === themeId);
        if (t && typeof showTheme === 'function') showTheme(t);
      }, 400);
    });
  },

  /** Mark the view as hidden. */
  hide() { this.visible = false; }
};
// ── Search ──
const Search = {
init() {
@ -1622,7 +1803,7 @@ const Search = {
showResults(results, query) {
const panel = document.getElementById('panel');
TranscriptView.hide(); AnalysisView.hide(); GapsView.hide(); ShiftsView.hide(); DebatesView.hide(); ArgumentsView.hide(); DensityView.hide();
TranscriptView.hide(); AnalysisView.hide(); GapsView.hide(); ShiftsView.hide(); DebatesView.hide(); ArgumentsView.hide(); DensityView.hide(); CrossMindmapView.hide();
if (results.length === 0) {
panel.innerHTML = `<p class="subtitle">Keine Treffer für "${escHtml(query)}"</p>`;
@ -1650,7 +1831,7 @@ const Search = {
},
showSemanticResults(results, query) {
TranscriptView.hide(); AnalysisView.hide(); GapsView.hide(); ShiftsView.hide(); DebatesView.hide(); ArgumentsView.hide(); DensityView.hide();
TranscriptView.hide(); AnalysisView.hide(); GapsView.hide(); ShiftsView.hide(); DebatesView.hide(); ArgumentsView.hide(); DensityView.hide(); CrossMindmapView.hide();
const panel = document.getElementById('panel');
let html = `<h2>${results.length} semantische Treffer für "${escHtml(query)}" <span class="semantic-badge">KI</span></h2>`;
results.forEach(r => {
@ -1663,7 +1844,7 @@ const Search = {
},
showApiResults(results, query) {
TranscriptView.hide(); AnalysisView.hide(); GapsView.hide(); ShiftsView.hide(); DebatesView.hide(); ArgumentsView.hide(); DensityView.hide();
TranscriptView.hide(); AnalysisView.hide(); GapsView.hide(); ShiftsView.hide(); DebatesView.hide(); ArgumentsView.hide(); DensityView.hide(); CrossMindmapView.hide();
const panel = document.getElementById('panel');
let html = `<h2>${results.length} Treffer für "${escHtml(query)}"</h2>`;
results.forEach(r => {
@ -1826,6 +2007,7 @@ function showPodcastSelector(podcasts) {
selectorHtml += '<button class="compare-btn" onclick="DebatesView.show()">Debatten</button>';
selectorHtml += '<button class="compare-btn" onclick="ArgumentsView.show()">Argumentketten</button>';
selectorHtml += '<button class="compare-btn" onclick="DensityView.show()">Faktendichte</button>';
selectorHtml += '<button class="compare-btn" onclick="CrossMindmapView.show()">Cross-Mindmap</button>';
selectorHtml += '</div>';
}
@ -2219,7 +2401,7 @@ function drag(sim) {
// ── Panel: Theme ──
function showTheme(theme) {
TranscriptView.hide(); AnalysisView.hide(); GapsView.hide(); ShiftsView.hide(); DebatesView.hide(); ArgumentsView.hide(); DensityView.hide();
TranscriptView.hide(); AnalysisView.hide(); GapsView.hide(); ShiftsView.hide(); DebatesView.hide(); ArgumentsView.hide(); DensityView.hide(); CrossMindmapView.hide();
const panel = document.getElementById('panel');
const td = DATA.themes.find(t => t.id === theme.id);
const quotes = DATA.quotes.filter(q => q.themes.includes(theme.id));