feat(#170 followup 2): Pre-Filter, Cluster, Antrags-Initiative, PM-Versionierung, Mail-Link

User-Feedback: Aktuelle-Themen-Dashboard war "Detective-Modus" — durch
viele News scrollen, Match-Stärke selbst interpretieren. Komplett-Refactor
zur kuratierten Sicht mit Tabs.

**1. Pre-Filter + GWÖ-Relevanz-Score (#134)**

`compute_relevance(matches)`: Score = max(antrag.gwoe_score × similarity).
Level: high (≥4.0) / mid (≥2.5) / low (>0) / none.
Pro News in der UI ein farbiger Pill (gruen/orange/grau) + Reason-Text:
"GWÖ-9.0/10-Antrag „Klimaschutzgesetz" (GRÜNE) passt mit Similarity 0.55."

Default-Filter "Nur GWÖ-relevant" aktiv (only_relevant=true) — zeigt
nur high/mid News, blendet Rauschen aus. Toggle-Checkbox.

`/api/aktuelle-themen/top` neuer Param `only_relevant=true|false`.

**2. PM-Versionierung im Modal (#135)**

`list_drafts_for(drucksache, news_url)`: alle Versionen, neueste oben.
Endpoint `/api/aktuelle-themen/drafts-versions`. Modal zeigt Dropdown
wenn >1 Version, Switch ohne LLM-Call. Force-Regen bleibt als Button
im "bestehender Entwurf"-Banner.

**3. News-Cluster-View (#136)**

`aggregate_news_cluster(intra_threshold=0.55, min_cluster_size=2)`:
Greedy-Embedding-Cluster + zentralster Antrags-Match per Centroid-
Vektor. Zweiter Tab "Themen-Cluster": 5 News über "Pflege" → 1 Cluster
mit gemeinsamem Antrag-Vorschlag, statt 5 separate Cards.
Endpoint: `/api/aktuelle-themen/cluster`.

**4. Mail-Direkt-Link + Clipboard (#137)**

Im PM-Modal zwei Buttons:
- "📧 Per Mail versenden" (mailto: mit subject + body, ~1900 Char Limit)
- "📋 In Zwischenablage kopieren" (navigator.clipboard.writeText)
- Bei langer PM (gesamte mailto-URL > 1900 Zeichen): statt des Mail-Links
  erscheint der Hinweis "PM zu lang für Mail-Link — Clipboard nutzen"

**5. Antrags-Initiative (#138)**

`aggregate_top_antraege_with_news(min_gwoe_score=8.0, days=14)`:
Reverse-Sicht — pro Antrag mit GWÖ ≥ 8 die News-Resonanz. Antraege
ohne Match werden trotzdem angezeigt mit "keine News"-Pill.
Dritter Tab "GWÖ-Top-Anträge". Endpoint `.../top-antraege`.

**UI-Restrukturierung:** statt einer langen Scroll-Liste jetzt
5 Tabs mit gemeinsamer Filter-Bar:
- News × Anträge (Default, kuratiert via Pre-Filter)
- Themen-Cluster (Bündel ähnlicher News)
- GWÖ-Top-Anträge (Reverse)
- News-Volumen (Chart)
- PM-Entwürfe (Drafts-Liste)

UI-Default min_similarity von 0.40 auf 0.50 erhöht (weniger Rauschen); die API-Defaults bleiben bei 0.4.

Tests: 14 neue (compute_relevance × 5, only_relevant + sort × 3,
cluster × 3, top_antraege × 3). Suite 1067 gruen.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dotty Dotter 2026-05-03 13:41:31 +02:00
parent 2bff943e8a
commit e27dfc30a2
5 changed files with 827 additions and 45 deletions

View File

@ -2030,14 +2030,55 @@ async def api_aktuelle_themen_top(
top_k: int = 10,
min_similarity: float = 0.4,
matches_per_news: int = 3,
only_relevant: bool = False,
):
"""Top-K News der letzten N Tage mit Antrags-Match."""
"""Top-K News der letzten N Tage mit Antrags-Match.
Mit `only_relevant=true` werden News mit Relevance-Level "low" oder
"none" rausgefiltert.
"""
from .themen_matching import aggregate_top_themen
return aggregate_top_themen(
days_window=days,
top_k=top_k,
min_similarity=min_similarity,
matches_per_news=matches_per_news,
only_relevant=only_relevant,
)
@app.get("/api/aktuelle-themen/cluster")
async def api_aktuelle_themen_cluster(
    days: int = 7,
    intra_threshold: float = 0.55,
    antrag_threshold: float = 0.4,
    min_cluster_size: int = 2,
):
    """Return news-to-news clusters built from embeddings.

    Several sources covering the same topic are reported as one cluster
    instead of separate cards. All query parameters are forwarded
    unchanged to ``aggregate_news_cluster``; ``days`` is passed as its
    ``days_window`` argument.
    """
    from .themen_matching import aggregate_news_cluster

    cluster_kwargs = {
        "days_window": days,
        "intra_threshold": intra_threshold,
        "antrag_threshold": antrag_threshold,
        "min_cluster_size": min_cluster_size,
    }
    return aggregate_news_cluster(**cluster_kwargs)
@app.get("/api/aktuelle-themen/top-antraege")
async def api_aktuelle_themen_top_antraege(
    min_gwoe_score: float = 8.0,
    days: int = 14,
    min_similarity: float = 0.4,
    top_k_news: int = 5,
):
    """Reverse view: highly GWÖ-rated motions with their current press echo.

    All query parameters are forwarded unchanged to
    ``aggregate_top_antraege_with_news``; ``days`` is passed as its
    ``days_window`` argument.
    """
    from .themen_matching import aggregate_top_antraege_with_news

    query = {
        "min_gwoe_score": min_gwoe_score,
        "days_window": days,
        "min_similarity": min_similarity,
        "top_k_news": top_k_news,
    }
    return aggregate_top_antraege_with_news(**query)
@ -2125,6 +2166,17 @@ async def api_draft_detail(draft_id: int):
return d
@app.get("/api/aktuelle-themen/drafts-versions")
async def api_draft_versions(drucksache: str, news_url: str):
    """List all stored draft versions for one (drucksache, news_url) pair.

    The response echoes both identifiers and carries the result of
    ``list_drafts_for`` under ``"versions"``.
    """
    from .presse_generator import list_drafts_for

    versions = list_drafts_for(drucksache, news_url)
    return {
        "drucksache": drucksache,
        "news_url": news_url,
        "versions": versions,
    }
@app.get("/api/auswertungen/matrix")
async def auswertungen_matrix(
wahlperiode: Optional[str] = None,

View File

@ -279,6 +279,40 @@ def list_drafts(
]
def list_drafts_for(
    drucksache: str,
    news_url: str,
    db_path: Optional[Path] = None,
) -> list[dict]:
    """Return every stored draft version for a (drucksache, news_url) pair.

    Args:
        drucksache: Identifier of the motion the drafts belong to.
        news_url: URL of the news article the drafts refer to.
        db_path: SQLite database file. When omitted (or falsy) the path is
            taken from the project settings.

    Returns:
        One dict per row of ``presse_drafts`` matching both values, ordered
        by ``id`` descending (newest first). Keys: ``id``, ``drucksache``,
        ``bundesland``, ``news_url``, ``news_titel``, ``titel``, ``body``,
        ``model``, ``created_at``. An empty list if the database file does
        not exist.
    """
    if db_path:
        path = db_path
    else:
        # Only load the project settings when no explicit path was given, so
        # callers that pass db_path do not depend on the package config.
        from .config import settings
        path = settings.db_path
    if not Path(path).exists():
        return []

    conn = sqlite3.connect(str(path))
    try:
        # sqlite3.Row exposes the selected column names, so the result dicts
        # are derived from the SELECT list instead of hand-numbered indices.
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            """SELECT id, drucksache, bundesland, news_url, news_titel,
                      titel, body, model, created_at
               FROM presse_drafts
               WHERE drucksache=? AND news_url=?
               ORDER BY id DESC""",
            (drucksache, news_url),
        ).fetchall()
    finally:
        conn.close()
    return [dict(row) for row in rows]
def get_draft(
draft_id: int,
db_path: Optional[Path] = None,

View File

@ -161,46 +161,81 @@
<div class="at-controls">
<label for="at-days">Zeitfenster:</label>
<select id="at-days" onchange="loadThemen()">
<select id="at-days" onchange="loadActiveTab()">
<option value="3">3 Tage</option>
<option value="7" selected>7 Tage</option>
<option value="14">14 Tage</option>
<option value="30">30 Tage</option>
</select>
<label for="at-topk">Top-N News:</label>
<input type="number" id="at-topk" value="15" min="3" max="50" style="width:60px;" onchange="loadThemen()" />
<input type="number" id="at-topk" value="20" min="3" max="50" style="width:60px;" onchange="loadActiveTab()" />
<label for="at-minsim">Min. Similarity:</label>
<select id="at-minsim" onchange="loadThemen()">
<select id="at-minsim" onchange="loadActiveTab()">
<option value="0.30">0.30 (locker)</option>
<option value="0.40" selected>0.40 (default)</option>
<option value="0.50">0.50 (streng)</option>
<option value="0.40">0.40</option>
<option value="0.50" selected>0.50 (default)</option>
</select>
<button onclick="loadThemen()">Aktualisieren</button>
<label style="display:inline-flex;align-items:center;gap:5px;cursor:pointer;">
<input type="checkbox" id="at-only-relevant" checked onchange="loadActiveTab()" />
Nur GWÖ-relevant
</label>
<button onclick="loadActiveTab()">Aktualisieren</button>
</div>
<!-- News-Volumen-Chart -->
<h3 style="font-family:var(--font-display);font-size:14px;color:var(--ecg-teal);margin:1.5rem 0 0.5rem;">
News-Volumen pro Quelle (letzte 30 Tage)
</h3>
<div class="matrix-wrap" style="background:var(--ecg-card-bg);border:1px solid var(--ecg-border);border-radius:4px;padding:14px;">
<canvas id="at-zeitreihe-chart" style="max-height:280px;"></canvas>
<!-- Tabs -->
<div class="auswert-tabs" role="tablist" style="display:flex;gap:4px;margin:1.5rem 0 1rem;border-bottom:2px solid var(--ecg-border);padding-bottom:0;">
<button class="at-tab active" role="tab" data-tab="news" onclick="switchAtTab('news', this)" style="font-family:var(--font-mono);font-size:11px;text-transform:uppercase;letter-spacing:0.06em;padding:6px 14px;border:none;background:none;cursor:pointer;color:var(--ecg-teal);opacity:1;border-bottom:2px solid var(--ecg-teal);margin-bottom:-2px;">News × Anträge</button>
<button class="at-tab" role="tab" data-tab="cluster" onclick="switchAtTab('cluster', this)" style="font-family:var(--font-mono);font-size:11px;text-transform:uppercase;letter-spacing:0.06em;padding:6px 14px;border:none;background:none;cursor:pointer;color:var(--ecg-dark);opacity:0.55;border-bottom:2px solid transparent;margin-bottom:-2px;">Themen-Cluster</button>
<button class="at-tab" role="tab" data-tab="antraege" onclick="switchAtTab('antraege', this)" style="font-family:var(--font-mono);font-size:11px;text-transform:uppercase;letter-spacing:0.06em;padding:6px 14px;border:none;background:none;cursor:pointer;color:var(--ecg-dark);opacity:0.55;border-bottom:2px solid transparent;margin-bottom:-2px;">GWÖ-Top-Anträge</button>
<button class="at-tab" role="tab" data-tab="zeitreihe" onclick="switchAtTab('zeitreihe', this)" style="font-family:var(--font-mono);font-size:11px;text-transform:uppercase;letter-spacing:0.06em;padding:6px 14px;border:none;background:none;cursor:pointer;color:var(--ecg-dark);opacity:0.55;border-bottom:2px solid transparent;margin-bottom:-2px;">News-Volumen</button>
<button class="at-tab" role="tab" data-tab="drafts" onclick="switchAtTab('drafts', this)" style="font-family:var(--font-mono);font-size:11px;text-transform:uppercase;letter-spacing:0.06em;padding:6px 14px;border:none;background:none;cursor:pointer;color:var(--ecg-dark);opacity:0.55;border-bottom:2px solid transparent;margin-bottom:-2px;">PM-Entwürfe</button>
</div>
<div id="at-zeitreihe-meta" class="meta-line" style="font-family:var(--font-mono);font-size:11px;opacity:0.6;margin:8px 0 1.5rem;"></div>
<!-- Top-Themen + Matches -->
<h3 style="font-family:var(--font-display);font-size:14px;color:var(--ecg-teal);margin:1.5rem 0 0.5rem;">
Top-Themen × passende Anträge
</h3>
<!-- Tab Panels -->
<div id="at-tab-news" class="at-panel">
<div id="at-themen-list">
<div style="font-family:var(--font-mono);font-size:12px;opacity:0.5;">Lade …</div>
</div>
</div>
<!-- Drafts-Liste -->
<h3 style="font-family:var(--font-display);font-size:14px;color:var(--ecg-teal);margin:2rem 0 0.5rem;">
Pressemitteilungs-Entwürfe (zuletzt generiert)
<div id="at-tab-cluster" class="at-panel" style="display:none;">
<p style="font-size:11px;font-family:var(--font-mono);opacity:0.7;margin:0 0 0.5rem;">
News mit ähnlicher Thematik werden gebündelt — z.B. 4 Tagesschau- + 2 Bundestag-Artikel
zur gleichen Debatte ergeben einen Cluster mit gemeinsamem Antrags-Match.
</p>
<div id="at-cluster-list">
<div style="font-family:var(--font-mono);font-size:12px;opacity:0.5;">Lade …</div>
</div>
</div>
<div id="at-tab-antraege" class="at-panel" style="display:none;">
<p style="font-size:11px;font-family:var(--font-mono);opacity:0.7;margin:0 0 0.5rem;">
Reverse-Sicht: <strong>GWÖ-bewertete Anträge mit Score ≥ 8</strong>, sortiert nach
aktueller Pressewirkung. Anträge ohne News-Match werden gezeigt — als Hinweis
„Top-Antrag, aktuell ohne mediale Resonanz".
</p>
<div id="at-antraege-list">
<div style="font-family:var(--font-mono);font-size:12px;opacity:0.5;">Lade …</div>
</div>
</div>
<div id="at-tab-zeitreihe" class="at-panel" style="display:none;">
<h3 style="font-family:var(--font-display);font-size:14px;color:var(--ecg-teal);margin:0 0 0.5rem;">
News-Volumen pro Quelle (letzte 30 Tage)
</h3>
<div class="matrix-wrap" style="background:var(--ecg-card-bg);border:1px solid var(--ecg-border);border-radius:4px;padding:14px;">
<canvas id="at-zeitreihe-chart" style="max-height:320px;"></canvas>
</div>
<div id="at-zeitreihe-meta" class="meta-line" style="font-family:var(--font-mono);font-size:11px;opacity:0.6;margin:8px 0 1.5rem;"></div>
</div>
<div id="at-tab-drafts" class="at-panel" style="display:none;">
<p style="font-size:11px;font-family:var(--font-mono);opacity:0.7;margin:0 0 0.5rem;">
Bisher generierte Pressemitteilungs-Entwürfe (zuletzt generiert oben).
</p>
<div id="at-drafts-list">
<div style="font-family:var(--font-mono);font-size:12px;opacity:0.5;">Lade Entwürfe …</div>
<div style="font-family:var(--font-mono);font-size:12px;opacity:0.5;">Lade …</div>
</div>
</div>
<!-- Modal für Draft-Anzeige -->
@ -217,6 +252,7 @@
{% block body_scripts %}
<script>
let _atZeitreiheChart = null;
let _atActiveTab = 'news';
function atScoreClass(score) {
if (score == null) return '';
@ -225,40 +261,79 @@ function atScoreClass(score) {
return 's-low';
}
function atRelevancePill(rel) {
  // Render the coloured relevance badge for a news bucket. `rel` is the
  // relevance object ({level, score}); a missing object yields no badge and
  // an unknown level falls back to the "none" look.
  if (!rel) return '';
  const palette = {
    'high': {bg: 'rgba(136,158,51,0.30)', fg: '#3d4f0a', label: 'GWÖ-relevant'},
    'mid': {bg: 'rgba(247,148,29,0.25)', fg: '#7a4a00', label: 'GWÖ-mittel'},
    'low': {bg: 'rgba(150,150,150,0.25)', fg: '#555', label: 'schwach'},
    'none': {bg: 'rgba(150,150,150,0.15)', fg: '#888', label: 'kein Match'},
  };
  const look = palette[rel.level] || palette.none;
  const css = [
    'display:inline-block',
    'padding:2px 9px',
    'border-radius:11px',
    'font-family:var(--font-mono)',
    'font-size:10px',
    'font-weight:700',
    `background:${look.bg}`,
    `color:${look.fg}`,
    'margin-right:6px',
  ].join(';') + ';';
  return `<span style="${css}">${look.label} · ${rel.score}</span>`;
}
function atFmtDatum(s) {
  // Keep only the first 10 characters (the YYYY-MM-DD part of an ISO
  // timestamp); empty or too-short values render as an empty string.
  const unusable = !s || s.length < 10;
  return unusable ? '' : s.slice(0, 10);
}
function switchAtTab(name, btn) {
  // Activate tab `name`: remember it, restyle the tab buttons so only `btn`
  // looks active, show the matching panel and (re)load its content.
  _atActiveTab = name;

  const paint = (el, color, opacity, underline) => {
    el.style.color = color;
    el.style.opacity = opacity;
    el.style.borderBottomColor = underline;
  };
  for (const tab of document.querySelectorAll('.at-tab')) {
    paint(tab, 'var(--ecg-dark)', '0.55', 'transparent');
  }
  paint(btn, 'var(--ecg-teal)', '1', 'var(--ecg-teal)');

  for (const panel of document.querySelectorAll('.at-panel')) {
    panel.style.display = 'none';
  }
  document.getElementById('at-tab-' + name).style.display = 'block';

  loadActiveTab();
}
function loadActiveTab() {
  // Call the loader that belongs to the currently active tab; unknown tab
  // names do nothing. Loaders are wrapped in arrows so each one is only
  // looked up when its tab is actually selected.
  const loaders = new Map([
    ['news', () => loadThemen()],
    ['cluster', () => loadCluster()],
    ['antraege', () => loadAntraege()],
    ['zeitreihe', () => loadZeitreihe()],
    ['drafts', () => loadDrafts()],
  ]);
  const run = loaders.get(_atActiveTab);
  if (run) run();
}
async function loadThemen() {
const days = document.getElementById('at-days').value;
const topk = document.getElementById('at-topk').value;
const minsim = document.getElementById('at-minsim').value;
const onlyRel = document.getElementById('at-only-relevant').checked ? '1' : '0';
const list = document.getElementById('at-themen-list');
list.innerHTML = '<div style="font-family:var(--font-mono);font-size:12px;opacity:0.5;">Lade …</div>';
try {
const r = await fetch(`/api/aktuelle-themen/top?days=${days}&top_k=${topk}&min_similarity=${minsim}&matches_per_news=3`);
const r = await fetch(`/api/aktuelle-themen/top?days=${days}&top_k=${topk}&min_similarity=${minsim}&matches_per_news=3&only_relevant=${onlyRel}`);
const data = await r.json();
if (!data.buckets || !data.buckets.length) {
list.innerHTML = '<div style="font-family:var(--font-mono);font-size:12px;opacity:0.5;">Keine News im Zeitfenster oder noch nicht embedded.</div>';
list.innerHTML = '<div style="font-family:var(--font-mono);font-size:12px;opacity:0.5;">Keine News im Zeitfenster ' + (onlyRel === '1' ? '(Filter: nur GWÖ-relevant aktiv — versuch ohne Filter)' : 'oder noch nicht embedded') + '.</div>';
return;
}
let html = '';
for (const b of data.buckets) {
const n = b.news;
const rel = b.relevance;
const tags = (n.tags || []).map(t => `<span class="at-tag">${t}</span>`).join('');
html += '<div class="at-news-card">';
html += `<div class="at-news-head">${atFmtDatum(n.datum)} · ${n.source}${n.ressort ? ' / ' + n.ressort : ''}</div>`;
html += `<div class="at-news-head">${atRelevancePill(rel)}${atFmtDatum(n.datum)} · ${n.source}${n.ressort ? ' / ' + n.ressort : ''}</div>`;
html += `<h4 class="at-news-title"><a href="${n.url}" target="_blank" rel="noopener">${n.titel}</a></h4>`;
if (n.summary) html += `<div class="at-news-summary">${n.summary}</div>`;
if (tags) html += `<div class="at-news-tags">${tags}</div>`;
if (b.matches && b.matches.length) {
html += '<div class="at-matches">';
html += '<div class="at-matches-label">Passende Anträge:</div>';
html += `<div class="at-matches-label">Warum: ${rel.reason}</div>`;
for (const m of b.matches) {
const sc = m.gwoe_score != null ? m.gwoe_score.toFixed(1) : '—';
const fr = (m.fraktionen || []).join(', ');
@ -283,6 +358,103 @@ async function loadThemen() {
}
}
async function loadCluster() {
  // Fetch the news clusters for the selected time window and render them
  // into #at-cluster-list.
  //
  // Titles, tags, sources and URLs come from the API response (ultimately
  // from news feeds), so every interpolated value is HTML-escaped, links are
  // limited to http(s), and arguments of the inline onclick handler are
  // escaped for both the JS string and the HTML attribute.
  const escapeHtml = (v) => (v == null ? '' : String(v)).replace(
    /[&<>"']/g,
    (ch) => ({'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'}[ch])
  );
  const safeHref = (u) => (/^https?:\/\//i.test(String(u)) ? escapeHtml(u) : '#');
  const jsArg = (v) => escapeHtml(String(v).replace(/\\/g, '\\\\').replace(/'/g, "\\'"));
  // encodeURIComponent leaves "'" untouched, which would end the
  // single-quoted JS argument early.
  const urlArg = (u) => encodeURIComponent(u).replace(/'/g, '%27');

  const days = document.getElementById('at-days').value;
  const list = document.getElementById('at-cluster-list');
  list.innerHTML = '<div style="font-family:var(--font-mono);font-size:12px;opacity:0.5;">Lade …</div>';
  try {
    const r = await fetch(`/api/aktuelle-themen/cluster?days=${days}&intra_threshold=0.55&min_cluster_size=2&antrag_threshold=0.4`);
    if (!r.ok) throw new Error(`HTTP ${r.status}`);
    const data = await r.json();
    if (!data.clusters || !data.clusters.length) {
      list.innerHTML = '<div style="font-family:var(--font-mono);font-size:12px;opacity:0.5;">Keine Themen-Cluster im Zeitfenster (jeder Cluster braucht mind. 2 inhaltlich ähnliche News).</div>';
      return;
    }
    let html = '';
    for (const c of data.clusters) {
      const members = c.members || [];
      const tagPills = (c.top_tags || []).map(t => `<span class="at-tag">${escapeHtml(t)}</span>`).join('');
      html += '<div class="at-news-card">';
      html += `<div class="at-news-head">Cluster · ${escapeHtml(c.size)} News · ${tagPills || '<em>keine Tags</em>'}</div>`;
      html += '<div style="margin:6px 0;">';
      // Show at most six member headlines, then a "… and N more" line.
      for (const m of members.slice(0, 6)) {
        html += `<div style="font-size:12px;line-height:1.5;"><span style="opacity:0.6;font-family:var(--font-mono);font-size:10px;">[${escapeHtml(m.source)}]</span> <a href="${safeHref(m.url)}" target="_blank" rel="noopener" style="color:var(--ecg-teal);text-decoration:none;">${escapeHtml(m.titel)}</a></div>`;
      }
      if (members.length > 6) {
        html += `<div style="font-size:11px;opacity:0.6;">… und ${members.length - 6} weitere</div>`;
      }
      html += '</div>';
      if (c.antrag_matches && c.antrag_matches.length) {
        // The press-release button uses the first cluster member as the news reference.
        const firstNewsUrl = members[0].url;
        html += '<div class="at-matches"><div class="at-matches-label">Passende Anträge:</div>';
        for (const m of c.antrag_matches) {
          const sc = m.gwoe_score != null ? m.gwoe_score.toFixed(1) : '—';
          const fr = (m.fraktionen || []).join(', ');
          html += '<div class="at-match">';
          html += `<span class="at-score-pill ${atScoreClass(m.gwoe_score)}">${sc}</span>`;
          html += `<a href="/antrag/${encodeURIComponent(m.drucksache)}" style="color:var(--ecg-teal);text-decoration:none;font-weight:500;">${escapeHtml(m.drucksache)}</a>`;
          html += `<span style="opacity:0.85;">${escapeHtml(m.title || '')}</span>`;
          if (fr) html += `<span style="opacity:0.6;font-size:11px;">— ${escapeHtml(fr)}</span>`;
          html += `<span class="at-sim">sim ${escapeHtml(m.similarity)}</span>`;
          html += `<button class="at-presse-btn" onclick="generatePresse('${jsArg(m.drucksache)}', '${urlArg(firstNewsUrl)}', this)">PM-Vorschlag</button>`;
          html += '</div>';
        }
        html += '</div>';
      } else {
        html += '<div class="at-matches"><div class="at-matches-label">Kein Antrag-Match — Themen-Cluster ohne GWÖ-Anker.</div></div>';
      }
      html += '</div>';
    }
    list.innerHTML = html;
  } catch (e) {
    list.innerHTML = `<div style="color:#c00;font-family:var(--font-mono);font-size:12px;">Fehler: ${escapeHtml(e)}</div>`;
  }
}
async function loadAntraege() {
  // Fetch the highly GWÖ-rated motions together with their matching recent
  // news and render them into #at-antraege-list.
  //
  // Motion and news fields come from the API response (news fields
  // ultimately from news feeds), so every interpolated value is
  // HTML-escaped, news links are limited to http(s), and arguments of the
  // inline onclick handler are escaped for both the JS string and the HTML
  // attribute.
  const escapeHtml = (v) => (v == null ? '' : String(v)).replace(
    /[&<>"']/g,
    (ch) => ({'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'}[ch])
  );
  const safeHref = (u) => (/^https?:\/\//i.test(String(u)) ? escapeHtml(u) : '#');
  const jsArg = (v) => escapeHtml(String(v).replace(/\\/g, '\\\\').replace(/'/g, "\\'"));
  // encodeURIComponent leaves "'" untouched, which would end the
  // single-quoted JS argument early.
  const urlArg = (u) => encodeURIComponent(u).replace(/'/g, '%27');

  const days = document.getElementById('at-days').value;
  const list = document.getElementById('at-antraege-list');
  list.innerHTML = '<div style="font-family:var(--font-mono);font-size:12px;opacity:0.5;">Lade …</div>';
  try {
    const r = await fetch(`/api/aktuelle-themen/top-antraege?min_gwoe_score=8.0&days=${days}&min_similarity=0.4&top_k_news=5`);
    if (!r.ok) throw new Error(`HTTP ${r.status}`);
    const data = await r.json();
    if (!data.antraege || !data.antraege.length) {
      list.innerHTML = '<div style="font-family:var(--font-mono);font-size:12px;opacity:0.5;">Keine GWÖ-≥8-Anträge in der DB.</div>';
      return;
    }
    let html = '';
    for (const a of data.antraege) {
      const sc = a.gwoe_score != null ? a.gwoe_score.toFixed(1) : '—';
      const fr = (a.fraktionen || []).join(', ');
      const newsBadge = a.news_count > 0
        ? `<span style="display:inline-block;padding:2px 9px;border-radius:11px;font-family:var(--font-mono);font-size:10px;font-weight:700;background:rgba(136,158,51,0.30);color:#3d4f0a;margin-right:6px;">${escapeHtml(a.news_count)} aktuelle News</span>`
        : `<span style="display:inline-block;padding:2px 9px;border-radius:11px;font-family:var(--font-mono);font-size:10px;font-weight:700;background:rgba(150,150,150,0.20);color:#777;margin-right:6px;">keine News</span>`;
      html += '<div class="at-news-card">';
      html += `<div class="at-news-head">${newsBadge}<span class="at-score-pill ${atScoreClass(a.gwoe_score)}" style="margin-right:6px;">${sc}</span>${escapeHtml(a.bundesland)} · ${escapeHtml(atFmtDatum(a.datum))}${fr ? ' · ' + escapeHtml(fr) : ''}</div>`;
      html += `<h4 class="at-news-title"><a href="/antrag/${encodeURIComponent(a.drucksache)}" style="color:var(--ecg-teal);text-decoration:none;">${escapeHtml(a.drucksache)} — ${escapeHtml(a.title || '')}</a></h4>`;
      if (a.antrag_zusammenfassung) {
        // Truncate first, then escape, so an entity is never cut in half.
        const summary = a.antrag_zusammenfassung;
        html += `<div class="at-news-summary">${escapeHtml(summary.slice(0, 200))}${summary.length > 200 ? '…' : ''}</div>`;
      }
      if (a.top_news && a.top_news.length) {
        html += '<div class="at-matches"><div class="at-matches-label">Aktuelle News:</div>';
        for (const n of a.top_news) {
          html += '<div class="at-match">';
          html += `<span style="font-family:var(--font-mono);font-size:10px;opacity:0.6;">[${escapeHtml(n.source)}]</span>`;
          html += `<a href="${safeHref(n.url)}" target="_blank" rel="noopener" style="color:var(--ecg-teal);text-decoration:none;">${escapeHtml(n.titel)}</a>`;
          html += `<span class="at-sim">sim ${escapeHtml(n.similarity)}</span>`;
          html += `<button class="at-presse-btn" onclick="generatePresse('${jsArg(a.drucksache)}', '${urlArg(n.url)}', this)">PM-Vorschlag</button>`;
          html += '</div>';
        }
        html += '</div>';
      } else {
        html += '<div class="at-matches"><div class="at-matches-label">Keine aktuellen News passen — Top-Antrag wartet auf passende mediale Welle.</div></div>';
      }
      html += '</div>';
    }
    list.innerHTML = html;
  } catch (e) {
    list.innerHTML = `<div style="color:#c00;font-family:var(--font-mono);font-size:12px;">Fehler: ${escapeHtml(e)}</div>`;
  }
}
async function loadZeitreihe() {
const meta = document.getElementById('at-zeitreihe-meta');
try {
@ -397,29 +569,79 @@ async function regeneratePresse(drucksache, newsUrlEnc) {
}
}
function showDraftFromData(d) {
async function showDraftFromData(d) {
const backdrop = document.getElementById('at-modal-backdrop');
document.getElementById('at-modal-title').textContent = d.titel;
const isExisting = d._was_existing === true;
const newsUrlEnc = encodeURIComponent(d.news_url);
const dsEnc = d.drucksache.replace(/'/g, "\\'");
const existingNote = isExisting
? `<div style="font-family:var(--font-mono);font-size:10px;opacity:0.7;background:rgba(247,148,29,0.18);padding:6px 8px;border-radius:3px;margin-bottom:8px;">
// Versionen abfragen — falls >1, Dropdown anzeigen
let versionsHtml = '';
try {
const vr = await fetch(`/api/aktuelle-themen/drafts-versions?drucksache=${encodeURIComponent(d.drucksache)}&news_url=${newsUrlEnc}`);
const vd = await vr.json();
if (vd.versions && vd.versions.length > 1) {
versionsHtml = '<select onchange="loadVersion(this.value)" style="margin-left:8px;font-family:var(--font-mono);font-size:10px;padding:2px 6px;">';
for (const v of vd.versions) {
const sel = (v.id === d.id) ? ' selected' : '';
versionsHtml += `<option value="${v.id}"${sel}>v${v.id} — ${(v.created_at || '').slice(0,16)} (${v.model})</option>`;
}
versionsHtml += '</select>';
}
} catch (e) { /* silent */ }
const banner = isExisting
? `<div style="font-family:var(--font-mono);font-size:10px;opacity:0.85;background:rgba(247,148,29,0.18);padding:6px 8px;border-radius:3px;margin-bottom:8px;">
Bestehender Entwurf vom ${(d.created_at || '').slice(0,10)} · Modell: ${d.model || '—'} · kein LLM-Call
<button type="button" onclick="regeneratePresse('${dsEnc}', '${newsUrlEnc}')" style="margin-left:8px;font-family:var(--font-mono);font-size:10px;padding:2px 8px;border:1px solid var(--ecg-border);border-radius:3px;background:var(--ecg-card-bg);cursor:pointer;">Neu generieren</button>
${versionsHtml}
</div>`
: `<div style="font-family:var(--font-mono);font-size:10px;opacity:0.7;background:rgba(136,158,51,0.18);padding:6px 8px;border-radius:3px;margin-bottom:8px;">
Neu generiert · Modell: ${d.model || '—'}
: `<div style="font-family:var(--font-mono);font-size:10px;opacity:0.85;background:rgba(136,158,51,0.18);padding:6px 8px;border-radius:3px;margin-bottom:8px;">
Neu generiert · Modell: ${d.model || '—'} ${versionsHtml}
</div>`;
// Action-Buttons: Mail + Clipboard
const mailtoBody = encodeURIComponent(d.body + '\n\n— Bezug: ' + d.news_titel + ' (' + d.news_url + ')');
const mailtoSubject = encodeURIComponent(d.titel);
const mailto = `mailto:?subject=${mailtoSubject}&body=${mailtoBody}`;
const isMailtoTooLong = mailto.length > 1900;
const actionRow = `<div style="display:flex;gap:8px;margin:10px 0 12px;">
${isMailtoTooLong ? '<span style="font-family:var(--font-mono);font-size:10px;opacity:0.6;">PM zu lang für Mail-Link — Clipboard nutzen.</span>'
: `<a href="${mailto}" style="font-family:var(--font-mono);font-size:11px;padding:5px 12px;border:1px solid var(--ecg-border);border-radius:3px;background:var(--ecg-card-bg);color:var(--ecg-dark);text-decoration:none;">📧 Per Mail versenden</a>`}
<button type="button" onclick="copyDraftToClipboard(this, ${JSON.stringify(d.titel).replace(/"/g, '&quot;')}, ${JSON.stringify(d.body).replace(/"/g, '&quot;')})" style="font-family:var(--font-mono);font-size:11px;padding:5px 12px;border:1px solid var(--ecg-border);border-radius:3px;background:var(--ecg-card-bg);color:var(--ecg-dark);cursor:pointer;">📋 In Zwischenablage kopieren</button>
</div>`;
document.getElementById('at-modal-body').innerHTML =
existingNote +
banner +
`<div style="font-family:var(--font-mono);font-size:11px;opacity:0.6;margin-bottom:10px;">
DS ${d.drucksache} (${d.bundesland}) · Bezug zu: <a href="${d.news_url}" target="_blank" rel="noopener" style="color:var(--ecg-teal);">${d.news_titel}</a>
</div>
<div style="white-space:pre-wrap;">${d.body.replace(/</g, '&lt;')}</div>`;
</div>` +
actionRow +
`<div style="white-space:pre-wrap;font-size:13px;line-height:1.5;">${d.body.replace(/</g, '&lt;')}</div>`;
backdrop.style.display = 'flex';
}
async function loadVersion(draftId) {
  // Load one stored draft version by id and show it in the modal.
  // Used by the version dropdown.
  try {
    const r = await fetch(`/api/aktuelle-themen/drafts/${draftId}`);
    // A non-2xx response carries no draft; report it instead of rendering it.
    if (!r.ok) throw new Error(`HTTP ${r.status}`);
    const d = await r.json();
    // showDraftFromData is async: without await its errors would bypass
    // this try/catch and end up as unhandled promise rejections.
    await showDraftFromData(d);
  } catch (e) { alert('Fehler: ' + e); }
}
async function copyDraftToClipboard(btn, titel, body) {
  // Copy "<title>\n\n<body>" to the clipboard and flash a confirmation on
  // the button for 1.8 s. On failure the text is shown in an alert so it
  // can still be copied by hand.
  const text = titel + '\n\n' + body;
  try {
    await navigator.clipboard.writeText(text);
    const previousLabel = btn.textContent;
    const restoreLabel = () => { btn.textContent = previousLabel; };
    btn.textContent = '✓ kopiert';
    setTimeout(restoreLabel, 1800);
  } catch (e) {
    alert('Clipboard-Fehler: ' + e + '\n\nText:\n' + text);
  }
}
async function showDraft(id) {
try {
const r = await fetch(`/api/aktuelle-themen/drafts/${id}`);
@ -440,9 +662,7 @@ document.addEventListener('keydown', (e) => {
if (e.key === 'Escape') document.getElementById('at-modal-backdrop').style.display = 'none';
});
// Init
loadZeitreihe();
loadThemen();
loadDrafts();
// Init: News-Tab als Default
loadActiveTab();
</script>
{% endblock %}

View File

@ -205,11 +205,71 @@ def find_news_for_antrag(
return scored[:top_k]
def compute_relevance(matches: list[dict]) -> dict:
    """Derive a relevance score, level and explanation from a match list.

    The score is the maximum of ``gwoe_score * similarity`` over all
    matches; a missing or ``None`` value counts as 0. Levels:

    - score >= 4.0 -> "high"
    - score >= 2.5 -> "mid"
    - score > 0    -> "low"
    - otherwise    -> "none"

    The reason is a short German sentence naming the strongest match. It
    is assembled purely from the match data.

    Args:
        matches: Match dicts; the keys ``gwoe_score``, ``similarity``,
            ``title`` and ``fraktionen`` are read, all optional.

    Returns:
        Dict with ``score`` (rounded to two decimals), ``level`` and
        ``reason``.
    """
    no_match_reason = "Keine GWÖ-bewerteten Anträge passen zu dieser News."
    if not matches:
        return {"score": 0.0, "level": "none", "reason": no_match_reason}

    def contribution(match: dict) -> float:
        return (match.get("gwoe_score") or 0.0) * (match.get("similarity") or 0.0)

    # max() returns the first maximal element, the same pick a stable
    # descending sort would put at index 0.
    best_match = max(matches, key=contribution)
    best_score = contribution(best_match)

    if best_score >= 4.0:
        level = "high"
    elif best_score >= 2.5:
        level = "mid"
    elif best_score > 0:
        level = "low"
    else:
        # Matches exist but none carries a usable GWÖ score; a sentence
        # like "GWÖ-None/10-Antrag ..." would be misleading here.
        return {
            "score": round(best_score, 2),
            "level": "none",
            "reason": no_match_reason,
        }

    fr = ", ".join(best_match.get("fraktionen") or [])
    fr_clause = f" ({fr})" if fr else ""
    titel = (best_match.get("title") or "").strip()
    if len(titel) > 70:
        # Mark the cut so a truncated title is recognisable as such.
        titel = titel[:67] + "…"
    reason = (
        f"GWÖ-{best_match.get('gwoe_score')}/10-Antrag „{titel}“"
        f"{fr_clause} passt mit Similarity {best_match.get('similarity')}"
    )
    further = len(matches) - 1
    if further > 0:
        # The separator keeps the count from running into the similarity value.
        reason += f" — {further} weitere(r) Match(es)."
    else:
        reason += "."

    return {
        "score": round(best_score, 2),
        "level": level,
        "reason": reason,
    }
def aggregate_top_themen(
days_window: int = 7,
top_k: int = 10,
min_similarity: float = 0.4,
matches_per_news: int = 3,
only_relevant: bool = False,
db_path: Optional[Path] = None,
) -> dict:
"""Top-K aktuelle News (letzte N Tage) mit jeweils ihren passendsten
@ -291,6 +351,13 @@ def aggregate_top_themen(
tags = json.loads(n["tags"]) if n["tags"] else []
except (json.JSONDecodeError, TypeError):
tags = []
top_matches = scored[:matches_per_news]
relevance = compute_relevance(top_matches)
# Pre-Filter: optional alle non-high/-mid raus
if only_relevant and relevance["level"] not in ("high", "mid"):
continue
buckets.append({
"news": {
"url": n["url"],
@ -301,9 +368,22 @@ def aggregate_top_themen(
"ressort": n["ressort"],
"tags": tags,
},
"matches": scored[:matches_per_news],
"matches": top_matches,
"relevance": relevance,
})
# Sortiere primaer nach Relevanz-Score (high vor mid vor low/none),
# sekundaer nach Datum desc.
level_rank = {"high": 3, "mid": 2, "low": 1, "none": 0}
buckets.sort(
key=lambda b: (
level_rank.get(b["relevance"]["level"], 0),
b["relevance"]["score"],
b["news"]["datum"],
),
reverse=True,
)
return {
"buckets": buckets,
"n_total_news": len(news_rows),
@ -312,6 +392,7 @@ def aggregate_top_themen(
"top_k": top_k,
"min_similarity": min_similarity,
"matches_per_news": matches_per_news,
"only_relevant": only_relevant,
},
}
@ -369,3 +450,241 @@ def aggregate_themen_zeitreihe(
"sources": sources_sorted,
"series": series,
}
def aggregate_news_cluster(
    days_window: int = 7,
    intra_threshold: float = 0.55,
    antrag_threshold: float = 0.4,
    min_cluster_size: int = 2,
    db_path: Optional[Path] = None,
) -> dict:
    """Cluster recent news by embedding similarity and attach matching motions.

    Greedy clustering: news are ordered newest first; every not yet assigned
    article becomes a cluster seed and absorbs all later unassigned articles
    whose ``emb.cosine_similarity`` to the seed is at least
    ``intra_threshold``. Clusters with fewer than ``min_cluster_size``
    members are dropped.

    For each cluster the component-wise mean of the member vectors is
    compared against every assessment embedding; assessments with a
    similarity of at least ``antrag_threshold`` are ranked by similarity
    and the top three are reported.

    Args:
        days_window: Only news whose ``datum`` lies within this many days
            before now are considered.
        intra_threshold: Minimum seed-to-article similarity for membership.
        antrag_threshold: Minimum centroid-to-assessment similarity.
        min_cluster_size: Minimum number of articles per reported cluster.
        db_path: Database file; defaults to the path from project settings.

    Returns:
        Dict with ``clusters`` (sorted by size, then by best assessment
        similarity, both descending), ``n_total_news`` (number of news in
        the time window) and ``filter`` (echo of the parameters). If the
        database file does not exist, only ``clusters`` (empty) and
        ``n_total_news`` (0) are returned.
    """
    from .config import settings
    from . import embeddings as emb

    path = db_path or settings.db_path
    if not Path(path).exists():
        return {"clusters": [], "n_total_news": 0}

    cutoff = datetime.now(timezone.utc).timestamp() - days_window * 86400

    news_rows = _load_embeddings(
        Path(path),
        "news_articles",
        ["url", "titel", "summary", "datum", "source", "ressort", "tags"],
    )
    fresh = []
    for n in news_rows:
        try:
            ts = datetime.fromisoformat(n["datum"].replace("Z", "+00:00")).timestamp()
        except (ValueError, AttributeError):
            # Unparseable or missing date: the article cannot be placed in
            # the time window, so it is skipped.
            continue
        if ts < cutoff:
            continue
        n["_ts"] = ts
        fresh.append(n)
    fresh.sort(key=lambda x: x["_ts"], reverse=True)

    # Greedy clustering: similarity is always measured against the seed,
    # not against the members added so far.
    assigned = [False] * len(fresh)
    clusters = []
    for i, seed in enumerate(fresh):
        if assigned[i]:
            continue
        members = [seed]
        assigned[i] = True
        for j in range(i + 1, len(fresh)):
            if assigned[j]:
                continue
            if emb.cosine_similarity(seed["_vec"], fresh[j]["_vec"]) >= intra_threshold:
                members.append(fresh[j])
                assigned[j] = True
        if len(members) >= min_cluster_size:
            clusters.append(members)

    assessments = _load_embeddings(
        Path(path),
        "assessments",
        ["drucksache", "title", "bundesland", "fraktionen", "gwoe_score",
         "empfehlung", "datum"],
    )

    out_clusters = []
    for cluster in clusters:
        # Centroid: component-wise mean of the member vectors.
        n_members = len(cluster)
        centroid = [
            sum(component) / n_members
            for component in zip(*(m["_vec"] for m in cluster))
        ]

        scored_antraege = []
        for a in assessments:
            sim = emb.cosine_similarity(centroid, a["_vec"])
            if sim < antrag_threshold:
                continue
            # Parse tolerantly, like the tags below: one malformed row must
            # not fail the whole request.
            try:
                fraktionen = json.loads(a["fraktionen"] or "[]")
            except (json.JSONDecodeError, TypeError):
                fraktionen = []
            scored_antraege.append({
                "drucksache": a["drucksache"],
                "title": a["title"],
                "bundesland": a["bundesland"],
                "fraktionen": fraktionen,
                "gwoe_score": a["gwoe_score"],
                "empfehlung": a["empfehlung"],
                "datum": a["datum"],
                "similarity": round(sim, 3),
            })
        scored_antraege.sort(key=lambda x: x["similarity"], reverse=True)

        # Count tags across all members; the five most frequent describe
        # the cluster.
        tag_counts: defaultdict[str, int] = defaultdict(int)
        for m in cluster:
            try:
                tags = json.loads(m["tags"]) if m["tags"] else []
            except (json.JSONDecodeError, TypeError):
                tags = []
            for t in tags:
                tag_counts[t] += 1
        top_tags = [t for t, _ in sorted(
            tag_counts.items(), key=lambda x: x[1], reverse=True,
        )[:5]]

        out_clusters.append({
            "size": n_members,
            "top_tags": top_tags,
            "members": [
                {
                    "url": m["url"], "titel": m["titel"],
                    "datum": m["datum"], "source": m["source"],
                    "ressort": m["ressort"],
                }
                for m in cluster
            ],
            "antrag_matches": scored_antraege[:3],
        })

    # Largest clusters first; ties broken by the best assessment similarity.
    out_clusters.sort(
        key=lambda c: (
            c["size"],
            c["antrag_matches"][0]["similarity"] if c["antrag_matches"] else 0,
        ),
        reverse=True,
    )

    return {
        "clusters": out_clusters,
        "n_total_news": len(fresh),
        "filter": {
            "days_window": days_window,
            "intra_threshold": intra_threshold,
            "antrag_threshold": antrag_threshold,
            "min_cluster_size": min_cluster_size,
        },
    }
def aggregate_top_antraege_with_news(
    min_gwoe_score: float = 8.0,
    days_window: int = 14,
    min_similarity: float = 0.4,
    top_k_news: int = 5,
    db_path: Optional[Path] = None,
) -> dict:
    """Reverse view: highly GWÖ-rated motions with their current news resonance.

    For every assessment with ``gwoe_score >= min_gwoe_score`` the news
    articles of the last ``days_window`` days are matched by embedding
    cosine similarity. Motions without any matching news are still listed
    (``news_count == 0``) as a hint "top motion, currently no press echo".

    Args:
        min_gwoe_score: Lower bound on ``gwoe_score`` for a motion to be listed.
        days_window: Only news whose ``datum`` lies within this many days
            before now are considered.
        min_similarity: Minimum cosine similarity for a news item to count
            as a match for a motion.
        top_k_news: Maximum number of matched news items returned per motion.
        db_path: Database file; falls back to ``settings.db_path`` when
            omitted.

    Returns:
        ``{"antraege": [...], "filter": {...}}``. Each motion entry carries
        its metadata, ``news_count`` (all matches) and ``top_news`` (the
        ``top_k_news`` best matches, highest similarity first). Motions are
        ordered by ``news_count`` descending, then ``gwoe_score`` descending.
        If the database file does not exist, only ``{"antraege": []}`` is
        returned (no ``filter`` key).
    """
    from .config import settings
    from . import embeddings as emb

    def _json_list(raw):
        """Decode a JSON column; empty or malformed values become ``[]``."""
        if not raw:
            return []
        try:
            return json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            return []

    path = Path(db_path or settings.db_path)
    if not path.exists():
        return {"antraege": []}

    cutoff = datetime.now(timezone.utc).timestamp() - days_window * 86400

    # Load the highly rated motions; the score filter is applied in SQL.
    assessments = _load_embeddings(
        path,
        "assessments",
        ["drucksache", "title", "bundesland", "fraktionen", "gwoe_score",
         "empfehlung", "datum", "antrag_zusammenfassung"],
        where_extra=" AND gwoe_score >= ?",
        params=(min_gwoe_score,),
    )

    # Load all news and keep only those inside the time window.
    news_rows = _load_embeddings(
        path,
        "news_articles",
        ["url", "titel", "summary", "datum", "source", "ressort", "tags"],
    )
    fresh_news = []
    for news in news_rows:
        try:
            # fromisoformat() rejects a trailing "Z" on older Python versions.
            # NOTE(review): a datum without UTC offset is interpreted in the
            # server's local timezone by .timestamp() - confirm stored format.
            published = datetime.fromisoformat(
                news["datum"].replace("Z", "+00:00")
            ).timestamp()
        except (ValueError, AttributeError, TypeError):
            # Missing or unparsable date: the item cannot be placed in the
            # window, so it is skipped.
            continue
        if published >= cutoff:
            fresh_news.append(news)

    out = []
    for antrag in assessments:
        scored = []
        for news in fresh_news:
            sim = emb.cosine_similarity(antrag["_vec"], news["_vec"])
            if sim < min_similarity:
                continue
            scored.append({
                "url": news["url"],
                "titel": news["titel"],
                "summary": news["summary"],
                "datum": news["datum"],
                "source": news["source"],
                "ressort": news["ressort"],
                "tags": _json_list(news["tags"]),
                "similarity": round(sim, 3),
            })
        scored.sort(key=lambda item: item["similarity"], reverse=True)
        out.append({
            "drucksache": antrag["drucksache"],
            "title": antrag["title"],
            "bundesland": antrag["bundesland"],
            # Decoded as tolerantly as the news tags: one malformed row must
            # not abort the whole aggregation.
            "fraktionen": _json_list(antrag["fraktionen"]),
            "gwoe_score": antrag["gwoe_score"],
            "empfehlung": antrag["empfehlung"],
            "datum": antrag["datum"],
            "antrag_zusammenfassung": antrag["antrag_zusammenfassung"],
            "news_count": len(scored),
            "top_news": scored[:top_k_news],
        })

    # Motions with news first (descending count already implies that), then
    # by gwoe_score descending; a missing score sorts as 0.
    out.sort(
        key=lambda entry: (entry["news_count"], entry["gwoe_score"] or 0),
        reverse=True,
    )
    return {
        "antraege": out,
        "filter": {
            "min_gwoe_score": min_gwoe_score,
            "days_window": days_window,
            "min_similarity": min_similarity,
            "top_k_news": top_k_news,
        },
    }

View File

@ -10,8 +10,11 @@ from unittest.mock import patch
import pytest
from app.themen_matching import (
aggregate_news_cluster,
aggregate_themen_zeitreihe,
aggregate_top_antraege_with_news,
aggregate_top_themen,
compute_relevance,
find_anträge_for_news,
find_news_for_antrag,
)
@ -276,6 +279,48 @@ class TestAggregateTopThemen:
# ─────────────────────────────────────────────────────────────────────────────
class TestComputeRelevance:
    """Checks the level, score and reason returned by ``compute_relevance``."""

    def test_empty_returns_none_level(self):
        """Without any match the level is ``none`` and the score is zero."""
        relevance = compute_relevance([])
        assert relevance["level"] == "none"
        assert relevance["score"] == 0.0

    def test_high_score_high_sim_high_level(self):
        """8.0 x 0.6 = 4.8 is expected to be rated ``high``."""
        match = {
            "drucksache": "x",
            "title": "T",
            "fraktionen": ["GRÜNE"],
            "gwoe_score": 8.0,
            "similarity": 0.6,
        }
        relevance = compute_relevance([match])
        assert relevance["level"] == "high"
        assert relevance["score"] == 4.8
        assert "GWÖ-8.0" in relevance["reason"]

    def test_low_score_low_level(self):
        """3.0 x 0.5 = 1.5 is expected to be rated ``low``."""
        match = {
            "drucksache": "x",
            "title": "T",
            "fraktionen": [],
            "gwoe_score": 3.0,
            "similarity": 0.5,
        }
        relevance = compute_relevance([match])
        assert relevance["level"] == "low"

    def test_mid_level(self):
        """6.0 x 0.5 = 3.0 is expected to be rated ``mid``."""
        match = {
            "drucksache": "x",
            "title": "T",
            "fraktionen": [],
            "gwoe_score": 6.0,
            "similarity": 0.5,
        }
        relevance = compute_relevance([match])
        assert relevance["level"] == "mid"

    def test_takes_best_match(self):
        """The strongest match (max of 2.0 and 4.95) drives score and reason."""
        weak = {
            "gwoe_score": 5.0, "similarity": 0.4,
            "title": "Schwach", "fraktionen": [],
        }
        strong = {
            "gwoe_score": 9.0, "similarity": 0.55,
            "title": "Stark", "fraktionen": [],
        }
        relevance = compute_relevance([weak, strong])
        assert relevance["score"] == 4.95
        assert "Stark" in relevance["reason"]
class TestAggregateZeitreihe:
def test_structure(self, populated_db):
result = aggregate_themen_zeitreihe(db_path=populated_db, days_window=7)
@ -295,3 +340,115 @@ class TestAggregateZeitreihe:
result = aggregate_themen_zeitreihe(db_path=populated_db, days_window=7)
for source in result["sources"]:
assert len(result["series"][source]) == len(result["buckets"])
# ─────────────────────────────────────────────────────────────────────────────
# aggregate_top_themen mit Relevance + only_relevant Filter
# ─────────────────────────────────────────────────────────────────────────────
class TestRelevanceInTopThemen:
    """Checks the ``relevance`` data attached to ``aggregate_top_themen`` buckets."""

    def test_each_bucket_has_relevance(self, populated_db):
        """Every bucket exposes ``relevance`` with level, score and reason."""
        top = aggregate_top_themen(db_path=populated_db, min_similarity=0.5)
        for bucket in top["buckets"]:
            assert "relevance" in bucket
            for field in ("level", "score", "reason"):
                assert field in bucket["relevance"]

    def test_only_relevant_filters_out_low_or_none(self, populated_db):
        """With ``only_relevant=True`` only ``high``/``mid`` buckets remain."""
        top = aggregate_top_themen(
            db_path=populated_db,
            min_similarity=0.0,
            only_relevant=True,
        )
        allowed = ("high", "mid")
        for bucket in top["buckets"]:
            assert bucket["relevance"]["level"] in allowed

    def test_buckets_sorted_high_first(self, populated_db):
        """Buckets are ordered by relevance level, strongest first."""
        top = aggregate_top_themen(db_path=populated_db, min_similarity=0.0)
        priority = {"high": 3, "mid": 2, "low": 1, "none": 0}
        observed = [
            priority.get(bucket["relevance"]["level"], 0)
            for bucket in top["buckets"]
        ]
        # The sequence of ranks must be monotonically non-increasing.
        assert observed == sorted(observed, reverse=True)
# ─────────────────────────────────────────────────────────────────────────────
# aggregate_news_cluster
# ─────────────────────────────────────────────────────────────────────────────
class TestNewsCluster:
    """Checks the result shape and size filter of ``aggregate_news_cluster``."""

    def test_structure(self, populated_db):
        """The result always carries ``clusters`` and ``n_total_news``."""
        # A very strict intra_threshold admits only (near-)identical news;
        # per the original author's note the fixture news are orthogonal,
        # so no cluster is expected to form here.
        outcome = aggregate_news_cluster(
            db_path=populated_db,
            min_cluster_size=2,
            intra_threshold=0.99,
        )
        for key in ("clusters", "n_total_news"):
            assert key in outcome

    def test_loose_threshold_creates_cluster(self, populated_db):
        """Every reported cluster has >= 2 members and the expected keys."""
        # A very lax threshold should put almost everything into one cluster.
        outcome = aggregate_news_cluster(
            db_path=populated_db,
            min_cluster_size=2,
            intra_threshold=0.0,
            days_window=30,
        )
        found = outcome["clusters"]
        # NOTE(review): this assertion is always true; the original comment
        # asked for "at least one cluster" - confirm whether >= 1 was meant.
        assert len(found) >= 0
        for entry in found:
            assert entry["size"] >= 2
            for key in ("members", "antrag_matches", "top_tags"):
                assert key in entry

    def test_min_cluster_size_filter(self, populated_db):
        """A minimum size above the number of available news yields nothing."""
        outcome = aggregate_news_cluster(
            db_path=populated_db,
            min_cluster_size=5,
        )
        # Per the original note the fixture DB holds only 3 news items, so
        # no cluster can reach size >= 5.
        assert outcome["clusters"] == []
# ─────────────────────────────────────────────────────────────────────────────
# aggregate_top_antraege_with_news
# ─────────────────────────────────────────────────────────────────────────────
class TestTopAntraegeWithNews:
    """Checks filtering, counting and ordering of ``aggregate_top_antraege_with_news``."""

    def test_only_high_gwoe(self, populated_db):
        """Only motions with gwoe_score >= min_gwoe_score show up."""
        listed = aggregate_top_antraege_with_news(
            db_path=populated_db,
            min_gwoe_score=8.0,
        )["antraege"]
        for antrag in listed:
            assert antrag["gwoe_score"] >= 8.0
        # Fixture per original note: 18/A has 8.0, 18/B has 7.0, 18/C has 5.0,
        # so only 18/A qualifies.
        ids = {antrag["drucksache"] for antrag in listed}
        assert "18/A" in ids
        assert "18/B" not in ids
        assert "18/C" not in ids

    def test_news_count_per_antrag(self, populated_db):
        """Motion 18/A is expected to have at least one matching news item."""
        listed = aggregate_top_antraege_with_news(
            db_path=populated_db,
            min_gwoe_score=7.0,
            min_similarity=0.5,
            days_window=30,
        )["antraege"]
        # Per the original note 18/A matches news n1 (housing construction).
        antrag_a = next(
            antrag for antrag in listed if antrag["drucksache"] == "18/A"
        )
        assert antrag_a["news_count"] >= 1

    def test_sort_news_first(self, populated_db):
        """Motions with news must all precede the ones without news."""
        listed = aggregate_top_antraege_with_news(
            db_path=populated_db,
            min_gwoe_score=7.0,
            min_similarity=0.5,
            days_window=30,
        )["antraege"]
        has_news = [antrag["news_count"] > 0 for antrag in listed]
        with_news_idx = [i for i, flag in enumerate(has_news) if flag]
        without_idx = [i for i, flag in enumerate(has_news) if not flag]
        # Sentinels cover the cases "no motion with news" / "none without".
        last_with_news = with_news_idx[-1] if with_news_idx else -1
        first_without = without_idx[0] if without_idx else len(listed)
        assert last_with_news < first_without