feat(#170 followup 2): Pre-Filter, Cluster, Antrags-Initiative, PM-Versionierung, Mail-Link
User-Feedback: Aktuelle-Themen-Dashboard war "Detective-Modus" — durch viele News scrollen, Match-Stärke selbst interpretieren. Komplett-Refactor zur kuratierten Sicht mit Tabs. **1. Pre-Filter + GWÖ-Relevanz-Score (#134)** `compute_relevance(matches)`: Score = max(antrag.gwoe_score × similarity). Level: high (≥4.0) / mid (≥2.5) / low (>0) / none. Pro News in der UI ein farbiger Pill (grün/orange/grau) + Reason-Text: "GWÖ-9.0/10-Antrag „Klimaschutzgesetz" (GRÜNE) passt mit Similarity 0.55." In der UI ist der Filter "Nur GWÖ-relevant" per Default aktiv (sendet only_relevant=true) — zeigt nur high/mid News, blendet Rauschen aus. Toggle-Checkbox. `/api/aktuelle-themen/top` neuer Param `only_relevant=true|false` (API-Default: false). **2. PM-Versionierung im Modal (#135)** `list_drafts_for(drucksache, news_url)`: alle Versionen, neueste oben. Endpoint `/api/aktuelle-themen/drafts-versions`. Modal zeigt Dropdown wenn >1 Version, Switch ohne LLM-Call. Force-Regen bleibt als Button im "bestehender Entwurf"-Banner. **3. News-Cluster-View (#136)** `aggregate_news_cluster(intra_threshold=0.55, min_cluster_size=2)`: Greedy-Embedding-Cluster + zentralster Antrags-Match per Centroid-Vektor. Zweiter Tab "Themen-Cluster": 5 News über "Pflege" → 1 Cluster mit gemeinsamem Antrag-Vorschlag, statt 5 separate Cards. Endpoint: `/api/aktuelle-themen/cluster`. **4. Mail-Direkt-Link + Clipboard (#137)** Im PM-Modal zwei Buttons: - "📧 Per Mail versenden" (mailto: mit subject + body, ~1900 Char Limit) - "📋 In Zwischenablage kopieren" (navigator.clipboard.writeText) - Bei langem PM (kodierter mailto-Link >1900 Char): statt des Mail-Links erscheint der Hinweis "PM zu lang für Mail-Link — Clipboard nutzen" **5. Antrags-Initiative (#138)** `aggregate_top_antraege_with_news(min_gwoe_score=8.0, days=14)`: Reverse-Sicht — pro Antrag mit GWÖ ≥ 8 die News-Resonanz. Anträge ohne Match werden trotzdem angezeigt mit "keine News"-Pill. Dritter Tab "GWÖ-Top-Anträge". Endpoint `/api/aktuelle-themen/top-antraege`. 
**UI-Restrukturierung:** statt einer langen Scroll-Liste jetzt 5 Tabs mit gemeinsamer Filter-Bar: - News × Anträge (Default, kuratiert via Pre-Filter) - Themen-Cluster (Bündel ähnlicher News) - GWÖ-Top-Anträge (Reverse) - News-Volumen (Chart) - PM-Entwürfe (Drafts-Liste) UI-Default für min_similarity von 0.40 auf 0.50 erhöht (weniger Rauschen; der API-Default bleibt 0.4). Tests: 14 neue (compute_relevance × 5, only_relevant + sort × 3, cluster × 3, top_antraege × 3). Suite 1067 grün. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
2bff943e8a
commit
e27dfc30a2
54
app/main.py
54
app/main.py
@ -2030,14 +2030,55 @@ async def api_aktuelle_themen_top(
|
|||||||
top_k: int = 10,
|
top_k: int = 10,
|
||||||
min_similarity: float = 0.4,
|
min_similarity: float = 0.4,
|
||||||
matches_per_news: int = 3,
|
matches_per_news: int = 3,
|
||||||
|
only_relevant: bool = False,
|
||||||
):
|
):
|
||||||
"""Top-K News der letzten N Tage mit Antrags-Match."""
|
"""Top-K News der letzten N Tage mit Antrags-Match.
|
||||||
|
|
||||||
|
Mit `only_relevant=true` werden News mit Relevance-Level "low" oder
|
||||||
|
"none" rausgefiltert.
|
||||||
|
"""
|
||||||
from .themen_matching import aggregate_top_themen
|
from .themen_matching import aggregate_top_themen
|
||||||
return aggregate_top_themen(
|
return aggregate_top_themen(
|
||||||
days_window=days,
|
days_window=days,
|
||||||
top_k=top_k,
|
top_k=top_k,
|
||||||
min_similarity=min_similarity,
|
min_similarity=min_similarity,
|
||||||
matches_per_news=matches_per_news,
|
matches_per_news=matches_per_news,
|
||||||
|
only_relevant=only_relevant,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/aktuelle-themen/cluster")
async def api_aktuelle_themen_cluster(
    days: int = 7,
    intra_threshold: float = 0.55,
    antrag_threshold: float = 0.4,
    min_cluster_size: int = 2,
):
    """Return news-to-news clusters built from embeddings.

    Several articles on the same topic from different sources are meant to
    show up as one cluster instead of as separate cards.

    All query parameters are forwarded unchanged to
    ``themen_matching.aggregate_news_cluster`` (``days`` is passed as
    ``days_window``); its return value is the response body.
    """
    # Imported at call time, like the other endpoints in this module.
    from .themen_matching import aggregate_news_cluster

    cluster_options = {
        "days_window": days,
        "intra_threshold": intra_threshold,
        "antrag_threshold": antrag_threshold,
        "min_cluster_size": min_cluster_size,
    }
    return aggregate_news_cluster(**cluster_options)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/aktuelle-themen/top-antraege")
async def api_aktuelle_themen_top_antraege(
    min_gwoe_score: float = 8.0,
    days: int = 14,
    min_similarity: float = 0.4,
    top_k_news: int = 5,
):
    """Reverse view: highly GWÖ-rated motions with their current press coverage.

    All query parameters are forwarded unchanged to
    ``themen_matching.aggregate_top_antraege_with_news`` (``days`` is passed
    as ``days_window``); its return value is the response body.
    """
    # Imported at call time, like the other endpoints in this module.
    from .themen_matching import aggregate_top_antraege_with_news

    query_options = {
        "min_gwoe_score": min_gwoe_score,
        "days_window": days,
        "min_similarity": min_similarity,
        "top_k_news": top_k_news,
    }
    return aggregate_top_antraege_with_news(**query_options)
|
||||||
|
|
||||||
|
|
||||||
@ -2125,6 +2166,17 @@ async def api_draft_detail(draft_id: int):
|
|||||||
return d
|
return d
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/aktuelle-themen/drafts-versions")
async def api_draft_versions(drucksache: str, news_url: str):
    """List all draft versions stored for one (drucksache, news_url) pair.

    The response echoes both identifiers and carries the drafts returned by
    ``presse_generator.list_drafts_for`` under ``"versions"`` (newest first,
    per that helper's ordering).
    """
    from .presse_generator import list_drafts_for

    versions = list_drafts_for(drucksache, news_url)
    return {
        "drucksache": drucksache,
        "news_url": news_url,
        "versions": versions,
    }
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/auswertungen/matrix")
|
@app.get("/api/auswertungen/matrix")
|
||||||
async def auswertungen_matrix(
|
async def auswertungen_matrix(
|
||||||
wahlperiode: Optional[str] = None,
|
wahlperiode: Optional[str] = None,
|
||||||
|
|||||||
@ -279,6 +279,40 @@ def list_drafts(
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def list_drafts_for(
    drucksache: str,
    news_url: str,
    db_path: Optional[Path] = None,
) -> list[dict]:
    """Return every stored press-release draft for one (drucksache, news_url) pair.

    Args:
        drucksache: Identifier of the motion the drafts belong to.
        news_url: URL of the news item the drafts refer to.
        db_path: SQLite database file to read. When omitted (or falsy) the
            path is taken from ``settings.db_path``.

    Returns:
        One dict per matching row of ``presse_drafts`` with the keys ``id``,
        ``drucksache``, ``bundesland``, ``news_url``, ``news_titel``,
        ``titel``, ``body``, ``model`` and ``created_at``, ordered by ``id``
        descending (newest first, assuming ids grow with insertion).
        An empty list when the database file does not exist.
    """
    path = db_path
    if not path:
        # Resolve the default lazily: callers that pass an explicit db_path
        # (e.g. tests) do not need the package configuration at all.
        from .config import settings

        path = settings.db_path
    if not Path(path).exists():
        return []

    # Single source of truth for the result keys; the order matches the
    # SELECT list below.
    columns = (
        "id", "drucksache", "bundesland", "news_url", "news_titel",
        "titel", "body", "model", "created_at",
    )
    conn = sqlite3.connect(str(path))
    try:
        rows = conn.execute(
            """SELECT id, drucksache, bundesland, news_url, news_titel,
                      titel, body, model, created_at
               FROM presse_drafts
               WHERE drucksache=? AND news_url=?
               ORDER BY id DESC""",
            (drucksache, news_url),
        ).fetchall()
    finally:
        # Always release the connection, even if the query fails.
        conn.close()
    return [dict(zip(columns, row)) for row in rows]
|
||||||
|
|
||||||
|
|
||||||
def get_draft(
|
def get_draft(
|
||||||
draft_id: int,
|
draft_id: int,
|
||||||
db_path: Optional[Path] = None,
|
db_path: Optional[Path] = None,
|
||||||
|
|||||||
@ -161,46 +161,81 @@
|
|||||||
|
|
||||||
<div class="at-controls">
|
<div class="at-controls">
|
||||||
<label for="at-days">Zeitfenster:</label>
|
<label for="at-days">Zeitfenster:</label>
|
||||||
<select id="at-days" onchange="loadThemen()">
|
<select id="at-days" onchange="loadActiveTab()">
|
||||||
<option value="3">3 Tage</option>
|
<option value="3">3 Tage</option>
|
||||||
<option value="7" selected>7 Tage</option>
|
<option value="7" selected>7 Tage</option>
|
||||||
<option value="14">14 Tage</option>
|
<option value="14">14 Tage</option>
|
||||||
<option value="30">30 Tage</option>
|
<option value="30">30 Tage</option>
|
||||||
</select>
|
</select>
|
||||||
<label for="at-topk">Top-N News:</label>
|
<label for="at-topk">Top-N News:</label>
|
||||||
<input type="number" id="at-topk" value="15" min="3" max="50" style="width:60px;" onchange="loadThemen()" />
|
<input type="number" id="at-topk" value="20" min="3" max="50" style="width:60px;" onchange="loadActiveTab()" />
|
||||||
<label for="at-minsim">Min. Similarity:</label>
|
<label for="at-minsim">Min. Similarity:</label>
|
||||||
<select id="at-minsim" onchange="loadThemen()">
|
<select id="at-minsim" onchange="loadActiveTab()">
|
||||||
<option value="0.30">0.30 (locker)</option>
|
<option value="0.30">0.30 (locker)</option>
|
||||||
<option value="0.40" selected>0.40 (default)</option>
|
<option value="0.40">0.40</option>
|
||||||
<option value="0.50">0.50 (streng)</option>
|
<option value="0.50" selected>0.50 (default)</option>
|
||||||
</select>
|
</select>
|
||||||
<button onclick="loadThemen()">Aktualisieren</button>
|
<label style="display:inline-flex;align-items:center;gap:5px;cursor:pointer;">
|
||||||
|
<input type="checkbox" id="at-only-relevant" checked onchange="loadActiveTab()" />
|
||||||
|
Nur GWÖ-relevant
|
||||||
|
</label>
|
||||||
|
<button onclick="loadActiveTab()">Aktualisieren</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- News-Volumen-Chart -->
|
<!-- Tabs -->
|
||||||
<h3 style="font-family:var(--font-display);font-size:14px;color:var(--ecg-teal);margin:1.5rem 0 0.5rem;">
|
<div class="auswert-tabs" role="tablist" style="display:flex;gap:4px;margin:1.5rem 0 1rem;border-bottom:2px solid var(--ecg-border);padding-bottom:0;">
|
||||||
News-Volumen pro Quelle (letzte 30 Tage)
|
<button class="at-tab active" role="tab" data-tab="news" onclick="switchAtTab('news', this)" style="font-family:var(--font-mono);font-size:11px;text-transform:uppercase;letter-spacing:0.06em;padding:6px 14px;border:none;background:none;cursor:pointer;color:var(--ecg-teal);opacity:1;border-bottom:2px solid var(--ecg-teal);margin-bottom:-2px;">News × Anträge</button>
|
||||||
</h3>
|
<button class="at-tab" role="tab" data-tab="cluster" onclick="switchAtTab('cluster', this)" style="font-family:var(--font-mono);font-size:11px;text-transform:uppercase;letter-spacing:0.06em;padding:6px 14px;border:none;background:none;cursor:pointer;color:var(--ecg-dark);opacity:0.55;border-bottom:2px solid transparent;margin-bottom:-2px;">Themen-Cluster</button>
|
||||||
<div class="matrix-wrap" style="background:var(--ecg-card-bg);border:1px solid var(--ecg-border);border-radius:4px;padding:14px;">
|
<button class="at-tab" role="tab" data-tab="antraege" onclick="switchAtTab('antraege', this)" style="font-family:var(--font-mono);font-size:11px;text-transform:uppercase;letter-spacing:0.06em;padding:6px 14px;border:none;background:none;cursor:pointer;color:var(--ecg-dark);opacity:0.55;border-bottom:2px solid transparent;margin-bottom:-2px;">GWÖ-Top-Anträge</button>
|
||||||
<canvas id="at-zeitreihe-chart" style="max-height:280px;"></canvas>
|
<button class="at-tab" role="tab" data-tab="zeitreihe" onclick="switchAtTab('zeitreihe', this)" style="font-family:var(--font-mono);font-size:11px;text-transform:uppercase;letter-spacing:0.06em;padding:6px 14px;border:none;background:none;cursor:pointer;color:var(--ecg-dark);opacity:0.55;border-bottom:2px solid transparent;margin-bottom:-2px;">News-Volumen</button>
|
||||||
|
<button class="at-tab" role="tab" data-tab="drafts" onclick="switchAtTab('drafts', this)" style="font-family:var(--font-mono);font-size:11px;text-transform:uppercase;letter-spacing:0.06em;padding:6px 14px;border:none;background:none;cursor:pointer;color:var(--ecg-dark);opacity:0.55;border-bottom:2px solid transparent;margin-bottom:-2px;">PM-Entwürfe</button>
|
||||||
</div>
|
</div>
|
||||||
<div id="at-zeitreihe-meta" class="meta-line" style="font-family:var(--font-mono);font-size:11px;opacity:0.6;margin:8px 0 1.5rem;"></div>
|
|
||||||
|
|
||||||
<!-- Top-Themen + Matches -->
|
<!-- Tab Panels -->
|
||||||
<h3 style="font-family:var(--font-display);font-size:14px;color:var(--ecg-teal);margin:1.5rem 0 0.5rem;">
|
<div id="at-tab-news" class="at-panel">
|
||||||
Top-Themen × passende Anträge
|
<div id="at-themen-list">
|
||||||
</h3>
|
|
||||||
<div id="at-themen-list">
|
|
||||||
<div style="font-family:var(--font-mono);font-size:12px;opacity:0.5;">Lade …</div>
|
<div style="font-family:var(--font-mono);font-size:12px;opacity:0.5;">Lade …</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Drafts-Liste -->
|
<div id="at-tab-cluster" class="at-panel" style="display:none;">
|
||||||
<h3 style="font-family:var(--font-display);font-size:14px;color:var(--ecg-teal);margin:2rem 0 0.5rem;">
|
<p style="font-size:11px;font-family:var(--font-mono);opacity:0.7;margin:0 0 0.5rem;">
|
||||||
Pressemitteilungs-Entwürfe (zuletzt generiert)
|
News mit ähnlicher Thematik werden gebündelt — z.B. 4 Tagesschau- + 2 Bundestag-Artikel
|
||||||
</h3>
|
zur gleichen Debatte ergeben einen Cluster mit gemeinsamem Antrags-Match.
|
||||||
<div id="at-drafts-list">
|
</p>
|
||||||
<div style="font-family:var(--font-mono);font-size:12px;opacity:0.5;">Lade Entwürfe …</div>
|
<div id="at-cluster-list">
|
||||||
|
<div style="font-family:var(--font-mono);font-size:12px;opacity:0.5;">Lade …</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="at-tab-antraege" class="at-panel" style="display:none;">
|
||||||
|
<p style="font-size:11px;font-family:var(--font-mono);opacity:0.7;margin:0 0 0.5rem;">
|
||||||
|
Reverse-Sicht: <strong>GWÖ-bewertete Anträge mit Score ≥ 8</strong>, sortiert nach
|
||||||
|
aktueller Pressewirkung. Anträge ohne News-Match werden gezeigt — als Hinweis
|
||||||
|
„Top-Antrag, aktuell ohne mediale Resonanz".
|
||||||
|
</p>
|
||||||
|
<div id="at-antraege-list">
|
||||||
|
<div style="font-family:var(--font-mono);font-size:12px;opacity:0.5;">Lade …</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="at-tab-zeitreihe" class="at-panel" style="display:none;">
|
||||||
|
<h3 style="font-family:var(--font-display);font-size:14px;color:var(--ecg-teal);margin:0 0 0.5rem;">
|
||||||
|
News-Volumen pro Quelle (letzte 30 Tage)
|
||||||
|
</h3>
|
||||||
|
<div class="matrix-wrap" style="background:var(--ecg-card-bg);border:1px solid var(--ecg-border);border-radius:4px;padding:14px;">
|
||||||
|
<canvas id="at-zeitreihe-chart" style="max-height:320px;"></canvas>
|
||||||
|
</div>
|
||||||
|
<div id="at-zeitreihe-meta" class="meta-line" style="font-family:var(--font-mono);font-size:11px;opacity:0.6;margin:8px 0 1.5rem;"></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="at-tab-drafts" class="at-panel" style="display:none;">
|
||||||
|
<p style="font-size:11px;font-family:var(--font-mono);opacity:0.7;margin:0 0 0.5rem;">
|
||||||
|
Bisher generierte Pressemitteilungs-Entwürfe (zuletzt generiert oben).
|
||||||
|
</p>
|
||||||
|
<div id="at-drafts-list">
|
||||||
|
<div style="font-family:var(--font-mono);font-size:12px;opacity:0.5;">Lade …</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Modal für Draft-Anzeige -->
|
<!-- Modal für Draft-Anzeige -->
|
||||||
@ -217,6 +252,7 @@
|
|||||||
{% block body_scripts %}
|
{% block body_scripts %}
|
||||||
<script>
|
<script>
|
||||||
let _atZeitreiheChart = null;
|
let _atZeitreiheChart = null;
|
||||||
|
let _atActiveTab = 'news';
|
||||||
|
|
||||||
function atScoreClass(score) {
|
function atScoreClass(score) {
|
||||||
if (score == null) return '';
|
if (score == null) return '';
|
||||||
@ -225,40 +261,79 @@ function atScoreClass(score) {
|
|||||||
return 's-low';
|
return 's-low';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Render the coloured relevance pill shown in a news card header.
 *
 * @param {{level?: string, score?: number}|null|undefined} rel
 *   Relevance object of a bucket; `level` selects the colour/label
 *   ("high", "mid", "low", "none"), `score` is appended to the label.
 * @returns {string} HTML for the pill, or '' when `rel` is missing.
 */
function atRelevancePill(rel) {
  if (!rel) return '';
  const styles = {
    high: {bg: 'rgba(136,158,51,0.30)', fg: '#3d4f0a', label: 'GWÖ-relevant'},
    mid:  {bg: 'rgba(247,148,29,0.25)', fg: '#7a4a00', label: 'GWÖ-mittel'},
    low:  {bg: 'rgba(150,150,150,0.25)', fg: '#555', label: 'schwach'},
    none: {bg: 'rgba(150,150,150,0.15)', fg: '#888', label: 'kein Match'},
  };
  // Own-property check: a level such as "toString" must fall back to "none"
  // instead of resolving to an inherited Object.prototype member.
  const s = Object.prototype.hasOwnProperty.call(styles, rel.level)
    ? styles[rel.level]
    : styles.none;
  // Without a score the pill would otherwise read "… · undefined".
  const scoreSuffix = rel.score != null ? ` · ${rel.score}` : '';
  return `<span style="display:inline-block;padding:2px 9px;border-radius:11px;font-family:var(--font-mono);font-size:10px;font-weight:700;background:${s.bg};color:${s.fg};margin-right:6px;">${s.label}${scoreSuffix}</span>`;
}
|
||||||
|
|
||||||
/**
 * Cut a date/timestamp string down to its first 10 characters
 * (presumably the YYYY-MM-DD part of an ISO timestamp).
 *
 * @param {string|null|undefined} s
 * @returns {string} The 10-character prefix, or '' when `s` is missing or
 *   shorter than 10 characters.
 */
function atFmtDatum(s) {
  const hasFullDate = Boolean(s) && !(s.length < 10);
  return hasFullDate ? s.slice(0, 10) : '';
}
|
||||||
|
|
||||||
|
/**
 * Activate one dashboard tab: highlight its button, show its panel and
 * load its content.
 *
 * @param {string} name Tab key; the panel is looked up as `at-tab-<name>`.
 * @param {HTMLElement} [btn] The clicked tab button. When omitted, the
 *   button is located via its `data-tab` attribute so the function can also
 *   be called programmatically.
 */
function switchAtTab(name, btn) {
  const panel = document.getElementById('at-tab-' + name);
  // Unknown tab: leave the current state untouched instead of throwing
  // half-way through the switch.
  if (!panel) return;
  const activeBtn = btn || document.querySelector('.at-tab[data-tab="' + name + '"]');

  _atActiveTab = name;
  document.querySelectorAll('.at-tab').forEach(b => {
    const isActive = b === activeBtn;
    // Keep the class and ARIA state in sync with the inline styling; the
    // markup declares role="tab" and starts with class "active" on the
    // first button.
    b.classList.toggle('active', isActive);
    b.setAttribute('aria-selected', isActive ? 'true' : 'false');
    b.style.color = isActive ? 'var(--ecg-teal)' : 'var(--ecg-dark)';
    b.style.opacity = isActive ? '1' : '0.55';
    b.style.borderBottomColor = isActive ? 'var(--ecg-teal)' : 'transparent';
  });
  document.querySelectorAll('.at-panel').forEach(p => { p.style.display = 'none'; });
  panel.style.display = 'block';
  loadActiveTab();
}
|
||||||
|
|
||||||
|
/**
 * (Re)load the content of the tab currently stored in `_atActiveTab`.
 * Unknown tab keys are ignored.
 */
function loadActiveTab() {
  const tab = _atActiveTab;
  if (tab === 'news') {
    loadThemen();
  } else if (tab === 'cluster') {
    loadCluster();
  } else if (tab === 'antraege') {
    loadAntraege();
  } else if (tab === 'zeitreihe') {
    loadZeitreihe();
  } else if (tab === 'drafts') {
    loadDrafts();
  }
}
|
||||||
|
|
||||||
async function loadThemen() {
|
async function loadThemen() {
|
||||||
const days = document.getElementById('at-days').value;
|
const days = document.getElementById('at-days').value;
|
||||||
const topk = document.getElementById('at-topk').value;
|
const topk = document.getElementById('at-topk').value;
|
||||||
const minsim = document.getElementById('at-minsim').value;
|
const minsim = document.getElementById('at-minsim').value;
|
||||||
|
const onlyRel = document.getElementById('at-only-relevant').checked ? '1' : '0';
|
||||||
const list = document.getElementById('at-themen-list');
|
const list = document.getElementById('at-themen-list');
|
||||||
list.innerHTML = '<div style="font-family:var(--font-mono);font-size:12px;opacity:0.5;">Lade …</div>';
|
list.innerHTML = '<div style="font-family:var(--font-mono);font-size:12px;opacity:0.5;">Lade …</div>';
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const r = await fetch(`/api/aktuelle-themen/top?days=${days}&top_k=${topk}&min_similarity=${minsim}&matches_per_news=3`);
|
const r = await fetch(`/api/aktuelle-themen/top?days=${days}&top_k=${topk}&min_similarity=${minsim}&matches_per_news=3&only_relevant=${onlyRel}`);
|
||||||
const data = await r.json();
|
const data = await r.json();
|
||||||
|
|
||||||
if (!data.buckets || !data.buckets.length) {
|
if (!data.buckets || !data.buckets.length) {
|
||||||
list.innerHTML = '<div style="font-family:var(--font-mono);font-size:12px;opacity:0.5;">Keine News im Zeitfenster oder noch nicht embedded.</div>';
|
list.innerHTML = '<div style="font-family:var(--font-mono);font-size:12px;opacity:0.5;">Keine News im Zeitfenster ' + (onlyRel === '1' ? '(Filter: nur GWÖ-relevant aktiv — versuch ohne Filter)' : 'oder noch nicht embedded') + '.</div>';
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
let html = '';
|
let html = '';
|
||||||
for (const b of data.buckets) {
|
for (const b of data.buckets) {
|
||||||
const n = b.news;
|
const n = b.news;
|
||||||
|
const rel = b.relevance;
|
||||||
const tags = (n.tags || []).map(t => `<span class="at-tag">${t}</span>`).join('');
|
const tags = (n.tags || []).map(t => `<span class="at-tag">${t}</span>`).join('');
|
||||||
html += '<div class="at-news-card">';
|
html += '<div class="at-news-card">';
|
||||||
html += `<div class="at-news-head">${atFmtDatum(n.datum)} · ${n.source}${n.ressort ? ' / ' + n.ressort : ''}</div>`;
|
html += `<div class="at-news-head">${atRelevancePill(rel)}${atFmtDatum(n.datum)} · ${n.source}${n.ressort ? ' / ' + n.ressort : ''}</div>`;
|
||||||
html += `<h4 class="at-news-title"><a href="${n.url}" target="_blank" rel="noopener">${n.titel}</a></h4>`;
|
html += `<h4 class="at-news-title"><a href="${n.url}" target="_blank" rel="noopener">${n.titel}</a></h4>`;
|
||||||
if (n.summary) html += `<div class="at-news-summary">${n.summary}</div>`;
|
if (n.summary) html += `<div class="at-news-summary">${n.summary}</div>`;
|
||||||
if (tags) html += `<div class="at-news-tags">${tags}</div>`;
|
if (tags) html += `<div class="at-news-tags">${tags}</div>`;
|
||||||
|
|
||||||
if (b.matches && b.matches.length) {
|
if (b.matches && b.matches.length) {
|
||||||
html += '<div class="at-matches">';
|
html += '<div class="at-matches">';
|
||||||
html += '<div class="at-matches-label">Passende Anträge:</div>';
|
html += `<div class="at-matches-label">Warum: ${rel.reason}</div>`;
|
||||||
for (const m of b.matches) {
|
for (const m of b.matches) {
|
||||||
const sc = m.gwoe_score != null ? m.gwoe_score.toFixed(1) : '—';
|
const sc = m.gwoe_score != null ? m.gwoe_score.toFixed(1) : '—';
|
||||||
const fr = (m.fraktionen || []).join(', ');
|
const fr = (m.fraktionen || []).join(', ');
|
||||||
@ -283,6 +358,103 @@ async function loadThemen() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Load the topic clusters for the selected time window and render them
 * into `#at-cluster-list`.
 *
 * The cards are built with DOM APIs (textContent / addEventListener)
 * instead of string-concatenated HTML: titles, tags, sources and URLs
 * originate from external news feeds and must never be parsed as markup or
 * spliced into an inline `onclick` handler.
 */
async function loadCluster() {
  const days = document.getElementById('at-days').value;
  const list = document.getElementById('at-cluster-list');

  const NOTE_STYLE = 'font-family:var(--font-mono);font-size:12px;opacity:0.5;';
  const ERROR_STYLE = 'color:#c00;font-family:var(--font-mono);font-size:12px;';
  const LINK_STYLE = 'color:var(--ecg-teal);text-decoration:none;';

  // Minimal element factory; `text` is assigned via textContent and is
  // therefore always treated as plain text.
  const el = (tag, opts = {}) => {
    const node = document.createElement(tag);
    if (opts.cls) node.className = opts.cls;
    if (opts.style) node.style.cssText = opts.style;
    if (opts.text != null) node.textContent = opts.text;
    return node;
  };
  // External link to a news article. Only http(s) targets become clickable,
  // so e.g. a "javascript:" URL from a feed stays inert text
  // (assumes news URLs are absolute — TODO confirm).
  const newsLink = (url, text) => {
    const a = el('a', {style: LINK_STYLE, text});
    if (/^https?:\/\//i.test(url || '')) a.href = url;
    a.target = '_blank';
    a.rel = 'noopener';
    return a;
  };
  // Replace the list content with a single status line.
  const showNote = (text, style) => {
    list.textContent = '';
    list.append(el('div', {style, text}));
  };

  showNote('Lade …', NOTE_STYLE);
  try {
    const r = await fetch(`/api/aktuelle-themen/cluster?days=${encodeURIComponent(days)}&intra_threshold=0.55&min_cluster_size=2&antrag_threshold=0.4`);
    // Surface HTTP errors instead of failing later on an unexpected body.
    if (!r.ok) throw new Error(`HTTP ${r.status}`);
    const data = await r.json();
    if (!data.clusters || !data.clusters.length) {
      showNote('Keine Themen-Cluster im Zeitfenster (jeder Cluster braucht mind. 2 inhaltlich ähnliche News).', NOTE_STYLE);
      return;
    }

    const frag = document.createDocumentFragment();
    for (const c of data.clusters) {
      const members = c.members || [];
      const card = el('div', {cls: 'at-news-card'});

      // Header: cluster size followed by tag pills.
      const head = el('div', {cls: 'at-news-head', text: `Cluster · ${c.size} News · `});
      const tags = c.top_tags || [];
      if (tags.length) {
        for (const t of tags) head.append(el('span', {cls: 'at-tag', text: t}));
      } else {
        head.append(el('em', {text: 'keine Tags'}));
      }
      card.append(head);

      // At most six member articles, then an "… and N more" line.
      const memberBox = el('div', {style: 'margin:6px 0;'});
      for (const m of members.slice(0, 6)) {
        const row = el('div', {style: 'font-size:12px;line-height:1.5;'});
        row.append(
          el('span', {style: 'opacity:0.6;font-family:var(--font-mono);font-size:10px;', text: `[${m.source}]`}),
          ' ',
          newsLink(m.url, m.titel),
        );
        memberBox.append(row);
      }
      if (members.length > 6) {
        memberBox.append(el('div', {style: 'font-size:11px;opacity:0.6;', text: `… und ${members.length - 6} weitere`}));
      }
      card.append(memberBox);

      const matches = el('div', {cls: 'at-matches'});
      if (c.antrag_matches && c.antrag_matches.length) {
        matches.append(el('div', {cls: 'at-matches-label', text: 'Passende Anträge:'}));
        // The press-release draft is generated against the first article.
        const firstNewsUrl = members.length ? members[0].url : '';
        for (const m of c.antrag_matches) {
          const sc = m.gwoe_score != null ? m.gwoe_score.toFixed(1) : '—';
          const fr = (m.fraktionen || []).join(', ');
          const row = el('div', {cls: 'at-match'});

          row.append(el('span', {cls: `at-score-pill ${atScoreClass(m.gwoe_score)}`, text: sc}));
          const dsLink = el('a', {style: LINK_STYLE + 'font-weight:500;', text: m.drucksache});
          dsLink.href = `/antrag/${encodeURIComponent(m.drucksache)}`;
          row.append(dsLink);
          row.append(el('span', {style: 'opacity:0.85;', text: m.title || ''}));
          if (fr) row.append(el('span', {style: 'opacity:0.6;font-size:11px;', text: `— ${fr}`}));
          row.append(el('span', {cls: 'at-sim', text: `sim ${m.similarity}`}));

          const pmBtn = el('button', {cls: 'at-presse-btn', text: 'PM-Vorschlag'});
          // Same arguments the inline handler used to pass: raw drucksache,
          // URI-encoded news URL, and the button itself.
          pmBtn.addEventListener('click', () => generatePresse(m.drucksache, encodeURIComponent(firstNewsUrl), pmBtn));
          row.append(pmBtn);

          matches.append(row);
        }
      } else {
        matches.append(el('div', {cls: 'at-matches-label', text: 'Kein Antrag-Match — Themen-Cluster ohne GWÖ-Anker.'}));
      }
      card.append(matches);
      frag.append(card);
    }
    list.textContent = '';
    list.append(frag);
  } catch (e) {
    showNote(`Fehler: ${e}`, ERROR_STYLE);
  }
}
|
||||||
|
|
||||||
|
/**
 * Load the top GWÖ-rated motions with their matching news for the selected
 * time window and render them into `#at-antraege-list`.
 *
 * The cards are built with DOM APIs (textContent / addEventListener)
 * instead of string-concatenated HTML: news titles, sources and URLs
 * originate from external feeds and must never be parsed as markup or
 * spliced into an inline `onclick` handler.
 */
async function loadAntraege() {
  const days = document.getElementById('at-days').value;
  const list = document.getElementById('at-antraege-list');

  const NOTE_STYLE = 'font-family:var(--font-mono);font-size:12px;opacity:0.5;';
  const ERROR_STYLE = 'color:#c00;font-family:var(--font-mono);font-size:12px;';
  const LINK_STYLE = 'color:var(--ecg-teal);text-decoration:none;';
  const BADGE_BASE = 'display:inline-block;padding:2px 9px;border-radius:11px;font-family:var(--font-mono);font-size:10px;font-weight:700;margin-right:6px;';

  // Minimal element factory; `text` is assigned via textContent and is
  // therefore always treated as plain text.
  const el = (tag, opts = {}) => {
    const node = document.createElement(tag);
    if (opts.cls) node.className = opts.cls;
    if (opts.style) node.style.cssText = opts.style;
    if (opts.text != null) node.textContent = opts.text;
    return node;
  };
  // Replace the list content with a single status line.
  const showNote = (text, style) => {
    list.textContent = '';
    list.append(el('div', {style, text}));
  };

  showNote('Lade …', NOTE_STYLE);
  try {
    // NOTE(review): score and similarity thresholds are fixed here and do
    // not follow the filter bar — confirm that this is intended.
    const r = await fetch(`/api/aktuelle-themen/top-antraege?min_gwoe_score=8.0&days=${encodeURIComponent(days)}&min_similarity=0.4&top_k_news=5`);
    // Surface HTTP errors instead of failing later on an unexpected body.
    if (!r.ok) throw new Error(`HTTP ${r.status}`);
    const data = await r.json();
    if (!data.antraege || !data.antraege.length) {
      showNote('Keine GWÖ-≥8-Anträge in der DB.', NOTE_STYLE);
      return;
    }

    const frag = document.createDocumentFragment();
    for (const a of data.antraege) {
      const sc = a.gwoe_score != null ? a.gwoe_score.toFixed(1) : '—';
      const fr = (a.fraktionen || []).join(', ');
      const card = el('div', {cls: 'at-news-card'});

      // Header: news-count badge, score pill, then state / date / factions.
      const head = el('div', {cls: 'at-news-head'});
      head.append(a.news_count > 0
        ? el('span', {style: BADGE_BASE + 'background:rgba(136,158,51,0.30);color:#3d4f0a;', text: `${a.news_count} aktuelle News`})
        : el('span', {style: BADGE_BASE + 'background:rgba(150,150,150,0.20);color:#777;', text: 'keine News'}));
      head.append(el('span', {cls: `at-score-pill ${atScoreClass(a.gwoe_score)}`, style: 'margin-right:6px;', text: sc}));
      head.append(`${a.bundesland} · ${atFmtDatum(a.datum)}${fr ? ' · ' + fr : ''}`);
      card.append(head);

      const title = el('h4', {cls: 'at-news-title'});
      const titleLink = el('a', {style: LINK_STYLE, text: `${a.drucksache} — ${a.title || ''}`});
      titleLink.href = `/antrag/${encodeURIComponent(a.drucksache)}`;
      title.append(titleLink);
      card.append(title);

      if (a.antrag_zusammenfassung) {
        // Teaser: first 200 characters of the summary.
        const summary = a.antrag_zusammenfassung;
        card.append(el('div', {cls: 'at-news-summary', text: summary.slice(0, 200) + (summary.length > 200 ? '…' : '')}));
      }

      const matches = el('div', {cls: 'at-matches'});
      if (a.top_news && a.top_news.length) {
        matches.append(el('div', {cls: 'at-matches-label', text: 'Aktuelle News:'}));
        for (const n of a.top_news) {
          const row = el('div', {cls: 'at-match'});
          row.append(el('span', {style: 'font-family:var(--font-mono);font-size:10px;opacity:0.6;', text: `[${n.source}]`}));

          const newsLink = el('a', {style: LINK_STYLE, text: n.titel});
          // Only http(s) targets become clickable, so e.g. a "javascript:"
          // URL from a feed stays inert text (assumes news URLs are
          // absolute — TODO confirm).
          if (/^https?:\/\//i.test(n.url || '')) newsLink.href = n.url;
          newsLink.target = '_blank';
          newsLink.rel = 'noopener';
          row.append(newsLink);

          row.append(el('span', {cls: 'at-sim', text: `sim ${n.similarity}`}));

          const pmBtn = el('button', {cls: 'at-presse-btn', text: 'PM-Vorschlag'});
          // Same arguments the inline handler used to pass: raw drucksache,
          // URI-encoded news URL, and the button itself.
          pmBtn.addEventListener('click', () => generatePresse(a.drucksache, encodeURIComponent(n.url), pmBtn));
          row.append(pmBtn);

          matches.append(row);
        }
      } else {
        matches.append(el('div', {cls: 'at-matches-label', text: 'Keine aktuellen News passen — Top-Antrag wartet auf passende mediale Welle.'}));
      }
      card.append(matches);
      frag.append(card);
    }
    list.textContent = '';
    list.append(frag);
  } catch (e) {
    showNote(`Fehler: ${e}`, ERROR_STYLE);
  }
}
|
||||||
|
|
||||||
async function loadZeitreihe() {
|
async function loadZeitreihe() {
|
||||||
const meta = document.getElementById('at-zeitreihe-meta');
|
const meta = document.getElementById('at-zeitreihe-meta');
|
||||||
try {
|
try {
|
||||||
@ -397,29 +569,79 @@ async function regeneratePresse(drucksache, newsUrlEnc) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function showDraftFromData(d) {
|
async function showDraftFromData(d) {
|
||||||
const backdrop = document.getElementById('at-modal-backdrop');
|
const backdrop = document.getElementById('at-modal-backdrop');
|
||||||
document.getElementById('at-modal-title').textContent = d.titel;
|
document.getElementById('at-modal-title').textContent = d.titel;
|
||||||
const isExisting = d._was_existing === true;
|
const isExisting = d._was_existing === true;
|
||||||
const newsUrlEnc = encodeURIComponent(d.news_url);
|
const newsUrlEnc = encodeURIComponent(d.news_url);
|
||||||
const dsEnc = d.drucksache.replace(/'/g, "\\'");
|
const dsEnc = d.drucksache.replace(/'/g, "\\'");
|
||||||
const existingNote = isExisting
|
|
||||||
? `<div style="font-family:var(--font-mono);font-size:10px;opacity:0.7;background:rgba(247,148,29,0.18);padding:6px 8px;border-radius:3px;margin-bottom:8px;">
|
// Versionen abfragen — falls >1, Dropdown anzeigen
|
||||||
|
let versionsHtml = '';
|
||||||
|
try {
|
||||||
|
const vr = await fetch(`/api/aktuelle-themen/drafts-versions?drucksache=${encodeURIComponent(d.drucksache)}&news_url=${newsUrlEnc}`);
|
||||||
|
const vd = await vr.json();
|
||||||
|
if (vd.versions && vd.versions.length > 1) {
|
||||||
|
versionsHtml = '<select onchange="loadVersion(this.value)" style="margin-left:8px;font-family:var(--font-mono);font-size:10px;padding:2px 6px;">';
|
||||||
|
for (const v of vd.versions) {
|
||||||
|
const sel = (v.id === d.id) ? ' selected' : '';
|
||||||
|
versionsHtml += `<option value="${v.id}"${sel}>v${v.id} — ${(v.created_at || '').slice(0,16)} (${v.model})</option>`;
|
||||||
|
}
|
||||||
|
versionsHtml += '</select>';
|
||||||
|
}
|
||||||
|
} catch (e) { /* silent */ }
|
||||||
|
|
||||||
|
const banner = isExisting
|
||||||
|
? `<div style="font-family:var(--font-mono);font-size:10px;opacity:0.85;background:rgba(247,148,29,0.18);padding:6px 8px;border-radius:3px;margin-bottom:8px;">
|
||||||
Bestehender Entwurf vom ${(d.created_at || '').slice(0,10)} · Modell: ${d.model || '—'} · kein LLM-Call
|
Bestehender Entwurf vom ${(d.created_at || '').slice(0,10)} · Modell: ${d.model || '—'} · kein LLM-Call
|
||||||
<button type="button" onclick="regeneratePresse('${dsEnc}', '${newsUrlEnc}')" style="margin-left:8px;font-family:var(--font-mono);font-size:10px;padding:2px 8px;border:1px solid var(--ecg-border);border-radius:3px;background:var(--ecg-card-bg);cursor:pointer;">Neu generieren</button>
|
<button type="button" onclick="regeneratePresse('${dsEnc}', '${newsUrlEnc}')" style="margin-left:8px;font-family:var(--font-mono);font-size:10px;padding:2px 8px;border:1px solid var(--ecg-border);border-radius:3px;background:var(--ecg-card-bg);cursor:pointer;">Neu generieren</button>
|
||||||
|
${versionsHtml}
|
||||||
</div>`
|
</div>`
|
||||||
: `<div style="font-family:var(--font-mono);font-size:10px;opacity:0.7;background:rgba(136,158,51,0.18);padding:6px 8px;border-radius:3px;margin-bottom:8px;">
|
: `<div style="font-family:var(--font-mono);font-size:10px;opacity:0.85;background:rgba(136,158,51,0.18);padding:6px 8px;border-radius:3px;margin-bottom:8px;">
|
||||||
Neu generiert · Modell: ${d.model || '—'}
|
Neu generiert · Modell: ${d.model || '—'} ${versionsHtml}
|
||||||
</div>`;
|
</div>`;
|
||||||
|
|
||||||
|
// Action-Buttons: Mail + Clipboard
|
||||||
|
const mailtoBody = encodeURIComponent(d.body + '\n\n— Bezug: ' + d.news_titel + ' (' + d.news_url + ')');
|
||||||
|
const mailtoSubject = encodeURIComponent(d.titel);
|
||||||
|
const mailto = `mailto:?subject=${mailtoSubject}&body=${mailtoBody}`;
|
||||||
|
const isMailtoTooLong = mailto.length > 1900;
|
||||||
|
const actionRow = `<div style="display:flex;gap:8px;margin:10px 0 12px;">
|
||||||
|
${isMailtoTooLong ? '<span style="font-family:var(--font-mono);font-size:10px;opacity:0.6;">PM zu lang für Mail-Link — Clipboard nutzen.</span>'
|
||||||
|
: `<a href="${mailto}" style="font-family:var(--font-mono);font-size:11px;padding:5px 12px;border:1px solid var(--ecg-border);border-radius:3px;background:var(--ecg-card-bg);color:var(--ecg-dark);text-decoration:none;">📧 Per Mail versenden</a>`}
|
||||||
|
<button type="button" onclick="copyDraftToClipboard(this, ${JSON.stringify(d.titel).replace(/"/g, '"')}, ${JSON.stringify(d.body).replace(/"/g, '"')})" style="font-family:var(--font-mono);font-size:11px;padding:5px 12px;border:1px solid var(--ecg-border);border-radius:3px;background:var(--ecg-card-bg);color:var(--ecg-dark);cursor:pointer;">📋 In Zwischenablage kopieren</button>
|
||||||
|
</div>`;
|
||||||
|
|
||||||
document.getElementById('at-modal-body').innerHTML =
|
document.getElementById('at-modal-body').innerHTML =
|
||||||
existingNote +
|
banner +
|
||||||
`<div style="font-family:var(--font-mono);font-size:11px;opacity:0.6;margin-bottom:10px;">
|
`<div style="font-family:var(--font-mono);font-size:11px;opacity:0.6;margin-bottom:10px;">
|
||||||
DS ${d.drucksache} (${d.bundesland}) · Bezug zu: <a href="${d.news_url}" target="_blank" rel="noopener" style="color:var(--ecg-teal);">${d.news_titel}</a>
|
DS ${d.drucksache} (${d.bundesland}) · Bezug zu: <a href="${d.news_url}" target="_blank" rel="noopener" style="color:var(--ecg-teal);">${d.news_titel}</a>
|
||||||
</div>
|
</div>` +
|
||||||
<div style="white-space:pre-wrap;">${d.body.replace(/</g, '<')}</div>`;
|
actionRow +
|
||||||
|
`<div style="white-space:pre-wrap;font-size:13px;line-height:1.5;">${d.body.replace(/</g, '<')}</div>`;
|
||||||
backdrop.style.display = 'flex';
|
backdrop.style.display = 'flex';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Fetch one stored draft by id and render it in the modal.
 *
 * Requests `/api/aktuelle-themen/drafts/<id>` and passes the parsed JSON to
 * showDraftFromData(). Network errors, non-2xx responses and invalid JSON
 * are all reported through alert().
 *
 * @param {number|string} draftId - Id of the draft version to display.
 */
async function loadVersion(draftId) {
  try {
    const r = await fetch(`/api/aktuelle-themen/drafts/${encodeURIComponent(draftId)}`);
    // fetch() rejects only on network failure; without this check the JSON
    // body of an HTTP error would be rendered as if it were a draft.
    if (!r.ok) throw new Error(`HTTP ${r.status}`);
    const d = await r.json();
    showDraftFromData(d);
  } catch (e) { alert('Fehler: ' + e); }
}
|
||||||
|
|
||||||
|
/**
 * Copy a draft (title, blank line, body) to the system clipboard.
 *
 * On success the button label is swapped for a confirmation and restored
 * after 1.8 s. On failure an alert shows the error together with the full
 * text so it can still be copied by hand.
 *
 * @param {HTMLElement} btn - The button that triggered the copy.
 * @param {string} titel - Draft title.
 * @param {string} body - Draft body text.
 */
async function copyDraftToClipboard(btn, titel, body) {
  const text = titel + '\n\n' + body;
  try {
    await navigator.clipboard.writeText(text);
    // Remember the real label only once: a second click while the
    // confirmation is showing must not capture "✓ kopiert" as the label
    // to restore.
    if (btn.dataset.origLabel === undefined) {
      btn.dataset.origLabel = btn.textContent;
    }
    btn.textContent = '✓ kopiert';
    // Restart the reset timer so rapid clicks produce a single restore.
    clearTimeout(btn._copyResetTimer);
    btn._copyResetTimer = setTimeout(() => {
      btn.textContent = btn.dataset.origLabel;
      delete btn.dataset.origLabel;
    }, 1800);
  } catch (e) {
    alert('Clipboard-Fehler: ' + e + '\n\nText:\n' + text);
  }
}
|
||||||
|
|
||||||
async function showDraft(id) {
|
async function showDraft(id) {
|
||||||
try {
|
try {
|
||||||
const r = await fetch(`/api/aktuelle-themen/drafts/${id}`);
|
const r = await fetch(`/api/aktuelle-themen/drafts/${id}`);
|
||||||
@ -440,9 +662,7 @@ document.addEventListener('keydown', (e) => {
|
|||||||
if (e.key === 'Escape') document.getElementById('at-modal-backdrop').style.display = 'none';
|
if (e.key === 'Escape') document.getElementById('at-modal-backdrop').style.display = 'none';
|
||||||
});
|
});
|
||||||
|
|
||||||
// Init
|
// Init: News-Tab als Default
|
||||||
loadZeitreihe();
|
loadActiveTab();
|
||||||
loadThemen();
|
|
||||||
loadDrafts();
|
|
||||||
</script>
|
</script>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|||||||
@ -205,11 +205,71 @@ def find_news_for_antrag(
|
|||||||
return scored[:top_k]
|
return scored[:top_k]
|
||||||
|
|
||||||
|
|
||||||
|
def compute_relevance(matches: list[dict]) -> dict:
    """Aggregate a relevance score, level and explanation from a match list.

    The score is the maximum of ``gwoe_score * similarity`` over all matches;
    a missing or ``None`` value counts as 0. Level thresholds:

    - score >= 4.0 -> "high"
    - score >= 2.5 -> "mid"
    - score > 0    -> "low"
    - otherwise    -> "none"

    The reason is a short German sentence naming the strongest match. It is
    assembled purely from the match data.

    Args:
        matches: Match dicts. The keys read are ``gwoe_score``,
            ``similarity``, ``title`` and ``fraktionen``.

    Returns:
        Dict with ``score`` (rounded to two decimals), ``level`` and
        ``reason``.
    """
    if not matches:
        return {
            "score": 0.0,
            "level": "none",
            "reason": "Keine GWÖ-bewerteten Anträge passen zu dieser News.",
        }

    def contribution(match: dict) -> float:
        return (match.get("gwoe_score") or 0.0) * (match.get("similarity") or 0.0)

    # max() keeps the first of several equally strong matches, exactly like
    # a stable descending sort followed by [0], without sorting the list.
    best_match = max(matches, key=contribution)
    best_score = contribution(best_match)

    if best_score >= 4.0:
        level = "high"
    elif best_score >= 2.5:
        level = "mid"
    elif best_score > 0:
        level = "low"
    else:
        level = "none"

    # Build the explanation from the strongest match.
    fr = ", ".join(best_match.get("fraktionen") or [])
    fr_clause = f" ({fr})" if fr else ""
    titel = (best_match.get("title") or "").strip()
    if len(titel) > 70:
        titel = titel[:67] + "…"

    # Never leak a literal "None" into the user-facing text; show the same
    # 0.0 that was used for the score computation instead.
    gwoe = best_match.get("gwoe_score")
    gwoe = 0.0 if gwoe is None else gwoe
    similarity = best_match.get("similarity")
    similarity = 0.0 if similarity is None else similarity

    reason = (
        f"GWÖ-{gwoe}/10-Antrag „{titel}“"
        f"{fr_clause} passt mit Similarity {similarity}"
    )
    if len(matches) > 1:
        reason += f" — {len(matches) - 1} weitere(r) Match(es)."
    else:
        reason += "."

    return {
        "score": round(best_score, 2),
        "level": level,
        "reason": reason,
    }
|
||||||
|
|
||||||
|
|
||||||
def aggregate_top_themen(
|
def aggregate_top_themen(
|
||||||
days_window: int = 7,
|
days_window: int = 7,
|
||||||
top_k: int = 10,
|
top_k: int = 10,
|
||||||
min_similarity: float = 0.4,
|
min_similarity: float = 0.4,
|
||||||
matches_per_news: int = 3,
|
matches_per_news: int = 3,
|
||||||
|
only_relevant: bool = False,
|
||||||
db_path: Optional[Path] = None,
|
db_path: Optional[Path] = None,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""Top-K aktuelle News (letzte N Tage) mit jeweils ihren passendsten
|
"""Top-K aktuelle News (letzte N Tage) mit jeweils ihren passendsten
|
||||||
@ -291,6 +351,13 @@ def aggregate_top_themen(
|
|||||||
tags = json.loads(n["tags"]) if n["tags"] else []
|
tags = json.loads(n["tags"]) if n["tags"] else []
|
||||||
except (json.JSONDecodeError, TypeError):
|
except (json.JSONDecodeError, TypeError):
|
||||||
tags = []
|
tags = []
|
||||||
|
top_matches = scored[:matches_per_news]
|
||||||
|
relevance = compute_relevance(top_matches)
|
||||||
|
|
||||||
|
# Pre-Filter: optional alle non-high/-mid raus
|
||||||
|
if only_relevant and relevance["level"] not in ("high", "mid"):
|
||||||
|
continue
|
||||||
|
|
||||||
buckets.append({
|
buckets.append({
|
||||||
"news": {
|
"news": {
|
||||||
"url": n["url"],
|
"url": n["url"],
|
||||||
@ -301,9 +368,22 @@ def aggregate_top_themen(
|
|||||||
"ressort": n["ressort"],
|
"ressort": n["ressort"],
|
||||||
"tags": tags,
|
"tags": tags,
|
||||||
},
|
},
|
||||||
"matches": scored[:matches_per_news],
|
"matches": top_matches,
|
||||||
|
"relevance": relevance,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Sortiere primaer nach Relevanz-Score (high vor mid vor low/none),
|
||||||
|
# sekundaer nach Datum desc.
|
||||||
|
level_rank = {"high": 3, "mid": 2, "low": 1, "none": 0}
|
||||||
|
buckets.sort(
|
||||||
|
key=lambda b: (
|
||||||
|
level_rank.get(b["relevance"]["level"], 0),
|
||||||
|
b["relevance"]["score"],
|
||||||
|
b["news"]["datum"],
|
||||||
|
),
|
||||||
|
reverse=True,
|
||||||
|
)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"buckets": buckets,
|
"buckets": buckets,
|
||||||
"n_total_news": len(news_rows),
|
"n_total_news": len(news_rows),
|
||||||
@ -312,6 +392,7 @@ def aggregate_top_themen(
|
|||||||
"top_k": top_k,
|
"top_k": top_k,
|
||||||
"min_similarity": min_similarity,
|
"min_similarity": min_similarity,
|
||||||
"matches_per_news": matches_per_news,
|
"matches_per_news": matches_per_news,
|
||||||
|
"only_relevant": only_relevant,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -369,3 +450,241 @@ def aggregate_themen_zeitreihe(
|
|||||||
"sources": sources_sorted,
|
"sources": sources_sorted,
|
||||||
"series": series,
|
"series": series,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def aggregate_news_cluster(
    days_window: int = 7,
    intra_threshold: float = 0.55,
    antrag_threshold: float = 0.4,
    min_cluster_size: int = 2,
    db_path: Optional[Path] = None,
) -> dict:
    """Cluster recent news by embedding similarity and match Anträge per cluster.

    Greedy clustering: news are visited newest first; every news not yet
    assigned becomes a seed, and all later unassigned news whose cosine
    similarity to the seed is >= ``intra_threshold`` join its cluster.
    Clusters with fewer than ``min_cluster_size`` members are dropped.

    For each kept cluster, the assessments are scored against the cluster's
    centroid (component-wise mean of member embeddings). Those with
    similarity >= ``antrag_threshold`` are ranked, and the top three returned.

    Args:
        days_window: Only news whose ``datum`` lies within this many days
            before now are considered.
        intra_threshold: Minimum seed-to-member cosine similarity.
        antrag_threshold: Minimum centroid-to-assessment cosine similarity.
        min_cluster_size: Minimum number of news per reported cluster.
        db_path: Database file; defaults to ``settings.db_path``.

    Returns:
        Dict with ``clusters`` (each with ``size``, ``top_tags``, ``members``
        and ``antrag_matches``), ``n_total_news`` (news inside the time
        window) and ``filter`` (the parameters used). Clusters are ordered by
        size, then by best assessment similarity, both descending.
    """
    from .config import settings
    from . import embeddings as emb

    filter_info = {
        "days_window": days_window,
        "intra_threshold": intra_threshold,
        "antrag_threshold": antrag_threshold,
        "min_cluster_size": min_cluster_size,
    }

    path = db_path or settings.db_path
    if not Path(path).exists():
        # Same shape as the regular result so callers can always read "filter".
        return {"clusters": [], "n_total_news": 0, "filter": filter_info}

    cutoff = datetime.now(timezone.utc).timestamp() - days_window * 86400
    news_rows = _load_embeddings(
        Path(path),
        "news_articles",
        ["url", "titel", "summary", "datum", "source", "ressort", "tags"],
    )
    fresh = []
    for n in news_rows:
        try:
            # NOTE: a ``datum`` without UTC offset is interpreted as local
            # time by ``datetime.timestamp()``.
            ts = datetime.fromisoformat(n["datum"].replace("Z", "+00:00")).timestamp()
        except (ValueError, AttributeError):
            # Unparseable or missing date: the row cannot be placed in the window.
            continue
        if ts < cutoff:
            continue
        n["_ts"] = ts
        fresh.append(n)
    fresh.sort(key=lambda x: x["_ts"], reverse=True)

    # Greedy clustering; every cluster contains at least its seed.
    assigned = [False] * len(fresh)
    clusters = []
    for i, seed in enumerate(fresh):
        if assigned[i]:
            continue
        members = [seed]
        assigned[i] = True
        for j in range(i + 1, len(fresh)):
            if assigned[j]:
                continue
            sim = emb.cosine_similarity(seed["_vec"], fresh[j]["_vec"])
            if sim >= intra_threshold:
                members.append(fresh[j])
                assigned[j] = True
        if len(members) >= min_cluster_size:
            clusters.append(members)

    assessments = _load_embeddings(
        Path(path),
        "assessments",
        ["drucksache", "title", "bundesland", "fraktionen", "gwoe_score",
         "empfehlung", "datum"],
    )
    out_clusters = []
    for cluster in clusters:
        # Centroid = component-wise mean of the member embeddings.
        dim = len(cluster[0]["_vec"])
        centroid = [
            sum(m["_vec"][k] for m in cluster) / len(cluster)
            for k in range(dim)
        ]

        # Rank assessments by similarity to the centroid.
        scored_anträge = []
        for a in assessments:
            sim = emb.cosine_similarity(centroid, a["_vec"])
            if sim < antrag_threshold:
                continue
            # Parse defensively, like the tags below: one malformed row must
            # not abort the whole aggregation.
            try:
                fraktionen = json.loads(a["fraktionen"] or "[]")
            except (json.JSONDecodeError, TypeError):
                fraktionen = []
            scored_anträge.append({
                "drucksache": a["drucksache"],
                "title": a["title"],
                "bundesland": a["bundesland"],
                "fraktionen": fraktionen,
                "gwoe_score": a["gwoe_score"],
                "empfehlung": a["empfehlung"],
                "datum": a["datum"],
                "similarity": round(sim, 3),
            })
        scored_anträge.sort(key=lambda x: x["similarity"], reverse=True)

        # Count tags across the cluster members and keep the five most frequent.
        tag_counts: defaultdict[str, int] = defaultdict(int)
        for m in cluster:
            try:
                tags = json.loads(m["tags"]) if m["tags"] else []
            except (json.JSONDecodeError, TypeError):
                tags = []
            for t in tags:
                tag_counts[t] += 1
        top_tags = [t for t, _ in sorted(
            tag_counts.items(), key=lambda x: x[1], reverse=True,
        )[:5]]

        out_clusters.append({
            "size": len(cluster),
            "top_tags": top_tags,
            "members": [
                {
                    "url": m["url"], "titel": m["titel"],
                    "datum": m["datum"], "source": m["source"],
                    "ressort": m["ressort"],
                }
                for m in cluster
            ],
            "antrag_matches": scored_anträge[:3],
        })

    # Largest clusters first; ties broken by the best assessment similarity.
    out_clusters.sort(
        key=lambda c: (
            c["size"],
            c["antrag_matches"][0]["similarity"] if c["antrag_matches"] else 0,
        ),
        reverse=True,
    )
    return {
        "clusters": out_clusters,
        "n_total_news": len(fresh),
        "filter": filter_info,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def aggregate_top_antraege_with_news(
    min_gwoe_score: float = 8.0,
    days_window: int = 14,
    min_similarity: float = 0.4,
    top_k_news: int = 5,
    db_path: Optional[Path] = None,
) -> dict:
    """Reverse view: highly rated Anträge with their recent news resonance.

    For every assessment with ``gwoe_score >= min_gwoe_score``, all news from
    the last ``days_window`` days are scored by embedding cosine similarity.
    News with similarity >= ``min_similarity`` count as matches. Assessments
    without any match are still listed, with ``news_count == 0``.

    Args:
        min_gwoe_score: Lower bound on the assessment's ``gwoe_score``.
        days_window: Only news whose ``datum`` lies within this many days
            before now are considered.
        min_similarity: Minimum cosine similarity for a news match.
        top_k_news: Maximum number of matched news returned per assessment.
        db_path: Database file; defaults to ``settings.db_path``.

    Returns:
        Dict with ``antraege`` and ``filter`` (the parameters used). Entries
        with matches come first, then by ``news_count`` descending, then by
        ``gwoe_score`` descending.
    """
    from .config import settings
    from . import embeddings as emb

    filter_info = {
        "min_gwoe_score": min_gwoe_score,
        "days_window": days_window,
        "min_similarity": min_similarity,
        "top_k_news": top_k_news,
    }

    path = db_path or settings.db_path
    if not Path(path).exists():
        # Same shape as the regular result so callers can always read "filter".
        return {"antraege": [], "filter": filter_info}

    cutoff = datetime.now(timezone.utc).timestamp() - days_window * 86400

    # Load only the highly rated assessments.
    assessments = _load_embeddings(
        Path(path),
        "assessments",
        ["drucksache", "title", "bundesland", "fraktionen", "gwoe_score",
         "empfehlung", "datum", "antrag_zusammenfassung"],
        where_extra=" AND gwoe_score >= ?",
        params=(min_gwoe_score,),
    )

    # Load news and keep those inside the time window. The output payload
    # (including parsed tags) is built once per news here instead of once per
    # (Antrag, news) pair in the scoring loop.
    news_rows = _load_embeddings(
        Path(path),
        "news_articles",
        ["url", "titel", "summary", "datum", "source", "ressort", "tags"],
    )
    fresh_news = []  # (embedding, payload) pairs
    for n in news_rows:
        try:
            # NOTE: a ``datum`` without UTC offset is interpreted as local
            # time by ``datetime.timestamp()``.
            ts = datetime.fromisoformat(n["datum"].replace("Z", "+00:00")).timestamp()
        except (ValueError, AttributeError):
            continue
        if ts < cutoff:
            continue
        try:
            tags = json.loads(n["tags"]) if n["tags"] else []
        except (json.JSONDecodeError, TypeError):
            tags = []
        fresh_news.append((n["_vec"], {
            "url": n["url"], "titel": n["titel"],
            "summary": n["summary"], "datum": n["datum"],
            "source": n["source"], "ressort": n["ressort"],
            "tags": tags,
        }))

    out = []
    for a in assessments:
        scored = []
        for vec, payload in fresh_news:
            sim = emb.cosine_similarity(a["_vec"], vec)
            if sim < min_similarity:
                continue
            scored.append({**payload, "similarity": round(sim, 3)})
        scored.sort(key=lambda x: x["similarity"], reverse=True)

        # Parse defensively, like the tags above: one malformed row must not
        # abort the whole aggregation.
        try:
            fraktionen = json.loads(a["fraktionen"] or "[]")
        except (json.JSONDecodeError, TypeError):
            fraktionen = []

        out.append({
            "drucksache": a["drucksache"],
            "title": a["title"],
            "bundesland": a["bundesland"],
            "fraktionen": fraktionen,
            "gwoe_score": a["gwoe_score"],
            "empfehlung": a["empfehlung"],
            "datum": a["datum"],
            "antrag_zusammenfassung": a["antrag_zusammenfassung"],
            "news_count": len(scored),
            "top_news": scored[:top_k_news],
        })

    # Entries with news first, then more news, then higher GWÖ score.
    out.sort(
        key=lambda x: (x["news_count"] > 0, x["news_count"], x["gwoe_score"] or 0),
        reverse=True,
    )
    return {
        "antraege": out,
        "filter": filter_info,
    }
|
||||||
|
|||||||
@ -10,8 +10,11 @@ from unittest.mock import patch
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from app.themen_matching import (
|
from app.themen_matching import (
|
||||||
|
aggregate_news_cluster,
|
||||||
aggregate_themen_zeitreihe,
|
aggregate_themen_zeitreihe,
|
||||||
|
aggregate_top_antraege_with_news,
|
||||||
aggregate_top_themen,
|
aggregate_top_themen,
|
||||||
|
compute_relevance,
|
||||||
find_anträge_for_news,
|
find_anträge_for_news,
|
||||||
find_news_for_antrag,
|
find_news_for_antrag,
|
||||||
)
|
)
|
||||||
@ -276,6 +279,48 @@ class TestAggregateTopThemen:
|
|||||||
# ─────────────────────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class TestComputeRelevance:
    """Unit tests for compute_relevance: levels, score and best-match choice."""

    @staticmethod
    def _relevance_of(gwoe_score, similarity, fraktionen):
        """Run compute_relevance on a single synthetic match."""
        match = {
            "drucksache": "x",
            "title": "T",
            "fraktionen": fraktionen,
            "gwoe_score": gwoe_score,
            "similarity": similarity,
        }
        return compute_relevance([match])

    def test_empty_returns_none_level(self):
        outcome = compute_relevance([])
        assert outcome["level"] == "none"
        assert outcome["score"] == 0.0

    def test_high_score_high_sim_high_level(self):
        # 8.0 * 0.6 = 4.8, which is above the 4.0 "high" threshold.
        outcome = self._relevance_of(8.0, 0.6, ["GRÜNE"])
        assert outcome["level"] == "high"
        assert outcome["score"] == 4.8
        assert "GWÖ-8.0" in outcome["reason"]

    def test_low_score_low_level(self):
        # 3.0 * 0.5 = 1.5, positive but below the 2.5 "mid" threshold.
        outcome = self._relevance_of(3.0, 0.5, [])
        assert outcome["level"] == "low"

    def test_mid_level(self):
        # 6.0 * 0.5 = 3.0, between the "mid" and "high" thresholds.
        outcome = self._relevance_of(6.0, 0.5, [])
        assert outcome["level"] == "mid"

    def test_takes_best_match(self):
        weak = {"gwoe_score": 5.0, "similarity": 0.4, "title": "Schwach", "fraktionen": []}
        strong = {"gwoe_score": 9.0, "similarity": 0.55, "title": "Stark", "fraktionen": []}
        outcome = compute_relevance([weak, strong])
        # max(2.0, 4.95) = 4.95, so the second match must win.
        assert outcome["score"] == 4.95
        assert "Stark" in outcome["reason"]
|
||||||
|
|
||||||
|
|
||||||
class TestAggregateZeitreihe:
|
class TestAggregateZeitreihe:
|
||||||
def test_structure(self, populated_db):
|
def test_structure(self, populated_db):
|
||||||
result = aggregate_themen_zeitreihe(db_path=populated_db, days_window=7)
|
result = aggregate_themen_zeitreihe(db_path=populated_db, days_window=7)
|
||||||
@ -295,3 +340,115 @@ class TestAggregateZeitreihe:
|
|||||||
result = aggregate_themen_zeitreihe(db_path=populated_db, days_window=7)
|
result = aggregate_themen_zeitreihe(db_path=populated_db, days_window=7)
|
||||||
for source in result["sources"]:
|
for source in result["sources"]:
|
||||||
assert len(result["series"][source]) == len(result["buckets"])
|
assert len(result["series"][source]) == len(result["buckets"])
|
||||||
|
|
||||||
|
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
# aggregate_top_themen mit Relevance + only_relevant Filter
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class TestRelevanceInTopThemen:
    """aggregate_top_themen: relevance payload, only_relevant filter, ordering."""

    def test_each_bucket_has_relevance(self, populated_db):
        outcome = aggregate_top_themen(db_path=populated_db, min_similarity=0.5)
        for bucket in outcome["buckets"]:
            assert "relevance" in bucket
            for field in ("level", "score", "reason"):
                assert field in bucket["relevance"]

    def test_only_relevant_filters_out_low_or_none(self, populated_db):
        outcome = aggregate_top_themen(
            db_path=populated_db,
            min_similarity=0.0,
            only_relevant=True,
        )
        # With the pre-filter active only "high" and "mid" buckets may remain.
        for bucket in outcome["buckets"]:
            assert bucket["relevance"]["level"] in ("high", "mid")

    def test_buckets_sorted_high_first(self, populated_db):
        outcome = aggregate_top_themen(db_path=populated_db, min_similarity=0.0)
        order = {"high": 3, "mid": 2, "low": 1, "none": 0}
        observed = [
            order.get(bucket["relevance"]["level"], 0)
            for bucket in outcome["buckets"]
        ]
        # The level ranks must be monotonically non-increasing.
        assert observed == sorted(observed, reverse=True)
|
||||||
|
|
||||||
|
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
# aggregate_news_cluster
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class TestNewsCluster:
    """Tests for aggregate_news_cluster: result shape and cluster-size filter."""

    def test_structure(self, populated_db):
        # A threshold close to 1.0 only groups near-identical embeddings;
        # this test checks the top-level result keys only.
        result = aggregate_news_cluster(
            db_path=populated_db, min_cluster_size=2,
            intra_threshold=0.99,
        )
        assert "clusters" in result
        assert "n_total_news" in result

    def test_loose_threshold_creates_cluster(self, populated_db):
        # A very lax threshold lets almost every news join the first seed.
        result = aggregate_news_cluster(
            db_path=populated_db, min_cluster_size=2,
            intra_threshold=0.0, days_window=30,
        )
        # The previous ``len(...) >= 0`` check could never fail. Assert
        # invariants of the result instead.
        # TODO(review): also assert ``len(result["clusters"]) >= 1`` once the
        # fixture is confirmed to hold >= 2 news within 30 days whose cosine
        # similarity is non-negative.
        assert isinstance(result["clusters"], list)
        for c in result["clusters"]:
            assert c["size"] >= 2
            assert len(c["members"]) == c["size"]
            assert len(c["antrag_matches"]) <= 3
            assert "top_tags" in c

    def test_min_cluster_size_filter(self, populated_db):
        result = aggregate_news_cluster(
            db_path=populated_db, min_cluster_size=5,
        )
        # The fixture is expected to hold only 3 news, so no cluster can
        # reach five members.
        assert result["clusters"] == []
|
||||||
|
|
||||||
|
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
# aggregate_top_antraege_with_news
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class TestTopAntraegeWithNews:
    """Tests for aggregate_top_antraege_with_news: score filter, counts, order."""

    def test_only_high_gwoe(self, populated_db):
        """Only Anträge with gwoe_score >= min_gwoe_score may appear."""
        outcome = aggregate_top_antraege_with_news(
            db_path=populated_db,
            min_gwoe_score=8.0,
        )
        entries = outcome["antraege"]
        for entry in entries:
            assert entry["gwoe_score"] >= 8.0
        # Fixture scores: 18/A = 8.0, 18/B = 7.0, 18/C = 5.0, so only 18/A passes.
        listed = [entry["drucksache"] for entry in entries]
        assert "18/A" in listed
        assert "18/B" not in listed
        assert "18/C" not in listed

    def test_news_count_per_antrag(self, populated_db):
        outcome = aggregate_top_antraege_with_news(
            db_path=populated_db,
            min_gwoe_score=7.0,
            min_similarity=0.5,
            days_window=30,
        )
        # 18/A matches news n1 (housing), so it must report at least one news.
        antrag_a = next(
            entry for entry in outcome["antraege"] if entry["drucksache"] == "18/A"
        )
        assert antrag_a["news_count"] >= 1

    def test_sort_news_first(self, populated_db):
        outcome = aggregate_top_antraege_with_news(
            db_path=populated_db,
            min_gwoe_score=7.0,
            min_similarity=0.5,
            days_window=30,
        )
        # Every entry with news must precede every entry without news, i.e.
        # the has-news flags form a run of True followed by a run of False.
        has_news = [entry["news_count"] > 0 for entry in outcome["antraege"]]
        assert has_news == sorted(has_news, reverse=True)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user