From e27dfc30a2a3f764751028c0eb05a9ad50e6d174 Mon Sep 17 00:00:00 2001 From: Dotty Dotter Date: Sun, 3 May 2026 13:41:31 +0200 Subject: [PATCH] feat(#170 followup 2): Pre-Filter, Cluster, Antrags-Initiative, PM-Versionierung, Mail-Link MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User-Feedback: Aktuelle-Themen-Dashboard war "Detective-Modus" — durch viele News scrollen, Match-Stärke selbst interpretieren. Komplett-Refactor zur kuratierten Sicht mit Tabs. **1. Pre-Filter + GWÖ-Relevanz-Score (#134)** `compute_relevance(matches)`: Score = max(antrag.gwoe_score × similarity). Level: high (≥4.0) / mid (≥2.5) / low (>0) / none. Pro News in der UI ein farbiger Pill (gruen/orange/grau) + Reason-Text: "GWÖ-9.0/10-Antrag „Klimaschutzgesetz“ (GRÜNE) passt mit Similarity 0.55." Default-Filter "Nur GWÖ-relevant" aktiv (only_relevant=true) — zeigt nur high/mid News, blendet Rauschen aus. Toggle-Checkbox. `/api/aktuelle-themen/top` neuer Param `only_relevant=true|false`. **2. PM-Versionierung im Modal (#135)** `list_drafts_for(drucksache, news_url)`: alle Versionen, neueste oben. Endpoint `/api/aktuelle-themen/drafts-versions`. Modal zeigt Dropdown wenn >1 Version, Switch ohne LLM-Call. Force-Regen bleibt als Button im "bestehender Entwurf"-Banner. **3. News-Cluster-View (#136)** `aggregate_news_cluster(intra_threshold=0.55, min_cluster_size=2)`: Greedy-Embedding-Cluster + zentralster Antrags-Match per Centroid-Vektor. Zweiter Tab "Themen-Cluster": 5 News über "Pflege" → 1 Cluster mit gemeinsamem Antrag-Vorschlag, statt 5 separate Cards. Endpoint: `/api/aktuelle-themen/cluster`. **4. Mail-Direkt-Link + Clipboard (#137)** Im PM-Modal zwei Buttons: - "📧 Per Mail versenden" (mailto: mit subject + body, ~1900 Char Limit) - "📋 In Zwischenablage kopieren" (navigator.clipboard.writeText) - Bei langem PM (>1900 Char): mailto-Link wird ausgegraut, Hinweis "PM zu lang für Mail-Link — Clipboard nutzen" **5. 
Antrags-Initiative (#138)** `aggregate_top_antraege_with_news(min_gwoe_score=8.0, days=14)`: Reverse-Sicht — pro Antrag mit GWÖ ≥ 8 die News-Resonanz. Antraege ohne Match werden trotzdem angezeigt mit "keine News"-Pill. Dritter Tab "GWÖ-Top-Anträge". Endpoint `.../top-antraege`. **UI-Restrukturierung:** statt einer langen Scroll-Liste jetzt 5 Tabs mit gemeinsamer Filter-Bar: - News × Anträge (Default, kuratiert via Pre-Filter) - Themen-Cluster (Bündel ähnlicher News) - GWÖ-Top-Anträge (Reverse) - News-Volumen (Chart) - PM-Entwürfe (Drafts-Liste) Default min_similarity 0.40 → 0.50 erhoeht (weniger Rauschen). Tests: 14 neue (compute_relevance × 5, only_relevant + sort × 3, cluster × 3, top_antraege × 3). Suite 1067 gruen. Co-Authored-By: Claude Opus 4.7 (1M context) --- app/main.py | 54 ++- app/presse_generator.py | 34 ++ app/templates/v2/screens/aktuelle-themen.html | 306 ++++++++++++++--- app/themen_matching.py | 321 +++++++++++++++++- tests/test_themen_matching.py | 157 +++++++++ 5 files changed, 827 insertions(+), 45 deletions(-) diff --git a/app/main.py b/app/main.py index d5ccb89..e4ad7eb 100644 --- a/app/main.py +++ b/app/main.py @@ -2030,14 +2030,55 @@ async def api_aktuelle_themen_top( top_k: int = 10, min_similarity: float = 0.4, matches_per_news: int = 3, + only_relevant: bool = False, ): - """Top-K News der letzten N Tage mit Antrags-Match.""" + """Top-K News der letzten N Tage mit Antrags-Match. + + Mit `only_relevant=true` werden News mit Relevance-Level "low" oder + "none" rausgefiltert. 
+ """ from .themen_matching import aggregate_top_themen return aggregate_top_themen( days_window=days, top_k=top_k, min_similarity=min_similarity, matches_per_news=matches_per_news, + only_relevant=only_relevant, + ) + + +@app.get("/api/aktuelle-themen/cluster") +async def api_aktuelle_themen_cluster( + days: int = 7, + intra_threshold: float = 0.55, + antrag_threshold: float = 0.4, + min_cluster_size: int = 2, +): + """News-zu-News-Cluster ueber Embeddings — gleiches Thema, mehrere + Quellen → ein Cluster, nicht 5 separate Cards.""" + from .themen_matching import aggregate_news_cluster + return aggregate_news_cluster( + days_window=days, + intra_threshold=intra_threshold, + antrag_threshold=antrag_threshold, + min_cluster_size=min_cluster_size, + ) + + +@app.get("/api/aktuelle-themen/top-antraege") +async def api_aktuelle_themen_top_antraege( + min_gwoe_score: float = 8.0, + days: int = 14, + min_similarity: float = 0.4, + top_k_news: int = 5, +): + """Reverse-Sicht: hoch GWÖ-bewertete Antraege mit aktueller Pressewirkung.""" + from .themen_matching import aggregate_top_antraege_with_news + return aggregate_top_antraege_with_news( + min_gwoe_score=min_gwoe_score, + days_window=days, + min_similarity=min_similarity, + top_k_news=top_k_news, ) @@ -2125,6 +2166,17 @@ async def api_draft_detail(draft_id: int): return d +@app.get("/api/aktuelle-themen/drafts-versions") +async def api_draft_versions(drucksache: str, news_url: str): + """Alle Versions-Drafts fuer (drucksache, news_url) — neueste oben.""" + from .presse_generator import list_drafts_for + return { + "drucksache": drucksache, + "news_url": news_url, + "versions": list_drafts_for(drucksache, news_url), + } + + @app.get("/api/auswertungen/matrix") async def auswertungen_matrix( wahlperiode: Optional[str] = None, diff --git a/app/presse_generator.py b/app/presse_generator.py index 0dfad85..0f7d69d 100644 --- a/app/presse_generator.py +++ b/app/presse_generator.py @@ -279,6 +279,40 @@ def list_drafts( ] +def 
list_drafts_for( + drucksache: str, + news_url: str, + db_path: Optional[Path] = None, +) -> list[dict]: + """Alle Versions-Drafts fuer ein (drucksache, news_url)-Paar, neueste oben.""" + from .config import settings + + path = db_path or settings.db_path + if not Path(path).exists(): + return [] + conn = sqlite3.connect(str(path)) + try: + rows = conn.execute( + """SELECT id, drucksache, bundesland, news_url, news_titel, + titel, body, model, created_at + FROM presse_drafts + WHERE drucksache=? AND news_url=? + ORDER BY id DESC""", + (drucksache, news_url), + ).fetchall() + finally: + conn.close() + return [ + { + "id": r[0], "drucksache": r[1], "bundesland": r[2], + "news_url": r[3], "news_titel": r[4], + "titel": r[5], "body": r[6], "model": r[7], + "created_at": r[8], + } + for r in rows + ] + + def get_draft( draft_id: int, db_path: Optional[Path] = None, diff --git a/app/templates/v2/screens/aktuelle-themen.html b/app/templates/v2/screens/aktuelle-themen.html index 897dfc9..28b0a3a 100644 --- a/app/templates/v2/screens/aktuelle-themen.html +++ b/app/templates/v2/screens/aktuelle-themen.html @@ -161,46 +161,81 @@
- - + - - - + + - + +
- -

- News-Volumen pro Quelle (letzte 30 Tage) -

-
- -
-
- - -

- Top-Themen × passende Anträge -

-
-
Lade …
+ +
+ + + + +
- -

- Pressemitteilungs-Entwürfe (zuletzt generiert) -

-
-
Lade Entwürfe …
+ +
+
+
Lade …
+
+
+ + + + + + + + @@ -217,6 +252,7 @@ {% block body_scripts %} {% endblock %} diff --git a/app/themen_matching.py b/app/themen_matching.py index 5bb0b6f..8614be3 100644 --- a/app/themen_matching.py +++ b/app/themen_matching.py @@ -205,11 +205,71 @@ def find_news_for_antrag( return scored[:top_k] +def compute_relevance(matches: list[dict]) -> dict: + """Aggregiere Relevanz-Score + Begruendung aus einer Match-Liste. + + Score = max(antrag.gwoe_score × similarity) ueber alle Matches. + Domain: 0..10 (gleicht GWÖ-Score-Skala). Level-Schwellen: + - score >= 4.0 → "high" (mind. ein starkes GWÖ-Match) + - score >= 2.5 → "mid" (passt, aber GWÖ niedrig oder Match schwach) + - score > 0 → "low" (nur schwach passt) + - score == 0 → "none" (gar kein GWÖ-Match) + + Reason: kompakter erklaerender Text, der den staerksten Match nennt. + Kein LLM-Call — nur Daten-Synthese. + """ + if not matches: + return { + "score": 0.0, + "level": "none", + "reason": "Keine GWÖ-bewerteten Anträge passen zu dieser News.", + } + # Score-Beitraege berechnen + contribs = [] + for m in matches: + gw = m.get("gwoe_score") or 0.0 + sim = m.get("similarity") or 0.0 + contribs.append((gw * sim, m)) + contribs.sort(key=lambda x: x[0], reverse=True) + best_score, best_match = contribs[0] + + if best_score >= 4.0: + level = "high" + elif best_score >= 2.5: + level = "mid" + elif best_score > 0: + level = "low" + else: + level = "none" + + # Begruendung + fr = ", ".join(best_match.get("fraktionen") or []) + fr_clause = f" ({fr})" if fr else "" + titel = (best_match.get("title") or "").strip() + if len(titel) > 70: + titel = titel[:67] + "…" + reason = ( + f"GWÖ-{best_match.get('gwoe_score')}/10-Antrag „{titel}" + ("" if titel.endswith("…") else "") + "“" + f"{fr_clause} passt mit Similarity {best_match.get('similarity')}" + ) + if len(matches) > 1: + reason += f" — {len(matches) - 1} weitere(r) Match(es)." + else: + reason += "." 
+ + return { + "score": round(best_score, 2), + "level": level, + "reason": reason, + } + + def aggregate_top_themen( days_window: int = 7, top_k: int = 10, min_similarity: float = 0.4, matches_per_news: int = 3, + only_relevant: bool = False, db_path: Optional[Path] = None, ) -> dict: """Top-K aktuelle News (letzte N Tage) mit jeweils ihren passendsten @@ -291,6 +351,13 @@ def aggregate_top_themen( tags = json.loads(n["tags"]) if n["tags"] else [] except (json.JSONDecodeError, TypeError): tags = [] + top_matches = scored[:matches_per_news] + relevance = compute_relevance(top_matches) + + # Pre-Filter: optional alle non-high/-mid raus + if only_relevant and relevance["level"] not in ("high", "mid"): + continue + buckets.append({ "news": { "url": n["url"], @@ -301,9 +368,22 @@ def aggregate_top_themen( "ressort": n["ressort"], "tags": tags, }, - "matches": scored[:matches_per_news], + "matches": top_matches, + "relevance": relevance, }) + # Sortiere primaer nach Relevanz-Score (high vor mid vor low/none), + # sekundaer nach Datum desc. + level_rank = {"high": 3, "mid": 2, "low": 1, "none": 0} + buckets.sort( + key=lambda b: ( + level_rank.get(b["relevance"]["level"], 0), + b["relevance"]["score"], + b["news"]["datum"], + ), + reverse=True, + ) + return { "buckets": buckets, "n_total_news": len(news_rows), @@ -312,6 +392,7 @@ def aggregate_top_themen( "top_k": top_k, "min_similarity": min_similarity, "matches_per_news": matches_per_news, + "only_relevant": only_relevant, }, } @@ -369,3 +450,241 @@ def aggregate_themen_zeitreihe( "sources": sources_sorted, "series": series, } + + +def aggregate_news_cluster( + days_window: int = 7, + intra_threshold: float = 0.55, + antrag_threshold: float = 0.4, + min_cluster_size: int = 2, + db_path: Optional[Path] = None, +) -> dict: + """News-zu-News-Clustering ueber Embeddings. + + Greedy: jede ungeclusterte News wird Cluster-Seed, alle anderen mit + cosine >= ``intra_threshold`` werden eingeschlossen. 
Cluster mit + weniger als ``min_cluster_size`` News werden verworfen (nicht als + Single-Member-Cluster gezeigt — das waere identisch zu aggregate_top_themen). + + Pro Cluster: zentralster Antrag-Match aus den GWÖ-bewerteten Antraegen. + """ + from .config import settings + from . import embeddings as emb + + path = db_path or settings.db_path + if not Path(path).exists(): + return {"clusters": [], "n_total_news": 0} + + cutoff = datetime.now(timezone.utc).timestamp() - days_window * 86400 + news_rows = _load_embeddings( + Path(path), + "news_articles", + ["url", "titel", "summary", "datum", "source", "ressort", "tags"], + ) + fresh = [] + for n in news_rows: + try: + ts = datetime.fromisoformat(n["datum"].replace("Z", "+00:00")).timestamp() + except (ValueError, AttributeError): + continue + if ts < cutoff: + continue + n["_ts"] = ts + fresh.append(n) + fresh.sort(key=lambda x: x["_ts"], reverse=True) + + # Greedy-Clustering + assigned = [False] * len(fresh) + clusters = [] + for i, seed in enumerate(fresh): + if assigned[i]: + continue + members = [seed] + assigned[i] = True + for j in range(i + 1, len(fresh)): + if assigned[j]: + continue + sim = emb.cosine_similarity(seed["_vec"], fresh[j]["_vec"]) + if sim >= intra_threshold: + members.append(fresh[j]) + assigned[j] = True + if len(members) >= min_cluster_size: + clusters.append(members) + + # Pro Cluster: zentralster Antrag (Match gegen den Mittelpunkt-Vektor) + assessments = _load_embeddings( + Path(path), + "assessments", + ["drucksache", "title", "bundesland", "fraktionen", "gwoe_score", + "empfehlung", "datum"], + ) + out_clusters = [] + for cluster in clusters: + # Mittelpunkt-Embedding (Schwerpunkt) + if not cluster: + continue + dim = len(cluster[0]["_vec"]) + centroid = [ + sum(m["_vec"][k] for m in cluster) / len(cluster) + for k in range(dim) + ] + # Top-Antrag finden + scored_anträge = [] + for a in assessments: + sim = emb.cosine_similarity(centroid, a["_vec"]) + if sim < antrag_threshold: + 
continue + scored_anträge.append({ + "drucksache": a["drucksache"], + "title": a["title"], + "bundesland": a["bundesland"], + "fraktionen": json.loads(a["fraktionen"] or "[]"), + "gwoe_score": a["gwoe_score"], + "empfehlung": a["empfehlung"], + "datum": a["datum"], + "similarity": round(sim, 3), + }) + scored_anträge.sort(key=lambda x: x["similarity"], reverse=True) + + # Tags der Cluster-Members aggregieren + tag_counts: defaultdict[str, int] = defaultdict(int) + for m in cluster: + try: + tags = json.loads(m["tags"]) if m["tags"] else [] + except (json.JSONDecodeError, TypeError): + tags = [] + for t in tags: + tag_counts[t] += 1 + top_tags = [t for t, _ in sorted( + tag_counts.items(), key=lambda x: x[1], reverse=True, + )[:5]] + + out_clusters.append({ + "size": len(cluster), + "top_tags": top_tags, + "members": [ + { + "url": m["url"], "titel": m["titel"], + "datum": m["datum"], "source": m["source"], + "ressort": m["ressort"], + } + for m in cluster + ], + "antrag_matches": scored_anträge[:3], + }) + + # Cluster nach Groesse desc, dann besten Antrag-Score desc + out_clusters.sort( + key=lambda c: ( + c["size"], + c["antrag_matches"][0]["similarity"] if c["antrag_matches"] else 0, + ), + reverse=True, + ) + return { + "clusters": out_clusters, + "n_total_news": len(fresh), + "filter": { + "days_window": days_window, + "intra_threshold": intra_threshold, + "antrag_threshold": antrag_threshold, + "min_cluster_size": min_cluster_size, + }, + } + + +def aggregate_top_antraege_with_news( + min_gwoe_score: float = 8.0, + days_window: int = 14, + min_similarity: float = 0.4, + top_k_news: int = 5, + db_path: Optional[Path] = None, +) -> dict: + """Reverse-Sicht: hoch GWÖ-bewertete Antraege mit aktueller News-Resonanz. + + Pro Antrag mit ``gwoe_score >= min_gwoe_score``: Anzahl + Top-K der + News aus den letzten ``days_window`` Tagen, die per Embedding-Match + passen. 
Antraege ohne News-Match werden trotzdem mit ``news_count=0`` + aufgefuehrt — als Hinweis "GWÖ-Top-Antrag, aktuell ohne Pressewirkung". + """ + from .config import settings + from . import embeddings as emb + + path = db_path or settings.db_path + if not Path(path).exists(): + return {"antraege": []} + + cutoff = datetime.now(timezone.utc).timestamp() - days_window * 86400 + + # Hoch-GWÖ-Antraege laden + assessments = _load_embeddings( + Path(path), + "assessments", + ["drucksache", "title", "bundesland", "fraktionen", "gwoe_score", + "empfehlung", "datum", "antrag_zusammenfassung"], + where_extra=" AND gwoe_score >= ?", + params=(min_gwoe_score,), + ) + + # Frische News laden + news_rows = _load_embeddings( + Path(path), + "news_articles", + ["url", "titel", "summary", "datum", "source", "ressort", "tags"], + ) + fresh_news = [] + for n in news_rows: + try: + ts = datetime.fromisoformat(n["datum"].replace("Z", "+00:00")).timestamp() + except (ValueError, AttributeError): + continue + if ts < cutoff: + continue + fresh_news.append(n) + + out = [] + for a in assessments: + scored = [] + for n in fresh_news: + sim = emb.cosine_similarity(a["_vec"], n["_vec"]) + if sim < min_similarity: + continue + try: + tags = json.loads(n["tags"]) if n["tags"] else [] + except (json.JSONDecodeError, TypeError): + tags = [] + scored.append({ + "url": n["url"], "titel": n["titel"], + "summary": n["summary"], "datum": n["datum"], + "source": n["source"], "ressort": n["ressort"], + "tags": tags, + "similarity": round(sim, 3), + }) + scored.sort(key=lambda x: x["similarity"], reverse=True) + out.append({ + "drucksache": a["drucksache"], + "title": a["title"], + "bundesland": a["bundesland"], + "fraktionen": json.loads(a["fraktionen"] or "[]"), + "gwoe_score": a["gwoe_score"], + "empfehlung": a["empfehlung"], + "datum": a["datum"], + "antrag_zusammenfassung": a["antrag_zusammenfassung"], + "news_count": len(scored), + "top_news": scored[:top_k_news], + }) + + # Sortierung: Antraege mit 
News oben, dann nach gwoe_score desc + out.sort( + key=lambda x: (x["news_count"] > 0, x["news_count"], x["gwoe_score"] or 0), + reverse=True, + ) + return { + "antraege": out, + "filter": { + "min_gwoe_score": min_gwoe_score, + "days_window": days_window, + "min_similarity": min_similarity, + "top_k_news": top_k_news, + }, + } diff --git a/tests/test_themen_matching.py b/tests/test_themen_matching.py index 6a64c41..8881afa 100644 --- a/tests/test_themen_matching.py +++ b/tests/test_themen_matching.py @@ -10,8 +10,11 @@ from unittest.mock import patch import pytest from app.themen_matching import ( + aggregate_news_cluster, aggregate_themen_zeitreihe, + aggregate_top_antraege_with_news, aggregate_top_themen, + compute_relevance, find_anträge_for_news, find_news_for_antrag, ) @@ -276,6 +279,48 @@ class TestAggregateTopThemen: # ───────────────────────────────────────────────────────────────────────────── +class TestComputeRelevance: + def test_empty_returns_none_level(self): + r = compute_relevance([]) + assert r["level"] == "none" + assert r["score"] == 0.0 + + def test_high_score_high_sim_high_level(self): + r = compute_relevance([{ + "drucksache": "x", "title": "T", "fraktionen": ["GRÜNE"], + "gwoe_score": 8.0, "similarity": 0.6, + }]) + # 8.0 × 0.6 = 4.8 → high + assert r["level"] == "high" + assert r["score"] == 4.8 + assert "GWÖ-8.0" in r["reason"] + + def test_low_score_low_level(self): + r = compute_relevance([{ + "drucksache": "x", "title": "T", "fraktionen": [], + "gwoe_score": 3.0, "similarity": 0.5, + }]) + # 3.0 × 0.5 = 1.5 → low + assert r["level"] == "low" + + def test_mid_level(self): + r = compute_relevance([{ + "drucksache": "x", "title": "T", "fraktionen": [], + "gwoe_score": 6.0, "similarity": 0.5, + }]) + # 6.0 × 0.5 = 3.0 → mid + assert r["level"] == "mid" + + def test_takes_best_match(self): + r = compute_relevance([ + {"gwoe_score": 5.0, "similarity": 0.4, "title": "Schwach", "fraktionen": []}, + {"gwoe_score": 9.0, "similarity": 0.55, 
"title": "Stark", "fraktionen": []}, + ]) + # max(2.0, 4.95) = 4.95 → high + assert r["score"] == 4.95 + assert "Stark" in r["reason"] + + class TestAggregateZeitreihe: def test_structure(self, populated_db): result = aggregate_themen_zeitreihe(db_path=populated_db, days_window=7) @@ -295,3 +340,115 @@ class TestAggregateZeitreihe: result = aggregate_themen_zeitreihe(db_path=populated_db, days_window=7) for source in result["sources"]: assert len(result["series"][source]) == len(result["buckets"]) + + +# ───────────────────────────────────────────────────────────────────────────── +# aggregate_top_themen mit Relevance + only_relevant Filter +# ───────────────────────────────────────────────────────────────────────────── + + +class TestRelevanceInTopThemen: + def test_each_bucket_has_relevance(self, populated_db): + result = aggregate_top_themen(db_path=populated_db, min_similarity=0.5) + for b in result["buckets"]: + assert "relevance" in b + assert "level" in b["relevance"] + assert "score" in b["relevance"] + assert "reason" in b["relevance"] + + def test_only_relevant_filters_out_low_or_none(self, populated_db): + result = aggregate_top_themen( + db_path=populated_db, min_similarity=0.0, only_relevant=True, + ) + for b in result["buckets"]: + assert b["relevance"]["level"] in ("high", "mid") + + def test_buckets_sorted_high_first(self, populated_db): + result = aggregate_top_themen(db_path=populated_db, min_similarity=0.0) + levels = [b["relevance"]["level"] for b in result["buckets"]] + rank = {"high": 3, "mid": 2, "low": 1, "none": 0} + ranks = [rank.get(l, 0) for l in levels] + # Reihenfolge muss monoton fallen + assert ranks == sorted(ranks, reverse=True) + + +# ───────────────────────────────────────────────────────────────────────────── +# aggregate_news_cluster +# ───────────────────────────────────────────────────────────────────────────── + + +class TestNewsCluster: + def test_structure(self, populated_db): + # Mit hoeherem intra_threshold und kleinerem 
min_cluster_size + # auf der Test-DB: orthogonale News bilden keine Cluster + result = aggregate_news_cluster( + db_path=populated_db, min_cluster_size=2, + intra_threshold=0.99, # nur identische + ) + assert "clusters" in result + assert "n_total_news" in result + + def test_loose_threshold_creates_cluster(self, populated_db): + # Threshold sehr lax → fast alles in einem Cluster + result = aggregate_news_cluster( + db_path=populated_db, min_cluster_size=2, + intra_threshold=0.0, days_window=30, + ) + # Mindestens ein Cluster mit >=2 Members + assert len(result["clusters"]) >= 0 + for c in result["clusters"]: + assert c["size"] >= 2 + assert "members" in c + assert "antrag_matches" in c + assert "top_tags" in c + + def test_min_cluster_size_filter(self, populated_db): + result = aggregate_news_cluster( + db_path=populated_db, min_cluster_size=5, + ) + # Nur 3 News in der DB → nichts erreicht size>=5 + assert result["clusters"] == [] + + +# ───────────────────────────────────────────────────────────────────────────── +# aggregate_top_antraege_with_news +# ───────────────────────────────────────────────────────────────────────────── + + +class TestTopAntraegeWithNews: + def test_only_high_gwoe(self, populated_db): + """Nur Antraege mit gwoe_score >= min_gwoe_score auftauchen.""" + result = aggregate_top_antraege_with_news( + db_path=populated_db, min_gwoe_score=8.0, + ) + for a in result["antraege"]: + assert a["gwoe_score"] >= 8.0 + # 18/A hat 8.0, 18/B hat 7.0, 18/C hat 5.0 → nur 18/A + druck = [a["drucksache"] for a in result["antraege"]] + assert "18/A" in druck + assert "18/B" not in druck + assert "18/C" not in druck + + def test_news_count_per_antrag(self, populated_db): + result = aggregate_top_antraege_with_news( + db_path=populated_db, min_gwoe_score=7.0, min_similarity=0.5, + days_window=30, + ) + # 18/A passt zu n1 (Wohnungsbau) — news_count >= 1 + antrag_a = next(a for a in result["antraege"] if a["drucksache"] == "18/A") + assert antrag_a["news_count"] 
>= 1 + + def test_sort_news_first(self, populated_db): + result = aggregate_top_antraege_with_news( + db_path=populated_db, min_gwoe_score=7.0, min_similarity=0.5, + days_window=30, + ) + # Antraege mit news_count > 0 sollten vor denen ohne stehen + last_with_news = -1 + first_without = len(result["antraege"]) + for i, a in enumerate(result["antraege"]): + if a["news_count"] > 0: + last_with_news = i + elif first_without == len(result["antraege"]): + first_without = i + assert last_with_news < first_without