def get_vote_orphans(
    filter_bl: Optional[str] = None,
    limit: int = 200,
    db_path: Optional[Path] = None,
) -> dict:
    """Return Drucksachen that have a plenary vote but no GWÖ assessment (#172).

    A row of ``plenum_vote_results`` counts as an orphan when no row in
    ``assessments`` has the same ``(bundesland, drucksache)`` pair.

    Args:
        filter_bl: If truthy, restrict both the counts and the item list to
            this ``bundesland`` value.
        limit: Maximum number of entries in ``items``. Negative values are
            treated as 0 (SQLite would otherwise interpret a negative LIMIT
            as "no limit").
        db_path: SQLite database file; falls back to ``settings.db_path``.

    Returns:
        A dict with
        ``count`` (total number of orphans, independent of ``limit``),
        ``items`` (newest first by latest ``parsed_at``; each entry has the
        keys ``bundesland``, ``drucksache``, ``parsed_at``, ``ergebnis`` and
        ``quelle_protokoll``) and
        ``by_bundesland`` (mapping ``bundesland`` -> number of orphans).
        If the database file does not exist, an empty result is returned.
    """
    path = db_path or settings.db_path
    if not Path(path).exists():
        return {"count": 0, "items": [], "by_bundesland": {}}

    # SQLite treats a negative LIMIT as unbounded; never let a caller
    # accidentally (or deliberately) request the whole table that way.
    row_limit = max(limit, 0)

    # Shared FROM/JOIN/WHERE fragment so that the per-state counts and the
    # top-N list can never disagree about what an "orphan" is.
    orphan_clause = """
        FROM plenum_vote_results p
        LEFT JOIN assessments a
          ON a.bundesland = p.bundesland AND a.drucksache = p.drucksache
        WHERE a.drucksache IS NULL
    """
    bl_params: tuple = ()
    if filter_bl:
        orphan_clause += " AND p.bundesland = ?"
        bl_params = (filter_bl,)

    # Number of vote-only Drucksachen per Bundesland. DISTINCT because one
    # Drucksache may appear in several protocols.
    sql_by_bl = (
        "SELECT p.bundesland, COUNT(DISTINCT p.drucksache) AS n"
        + orphan_clause
        + " GROUP BY p.bundesland ORDER BY n DESC, p.bundesland"
    )

    # Top-N orphans, newest first. With a single MAX() aggregate SQLite
    # takes the bare columns (ergebnis, quelle_protokoll) from the row that
    # holds the maximum, i.e. from the most recently parsed vote.
    # The extra sort keys make the ranking deterministic when several
    # Drucksachen share the same parsed_at timestamp.
    sql_top = (
        "SELECT p.bundesland, p.drucksache, MAX(p.parsed_at) AS latest_parsed,"
        " p.ergebnis, p.quelle_protokoll"
        + orphan_clause
        + " GROUP BY p.bundesland, p.drucksache"
        " ORDER BY latest_parsed DESC, p.bundesland, p.drucksache"
        " LIMIT ?"
    )

    conn = sqlite3.connect(str(path))
    try:
        by_bl = dict(conn.execute(sql_by_bl, bl_params).fetchall())
        rows = conn.execute(sql_top, bl_params + (row_limit,)).fetchall()
    finally:
        conn.close()

    items = [
        {
            "bundesland": bundesland,
            "drucksache": drucksache,
            "parsed_at": latest_parsed,
            "ergebnis": ergebnis,
            "quelle_protokoll": quelle_protokoll,
        }
        for bundesland, drucksache, latest_parsed, ergebnis, quelle_protokoll in rows
    ]
    return {
        "count": sum(by_bl.values()),
        "items": items,
        "by_bundesland": by_bl,
    }
# Upper bound for the public listing endpoint; keeps a single request from
# pulling an arbitrarily large result set out of the database.
_VOTE_ORPHANS_MAX_LIMIT = 1000


@app.get("/api/auswertungen/vote-orphans")
async def api_vote_orphans(bundesland: Optional[str] = None, limit: int = 200):
    """List Drucksachen that have a plenary vote but no GWÖ assessment (#172).

    Query parameters:
        bundesland: Optional filter passed through as ``filter_bl``.
        limit: Maximum number of items to return (0 to 1000).

    Raises:
        HTTPException: 400 if ``limit`` is outside the accepted range. A
            negative value would reach SQLite's LIMIT clause, where it means
            "no limit".
    """
    if limit < 0 or limit > _VOTE_ORPHANS_MAX_LIMIT:
        raise HTTPException(
            status_code=400,
            detail=f"limit muss 0-{_VOTE_ORPHANS_MAX_LIMIT} sein",
        )

    from .auswertungen import get_vote_orphans
    return get_vote_orphans(filter_bl=bundesland, limit=limit)


@app.post("/api/auswertungen/vote-orphans/auto-rate")
@limiter.limit("3/minute")
async def api_auto_rate_vote_orphans(
    request: Request,
    bundesland: Optional[str] = Form(None),
    limit: int = Form(10),
    user: dict = Depends(require_admin),
):
    """Bulk auto-rate the top-N vote orphans (#172).

    Admin-only and rate-limited. Takes the newest Drucksachen reported by
    ``get_vote_orphans``, downloads each motion text through the adapter of
    its Bundesland and enqueues one analysis job per Drucksache. The default
    limit of 10 is deliberately conservative.

    Returns:
        A dict with ``status``, ``enqueued`` (number of jobs), ``skipped``
        (list of ``{"drucksache", "reason"}``) and ``jobs`` (one entry per
        enqueued job with its ``job_id`` and ``queue_position``).

    Raises:
        HTTPException: 400 if ``limit`` is not within 1-50.
    """
    if limit < 1 or limit > 50:
        raise HTTPException(status_code=400, detail="limit muss 1-50 sein")

    # Local imports, hoisted out of the loop so they run once per request.
    from .auswertungen import get_vote_orphans
    from .parlamente import Drucksache
    from .queue import enqueue, QueueFullError

    orphans = get_vote_orphans(filter_bl=bundesland, limit=limit)

    enqueued = []
    skipped = []
    for item in orphans["items"]:
        if len(enqueued) >= limit:
            break
        bl = item["bundesland"]
        ds = item["drucksache"]

        # Defensive re-check: an assessment may have been created since the
        # orphan list was computed.
        # NOTE(review): this lookup uses the Drucksache number only, while
        # the orphan query matches on (bundesland, drucksache) — confirm the
        # two cannot disagree for numbers reused across Bundeslaender.
        existing = await get_assessment(ds)
        if existing:
            skipped.append({"drucksache": ds, "reason": "already_rated"})
            continue

        adapter = get_adapter(bl)
        if not adapter:
            skipped.append({"drucksache": ds, "reason": f"no_adapter_for_{bl}"})
            continue

        # Best effort: a failing download must not abort the whole batch, so
        # the (truncated) error is reported in the response instead.
        try:
            text = await adapter.download_text(ds)
        except Exception as e:
            skipped.append({"drucksache": ds, "reason": f"download_error: {str(e)[:80]}"})
            continue
        if not text:
            skipped.append({"drucksache": ds, "reason": "empty_text"})
            continue

        # Minimal document stub built from the orphan row alone (no
        # adapter.search call); the Drucksache number doubles as the title.
        doc = Drucksache(
            drucksache=ds, title=ds, fraktionen=[], datum="",
            link="", bundesland=bl,
        )
        job_id = str(uuid.uuid4())
        await create_job(job_id, text[:500], bl, "qwen-plus", drucksache=ds)
        try:
            position = await enqueue(
                job_id, run_drucksache_analysis,
                job_id, ds, text, bl, "qwen-plus", doc,
                drucksache=ds,
            )
        except QueueFullError:
            # NOTE(review): the job record created just above stays behind
            # without a queue entry — confirm whether it needs cleanup.
            skipped.append({"drucksache": ds, "reason": "queue_full"})
            # Once the queue is full, later items cannot be enqueued either.
            break
        enqueued.append({
            "drucksache": ds, "bundesland": bl,
            "job_id": job_id, "queue_position": position,
        })

    return {
        "status": "auto_rate_enqueued",
        "enqueued": len(enqueued),
        "skipped": skipped,
        "jobs": enqueued,
    }
a/app/templates/v2/screens/auswertungen.html +++ b/app/templates/v2/screens/auswertungen.html @@ -201,6 +201,28 @@ table.modal-table th { background: var(--ecg-bg-subtle); font-weight: 700; }
/**
 * Show or hide the "vote orphans" banner for the given Bundesland filter.
 *
 * Fetches the orphan summary, hides the banner when the count is zero or the
 * request fails, and otherwise renders the total plus the eight Bundeslaender
 * with the most orphans.
 *
 * @param {string} bl Bundesland filter; a falsy value means "all".
 */
async function loadVoteOrphansBanner(bl) {
    const banner = document.getElementById('sv-orphans-banner');
    const countEl = document.getElementById('sv-orphans-count');
    const byBlEl = document.getElementById('sv-orphans-by-bl');
    // This function is invoked without await; bail out quietly when the
    // markup is absent instead of raising an unhandled promise rejection.
    if (!banner || !countEl || !byBlEl) return;

    let url = '/api/auswertungen/vote-orphans?limit=10';
    if (bl) url += '&bundesland=' + encodeURIComponent(bl);
    try {
        const r = await fetch(url);
        // Do not try to interpret an error payload as banner data.
        if (!r.ok) throw new Error('HTTP ' + r.status);
        const d = await r.json();
        if (!d.count) {
            banner.style.display = 'none';
            return;
        }
        banner.style.display = '';
        countEl.textContent = d.count.toLocaleString('de-DE');
        const sortedBl = Object.entries(d.by_bundesland)
            .sort((a, b) => b[1] - a[1])
            .slice(0, 8);
        byBlEl.textContent = sortedBl.map(e => `${e[0]}:${e[1]}`).join(' · ');
    } catch (e) {
        // Any network, HTTP or parsing problem simply hides the banner.
        banner.style.display = 'none';
    }
}

/**
 * Trigger the bulk auto-rate endpoint for the current Bundesland filter and
 * report the outcome in the result element.
 *
 * Reads the limit from the `sv-orphans-limit` input, posts it as form data
 * and refreshes the banner shortly after a successful enqueue.
 */
async function bulkRateOrphans() {
    const bl = svGetBl();
    const limit = document.getElementById('sv-orphans-limit').value;
    const result = document.getElementById('sv-orphans-result');
    result.textContent = '… enqueue läuft';

    const fd = new FormData();
    if (bl) fd.append('bundesland', bl);
    fd.append('limit', limit);

    try {
        const r = await fetch('/api/auswertungen/vote-orphans/auto-rate', { method: 'POST', body: fd });
        if (r.status === 403) { result.textContent = 'Admin-Rechte fehlen'; return; }
        if (r.status === 429) { result.textContent = 'Rate-Limit (3/min)'; return; }
        if (!r.ok) {
            // The error body may not be JSON; fall back to the status text.
            const err = await r.json().catch(() => ({}));
            result.textContent = 'Fehler: ' + (err.detail || r.statusText);
            return;
        }
        const data = await r.json();
        const skip = (data.skipped || []).length;
        // Only numbers from the response are interpolated into the markup.
        result.innerHTML = `${data.enqueued} enqueued${skip ? `, ${skip} skipped` : ''} — Queue ansehen →`;
        // Refresh the banner after a short delay.
        setTimeout(() => loadVoteOrphansBanner(bl), 800);
    } catch (e) {
        result.textContent = 'Fehler: ' + e;
    }
}
class TestGetVoteOrphans:
    """Tests for ``get_vote_orphans`` (#172) against a small throw-away DB."""

    @pytest.fixture
    def orphan_db(self, tmp_path):
        """Create a SQLite file with three votes, one of which is assessed.

        Contents: NRW 18/1 (vote + assessment), NRW 18/2 (vote only) and
        BB 8/3 (vote only).
        """
        db = tmp_path / "orphans.db"
        conn = sqlite3.connect(str(db))
        # try/finally so the handle is released even if the setup fails.
        try:
            conn.execute("""
                CREATE TABLE assessments (
                    drucksache TEXT PRIMARY KEY, title TEXT, fraktionen TEXT,
                    datum TEXT, bundesland TEXT, gwoe_score REAL,
                    source TEXT, model TEXT, created_at TEXT, updated_at TEXT
                )
            """)
            conn.execute("""
                CREATE TABLE plenum_vote_results (
                    bundesland TEXT NOT NULL, drucksache TEXT NOT NULL,
                    ergebnis TEXT, einstimmig INTEGER DEFAULT 0,
                    fraktionen_ja TEXT DEFAULT '[]',
                    fraktionen_nein TEXT DEFAULT '[]',
                    fraktionen_enthaltung TEXT DEFAULT '[]',
                    quelle_protokoll TEXT NOT NULL,
                    quelle_url TEXT,
                    parsed_at TEXT NOT NULL DEFAULT (datetime('now')),
                    PRIMARY KEY (bundesland, drucksache, quelle_protokoll)
                )
            """)
            conn.execute(
                "INSERT INTO assessments VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                ("18/1", "T", '["CDU"]', "2024-01-01", "NRW", 7.0,
                 "test", "qwen", "now", "now"),
            )
            conn.executemany(
                "INSERT INTO plenum_vote_results "
                "(bundesland, drucksache, ergebnis, quelle_protokoll, parsed_at) "
                "VALUES (?, ?, ?, ?, ?)",
                [
                    ("NRW", "18/1", "angenommen", "MMP18-1", "2024-01-02"),
                    ("NRW", "18/2", "abgelehnt", "MMP18-2", "2024-01-05"),
                    ("BB", "8/3", "angenommen", "BB8-1", "2024-01-04"),
                ],
            )
            conn.commit()
        finally:
            conn.close()
        return db

    def test_count_excludes_rated(self, orphan_db):
        from app.auswertungen import get_vote_orphans
        result = get_vote_orphans(db_path=orphan_db)
        # 18/1 has an assessment and is no orphan; 18/2 (NRW) and 8/3 (BB) are.
        assert result["count"] == 2
        assert result["by_bundesland"] == {"NRW": 1, "BB": 1}

    def test_filter_bl(self, orphan_db):
        from app.auswertungen import get_vote_orphans
        nrw = get_vote_orphans(filter_bl="NRW", db_path=orphan_db)
        assert nrw["count"] == 1
        assert nrw["by_bundesland"] == {"NRW": 1}
        assert nrw["items"][0]["drucksache"] == "18/2"

    def test_sort_by_latest_parsed_desc(self, orphan_db):
        from app.auswertungen import get_vote_orphans
        result = get_vote_orphans(db_path=orphan_db)
        # 18/2 was parsed 2024-01-05, 8/3 on 2024-01-04, so 18/2 comes first.
        assert result["items"][0]["drucksache"] == "18/2"

    def test_limit_caps_items_but_not_count(self, orphan_db):
        from app.auswertungen import get_vote_orphans
        result = get_vote_orphans(limit=1, db_path=orphan_db)
        assert [i["drucksache"] for i in result["items"]] == ["18/2"]
        # The total is independent of how many items are returned.
        assert result["count"] == 2

    def test_drucksache_in_several_protocols_counts_once(self, orphan_db):
        from app.auswertungen import get_vote_orphans
        conn = sqlite3.connect(str(orphan_db))
        try:
            conn.execute(
                "INSERT INTO plenum_vote_results "
                "(bundesland, drucksache, ergebnis, quelle_protokoll, parsed_at) "
                "VALUES (?, ?, ?, ?, ?)",
                ("NRW", "18/2", "vertagt", "MMP18-0", "2024-01-03"),
            )
            conn.commit()
        finally:
            conn.close()

        result = get_vote_orphans(db_path=orphan_db)
        assert result["count"] == 2
        assert result["by_bundesland"] == {"NRW": 1, "BB": 1}
        matches = [i for i in result["items"] if i["drucksache"] == "18/2"]
        assert len(matches) == 1
        # The entry reflects the most recently parsed vote row.
        assert matches[0]["parsed_at"] == "2024-01-05"
        assert matches[0]["ergebnis"] == "abgelehnt"
        assert matches[0]["quelle_protokoll"] == "MMP18-2"

    def test_empty_db(self, tmp_path):
        from app.auswertungen import get_vote_orphans
        result = get_vote_orphans(db_path=tmp_path / "missing.db")
        assert result == {"count": 0, "items": [], "by_bundesland": {}}