diff --git a/app/database.py b/app/database.py index 5d7dd00..b01fbe5 100644 --- a/app/database.py +++ b/app/database.py @@ -334,6 +334,26 @@ async def init_db(): "ON presse_drafts(created_at DESC)" ) + # auto_rate_runs (#173 Phase 3) — Tracking der Vote-Orphans-Auto-Bewertung + await db.execute(""" + CREATE TABLE IF NOT EXISTS auto_rate_runs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + started_at TEXT NOT NULL DEFAULT (datetime('now')), + source TEXT NOT NULL, -- 'cron'|'manual'|'api' + bundesland TEXT, -- NULL = ALL + limit_requested INTEGER NOT NULL, + n_attempted INTEGER NOT NULL DEFAULT 0, + n_succeeded INTEGER NOT NULL DEFAULT 0, + n_failed INTEGER NOT NULL DEFAULT 0, + n_skipped INTEGER NOT NULL DEFAULT 0, + error_summary TEXT + ) + """) + await db.execute( + "CREATE INDEX IF NOT EXISTS idx_auto_rate_runs_started " + "ON auto_rate_runs(started_at DESC)" + ) + await db.commit() @@ -667,6 +687,70 @@ async def get_votes(drucksache: str, user_id: str = None) -> dict: return {"counts": counts, "my_votes": my_votes} +# ─── auto_rate_runs (#173) ────────────────────────────────────────────────── + +async def record_auto_rate_run( + source: str, + limit_requested: int, + bundesland: Optional[str] = None, + n_attempted: int = 0, + n_succeeded: int = 0, + n_failed: int = 0, + n_skipped: int = 0, + error_summary: Optional[str] = None, +) -> int: + """Schreibt einen Run-Eintrag in auto_rate_runs und liefert die id.""" + async with aiosqlite.connect(settings.db_path) as db: + cur = await db.execute( + """ + INSERT INTO auto_rate_runs + (source, bundesland, limit_requested, n_attempted, + n_succeeded, n_failed, n_skipped, error_summary) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + (source, bundesland, limit_requested, n_attempted, + n_succeeded, n_failed, n_skipped, error_summary), + ) + await db.commit() + return cur.lastrowid + + +async def list_auto_rate_runs(limit: int = 20) -> list[dict]: + """Letzte N Runs (neueste zuerst).""" + async with aiosqlite.connect(settings.db_path) as db: + db.row_factory = aiosqlite.Row + rows = await db.execute( + """ + SELECT id, started_at, source, bundesland, limit_requested, + n_attempted, n_succeeded, n_failed, n_skipped, error_summary + FROM auto_rate_runs + ORDER BY started_at DESC LIMIT ? + """, + (limit,), + ) + return [dict(r) for r in await rows.fetchall()] + + +async def auto_rate_today_total() -> dict: + """Aggregat fuer den aktuellen Tag (UTC) — fuer Cron-Throttling.""" + async with aiosqlite.connect(settings.db_path) as db: + cur = await db.execute( + """ + SELECT COUNT(*) AS n_runs, + COALESCE(SUM(n_attempted), 0) AS total_attempted, + COALESCE(SUM(n_succeeded), 0) AS total_succeeded + FROM auto_rate_runs + WHERE date(started_at) = date('now') + """ + ) + row = await cur.fetchone() + return { + "n_runs": row[0], + "total_attempted": row[1], + "total_succeeded": row[2], + } + + async def create_job( job_id: str, input_preview: str, diff --git a/app/main.py b/app/main.py index 2a51cca..f96f8e0 100644 --- a/app/main.py +++ b/app/main.py @@ -2735,6 +2735,8 @@ async def api_auto_rate_vote_orphans( request: Request, bundesland: Optional[str] = Form(None), limit: int = Form(10), + source: str = Form("manual"), + daily_cap: int = Form(200), user: dict = Depends(require_admin), ): """Bulk-Auto-Bewerten der Top-N Vote-Orphans (#172). @@ -2742,13 +2744,38 @@ async def api_auto_rate_vote_orphans( Admin-only + rate-limited. Nimmt die neuesten Drucksachen aus `vote-orphans`, laedt den Antragstext per Adapter herunter und enqueued einen Job pro Drucksache. Konservatives Default-Limit 10. + + `source` = 'manual'|'cron'|'api' wird in auto_rate_runs persistiert. + `daily_cap` = max. 
Tagessumme an Auto-Bewertungen (Default 200), wird + gegen die Run-Historie geprueft. """ if limit < 1 or limit > 50: raise HTTPException(status_code=400, detail="limit muss 1-50 sein") from .auswertungen import get_vote_orphans + from .database import ( + record_auto_rate_run, + auto_rate_today_total, + ) from .queue import enqueue, QueueFullError + today = await auto_rate_today_total() + if today["total_attempted"] + limit > daily_cap: + remaining = max(0, daily_cap - today["total_attempted"]) + if remaining == 0: + await record_auto_rate_run( + source=source, limit_requested=limit, bundesland=bundesland, + n_attempted=0, n_succeeded=0, n_failed=0, n_skipped=0, + error_summary=f"daily_cap_reached:{daily_cap}", + ) + return { + "status": "skipped", + "reason": "daily_cap_reached", + "today": today, + "daily_cap": daily_cap, + } + limit = remaining + orphans = get_vote_orphans(filter_bl=bundesland, limit=limit) enqueued = [] @@ -2796,14 +2823,44 @@ async def api_auto_rate_vote_orphans( except QueueFullError: skipped.append({"drucksache": ds, "reason": "queue_full"}) break + + # Run in auto_rate_runs persistieren — auch wenn enqueued=0 ist. 
+ error_summary = None + if skipped: + error_summary = ", ".join( + f"{s['drucksache']}:{s['reason'][:30]}" for s in skipped[:3] + ) + if len(skipped) > 3: + error_summary += f", … (+{len(skipped) - 3} weitere)" + run_id = await record_auto_rate_run( + source=source, limit_requested=limit, bundesland=bundesland, + n_attempted=len(orphans["items"]), + n_succeeded=len(enqueued), + n_failed=0, # Job-Failures kommen nach Worker-Run, nicht hier + n_skipped=len(skipped), + error_summary=error_summary, + ) return { "status": "auto_rate_enqueued", + "run_id": run_id, "enqueued": len(enqueued), "skipped": skipped, "jobs": enqueued, } +@app.get("/api/auto-rate-runs") +async def api_auto_rate_runs( + limit: int = 20, + user: dict = Depends(require_admin), +): + """Letzte N Runs der Vote-Orphans-Auto-Bewertung (admin-only).""" + from .database import list_auto_rate_runs, auto_rate_today_total + runs = await list_auto_rate_runs(limit=limit) + today = await auto_rate_today_total() + return {"runs": runs, "today": today} + + @app.get("/api/auswertungen/empfehlungs-konsistenz") async def auswertungen_empfehlungs_konsistenz( bundesland: Optional[str] = None, @@ -3084,6 +3141,25 @@ async def api_admin_stand(user: dict = Depends(require_admin)): n_bookmarks = db.execute("SELECT COUNT(*) FROM bookmarks").fetchone()[0] except sqlite3.OperationalError: n_bookmarks = 0 + + # Auto-Rate-Runs (#173) + try: + auto_rate_today = db.execute(""" + SELECT + COUNT(*) AS n_runs, + COALESCE(SUM(n_attempted), 0) AS total_attempted, + COALESCE(SUM(n_succeeded), 0) AS total_succeeded + FROM auto_rate_runs + WHERE date(started_at) = date('now') + """).fetchone() + auto_rate_recent = list(db.execute(""" + SELECT id, started_at, source, bundesland, limit_requested, + n_attempted, n_succeeded, n_failed, n_skipped, error_summary + FROM auto_rate_runs ORDER BY started_at DESC LIMIT 5 + """).fetchall()) + except sqlite3.OperationalError: + auto_rate_today = (0, 0, 0) + auto_rate_recent = [] finally: 
db.close() @@ -3113,6 +3189,21 @@ async def api_admin_stand(user: dict = Depends(require_admin)): "last_7_days": n_drafts_7d, }, "bookmarks": n_bookmarks, + "auto_rate": { + "today_runs": auto_rate_today[0], + "today_attempted": auto_rate_today[1], + "today_succeeded": auto_rate_today[2], + "recent": [ + { + "id": r[0], "started_at": r[1], "source": r[2], + "bundesland": r[3], "limit_requested": r[4], + "n_attempted": r[5], "n_succeeded": r[6], + "n_failed": r[7], "n_skipped": r[8], + "error_summary": r[9], + } + for r in auto_rate_recent + ], + }, } diff --git a/app/templates/v2/screens/admin_stand.html b/app/templates/v2/screens/admin_stand.html index 8c22f94..d9d5346 100644 --- a/app/templates/v2/screens/admin_stand.html +++ b/app/templates/v2/screens/admin_stand.html @@ -145,6 +145,27 @@ + +
+

Vote-Orphans-Auto-Bewertung

+

+ Heute: . + Cron läuft alle 6h und enqueued bis zu 30 Orphans pro Lauf, max. 200 Anträge/Tag. +

+ + + + + + + + + + + +
ZeitpunktQuelleBLVersuchtEnqueuedSkippedNotiz
+
+
@@ -229,6 +250,30 @@ async function loadStand() { Object.entries(ns).sort((a, b) => b[1] - a[1]).map(([s, n]) => ` ${s}${fmtN(n)}`).join(''); + // Auto-Rate-Run-Tabelle (#173) + const ar = d.auto_rate || {}; + const elToday = document.getElementById('auto-rate-today'); + if (elToday) { + elToday.textContent = + `${fmtN(ar.today_runs || 0)} Runs · ${fmtN(ar.today_attempted || 0)} Anträge versucht · ${fmtN(ar.today_succeeded || 0)} enqueued`; + } + const arRecent = ar.recent || []; + const arRowsEl = document.getElementById('stand-autorate-rows'); + if (arRowsEl) { + arRowsEl.innerHTML = arRecent.length + ? arRecent.map(r => ` + + ${(r.started_at || '').slice(0, 16)} + ${r.source || '—'} + ${r.bundesland || 'ALL'} + ${fmtN(r.n_attempted)} + ${fmtN(r.n_succeeded)} + ${fmtN(r.n_skipped)} + ${r.error_summary || ''} + `).join('') + : 'Noch kein Run heute oder in den letzten 5 Läufen.'; + } + document.getElementById('stand-meta').textContent = 'Aktualisiert: ' + new Date().toLocaleTimeString('de-DE'); } catch (e) { diff --git a/scripts/auto-rate-orphans.sh b/scripts/auto-rate-orphans.sh new file mode 100755 index 0000000..0726d71 --- /dev/null +++ b/scripts/auto-rate-orphans.sh @@ -0,0 +1,153 @@ +#!/bin/bash +# Vote-Orphans-Auto-Bewertung als Cron-Job (#173 Phase 3). +# +# Ruft pro Lauf maximal `MAX_PER_RUN` Drucksachen auf, die einen +# Plenum-Vote, aber noch keine GWÖ-Bewertung haben. Limitiert auf +# `MAX_PER_DAY` (Tagessumme) — gemessen an `auto_rate_runs.n_attempted`. +# +# Idempotent: bei 0 Orphans loggt das Skript sauber und beendet. +# Bei Throttle (Daily-Cap erreicht) wird dies in auto_rate_runs +# als `error_summary='daily_cap_reached:N'` festgehalten. 
#
# Install as a host cron job (every 6h, max 30 calls/run, 200/day).
# NOTE: a crontab entry must be ONE line — cron does not support a trailing
# backslash as line continuation.
#
#   crontab -e
#   0 */6 * * * /opt/gwoe-antragspruefer-dev/scripts/auto-rate-orphans.sh gwoe-antragspruefer-dev >> /var/log/gwoe-auto-rate.log 2>&1
#
# Manual invocation:
#   MAX_PER_RUN=10 ./scripts/auto-rate-orphans.sh gwoe-antragspruefer-dev

set -euo pipefail

CONTAINER="${1:-gwoe-antragspruefer-dev}"
MAX_PER_RUN="${MAX_PER_RUN:-30}"
MAX_PER_DAY="${MAX_PER_DAY:-200}"
BUNDESLAND="${BUNDESLAND:-}"  # empty = all federal states

# Skip (exit 0) when the container is not running.
# -F/-x: match the name literally and as a whole line, so regex
# metacharacters in the name (e.g. '.') cannot produce false positives.
# No -q on purpose: grep then reads all of docker's output, so `docker ps`
# cannot be killed by SIGPIPE, which `pipefail` would report as a failure.
if ! docker ps --format '{{.Names}}' | grep -Fx -- "$CONTAINER" >/dev/null; then
    echo "$(date -Iseconds) SKIP — ${CONTAINER} is not running"
    exit 0
fi

echo "$(date -Iseconds) START auto-rate-orphans (container=${CONTAINER}, max_per_run=${MAX_PER_RUN}, max_per_day=${MAX_PER_DAY}, bl=${BUNDESLAND:-ALL})"

# `docker exec` syntax is: docker exec [OPTIONS] CONTAINER COMMAND [ARG...].
# All options (-i, -e) MUST precede the container name; anything after it is
# taken as the command to run inside the container.
# The Python program below is fed to the interpreter via stdin (quoted
# heredoc: no shell expansion inside).
docker exec -i \
    -e MAX_PER_RUN="$MAX_PER_RUN" \
    -e MAX_PER_DAY="$MAX_PER_DAY" \
    -e BUNDESLAND="$BUNDESLAND" \
    "$CONTAINER" \
    python <<'EOF'
import asyncio
import os
import uuid

from app.auswertungen import get_vote_orphans
from app.database import (
    record_auto_rate_run,
    auto_rate_today_total,
    get_assessment,
    create_job,
)
from app.parlamente import get_adapter, Drucksache
from app.queue import enqueue, QueueFullError
from app.main import run_drucksache_analysis  # async worker function


MAX_PER_RUN = int(os.environ.get("MAX_PER_RUN", "30"))
MAX_PER_DAY = int(os.environ.get("MAX_PER_DAY", "200"))
BUNDESLAND = os.environ.get("BUNDESLAND") or None  # empty string -> None


async def main() -> None:
    """Enqueue analysis jobs for vote orphans and record the run.

    Reads today's totals via ``auto_rate_today_total``; if the attempted
    count already reached ``MAX_PER_DAY`` a run row with
    ``error_summary='daily_cap_reached:<cap>'`` is written and nothing is
    enqueued. Otherwise up to ``min(MAX_PER_RUN, remaining)`` orphans are
    fetched, each one is downloaded through its adapter and enqueued, and
    a summary row is written via ``record_auto_rate_run``.
    """
    today = await auto_rate_today_total()
    today_attempted = today["total_attempted"]

    if today_attempted >= MAX_PER_DAY:
        print(f" SKIP daily_cap_reached: {today_attempted}/{MAX_PER_DAY}")
        await record_auto_rate_run(
            source="cron",
            limit_requested=MAX_PER_RUN,
            bundesland=BUNDESLAND,
            n_attempted=0, n_succeeded=0, n_failed=0, n_skipped=0,
            error_summary=f"daily_cap_reached:{MAX_PER_DAY}",
        )
        return

    # Never fetch more than what is left of today's budget.
    remaining_today = MAX_PER_DAY - today_attempted
    limit = min(MAX_PER_RUN, remaining_today)
    orphans = get_vote_orphans(filter_bl=BUNDESLAND, limit=limit)
    items = orphans["items"]
    if not items:
        print(" no orphans")
        await record_auto_rate_run(
            source="cron",
            limit_requested=limit,
            bundesland=BUNDESLAND,
            n_attempted=0, n_succeeded=0, n_failed=0, n_skipped=0,
        )
        return

    enqueued, skipped = 0, []
    for item in items:
        bl = item["bundesland"]
        ds = item["drucksache"]
        existing = await get_assessment(ds)
        if existing:
            skipped.append((ds, "already_rated"))
            continue
        adapter = get_adapter(bl)
        if not adapter:
            skipped.append((ds, f"no_adapter_for_{bl}"))
            continue
        try:
            text = await adapter.download_text(ds)
        except Exception as e:
            # Best effort: one failing download must not abort the run.
            skipped.append((ds, f"download_error:{str(e)[:40]}"))
            continue
        if not text:
            skipped.append((ds, "empty_text"))
            continue
        doc = Drucksache(
            drucksache=ds, title=ds, fraktionen=[], datum="",
            link="", bundesland=bl,
        )
        job_id = str(uuid.uuid4())
        # NOTE(review): the job row is created before enqueue(); if the
        # queue is full that row appears to stay behind without a worker —
        # confirm whether it needs cleanup.
        await create_job(job_id, text[:500], bl, "qwen-plus", drucksache=ds)
        try:
            await enqueue(
                job_id, run_drucksache_analysis,
                job_id, ds, text, bl, "qwen-plus", doc,
                drucksache=ds,
            )
            enqueued += 1
        except QueueFullError:
            skipped.append((ds, "queue_full"))
            break  # queue is full, further items would fail as well

    # Compact summary: first three skip reasons plus a "+N more" suffix.
    error_summary = None
    if skipped:
        error_summary = ", ".join(f"{ds}:{r[:30]}" for ds, r in skipped[:3])
        if len(skipped) > 3:
            error_summary += f", … (+{len(skipped) - 3} weitere)"

    # NOTE(review): n_attempted counts every fetched item, including items
    # never reached after a queue_full break; this number feeds the daily
    # cap — confirm that this is intended.
    await record_auto_rate_run(
        source="cron",
        limit_requested=limit,
        bundesland=BUNDESLAND,
        n_attempted=len(items),
        n_succeeded=enqueued,
        n_failed=0,
        n_skipped=len(skipped),
        error_summary=error_summary,
    )
    print(f" attempted={len(items)} enqueued={enqueued} skipped={len(skipped)}")
    for ds, reason in skipped[:5]:
        print(f" skip {ds}: {reason}")


asyncio.run(main())
EOF

echo "$(date -Iseconds) END auto-rate-orphans"