feat(#173): Vote-Orphans-Auto-Bewertung als Cron-Job + Tracking

Phase 3 (Vote-Orphans-Auto-Bewertung):

- Neue Tabelle `auto_rate_runs` (additiv) mit started_at, source,
  bundesland, limit_requested, n_attempted/succeeded/failed/skipped,
  error_summary.
- Neue DB-Helper: record_auto_rate_run, list_auto_rate_runs,
  auto_rate_today_total.
- POST /api/auswertungen/vote-orphans/auto-rate erweitert um source,
  daily_cap und Run-Persistenz. Throttled gegen Tagessumme.
- Neuer Endpoint GET /api/auto-rate-runs (admin) — letzte N Runs +
  Tagessumme.
- scripts/auto-rate-orphans.sh: Cron-Wrapper (analog auto-fetch-news.sh)
  mit MAX_PER_RUN=30 / MAX_PER_DAY=200 Defaults, BUNDESLAND-Filter
  optional, ruft direkt die Python-Worker-Funktion via docker exec.
- Admin-Stand-Dashboard: KPI-Zeile "heute X Runs / Y versucht" + Tabelle
  der letzten 5 Runs mit BL/Counts/Notiz.

Refs: #173, ADR 0010

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dotty Dotter 2026-05-06 16:02:33 +02:00
parent 1a94b27a22
commit c241d329aa
4 changed files with 373 additions and 0 deletions

View File

@ -334,6 +334,26 @@ async def init_db():
"ON presse_drafts(created_at DESC)"
)
# auto_rate_runs (#173 Phase 3) — Tracking der Vote-Orphans-Auto-Bewertung
await db.execute("""
CREATE TABLE IF NOT EXISTS auto_rate_runs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
started_at TEXT NOT NULL DEFAULT (datetime('now')),
source TEXT NOT NULL, -- 'cron'|'manual'|'api'
bundesland TEXT, -- NULL = ALL
limit_requested INTEGER NOT NULL,
n_attempted INTEGER NOT NULL DEFAULT 0,
n_succeeded INTEGER NOT NULL DEFAULT 0,
n_failed INTEGER NOT NULL DEFAULT 0,
n_skipped INTEGER NOT NULL DEFAULT 0,
error_summary TEXT
)
""")
await db.execute(
"CREATE INDEX IF NOT EXISTS idx_auto_rate_runs_started "
"ON auto_rate_runs(started_at DESC)"
)
await db.commit()
@ -667,6 +687,70 @@ async def get_votes(drucksache: str, user_id: str = None) -> dict:
return {"counts": counts, "my_votes": my_votes}
# ─── auto_rate_runs (#173) ──────────────────────────────────────────────────
async def record_auto_rate_run(
    source: str,
    limit_requested: int,
    bundesland: Optional[str] = None,
    n_attempted: int = 0,
    n_succeeded: int = 0,
    n_failed: int = 0,
    n_skipped: int = 0,
    error_summary: Optional[str] = None,
) -> int:
    """Persist one auto-rate run in ``auto_rate_runs`` and return its row id.

    ``started_at`` is not passed here; the INSERT leaves it out so the
    database supplies the value.

    Args:
        source: Origin of the run (callers in this change pass 'cron' or
            the API's ``source`` form field).
        limit_requested: Number of items the run was asked to process.
        bundesland: Optional state filter; ``None`` is stored as NULL.
        n_attempted: Number of items the run looked at.
        n_succeeded: Number of items successfully enqueued.
        n_failed: Number of failed items.
        n_skipped: Number of skipped items.
        error_summary: Optional short free-text note about the run.

    Returns:
        The ``lastrowid`` reported by the INSERT cursor.
    """
    insert_sql = """
        INSERT INTO auto_rate_runs
            (source, bundesland, limit_requested, n_attempted,
             n_succeeded, n_failed, n_skipped, error_summary)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    """
    values = (
        source,
        bundesland,
        limit_requested,
        n_attempted,
        n_succeeded,
        n_failed,
        n_skipped,
        error_summary,
    )
    async with aiosqlite.connect(settings.db_path) as conn:
        cursor = await conn.execute(insert_sql, values)
        await conn.commit()
        new_id = cursor.lastrowid
    return new_id
async def list_auto_rate_runs(limit: int = 20) -> list[dict]:
    """Return the most recent auto-rate runs, newest first.

    Args:
        limit: Maximum number of rows to return. Values <= 0 yield an
            empty list.

    Returns:
        A list of plain dicts, one per row, with the keys ``id``,
        ``started_at``, ``source``, ``bundesland``, ``limit_requested``,
        ``n_attempted``, ``n_succeeded``, ``n_failed``, ``n_skipped`` and
        ``error_summary``.
    """
    # SQLite interprets a negative LIMIT as "no upper bound", which would
    # silently turn "last N" into "everything". Short-circuit instead.
    if limit <= 0:
        return []

    async with aiosqlite.connect(settings.db_path) as db:
        db.row_factory = aiosqlite.Row
        cur = await db.execute(
            # started_at only has one-second resolution, so two runs can
            # share a timestamp; id DESC keeps the ordering deterministic.
            """
            SELECT id, started_at, source, bundesland, limit_requested,
                   n_attempted, n_succeeded, n_failed, n_skipped, error_summary
            FROM auto_rate_runs
            ORDER BY started_at DESC, id DESC LIMIT ?
            """,
            (limit,),
        )
        return [dict(row) for row in await cur.fetchall()]
async def auto_rate_today_total() -> dict:
    """Aggregate today's auto-rate runs; used to throttle against a daily cap.

    "Today" is decided inside SQLite by comparing ``date(started_at)``
    with ``date('now')``, which SQLite evaluates in UTC.

    Returns:
        A dict with ``n_runs`` (number of run rows today),
        ``total_attempted`` and ``total_succeeded`` (sums of the
        respective columns, 0 when there are no rows).
    """
    query = """
        SELECT COUNT(*) AS n_runs,
               COALESCE(SUM(n_attempted), 0) AS total_attempted,
               COALESCE(SUM(n_succeeded), 0) AS total_succeeded
        FROM auto_rate_runs
        WHERE date(started_at) = date('now')
    """
    async with aiosqlite.connect(settings.db_path) as conn:
        cursor = await conn.execute(query)
        # An aggregate without GROUP BY always produces exactly one row.
        n_runs, attempted, succeeded = await cursor.fetchone()
    return {
        "n_runs": n_runs,
        "total_attempted": attempted,
        "total_succeeded": succeeded,
    }
async def create_job(
job_id: str,
input_preview: str,

View File

@ -2735,6 +2735,8 @@ async def api_auto_rate_vote_orphans(
request: Request,
bundesland: Optional[str] = Form(None),
limit: int = Form(10),
source: str = Form("manual"),
daily_cap: int = Form(200),
user: dict = Depends(require_admin),
):
"""Bulk-Auto-Bewerten der Top-N Vote-Orphans (#172).
@ -2742,13 +2744,38 @@ async def api_auto_rate_vote_orphans(
Admin-only + rate-limited. Nimmt die neuesten Drucksachen aus
`vote-orphans`, laedt den Antragstext per Adapter herunter und
enqueued einen Job pro Drucksache. Konservatives Default-Limit 10.
`source` = 'manual'|'cron'|'api' wird in auto_rate_runs persistiert.
`daily_cap` = max. Tagessumme an Auto-Bewertungen (Default 200), wird
gegen die Run-Historie geprueft.
"""
if limit < 1 or limit > 50:
raise HTTPException(status_code=400, detail="limit muss 1-50 sein")
from .auswertungen import get_vote_orphans
from .database import (
record_auto_rate_run,
auto_rate_today_total,
)
from .queue import enqueue, QueueFullError
today = await auto_rate_today_total()
if today["total_attempted"] + limit > daily_cap:
remaining = max(0, daily_cap - today["total_attempted"])
if remaining == 0:
await record_auto_rate_run(
source=source, limit_requested=limit, bundesland=bundesland,
n_attempted=0, n_succeeded=0, n_failed=0, n_skipped=0,
error_summary=f"daily_cap_reached:{daily_cap}",
)
return {
"status": "skipped",
"reason": "daily_cap_reached",
"today": today,
"daily_cap": daily_cap,
}
limit = remaining
orphans = get_vote_orphans(filter_bl=bundesland, limit=limit)
enqueued = []
@ -2796,14 +2823,44 @@ async def api_auto_rate_vote_orphans(
except QueueFullError:
skipped.append({"drucksache": ds, "reason": "queue_full"})
break
# Run in auto_rate_runs persistieren — auch wenn enqueued=0 ist.
error_summary = None
if skipped:
error_summary = ", ".join(
f"{s['drucksache']}:{s['reason'][:30]}" for s in skipped[:3]
)
if len(skipped) > 3:
error_summary += f", … (+{len(skipped) - 3} weitere)"
run_id = await record_auto_rate_run(
source=source, limit_requested=limit, bundesland=bundesland,
n_attempted=len(orphans["items"]),
n_succeeded=len(enqueued),
n_failed=0, # Job-Failures kommen nach Worker-Run, nicht hier
n_skipped=len(skipped),
error_summary=error_summary,
)
return {
"status": "auto_rate_enqueued",
"run_id": run_id,
"enqueued": len(enqueued),
"skipped": skipped,
"jobs": enqueued,
}
@app.get("/api/auto-rate-runs")
async def api_auto_rate_runs(
    limit: int = 20,
    user: dict = Depends(require_admin),
):
    """Return the last N vote-orphans auto-rate runs plus today's totals.

    Admin-only (enforced by the ``require_admin`` dependency).

    Args:
        limit: Number of runs to return; clamped to the range 0..200.
        user: Injected by ``require_admin``; not used in the body.

    Returns:
        ``{"runs": [...], "today": {...}}`` where ``runs`` comes from
        ``list_auto_rate_runs`` and ``today`` from ``auto_rate_today_total``.
    """
    from .database import list_auto_rate_runs, auto_rate_today_total

    # The query parameter is caller-controlled. A negative value would be
    # passed through to SQLite's LIMIT, where it means "no upper bound",
    # and a huge value would dump the whole history. Clamp both ends.
    max_limit = 200
    limit = max(0, min(limit, max_limit))

    runs = await list_auto_rate_runs(limit=limit)
    today = await auto_rate_today_total()
    return {"runs": runs, "today": today}
@app.get("/api/auswertungen/empfehlungs-konsistenz")
async def auswertungen_empfehlungs_konsistenz(
bundesland: Optional[str] = None,
@ -3084,6 +3141,25 @@ async def api_admin_stand(user: dict = Depends(require_admin)):
n_bookmarks = db.execute("SELECT COUNT(*) FROM bookmarks").fetchone()[0]
except sqlite3.OperationalError:
n_bookmarks = 0
# Auto-Rate-Runs (#173)
try:
auto_rate_today = db.execute("""
SELECT
COUNT(*) AS n_runs,
COALESCE(SUM(n_attempted), 0) AS total_attempted,
COALESCE(SUM(n_succeeded), 0) AS total_succeeded
FROM auto_rate_runs
WHERE date(started_at) = date('now')
""").fetchone()
auto_rate_recent = list(db.execute("""
SELECT id, started_at, source, bundesland, limit_requested,
n_attempted, n_succeeded, n_failed, n_skipped, error_summary
FROM auto_rate_runs ORDER BY started_at DESC LIMIT 5
""").fetchall())
except sqlite3.OperationalError:
auto_rate_today = (0, 0, 0)
auto_rate_recent = []
finally:
db.close()
@ -3113,6 +3189,21 @@ async def api_admin_stand(user: dict = Depends(require_admin)):
"last_7_days": n_drafts_7d,
},
"bookmarks": n_bookmarks,
"auto_rate": {
"today_runs": auto_rate_today[0],
"today_attempted": auto_rate_today[1],
"today_succeeded": auto_rate_today[2],
"recent": [
{
"id": r[0], "started_at": r[1], "source": r[2],
"bundesland": r[3], "limit_requested": r[4],
"n_attempted": r[5], "n_succeeded": r[6],
"n_failed": r[7], "n_skipped": r[8],
"error_summary": r[9],
}
for r in auto_rate_recent
],
},
}

View File

@ -145,6 +145,27 @@
</table>
</div>
<!-- Vote-Orphans-Auto-Bewertung (#173) -->
<div class="stand-section">
<h2>Vote-Orphans-Auto-Bewertung</h2>
<p style="font-size:12px;opacity:0.65;margin:-4px 0 8px;">
Heute: <strong id="auto-rate-today"></strong>.
Cron läuft alle 6h und enqueued bis zu 30 Orphans pro Lauf, max. 200 Anträge/Tag.
</p>
<table class="stand-table">
<thead>
<tr>
<th>Zeitpunkt</th><th>Quelle</th><th>BL</th>
<th style="text-align:right;">Versucht</th>
<th style="text-align:right;">Enqueued</th>
<th style="text-align:right;">Skipped</th>
<th>Notiz</th>
</tr>
</thead>
<tbody id="stand-autorate-rows"></tbody>
</table>
</div>
<div id="stand-meta" style="font-family:var(--font-mono);font-size:11px;opacity:0.5;margin-top:1.5rem;"></div>
</div>
@ -229,6 +250,30 @@ async function loadStand() {
Object.entries(ns).sort((a, b) => b[1] - a[1]).map(([s, n]) => `
<tr><td>${s}</td><td>${fmtN(n)}</td></tr>`).join('');
// Auto-Rate-Run-Tabelle (#173)
const ar = d.auto_rate || {};
const elToday = document.getElementById('auto-rate-today');
if (elToday) {
elToday.textContent =
`${fmtN(ar.today_runs || 0)} Runs · ${fmtN(ar.today_attempted || 0)} Anträge versucht · ${fmtN(ar.today_succeeded || 0)} enqueued`;
}
const arRecent = ar.recent || [];
const arRowsEl = document.getElementById('stand-autorate-rows');
if (arRowsEl) {
arRowsEl.innerHTML = arRecent.length
? arRecent.map(r => `
<tr>
<td style="font-family:var(--font-mono);font-size:11px;">${(r.started_at || '').slice(0, 16)}</td>
<td style="font-family:var(--font-mono);font-size:11px;">${r.source || '—'}</td>
<td>${r.bundesland || 'ALL'}</td>
<td style="text-align:right;">${fmtN(r.n_attempted)}</td>
<td style="text-align:right;">${fmtN(r.n_succeeded)}</td>
<td style="text-align:right;">${fmtN(r.n_skipped)}</td>
<td style="font-family:var(--font-mono);font-size:11px;opacity:0.65;">${r.error_summary || ''}</td>
</tr>`).join('')
: '<tr><td colspan="7" style="opacity:0.5;font-style:italic;">Noch kein Run heute oder in den letzten 5 Läufen.</td></tr>';
}
document.getElementById('stand-meta').textContent =
'Aktualisiert: ' + new Date().toLocaleTimeString('de-DE');
} catch (e) {

153
scripts/auto-rate-orphans.sh Executable file
View File

@ -0,0 +1,153 @@
#!/bin/bash
# Vote-orphans auto-rating as a cron job (#173 phase 3).
#
# Per run, processes at most `MAX_PER_RUN` Drucksachen that have a plenum
# vote but no GWÖ assessment yet. Throttled to `MAX_PER_DAY` (daily total),
# measured via `auto_rate_runs.n_attempted`.
#
# With 0 orphans the script logs that fact, records an empty run and exits.
# When throttled (daily cap reached) the run is recorded in auto_rate_runs
# with `error_summary='daily_cap_reached:N'`.
#
# Install as a host cron job (every 6h, max 30 calls/run, 200/day).
# NOTE: a crontab entry must be on ONE line; crontab has no backslash
# line continuation.
#
#   crontab -e
#   0 */6 * * * /opt/gwoe-antragspruefer-dev/scripts/auto-rate-orphans.sh gwoe-antragspruefer-dev >> /var/log/gwoe-auto-rate.log 2>&1
#
# Manual invocation:
#   MAX_PER_RUN=10 ./scripts/auto-rate-orphans.sh gwoe-antragspruefer-dev

set -euo pipefail

CONTAINER="${1:-gwoe-antragspruefer-dev}"
MAX_PER_RUN="${MAX_PER_RUN:-30}"
MAX_PER_DAY="${MAX_PER_DAY:-200}"
BUNDESLAND="${BUNDESLAND:-}"  # empty = all states

# Skip when the container is not running.
# The name list is captured first instead of piping into `grep -q`: under
# `pipefail` an early grep exit can SIGPIPE `docker ps` and fail the
# pipeline. `grep -Fx` matches the name literally (no regex metacharacters).
# `|| true` keeps the previous behaviour of "docker unavailable => SKIP".
running_containers="$(docker ps --format '{{.Names}}' || true)"
if ! grep -Fxq -- "$CONTAINER" <<<"$running_containers"; then
    echo "$(date -Iseconds) SKIP — ${CONTAINER} is not running"
    exit 0
fi

echo "$(date -Iseconds) START auto-rate-orphans (container=${CONTAINER}, max_per_run=${MAX_PER_RUN}, max_per_day=${MAX_PER_DAY}, bl=${BUNDESLAND:-ALL})"

# All `docker exec` options must come BEFORE the container name. The CLI
# stops option parsing at the first positional argument, so a `-e` placed
# after "$CONTAINER" would be executed as the command inside the container.
docker exec -i \
    -e MAX_PER_RUN="$MAX_PER_RUN" \
    -e MAX_PER_DAY="$MAX_PER_DAY" \
    -e BUNDESLAND="$BUNDESLAND" \
    "$CONTAINER" \
    python <<'EOF'
import asyncio
import os
import uuid

from app.auswertungen import get_vote_orphans
from app.database import (
    record_auto_rate_run,
    auto_rate_today_total,
    get_assessment,
    create_job,
)
from app.parlamente import get_adapter, Drucksache
from app.queue import enqueue, QueueFullError
from app.main import run_drucksache_analysis  # async worker function

MAX_PER_RUN = int(os.environ.get("MAX_PER_RUN", "30"))
MAX_PER_DAY = int(os.environ.get("MAX_PER_DAY", "200"))
BUNDESLAND = os.environ.get("BUNDESLAND") or None  # empty string -> all states


async def main() -> None:
    """Run one throttled auto-rate pass and record it in auto_rate_runs."""
    today = await auto_rate_today_total()
    today_attempted = today["total_attempted"]

    # Daily cap already exhausted: record the throttled run and stop.
    if today_attempted >= MAX_PER_DAY:
        print(f" SKIP daily_cap_reached: {today_attempted}/{MAX_PER_DAY}")
        await record_auto_rate_run(
            source="cron",
            limit_requested=MAX_PER_RUN,
            bundesland=BUNDESLAND,
            n_attempted=0, n_succeeded=0, n_failed=0, n_skipped=0,
            error_summary=f"daily_cap_reached:{MAX_PER_DAY}",
        )
        return

    # Never ask for more than what is left of today's budget.
    remaining_today = MAX_PER_DAY - today_attempted
    limit = min(MAX_PER_RUN, remaining_today)

    orphans = get_vote_orphans(filter_bl=BUNDESLAND, limit=limit)
    items = orphans["items"]
    if not items:
        print(" no orphans")
        await record_auto_rate_run(
            source="cron",
            limit_requested=limit,
            bundesland=BUNDESLAND,
            n_attempted=0, n_succeeded=0, n_failed=0, n_skipped=0,
        )
        return

    enqueued, skipped = 0, []
    for item in items:
        bl = item["bundesland"]
        ds = item["drucksache"]

        existing = await get_assessment(ds)
        if existing:
            skipped.append((ds, "already_rated"))
            continue

        adapter = get_adapter(bl)
        if not adapter:
            skipped.append((ds, f"no_adapter_for_{bl}"))
            continue

        # Best effort per item: a failing download must not abort the run.
        try:
            text = await adapter.download_text(ds)
        except Exception as e:
            skipped.append((ds, f"download_error:{str(e)[:40]}"))
            continue
        if not text:
            skipped.append((ds, "empty_text"))
            continue

        doc = Drucksache(
            drucksache=ds, title=ds, fraktionen=[], datum="",
            link="", bundesland=bl,
        )
        job_id = str(uuid.uuid4())
        await create_job(job_id, text[:500], bl, "qwen-plus", drucksache=ds)
        # NOTE(review): enqueue() runs inside this short-lived interpreter,
        # not inside the web server process. If app.queue keeps its jobs in
        # process memory, they may be lost when this script exits — confirm
        # that the queue is processed before asyncio.run() returns.
        try:
            await enqueue(
                job_id, run_drucksache_analysis,
                job_id, ds, text, bl, "qwen-plus", doc,
                drucksache=ds,
            )
            enqueued += 1
        except QueueFullError:
            # Queue is full: stop the run, later items would fail as well.
            skipped.append((ds, "queue_full"))
            break

    # Compact note: first three skip reasons plus a "+N more" suffix.
    error_summary = (
        ", ".join(f"{ds}:{r[:30]}" for ds, r in skipped[:3])
        + (f", … (+{len(skipped) - 3} weitere)" if len(skipped) > 3 else "")
        if skipped else None
    )

    await record_auto_rate_run(
        source="cron",
        limit_requested=limit,
        bundesland=BUNDESLAND,
        n_attempted=len(items),
        n_succeeded=enqueued,
        n_failed=0,
        n_skipped=len(skipped),
        error_summary=error_summary,
    )
    print(f" attempted={len(items)} enqueued={enqueued} skipped={len(skipped)}")
    if skipped:
        for ds, reason in skipped[:5]:
            print(f" skip {ds}: {reason}")


asyncio.run(main())
EOF

echo "$(date -Iseconds) END auto-rate-orphans"