feat(#173): Vote-Orphans-Auto-Bewertung als Cron-Job + Tracking
Phase 3 (Vote-Orphans-Auto-Bewertung): - Neue Tabelle `auto_rate_runs` (additiv) mit started_at, source, bundesland, limit_requested, n_attempted/succeeded/failed/skipped, error_summary. - Neue DB-Helper: record_auto_rate_run, list_auto_rate_runs, auto_rate_today_total. - POST /api/auswertungen/vote-orphans/auto-rate erweitert um source, daily_cap und Run-Persistenz. Throttled gegen Tagessumme. - Neuer Endpoint GET /api/auto-rate-runs (admin) — letzte N Runs + Tagessumme. - scripts/auto-rate-orphans.sh: Cron-Wrapper (analog auto-fetch-news.sh) mit MAX_PER_RUN=30 / MAX_PER_DAY=200 Defaults, BUNDESLAND-Filter optional, ruft direkt die Python-Worker-Funktion via docker exec. - Admin-Stand-Dashboard: KPI-Zeile "heute X Runs / Y versucht" + Tabelle der letzten 5 Runs mit BL/Counts/Notiz. Refs: #173, ADR 0010 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
1a94b27a22
commit
c241d329aa
@ -334,6 +334,26 @@ async def init_db():
|
||||
"ON presse_drafts(created_at DESC)"
|
||||
)
|
||||
|
||||
# auto_rate_runs (#173 Phase 3) — Tracking der Vote-Orphans-Auto-Bewertung
|
||||
await db.execute("""
|
||||
CREATE TABLE IF NOT EXISTS auto_rate_runs (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
started_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
source TEXT NOT NULL, -- 'cron'|'manual'|'api'
|
||||
bundesland TEXT, -- NULL = ALL
|
||||
limit_requested INTEGER NOT NULL,
|
||||
n_attempted INTEGER NOT NULL DEFAULT 0,
|
||||
n_succeeded INTEGER NOT NULL DEFAULT 0,
|
||||
n_failed INTEGER NOT NULL DEFAULT 0,
|
||||
n_skipped INTEGER NOT NULL DEFAULT 0,
|
||||
error_summary TEXT
|
||||
)
|
||||
""")
|
||||
await db.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_auto_rate_runs_started "
|
||||
"ON auto_rate_runs(started_at DESC)"
|
||||
)
|
||||
|
||||
await db.commit()
|
||||
|
||||
|
||||
@ -667,6 +687,70 @@ async def get_votes(drucksache: str, user_id: str = None) -> dict:
|
||||
return {"counts": counts, "my_votes": my_votes}
|
||||
|
||||
|
||||
# ─── auto_rate_runs (#173) ──────────────────────────────────────────────────
|
||||
|
||||
async def record_auto_rate_run(
    source: str,
    limit_requested: int,
    bundesland: Optional[str] = None,
    n_attempted: int = 0,
    n_succeeded: int = 0,
    n_failed: int = 0,
    n_skipped: int = 0,
    error_summary: Optional[str] = None,
) -> int:
    """Insert one row into ``auto_rate_runs`` and return its row id.

    ``started_at`` is not passed here; the insert omits the column so the
    database fills it in. All counters default to 0 and ``bundesland`` /
    ``error_summary`` default to NULL.

    Args:
        source: Origin of the run (stored verbatim in the ``source`` column).
        limit_requested: Number of items the run was asked to process.
        bundesland: Optional state filter used for the run; ``None`` is
            stored as NULL.
        n_attempted: Number of items the run looked at.
        n_succeeded: Number of items successfully handed off.
        n_failed: Number of failed items.
        n_skipped: Number of skipped items.
        error_summary: Optional short free-text note about skips/errors.

    Returns:
        The ``lastrowid`` reported by the INSERT cursor.
    """
    insert_sql = """
        INSERT INTO auto_rate_runs
        (source, bundesland, limit_requested, n_attempted,
        n_succeeded, n_failed, n_skipped, error_summary)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """
    # Order must match the column list in the INSERT statement above.
    values = (
        source,
        bundesland,
        limit_requested,
        n_attempted,
        n_succeeded,
        n_failed,
        n_skipped,
        error_summary,
    )
    async with aiosqlite.connect(settings.db_path) as conn:
        cursor = await conn.execute(insert_sql, values)
        await conn.commit()
        return cursor.lastrowid
|
||||
|
||||
|
||||
async def list_auto_rate_runs(limit: int = 20) -> list[dict]:
    """Return the most recent auto-rate runs, newest first.

    Args:
        limit: Maximum number of rows to return. Values below 0 are treated
            as 0, because SQLite interprets a negative LIMIT as "no upper
            bound" and would otherwise return the whole table.

    Returns:
        A list of dicts, one per run, with the keys ``id``, ``started_at``,
        ``source``, ``bundesland``, ``limit_requested``, ``n_attempted``,
        ``n_succeeded``, ``n_failed``, ``n_skipped`` and ``error_summary``.
    """
    row_cap = max(0, limit)
    async with aiosqlite.connect(settings.db_path) as db:
        db.row_factory = aiosqlite.Row
        cur = await db.execute(
            # `id DESC` breaks ties between runs sharing the same
            # `started_at` value, so the ordering is deterministic.
            """
            SELECT id, started_at, source, bundesland, limit_requested,
            n_attempted, n_succeeded, n_failed, n_skipped, error_summary
            FROM auto_rate_runs
            ORDER BY started_at DESC, id DESC LIMIT ?
            """,
            (row_cap,),
        )
        return [dict(r) for r in await cur.fetchall()]
|
||||
|
||||
|
||||
async def auto_rate_today_total() -> dict:
    """Aggregate today's auto-rate runs (UTC day) for cron throttling.

    Returns:
        A dict with ``n_runs`` (number of rows whose ``started_at`` falls on
        the current date), ``total_attempted`` and ``total_succeeded`` (sums
        of the respective counters; 0 when there are no rows).
    """
    totals_sql = """
        SELECT COUNT(*) AS n_runs,
        COALESCE(SUM(n_attempted), 0) AS total_attempted,
        COALESCE(SUM(n_succeeded), 0) AS total_succeeded
        FROM auto_rate_runs
        WHERE date(started_at) = date('now')
        """
    async with aiosqlite.connect(settings.db_path) as conn:
        cursor = await conn.execute(totals_sql)
        # An aggregate query without GROUP BY always yields exactly one row.
        n_runs, attempted, succeeded = await cursor.fetchone()
    return {
        "n_runs": n_runs,
        "total_attempted": attempted,
        "total_succeeded": succeeded,
    }
|
||||
|
||||
|
||||
async def create_job(
|
||||
job_id: str,
|
||||
input_preview: str,
|
||||
|
||||
91
app/main.py
91
app/main.py
@ -2735,6 +2735,8 @@ async def api_auto_rate_vote_orphans(
|
||||
request: Request,
|
||||
bundesland: Optional[str] = Form(None),
|
||||
limit: int = Form(10),
|
||||
source: str = Form("manual"),
|
||||
daily_cap: int = Form(200),
|
||||
user: dict = Depends(require_admin),
|
||||
):
|
||||
"""Bulk-Auto-Bewerten der Top-N Vote-Orphans (#172).
|
||||
@ -2742,13 +2744,38 @@ async def api_auto_rate_vote_orphans(
|
||||
Admin-only + rate-limited. Nimmt die neuesten Drucksachen aus
|
||||
`vote-orphans`, laedt den Antragstext per Adapter herunter und
|
||||
enqueued einen Job pro Drucksache. Konservatives Default-Limit 10.
|
||||
|
||||
`source` = 'manual'|'cron'|'api' wird in auto_rate_runs persistiert.
|
||||
`daily_cap` = max. Tagessumme an Auto-Bewertungen (Default 200), wird
|
||||
gegen die Run-Historie geprueft.
|
||||
"""
|
||||
if limit < 1 or limit > 50:
|
||||
raise HTTPException(status_code=400, detail="limit muss 1-50 sein")
|
||||
|
||||
from .auswertungen import get_vote_orphans
|
||||
from .database import (
|
||||
record_auto_rate_run,
|
||||
auto_rate_today_total,
|
||||
)
|
||||
from .queue import enqueue, QueueFullError
|
||||
|
||||
today = await auto_rate_today_total()
|
||||
if today["total_attempted"] + limit > daily_cap:
|
||||
remaining = max(0, daily_cap - today["total_attempted"])
|
||||
if remaining == 0:
|
||||
await record_auto_rate_run(
|
||||
source=source, limit_requested=limit, bundesland=bundesland,
|
||||
n_attempted=0, n_succeeded=0, n_failed=0, n_skipped=0,
|
||||
error_summary=f"daily_cap_reached:{daily_cap}",
|
||||
)
|
||||
return {
|
||||
"status": "skipped",
|
||||
"reason": "daily_cap_reached",
|
||||
"today": today,
|
||||
"daily_cap": daily_cap,
|
||||
}
|
||||
limit = remaining
|
||||
|
||||
orphans = get_vote_orphans(filter_bl=bundesland, limit=limit)
|
||||
|
||||
enqueued = []
|
||||
@ -2796,14 +2823,44 @@ async def api_auto_rate_vote_orphans(
|
||||
except QueueFullError:
|
||||
skipped.append({"drucksache": ds, "reason": "queue_full"})
|
||||
break
|
||||
|
||||
# Run in auto_rate_runs persistieren — auch wenn enqueued=0 ist.
|
||||
error_summary = None
|
||||
if skipped:
|
||||
error_summary = ", ".join(
|
||||
f"{s['drucksache']}:{s['reason'][:30]}" for s in skipped[:3]
|
||||
)
|
||||
if len(skipped) > 3:
|
||||
error_summary += f", … (+{len(skipped) - 3} weitere)"
|
||||
run_id = await record_auto_rate_run(
|
||||
source=source, limit_requested=limit, bundesland=bundesland,
|
||||
n_attempted=len(orphans["items"]),
|
||||
n_succeeded=len(enqueued),
|
||||
n_failed=0, # Job-Failures kommen nach Worker-Run, nicht hier
|
||||
n_skipped=len(skipped),
|
||||
error_summary=error_summary,
|
||||
)
|
||||
return {
|
||||
"status": "auto_rate_enqueued",
|
||||
"run_id": run_id,
|
||||
"enqueued": len(enqueued),
|
||||
"skipped": skipped,
|
||||
"jobs": enqueued,
|
||||
}
|
||||
|
||||
|
||||
@app.get("/api/auto-rate-runs")
async def api_auto_rate_runs(
    limit: int = 20,
    user: dict = Depends(require_admin),
):
    """Return the latest N vote-orphans auto-rate runs (admin only).

    Args:
        limit: Number of runs to return, 1-200. Out-of-range values are
            rejected; in particular a negative value would be treated by
            SQLite as "no limit" and return the whole table.
        user: Injected by the ``require_admin`` dependency.

    Returns:
        ``{"runs": [...], "today": {...}}`` — the run rows plus the
        aggregate for the current day.

    Raises:
        HTTPException: 400 if ``limit`` is outside 1-200.
    """
    if limit < 1 or limit > 200:
        raise HTTPException(status_code=400, detail="limit muss 1-200 sein")

    from .database import list_auto_rate_runs, auto_rate_today_total

    runs = await list_auto_rate_runs(limit=limit)
    today = await auto_rate_today_total()
    return {"runs": runs, "today": today}
|
||||
|
||||
|
||||
@app.get("/api/auswertungen/empfehlungs-konsistenz")
|
||||
async def auswertungen_empfehlungs_konsistenz(
|
||||
bundesland: Optional[str] = None,
|
||||
@ -3084,6 +3141,25 @@ async def api_admin_stand(user: dict = Depends(require_admin)):
|
||||
n_bookmarks = db.execute("SELECT COUNT(*) FROM bookmarks").fetchone()[0]
|
||||
except sqlite3.OperationalError:
|
||||
n_bookmarks = 0
|
||||
|
||||
# Auto-Rate-Runs (#173)
|
||||
try:
|
||||
auto_rate_today = db.execute("""
|
||||
SELECT
|
||||
COUNT(*) AS n_runs,
|
||||
COALESCE(SUM(n_attempted), 0) AS total_attempted,
|
||||
COALESCE(SUM(n_succeeded), 0) AS total_succeeded
|
||||
FROM auto_rate_runs
|
||||
WHERE date(started_at) = date('now')
|
||||
""").fetchone()
|
||||
auto_rate_recent = list(db.execute("""
|
||||
SELECT id, started_at, source, bundesland, limit_requested,
|
||||
n_attempted, n_succeeded, n_failed, n_skipped, error_summary
|
||||
FROM auto_rate_runs ORDER BY started_at DESC LIMIT 5
|
||||
""").fetchall())
|
||||
except sqlite3.OperationalError:
|
||||
auto_rate_today = (0, 0, 0)
|
||||
auto_rate_recent = []
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
@ -3113,6 +3189,21 @@ async def api_admin_stand(user: dict = Depends(require_admin)):
|
||||
"last_7_days": n_drafts_7d,
|
||||
},
|
||||
"bookmarks": n_bookmarks,
|
||||
"auto_rate": {
|
||||
"today_runs": auto_rate_today[0],
|
||||
"today_attempted": auto_rate_today[1],
|
||||
"today_succeeded": auto_rate_today[2],
|
||||
"recent": [
|
||||
{
|
||||
"id": r[0], "started_at": r[1], "source": r[2],
|
||||
"bundesland": r[3], "limit_requested": r[4],
|
||||
"n_attempted": r[5], "n_succeeded": r[6],
|
||||
"n_failed": r[7], "n_skipped": r[8],
|
||||
"error_summary": r[9],
|
||||
}
|
||||
for r in auto_rate_recent
|
||||
],
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -145,6 +145,27 @@
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<!-- Vote-Orphans-Auto-Bewertung (#173) -->
|
||||
<div class="stand-section">
|
||||
<h2>Vote-Orphans-Auto-Bewertung</h2>
|
||||
<p style="font-size:12px;opacity:0.65;margin:-4px 0 8px;">
|
||||
Heute: <strong id="auto-rate-today">—</strong>.
|
||||
Cron läuft alle 6h und enqueued bis zu 30 Orphans pro Lauf, max. 200 Anträge/Tag.
|
||||
</p>
|
||||
<table class="stand-table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Zeitpunkt</th><th>Quelle</th><th>BL</th>
|
||||
<th style="text-align:right;">Versucht</th>
|
||||
<th style="text-align:right;">Enqueued</th>
|
||||
<th style="text-align:right;">Skipped</th>
|
||||
<th>Notiz</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody id="stand-autorate-rows"></tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<div id="stand-meta" style="font-family:var(--font-mono);font-size:11px;opacity:0.5;margin-top:1.5rem;"></div>
|
||||
</div>
|
||||
|
||||
@ -229,6 +250,30 @@ async function loadStand() {
|
||||
Object.entries(ns).sort((a, b) => b[1] - a[1]).map(([s, n]) => `
|
||||
<tr><td>${s}</td><td>${fmtN(n)}</td></tr>`).join('');
|
||||
|
||||
// Auto-Rate-Run-Tabelle (#173)
|
||||
const ar = d.auto_rate || {};
|
||||
const elToday = document.getElementById('auto-rate-today');
|
||||
if (elToday) {
|
||||
elToday.textContent =
|
||||
`${fmtN(ar.today_runs || 0)} Runs · ${fmtN(ar.today_attempted || 0)} Anträge versucht · ${fmtN(ar.today_succeeded || 0)} enqueued`;
|
||||
}
|
||||
const arRecent = ar.recent || [];
|
||||
const arRowsEl = document.getElementById('stand-autorate-rows');
|
||||
if (arRowsEl) {
|
||||
arRowsEl.innerHTML = arRecent.length
|
||||
? arRecent.map(r => `
|
||||
<tr>
|
||||
<td style="font-family:var(--font-mono);font-size:11px;">${(r.started_at || '').slice(0, 16)}</td>
|
||||
<td style="font-family:var(--font-mono);font-size:11px;">${r.source || '—'}</td>
|
||||
<td>${r.bundesland || 'ALL'}</td>
|
||||
<td style="text-align:right;">${fmtN(r.n_attempted)}</td>
|
||||
<td style="text-align:right;">${fmtN(r.n_succeeded)}</td>
|
||||
<td style="text-align:right;">${fmtN(r.n_skipped)}</td>
|
||||
<td style="font-family:var(--font-mono);font-size:11px;opacity:0.65;">${r.error_summary || ''}</td>
|
||||
</tr>`).join('')
|
||||
: '<tr><td colspan="7" style="opacity:0.5;font-style:italic;">Noch kein Run heute oder in den letzten 5 Läufen.</td></tr>';
|
||||
}
|
||||
|
||||
document.getElementById('stand-meta').textContent =
|
||||
'Aktualisiert: ' + new Date().toLocaleTimeString('de-DE');
|
||||
} catch (e) {
|
||||
|
||||
153
scripts/auto-rate-orphans.sh
Executable file
153
scripts/auto-rate-orphans.sh
Executable file
@ -0,0 +1,153 @@
|
||||
#!/bin/bash
|
||||
# Vote-Orphans-Auto-Bewertung als Cron-Job (#173 Phase 3).
|
||||
#
|
||||
# Ruft pro Lauf maximal `MAX_PER_RUN` Drucksachen auf, die einen
|
||||
# Plenum-Vote, aber noch keine GWÖ-Bewertung haben. Limitiert auf
|
||||
# `MAX_PER_DAY` (Tagessumme) — gemessen an `auto_rate_runs.n_attempted`.
|
||||
#
|
||||
# Idempotent: bei 0 Orphans loggt das Skript sauber und beendet.
|
||||
# Bei Throttle (Daily-Cap erreicht) wird dies in auto_rate_runs
|
||||
# als `error_summary='daily_cap_reached:N'` festgehalten.
|
||||
#
|
||||
# Install als Host-Cron (alle 6h, max 30 Calls/Run, 200/Tag):
|
||||
#
|
||||
# crontab -e
|
||||
# 0 */6 * * * /opt/gwoe-antragspruefer-dev/scripts/auto-rate-orphans.sh \
|
||||
# gwoe-antragspruefer-dev >> /var/log/gwoe-auto-rate.log 2>&1
|
||||
#
|
||||
# Manueller Aufruf:
|
||||
# MAX_PER_RUN=10 ./scripts/auto-rate-orphans.sh gwoe-antragspruefer-dev
|
||||
|
||||
set -euo pipefail

CONTAINER="${1:-gwoe-antragspruefer-dev}"
MAX_PER_RUN="${MAX_PER_RUN:-30}"
MAX_PER_DAY="${MAX_PER_DAY:-200}"
BUNDESLAND="${BUNDESLAND:-}"   # empty = all federal states

# Skip if the container is not running.
# The name list is captured first instead of piping into `grep -q`: with
# `pipefail`, an early grep exit can make `docker ps` fail with SIGPIPE and
# turn a match into a false "not running". `|| true` keeps the old behavior
# of skipping (exit 0) when `docker ps` itself fails.
# -F/-x match the container name literally and as a whole line, so dots or
# other regex metacharacters in the name cannot match a different container.
RUNNING_CONTAINERS="$(docker ps --format '{{.Names}}' || true)"
if ! grep -Fxq -- "${CONTAINER}" <<<"${RUNNING_CONTAINERS}"; then
  echo "$(date -Iseconds) SKIP — ${CONTAINER} is not running"
  exit 0
fi

echo "$(date -Iseconds) START auto-rate-orphans (container=${CONTAINER}, max_per_run=${MAX_PER_RUN}, max_per_day=${MAX_PER_DAY}, bl=${BUNDESLAND:-ALL})"

# `docker exec [OPTIONS] CONTAINER COMMAND`: all options (-i, -e) must come
# BEFORE the container name. Anything after the container is taken as the
# command to run, so a trailing `-e ...` would be executed as a program.
docker exec -i \
  -e MAX_PER_RUN="$MAX_PER_RUN" \
  -e MAX_PER_DAY="$MAX_PER_DAY" \
  -e BUNDESLAND="$BUNDESLAND" \
  "$CONTAINER" \
  python <<'EOF'
|
||||
import os
|
||||
import asyncio
|
||||
|
||||
from app.auswertungen import get_vote_orphans
|
||||
from app.database import (
|
||||
record_auto_rate_run,
|
||||
auto_rate_today_total,
|
||||
get_assessment,
|
||||
create_job,
|
||||
)
|
||||
from app.parlamente import get_adapter, Drucksache
|
||||
from app.queue import enqueue, QueueFullError
|
||||
from app.main import run_drucksache_analysis # async-Worker-Funktion
|
||||
import uuid
|
||||
|
||||
|
||||
MAX_PER_RUN = int(os.environ.get("MAX_PER_RUN", "30"))
|
||||
MAX_PER_DAY = int(os.environ.get("MAX_PER_DAY", "200"))
|
||||
BUNDESLAND = os.environ.get("BUNDESLAND") or None
|
||||
|
||||
|
||||
async def main() -> None:
    """Run one cron pass of the vote-orphans auto-rating.

    Steps:
      1. Read today's totals; if ``total_attempted`` already reached
         ``MAX_PER_DAY``, record a zero-count run tagged
         ``daily_cap_reached:<cap>`` and stop.
      2. Fetch up to ``min(MAX_PER_RUN, remaining daily budget)`` orphans.
         With none found, record a zero-count run and stop.
      3. For each orphan: skip if already assessed, if no adapter exists for
         its state, if the text download raises, or if the text is empty;
         otherwise create a job row and enqueue the analysis. A full queue
         ends the loop.
      4. Record the run with its counters and a short summary of the first
         three skip reasons, then print a summary.
    """
    totals = await auto_rate_today_total()
    used_today = totals["total_attempted"]

    if used_today >= MAX_PER_DAY:
        print(f" SKIP daily_cap_reached: {used_today}/{MAX_PER_DAY}")
        await record_auto_rate_run(
            source="cron",
            limit_requested=MAX_PER_RUN,
            bundesland=BUNDESLAND,
            n_attempted=0, n_succeeded=0, n_failed=0, n_skipped=0,
            error_summary=f"daily_cap_reached:{MAX_PER_DAY}",
        )
        return

    # Never ask for more than what is left of today's budget.
    limit = min(MAX_PER_RUN, MAX_PER_DAY - used_today)
    items = get_vote_orphans(filter_bl=BUNDESLAND, limit=limit)["items"]

    if not items:
        print(" no orphans")
        await record_auto_rate_run(
            source="cron",
            limit_requested=limit,
            bundesland=BUNDESLAND,
            n_attempted=0, n_succeeded=0, n_failed=0, n_skipped=0,
        )
        return

    enqueued = 0
    skipped = []  # list of (drucksache, reason) tuples
    for item in items:
        bl, ds = item["bundesland"], item["drucksache"]

        if await get_assessment(ds):
            skipped.append((ds, "already_rated"))
            continue

        adapter = get_adapter(bl)
        if not adapter:
            skipped.append((ds, f"no_adapter_for_{bl}"))
            continue

        try:
            text = await adapter.download_text(ds)
        except Exception as e:
            # Best effort: one failing download must not abort the whole run.
            skipped.append((ds, f"download_error:{str(e)[:40]}"))
            continue

        if not text:
            skipped.append((ds, "empty_text"))
            continue

        doc = Drucksache(
            drucksache=ds, title=ds, fraktionen=[], datum="",
            link="", bundesland=bl,
        )
        job_id = str(uuid.uuid4())
        # NOTE(review): the job row is created before enqueue(); when the
        # queue is full the row stays behind without a queued task — confirm
        # that this is handled elsewhere.
        await create_job(job_id, text[:500], bl, "qwen-plus", drucksache=ds)
        try:
            # NOTE(review): app.queue is not visible here. If it is an
            # in-process queue, work enqueued in this short-lived
            # `docker exec` interpreter may not run once asyncio.run()
            # returns — TODO confirm.
            await enqueue(
                job_id, run_drucksache_analysis,
                job_id, ds, text, bl, "qwen-plus", doc,
                drucksache=ds,
            )
        except QueueFullError:
            skipped.append((ds, "queue_full"))
            break
        else:
            enqueued += 1

    # Compact note for the run row: first three skips, then a "+N more" tail.
    error_summary = None
    if skipped:
        error_summary = ", ".join(f"{ds}:{r[:30]}" for ds, r in skipped[:3])
        if len(skipped) > 3:
            error_summary += f", … (+{len(skipped) - 3} weitere)"

    # NOTE(review): n_attempted counts every fetched item, including those
    # never reached after a queue_full break — confirm this is intended for
    # the daily-cap accounting.
    await record_auto_rate_run(
        source="cron",
        limit_requested=limit,
        bundesland=BUNDESLAND,
        n_attempted=len(items),
        n_succeeded=enqueued,
        n_failed=0,
        n_skipped=len(skipped),
        error_summary=error_summary,
    )
    print(f" attempted={len(items)} enqueued={enqueued} skipped={len(skipped)}")
    for ds, reason in skipped[:5]:
        print(f" skip {ds}: {reason}")
||||
|
||||
|
||||
asyncio.run(main())
|
||||
EOF
|
||||
|
||||
echo "$(date -Iseconds) END auto-rate-orphans"
|
||||
Loading…
Reference in New Issue
Block a user