Phase 3 (Vote-Orphans-Auto-Bewertung): - Neue Tabelle `auto_rate_runs` (additiv) mit started_at, source, bundesland, limit_requested, n_attempted/succeeded/failed/skipped, error_summary. - Neue DB-Helper: record_auto_rate_run, list_auto_rate_runs, auto_rate_today_total. - POST /api/auswertungen/vote-orphans/auto-rate erweitert um source, daily_cap und Run-Persistenz. Throttled gegen Tagessumme. - Neuer Endpoint GET /api/auto-rate-runs (admin) — letzte N Runs + Tagessumme. - scripts/auto-rate-orphans.sh: Cron-Wrapper (analog auto-fetch-news.sh) mit MAX_PER_RUN=30 / MAX_PER_DAY=200 Defaults, BUNDESLAND-Filter optional, ruft direkt die Python-Worker-Funktion via docker exec. - Admin-Stand-Dashboard: KPI-Zeile "heute X Runs / Y versucht" + Tabelle der letzten 5 Runs mit BL/Counts/Notiz. Refs: #173, ADR 0010 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
154 lines
4.8 KiB
Bash
Executable File
154 lines
4.8 KiB
Bash
Executable File
#!/bin/bash
|
|
# Vote-Orphans-Auto-Bewertung als Cron-Job (#173 Phase 3).
|
|
#
|
|
# Ruft pro Lauf maximal `MAX_PER_RUN` Drucksachen auf, die einen
|
|
# Plenum-Vote, aber noch keine GWÖ-Bewertung haben. Limitiert auf
|
|
# `MAX_PER_DAY` (Tagessumme) — gemessen an `auto_rate_runs.n_attempted`.
|
|
#
|
|
# Idempotent: bei 0 Orphans loggt das Skript sauber und beendet sich.
|
|
# Bei Throttle (Daily-Cap erreicht) wird dies in auto_rate_runs
|
|
# als `error_summary='daily_cap_reached:N'` festgehalten.
|
|
#
|
|
# Install als Host-Cron (alle 6h, max 30 Calls/Run, 200/Tag):
|
|
#
|
|
# crontab -e
|
|
# 0 */6 * * * /opt/gwoe-antragspruefer-dev/scripts/auto-rate-orphans.sh \
|
|
# gwoe-antragspruefer-dev >> /var/log/gwoe-auto-rate.log 2>&1
|
|
#
|
|
# Manueller Aufruf:
|
|
# MAX_PER_RUN=10 ./scripts/auto-rate-orphans.sh gwoe-antragspruefer-dev
|
|
|
|
set -euo pipefail

# First positional argument: name of the app container to run in
# (falls back to the dev container).
CONTAINER="${1:-gwoe-antragspruefer-dev}"
# Tunables, overridable through the caller's environment.
MAX_PER_RUN="${MAX_PER_RUN:-30}"
MAX_PER_DAY="${MAX_PER_DAY:-200}"
BUNDESLAND="${BUNDESLAND:-}"  # empty = no Bundesland filter (all BL)

# Skip (exit 0, so cron does not report a failure) when the container is
# not running.
# -F -x: match the name literally and as a whole line, so regex
# metacharacters in the container name cannot produce false matches.
# Output goes to /dev/null instead of using -q: with `pipefail`, a grep
# that exits early can kill `docker ps` with SIGPIPE and make the whole
# pipeline look failed.
if ! docker ps --format '{{.Names}}' | grep -Fx -- "$CONTAINER" >/dev/null; then
    echo "$(date -Iseconds) SKIP — ${CONTAINER} is not running"
    exit 0
fi

echo "$(date -Iseconds) START auto-rate-orphans (container=${CONTAINER}, max_per_run=${MAX_PER_RUN}, max_per_day=${MAX_PER_DAY}, bl=${BUNDESLAND:-ALL})"

# All `docker exec` options must come BEFORE the container name: anything
# after it is treated as the command to run inside the container, so a
# trailing `-e VAR=...` would be executed as a program instead of setting
# the environment. The Python program is fed via stdin (hence -i).
docker exec -i \
    -e MAX_PER_RUN="$MAX_PER_RUN" \
    -e MAX_PER_DAY="$MAX_PER_DAY" \
    -e BUNDESLAND="$BUNDESLAND" \
    "$CONTAINER" \
    python <<'EOF'
|
|
import os
|
|
import asyncio
|
|
|
|
from app.auswertungen import get_vote_orphans
|
|
from app.database import (
|
|
record_auto_rate_run,
|
|
auto_rate_today_total,
|
|
get_assessment,
|
|
create_job,
|
|
)
|
|
from app.parlamente import get_adapter, Drucksache
|
|
from app.queue import enqueue, QueueFullError
|
|
from app.main import run_drucksache_analysis # async-Worker-Funktion
|
|
import uuid
|
|
|
|
|
|
# Limits handed in by the wrapper script through the process environment.
# The fallbacks mirror the shell defaults (30 per run, 200 per day).
MAX_PER_RUN = int(os.getenv("MAX_PER_RUN", "30"))
MAX_PER_DAY = int(os.getenv("MAX_PER_DAY", "200"))
# An unset or empty BUNDESLAND collapses to None, i.e. no filter is passed on.
BUNDESLAND = os.getenv("BUNDESLAND") or None
|
|
|
|
|
|
async def main() -> None:
    """Enqueue analyses for vote orphans, bounded by run and day caps.

    Steps:
      1. Read today's attempted total; if it already reaches MAX_PER_DAY,
         record a throttled run (``daily_cap_reached``) and stop.
      2. Fetch at most ``min(MAX_PER_RUN, remaining daily budget)`` orphans.
      3. For each orphan: skip it if an assessment exists, no adapter is
         available, or the text download fails / is empty; otherwise
         create a job and enqueue the analysis.
      4. Persist one ``auto_rate_runs`` entry with the counts and a short
         summary of the first skip reasons, and print the same to stdout.

    Every exit path records exactly one run via ``record_auto_rate_run``.
    """
    model = "qwen-plus"

    today = await auto_rate_today_total()
    # `or 0` tolerates a None total (e.g. an empty aggregate); presumably
    # the helper already returns an int — harmless if so.
    today_attempted = today["total_attempted"] or 0

    if today_attempted >= MAX_PER_DAY:
        print(f" SKIP daily_cap_reached: {today_attempted}/{MAX_PER_DAY}")
        await record_auto_rate_run(
            source="cron",
            limit_requested=MAX_PER_RUN,
            bundesland=BUNDESLAND,
            n_attempted=0, n_succeeded=0, n_failed=0, n_skipped=0,
            error_summary=f"daily_cap_reached:{MAX_PER_DAY}",
        )
        return

    # Never request more than what is left of today's budget.
    remaining_today = MAX_PER_DAY - today_attempted
    limit = min(MAX_PER_RUN, remaining_today)
    orphans = get_vote_orphans(filter_bl=BUNDESLAND, limit=limit)
    items = orphans["items"]
    if not items:
        print(" no orphans")
        await record_auto_rate_run(
            source="cron",
            limit_requested=limit,
            bundesland=BUNDESLAND,
            n_attempted=0, n_succeeded=0, n_failed=0, n_skipped=0,
        )
        return

    # `attempted` counts only items the loop actually looked at. When the
    # loop stops early on a full queue, the untouched remainder must not be
    # charged against the daily cap (which is measured via n_attempted).
    attempted = 0
    enqueued = 0
    skipped = []  # (drucksache, reason) pairs
    for item in items:
        attempted += 1
        bl = item["bundesland"]
        ds = item["drucksache"]

        if await get_assessment(ds):
            skipped.append((ds, "already_rated"))
            continue

        adapter = get_adapter(bl)
        if not adapter:
            skipped.append((ds, f"no_adapter_for_{bl}"))
            continue

        try:
            text = await adapter.download_text(ds)
        except Exception as e:
            # Best effort: one failing download must not abort the run;
            # the (truncated) reason ends up in the run's error summary.
            skipped.append((ds, f"download_error:{str(e)[:40]}"))
            continue
        if not text:
            skipped.append((ds, "empty_text"))
            continue

        # Only id and Bundesland are known here, so the remaining
        # Drucksache fields are filled with placeholders.
        doc = Drucksache(
            drucksache=ds, title=ds, fraktionen=[], datum="",
            link="", bundesland=bl,
        )
        job_id = str(uuid.uuid4())
        # NOTE(review): the job record is created before enqueue(); if the
        # queue is full that record stays behind without a queued task —
        # confirm whether it needs cleanup.
        await create_job(job_id, text[:500], bl, model, drucksache=ds)
        try:
            await enqueue(
                job_id, run_drucksache_analysis,
                job_id, ds, text, bl, model, doc,
                drucksache=ds,
            )
        except QueueFullError:
            skipped.append((ds, "queue_full"))
            # Later items would hit the same full queue; stop here.
            break
        enqueued += 1

    # Compact summary: the first three skip reasons plus a remainder count.
    error_summary = (
        ", ".join(f"{ds}:{r[:30]}" for ds, r in skipped[:3])
        + (f", … (+{len(skipped) - 3} weitere)" if len(skipped) > 3 else "")
        if skipped else None
    )

    await record_auto_rate_run(
        source="cron",
        limit_requested=limit,
        bundesland=BUNDESLAND,
        n_attempted=attempted,
        n_succeeded=enqueued,
        n_failed=0,
        n_skipped=len(skipped),
        error_summary=error_summary,
    )
    print(f" attempted={attempted} enqueued={enqueued} skipped={len(skipped)}")
    for ds, reason in skipped[:5]:
        print(f" skip {ds}: {reason}")


# NOTE(review): asyncio.run() returns as soon as main() finishes. If
# app.queue runs jobs inside this process's event loop, enqueued analyses
# may not complete — TODO confirm the queue is drained by a separate worker.
asyncio.run(main())
|
|
EOF
|
|
|
|
# Closing log marker; only reached when the docker exec above succeeded,
# because the script runs under `set -e`.
printf '%s END auto-rate-orphans\n' "$(date -Iseconds)"
|