gwoe-antragspruefer/scripts/auto-rate-orphans.sh

154 lines
4.8 KiB
Bash
Raw Normal View History

#!/bin/bash
# Vote-Orphans-Auto-Bewertung als Cron-Job (#173 Phase 3).
#
# Ruft pro Lauf maximal `MAX_PER_RUN` Drucksachen auf, die einen
# Plenum-Vote, aber noch keine GWÖ-Bewertung haben. Limitiert auf
# `MAX_PER_DAY` (Tagessumme) — gemessen an `auto_rate_runs.n_attempted`.
#
# Idempotent: bei 0 Orphans loggt das Skript sauber und beendet.
# Bei Throttle (Daily-Cap erreicht) wird dies in auto_rate_runs
# als `error_summary='daily_cap_reached:N'` festgehalten.
#
# Install als Host-Cron (alle 6h, max 30 Calls/Run, 200/Tag):
#
# crontab -e
# 0 */6 * * * /opt/gwoe-antragspruefer-dev/scripts/auto-rate-orphans.sh gwoe-antragspruefer-dev >> /var/log/gwoe-auto-rate.log 2>&1
#
# Manueller Aufruf:
# MAX_PER_RUN=10 ./scripts/auto-rate-orphans.sh gwoe-antragspruefer-dev
# Strict mode: abort on errors, on unset variables and on failing pipelines.
set -euo pipefail

# $1: name of the target container (defaults to the dev instance).
CONTAINER="${1:-gwoe-antragspruefer-dev}"
# Limits are overridable from the environment of the caller.
MAX_PER_RUN="${MAX_PER_RUN:-30}"
MAX_PER_DAY="${MAX_PER_DAY:-200}"
BUNDESLAND="${BUNDESLAND:-}" # empty = all federal states

# Skip (exit 0) when the container is not running.
#
# The name list is captured first instead of piping `docker ps` straight into
# `grep -q`: grep -q exits on the first match, which can kill the producer with
# SIGPIPE, and under `pipefail` that turns a positive match into a failed
# pipeline (i.e. a bogus "not running"). `|| true` keeps the previous
# behaviour of treating a failing `docker ps` as "not running" instead of
# aborting under `set -e`; docker's own error message still reaches the log.
running_containers="$(docker ps --format '{{.Names}}' || true)"

# -F -x: compare the name literally against whole lines, so characters such as
# '.' in a container name are not interpreted as regex metacharacters.
if ! grep -Fxq -- "$CONTAINER" <<<"$running_containers"; then
  echo "$(date -Iseconds) SKIP — ${CONTAINER} is not running"
  exit 0
fi

echo "$(date -Iseconds) START auto-rate-orphans (container=${CONTAINER}, max_per_run=${MAX_PER_RUN}, max_per_day=${MAX_PER_DAY}, bl=${BUNDESLAND:-ALL})"
# Run the embedded Python program inside the container. The three settings are
# handed over explicitly with -e so the interpreter sees the values resolved by
# this script; -i keeps stdin attached so the here-document below reaches
# `python`. The delimiter is quoted ('EOF'), so the shell performs no expansion
# inside the Python source.
docker exec -i \
-e MAX_PER_RUN="$MAX_PER_RUN" \
-e MAX_PER_DAY="$MAX_PER_DAY" \
-e BUNDESLAND="$BUNDESLAND" \
"$CONTAINER" python <<'EOF'
import os
import asyncio
from app.auswertungen import get_vote_orphans
from app.database import (
record_auto_rate_run,
auto_rate_today_total,
get_assessment,
create_job,
)
from app.parlamente import get_adapter, Drucksache
from app.queue import enqueue, QueueFullError
from app.main import run_drucksache_analysis # async-Worker-Funktion
import uuid
# Settings arrive as environment variables; the fallbacks mirror the defaults
# of the surrounding shell script. int() raises ValueError on non-numeric input.
MAX_PER_RUN = int(os.getenv("MAX_PER_RUN", "30"))
MAX_PER_DAY = int(os.getenv("MAX_PER_DAY", "200"))
# An unset or empty BUNDESLAND becomes None.
BUNDESLAND = os.getenv("BUNDESLAND") or None
async def main() -> None:
    """Enqueue analysis jobs for vote orphans while respecting the daily cap.

    Uses the module-level settings MAX_PER_RUN, MAX_PER_DAY and BUNDESLAND.
    Each of the three regular exit paths (daily cap reached, no orphans,
    normal run) writes one row via ``record_auto_rate_run``.

    ``n_attempted`` counts only the items the loop actually processed. When
    the queue is full the loop stops early, and the items that were never
    looked at are not charged against the daily budget. As a result
    ``n_attempted == n_succeeded + n_skipped`` always holds for a normal run.
    """
    model = "qwen-plus"  # single place for the model name used for job + worker

    today = await auto_rate_today_total()
    today_attempted = today["total_attempted"]
    if today_attempted >= MAX_PER_DAY:
        print(f" SKIP daily_cap_reached: {today_attempted}/{MAX_PER_DAY}")
        await record_auto_rate_run(
            source="cron",
            limit_requested=MAX_PER_RUN,
            bundesland=BUNDESLAND,
            n_attempted=0, n_succeeded=0, n_failed=0, n_skipped=0,
            error_summary=f"daily_cap_reached:{MAX_PER_DAY}",
        )
        return

    # Never request more than what is left of today's budget.
    remaining_today = MAX_PER_DAY - today_attempted
    limit = min(MAX_PER_RUN, remaining_today)

    orphans = get_vote_orphans(filter_bl=BUNDESLAND, limit=limit)
    items = orphans["items"]
    if not items:
        print(" no orphans")
        await record_auto_rate_run(
            source="cron",
            limit_requested=limit,
            bundesland=BUNDESLAND,
            n_attempted=0, n_succeeded=0, n_failed=0, n_skipped=0,
        )
        return

    attempted = 0   # items the loop actually looked at
    enqueued = 0
    skipped = []    # (drucksache, reason) pairs
    for item in items:
        attempted += 1
        bl = item["bundesland"]
        ds = item["drucksache"]

        existing = await get_assessment(ds)
        if existing:
            skipped.append((ds, "already_rated"))
            continue

        adapter = get_adapter(bl)
        if not adapter:
            skipped.append((ds, f"no_adapter_for_{bl}"))
            continue

        # A failing download only skips this item; the run continues.
        try:
            text = await adapter.download_text(ds)
        except Exception as e:
            skipped.append((ds, f"download_error:{str(e)[:40]}"))
            continue
        if not text:
            skipped.append((ds, "empty_text"))
            continue

        # Only the identifier and the state are known here, so the remaining
        # Drucksache fields are filled with placeholders.
        doc = Drucksache(
            drucksache=ds, title=ds, fraktionen=[], datum="",
            link="", bundesland=bl,
        )
        job_id = str(uuid.uuid4())
        await create_job(job_id, text[:500], bl, model, drucksache=ds)
        try:
            await enqueue(
                job_id, run_drucksache_analysis,
                job_id, ds, text, bl, model, doc,
                drucksache=ds,
            )
        except QueueFullError:
            # NOTE(review): create_job already ran for this job_id, so a job
            # record without a queued worker may remain — confirm whether it
            # needs to be marked or removed.
            skipped.append((ds, "queue_full"))
            break  # queue is full; the remaining items stay untouched
        enqueued += 1

    # Compact summary: the first three skips plus a count of the rest.
    error_summary = (
        ", ".join(f"{ds}:{r[:30]}" for ds, r in skipped[:3])
        + (f", … (+{len(skipped) - 3} weitere)" if len(skipped) > 3 else "")
        if skipped else None
    )
    await record_auto_rate_run(
        source="cron",
        limit_requested=limit,
        bundesland=BUNDESLAND,
        n_attempted=attempted,
        n_succeeded=enqueued,
        n_failed=0,
        n_skipped=len(skipped),
        error_summary=error_summary,
    )
    print(f" attempted={attempted} enqueued={enqueued} skipped={len(skipped)}")
    if skipped:
        for ds, reason in skipped[:5]:
            print(f" skip {ds}: {reason}")
# Entry point of the embedded Python program: run the coroutine to completion.
asyncio.run(main())
EOF
# Printed only when `docker exec` succeeded; with `set -e` a non-zero exit
# status aborts the script before this line.
echo "$(date -Iseconds) END auto-rate-orphans"