antragstracker/scripts/run_ketten_match.sh

57 lines
2.0 KiB
Bash
Raw Permalink Normal View History

#!/bin/bash
# Chunked Ketten-Match runner.
# Runs the matcher in batches of BATCH_SIZE and pauses PAUSE seconds between
# batches.
# Gemini free tier: 1500 RPM / 1M TPM — we stay below that.
# `set -e` is deliberately not enabled: a single failed batch should not kill
# the runner.

# Every path used further down is relative to the parent of this script's
# directory. Abort if that directory cannot be entered; otherwise the
# database and matcher paths would be resolved against whatever directory the
# caller happened to be in.
cd -- "$(dirname -- "$0")/.." || {
  echo "Fehler: Wechsel nach '$(dirname -- "$0")/..' fehlgeschlagen" >&2
  exit 1
}

BATCH_SIZE=200 # Items handed to the matcher per batch (--batch-size)
WORKERS=3      # Value passed to the matcher's --workers option
PAUSE=30       # Seconds to sleep between batches
MAX_BATCHES=50 # Safety cap on the number of batches per run

echo "=== Chunked Ketten-Match Runner ==="
echo "Batch: $BATCH_SIZE | Workers: $WORKERS | Pause: ${PAUSE}s"
# SQLite database that is polled for progress.
DB_PATH="data/tracker_remote.db"

# Counts the chains that still lack an 'umsetzung_match' rating. A chain is
# counted when:
#   - the vorlage of its first link (position 0, alias v_u) has a non-empty
#     volltext_clean,
#   - the vorlage of its last link (highest position, alias v_b) has a
#     beratungen row with a beschlusstext, and
#   - no ki_bewertungen row of typ 'umsetzung_match' exists for v_u.
OPEN_CHAINS_SQL="
SELECT COUNT(*) FROM ketten k
JOIN ketten_glieder kg_u ON kg_u.kette_id = k.id AND kg_u.position = 0
JOIN vorlagen v_u ON kg_u.vorlage_id = v_u.id
JOIN ketten_glieder kg_b ON kg_b.kette_id = k.id
AND kg_b.position = (SELECT MAX(position) FROM ketten_glieder WHERE kette_id = k.id)
JOIN vorlagen v_b ON kg_b.vorlage_id = v_b.id
LEFT JOIN beratungen b ON b.vorlage_id = v_b.id AND b.beschlusstext IS NOT NULL
LEFT JOIN ki_bewertungen ki_match ON ki_match.vorlage_id = v_u.id AND ki_match.typ = 'umsetzung_match'
WHERE b.beschlusstext IS NOT NULL
AND v_u.volltext_clean IS NOT NULL AND v_u.volltext_clean != ''
AND ki_match.id IS NULL
"

# Run at most MAX_BATCHES batches; stop early once nothing is left open.
for ((i = 1; i <= MAX_BATCHES; i++)); do
  echo ""
  echo "--- Batch $i/$MAX_BATCHES ($(date +%H:%M:%S)) ---"

  # Check whether anything is still open.
  REMAINING=$(sqlite3 "$DB_PATH" "$OPEN_CHAINS_SQL")

  # If sqlite3 failed (missing binary, locked or missing database) the result
  # is empty or not a number. Without this guard the numeric tests below
  # would error out, evaluate as false, and the loop would start the matcher
  # MAX_BATCHES times in a row without any pause.
  if ! [[ "$REMAINING" =~ ^[0-9]+$ ]]; then
    echo "Fehler: Anzahl offener Ketten nicht ermittelbar (sqlite3 lieferte: '$REMAINING')" >&2
    exit 1
  fi

  echo "Noch offen: $REMAINING"
  if [ "$REMAINING" -eq 0 ]; then
    echo "✅ Alle Ketten bewertet!"
    break
  fi

  # A failed batch must not abort the runner, but its status is recorded so
  # that it can be reported and so that the retry is not started immediately.
  batch_rc=0
  python3 scripts/ketten_match.py --batch-size "$BATCH_SIZE" --workers "$WORKERS" || batch_rc=$?
  if [ "$batch_rc" -ne 0 ]; then
    echo "Warnung: Batch $i endete mit Exit-Code $batch_rc, Runner läuft weiter" >&2
  fi

  DONE=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM ki_bewertungen WHERE typ='umsetzung_match'")
  echo "Gesamt bewertet: $DONE"

  # Pause only when another batch is expected: never after the final
  # iteration, and otherwise when more than one batch worth of work was open
  # before this batch, or when this batch failed and will be retried.
  if [ "$i" -lt "$MAX_BATCHES" ] &&
    { [ "$REMAINING" -gt "$BATCH_SIZE" ] || [ "$batch_rc" -ne 0 ]; }; then
    echo "Pause ${PAUSE}s..."
    sleep "$PAUSE"
  fi
done
# Final summary: a blank separator line, the closing banner, and the number of
# ki_bewertungen rows with typ 'umsetzung_match' currently in the database.
printf '\n'
printf '%s\n' "=== Runner beendet ==="
TOTAL=$(
  sqlite3 data/tracker_remote.db \
    "SELECT COUNT(*) FROM ki_bewertungen WHERE typ='umsetzung_match'"
)
printf 'Gesamt bewertet: %s\n' "$TOTAL"