antragstracker/scripts/geocode_background.sh

69 lines
1.8 KiB
Bash
Raw Permalink Normal View History

#!/bin/bash
# Background geocoding with a heartbeat roughly every 20 minutes.
#
# Repeatedly runs scripts/geocode_pending.py in batches of BATCH_SIZE and,
# after each batch, writes a JSON status document to
# data/geocode-heartbeat.json. Possible "status" values:
#   "running"   - a batch finished and rows with lat IS NULL remain
#   "completed" - no rows with lat IS NULL remain; the script exits 0
#   "error"     - the row counts could not be read from data/tracker.db
#                 MAX_COUNT_FAILURES times in a row; the script exits 1
set -euo pipefail

# All relative paths below are resolved from the project root.
cd "$(dirname "$0")/.."

readonly HEARTBEAT_FILE="data/geocode-heartbeat.json"
readonly HEARTBEAT_TMP="${HEARTBEAT_FILE}.tmp.$$"
readonly BATCH_SIZE=1200 # ~20 min at 1 request/s
readonly PAUSE_SECONDS=5
readonly MAX_COUNT_FAILURES=5
# Expected shape of the count query output: "<pending>|<geocoded>".
readonly COUNTS_RE='^([0-9]+)\|([0-9]+)$'

# Remove a leftover temp file if the script dies between write and rename.
trap 'rm -f -- "$HEARTBEAT_TMP"' EXIT

# Print the current UTC time as an ISO-8601 timestamp.
utc_now() {
  date -u +"%Y-%m-%dT%H:%M:%SZ"
}

# Replace the heartbeat file with the JSON document read from stdin.
# The document is written to a temp file in the same directory and renamed
# into place, so readers never observe a truncated or half-written file.
write_heartbeat() {
  cat > "$HEARTBEAT_TMP"
  mv -f -- "$HEARTBEAT_TMP" "$HEARTBEAT_FILE"
}

# Print the last "<label>: <number>" value found in the text, or 0 if none.
#   $1 - label to look for (e.g. "geocoded")
#   $2 - text to search
# Only the last match is used so that repeated occurrences cannot produce a
# multi-line value; this assumes the final occurrence is the batch summary.
extract_stat() {
  local label=$1 text=$2 value
  # grep exits non-zero on "no match" (or if -P is unsupported); both cases
  # intentionally fall back to 0.
  value=$(grep -oP "${label}: \\K\\d+" <<<"$text" 2>/dev/null | tail -n 1) || true
  printf '%s\n' "${value:-0}"
}

# Print "<pending>|<geocoded>" row counts of table orte in data/tracker.db.
# Returns non-zero (with Python's error on stderr) if the query fails.
query_counts() {
  python3 - <<'PY'
import sqlite3
conn = sqlite3.connect('data/tracker.db')
r = conn.execute('SELECT COUNT(*) FROM orte WHERE lat IS NULL').fetchone()[0]
t = conn.execute('SELECT COUNT(*) FROM orte WHERE lat IS NOT NULL').fetchone()[0]
print(f'{r}|{t}')
conn.close()
PY
}

count_failures=0

while true; do
  START=$(date +%s)
  START_ISO=$(utc_now)

  # Run one batch. A failing batch must not kill the loop, but its exit
  # status and the tail of its output are logged instead of being discarded.
  batch_status=0
  OUTPUT=$(python3 scripts/geocode_pending.py --limit "$BATCH_SIZE" 2>&1) || batch_status=$?
  if (( batch_status != 0 )); then
    printf '[%s] Warning: geocode_pending.py exited with status %d\n' \
      "$(date)" "$batch_status" >&2
    tail -n 5 <<<"$OUTPUT" >&2
  fi

  END=$(date +%s)
  DURATION=$((END - START))

  # Parse batch output for stats.
  GEOCODED=$(extract_stat geocoded "$OUTPUT")
  FAILED=$(extract_stat failed "$OUTPUT")

  # A failed or malformed count must never be read as "0 pending": that
  # would report the job as completed while work is still outstanding.
  if ! counts=$(query_counts) || [[ ! "$counts" =~ $COUNTS_RE ]]; then
    count_failures=$((count_failures + 1))
    printf '[%s] Could not read row counts from data/tracker.db (attempt %d/%d)\n' \
      "$(date)" "$count_failures" "$MAX_COUNT_FAILURES" >&2
    if (( count_failures >= MAX_COUNT_FAILURES )); then
      write_heartbeat << EOF
{
  "status": "error",
  "failed_at": "$(utc_now)",
  "error": "could not read row counts from data/tracker.db",
  "pid": null
}
EOF
      exit 1
    fi
    sleep "$PAUSE_SECONDS"
    continue
  fi
  count_failures=0
  PENDING=${BASH_REMATCH[1]}
  DONE=${BASH_REMATCH[2]}

  # Write heartbeat.
  write_heartbeat << EOF
{
  "status": "running",
  "last_batch_at": "$START_ISO",
  "duration_seconds": $DURATION,
  "batch_geocoded": $GEOCODED,
  "batch_failed": $FAILED,
  "total_geocoded": $DONE,
  "total_pending": $PENDING,
  "pid": $$
}
EOF

  echo "[$(date)] Batch done: +$GEOCODED geocoded, $FAILED failed, $PENDING remaining"

  # Done?
  if [[ "$PENDING" == "0" ]]; then
    write_heartbeat << EOF
{
  "status": "completed",
  "completed_at": "$(utc_now)",
  "total_geocoded": $DONE,
  "total_pending": 0,
  "pid": null
}
EOF
    echo "✅ Geocoding fertig!"
    exit 0
  fi

  # Short pause between batches.
  sleep "$PAUSE_SECONDS"
done