antragstracker/scripts/geocode_pending.py
Dotty Dotter 9d8a73e2a9 feat: Parteien-Filter, Klassifikation, Umsetzungsbewertung, KI-Neubewertung
- Vorlagen + Ketten: Partei-Dropdown-Filter mit Badges (#9)
- Vorlagen-Detail: Ketten-Klassifikation mit Begründung anzeigen
- Vorlagen-Detail: Umsetzungsbewertungen mit Score + Begründung
- SPA-Routing: Catch-All für direkten URL-Zugriff
- Status-Engine: Begründungen für alle Ketten-Status generieren
- Kurze Beschlusstexte (<=5 Zeichen) nicht mehr als Beschluss werten
- POST /api/bewertung/vorlagen/{id} + /ketten/{id} für KI-Neubewertung
- Frontend: 'Neu bewerten' Button + Kommentarfeld auf beiden Detailseiten
- Job-Status-Polling mit Spinner
- ALLRIS-Rescrape vor Bewertung noch offen (#10)

Closes #9
2026-04-01 10:36:22 +02:00

94 lines
3.1 KiB
Python

#!/usr/bin/env python3
"""Geocode pending places via Nominatim.

Nominatim's usage policy allows at most 1 request per second, so the script
sleeps between lookup attempts.
"""
import argparse
import sqlite3
import time
from pathlib import Path
from typing import Optional, Tuple
import httpx
# SQLite database file: data/tracker_remote.db beside the directory that
# contains this script.
DB: Path = Path(__file__).resolve().parent.parent.joinpath("data", "tracker_remote.db")

# Nominatim search endpoint queried for every lookup.
NOMINATIM: str = "https://nominatim.openstreetmap.org/search"

# User-Agent header sent with each request (application name plus contact).
UA: str = "Antragstracker-Hagen/1.0 (tobias.roedel@econgood.org)"

# Value passed as Nominatim's `viewbox` parameter for bounded queries:
# min lon, min lat, max lon, max lat around Hagen.
HAGEN_BBOX: str = "7.35,51.30,7.65,51.45"
def geocode(
    client: httpx.Client, name: str, delay: float = 3.0
) -> Optional[Tuple[float, float]]:
    """Resolve a place name to coordinates in the Hagen area via Nominatim.

    Up to three query variants are tried in order, from strictest to loosest:
    the cleaned name restricted to the Hagen bounding box, the cleaned name
    with an "NRW" hint, and finally the original name. The first hit whose
    coordinates pass the sanity window wins.

    Args:
        client: HTTP client used for the Nominatim requests.
        name: Place name to look up. ``None`` or a blank name yields ``None``
            without any request being sent.
        delay: Seconds to pause after every request, successful or not, so
            that consecutive calls never hit Nominatim back-to-back.

    Returns:
        ``(lat, lon)`` of the first plausible hit, or ``None`` if no variant
        produced a result inside the sanity window.
    """
    # A blank name would turn into the query ", Hagen, Germany" and resolve
    # to the city itself, which is not a meaningful location for the place.
    if not name or not name.strip():
        return None

    # Clean name: remove a trailing "Hagen" to avoid duplicating it below.
    clean = name.strip().rstrip(",").strip()
    if clean.lower().endswith(" hagen"):
        clean = clean[:-6].strip().rstrip(",").strip()

    # Query variants, progressively less strict.
    queries = [
        (f"{clean}, Hagen, Germany", True),   # bounded to Hagen
        (f"{clean}, Hagen, NRW", False),      # unbounded fallback
        (f"{name}, Germany", False),          # original name
    ]

    for query, bounded in queries:
        params = {"q": query, "format": "json", "limit": 1}
        if bounded:
            params["viewbox"] = HAGEN_BBOX
            params["bounded"] = 1
        try:
            response = client.get(
                NOMINATIM,
                params=params,
                headers={"User-Agent": UA},
                timeout=10,
            )
            if response.status_code != 200:
                # Surface throttling/blocking (e.g. 429, 403) instead of
                # letting it look like "place not found".
                print(f"  ! Nominatim HTTP {response.status_code} for {query!r}")
                continue
            results = response.json()  # parse the payload only once
            if results:
                hit = results[0]
                lat, lon = float(hit["lat"]), float(hit["lon"])
                # Sanity check: roughly in the Hagen area.
                if 51.25 <= lat <= 51.50 and 7.30 <= lon <= 7.70:
                    return lat, lon
        except (httpx.HTTPError, ValueError, KeyError, IndexError, TypeError) as exc:
            # Best effort: a transport error or malformed payload only
            # disqualifies this variant, but it is reported, not hidden.
            print(f"  ! Nominatim query {query!r} failed: {exc!r}")
        finally:
            # Runs on every path, including the successful `return` above.
            # Conservative pause: Nominatim blocks aggressively.
            time.sleep(delay)
    return None
def main() -> None:
    """Geocode pending rows of the ``orte`` table and store the outcome.

    Reads up to ``--limit`` (default 500) rows with
    ``geocode_status='pending'``, highest ``vorlage_count`` first, and looks
    each one up with ``geocode``. Hits are stored as ``lat``/``lon`` with
    status ``'success'``; misses are marked ``'failed'``. Work is committed
    every 50 rows and once more on exit, including when the run is aborted
    by an error or Ctrl-C, so processed rows are not lost.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--limit", type=int, default=500)
    args = parser.parse_args()

    success = 0
    failed = 0

    conn = sqlite3.connect(str(DB))
    try:
        conn.row_factory = sqlite3.Row
        pending = conn.execute(
            "SELECT id, name FROM orte WHERE geocode_status='pending' ORDER BY vorlage_count DESC LIMIT ?",
            (args.limit,)
        ).fetchall()
        total_pending = conn.execute(
            "SELECT COUNT(*) FROM orte WHERE geocode_status='pending'"
        ).fetchone()[0]
        print(f"Pending: {len(pending)} (von {total_pending} total)")

        try:
            # The context manager closes the HTTP client even on failure.
            with httpx.Client() as client:
                for position, row in enumerate(pending, start=1):
                    coords = geocode(client, row["name"])
                    if coords:
                        conn.execute(
                            "UPDATE orte SET lat=?, lon=?, geocode_status='success' WHERE id=?",
                            (coords[0], coords[1], row["id"]),
                        )
                        success += 1
                        if success % 20 == 0:
                            print(f" [{position}/{len(pending)}] ✓ {success} geocoded, ✗ {failed} failed")
                    else:
                        conn.execute(
                            "UPDATE orte SET geocode_status='failed' WHERE id=?",
                            (row["id"],),
                        )
                        failed += 1
                    # Periodic commit bounds the work lost on a hard crash.
                    if position % 50 == 0:
                        conn.commit()
        finally:
            # Persist everything processed so far, even when interrupted.
            conn.commit()
    finally:
        conn.close()

    print(f"\n{success} | ✗ {failed} | Gesamt: {success + failed}")
# Run the geocoding batch only when executed as a script, not on import.
if __name__ == "__main__":
    main()