#!/usr/bin/env python3
"""Geocode pending places ("Orte") via the Nominatim search API.

Rows of the ``orte`` table whose ``geocode_status`` is ``'pending'`` are
looked up by name. A hit inside the Hagen area is stored as ``lat``/``lon``
with status ``'success'``; everything else is marked ``'failed'``.

Requests are throttled: the script pauses ``REQUEST_DELAY`` seconds after
every single request, successful or not.
"""

import argparse
import sqlite3
import sys
import time
from contextlib import closing
from pathlib import Path
from typing import Optional, Tuple

import httpx

DB = Path(__file__).resolve().parent.parent / "data" / "tracker_remote.db"
NOMINATIM = "https://nominatim.openstreetmap.org/search"
UA = "Antragstracker-Hagen/1.0 (tobias.roedel@econgood.org)"
# Sent as the ``viewbox`` parameter of bounded queries.
# NOTE(review): looks like min_lon,min_lat,max_lon,max_lat -- confirm against
# the Nominatim search API documentation.
HAGEN_BBOX = "7.35,51.30,7.65,51.45"

# Pause after each request; deliberately conservative because Nominatim
# blocks clients aggressively.
REQUEST_DELAY = 3.0
# Per-request timeout handed to httpx, in seconds.
REQUEST_TIMEOUT = 10
# A result is only accepted when it lies roughly in the Hagen area.
LAT_RANGE = (51.25, 51.50)
LON_RANGE = (7.30, 7.70)
# Commit after this many processed rows.
COMMIT_EVERY = 50
# Print a progress line after this many successful lookups.
PROGRESS_EVERY = 20


def _clean_name(name: str) -> str:
    """Strip whitespace/trailing commas and a trailing " Hagen" from *name*."""
    clean = name.strip().rstrip(",").strip()
    if clean.lower().endswith(" hagen"):
        # The city is appended again when the query is built, so drop it here
        # to avoid "..., Hagen, Hagen".
        clean = clean[:-6].strip().rstrip(",").strip()
    return clean


def _lookup(
    client: httpx.Client, query: str, bounded: bool
) -> Optional[Tuple[float, float]]:
    """Run one Nominatim query and return its first hit if it is plausible.

    Returns ``(lat, lon)`` when the request succeeds and the first result
    lies within ``LAT_RANGE``/``LON_RANGE``; otherwise ``None``. Transport
    and decoding errors are reported on stderr and treated as "no result",
    so a single bad request never aborts the whole run.
    """
    params = {"q": query, "format": "json", "limit": 1}
    if bounded:
        params["viewbox"] = HAGEN_BBOX
        params["bounded"] = 1

    try:
        response = client.get(
            NOMINATIM,
            params=params,
            headers={"User-Agent": UA},
            timeout=REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            print(
                f"  ! HTTP {response.status_code} for {query!r}",
                file=sys.stderr,
            )
            return None
        results = response.json()  # parse the body exactly once
        if not results:
            return None
        first = results[0]
        lat, lon = float(first["lat"]), float(first["lon"])
    except (httpx.HTTPError, ValueError, KeyError, IndexError, TypeError) as exc:
        # ValueError covers invalid JSON and non-numeric coordinates; the
        # others cover unexpected response shapes.
        print(f"  ! lookup failed for {query!r}: {exc!r}", file=sys.stderr)
        return None

    in_area = (
        LAT_RANGE[0] <= lat <= LAT_RANGE[1]
        and LON_RANGE[0] <= lon <= LON_RANGE[1]
    )
    return (lat, lon) if in_area else None


def geocode(
    client: httpx.Client, name: str, *, delay: float = REQUEST_DELAY
) -> Optional[Tuple[float, float]]:
    """Geocode *name*, trying progressively less strict queries.

    Args:
        client: HTTP client used for the requests.
        name: Place name as stored in the database.
        delay: Seconds to pause after each request. Applied after
            successful requests as well, so back-to-back calls stay
            throttled.

    Returns:
        ``(lat, lon)`` of the first plausible hit, or ``None`` when no query
        variant produced a result inside the Hagen area.
    """
    clean = _clean_name(name)
    queries = [
        (f"{clean}, Hagen, Germany", True),   # bounded to the Hagen viewbox
        (f"{clean}, Hagen, NRW", False),      # unbounded fallback
        (f"{name}, Germany", False),          # original, uncleaned name
    ]

    for query, bounded in queries:
        coords = _lookup(client, query, bounded)
        if delay > 0:
            time.sleep(delay)
        if coords is not None:
            return coords
    return None


def main():
    """Geocode up to ``--limit`` pending places and store the outcome."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--limit", type=int, default=500)
    args = parser.parse_args()

    success = 0
    failed = 0

    # closing() guarantees the connection is closed; sqlite3's own context
    # manager only commits or rolls back.
    with closing(sqlite3.connect(str(DB))) as conn, httpx.Client() as client:
        conn.row_factory = sqlite3.Row

        pending = conn.execute(
            "SELECT id, name FROM orte WHERE geocode_status='pending' ORDER BY vorlage_count DESC LIMIT ?",
            (args.limit,),
        ).fetchall()
        total_pending = conn.execute(
            "SELECT COUNT(*) FROM orte WHERE geocode_status='pending'"
        ).fetchone()[0]
        print(f"Pending: {len(pending)} (von {total_pending} total)")

        try:
            for i, row in enumerate(pending, start=1):
                coords = geocode(client, row["name"])
                if coords is not None:
                    lat, lon = coords
                    conn.execute(
                        "UPDATE orte SET lat=?, lon=?, geocode_status='success' WHERE id=?",
                        (lat, lon, row["id"]),
                    )
                    success += 1
                    if success % PROGRESS_EVERY == 0:
                        print(
                            f" [{i}/{len(pending)}] ✓ {success} geocoded, ✗ {failed} failed"
                        )
                else:
                    conn.execute(
                        "UPDATE orte SET geocode_status='failed' WHERE id=?",
                        (row["id"],),
                    )
                    failed += 1

                if i % COMMIT_EVERY == 0:
                    conn.commit()
        finally:
            # Every UPDATE is a complete per-row result, so keep whatever was
            # finished even if the run is interrupted or fails midway.
            conn.commit()

    print(f"\n✓ {success} | ✗ {failed} | Gesamt: {success + failed}")


if __name__ == "__main__":
    main()