feat: Geocoding-Script verbessert + Karten-Clustering (#5, #6)

- scripts/geocode_pending.py: Nominatim mit Hagen-Fokus, Rate-Limiting 1/s
- 2.293 Orte geocodiert (vorher 608), 31k+ noch offen (läuft weiter)
- Karte: Marker-Clustering für bessere Performance
- 27k+ Orte-Einträge → Clustering nötig

Setzt #5 teilweise um, Closes #6
This commit is contained in:
Dotty Dotter 2026-04-02 15:42:25 +02:00
parent c3e9f4b3e8
commit 69edf8f64c
4 changed files with 261 additions and 62 deletions

View File

@ -9,7 +9,8 @@
"version": "0.0.1", "version": "0.0.1",
"dependencies": { "dependencies": {
"@types/leaflet": "^1.9.21", "@types/leaflet": "^1.9.21",
"leaflet": "^1.9.4" "leaflet": "^1.9.4",
"leaflet.markercluster": "^1.5.3"
}, },
"devDependencies": { "devDependencies": {
"@sveltejs/adapter-auto": "^7.0.0", "@sveltejs/adapter-auto": "^7.0.0",
@ -1560,7 +1561,17 @@
"version": "1.9.4", "version": "1.9.4",
"resolved": "https://registry.npmjs.org/leaflet/-/leaflet-1.9.4.tgz", "resolved": "https://registry.npmjs.org/leaflet/-/leaflet-1.9.4.tgz",
"integrity": "sha512-nxS1ynzJOmOlHp+iL3FyWqK89GtNL8U8rvlMOsQdTTssxZwCXh8N2NB3GDQOL+YR3XnWyZAxwQixURb+FA74PA==", "integrity": "sha512-nxS1ynzJOmOlHp+iL3FyWqK89GtNL8U8rvlMOsQdTTssxZwCXh8N2NB3GDQOL+YR3XnWyZAxwQixURb+FA74PA==",
"license": "BSD-2-Clause" "license": "BSD-2-Clause",
"peer": true
},
"node_modules/leaflet.markercluster": {
"version": "1.5.3",
"resolved": "https://registry.npmjs.org/leaflet.markercluster/-/leaflet.markercluster-1.5.3.tgz",
"integrity": "sha512-vPTw/Bndq7eQHjLBVlWpnGeLa3t+3zGiuM7fJwCkiMFq+nmRuG3RI3f7f4N4TDX7T4NpbAXpR2+NTRSEGfCSeA==",
"license": "MIT",
"peerDependencies": {
"leaflet": "^1.3.1"
}
}, },
"node_modules/lightningcss": { "node_modules/lightningcss": {
"version": "1.32.0", "version": "1.32.0",

View File

@ -25,6 +25,7 @@
}, },
"dependencies": { "dependencies": {
"@types/leaflet": "^1.9.21", "@types/leaflet": "^1.9.21",
"leaflet": "^1.9.4" "leaflet": "^1.9.4",
"leaflet.markercluster": "^1.5.3"
} }
} }

View File

@ -23,6 +23,7 @@
let selectedOrt = $state<Ort | null>(null); let selectedOrt = $state<Ort | null>(null);
let selectedVorlagen = $state<Vorlage[]>([]); let selectedVorlagen = $state<Vorlage[]>([]);
let loading = $state(true); let loading = $state(true);
let markerCount = $state(0);
let map: any = null; let map: any = null;
const API_BASE = typeof window !== 'undefined' const API_BASE = typeof window !== 'undefined'
@ -65,9 +66,11 @@
await loadOrte(); await loadOrte();
// Leaflet dynamisch laden // Leaflet + MarkerCluster dynamisch laden
const L = await import('leaflet'); const L = await import('leaflet');
await import('leaflet/dist/leaflet.css'); await import('leaflet/dist/leaflet.css');
await import('leaflet.markercluster');
// MarkerCluster CSS via CDN (im head unten)
// Map initialisieren // Map initialisieren
map = L.map('map').setView(HAGEN_CENTER, HAGEN_ZOOM); map = L.map('map').setView(HAGEN_CENTER, HAGEN_ZOOM);
@ -76,16 +79,41 @@
attribution: '&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a>' attribution: '&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a>'
}).addTo(map); }).addTo(map);
// Marker hinzufügen // MarkerClusterGroup mit Performance-Optionen
const clusterGroup = (L as any).markerClusterGroup({
chunkedLoading: true,
chunkInterval: 100,
chunkDelay: 10,
maxClusterRadius: 50,
spiderfyOnMaxZoom: true,
showCoverageOnHover: false,
disableClusteringAtZoom: 17,
iconCreateFunction: function(cluster: any) {
const count = cluster.getChildCount();
let size = 'small';
let px = 30;
if (count >= 50) { size = 'large'; px = 50; }
else if (count >= 10) { size = 'medium'; px = 40; }
return L.divIcon({
html: `<div class="cluster-icon cluster-${size}">${count}</div>`,
className: 'custom-cluster',
iconSize: L.point(px, px)
});
}
});
// Marker in Batches hinzufügen (verhindert UI-Freeze)
for (const ort of orte) { for (const ort of orte) {
const radius = Math.min(6 + ort.vorlage_count * 1.5, 18);
const marker = L.circleMarker([ort.lat, ort.lon], { const marker = L.circleMarker([ort.lat, ort.lon], {
radius: Math.min(8 + ort.vorlage_count * 2, 20), radius,
fillColor: '#16a34a', fillColor: getColor(ort.vorlage_count),
color: '#166534', color: '#166534',
weight: 2, weight: 1.5,
opacity: 1, opacity: 0.9,
fillOpacity: 0.7 fillOpacity: 0.7
}).addTo(map); });
marker.bindPopup(` marker.bindPopup(`
<strong>${ort.name}</strong><br> <strong>${ort.name}</strong><br>
@ -93,13 +121,26 @@
`); `);
marker.on('click', () => selectOrt(ort)); marker.on('click', () => selectOrt(ort));
clusterGroup.addLayer(marker);
} }
map.addLayer(clusterGroup);
markerCount = orte.length;
}); });
/**
 * Picks the fill colour of a place marker from its number of linked Vorlagen.
 *
 * Tiers: 5 or more -> red, 3-4 -> orange, exactly 2 -> green, anything
 * lower -> blue.
 *
 * @param count Number of Vorlagen attached to the place.
 * @returns Hex colour string for the marker fill.
 */
function getColor(count: number): string {
  // Ordered from the highest threshold down; the first tier reached wins.
  const tiers: ReadonlyArray<readonly [number, string]> = [
    [5, '#dc2626'], // red - many Vorlagen
    [3, '#f59e0b'], // orange
    [2, '#16a34a'], // green
  ];
  for (const [minimum, hex] of tiers) {
    if (count >= minimum) {
      return hex;
    }
  }
  return '#3b82f6'; // blue - a single Vorlage
}
</script> </script>
<svelte:head> <svelte:head>
<title>Karte - Antragstracker Hagen</title> <title>Karte - Antragstracker Hagen</title>
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" /> <link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" />
<link rel="stylesheet" href="https://unpkg.com/leaflet.markercluster@1.5.3/dist/MarkerCluster.css" />
<link rel="stylesheet" href="https://unpkg.com/leaflet.markercluster@1.5.3/dist/MarkerCluster.Default.css" />
</svelte:head> </svelte:head>
<div class="mb-6"> <div class="mb-6">
@ -120,8 +161,20 @@
{/if} {/if}
</div> </div>
<div class="mt-4 text-sm text-gray-500"> <div class="mt-4 flex items-center gap-4 text-sm text-gray-500">
{orte.length} Orte geocodiert • Marker-Größe = Anzahl Vorlagen <span>{markerCount} Orte auf der Karte</span>
<span class="flex items-center gap-1.5">
<span class="inline-block w-3 h-3 rounded-full bg-blue-500"></span> 1
</span>
<span class="flex items-center gap-1.5">
<span class="inline-block w-3 h-3 rounded-full bg-green-600"></span> 2
</span>
<span class="flex items-center gap-1.5">
<span class="inline-block w-3 h-3 rounded-full bg-amber-500"></span> 3-4
</span>
<span class="flex items-center gap-1.5">
<span class="inline-block w-3 h-3 rounded-full bg-red-600"></span> 5+
</span>
</div> </div>
</div> </div>
@ -176,3 +229,33 @@
</div> </div>
</div> </div>
</div> </div>
<style>
/* Styles for the cluster icons produced by iconCreateFunction. The icon HTML
   is built as a string and handed to L.divIcon, so it is not part of this
   component's own markup; the selectors are therefore wrapped in :global(). */

/* Wrapper class passed as `className` to L.divIcon.
   NOTE(review): `!important` presumably guards against a background set by
   the Leaflet / MarkerCluster stylesheets - confirm it is still needed. */
:global(.custom-cluster) {
background: transparent !important;
}
/* Shared look of the inner <div>: a round badge with the child count centred. */
:global(.cluster-icon) {
display: flex;
align-items: center;
justify-content: center;
border-radius: 50%;
color: white;
font-weight: 700;
font-size: 13px;
box-shadow: 0 2px 6px rgba(0,0,0,0.3);
}
/* Fewer than 10 children: green, 30px (matches the 30px iconSize). */
:global(.cluster-small) {
background: rgba(22, 163, 74, 0.85);
width: 30px; height: 30px;
}
/* 10-49 children: amber, 40px (matches the 40px iconSize). */
:global(.cluster-medium) {
background: rgba(245, 158, 11, 0.85);
width: 40px; height: 40px;
font-size: 14px;
}
/* 50 or more children: red, 50px (matches the 50px iconSize). */
:global(.cluster-large) {
background: rgba(220, 38, 38, 0.85);
width: 50px; height: 50px;
font-size: 15px;
}
</style>

View File

@ -1,92 +1,196 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
"""Geocodiert pending Orte via Nominatim (1 req/s).""" """Geocodiert pending Orte via Nominatim (1 req/s, Hagen-fokussiert)."""
import argparse import argparse
import re
import sqlite3 import sqlite3
import sys
import time import time
from pathlib import Path from pathlib import Path
from typing import Optional, Tuple from typing import Optional, Tuple
import httpx import httpx
DB = Path(__file__).resolve().parent.parent / "data" / "tracker_remote.db" DB = Path(__file__).resolve().parent.parent / "data" / "tracker.db"
NOMINATIM = "https://nominatim.openstreetmap.org/search" NOMINATIM = "https://nominatim.openstreetmap.org/search"
UA = "Antragstracker-Hagen/1.0 (tobias.roedel@econgood.org)" UA = "Antragstracker-Hagen/1.0 (tobias.roedel@econgood.org)"
HAGEN_BBOX = "7.35,51.30,7.65,51.45" HAGEN_BBOX = "7.35,51.30,7.65,51.45"
# Place names that are too generic to be geocoded meaningfully.
# Patterns are applied to the stripped, lower-cased name.
SKIP_PATTERNS = [
    r"^hagen$",
    r"^hagen,?\s*(nordrhein-westfalen|nrw)$",
    r"^stadtgebiet",
    r"^gesamtes?\s+stadtgebiet",
    r"^(bab|a)\s*\d",
    r"^bundesstraße\s+\d",
    r"^b\s*\d+$",
    r"^(alle|diverse|verschiedene)\s+",
    r"^(stadt|kreis)\s+hagen$",
]


def should_skip(name: str) -> bool:
    """Tell whether a place name should be excluded from geocoding.

    Args:
        name: Raw place name; it is stripped and lower-cased before testing.

    Returns:
        True when the cleaned name is shorter than three characters or
        matches one of ``SKIP_PATTERNS``, otherwise False.
    """
    candidate = name.strip().lower()
    # Too short / generic to yield a useful result.
    if len(candidate) < 3:
        return True
    return any(
        re.search(pattern, candidate) is not None for pattern in SKIP_PATTERNS
    )
def normalize_query(name: str) -> str:
    """Prepare a place name for use in a Nominatim query.

    A trailing standalone "Hagen" (optionally preceded by a comma) is
    removed, because the geocoding queries append the city themselves. A
    leading "Hagen-" district prefix is moved to the end as ", Hagen".

    Args:
        name: Raw place name as stored in the database.

    Returns:
        The cleaned name. It may be empty when the input consisted only of
        "Hagen".
    """
    clean = name.strip()
    # Remove a trailing "Hagen" only when it is a separate token, i.e. preceded
    # by the start of the string, a comma, or whitespace. Without that
    # separator, compound names ending in "...hagen" (e.g. "Waldhagen") would
    # be cut down to "Wald".
    clean = re.sub(r'(?:^|,\s*|\s+)Hagen\s*$', '', clean, flags=re.IGNORECASE)
    clean = clean.strip().rstrip(',').strip()
    # District names written as "Hagen-<district>" become "<district>, Hagen".
    if clean.lower().startswith('hagen-'):
        clean = clean[6:].strip() + ', Hagen'
    return clean
def geocode(client: httpx.Client, name: str) -> Optional[Tuple[float, float]]: def geocode(client: httpx.Client, name: str) -> Optional[Tuple[float, float]]:
# Clean name: remove trailing "Hagen" to avoid duplication """Versuche einen Ort in Hagen zu geocodieren."""
clean = name.strip().rstrip(",").strip() clean = normalize_query(name)
if clean.lower().endswith(" hagen"):
clean = clean[:-6].strip().rstrip(",").strip()
# Try multiple query variants, progressively less strict
queries = [ queries = [
(f"{clean}, Hagen, Germany", True), # bounded to Hagen # Strikt in Hagen Bounding Box
(f"{clean}, Hagen, NRW", False), # unbounded fallback (f"{clean}, Hagen", {"viewbox": HAGEN_BBOX, "bounded": 1}),
(f"{name}, Germany", False), # original name # Etwas lockerer
(f"{clean}, Hagen, NRW, Germany", {}),
# Originalname als Fallback
(f"{name}, Germany", {}),
] ]
for q, bounded in queries:
for q, extra_params in queries:
params = {"q": q, "format": "json", "limit": 1, "addressdetails": 1}
params.update(extra_params)
try: try:
params = {"q": q, "format": "json", "limit": 1} r = client.get(
if bounded: NOMINATIM, params=params,
params["viewbox"] = HAGEN_BBOX headers={"User-Agent": UA}, timeout=10
params["bounded"] = 1 )
r = client.get(NOMINATIM, params=params,
headers={"User-Agent": UA}, timeout=10)
if r.status_code == 200 and r.json(): if r.status_code == 200 and r.json():
d = r.json()[0] d = r.json()[0]
lat, lon = float(d["lat"]), float(d["lon"]) lat, lon = float(d["lat"]), float(d["lon"])
# Sanity check: roughly in Hagen area # Sanity: muss grob in Hagen-Region liegen
if 51.25 <= lat <= 51.50 and 7.30 <= lon <= 7.70: if 51.25 <= lat <= 51.50 and 7.30 <= lon <= 7.70:
return lat, lon return lat, lon
except Exception: except Exception:
pass pass
time.sleep(3.0) # Conservative: Nominatim blocks aggressively time.sleep(1.1) # Nominatim Policy: 1 req/s
return None return None
def main(): def main():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser(description="Geocode pending Orte in tracker.db")
parser.add_argument("--limit", type=int, default=500) parser.add_argument("--limit", type=int, default=500,
help="Max Orte pro Durchlauf (Default: 500)")
parser.add_argument("--retry-failed", action="store_true",
help="Auch fehlgeschlagene Orte erneut versuchen")
parser.add_argument("--dry-run", action="store_true",
help="Nur anzeigen, nichts schreiben")
args = parser.parse_args() args = parser.parse_args()
conn = sqlite3.connect(str(DB)) conn = sqlite3.connect(str(DB))
conn.row_factory = sqlite3.Row conn.row_factory = sqlite3.Row
# Status-Filter
status_filter = "geocode_status='pending'"
if args.retry_failed:
status_filter = "geocode_status IN ('pending','failed')"
# Erst generische Orte skippen
generics = conn.execute(
f"SELECT id, name FROM orte WHERE {status_filter}"
).fetchall()
skipped = 0
for row in generics:
if should_skip(row["name"]):
if not args.dry_run:
conn.execute(
"UPDATE orte SET geocode_status='skipped' WHERE id=?",
(row["id"],)
)
skipped += 1
if skipped:
conn.commit()
print(f"⏭️ {skipped} generische Orte übersprungen")
# Dann die geocodierbaren holen
pending = conn.execute( pending = conn.execute(
"SELECT id, name FROM orte WHERE geocode_status='pending' ORDER BY vorlage_count DESC LIMIT ?", f"SELECT id, name FROM orte WHERE {status_filter} "
"ORDER BY vorlage_count DESC, id LIMIT ?",
(args.limit,) (args.limit,)
).fetchall() ).fetchall()
total_pending = conn.execute("SELECT COUNT(*) FROM orte WHERE geocode_status='pending'").fetchone()[0] total_pending = conn.execute(
print(f"Pending: {len(pending)} (von {total_pending} total)") f"SELECT COUNT(*) FROM orte WHERE {status_filter}"
).fetchone()[0]
print(f"📍 Geocoding: {len(pending)} von {total_pending} pending Orten (Limit: {args.limit})")
if args.dry_run:
for row in pending[:20]:
print(f"{row['name']}")
return
success = 0 success = 0
failed = 0 failed = 0
client = httpx.Client() client = httpx.Client()
start = time.time()
for i, row in enumerate(pending):
coords = geocode(client, row["name"]) try:
if coords: for i, row in enumerate(pending):
conn.execute("UPDATE orte SET lat=?, lon=?, geocode_status='success' WHERE id=?", coords = geocode(client, row["name"])
(coords[0], coords[1], row["id"])) if coords:
success += 1 conn.execute(
if success % 20 == 0: "UPDATE orte SET lat=?, lon=?, geocode_status='success' WHERE id=?",
print(f" [{i+1}/{len(pending)}] ✓ {success} geocoded, ✗ {failed} failed") (coords[0], coords[1], row["id"])
else: )
conn.execute("UPDATE orte SET geocode_status='failed' WHERE id=?", (row["id"],)) success += 1
failed += 1 sym = ""
else:
if (i + 1) % 50 == 0: conn.execute(
conn.commit() "UPDATE orte SET geocode_status='failed' WHERE id=?",
(row["id"],)
conn.commit() )
conn.close() failed += 1
client.close() sym = ""
total_geo = success # just this run elapsed = time.time() - start
print(f"\n{success} | ✗ {failed} | Gesamt: {success + failed}") rate = (i + 1) / elapsed if elapsed > 0 else 0
print(
f" [{i+1:4d}/{len(pending)}] {sym} {row['name'][:50]:<50s} "
f"(✓{success}{failed} | {rate:.1f}/s)",
end="\r"
)
# Periodisch committen
if (i + 1) % 25 == 0:
conn.commit()
except KeyboardInterrupt:
print("\n⚠️ Abgebrochen!")
finally:
conn.commit()
conn.close()
client.close()
elapsed = time.time() - start
print(f"\n\n{'='*60}")
print(f"✅ Fertig in {elapsed:.0f}s")
print(f"{success} geocodiert")
print(f"{failed} fehlgeschlagen")
print(f" ⏭️ {skipped} übersprungen")
# Gesamtstatus
conn2 = sqlite3.connect(str(DB))
stats = conn2.execute(
"SELECT geocode_status, COUNT(*) FROM orte GROUP BY geocode_status"
).fetchall()
conn2.close()
print(f"\n📊 Gesamt:")
for status, count in stats:
print(f" {status}: {count}")
if __name__ == "__main__": if __name__ == "__main__":