- scripts/geocode_pending.py: Nominatim mit Hagen-Fokus, Rate-Limiting 1/s
- 2.293 Orte geocodiert (vorher 608), 31k+ noch offen (läuft weiter)
- Karte: Marker-Clustering für bessere Performance
- 27k+ Orte-Einträge → Clustering nötig

Teilweise Closes #5, Closes #6
This commit is contained in:
parent
c3e9f4b3e8
commit
69edf8f64c
15
frontend/package-lock.json
generated
15
frontend/package-lock.json
generated
@ -9,7 +9,8 @@
|
||||
"version": "0.0.1",
|
||||
"dependencies": {
|
||||
"@types/leaflet": "^1.9.21",
|
||||
"leaflet": "^1.9.4"
|
||||
"leaflet": "^1.9.4",
|
||||
"leaflet.markercluster": "^1.5.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@sveltejs/adapter-auto": "^7.0.0",
|
||||
@ -1560,7 +1561,17 @@
|
||||
"version": "1.9.4",
|
||||
"resolved": "https://registry.npmjs.org/leaflet/-/leaflet-1.9.4.tgz",
|
||||
"integrity": "sha512-nxS1ynzJOmOlHp+iL3FyWqK89GtNL8U8rvlMOsQdTTssxZwCXh8N2NB3GDQOL+YR3XnWyZAxwQixURb+FA74PA==",
|
||||
"license": "BSD-2-Clause"
|
||||
"license": "BSD-2-Clause",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/leaflet.markercluster": {
|
||||
"version": "1.5.3",
|
||||
"resolved": "https://registry.npmjs.org/leaflet.markercluster/-/leaflet.markercluster-1.5.3.tgz",
|
||||
"integrity": "sha512-vPTw/Bndq7eQHjLBVlWpnGeLa3t+3zGiuM7fJwCkiMFq+nmRuG3RI3f7f4N4TDX7T4NpbAXpR2+NTRSEGfCSeA==",
|
||||
"license": "MIT",
|
||||
"peerDependencies": {
|
||||
"leaflet": "^1.3.1"
|
||||
}
|
||||
},
|
||||
"node_modules/lightningcss": {
|
||||
"version": "1.32.0",
|
||||
|
||||
@ -25,6 +25,7 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"@types/leaflet": "^1.9.21",
|
||||
"leaflet": "^1.9.4"
|
||||
"leaflet": "^1.9.4",
|
||||
"leaflet.markercluster": "^1.5.3"
|
||||
}
|
||||
}
|
||||
|
||||
@ -23,6 +23,7 @@
|
||||
let selectedOrt = $state<Ort | null>(null);
|
||||
let selectedVorlagen = $state<Vorlage[]>([]);
|
||||
let loading = $state(true);
|
||||
let markerCount = $state(0);
|
||||
let map: any = null;
|
||||
|
||||
const API_BASE = typeof window !== 'undefined'
|
||||
@ -65,9 +66,11 @@
|
||||
|
||||
await loadOrte();
|
||||
|
||||
// Leaflet dynamisch laden
|
||||
// Leaflet + MarkerCluster dynamisch laden
|
||||
const L = await import('leaflet');
|
||||
await import('leaflet/dist/leaflet.css');
|
||||
await import('leaflet.markercluster');
|
||||
// MarkerCluster CSS via CDN (im head unten)
|
||||
|
||||
// Map initialisieren
|
||||
map = L.map('map').setView(HAGEN_CENTER, HAGEN_ZOOM);
|
||||
@ -76,16 +79,41 @@
|
||||
attribution: '© <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a>'
|
||||
}).addTo(map);
|
||||
|
||||
// Marker hinzufügen
|
||||
// MarkerClusterGroup mit Performance-Optionen
|
||||
const clusterGroup = (L as any).markerClusterGroup({
|
||||
chunkedLoading: true,
|
||||
chunkInterval: 100,
|
||||
chunkDelay: 10,
|
||||
maxClusterRadius: 50,
|
||||
spiderfyOnMaxZoom: true,
|
||||
showCoverageOnHover: false,
|
||||
disableClusteringAtZoom: 17,
|
||||
iconCreateFunction: function(cluster: any) {
|
||||
const count = cluster.getChildCount();
|
||||
let size = 'small';
|
||||
let px = 30;
|
||||
if (count >= 50) { size = 'large'; px = 50; }
|
||||
else if (count >= 10) { size = 'medium'; px = 40; }
|
||||
|
||||
return L.divIcon({
|
||||
html: `<div class="cluster-icon cluster-${size}">${count}</div>`,
|
||||
className: 'custom-cluster',
|
||||
iconSize: L.point(px, px)
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// Marker in Batches hinzufügen (verhindert UI-Freeze)
|
||||
for (const ort of orte) {
|
||||
const radius = Math.min(6 + ort.vorlage_count * 1.5, 18);
|
||||
const marker = L.circleMarker([ort.lat, ort.lon], {
|
||||
radius: Math.min(8 + ort.vorlage_count * 2, 20),
|
||||
fillColor: '#16a34a',
|
||||
radius,
|
||||
fillColor: getColor(ort.vorlage_count),
|
||||
color: '#166534',
|
||||
weight: 2,
|
||||
opacity: 1,
|
||||
weight: 1.5,
|
||||
opacity: 0.9,
|
||||
fillOpacity: 0.7
|
||||
}).addTo(map);
|
||||
});
|
||||
|
||||
marker.bindPopup(`
|
||||
<strong>${ort.name}</strong><br>
|
||||
@ -93,13 +121,26 @@
|
||||
`);
|
||||
|
||||
marker.on('click', () => selectOrt(ort));
|
||||
clusterGroup.addLayer(marker);
|
||||
}
|
||||
|
||||
map.addLayer(clusterGroup);
|
||||
markerCount = orte.length;
|
||||
});
|
||||
|
||||
/**
 * Picks the marker fill colour for a place from its number of Vorlagen.
 *
 * @param count - Number of Vorlagen linked to the place.
 * @returns A hex colour string: red for 5 or more, orange for 3-4,
 *          green for 2, and blue for everything below that.
 */
function getColor(count: number): string {
  // Thresholds are checked from the highest down; the first one reached wins.
  const thresholds: ReadonlyArray<readonly [number, string]> = [
    [5, '#dc2626'], // red - many Vorlagen
    [3, '#f59e0b'], // orange
    [2, '#16a34a'], // green
  ];

  for (const [minimum, colour] of thresholds) {
    if (count >= minimum) {
      return colour;
    }
  }

  // Blue - a single Vorlage (also the fallback for anything below 2).
  return '#3b82f6';
}
|
||||
</script>
|
||||
|
||||
<svelte:head>
|
||||
<title>Karte - Antragstracker Hagen</title>
|
||||
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" />
|
||||
<link rel="stylesheet" href="https://unpkg.com/leaflet.markercluster@1.5.3/dist/MarkerCluster.css" />
|
||||
<link rel="stylesheet" href="https://unpkg.com/leaflet.markercluster@1.5.3/dist/MarkerCluster.Default.css" />
|
||||
</svelte:head>
|
||||
|
||||
<div class="mb-6">
|
||||
@ -120,8 +161,20 @@
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<div class="mt-4 text-sm text-gray-500">
|
||||
{orte.length} Orte geocodiert • Marker-Größe = Anzahl Vorlagen
|
||||
<div class="mt-4 flex items-center gap-4 text-sm text-gray-500">
|
||||
<span>{markerCount} Orte auf der Karte</span>
|
||||
<span class="flex items-center gap-1.5">
|
||||
<span class="inline-block w-3 h-3 rounded-full bg-blue-500"></span> 1
|
||||
</span>
|
||||
<span class="flex items-center gap-1.5">
|
||||
<span class="inline-block w-3 h-3 rounded-full bg-green-600"></span> 2
|
||||
</span>
|
||||
<span class="flex items-center gap-1.5">
|
||||
<span class="inline-block w-3 h-3 rounded-full bg-amber-500"></span> 3-4
|
||||
</span>
|
||||
<span class="flex items-center gap-1.5">
|
||||
<span class="inline-block w-3 h-3 rounded-full bg-red-600"></span> 5+
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@ -176,3 +229,33 @@
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<style>
|
||||
:global(.custom-cluster) {
|
||||
background: transparent !important;
|
||||
}
|
||||
:global(.cluster-icon) {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
border-radius: 50%;
|
||||
color: white;
|
||||
font-weight: 700;
|
||||
font-size: 13px;
|
||||
box-shadow: 0 2px 6px rgba(0,0,0,0.3);
|
||||
}
|
||||
:global(.cluster-small) {
|
||||
background: rgba(22, 163, 74, 0.85);
|
||||
width: 30px; height: 30px;
|
||||
}
|
||||
:global(.cluster-medium) {
|
||||
background: rgba(245, 158, 11, 0.85);
|
||||
width: 40px; height: 40px;
|
||||
font-size: 14px;
|
||||
}
|
||||
:global(.cluster-large) {
|
||||
background: rgba(220, 38, 38, 0.85);
|
||||
width: 50px; height: 50px;
|
||||
font-size: 15px;
|
||||
}
|
||||
</style>
|
||||
|
||||
@ -1,92 +1,196 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Geocodiert pending Orte via Nominatim (1 req/s)."""
|
||||
"""Geocodiert pending Orte via Nominatim (1 req/s, Hagen-fokussiert)."""
|
||||
import argparse
|
||||
import re
|
||||
import sqlite3
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Optional, Tuple
|
||||
|
||||
import httpx
|
||||
|
||||
DB = Path(__file__).resolve().parent.parent / "data" / "tracker_remote.db"
|
||||
DB = Path(__file__).resolve().parent.parent / "data" / "tracker.db"
|
||||
NOMINATIM = "https://nominatim.openstreetmap.org/search"
|
||||
UA = "Antragstracker-Hagen/1.0 (tobias.roedel@econgood.org)"
|
||||
HAGEN_BBOX = "7.35,51.30,7.65,51.45"
|
||||
|
||||
# Orte die zu generisch sind für sinnvolles Geocoding
|
||||
SKIP_PATTERNS = [
|
||||
r"^hagen$", r"^hagen,?\s*(nordrhein-westfalen|nrw)$",
|
||||
r"^stadtgebiet", r"^gesamtes?\s+stadtgebiet",
|
||||
r"^(bab|a)\s*\d", r"^bundesstraße\s+\d", r"^b\s*\d+$",
|
||||
r"^(alle|diverse|verschiedene)\s+", r"^(stadt|kreis)\s+hagen$",
|
||||
]
|
||||
|
||||
|
||||
def should_skip(name: str) -> bool:
    """Tell whether a place name is too generic to geocode meaningfully.

    The name is trimmed and lower-cased first. It is skipped when any regex
    in SKIP_PATTERNS matches it, or when fewer than three characters remain.

    Args:
        name: Raw place name.

    Returns:
        True if the place should be skipped, False otherwise.
    """
    candidate = name.strip().lower()
    if any(re.search(pattern, candidate) for pattern in SKIP_PATTERNS):
        return True
    # Too short / generic to be a useful search term.
    return len(candidate) < 3
|
||||
|
||||
|
||||
def normalize_query(name: str) -> str:
    """Prepare a place name for use as a Nominatim search term.

    Two clean-ups are applied:

    * A trailing standalone "Hagen" (optionally preceded by a comma) is
      removed so the city name is not duplicated when it is appended later.
      The word must be preceded by a comma, whitespace or the start of the
      string; names that merely end in "hagen" (e.g. "Kirchhagen") are left
      intact.
    * A leading "Hagen-" district prefix is moved to a ", Hagen" suffix
      (e.g. "Hagen-Haspe" becomes "Haspe, Hagen").

    Args:
        name: Raw place name.

    Returns:
        The cleaned-up name; may be empty if the input was just "Hagen".
    """
    clean = name.strip()
    # Require a separator (or start of string) before "Hagen" so that the
    # suffix of a longer word is never stripped.
    clean = re.sub(r'(?:^|,\s*|\s+)Hagen\s*$', '', clean, flags=re.IGNORECASE)
    clean = clean.strip().rstrip(',').strip()
    # Turn the "Hagen-" district prefix into a ", Hagen" suffix.
    if clean.lower().startswith('hagen-'):
        clean = clean[6:].strip() + ', Hagen'
    return clean
|
||||
|
||||
|
||||
def geocode(client: httpx.Client, name: str) -> Optional[Tuple[float, float]]:
|
||||
# Clean name: remove trailing "Hagen" to avoid duplication
|
||||
clean = name.strip().rstrip(",").strip()
|
||||
if clean.lower().endswith(" hagen"):
|
||||
clean = clean[:-6].strip().rstrip(",").strip()
|
||||
"""Versuche einen Ort in Hagen zu geocodieren."""
|
||||
clean = normalize_query(name)
|
||||
|
||||
# Try multiple query variants, progressively less strict
|
||||
queries = [
|
||||
(f"{clean}, Hagen, Germany", True), # bounded to Hagen
|
||||
(f"{clean}, Hagen, NRW", False), # unbounded fallback
|
||||
(f"{name}, Germany", False), # original name
|
||||
# Strikt in Hagen Bounding Box
|
||||
(f"{clean}, Hagen", {"viewbox": HAGEN_BBOX, "bounded": 1}),
|
||||
# Etwas lockerer
|
||||
(f"{clean}, Hagen, NRW, Germany", {}),
|
||||
# Originalname als Fallback
|
||||
(f"{name}, Germany", {}),
|
||||
]
|
||||
for q, bounded in queries:
|
||||
|
||||
for q, extra_params in queries:
|
||||
params = {"q": q, "format": "json", "limit": 1, "addressdetails": 1}
|
||||
params.update(extra_params)
|
||||
try:
|
||||
params = {"q": q, "format": "json", "limit": 1}
|
||||
if bounded:
|
||||
params["viewbox"] = HAGEN_BBOX
|
||||
params["bounded"] = 1
|
||||
r = client.get(NOMINATIM, params=params,
|
||||
headers={"User-Agent": UA}, timeout=10)
|
||||
r = client.get(
|
||||
NOMINATIM, params=params,
|
||||
headers={"User-Agent": UA}, timeout=10
|
||||
)
|
||||
if r.status_code == 200 and r.json():
|
||||
d = r.json()[0]
|
||||
lat, lon = float(d["lat"]), float(d["lon"])
|
||||
# Sanity check: roughly in Hagen area
|
||||
# Sanity: muss grob in Hagen-Region liegen
|
||||
if 51.25 <= lat <= 51.50 and 7.30 <= lon <= 7.70:
|
||||
return lat, lon
|
||||
except Exception:
|
||||
pass
|
||||
time.sleep(3.0) # Conservative: Nominatim blocks aggressively
|
||||
time.sleep(1.1) # Nominatim Policy: 1 req/s
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--limit", type=int, default=500)
|
||||
parser = argparse.ArgumentParser(description="Geocode pending Orte in tracker.db")
|
||||
parser.add_argument("--limit", type=int, default=500,
|
||||
help="Max Orte pro Durchlauf (Default: 500)")
|
||||
parser.add_argument("--retry-failed", action="store_true",
|
||||
help="Auch fehlgeschlagene Orte erneut versuchen")
|
||||
parser.add_argument("--dry-run", action="store_true",
|
||||
help="Nur anzeigen, nichts schreiben")
|
||||
args = parser.parse_args()
|
||||
|
||||
conn = sqlite3.connect(str(DB))
|
||||
conn.row_factory = sqlite3.Row
|
||||
|
||||
# Status-Filter
|
||||
status_filter = "geocode_status='pending'"
|
||||
if args.retry_failed:
|
||||
status_filter = "geocode_status IN ('pending','failed')"
|
||||
|
||||
# Erst generische Orte skippen
|
||||
generics = conn.execute(
|
||||
f"SELECT id, name FROM orte WHERE {status_filter}"
|
||||
).fetchall()
|
||||
|
||||
skipped = 0
|
||||
for row in generics:
|
||||
if should_skip(row["name"]):
|
||||
if not args.dry_run:
|
||||
conn.execute(
|
||||
"UPDATE orte SET geocode_status='skipped' WHERE id=?",
|
||||
(row["id"],)
|
||||
)
|
||||
skipped += 1
|
||||
if skipped:
|
||||
conn.commit()
|
||||
print(f"⏭️ {skipped} generische Orte übersprungen")
|
||||
|
||||
# Dann die geocodierbaren holen
|
||||
pending = conn.execute(
|
||||
"SELECT id, name FROM orte WHERE geocode_status='pending' ORDER BY vorlage_count DESC LIMIT ?",
|
||||
f"SELECT id, name FROM orte WHERE {status_filter} "
|
||||
"ORDER BY vorlage_count DESC, id LIMIT ?",
|
||||
(args.limit,)
|
||||
).fetchall()
|
||||
|
||||
total_pending = conn.execute("SELECT COUNT(*) FROM orte WHERE geocode_status='pending'").fetchone()[0]
|
||||
print(f"Pending: {len(pending)} (von {total_pending} total)")
|
||||
total_pending = conn.execute(
|
||||
f"SELECT COUNT(*) FROM orte WHERE {status_filter}"
|
||||
).fetchone()[0]
|
||||
|
||||
print(f"📍 Geocoding: {len(pending)} von {total_pending} pending Orten (Limit: {args.limit})")
|
||||
if args.dry_run:
|
||||
for row in pending[:20]:
|
||||
print(f" → {row['name']}")
|
||||
return
|
||||
|
||||
success = 0
|
||||
failed = 0
|
||||
client = httpx.Client()
|
||||
start = time.time()
|
||||
|
||||
for i, row in enumerate(pending):
|
||||
coords = geocode(client, row["name"])
|
||||
if coords:
|
||||
conn.execute("UPDATE orte SET lat=?, lon=?, geocode_status='success' WHERE id=?",
|
||||
(coords[0], coords[1], row["id"]))
|
||||
success += 1
|
||||
if success % 20 == 0:
|
||||
print(f" [{i+1}/{len(pending)}] ✓ {success} geocoded, ✗ {failed} failed")
|
||||
else:
|
||||
conn.execute("UPDATE orte SET geocode_status='failed' WHERE id=?", (row["id"],))
|
||||
failed += 1
|
||||
try:
|
||||
for i, row in enumerate(pending):
|
||||
coords = geocode(client, row["name"])
|
||||
if coords:
|
||||
conn.execute(
|
||||
"UPDATE orte SET lat=?, lon=?, geocode_status='success' WHERE id=?",
|
||||
(coords[0], coords[1], row["id"])
|
||||
)
|
||||
success += 1
|
||||
sym = "✓"
|
||||
else:
|
||||
conn.execute(
|
||||
"UPDATE orte SET geocode_status='failed' WHERE id=?",
|
||||
(row["id"],)
|
||||
)
|
||||
failed += 1
|
||||
sym = "✗"
|
||||
|
||||
if (i + 1) % 50 == 0:
|
||||
conn.commit()
|
||||
elapsed = time.time() - start
|
||||
rate = (i + 1) / elapsed if elapsed > 0 else 0
|
||||
print(
|
||||
f" [{i+1:4d}/{len(pending)}] {sym} {row['name'][:50]:<50s} "
|
||||
f"(✓{success} ✗{failed} | {rate:.1f}/s)",
|
||||
end="\r"
|
||||
)
|
||||
|
||||
conn.commit()
|
||||
conn.close()
|
||||
client.close()
|
||||
# Periodisch committen
|
||||
if (i + 1) % 25 == 0:
|
||||
conn.commit()
|
||||
except KeyboardInterrupt:
|
||||
print("\n⚠️ Abgebrochen!")
|
||||
finally:
|
||||
conn.commit()
|
||||
conn.close()
|
||||
client.close()
|
||||
|
||||
total_geo = success # just this run
|
||||
print(f"\n✓ {success} | ✗ {failed} | Gesamt: {success + failed}")
|
||||
elapsed = time.time() - start
|
||||
print(f"\n\n{'='*60}")
|
||||
print(f"✅ Fertig in {elapsed:.0f}s")
|
||||
print(f" ✓ {success} geocodiert")
|
||||
print(f" ✗ {failed} fehlgeschlagen")
|
||||
print(f" ⏭️ {skipped} übersprungen")
|
||||
|
||||
# Gesamtstatus
|
||||
conn2 = sqlite3.connect(str(DB))
|
||||
stats = conn2.execute(
|
||||
"SELECT geocode_status, COUNT(*) FROM orte GROUP BY geocode_status"
|
||||
).fetchall()
|
||||
conn2.close()
|
||||
print(f"\n📊 Gesamt:")
|
||||
for status, count in stats:
|
||||
print(f" {status}: {count}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Loading…
Reference in New Issue
Block a user