#!/usr/bin/env python3
"""
AI summaries for municipal council motions (Anträge) via Qwen (DashScope).

Places mentioned in a motion are extracted as well and geocoded via Nominatim.
"""
|
||
|
|
|
||
|
|
import argparse
|
||
|
|
import json
|
||
|
|
import os
|
||
|
|
import sqlite3
|
||
|
|
import time
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
import httpx
|
||
|
|
|
||
|
|
# --- Geocoding (Nominatim) ---------------------------------------------------
NOMINATIM_URL = "https://nominatim.openstreetmap.org/search"
# Sent as the User-Agent header of every Nominatim request.
USER_AGENT = "Antragstracker-Hagen/1.0"
# Passed as Nominatim's `viewbox` parameter to restrict hits to the Hagen area.
HAGEN_BBOX = "7.35,51.30,7.65,51.45"

# --- Local paths ---------------------------------------------------------------
# Two directory levels above this script.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
DB_PATH = PROJECT_ROOT.joinpath("data", "tracker_remote.db")

# --- DashScope API ---------------------------------------------------------------
DASHSCOPE_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions"
# Prefer the environment variable; the `security` lookup is only executed
# when QWEN_API_KEY is unset or empty.
DASHSCOPE_KEY = (
    os.environ.get("QWEN_API_KEY")
    or os.popen("security find-generic-password -s qwen-api -w 2>/dev/null").read().strip()
)
|
||
|
|
|
||
|
|
PROMPT_TEMPLATE = """Analysiere diesen kommunalpolitischen Antrag aus Hagen.
|
||
|
|
|
||
|
|
DOKUMENT:
|
||
|
|
{volltext}
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
Erstelle eine strukturierte Zusammenfassung im JSON-Format:
|
||
|
|
|
||
|
|
{{
|
||
|
|
"zusammenfassung": "2-3 Sätze, was gefordert wird",
|
||
|
|
"kernforderung": "Die zentrale Forderung in einem Satz",
|
||
|
|
"begruendung": "Warum wird das gefordert? (kurz)",
|
||
|
|
"thema": "Hauptthema (z.B. Verkehr, Soziales, Umwelt)",
|
||
|
|
"partei": "Antragstellende Fraktion falls erkennbar",
|
||
|
|
"orte": [
|
||
|
|
{{
|
||
|
|
"rohtext": "Die genaue Formulierung im Text (z.B. 'Polizeiwache an der Boeler Straße')",
|
||
|
|
"kontext": "Der Satz in dem der Ort erwähnt wird",
|
||
|
|
"typ": "strasse|platz|stadtteil|gebaeude|sonstiges",
|
||
|
|
"geocodierbar": true/false,
|
||
|
|
"geocode_query": "Suchbegriff für Karte (z.B. 'Boeler Straße' statt 'Polizeiwache an der Boeler Straße')"
|
||
|
|
}}
|
||
|
|
]
|
||
|
|
}}
|
||
|
|
|
||
|
|
WICHTIG für orte:
|
||
|
|
- Extrahiere ALLE geografischen Erwähnungen
|
||
|
|
- Bei "X an der Y-Straße" ist geocode_query = "Y-Straße"
|
||
|
|
- Straßennamen, Plätze, Stadtteile sind geocodierbar=true
|
||
|
|
- "Spielplatz", "Schule", "Bushaltestelle" ohne Straße sind geocodierbar=false
|
||
|
|
- geocode_query nur bei geocodierbar=true setzen
|
||
|
|
|
||
|
|
NUR JSON ausgeben, keine Erklärungen."""
|
||
|
|
|
||
|
|
|
||
|
|
def get_db():
    """Open the tracker database at ``DB_PATH`` and return the connection.

    The connection uses ``sqlite3.Row`` as row factory, so result columns can
    be read by name as well as by index.
    """
    database_file = str(DB_PATH)
    connection = sqlite3.connect(database_file)
    connection.row_factory = sqlite3.Row
    return connection
|
||
|
|
|
||
|
|
|
||
|
|
def call_qwen(prompt: str) -> dict | None:
    """Send *prompt* to the Qwen chat-completions endpoint and parse the reply.

    The model's answer is expected to be a JSON object, optionally wrapped in
    a Markdown code fence.

    Args:
        prompt: Complete prompt, sent as a single user message.

    Returns:
        The decoded JSON object, or ``None`` when no API key is configured,
        the request fails, or the reply is not a JSON object. Errors are
        printed, never raised.
    """
    if not DASHSCOPE_KEY:
        print(" FEHLER: Kein QWEN_API_KEY gefunden")
        return None

    # Bound before the try block: decoding the HTTP body (resp.json()) can
    # itself raise JSONDecodeError, and the handler below prints `content`.
    content = ""
    try:
        resp = httpx.post(
            DASHSCOPE_URL,
            headers={
                "Authorization": f"Bearer {DASHSCOPE_KEY}",
                "Content-Type": "application/json"
            },
            json={
                "model": "qwen-plus-latest",
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0.3
            },
            timeout=180
        )
        resp.raise_for_status()

        content = resp.json()["choices"][0]["message"]["content"]

        # Extract the JSON payload if it is wrapped in a Markdown code block
        if "```json" in content:
            content = content.split("```json")[1].split("```")[0]
        elif "```" in content:
            content = content.split("```")[1].split("```")[0]

        result = json.loads(content.strip())

    except json.JSONDecodeError as e:
        print(f" JSON-Parse-Fehler: {e}")
        print(f" Content: {content[:200]}...")
        return None
    except Exception as e:
        # Top-level boundary for network/API problems: report and give up on
        # this document instead of aborting the whole run.
        print(f" API-Fehler: {e}")
        return None

    # Valid JSON that is not an object (list, string, number) cannot be used
    # by callers, which access the result with .get().
    if not isinstance(result, dict):
        print(f" Unerwartetes JSON-Format: {type(result).__name__}")
        return None
    return result
|
||
|
|
|
||
|
|
|
||
|
|
def process_vorlage(conn: sqlite3.Connection, vorlage: dict) -> bool:
    """Create and store the AI summary for a single Vorlage.

    Builds the prompt from the Vorlage's full text, stores the model's answer
    in ``ki_bewertungen``, updates ``vorlagen.thema_kurz`` and hands every
    extracted place to ``process_ort``.

    Args:
        conn: Open database connection; written to and committed here.
        vorlage: Mapping with the keys ``id``, ``aktenzeichen`` and
            ``volltext_clean``.

    Returns:
        ``True`` when a summary was stored, ``False`` when the text is missing
        or shorter than 100 characters, or the model call gave no usable result.
    """
    vid = vorlage['id']
    akz = vorlage['aktenzeichen'] or f"#{vid}"
    volltext = vorlage['volltext_clean']

    if not volltext or len(volltext) < 100:
        print(f" {akz}: Volltext zu kurz")
        return False

    # Truncate overly long texts so the prompt stays bounded
    if len(volltext) > 8000:
        volltext = volltext[:8000] + "\n[...gekürzt...]"

    prompt = PROMPT_TEMPLATE.format(volltext=volltext)
    result = call_qwen(prompt)

    # Anything but a non-empty JSON object is unusable below (.get access).
    if not result or not isinstance(result, dict):
        return False

    # Model output is untrusted: only bind real strings as text columns.
    # The complete answer is kept as JSON in `anmerkungen` either way.
    zusammenfassung = result.get('zusammenfassung')
    if not isinstance(zusammenfassung, str):
        zusammenfassung = None

    # Store in ki_bewertungen
    conn.execute("""
        INSERT INTO ki_bewertungen (vorlage_id, typ, begruendung, anmerkungen, modell, prompt_version)
        VALUES (?, 'zusammenfassung', ?, ?, 'qwen-plus-latest', 'v1')
    """, (vid, zusammenfassung, json.dumps(result, ensure_ascii=False)))

    # Update thema_kurz in vorlagen
    kernforderung = result.get('kernforderung')
    if isinstance(kernforderung, str) and kernforderung:
        conn.execute("UPDATE vorlagen SET thema_kurz = ? WHERE id = ?",
                     (kernforderung[:200], vid))

    conn.commit()

    # Geocode and store places. `"orte": null` or malformed entries must not
    # abort the run after the summary has already been committed.
    raw_orte = result.get('orte')
    if not isinstance(raw_orte, list):
        raw_orte = []
    orte = [ort for ort in raw_orte if isinstance(ort, dict)]
    geocoded_count = sum(process_ort(conn, vid, ort) for ort in orte)

    print(f" {akz}: ✓ {result.get('thema', '?')} ({len(orte)} Orte, {geocoded_count} geocodiert)")
    return True
|
||
|
|
|
||
|
|
|
||
|
|
def geocode_nominatim(query: str) -> tuple[float, float] | None:
|
||
|
|
"""Geocodiert einen Ort in Hagen via Nominatim."""
|
||
|
|
try:
|
||
|
|
resp = httpx.get(
|
||
|
|
NOMINATIM_URL,
|
||
|
|
params={"q": f"{query}, Hagen, Germany", "format": "json", "limit": 1,
|
||
|
|
"viewbox": HAGEN_BBOX, "bounded": 1},
|
||
|
|
headers={"User-Agent": USER_AGENT},
|
||
|
|
timeout=10
|
||
|
|
)
|
||
|
|
resp.raise_for_status()
|
||
|
|
results = resp.json()
|
||
|
|
if results:
|
||
|
|
return (float(results[0]['lat']), float(results[0]['lon']))
|
||
|
|
except Exception as e:
|
||
|
|
pass
|
||
|
|
return None
|
||
|
|
|
||
|
|
|
||
|
|
def process_ort(conn: sqlite3.Connection, vorlage_id: int, ort: dict) -> int:
    """Store one extracted place, link it to the Vorlage and geocode it if possible.

    A place that already exists in ``orte`` (matched by name or raw text) is
    only linked; a new place is geocoded first when the model marked it as
    geocodable and supplied a query.

    Args:
        conn: Open connection whose rows support access by column name
            (``existing['id']`` is read); written to and committed here.
        vorlage_id: ID of the Vorlage the place was mentioned in.
        ort: One entry of the model's ``orte`` list (keys ``rohtext``,
            ``kontext``, ``typ``, ``geocodierbar``, ``geocode_query``).

    Returns:
        1 when the place has coordinates (already stored or just geocoded),
        otherwise 0.
    """
    # Model output is untrusted; ignore entries that are not JSON objects.
    if not isinstance(ort, dict):
        return 0

    rohtext = ort.get('rohtext', '')
    kontext = ort.get('kontext', '')
    typ = ort.get('typ', 'sonstiges')
    geocodierbar = ort.get('geocodierbar', False)
    geocode_query = ort.get('geocode_query')

    if not rohtext:
        return 0

    # Context is stored truncated; anything that is not a non-empty string
    # is stored as NULL.
    kontext_kurz = kontext[:500] if isinstance(kontext, str) and kontext else None

    # Check whether the place already exists
    existing = conn.execute(
        "SELECT id, lat FROM orte WHERE name = ? OR rohtext = ?",
        (geocode_query or rohtext, rohtext)
    ).fetchone()

    if existing:
        # Only create the link
        link = conn.execute("""
            INSERT OR IGNORE INTO vorlagen_orte (vorlage_id, ort_id, kontext)
            VALUES (?, ?, ?)
        """, (vorlage_id, existing['id'], kontext_kurz))
        # Count the Vorlage only when the link is new; an ignored insert
        # (rowcount 0) means this Vorlage was already counted for the place.
        if link.rowcount > 0:
            conn.execute("UPDATE orte SET vorlage_count = vorlage_count + 1 WHERE id = ?", (existing['id'],))
        conn.commit()
        return 1 if existing['lat'] is not None else 0

    # Create a new place
    lat, lon = None, None
    status = 'skipped'

    if geocodierbar and geocode_query:
        time.sleep(1.1)  # Nominatim rate limit
        coords = geocode_nominatim(geocode_query)
        if coords:
            lat, lon = coords
            status = 'success'
        else:
            status = 'failed'

    cursor = conn.execute("""
        INSERT INTO orte (name, typ, lat, lon, rohtext, kontext_satz, geocode_status, vorlage_count)
        VALUES (?, ?, ?, ?, ?, ?, ?, 1)
    """, (geocode_query or rohtext, typ, lat, lon, rohtext, kontext_kurz, status))

    ort_id = cursor.lastrowid
    conn.execute("""
        INSERT OR IGNORE INTO vorlagen_orte (vorlage_id, ort_id, kontext)
        VALUES (?, ?, ?)
    """, (vorlage_id, ort_id, kontext_kurz))
    conn.commit()

    return 1 if lat is not None else 0
|
||
|
|
|
||
|
|
|
||
|
|
def main():
    """Command-line entry point: summarise Vorlagen that have no AI summary yet.

    Selects Vorlagen that have a full text but no ``ki_bewertungen`` row of
    type ``zusammenfassung``, newest first, optionally filtered by ``--typ``
    and capped by ``--limit``, and processes them one by one.
    """
    parser = argparse.ArgumentParser(description="KI-Zusammenfassung für Anträge")
    parser.add_argument("--limit", type=int, default=5, help="Max. Anzahl (default: 5)")
    parser.add_argument("--typ", type=str, default="", help="Vorlagen-Typ")
    args = parser.parse_args()

    print("=== KI-Zusammenfassung ===\n")

    conn = get_db()
    try:
        # Vorlagen with full text but without an AI summary
        query = """
            SELECT v.id, v.aktenzeichen, v.volltext_clean
            FROM vorlagen v
            LEFT JOIN ki_bewertungen kb ON v.id = kb.vorlage_id AND kb.typ = 'zusammenfassung'
            WHERE v.volltext_clean IS NOT NULL
            AND kb.id IS NULL
        """
        # Command-line values are bound as parameters instead of being pasted
        # into the SQL text, so quotes in --typ cannot break the statement.
        params = []
        if args.typ:
            query += " AND v.typ = ?"
            params.append(args.typ)
        query += " ORDER BY v.datum_eingang DESC LIMIT ?"
        params.append(args.limit)

        vorlagen = conn.execute(query, params).fetchall()
        print(f"Verarbeite {len(vorlagen)} Vorlagen\n")

        success = 0
        for v in vorlagen:
            if process_vorlage(conn, dict(v)):
                success += 1
    finally:
        # Close the connection even when processing raises.
        conn.close()

    print(f"\n=== Fertig: {success}/{len(vorlagen)} erfolgreich ===")
|
||
|
|
|
||
|
|
|
||
|
|
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|