Vollständige Pipeline zur Analyse kommunaler Vorlagen aus ALLRIS: - OParl-Import: 20.149 Vorlagen - PDF-Extraktion: 10.045 Volltexte (adaptives Throttling) - KI-Zusammenfassungen: 10.026 via Qwen Plus (parallelisiert) - Beratungsfolge-Scraper: Beschlusstexte + Wortprotokolle - Abstimmungs-Analyse mit Koalitionsmatrix - Georeferenzierung (Nominatim) Stack: FastAPI + SvelteKit + SQLite Deployment: Docker + Traefik auf VServer Daten (DB, Logs) nicht im Repo — siehe Restic-Backup. Repo-Setup: scripts/setup.sh für Neuaufbau aus OParl-API.
153 lines
5.4 KiB
Python
153 lines
5.4 KiB
Python
"""Ketten-Builder: groups Vorlagen into chains based on Aktenzeichen-Suffix references."""
|
|
|
|
import sqlite3
|
|
from collections import defaultdict
|
|
|
|
from tracker.core.status import compute_status
|
|
|
|
|
|
def build_suffix_references(conn: sqlite3.Connection) -> int:
    """Create referenzen entries for Aktenzeichen-Suffix relations.

    Two kinds of links are written, both with typ 'suffix' and konfidenz 1.0:

    * suffixed Vorlage -> unsuffixed Vorlage sharing the same
      aktenzeichen_basis (e.g. 0362/2025-1 references 0362/2025);
    * suffixed Vorlage -> the Vorlage of the same basis whose numeric
      suffix is exactly one lower (-2 -> -1, -3 -> -2, ...).

    Both statements use INSERT OR IGNORE, so rows that conflict with an
    existing entry are skipped instead of raising (this presumably relies
    on a uniqueness constraint on referenzen -- verify against the schema).
    The transaction is committed before returning.

    Args:
        conn: Open SQLite connection holding the vorlagen and referenzen
            tables.

    Returns:
        The number of rows actually inserted by both statements combined.
    """
    base_links = conn.execute("""
        INSERT OR IGNORE INTO referenzen (quelle_id, ziel_id, typ, konfidenz)
        SELECT child.id, parent.id, 'suffix', 1.0
        FROM vorlagen child
        JOIN vorlagen parent ON child.aktenzeichen_basis = parent.aktenzeichen_basis
        WHERE child.aktenzeichen_suffix IS NOT NULL
          AND parent.aktenzeichen_suffix IS NULL
          AND child.id != parent.id
    """)
    created = base_links.rowcount

    # Also link sequential suffixes: -2 -> -1, -3 -> -2, etc.
    # The suffix is compared numerically after stripping any '-' characters.
    sequential_links = conn.execute("""
        INSERT OR IGNORE INTO referenzen (quelle_id, ziel_id, typ, konfidenz)
        SELECT later.id, earlier.id, 'suffix', 1.0
        FROM vorlagen later
        JOIN vorlagen earlier
          ON later.aktenzeichen_basis = earlier.aktenzeichen_basis
         AND later.aktenzeichen_suffix IS NOT NULL
         AND earlier.aktenzeichen_suffix IS NOT NULL
         AND CAST(REPLACE(later.aktenzeichen_suffix, '-', '') AS INTEGER)
             = CAST(REPLACE(earlier.aktenzeichen_suffix, '-', '') AS INTEGER) + 1
        WHERE later.id != earlier.id
    """)
    # Count the sequential links too; previously only the first statement's
    # rowcount was returned, under-reporting the references created.
    created += sequential_links.rowcount

    conn.commit()
    return created
|
|
|
|
|
|
def build_chains(conn: sqlite3.Connection) -> int:
    """Build ketten from Vorlagen that share the same aktenzeichen_basis.

    For every non-NULL aktenzeichen_basis the Vorlagen are loaded in suffix
    order (no suffix first, then ascending numeric suffix). The first one
    becomes the chain's Ursprung; a chain is only written when that
    Ursprung has typ 'antrag' or 'anfrage'. An existing ketten row (matched
    by ursprung_id) is updated and its ketten_glieder are rebuilt;
    otherwise a new ketten row is inserted.

    Rows are read by column name, so the connection needs a row factory
    that supports that (e.g. sqlite3.Row). Changes are committed once at
    the end.

    Args:
        conn: Open SQLite connection.

    Returns:
        The number of chains created or updated.
    """
    # One row per distinct, non-NULL aktenzeichen_basis.
    basis_rows = conn.execute("""
        SELECT aktenzeichen_basis, COUNT(*) as cnt
        FROM vorlagen
        WHERE aktenzeichen_basis IS NOT NULL
        GROUP BY aktenzeichen_basis
        HAVING cnt >= 1
    """).fetchall()

    processed = 0
    for basis_row in basis_rows:
        # All Vorlagen of this basis: unsuffixed entry first, then by suffix number.
        glieder = conn.execute("""
            SELECT id, aktenzeichen, aktenzeichen_suffix, typ, datum_eingang, betreff
            FROM vorlagen
            WHERE aktenzeichen_basis = ?
            ORDER BY
                CASE WHEN aktenzeichen_suffix IS NULL THEN 0
                     ELSE CAST(REPLACE(aktenzeichen_suffix, '-', '') AS INTEGER)
                END
        """, (basis_row["aktenzeichen_basis"],)).fetchall()
        if not glieder:
            continue

        kopf = glieder[0]
        kind = kopf["typ"]
        # Chains are only built when the first member is an antrag or anfrage.
        if kind != "antrag" and kind != "anfrage":
            continue

        status = compute_status(conn, kopf["id"], kind, glieder)

        # Latest non-empty datum_eingang among the members; falls back to
        # the Ursprung's own (empty) value when no member has a date.
        newest = max(
            (g["datum_eingang"] for g in glieder if g["datum_eingang"]),
            default=kopf["datum_eingang"],
        )

        known = conn.execute(
            "SELECT id FROM ketten WHERE ursprung_id = ?", (kopf["id"],)
        ).fetchone()

        # Column values shared by the INSERT and the UPDATE, in column order.
        shared = (
            kind,
            kopf["betreff"],
            status["status"],
            status.get("status_seit"),
            newest,
            status.get("vertagungen_count", 0),
        )

        if not known:
            inserted = conn.execute("""
                INSERT INTO ketten (ursprung_id, typ, thema, status, status_seit,
                                    letzte_aktivitaet, vertagungen_count)
                VALUES (?, ?, ?, ?, ?, ?, ?)
            """, (kopf["id"],) + shared)
            kette_id = inserted.lastrowid
        else:
            kette_id = known["id"]
            conn.execute("""
                UPDATE ketten
                SET typ = ?, thema = ?, status = ?, status_seit = ?,
                    letzte_aktivitaet = ?, vertagungen_count = ?
                WHERE id = ?
            """, shared + (kette_id,))
            # Drop the old members so the list below fully replaces them.
            conn.execute("DELETE FROM ketten_glieder WHERE kette_id = ?", (kette_id,))

        # Write the members with their 0-based position and derived role.
        conn.executemany("""
            INSERT OR REPLACE INTO ketten_glieder (kette_id, vorlage_id, position, rolle)
            VALUES (?, ?, ?, ?)
        """, [
            (kette_id, glied["id"], idx, _determine_rolle(glied, idx))
            for idx, glied in enumerate(glieder)
        ])

        processed += 1

    conn.commit()
    return processed
|
|
|
|
|
|
def _determine_rolle(member: sqlite3.Row, position: int) -> str:
    """Return the role of a chain member.

    The member at position 0 is always the "ursprung". For every other
    position the role follows from the member's typ: "stellungnahme" and
    "bericht" map to themselves, "antrag" and "anfrage" map to
    "aenderung", and anything else is an "ergaenzung".
    """
    if position == 0:
        # Decided by position alone; typ is not read for the first member.
        return "ursprung"

    rolle_by_typ = {
        "stellungnahme": "stellungnahme",
        "bericht": "bericht",
        "antrag": "aenderung",
        "anfrage": "aenderung",
    }
    return rolle_by_typ.get(member["typ"], "ergaenzung")
|