feat: Initial commit — Antragstracker Hagen
Vollständige Pipeline zur Analyse kommunaler Vorlagen aus ALLRIS:
- OParl-Import: 20.149 Vorlagen
- PDF-Extraktion: 10.045 Volltexte (adaptives Throttling)
- KI-Zusammenfassungen: 10.026 via Qwen Plus (parallelisiert)
- Beratungsfolge-Scraper: Beschlusstexte + Wortprotokolle
- Abstimmungs-Analyse mit Koalitionsmatrix
- Georeferenzierung (Nominatim)

Stack: FastAPI + SvelteKit + SQLite
Deployment: Docker + Traefik auf VServer

Daten (DB, Logs) nicht im Repo — siehe Restic-Backup.
Repo-Setup: scripts/setup.sh für Neuaufbau aus OParl-API.
This commit is contained in:
commit
17606ab237
28
.gitignore
vendored
Normal file
28
.gitignore
vendored
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
# Data (500MB+ DBs, Logs, State)
|
||||||
|
data/
|
||||||
|
antraege.db
|
||||||
|
|
||||||
|
# Python
|
||||||
|
.venv/
|
||||||
|
venv/
|
||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
*.egg-info/
|
||||||
|
|
||||||
|
# Node
|
||||||
|
node_modules/
|
||||||
|
frontend/.svelte-kit/
|
||||||
|
frontend/build/
|
||||||
|
|
||||||
|
# OS
|
||||||
|
.DS_Store
|
||||||
|
*.swp
|
||||||
|
Thumbs.db
|
||||||
|
|
||||||
|
# Logs & Runtime
|
||||||
|
*.log
|
||||||
|
nohup.out
|
||||||
|
extract.log
|
||||||
|
import_urls.log
|
||||||
|
system_metrics.log
|
||||||
|
# antraege.db is already ignored above (see "Data" section)
|
||||||
41
Dockerfile
Normal file
41
Dockerfile
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
# Multi-stage build for Antragstracker Hagen:
# stage 1 compiles the SvelteKit frontend, stage 2 serves API + static files.

# Stage 1: Frontend build (Node only needed at build time)
FROM node:20-alpine AS frontend-build
WORKDIR /app/frontend
# Copy the manifests first so the npm layer is cached across code changes.
COPY frontend/package*.json ./
RUN npm ci
COPY frontend/ ./
RUN npm run build

# Stage 2: Backend runtime + prebuilt frontend assets
FROM python:3.12-slim

WORKDIR /app

# System dependencies (curl is used e.g. for container health probes)
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies (separate layer, cached unless requirements change)
COPY backend/requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Backend code
COPY backend/src/ ./src/

# Frontend static files (from build stage)
COPY --from=frontend-build /app/frontend/build ./static

# Data directory (will be mounted as volume)
RUN mkdir -p /app/data

# Environment
ENV PYTHONPATH=/app/src
# NOTE(review): the repo docs mention antraege.db — confirm tracker.db is the
# intended database filename here.
ENV DATABASE_PATH=/app/data/tracker.db

EXPOSE 8000

# Run with uvicorn
CMD ["uvicorn", "tracker.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||||
4
backend/requirements.txt
Normal file
4
backend/requirements.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
fastapi>=0.109.0
|
||||||
|
uvicorn[standard]>=0.27.0
|
||||||
|
pydantic>=2.5.0
|
||||||
|
httpx>=0.26.0
|
||||||
0
backend/src/tracker/__init__.py
Normal file
0
backend/src/tracker/__init__.py
Normal file
0
backend/src/tracker/api/__init__.py
Normal file
0
backend/src/tracker/api/__init__.py
Normal file
123
backend/src/tracker/api/models.py
Normal file
123
backend/src/tracker/api/models.py
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
"""Pydantic response models for the API."""
|
||||||
|
|
||||||
|
from datetime import date, datetime
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
|
class ParteiOut(BaseModel):
    """A political party / Fraktion as exposed by the API."""

    id: int
    kuerzel: str  # short party code (e.g. "SPD")
    name: str | None = None
    farbe: str | None = None  # display colour for the frontend — presumably a hex string, confirm
|
||||||
|
|
||||||
|
|
||||||
|
class GremiumOut(BaseModel):
    """A committee/council body (Gremium)."""

    id: int
    name: str
    kuerzel: str | None = None  # short code of the body
    typ: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class BeratungOut(BaseModel):
    """One deliberation step of a Vorlage in a Gremium."""

    id: int
    gremium: GremiumOut | None = None
    sitzung_datum: date | None = None  # date of the session
    rolle: str | None = None  # role of this step in the consultation sequence
    ergebnis: str | None = None  # normalized result
    ergebnis_text: str | None = None  # verbatim result text
|
||||||
|
|
||||||
|
|
||||||
|
class VorlageKurz(BaseModel):
    """Compact representation of a Vorlage for list views."""

    id: int
    aktenzeichen: str | None = None  # file reference number
    typ: str | None = None
    betreff: str | None = None  # subject line
    datum_eingang: date | None = None  # date received
    ist_verwaltungsvorlage: bool = False  # True if submitted by the administration
|
||||||
|
|
||||||
|
|
||||||
|
class ReferenzOut(BaseModel):
    """A cross-reference from/to another Vorlage."""

    vorlage_id: int
    aktenzeichen: str | None = None
    betreff: str | None = None
    vorlage_typ: str | None = None
    datum_eingang: date | None = None
    ref_typ: str | None = None  # kind of reference
    konfidenz: float | None = None  # extraction confidence score
    kontext: str | None = None  # text snippet the reference was found in
|
||||||
|
|
||||||
|
|
||||||
|
class KiZusammenfassung(BaseModel):
    """AI-generated summary of a Vorlage (parsed from stored JSON)."""

    zusammenfassung: str | None = None  # summary text
    kernforderung: str | None = None  # core demand
    begruendung: str | None = None  # rationale
    betroffene_orte: list[str] = []  # affected places (Pydantic copies mutable defaults)
    thema: str | None = None
    partei: str | list[str] | None = None  # submitting party; the source JSON sometimes holds a list
|
||||||
|
|
||||||
|
|
||||||
|
class VorlageDetail(BaseModel):
    """Full detail view of a Vorlage, including relations."""

    id: int
    aktenzeichen: str | None = None
    aktenzeichen_basis: str | None = None  # base part of the reference number
    aktenzeichen_suffix: str | None = None  # suffix part of the reference number
    typ: str | None = None
    betreff: str | None = None
    volltext_clean: str | None = None  # cleaned extracted full text
    datum_eingang: date | None = None
    pdf_url: str | None = None
    web_url: str | None = None
    ki_zusammenfassung: KiZusammenfassung | None = None
    ist_verwaltungsvorlage: bool = False
    thema_kurz: str | None = None  # short topic label
    antragsteller: list[ParteiOut] = []  # submitting parties
    beratungen: list[BeratungOut] = []  # deliberation history
    referenzen_ausgehend: list[ReferenzOut] = []  # outgoing references
    referenzen_eingehend: list[ReferenzOut] = []  # incoming references
    kette_id: int | None = None  # id of the chain this Vorlage belongs to, if any
|
||||||
|
|
||||||
|
|
||||||
|
class KettenGliedOut(BaseModel):
    """One link (Glied) of a Kette: a Vorlage at a given position."""

    vorlage: VorlageKurz
    position: int  # ordering within the chain
    rolle: str | None = None  # role of this link in the chain
|
||||||
|
|
||||||
|
|
||||||
|
class KetteKurz(BaseModel):
    """Compact representation of a Kette (chain of related Vorlagen)."""

    id: int
    ursprung: VorlageKurz | None = None  # originating Vorlage
    typ: str | None = None
    thema: str | None = None
    status: str | None = None
    status_seit: date | None = None  # date the current status was entered
    letzte_aktivitaet: date | None = None  # date of last activity
    vertagungen_count: int = 0  # number of postponements
    glieder_count: int = 0  # number of links in the chain
|
||||||
|
|
||||||
|
|
||||||
|
class KetteDetail(BaseModel):
    """Full detail view of a Kette, including its links and graph data."""

    id: int
    ursprung: VorlageKurz | None = None  # originating Vorlage
    typ: str | None = None
    thema: str | None = None
    status: str | None = None
    status_seit: date | None = None
    letzte_aktivitaet: date | None = None
    vertagungen_count: int = 0  # number of postponements
    glieder: list[KettenGliedOut] = []  # chain links, ordered by position
    antragsteller: list[ParteiOut] = []  # submitters of the originating Vorlage
    graph: dict | None = None  # graph/"string of pearls" structure for the frontend
|
||||||
|
|
||||||
|
|
||||||
|
class PaginatedVorlagen(BaseModel):
    """One page of Vorlagen list results."""

    items: list[VorlageKurz]
    total: int  # total matching rows, not just this page
    page: int  # 1-based page index
    page_size: int
|
||||||
|
|
||||||
|
|
||||||
|
class PaginatedKetten(BaseModel):
    """One page of Ketten list results."""

    items: list[KetteKurz]
    total: int  # total matching rows, not just this page
    page: int  # 1-based page index
    page_size: int
|
||||||
0
backend/src/tracker/api/routes/__init__.py
Normal file
0
backend/src/tracker/api/routes/__init__.py
Normal file
188
backend/src/tracker/api/routes/abstimmungen.py
Normal file
188
backend/src/tracker/api/routes/abstimmungen.py
Normal file
@ -0,0 +1,188 @@
|
|||||||
|
"""API routes for Abstimmungen und Stimmverhalten-Analysen."""
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends, Query
|
||||||
|
|
||||||
|
from tracker.db.session import get_connection
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/abstimmungen", tags=["Abstimmungen"])
|
||||||
|
|
||||||
|
|
||||||
|
def _db():
    """FastAPI dependency: yield a DB connection and close it after the request."""
    conn = get_connection()
    try:
        yield conn
    finally:
        # Release the connection even if the request handler raises.
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/stats")
def get_abstimmungen_stats(conn=Depends(_db)):
    """Overview: total number of recorded votes and their result distribution."""
    # Overall count of recorded votes.
    (total,) = conn.execute("SELECT COUNT(*) FROM abstimmungen").fetchone()

    # Result buckets, biggest first; rows without a result are skipped.
    cursor = conn.execute("""
        SELECT ergebnis, COUNT(*) as anzahl
        FROM abstimmungen
        WHERE ergebnis IS NOT NULL
        GROUP BY ergebnis
        ORDER BY anzahl DESC
    """)
    verteilung = []
    for ergebnis, anzahl in cursor.fetchall():
        verteilung.append({"ergebnis": ergebnis, "anzahl": anzahl})

    return {"total": total, "nach_ergebnis": verteilung}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/fraktionen")
def get_fraktionen_uebersicht(conn=Depends(_db)):
    """Aggregated voting behaviour per parliamentary group (Fraktion)."""
    cursor = conn.execute("""
        SELECT fraktion,
               SUM(CASE WHEN stimme='ja' THEN 1 ELSE 0 END) as ja,
               SUM(CASE WHEN stimme='nein' THEN 1 ELSE 0 END) as nein,
               SUM(CASE WHEN stimme='enthaltung' THEN 1 ELSE 0 END) as enthaltung,
               COUNT(*) as gesamt
        FROM abstimmungen_fraktionen
        GROUP BY fraktion
        ORDER BY gesamt DESC
    """)

    uebersicht = []
    for fraktion, ja, nein, enthaltung, gesamt in cursor.fetchall():
        # Guard against division by zero for groups with no recorded votes.
        ja_quote = round(ja / gesamt * 100, 1) if gesamt > 0 else 0
        uebersicht.append({
            "fraktion": fraktion,
            "ja": ja,
            "nein": nein,
            "enthaltung": enthaltung,
            "gesamt": gesamt,
            "ja_quote": ja_quote,
        })
    return uebersicht
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/koalitionsmatrix")
def get_koalitionsmatrix(conn=Depends(_db)):
    """Pairwise agreement matrix: how often do two Fraktionen vote the same way?"""
    from collections import defaultdict

    # Only clear yes/no positions count towards agreement; abstentions are excluded.
    votes = conn.execute("""
        SELECT abstimmung_id, fraktion, stimme
        FROM abstimmungen_fraktionen
        WHERE stimme IN ('ja', 'nein')
    """).fetchall()

    # Group the flat rows per vote: abstimmung_id -> {fraktion: stimme}.
    per_vote = defaultdict(dict)
    for vote_id, fraktion, stimme in votes:
        per_vote[vote_id][fraktion] = stimme

    fraktionen = sorted({row[1] for row in votes})
    # counts[f1][f2] = [same-vote count, shared-vote count]
    counts = {f1: {f2: [0, 0] for f2 in fraktionen} for f1 in fraktionen}

    for stimmen in per_vote.values():
        for f1 in stimmen:
            for f2 in stimmen:
                if f1 == f2:
                    continue
                pair = counts[f1][f2]
                pair[1] += 1
                if stimmen[f1] == stimmen[f2]:
                    pair[0] += 1

    # Flatten into a list of rows for the frontend.
    result = []
    for f1 in fraktionen:
        row = {"fraktion": f1, "uebereinstimmung": {}}
        for f2 in fraktionen:
            gleich, gesamt = counts[f1][f2]
            if f1 != f2 and gesamt > 0:
                row["uebereinstimmung"][f2] = {
                    "quote": round(gleich / gesamt * 100, 1),
                    "gleich": gleich,
                    "gesamt": gesamt,
                }
        result.append(row)

    return result
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/ablehnungen")
def get_ablehnungsverhalten(conn=Depends(_db)):
    """Who votes against whose motions?

    Joins each Fraktion-level vote with the AI summary of the voted-on
    Vorlage (which carries the submitting party in its JSON) and counts,
    per submitter, how every other Fraktion voted on their motions.
    """
    import json
    from collections import defaultdict

    # One row per (vote, Fraktion); the AI summary JSON lives in kb.anmerkungen.
    rows = conn.execute("""
        SELECT
            kb.anmerkungen,
            af.fraktion,
            af.stimme,
            a.ergebnis
        FROM abstimmungen a
        JOIN abstimmungen_fraktionen af ON a.id = af.abstimmung_id
        LEFT JOIN ki_bewertungen kb ON a.vorlage_id = kb.vorlage_id AND kb.typ = 'zusammenfassung'
        WHERE af.stimme IN ('ja', 'nein', 'enthaltung')
    """).fetchall()

    # antragsteller -> abstimmende_fraktion -> stimme -> count
    matrix = defaultdict(lambda: defaultdict(lambda: {"ja": 0, "nein": 0, "enthaltung": 0}))

    for anmerkungen, fraktion, stimme, _ergebnis in rows:
        if not anmerkungen:
            continue
        try:
            data = json.loads(anmerkungen)
            antragsteller = data.get("partei")
        # Narrowed from a bare `except:` (which also swallowed KeyboardInterrupt/
        # SystemExit): ValueError covers json.JSONDecodeError for malformed JSON,
        # TypeError non-string input, AttributeError a non-object top level.
        except (ValueError, TypeError, AttributeError):
            continue
        # The summary may list several submitting parties; attribute to the first.
        if isinstance(antragsteller, list):
            antragsteller = antragsteller[0] if antragsteller else None
        # Skip a submitter's votes on its own motions.
        if antragsteller and antragsteller != fraktion:
            matrix[antragsteller][fraktion][stimme] += 1

    # Flatten, sorted by submitter and by voting Fraktion for stable output.
    return [
        {
            "antragsteller": antragsteller,
            "abstimmungen": {
                f: counts for f, counts in sorted(abstimmungen.items())
            },
        }
        for antragsteller, abstimmungen in sorted(matrix.items())
    ]
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/vorlage/{vorlage_id}")
def get_abstimmungen_fuer_vorlage(vorlage_id: int, conn=Depends(_db)):
    """All recorded votes on one Vorlage, with per-Fraktion breakdown."""
    votes = conn.execute("""
        SELECT a.id, a.sitzung_datum, a.ergebnis, a.volltext, g.name as gremium
        FROM abstimmungen a
        LEFT JOIN gremien g ON a.gremium_id = g.id
        WHERE a.vorlage_id = ?
        ORDER BY a.sitzung_datum
    """, (vorlage_id,)).fetchall()

    out = []
    for abstimmung_id, sitzung_datum, ergebnis, volltext, gremium in votes:
        # Per-Fraktion detail for this single vote.
        detail = conn.execute("""
            SELECT fraktion, stimme, anzahl, bemerkung
            FROM abstimmungen_fraktionen
            WHERE abstimmung_id = ?
        """, (abstimmung_id,)).fetchall()

        out.append({
            "id": abstimmung_id,
            "sitzung_datum": sitzung_datum,
            "ergebnis": ergebnis,
            "volltext": volltext,
            "gremium": gremium,
            "fraktionen": [
                {"fraktion": fr, "stimme": st, "anzahl": n, "bemerkung": bem}
                for fr, st, n, bem in detail
            ],
        })
    return out
|
||||||
176
backend/src/tracker/api/routes/ketten.py
Normal file
176
backend/src/tracker/api/routes/ketten.py
Normal file
@ -0,0 +1,176 @@
|
|||||||
|
"""API routes for Ketten (chains)."""
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||||
|
|
||||||
|
from tracker.api.models import (
|
||||||
|
KetteDetail,
|
||||||
|
KetteKurz,
|
||||||
|
KettenGliedOut,
|
||||||
|
PaginatedKetten,
|
||||||
|
ParteiOut,
|
||||||
|
VorlageKurz,
|
||||||
|
)
|
||||||
|
from tracker.core.graph import get_kette_graph
|
||||||
|
from tracker.db.session import get_connection
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/ketten", tags=["Ketten"])
|
||||||
|
|
||||||
|
|
||||||
|
def _db():
    """FastAPI dependency: yield a DB connection and close it after the request."""
    conn = get_connection()
    try:
        yield conn
    finally:
        # Release the connection even if the request handler raises.
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("", response_model=PaginatedKetten)
def list_ketten(
    page: int = Query(1, ge=1),
    page_size: int = Query(50, ge=1, le=200),
    status: str | None = None,
    typ: str | None = None,
    suche: str | None = None,
    conn=Depends(_db),
):
    """List Ketten with optional filters.

    Filters (status, typ, substring search on thema) are combined with AND.
    Only fixed clause fragments are interpolated into the SQL; all user
    values go through `?` placeholders, so this is injection-safe.
    """
    where_clauses = []
    params: list = []

    if status:
        where_clauses.append("k.status = ?")
        params.append(status)

    if typ:
        where_clauses.append("k.typ = ?")
        params.append(typ)

    if suche:
        # Substring match on the chain topic.
        where_clauses.append("k.thema LIKE ?")
        params.append(f"%{suche}%")

    where_sql = ("WHERE " + " AND ".join(where_clauses)) if where_clauses else ""

    # Total count uses the same filter so pagination metadata stays consistent.
    total = conn.execute(
        f"SELECT COUNT(*) as cnt FROM ketten k {where_sql}", params
    ).fetchone()["cnt"]

    offset = (page - 1) * page_size
    # NOTE(review): `NULLS LAST` requires SQLite >= 3.30 — confirm deployment version.
    rows = conn.execute(
        f"""SELECT k.id, k.typ, k.thema, k.status, k.status_seit,
                   k.letzte_aktivitaet, k.vertagungen_count, k.ursprung_id,
                   v.aktenzeichen, v.typ as v_typ, v.betreff, v.datum_eingang,
                   v.ist_verwaltungsvorlage,
                   (SELECT COUNT(*) FROM ketten_glieder kg WHERE kg.kette_id = k.id) as glieder_count
            FROM ketten k
            LEFT JOIN vorlagen v ON k.ursprung_id = v.id
            {where_sql}
            ORDER BY k.letzte_aktivitaet DESC NULLS LAST, k.id DESC
            LIMIT ? OFFSET ?""",
        params + [page_size, offset],
    ).fetchall()

    items = [
        KetteKurz(
            id=r["id"],
            # A chain may have no originating Vorlage (LEFT JOIN miss).
            ursprung=VorlageKurz(
                id=r["ursprung_id"],
                aktenzeichen=r["aktenzeichen"],
                typ=r["v_typ"],
                betreff=r["betreff"],
                datum_eingang=r["datum_eingang"],
                ist_verwaltungsvorlage=bool(r["ist_verwaltungsvorlage"]),
            ) if r["ursprung_id"] else None,
            typ=r["typ"],
            thema=r["thema"],
            status=r["status"],
            status_seit=r["status_seit"],
            letzte_aktivitaet=r["letzte_aktivitaet"],
            vertagungen_count=r["vertagungen_count"],
            glieder_count=r["glieder_count"],
        )
        for r in rows
    ]

    return PaginatedKetten(items=items, total=total, page=page, page_size=page_size)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/{kette_id}", response_model=KetteDetail)
def get_kette(kette_id: int, conn=Depends(_db)):
    """Get a single Kette with all Glieder.

    Raises 404 if the chain does not exist. The originating Vorlage,
    its submitting parties, and graph data are resolved separately.
    """
    row = conn.execute(
        """SELECT k.id, k.typ, k.thema, k.status, k.status_seit,
                  k.letzte_aktivitaet, k.vertagungen_count, k.ursprung_id,
                  v.aktenzeichen, v.typ as v_typ, v.betreff, v.datum_eingang,
                  v.ist_verwaltungsvorlage
           FROM ketten k
           LEFT JOIN vorlagen v ON k.ursprung_id = v.id
           WHERE k.id = ?""",
        (kette_id,),
    ).fetchone()

    if not row:
        raise HTTPException(status_code=404, detail="Kette nicht gefunden")

    # Chain links, ordered by their position within the chain.
    glieder_rows = conn.execute(
        """SELECT kg.position, kg.rolle,
                  v.id, v.aktenzeichen, v.typ, v.betreff, v.datum_eingang,
                  v.ist_verwaltungsvorlage
           FROM ketten_glieder kg
           JOIN vorlagen v ON kg.vorlage_id = v.id
           WHERE kg.kette_id = ?
           ORDER BY kg.position""",
        (kette_id,),
    ).fetchall()

    glieder = [
        KettenGliedOut(
            vorlage=VorlageKurz(
                id=g["id"],
                aktenzeichen=g["aktenzeichen"],
                typ=g["typ"],
                betreff=g["betreff"],
                datum_eingang=g["datum_eingang"],
                ist_verwaltungsvorlage=bool(g["ist_verwaltungsvorlage"]),
            ),
            position=g["position"],
            rolle=g["rolle"],
        )
        for g in glieder_rows
    ]

    # Submitting parties of the originating Vorlage (empty if no origin).
    antragsteller = []
    if row["ursprung_id"]:
        antragsteller_rows = conn.execute("""
            SELECT p.id, p.kuerzel, p.name, p.farbe
            FROM antragsteller a
            JOIN parteien p ON a.partei_id = p.id
            WHERE a.vorlage_id = ?
        """, (row["ursprung_id"],)).fetchall()
        antragsteller = [ParteiOut(**dict(a)) for a in antragsteller_rows]

    # Graph / "string of pearls" visualisation data for the frontend.
    graph = get_kette_graph(conn, kette_id)

    return KetteDetail(
        id=row["id"],
        ursprung=VorlageKurz(
            id=row["ursprung_id"],
            aktenzeichen=row["aktenzeichen"],
            typ=row["v_typ"],
            betreff=row["betreff"],
            datum_eingang=row["datum_eingang"],
            ist_verwaltungsvorlage=bool(row["ist_verwaltungsvorlage"]),
        ) if row["ursprung_id"] else None,
        typ=row["typ"],
        thema=row["thema"],
        status=row["status"],
        status_seit=row["status_seit"],
        letzte_aktivitaet=row["letzte_aktivitaet"],
        vertagungen_count=row["vertagungen_count"],
        glieder=glieder,
        antragsteller=antragsteller,
        graph=graph,
    )
|
||||||
98
backend/src/tracker/api/routes/orte.py
Normal file
98
backend/src/tracker/api/routes/orte.py
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
"""API routes for Orte und Karten-Daten."""
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends
|
||||||
|
|
||||||
|
from tracker.db.session import get_connection
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/orte", tags=["Orte"])
|
||||||
|
|
||||||
|
|
||||||
|
def _db():
    """FastAPI dependency: yield a DB connection and close it after the request."""
    conn = get_connection()
    try:
        yield conn
    finally:
        # Release the connection even if the request handler raises.
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("")
def list_orte(conn=Depends(_db)):
    """All geocoded places together with their Vorlagen count."""
    # Places without coordinates are useless on the map and are skipped.
    cursor = conn.execute("""
        SELECT o.id, o.name, o.typ, o.lat, o.lon, o.vorlage_count
        FROM orte o
        WHERE o.lat IS NOT NULL
        ORDER BY o.vorlage_count DESC
    """)
    keys = ("id", "name", "typ", "lat", "lon", "vorlage_count")
    return [dict(zip(keys, row)) for row in cursor.fetchall()]
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/geojson")
def get_orte_geojson(conn=Depends(_db)):
    """Geocoded places as a GeoJSON FeatureCollection for Leaflet."""
    rows = conn.execute("""
        SELECT o.id, o.name, o.lat, o.lon, o.vorlage_count,
               GROUP_CONCAT(v.aktenzeichen, ', ') as vorlagen
        FROM orte o
        LEFT JOIN vorlagen_orte vo ON o.id = vo.ort_id
        LEFT JOIN vorlagen v ON vo.vorlage_id = v.id
        WHERE o.lat IS NOT NULL
        GROUP BY o.id
        ORDER BY o.vorlage_count DESC
    """).fetchall()

    def _feature(ort_id, name, lat, lon, count, vorlagen):
        # GeoJSON mandates [longitude, latitude] order.
        return {
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [lon, lat]
            },
            "properties": {
                "id": ort_id,
                "name": name,
                "vorlage_count": count,
                "vorlagen": vorlagen
            }
        }

    return {
        "type": "FeatureCollection",
        "features": [_feature(*row) for row in rows]
    }
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/{ort_id}/vorlagen")
def get_vorlagen_fuer_ort(ort_id: int, conn=Depends(_db)):
    """All Vorlagen that concern a given place, newest first."""
    cursor = conn.execute("""
        SELECT v.id, v.aktenzeichen, v.typ, v.betreff, v.datum_eingang, vo.kontext
        FROM vorlagen_orte vo
        JOIN vorlagen v ON vo.vorlage_id = v.id
        WHERE vo.ort_id = ?
        ORDER BY v.datum_eingang DESC
    """, (ort_id,))
    fields = ("id", "aktenzeichen", "typ", "betreff", "datum_eingang", "kontext")
    return [dict(zip(fields, row)) for row in cursor.fetchall()]
|
||||||
162
backend/src/tracker/api/routes/stats.py
Normal file
162
backend/src/tracker/api/routes/stats.py
Normal file
@ -0,0 +1,162 @@
|
|||||||
|
"""API routes for Dashboard statistics."""
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends
|
||||||
|
|
||||||
|
from tracker.db.session import get_connection
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/stats", tags=["Statistics"])
|
||||||
|
|
||||||
|
|
||||||
|
def _db():
    """FastAPI dependency: yield a DB connection and close it after the request."""
    conn = get_connection()
    try:
        yield conn
    finally:
        # Release the connection even if the request handler raises.
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("")
def get_stats(conn=Depends(_db)):
    """Dashboard-level KPI statistics.

    Returns overall counts plus breakdowns by type/status/party/committee
    and a monthly timeline, all in one payload for the dashboard page.
    """
    vorlagen_total = conn.execute("SELECT COUNT(*) as c FROM vorlagen").fetchone()["c"]
    beratungen_total = conn.execute("SELECT COUNT(*) as c FROM beratungen").fetchone()["c"]
    ketten_total = conn.execute("SELECT COUNT(*) as c FROM ketten").fetchone()["c"]

    # Vorlagen by type
    typ_rows = conn.execute("""
        SELECT typ, COUNT(*) as c FROM vorlagen
        WHERE typ IS NOT NULL
        GROUP BY typ ORDER BY c DESC
    """).fetchall()

    # Ketten by status
    status_rows = conn.execute("""
        SELECT status, COUNT(*) as c FROM ketten
        WHERE status IS NOT NULL
        GROUP BY status ORDER BY c DESC
    """).fetchall()

    # Ketten by type
    ketten_typ_rows = conn.execute("""
        SELECT typ, COUNT(*) as c FROM ketten
        WHERE typ IS NOT NULL
        GROUP BY typ ORDER BY c DESC
    """).fetchall()

    # Recent activity (last 10 Vorlagen by receipt date)
    recent = conn.execute("""
        SELECT id, aktenzeichen, betreff, typ, datum_eingang
        FROM vorlagen
        WHERE datum_eingang IS NOT NULL
        ORDER BY datum_eingang DESC
        LIMIT 10
    """).fetchall()

    # Parties with counts of submitted motions (LEFT JOIN keeps parties with zero)
    parteien = conn.execute("""
        SELECT p.kuerzel, p.name, p.farbe, COUNT(a.vorlage_id) as anzahl
        FROM parteien p
        LEFT JOIN antragsteller a ON p.id = a.partei_id
        GROUP BY p.id
        ORDER BY anzahl DESC
    """).fetchall()

    # Committees with deliberation counts (top 15)
    gremien = conn.execute("""
        SELECT g.name, g.kuerzel, g.typ, COUNT(b.id) as anzahl
        FROM gremien g
        LEFT JOIN beratungen b ON g.id = b.gremium_id
        GROUP BY g.id
        ORDER BY anzahl DESC
        LIMIT 15
    """).fetchall()

    # Temporal distribution: Vorlagen per month, last 24 months
    # (fetched newest-first, reversed to chronological order below)
    timeline = conn.execute("""
        SELECT strftime('%Y-%m', datum_eingang) as monat, COUNT(*) as c
        FROM vorlagen
        WHERE datum_eingang IS NOT NULL
        GROUP BY monat
        ORDER BY monat DESC
        LIMIT 24
    """).fetchall()

    return {
        "vorlagen_total": vorlagen_total,
        "beratungen_total": beratungen_total,
        "ketten_total": ketten_total,
        "vorlagen_nach_typ": [{"typ": r["typ"], "anzahl": r["c"]} for r in typ_rows],
        "ketten_nach_status": [{"status": r["status"], "anzahl": r["c"]} for r in status_rows],
        "ketten_nach_typ": [{"typ": r["typ"], "anzahl": r["c"]} for r in ketten_typ_rows],
        "letzte_vorlagen": [
            {
                "id": r["id"],
                "aktenzeichen": r["aktenzeichen"],
                "betreff": r["betreff"],
                "typ": r["typ"],
                "datum_eingang": r["datum_eingang"],
            }
            for r in recent
        ],
        "parteien": [
            {"kuerzel": r["kuerzel"], "name": r["name"], "farbe": r["farbe"], "anzahl": r["anzahl"]}
            for r in parteien
        ],
        "gremien": [
            {"name": r["name"], "kuerzel": r["kuerzel"], "typ": r["typ"], "anzahl": r["anzahl"]}
            for r in gremien
        ],
        # reversed(): oldest month first, as chart libraries expect
        "timeline": [{"monat": r["monat"], "anzahl": r["c"]} for r in reversed(timeline)],
    }
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/ketten-stats")
def get_ketten_stats(conn=Depends(_db)):
    """Aggregated Ketten status distribution with breakdowns.

    Returns status counts per chain type, per-status averages (age in
    days, postponements), and an early-warning counter for stalled items.
    """
    # Status by type
    rows = conn.execute("""
        SELECT k.typ, k.status, COUNT(*) as c
        FROM ketten k
        GROUP BY k.typ, k.status
        ORDER BY k.typ, c DESC
    """).fetchall()

    by_type: dict = {}
    for r in rows:
        # NULL types are bucketed under a literal "unbekannt" key.
        typ = r["typ"] or "unbekannt"
        if typ not in by_type:
            by_type[typ] = []
        by_type[typ].append({"status": r["status"], "anzahl": r["c"]})

    # Status counts with average age (days since status_seit, via julianday)
    status_detail = conn.execute("""
        SELECT status,
               COUNT(*) as anzahl,
               AVG(julianday('now') - julianday(status_seit)) as avg_tage,
               AVG(vertagungen_count) as avg_vertagungen
        FROM ketten
        WHERE status IS NOT NULL
        GROUP BY status
        ORDER BY anzahl DESC
    """).fetchall()

    # Early warning for items at risk of fizzling out ("Versandung"):
    # approved ("beschlossen") more than 180 days ago with no follow-up.
    fruehwarnung = conn.execute("""
        SELECT COUNT(*) as c FROM ketten
        WHERE status = 'beschlossen'
        AND julianday('now') - julianday(status_seit) > 180
    """).fetchone()["c"]

    return {
        "nach_typ": by_type,
        "status_detail": [
            {
                "status": r["status"],
                "anzahl": r["anzahl"],
                # AVG can be NULL when every status_seit is NULL -> coerce to 0
                "avg_tage": round(r["avg_tage"] or 0, 1),
                "avg_vertagungen": round(r["avg_vertagungen"] or 0, 1),
            }
            for r in status_detail
        ],
        "versandungs_fruehwarnung": fruehwarnung,
    }
|
||||||
171
backend/src/tracker/api/routes/vorlagen.py
Normal file
171
backend/src/tracker/api/routes/vorlagen.py
Normal file
@ -0,0 +1,171 @@
|
|||||||
|
"""API routes for Vorlagen."""
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
|
from tracker.api.models import (
|
||||||
|
BeratungOut,
|
||||||
|
GremiumOut,
|
||||||
|
KiZusammenfassung,
|
||||||
|
PaginatedVorlagen,
|
||||||
|
ParteiOut,
|
||||||
|
ReferenzOut,
|
||||||
|
VorlageDetail,
|
||||||
|
VorlageKurz,
|
||||||
|
)
|
||||||
|
from tracker.core.graph import get_references_for_vorlage
|
||||||
|
from tracker.db.session import get_connection
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/vorlagen", tags=["Vorlagen"])
|
||||||
|
|
||||||
|
|
||||||
|
def _db():
    """FastAPI dependency: yield a DB connection and close it after the request."""
    conn = get_connection()
    try:
        yield conn
    finally:
        # Release the connection even if the request handler raises.
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("", response_model=PaginatedVorlagen)
def list_vorlagen(
    page: int = Query(1, ge=1),
    page_size: int = Query(50, ge=1, le=200),
    typ: str | None = None,
    suche: str | None = None,
    conn=Depends(_db),
):
    """List Vorlagen with optional filters.

    `typ` filters exactly; `suche` matches a substring of the subject or
    the reference number. Only fixed clause fragments are interpolated
    into the SQL; all user values go through `?` placeholders.
    """
    where_clauses = []
    params: list = []

    if typ:
        where_clauses.append("v.typ = ?")
        params.append(typ)

    if suche:
        # Substring search on subject OR reference number.
        where_clauses.append("(v.betreff LIKE ? OR v.aktenzeichen LIKE ?)")
        params.extend([f"%{suche}%", f"%{suche}%"])

    where_sql = ("WHERE " + " AND ".join(where_clauses)) if where_clauses else ""

    # Total count uses the same filter so pagination metadata stays consistent.
    total = conn.execute(
        f"SELECT COUNT(*) as cnt FROM vorlagen v {where_sql}", params
    ).fetchone()["cnt"]

    offset = (page - 1) * page_size
    # NOTE(review): `NULLS LAST` requires SQLite >= 3.30 — confirm deployment version.
    rows = conn.execute(
        f"""SELECT v.id, v.aktenzeichen, v.typ, v.betreff, v.datum_eingang,
                   v.ist_verwaltungsvorlage
            FROM vorlagen v {where_sql}
            ORDER BY v.datum_eingang DESC NULLS LAST, v.id DESC
            LIMIT ? OFFSET ?""",
        params + [page_size, offset],
    ).fetchall()

    items = [
        VorlageKurz(
            id=r["id"],
            aktenzeichen=r["aktenzeichen"],
            typ=r["typ"],
            betreff=r["betreff"],
            datum_eingang=r["datum_eingang"],
            ist_verwaltungsvorlage=bool(r["ist_verwaltungsvorlage"]),
        )
        for r in rows
    ]

    return PaginatedVorlagen(items=items, total=total, page=page, page_size=page_size)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/{vorlage_id}", response_model=VorlageDetail)
|
||||||
|
def get_vorlage(vorlage_id: int, conn=Depends(_db)):
|
||||||
|
"""Get a single Vorlage with all details."""
|
||||||
|
row = conn.execute(
|
||||||
|
"""SELECT id, aktenzeichen, aktenzeichen_basis, aktenzeichen_suffix,
|
||||||
|
typ, betreff, volltext_clean, datum_eingang, pdf_url, web_url,
|
||||||
|
ist_verwaltungsvorlage, thema_kurz
|
||||||
|
FROM vorlagen WHERE id = ?""",
|
||||||
|
(vorlage_id,),
|
||||||
|
).fetchone()
|
||||||
|
|
||||||
|
if not row:
|
||||||
|
raise HTTPException(status_code=404, detail="Vorlage nicht gefunden")
|
||||||
|
|
||||||
|
# Antragsteller
|
||||||
|
antragsteller = conn.execute(
|
||||||
|
"""SELECT p.id, p.kuerzel, p.name, p.farbe
|
||||||
|
FROM antragsteller a
|
||||||
|
JOIN parteien p ON a.partei_id = p.id
|
||||||
|
WHERE a.vorlage_id = ?""",
|
||||||
|
(vorlage_id,),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
# Beratungen
|
||||||
|
beratungen_rows = conn.execute(
|
||||||
|
"""SELECT b.id, b.sitzung_datum, b.rolle, b.ergebnis, b.ergebnis_text,
|
||||||
|
g.id as g_id, g.name as g_name, g.kuerzel as g_kuerzel, g.typ as g_typ
|
||||||
|
FROM beratungen b
|
||||||
|
LEFT JOIN gremien g ON b.gremium_id = g.id
|
||||||
|
WHERE b.vorlage_id = ?
|
||||||
|
ORDER BY b.sitzung_datum DESC NULLS LAST""",
|
||||||
|
(vorlage_id,),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
beratungen = [
|
||||||
|
BeratungOut(
|
||||||
|
id=b["id"],
|
||||||
|
gremium=GremiumOut(id=b["g_id"], name=b["g_name"], kuerzel=b["g_kuerzel"], typ=b["g_typ"])
|
||||||
|
if b["g_id"]
|
||||||
|
else None,
|
||||||
|
sitzung_datum=b["sitzung_datum"],
|
||||||
|
rolle=b["rolle"],
|
||||||
|
ergebnis=b["ergebnis"],
|
||||||
|
ergebnis_text=b["ergebnis_text"],
|
||||||
|
)
|
||||||
|
for b in beratungen_rows
|
||||||
|
]
|
||||||
|
|
||||||
|
# Referenzen
|
||||||
|
refs = get_references_for_vorlage(conn, vorlage_id)
|
||||||
|
|
||||||
|
# Kette-Zugehörigkeit
|
||||||
|
kette_row = conn.execute(
|
||||||
|
"SELECT kette_id FROM ketten_glieder WHERE vorlage_id = ? LIMIT 1",
|
||||||
|
(vorlage_id,),
|
||||||
|
).fetchone()
|
||||||
|
|
||||||
|
# KI-Zusammenfassung
|
||||||
|
ki_row = conn.execute(
|
||||||
|
"SELECT anmerkungen FROM ki_bewertungen WHERE vorlage_id = ? AND typ = 'zusammenfassung' LIMIT 1",
|
||||||
|
(vorlage_id,),
|
||||||
|
).fetchone()
|
||||||
|
ki_zusammenfassung = None
|
||||||
|
if ki_row and ki_row["anmerkungen"]:
|
||||||
|
try:
|
||||||
|
ki_data = json.loads(ki_row["anmerkungen"])
|
||||||
|
ki_zusammenfassung = KiZusammenfassung(**ki_data)
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
return VorlageDetail(
|
||||||
|
id=row["id"],
|
||||||
|
aktenzeichen=row["aktenzeichen"],
|
||||||
|
aktenzeichen_basis=row["aktenzeichen_basis"],
|
||||||
|
aktenzeichen_suffix=row["aktenzeichen_suffix"],
|
||||||
|
typ=row["typ"],
|
||||||
|
betreff=row["betreff"],
|
||||||
|
volltext_clean=row["volltext_clean"],
|
||||||
|
datum_eingang=row["datum_eingang"],
|
||||||
|
pdf_url=row["pdf_url"],
|
||||||
|
web_url=row["web_url"],
|
||||||
|
ist_verwaltungsvorlage=bool(row["ist_verwaltungsvorlage"]),
|
||||||
|
thema_kurz=row["thema_kurz"],
|
||||||
|
antragsteller=[ParteiOut(**dict(a)) for a in antragsteller],
|
||||||
|
beratungen=beratungen,
|
||||||
|
referenzen_ausgehend=[ReferenzOut(**r) for r in refs["ausgehend"]],
|
||||||
|
referenzen_eingehend=[ReferenzOut(**r) for r in refs["eingehend"]],
|
||||||
|
kette_id=kette_row["kette_id"] if kette_row else None,
|
||||||
|
ki_zusammenfassung=ki_zusammenfassung,
|
||||||
|
)
|
||||||
0
backend/src/tracker/core/__init__.py
Normal file
0
backend/src/tracker/core/__init__.py
Normal file
152
backend/src/tracker/core/chains.py
Normal file
152
backend/src/tracker/core/chains.py
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
"""Ketten-Builder: groups Vorlagen into chains based on Aktenzeichen-Suffix references."""
|
||||||
|
|
||||||
|
import sqlite3
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
from tracker.core.status import compute_status
|
||||||
|
|
||||||
|
|
||||||
|
def build_suffix_references(conn: sqlite3.Connection) -> int:
    """Create referenzen entries for Aktenzeichen-Suffix relations.

    E.g. 0362/2025-1 references 0362/2025 via suffix relation.
    Returns the number of new references created (both kinds combined).
    """
    # Link every suffixed Vorlage to the suffix-less original that shares
    # its aktenzeichen_basis.
    cursor = conn.execute("""
        INSERT OR IGNORE INTO referenzen (quelle_id, ziel_id, typ, konfidenz)
        SELECT child.id, parent.id, 'suffix', 1.0
        FROM vorlagen child
        JOIN vorlagen parent ON child.aktenzeichen_basis = parent.aktenzeichen_basis
        WHERE child.aktenzeichen_suffix IS NOT NULL
          AND parent.aktenzeichen_suffix IS NULL
          AND child.id != parent.id
    """)
    created = cursor.rowcount
    # Also link sequential suffixes: -2 -> -1, -3 -> -2, etc.
    cursor = conn.execute("""
        INSERT OR IGNORE INTO referenzen (quelle_id, ziel_id, typ, konfidenz)
        SELECT later.id, earlier.id, 'suffix', 1.0
        FROM vorlagen later
        JOIN vorlagen earlier
          ON later.aktenzeichen_basis = earlier.aktenzeichen_basis
         AND later.aktenzeichen_suffix IS NOT NULL
         AND earlier.aktenzeichen_suffix IS NOT NULL
         AND CAST(REPLACE(later.aktenzeichen_suffix, '-', '') AS INTEGER)
             = CAST(REPLACE(earlier.aktenzeichen_suffix, '-', '') AS INTEGER) + 1
        WHERE later.id != earlier.id
    """)
    # Bug fix: the second INSERT's rowcount was previously dropped, so the
    # reported count ignored all sequential-suffix references.
    created += cursor.rowcount
    conn.commit()
    return created
||||||
|
|
||||||
|
|
||||||
|
def build_chains(conn: sqlite3.Connection) -> int:
    """Build ketten from Vorlagen that share the same aktenzeichen_basis.

    A chain's Ursprung is the Vorlage without suffix (the original).
    Chain members are ordered by suffix number.
    Existing chains are updated in place; their member list is rebuilt.
    Returns the number of chains created/updated.
    """
    # Find all aktenzeichen_basis values that have at least one entry.
    # (HAVING cnt >= 1 is a no-op after GROUP BY but kept for clarity.)
    rows = conn.execute("""
        SELECT aktenzeichen_basis, COUNT(*) as cnt
        FROM vorlagen
        WHERE aktenzeichen_basis IS NOT NULL
        GROUP BY aktenzeichen_basis
        HAVING cnt >= 1
    """).fetchall()

    count = 0
    for row in rows:
        basis = row["aktenzeichen_basis"]

        # Get all Vorlagen in this chain, ordered by suffix; the suffix-less
        # original sorts first (CASE maps NULL suffix to 0).
        members = conn.execute("""
            SELECT id, aktenzeichen, aktenzeichen_suffix, typ, datum_eingang, betreff
            FROM vorlagen
            WHERE aktenzeichen_basis = ?
            ORDER BY
                CASE WHEN aktenzeichen_suffix IS NULL THEN 0
                     ELSE CAST(REPLACE(aktenzeichen_suffix, '-', '') AS INTEGER)
                END
        """, (basis,)).fetchall()

        if not members:
            continue

        ursprung = members[0]

        # Only create chains for antrag/anfrage types (the base should be one)
        chain_typ = ursprung["typ"]
        if chain_typ not in ("antrag", "anfrage"):
            continue

        # Compute the chain's aggregate status via the status engine.
        status_info = compute_status(conn, ursprung["id"], chain_typ, members)

        # letzte_aktivitaet = latest member intake date (ISO strings compare
        # correctly lexicographically).
        dates = [m["datum_eingang"] for m in members if m["datum_eingang"]]
        letzte_aktivitaet = max(dates) if dates else ursprung["datum_eingang"]

        # Upsert: update an existing chain keyed by its Ursprung, else insert.
        existing = conn.execute(
            "SELECT id FROM ketten WHERE ursprung_id = ?", (ursprung["id"],)
        ).fetchone()

        if existing:
            kette_id = existing["id"]
            conn.execute("""
                UPDATE ketten
                SET typ = ?, thema = ?, status = ?, status_seit = ?,
                    letzte_aktivitaet = ?, vertagungen_count = ?
                WHERE id = ?
            """, (
                chain_typ,
                ursprung["betreff"],
                status_info["status"],
                status_info.get("status_seit"),
                letzte_aktivitaet,
                status_info.get("vertagungen_count", 0),
                kette_id,
            ))
            # Members are fully rebuilt below, so clear the old list first.
            conn.execute("DELETE FROM ketten_glieder WHERE kette_id = ?", (kette_id,))
        else:
            cursor = conn.execute("""
                INSERT INTO ketten (ursprung_id, typ, thema, status, status_seit,
                                    letzte_aktivitaet, vertagungen_count)
                VALUES (?, ?, ?, ?, ?, ?, ?)
            """, (
                ursprung["id"],
                chain_typ,
                ursprung["betreff"],
                status_info["status"],
                status_info.get("status_seit"),
                letzte_aktivitaet,
                status_info.get("vertagungen_count", 0),
            ))
            kette_id = cursor.lastrowid

        # Insert chain members with their position and derived role.
        for pos, member in enumerate(members):
            rolle = _determine_rolle(member, pos)
            conn.execute("""
                INSERT OR REPLACE INTO ketten_glieder (kette_id, vorlage_id, position, rolle)
                VALUES (?, ?, ?, ?)
            """, (kette_id, member["id"], pos, rolle))

        count += 1

    conn.commit()
    return count
||||||
|
|
||||||
|
|
||||||
|
def _determine_rolle(member: sqlite3.Row, position: int) -> str:
|
||||||
|
if position == 0:
|
||||||
|
return "ursprung"
|
||||||
|
typ = member["typ"]
|
||||||
|
if typ == "stellungnahme":
|
||||||
|
return "stellungnahme"
|
||||||
|
if typ == "bericht":
|
||||||
|
return "bericht"
|
||||||
|
if typ in ("antrag", "anfrage"):
|
||||||
|
return "aenderung"
|
||||||
|
return "ergaenzung"
|
||||||
181
backend/src/tracker/core/graph.py
Normal file
181
backend/src/tracker/core/graph.py
Normal file
@ -0,0 +1,181 @@
|
|||||||
|
"""Graph-Builder: builds reference graph for navigation between Vorlagen."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import sqlite3
|
||||||
|
|
||||||
|
|
||||||
|
def get_references_for_vorlage(conn: sqlite3.Connection, vorlage_id: int) -> dict:
    """Get all references (incoming and outgoing) for a Vorlage.

    Returns dict with 'ausgehend' (outgoing) and 'eingehend' (incoming) lists.
    """
    # Both directions run the same query shape; only the join/filter side of
    # the referenzen table flips.
    ausgehend_sql = """
        SELECT r.id, r.ziel_id as vorlage_id, r.typ, r.konfidenz, r.kontext,
               v.aktenzeichen, v.betreff, v.typ as vorlage_typ, v.datum_eingang
        FROM referenzen r
        JOIN vorlagen v ON r.ziel_id = v.id
        WHERE r.quelle_id = ?
        ORDER BY v.datum_eingang
    """
    eingehend_sql = """
        SELECT r.id, r.quelle_id as vorlage_id, r.typ, r.konfidenz, r.kontext,
               v.aktenzeichen, v.betreff, v.typ as vorlage_typ, v.datum_eingang
        FROM referenzen r
        JOIN vorlagen v ON r.quelle_id = v.id
        WHERE r.ziel_id = ?
        ORDER BY v.datum_eingang
    """
    result: dict = {}
    for key, sql in (("ausgehend", ausgehend_sql), ("eingehend", eingehend_sql)):
        rows = conn.execute(sql, (vorlage_id,)).fetchall()
        result[key] = [_ref_to_dict(r) for r in rows]
    return result
||||||
|
|
||||||
|
|
||||||
|
def get_kette_graph(conn: sqlite3.Connection, kette_id: int) -> dict:
    """Build the Perlenschnur graph data for a Kette.

    Returns nodes (Vorlagen) and edges (Referenzen) for visualization.
    Nodes are the chain members plus any directly linked external
    Vorlagen (marked with "extern": True).
    """
    # Get all Vorlagen in this chain, in chain order.
    glieder = conn.execute("""
        SELECT kg.position, kg.rolle, v.id, v.aktenzeichen, v.typ, v.betreff,
               v.datum_eingang, v.ist_verwaltungsvorlage
        FROM ketten_glieder kg
        JOIN vorlagen v ON kg.vorlage_id = v.id
        WHERE kg.kette_id = ?
        ORDER BY kg.position
    """, (kette_id,)).fetchall()

    vorlage_ids = [g["id"] for g in glieder]
    if not vorlage_ids:
        # Unknown or empty chain: return an empty graph rather than failing.
        return {"nodes": [], "edges": []}

    # Dynamic "?,?,..." list for IN clauses; bound values follow below.
    placeholders = ",".join("?" * len(vorlage_ids))

    # References between chain members (both endpoints inside the chain).
    edges_internal = conn.execute(f"""
        SELECT r.id, r.quelle_id, r.ziel_id, r.typ, r.konfidenz
        FROM referenzen r
        WHERE r.quelle_id IN ({placeholders})
          AND r.ziel_id IN ({placeholders})
    """, vorlage_ids + vorlage_ids).fetchall()

    # References to/from external Vorlagen (for side-links). The CASE in the
    # JOIN resolves v to whichever endpoint is OUTSIDE the chain. Five
    # placeholder groups appear in the SQL, hence vorlage_ids * 5 bindings.
    edges_external = conn.execute(f"""
        SELECT r.id, r.quelle_id, r.ziel_id, r.typ, r.konfidenz,
               v.aktenzeichen, v.betreff, v.typ as vorlage_typ, v.datum_eingang
        FROM referenzen r
        JOIN vorlagen v ON CASE
            WHEN r.quelle_id IN ({placeholders}) THEN r.ziel_id = v.id
            ELSE r.quelle_id = v.id
        END
        WHERE (r.quelle_id IN ({placeholders}) AND r.ziel_id NOT IN ({placeholders}))
           OR (r.ziel_id IN ({placeholders}) AND r.quelle_id NOT IN ({placeholders}))
    """, vorlage_ids * 5).fetchall()

    # Deliberations for each chain Vorlage, oldest first.
    beratungen = conn.execute(f"""
        SELECT b.vorlage_id, b.sitzung_datum, b.rolle, b.ergebnis,
               g.name as gremium_name
        FROM beratungen b
        LEFT JOIN gremien g ON b.gremium_id = g.id
        WHERE b.vorlage_id IN ({placeholders})
        ORDER BY b.sitzung_datum
    """, vorlage_ids).fetchall()

    # Group deliberations by Vorlage id for O(1) lookup during node assembly.
    beratungen_map: dict[int, list] = {}
    for b in beratungen:
        vid = b["vorlage_id"]
        if vid not in beratungen_map:
            beratungen_map[vid] = []
        beratungen_map[vid].append({
            "sitzung_datum": b["sitzung_datum"],
            "rolle": b["rolle"],
            "ergebnis": b["ergebnis"],
            "gremium_name": b["gremium_name"],
        })

    # Submitting parties, fetched for all members (not only the Ursprung).
    antragsteller = conn.execute(f"""
        SELECT a.vorlage_id, p.kuerzel, p.name, p.farbe
        FROM antragsteller a
        JOIN parteien p ON a.partei_id = p.id
        WHERE a.vorlage_id IN ({placeholders})
    """, vorlage_ids).fetchall()

    antragsteller_map: dict[int, list] = {}
    for a in antragsteller:
        vid = a["vorlage_id"]
        if vid not in antragsteller_map:
            antragsteller_map[vid] = []
        antragsteller_map[vid].append({
            "kuerzel": a["kuerzel"],
            "name": a["name"],
            "farbe": a["farbe"],
        })

    # Chain-member nodes, enriched with their deliberations and parties.
    nodes = [
        {
            "id": g["id"],
            "aktenzeichen": g["aktenzeichen"],
            "typ": g["typ"],
            "betreff": g["betreff"],
            "datum_eingang": g["datum_eingang"],
            "position": g["position"],
            "rolle": g["rolle"],
            "ist_verwaltungsvorlage": bool(g["ist_verwaltungsvorlage"]),
            "beratungen": beratungen_map.get(g["id"], []),
            "antragsteller": antragsteller_map.get(g["id"], []),
        }
        for g in glieder
    ]

    edges = [
        {
            "quelle_id": e["quelle_id"],
            "ziel_id": e["ziel_id"],
            "typ": e["typ"],
            "konfidenz": e["konfidenz"],
        }
        for e in edges_internal
    ]

    # Add one node per distinct external Vorlage, plus its linking edge.
    external_nodes = []
    for e in edges_external:
        # Whichever endpoint is not a chain member is the external node.
        ext_id = e["ziel_id"] if e["quelle_id"] in vorlage_ids else e["quelle_id"]
        if not any(n["id"] == ext_id for n in external_nodes):
            external_nodes.append({
                "id": ext_id,
                "aktenzeichen": e["aktenzeichen"],
                "typ": e["vorlage_typ"],
                "betreff": e["betreff"],
                "datum_eingang": e["datum_eingang"],
                "extern": True,
            })
        edges.append({
            "quelle_id": e["quelle_id"],
            "ziel_id": e["ziel_id"],
            "typ": e["typ"],
            "konfidenz": e["konfidenz"],
        })

    return {
        "nodes": nodes + external_nodes,
        "edges": edges,
    }
||||||
|
|
||||||
|
|
||||||
|
def _ref_to_dict(r: sqlite3.Row) -> dict:
|
||||||
|
return {
|
||||||
|
"vorlage_id": r["vorlage_id"],
|
||||||
|
"aktenzeichen": r["aktenzeichen"],
|
||||||
|
"betreff": r["betreff"],
|
||||||
|
"vorlage_typ": r["vorlage_typ"],
|
||||||
|
"datum_eingang": r["datum_eingang"],
|
||||||
|
"ref_typ": r["typ"],
|
||||||
|
"konfidenz": r["konfidenz"],
|
||||||
|
"kontext": r["kontext"],
|
||||||
|
}
|
||||||
222
backend/src/tracker/core/status.py
Normal file
222
backend/src/tracker/core/status.py
Normal file
@ -0,0 +1,222 @@
|
|||||||
|
"""Status-Engine: computes chain status based on KONZEPT.md section 6."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import sqlite3
|
||||||
|
from datetime import date, timedelta
|
||||||
|
|
||||||
|
# Threshold: a Vorlage is considered "old" after this many days without activity
|
||||||
|
VERSANDET_TAGE = 365
|
||||||
|
|
||||||
|
|
||||||
|
def compute_status(
    conn: sqlite3.Connection,
    ursprung_id: int,
    chain_typ: str,
    members: list[sqlite3.Row],
) -> dict:
    """Compute the overall status for a chain.

    Dispatches to the type-specific rule set (KONZEPT.md section 6).
    Returns dict with keys: status, status_seit, vertagungen_count
    """
    if chain_typ == "anfrage":
        return _status_anfrage(conn, ursprung_id, members)
    if chain_typ == "antrag":
        return _status_antrag(conn, ursprung_id, members)
    # Any other chain type gets a neutral fallback.
    return {"status": "unbekannt", "status_seit": None, "vertagungen_count": 0}
||||||
|
|
||||||
|
|
||||||
|
def _status_anfrage(
    conn: sqlite3.Connection,
    ursprung_id: int,
    members: list[sqlite3.Row],
) -> dict:
    """Status logic for Anfragen (KONZEPT.md 6.1).

    angefragt: no Stellungnahme yet, less than a year old
    beantwortet: Stellungnahme + AI match >= 0.7 + Kenntnisnahme
    offen: Stellungnahme exists, but no Kenntnisnahme
    abgewiegelt: Stellungnahme + AI match < 0.5
    versandet: no answer, more than a year old
    zurückgezogen: explicitly withdrawn

    Note: the check order matters — withdrawal wins over everything,
    then Stellungnahme-based states, then the age-based fallback.
    """
    heute = date.today()
    # members[0] is the chain origin (the Anfrage itself).
    ursprung_datum = _parse_date(members[0]["datum_eingang"])

    # Any chain member of type "stellungnahme" counts as an answer attempt.
    stellungnahmen = [m for m in members if m["typ"] == "stellungnahme"]
    has_stellungnahme = len(stellungnahmen) > 0

    # Deliberations on the origin, newest first.
    beratungen = conn.execute("""
        SELECT rolle, ergebnis, sitzung_datum
        FROM beratungen
        WHERE vorlage_id = ?
        ORDER BY sitzung_datum DESC
    """, (ursprung_id,)).fetchall()

    # "Kenntnisnahme" in the deliberation role = council took note of the answer.
    has_kenntnisnahme = any(
        b["rolle"] and "kenntnisnahme" in b["rolle"].lower()
        for b in beratungen
    )

    # AI-computed answer-quality match score (None when not evaluated yet).
    ki_score = _get_ki_score(conn, ursprung_id, "antwort_match")

    # Withdrawal short-circuits all other states.
    if _is_zurueckgezogen(beratungen):
        return {"status": "zurückgezogen", "status_seit": _latest_date(beratungen), "vertagungen_count": 0}

    if has_stellungnahme:
        # A low AI match means the answer dodged the question.
        if ki_score is not None and ki_score < 0.5:
            return {"status": "abgewiegelt", "status_seit": _vorlage_date(stellungnahmen[0]), "vertagungen_count": 0}
        # Missing score is treated optimistically as a sufficient match.
        if has_kenntnisnahme and (ki_score is None or ki_score >= 0.7):
            return {"status": "beantwortet", "status_seit": _vorlage_date(stellungnahmen[0]), "vertagungen_count": 0}
        return {"status": "offen", "status_seit": _vorlage_date(stellungnahmen[0]), "vertagungen_count": 0}

    # No Stellungnahme at all: stale after VERSANDET_TAGE days.
    if ursprung_datum and (heute - ursprung_datum).days > VERSANDET_TAGE:
        return {"status": "versandet", "status_seit": ursprung_datum, "vertagungen_count": 0}

    return {"status": "angefragt", "status_seit": ursprung_datum, "vertagungen_count": 0}
||||||
|
|
||||||
|
|
||||||
|
def _status_antrag(
    conn: sqlite3.Connection,
    ursprung_id: int,
    members: list[sqlite3.Row],
) -> dict:
    """Status logic for Anträge (KONZEPT.md 6.2).

    eingereicht: new, no deliberation yet
    in_beratung: at least one deliberation without a final decision
    vertagt: most recent deliberation was adjourned
    verwiesen: referred to another committee
    beschlossen: adopted, < 1 year old, no implementation report
    umgesetzt: implementation report + AI match >= 0.7
    teilweise_umgesetzt: implementation report + AI match 0.4-0.7
    abgelehnt: decision = rejected
    abgewiegelt: adopted + report + AI match < 0.4
    versandet: adopted, > 1 year, no report
    zurückgezogen: explicitly withdrawn
    """
    heute = date.today()
    # members[0] is the chain origin (the Antrag itself).
    ursprung_datum = _parse_date(members[0]["datum_eingang"])

    # Deliberations on the origin, newest first.
    beratungen = conn.execute("""
        SELECT rolle, ergebnis, sitzung_datum
        FROM beratungen
        WHERE vorlage_id = ?
        ORDER BY sitzung_datum DESC NULLS LAST
    """, (ursprung_id,)).fetchall()

    # Number of adjournments ("vertagt") over the whole history.
    vertagungen = sum(1 for b in beratungen if b["ergebnis"] and "vertagt" in b["ergebnis"].lower())

    # Withdrawal short-circuits all other states.
    if _is_zurueckgezogen(beratungen):
        return {"status": "zurückgezogen", "status_seit": _latest_date(beratungen), "vertagungen_count": vertagungen}

    # Implementation reports in the chain (type "bericht").
    berichte = [m for m in members if m["typ"] == "bericht"]
    has_bericht = len(berichte) > 0

    # Final decision derived from the deliberation results.
    beschluss = _get_beschluss(beratungen)

    if beschluss == "abgelehnt":
        return {"status": "abgelehnt", "status_seit": _latest_date(beratungen), "vertagungen_count": vertagungen}

    if beschluss == "angenommen":
        beschluss_datum = _latest_date(beratungen)

        if has_bericht:
            ki_score = _get_ki_score(conn, ursprung_id, "umsetzung_match")
            # berichte[-1] = latest report (members are in suffix order).
            bericht_datum = _vorlage_date(berichte[-1])

            if ki_score is not None:
                if ki_score >= 0.7:
                    return {"status": "umgesetzt", "status_seit": bericht_datum, "vertagungen_count": vertagungen}
                elif ki_score >= 0.4:
                    return {"status": "teilweise_umgesetzt", "status_seit": bericht_datum, "vertagungen_count": vertagungen}
                else:
                    return {"status": "abgewiegelt", "status_seit": bericht_datum, "vertagungen_count": vertagungen}
            # No AI score yet: having a report counts as implemented.
            return {"status": "umgesetzt", "status_seit": bericht_datum, "vertagungen_count": vertagungen}

        # Adopted but no report: goes stale after VERSANDET_TAGE days.
        if beschluss_datum and (heute - beschluss_datum).days > VERSANDET_TAGE:
            return {"status": "versandet", "status_seit": beschluss_datum, "vertagungen_count": vertagungen}

        return {"status": "beschlossen", "status_seit": beschluss_datum, "vertagungen_count": vertagungen}

    if beschluss == "verwiesen":
        return {"status": "verwiesen", "status_seit": _latest_date(beratungen), "vertagungen_count": vertagungen}

    # No final decision yet: distinguish adjourned from still-in-progress
    # based on the most recent deliberation.
    if beratungen:
        last = beratungen[0]
        if last["ergebnis"] and "vertagt" in last["ergebnis"].lower():
            return {"status": "vertagt", "status_seit": _latest_date(beratungen), "vertagungen_count": vertagungen}
        return {"status": "in_beratung", "status_seit": _latest_date(beratungen), "vertagungen_count": vertagungen}

    # No deliberations at all.
    return {"status": "eingereicht", "status_seit": ursprung_datum, "vertagungen_count": vertagungen}
||||||
|
|
||||||
|
|
||||||
|
# --- Helpers ---
|
||||||
|
|
||||||
|
def _parse_date(val: str | None) -> date | None:
|
||||||
|
if not val:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return date.fromisoformat(val)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _vorlage_date(member: sqlite3.Row) -> date | None:
    """Intake date of a chain member, parsed to a date (or None)."""
    raw = member["datum_eingang"]
    return _parse_date(raw)
||||||
|
|
||||||
|
|
||||||
|
def _latest_date(beratungen: list[sqlite3.Row]) -> date | None:
    """Most recent sitzung_datum among the given Beratungen, or None."""
    parsed = []
    for b in beratungen:
        if b["sitzung_datum"]:
            parsed.append(_parse_date(b["sitzung_datum"]))
    if not parsed:
        return None
    return max(parsed)
||||||
|
|
||||||
|
|
||||||
|
def _get_ki_score(conn: sqlite3.Connection, vorlage_id: int, typ: str) -> float | None:
|
||||||
|
row = conn.execute("""
|
||||||
|
SELECT score FROM ki_bewertungen
|
||||||
|
WHERE vorlage_id = ? AND typ = ?
|
||||||
|
ORDER BY erstellt_at DESC
|
||||||
|
LIMIT 1
|
||||||
|
""", (vorlage_id, typ)).fetchone()
|
||||||
|
return row["score"] if row else None
|
||||||
|
|
||||||
|
|
||||||
|
def _is_zurueckgezogen(beratungen: list[sqlite3.Row]) -> bool:
|
||||||
|
return any(
|
||||||
|
b["ergebnis"] and "zurückgezogen" in b["ergebnis"].lower()
|
||||||
|
for b in beratungen
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_beschluss(beratungen: list[sqlite3.Row]) -> str | None:
|
||||||
|
"""Determine the final decision from Beratungen.
|
||||||
|
|
||||||
|
Looks for Entscheidung-role beratungen with a result.
|
||||||
|
"""
|
||||||
|
for b in beratungen:
|
||||||
|
ergebnis = (b["ergebnis"] or "").lower()
|
||||||
|
rolle = (b["rolle"] or "").lower()
|
||||||
|
|
||||||
|
if "abgelehnt" in ergebnis:
|
||||||
|
return "abgelehnt"
|
||||||
|
if "verwiesen" in ergebnis:
|
||||||
|
return "verwiesen"
|
||||||
|
if any(kw in ergebnis for kw in ("angenommen", "empfohlen", "beschlossen", "zugestimmt")):
|
||||||
|
return "angenommen"
|
||||||
|
# If rolle is Entscheidung and there's any ergebnis, it's likely a decision
|
||||||
|
if "entscheidung" in rolle and ergebnis and "vertagt" not in ergebnis:
|
||||||
|
return "angenommen"
|
||||||
|
|
||||||
|
return None
|
||||||
0
backend/src/tracker/db/__init__.py
Normal file
0
backend/src/tracker/db/__init__.py
Normal file
298
backend/src/tracker/db/schema.sql
Normal file
298
backend/src/tracker/db/schema.sql
Normal file
@ -0,0 +1,298 @@
|
|||||||
|
-- Antragstracker Hagen — SQLite Schema
|
||||||
|
-- Schicht 1: ALLRIS-Mirror + Schicht 2: Analyse
|
||||||
|
|
||||||
|
PRAGMA journal_mode = WAL;
|
||||||
|
PRAGMA foreign_keys = ON;
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
-- SCHICHT 1: ALLRIS-Mirror
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
|
||||||
|
-- Parteien (Lookup)
|
||||||
|
CREATE TABLE IF NOT EXISTS parteien (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
kuerzel TEXT UNIQUE NOT NULL,
|
||||||
|
name TEXT,
|
||||||
|
farbe TEXT
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT OR IGNORE INTO parteien (kuerzel, name, farbe) VALUES
|
||||||
|
('GRÜNE', 'Bündnis 90/Die Grünen', '#1AA037'),
|
||||||
|
('CDU', 'Christlich Demokratische Union', '#000000'),
|
||||||
|
('SPD', 'Sozialdemokratische Partei Deutschlands', '#E3000F'),
|
||||||
|
('FDP', 'Freie Demokratische Partei', '#FFED00'),
|
||||||
|
('LINKE', 'Die Linke', '#BE3075'),
|
||||||
|
('AfD', 'Alternative für Deutschland', '#009EE0'),
|
||||||
|
('HBL', 'Hagener Bürger-Liste', '#FF8C00'),
|
||||||
|
('BfHo', 'Bürger für Hohenlimburg', '#8B4513'),
|
||||||
|
('Volt', 'Volt Deutschland', '#502379'),
|
||||||
|
('parteilos', 'Parteilos', '#808080');
|
||||||
|
|
||||||
|
-- Gremien (Lookup)
|
||||||
|
CREATE TABLE IF NOT EXISTS gremien (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
oparl_id TEXT UNIQUE,
|
||||||
|
name TEXT NOT NULL,
|
||||||
|
kuerzel TEXT,
|
||||||
|
typ TEXT -- 'rat', 'ausschuss', 'bv'
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Thematische Klassifikation (Aktenplan)
|
||||||
|
CREATE TABLE IF NOT EXISTS themen_kategorien (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
code TEXT UNIQUE NOT NULL,
|
||||||
|
name TEXT NOT NULL,
|
||||||
|
ebene INTEGER,
|
||||||
|
parent_code TEXT
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Hauptgruppen (Ebene 1)
|
||||||
|
INSERT OR IGNORE INTO themen_kategorien (code, name, ebene, parent_code) VALUES
|
||||||
|
('000', 'Allgemeine Verwaltung', 1, NULL),
|
||||||
|
('100', 'Sicherheit und Ordnung', 1, NULL),
|
||||||
|
('200', 'Schulen', 1, NULL),
|
||||||
|
('300', 'Kultur', 1, NULL),
|
||||||
|
('400', 'Soziales', 1, NULL),
|
||||||
|
('500', 'Gesundheit und Sport', 1, NULL),
|
||||||
|
('600', 'Bauwesen', 1, NULL),
|
||||||
|
('700', 'Öffentliche Einrichtungen', 1, NULL),
|
||||||
|
('800', 'Wirtschaft und Verkehr', 1, NULL),
|
||||||
|
('900', 'Finanzen', 1, NULL);
|
||||||
|
|
||||||
|
-- Gruppen (Ebene 2)
|
||||||
|
INSERT OR IGNORE INTO themen_kategorien (code, name, ebene, parent_code) VALUES
|
||||||
|
('001', 'Verfassung, Verwaltungsorganisation', 2, '000'),
|
||||||
|
('002', 'Statistik, Wahlen', 2, '000'),
|
||||||
|
('003', 'Ratsangelegenheiten', 2, '000'),
|
||||||
|
('004', 'Personal', 2, '000'),
|
||||||
|
('005', 'IT, Digitalisierung', 2, '000'),
|
||||||
|
('006', 'Öffentlichkeitsarbeit', 2, '000'),
|
||||||
|
('007', 'Interkommunale Zusammenarbeit', 2, '000'),
|
||||||
|
('008', 'Bürgerservice, Bürgerbeteiligung', 2, '000'),
|
||||||
|
('110', 'Polizeiwesen, öffentliche Sicherheit', 2, '100'),
|
||||||
|
('111', 'Straßenverkehr, Verkehrssicherheit', 2, '100'),
|
||||||
|
('112', 'Gewerbeaufsicht', 2, '100'),
|
||||||
|
('113', 'Feuerschutz, Feuerwehr', 2, '100'),
|
||||||
|
('114', 'Katastrophenschutz, Rettungsdienst', 2, '100'),
|
||||||
|
('115', 'Ordnungsamt, Ordnungswidrigkeiten', 2, '100'),
|
||||||
|
('116', 'Ausländerwesen', 2, '100'),
|
||||||
|
('117', 'Standesamt', 2, '100'),
|
||||||
|
('210', 'Schulverwaltung', 2, '200'),
|
||||||
|
('211', 'Grundschulen', 2, '200'),
|
||||||
|
('212', 'Weiterführende Schulen', 2, '200'),
|
||||||
|
('213', 'Förderschulen', 2, '200'),
|
||||||
|
('214', 'Berufsschulen', 2, '200'),
|
||||||
|
('215', 'Schulentwicklungsplanung', 2, '200'),
|
||||||
|
('216', 'Schülerbeförderung', 2, '200'),
|
||||||
|
('217', 'OGS, Ganztagsbetreuung', 2, '200'),
|
||||||
|
('310', 'Kulturförderung, Kulturpolitik', 2, '300'),
|
||||||
|
('311', 'Museen', 2, '300'),
|
||||||
|
('312', 'Bibliotheken', 2, '300'),
|
||||||
|
('313', 'Theater, Konzerte', 2, '300'),
|
||||||
|
('314', 'Musikschulen', 2, '300'),
|
||||||
|
('315', 'VHS, Erwachsenenbildung', 2, '300'),
|
||||||
|
('316', 'Denkmalschutz', 2, '300'),
|
||||||
|
('317', 'Stadtgeschichte, Archive', 2, '300'),
|
||||||
|
('410', 'Sozialplanung', 2, '400'),
|
||||||
|
('411', 'Jugendhilfe', 2, '400'),
|
||||||
|
('412', 'Kindertagesstätten', 2, '400'),
|
||||||
|
('413', 'Jugendarbeit', 2, '400'),
|
||||||
|
('414', 'Seniorenarbeit', 2, '400'),
|
||||||
|
('415', 'Behindertenhilfe, Inklusion', 2, '400'),
|
||||||
|
('416', 'Integration, Migration', 2, '400'),
|
||||||
|
('417', 'Wohnungslosenhilfe', 2, '400'),
|
||||||
|
('418', 'Sozialleistungen', 2, '400'),
|
||||||
|
('510', 'Gesundheitsförderung', 2, '500'),
|
||||||
|
('511', 'Gesundheitsamt', 2, '500'),
|
||||||
|
('512', 'Krankenhäuser, Kliniken', 2, '500'),
|
||||||
|
('513', 'Suchtprävention', 2, '500'),
|
||||||
|
('520', 'Sport allgemein', 2, '500'),
|
||||||
|
('521', 'Sportförderung', 2, '500'),
|
||||||
|
('522', 'Sportstätten', 2, '500'),
|
||||||
|
('523', 'Vereinsförderung', 2, '500'),
|
||||||
|
('610', 'Stadtplanung, Bauleitplanung', 2, '600'),
|
||||||
|
('611', 'Bebauungspläne', 2, '600'),
|
||||||
|
('612', 'Flächennutzungsplan', 2, '600'),
|
||||||
|
('613', 'Bauordnung', 2, '600'),
|
||||||
|
('614', 'Hochbau, Gebäudemanagement', 2, '600'),
|
||||||
|
('615', 'Tiefbau, Straßenbau', 2, '600'),
|
||||||
|
('616', 'Wohnungsbau, Wohnungsförderung', 2, '600'),
|
||||||
|
('617', 'Grünflächen, Parks', 2, '600'),
|
||||||
|
('618', 'Spielplätze', 2, '600'),
|
||||||
|
('619', 'Klimaschutz, Umwelt', 2, '600'),
|
||||||
|
('710', 'Versorgung, Entsorgung', 2, '700'),
|
||||||
|
('711', 'Wasserversorgung', 2, '700'),
|
||||||
|
('712', 'Abwasser, Kanalisation', 2, '700'),
|
||||||
|
('713', 'Abfallwirtschaft', 2, '700'),
|
||||||
|
('714', 'Friedhöfe', 2, '700'),
|
||||||
|
('715', 'Bäder, Schwimmbäder', 2, '700'),
|
||||||
|
('716', 'Märkte', 2, '700'),
|
||||||
|
('717', 'Stadtwerke', 2, '700'),
|
||||||
|
('810', 'Wirtschaftsförderung', 2, '800'),
|
||||||
|
('811', 'Arbeitsmarkt', 2, '800'),
|
||||||
|
('812', 'Tourismus', 2, '800'),
|
||||||
|
('820', 'Verkehr allgemein', 2, '800'),
|
||||||
|
('821', 'ÖPNV, Nahverkehr', 2, '800'),
|
||||||
|
('822', 'Radverkehr', 2, '800'),
|
||||||
|
('823', 'Fußverkehr', 2, '800'),
|
||||||
|
('824', 'Straßen, Parkraum', 2, '800'),
|
||||||
|
('825', 'Mobilität, Verkehrswende', 2, '800'),
|
||||||
|
('910', 'Haushalt', 2, '900'),
|
||||||
|
('911', 'Steuern, Abgaben', 2, '900'),
|
||||||
|
('912', 'Liegenschaften', 2, '900'),
|
||||||
|
('913', 'Beteiligungen', 2, '900'),
|
||||||
|
('914', 'Fördermittel', 2, '900'),
|
||||||
|
('915', 'Gebühren', 2, '900');
|
||||||
|
|
||||||
|
-- Schlagwörter
|
||||||
|
CREATE TABLE IF NOT EXISTS schlagwoerter (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
name TEXT UNIQUE NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Vorlagen (Kerntabelle)
|
||||||
|
CREATE TABLE IF NOT EXISTS vorlagen (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
oparl_id TEXT UNIQUE,
|
||||||
|
volfdnr INTEGER,
|
||||||
|
aktenzeichen TEXT,
|
||||||
|
aktenzeichen_basis TEXT,
|
||||||
|
aktenzeichen_suffix TEXT,
|
||||||
|
typ TEXT,
|
||||||
|
betreff TEXT,
|
||||||
|
volltext TEXT,
|
||||||
|
volltext_clean TEXT,
|
||||||
|
datum_eingang DATE,
|
||||||
|
pdf_url TEXT,
|
||||||
|
web_url TEXT,
|
||||||
|
ist_verwaltungsvorlage BOOLEAN DEFAULT FALSE,
|
||||||
|
thema_kategorie_id INTEGER REFERENCES themen_kategorien(id),
|
||||||
|
thema_kurz TEXT,
|
||||||
|
scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_vorlagen_aktenzeichen ON vorlagen(aktenzeichen);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_vorlagen_aktenzeichen_basis ON vorlagen(aktenzeichen_basis);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_vorlagen_typ ON vorlagen(typ);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_vorlagen_datum ON vorlagen(datum_eingang);
|
||||||
|
|
||||||
|
-- Verschlagwortung (n:m)
|
||||||
|
CREATE TABLE IF NOT EXISTS vorlage_schlagwoerter (
|
||||||
|
vorlage_id INTEGER REFERENCES vorlagen(id),
|
||||||
|
schlagwort_id INTEGER REFERENCES schlagwoerter(id),
|
||||||
|
konfidenz REAL DEFAULT 1.0,
|
||||||
|
PRIMARY KEY (vorlage_id, schlagwort_id)
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Ortsbezüge
|
||||||
|
CREATE TABLE IF NOT EXISTS ortsbezuege (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
vorlage_id INTEGER REFERENCES vorlagen(id),
|
||||||
|
ortsangabe TEXT NOT NULL,
|
||||||
|
ortstyp TEXT,
|
||||||
|
strasse1 TEXT,
|
||||||
|
strasse2 TEXT,
|
||||||
|
hausnummer TEXT,
|
||||||
|
stadtteil TEXT,
|
||||||
|
lat REAL,
|
||||||
|
lon REAL,
|
||||||
|
geocoding_quelle TEXT,
|
||||||
|
geocoding_konfidenz REAL,
|
||||||
|
konfidenz REAL DEFAULT 1.0,
|
||||||
|
kontext TEXT,
|
||||||
|
ist_antragsgegenstand BOOLEAN DEFAULT TRUE
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Antragsteller (n:m)
|
||||||
|
CREATE TABLE IF NOT EXISTS antragsteller (
|
||||||
|
vorlage_id INTEGER REFERENCES vorlagen(id),
|
||||||
|
partei_id INTEGER REFERENCES parteien(id),
|
||||||
|
rolle TEXT DEFAULT 'antragsteller',
|
||||||
|
PRIMARY KEY (vorlage_id, partei_id)
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Beratungsfolge
|
||||||
|
CREATE TABLE IF NOT EXISTS beratungen (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
vorlage_id INTEGER REFERENCES vorlagen(id),
|
||||||
|
gremium_id INTEGER REFERENCES gremien(id),
|
||||||
|
sitzung_datum DATE,
|
||||||
|
sitzung_id INTEGER,
|
||||||
|
tagesordnungspunkt TEXT,
|
||||||
|
rolle TEXT,
|
||||||
|
ergebnis TEXT,
|
||||||
|
ergebnis_text TEXT,
|
||||||
|
protokoll_auszug TEXT
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_beratungen_vorlage ON beratungen(vorlage_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_beratungen_gremium ON beratungen(gremium_id);
|
||||||
|
|
||||||
|
-- Anlagen
|
||||||
|
CREATE TABLE IF NOT EXISTS anlagen (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
vorlage_id INTEGER REFERENCES vorlagen(id),
|
||||||
|
dateiname TEXT,
|
||||||
|
url TEXT,
|
||||||
|
inhalt_text TEXT,
|
||||||
|
braucht_extraktion BOOLEAN DEFAULT FALSE
|
||||||
|
);
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
-- SCHICHT 2: Analyse
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
|
||||||
|
-- Referenzen (Graph-Kanten)
|
||||||
|
CREATE TABLE IF NOT EXISTS referenzen (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
quelle_id INTEGER REFERENCES vorlagen(id),
|
||||||
|
ziel_id INTEGER REFERENCES vorlagen(id),
|
||||||
|
typ TEXT NOT NULL,
|
||||||
|
konfidenz REAL DEFAULT 1.0,
|
||||||
|
kontext TEXT,
|
||||||
|
UNIQUE(quelle_id, ziel_id, typ)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_referenzen_quelle ON referenzen(quelle_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_referenzen_ziel ON referenzen(ziel_id);
|
||||||
|
|
||||||
|
-- Ketten
|
||||||
|
CREATE TABLE IF NOT EXISTS ketten (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
ursprung_id INTEGER REFERENCES vorlagen(id),
|
||||||
|
typ TEXT,
|
||||||
|
thema TEXT,
|
||||||
|
status TEXT,
|
||||||
|
status_seit DATE,
|
||||||
|
letzte_aktivitaet DATE,
|
||||||
|
vertagungen_count INTEGER DEFAULT 0
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Ketten-Glieder
|
||||||
|
CREATE TABLE IF NOT EXISTS ketten_glieder (
|
||||||
|
kette_id INTEGER REFERENCES ketten(id),
|
||||||
|
vorlage_id INTEGER REFERENCES vorlagen(id),
|
||||||
|
position INTEGER,
|
||||||
|
rolle TEXT,
|
||||||
|
PRIMARY KEY (kette_id, vorlage_id)
|
||||||
|
);
|
||||||
|
|
||||||
|
-- KI-Bewertungen
|
||||||
|
CREATE TABLE IF NOT EXISTS ki_bewertungen (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
vorlage_id INTEGER REFERENCES vorlagen(id),
|
||||||
|
bezug_id INTEGER,
|
||||||
|
typ TEXT NOT NULL,
|
||||||
|
score REAL,
|
||||||
|
konfidenz REAL,
|
||||||
|
begruendung TEXT,
|
||||||
|
anmerkungen TEXT,
|
||||||
|
unsicher BOOLEAN DEFAULT FALSE,
|
||||||
|
review_grund TEXT,
|
||||||
|
modell TEXT,
|
||||||
|
prompt_version TEXT,
|
||||||
|
erstellt_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_ki_bewertungen_vorlage ON ki_bewertungen(vorlage_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_ki_bewertungen_typ ON ki_bewertungen(typ);
|
||||||
27
backend/src/tracker/db/session.py
Normal file
27
backend/src/tracker/db/session.py
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
"""SQLite database connection management."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Use environment variable or default to relative path
|
||||||
|
DB_PATH = Path(os.environ.get("DATABASE_PATH", Path(__file__).resolve().parents[4] / "data" / "tracker.db"))
|
||||||
|
|
||||||
|
|
||||||
|
def get_connection(db_path: Path | str | None = None) -> sqlite3.Connection:
|
||||||
|
path = str(db_path or DB_PATH)
|
||||||
|
conn = sqlite3.connect(path, detect_types=0)
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
conn.execute("PRAGMA journal_mode = WAL")
|
||||||
|
conn.execute("PRAGMA foreign_keys = ON")
|
||||||
|
return conn
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
def get_db(db_path: Path | str | None = None):
    """Context manager yielding a configured connection; always closes it."""
    connection = get_connection(db_path)
    try:
        yield connection
    finally:
        connection.close()
|
||||||
43
backend/src/tracker/main.py
Normal file
43
backend/src/tracker/main.py
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
"""FastAPI application for Antragstracker Hagen."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from fastapi import FastAPI
|
||||||
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
from fastapi.staticfiles import StaticFiles
|
||||||
|
|
||||||
|
from tracker.api.routes import abstimmungen, ketten, orte, stats, vorlagen
|
||||||
|
|
||||||
|
# Application metadata shown in the generated OpenAPI docs.
app = FastAPI(
    title="Antragstracker Hagen",
    description="API zur Nachverfolgung kommunaler Anträge und Anfragen",
    version="0.1.0",
)

# Read-only public API: only GET is allowed; origins are left open.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["GET"],
    allow_headers=["*"],
)

# All resource routers share the /api prefix.
app.include_router(vorlagen.router, prefix="/api")
app.include_router(ketten.router, prefix="/api")
app.include_router(stats.router, prefix="/api")
app.include_router(abstimmungen.router, prefix="/api")
app.include_router(orte.router, prefix="/api")


@app.get("/api/health")
def health():
    """Simple liveness probe; returns a static OK payload."""
    return {"status": "ok"}


# Serve static frontend files in production
# Try multiple paths (Docker vs local dev); mount the first directory that
# contains a built index.html.
# NOTE(review): mounting at "/" happens after the routers above — confirm
# against Starlette routing order that /api/* keeps precedence.
for static_path in ["/app/static", Path(__file__).parent.parent.parent.parent / "static"]:
    static_dir = Path(static_path)
    if static_dir.exists() and (static_dir / "index.html").exists():
        app.mount("/", StaticFiles(directory=str(static_dir), html=True), name="static")
        break
|
||||||
0
backend/tests/__init__.py
Normal file
0
backend/tests/__init__.py
Normal file
113
backend/tests/test_api.py
Normal file
113
backend/tests/test_api.py
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
"""Tests for API endpoints against the real database."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
from tracker.main import app
|
||||||
|
from tracker.db.session import get_connection
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def client():
    # TestClient drives the ASGI app in-process; no running server needed.
    return TestClient(app)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def db():
    """Yield a connection to the real database, closed after the test."""
    connection = get_connection()
    yield connection
    connection.close()
|
||||||
|
|
||||||
|
|
||||||
|
# --- Sanity checks on the DB ---
|
||||||
|
|
||||||
|
class TestDatabaseSanity:
    """Sanity checks that the imported ALLRIS data is present and plausible."""

    def test_vorlagen_count(self, db):
        """The full import should contain at least 6000 Vorlagen."""
        row = db.execute("SELECT COUNT(*) as cnt FROM vorlagen").fetchone()
        count = row["cnt"]
        assert count >= 6000, f"Expected >=6000 Vorlagen, got {count}"

    def test_vorlagen_types(self, db):
        """Both core document types must appear in the data."""
        rows = db.execute(
            "SELECT DISTINCT typ FROM vorlagen WHERE typ IS NOT NULL"
        ).fetchall()
        found = {row["typ"] for row in rows}
        assert "antrag" in found
        assert "anfrage" in found

    def test_beratungen_exist(self, db):
        """The Beratungsfolge scraper must have produced rows."""
        row = db.execute("SELECT COUNT(*) as cnt FROM beratungen").fetchone()
        assert row["cnt"] > 0, "No Beratungen in DB"

    def test_suffix_vorlagen_exist(self, db):
        """Vorlagen with an Aktenzeichen suffix must exist."""
        row = db.execute(
            "SELECT COUNT(*) as cnt FROM vorlagen WHERE aktenzeichen_suffix IS NOT NULL"
        ).fetchone()
        assert row["cnt"] > 0, "No suffix Vorlagen in DB"
|
||||||
|
|
||||||
|
|
||||||
|
# --- API: Health ---
|
||||||
|
|
||||||
|
class TestHealth:
    """The health endpoint must respond and report OK."""

    def test_health(self, client):
        response = client.get("/api/health")
        assert response.status_code == 200
        body = response.json()
        assert body["status"] == "ok"
|
||||||
|
|
||||||
|
|
||||||
|
# --- API: Vorlagen ---
|
||||||
|
|
||||||
|
class TestVorlagenAPI:
    """End-to-end checks of the /api/vorlagen endpoints."""

    def test_list_vorlagen(self, client):
        """Pagination returns the requested page size and the total count."""
        response = client.get("/api/vorlagen?page=1&page_size=10")
        assert response.status_code == 200
        payload = response.json()
        assert payload["total"] >= 6000
        assert len(payload["items"]) == 10
        assert payload["page"] == 1

    def test_list_vorlagen_filter_typ(self, client):
        """Filtering by typ returns only matching items."""
        response = client.get("/api/vorlagen?typ=antrag&page_size=5")
        assert response.status_code == 200
        payload = response.json()
        assert payload["total"] > 0
        for item in payload["items"]:
            assert item["typ"] == "antrag"

    def test_list_vorlagen_filter_suche(self, client):
        """Full-text search must not error, regardless of hit count."""
        response = client.get("/api/vorlagen?suche=Klimaschutz&page_size=5")
        assert response.status_code == 200

    def test_get_vorlage_detail(self, client, db):
        """The detail endpoint returns the Vorlage it was asked for."""
        # Pick any Vorlage that has an Aktenzeichen to look up.
        row = db.execute(
            "SELECT id FROM vorlagen WHERE aktenzeichen IS NOT NULL LIMIT 1"
        ).fetchone()
        assert row is not None

        response = client.get(f"/api/vorlagen/{row['id']}")
        assert response.status_code == 200
        payload = response.json()
        assert payload["id"] == row["id"]
        assert payload["aktenzeichen"] is not None

    def test_get_vorlage_not_found(self, client):
        """Unknown IDs yield 404."""
        response = client.get("/api/vorlagen/999999")
        assert response.status_code == 404
|
||||||
|
|
||||||
|
|
||||||
|
# --- API: Ketten ---
|
||||||
|
|
||||||
|
class TestKettenAPI:
    """End-to-end checks of the /api/ketten endpoints."""

    def test_list_ketten_empty_initially(self, client):
        """Before building chains, the list may be empty."""
        response = client.get("/api/ketten?page_size=5")
        assert response.status_code == 200

    def test_list_ketten_filter(self, client):
        """The status filter must be accepted without error."""
        response = client.get("/api/ketten?status=eingereicht&page_size=5")
        assert response.status_code == 200

    def test_get_kette_not_found(self, client):
        """Unknown chain IDs yield 404."""
        response = client.get("/api/ketten/999999")
        assert response.status_code == 404
|
||||||
89
backend/tests/test_chains.py
Normal file
89
backend/tests/test_chains.py
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
"""Tests for the Ketten-Builder and Status-Engine."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from tracker.core.chains import build_suffix_references, build_chains
|
||||||
|
from tracker.db.session import get_connection
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def db():
|
||||||
|
conn = get_connection()
|
||||||
|
yield conn
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
class TestSuffixReferences:
    """Tests for deriving 'suffix' references between related Vorlagen."""

    def test_build_suffix_references(self, db):
        """Should create referenzen for suffix-linked Vorlagen."""
        # Fix: the return value was previously bound to an unused local;
        # the stale hard-coded "166 suffix Vorlagen" comment is dropped —
        # the DB-level count below is the actual assertion.
        build_suffix_references(db)
        total = db.execute(
            "SELECT COUNT(*) as cnt FROM referenzen WHERE typ = 'suffix'"
        ).fetchone()["cnt"]
        assert total > 0, "No suffix references created"

    def test_suffix_reference_links_correct(self, db):
        """Verify that suffix references link child -> parent correctly."""
        build_suffix_references(db)
        # Sample a handful of suffix edges with both endpoints resolved.
        rows = db.execute("""
            SELECT r.quelle_id, r.ziel_id,
                   q.aktenzeichen as q_az, z.aktenzeichen as z_az
            FROM referenzen r
            JOIN vorlagen q ON r.quelle_id = q.id
            JOIN vorlagen z ON r.ziel_id = z.id
            WHERE r.typ = 'suffix'
            LIMIT 5
        """).fetchall()

        for r in rows:
            # The quelle (child) should carry an Aktenzeichen; a stronger
            # parent/child ordering check would need the suffix columns.
            assert r["q_az"] is not None
|
||||||
|
|
||||||
|
|
||||||
|
class TestChainBuilder:
    """Tests for assembling Ketten from Vorlagen and their references."""

    def _prepare(self, db):
        # Every chain test needs references first, then the chains themselves.
        build_suffix_references(db)
        return build_chains(db)

    def test_build_chains(self, db):
        """Should create ketten for Anträge and Anfragen."""
        created = self._prepare(db)
        assert created > 0, "No chains built"

        # Verify chains exist in DB
        total = db.execute("SELECT COUNT(*) as cnt FROM ketten").fetchone()["cnt"]
        assert total > 0

    def test_chain_has_glieder(self, db):
        """Each chain should have at least one Glied (the Ursprung)."""
        self._prepare(db)

        orphans = db.execute("""
            SELECT k.id FROM ketten k
            LEFT JOIN ketten_glieder kg ON k.id = kg.kette_id
            WHERE kg.kette_id IS NULL
        """).fetchall()

        assert len(orphans) == 0, f"Found {len(orphans)} chains without Glieder"

    def test_chain_status_is_set(self, db):
        """Every chain should have a status."""
        self._prepare(db)

        no_status = db.execute(
            "SELECT COUNT(*) as cnt FROM ketten WHERE status IS NULL"
        ).fetchone()["cnt"]

        assert no_status == 0, f"Found {no_status} chains without status"

    def test_chain_types(self, db):
        """Chains should only be antrag or anfrage."""
        self._prepare(db)

        rows = db.execute("SELECT DISTINCT typ FROM ketten").fetchall()
        type_names = {r["typ"] for r in rows}
        assert type_names <= {"antrag", "anfrage"}, f"Unexpected chain types: {type_names}"
|
||||||
23
docker-compose.yml
Normal file
23
docker-compose.yml
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
version: '3.8'
|
||||||
|
|
||||||
|
services:
|
||||||
|
antragstracker:
|
||||||
|
build: .
|
||||||
|
container_name: antragstracker-hagen
|
||||||
|
restart: unless-stopped
|
||||||
|
volumes:
|
||||||
|
- ./data:/app/data
|
||||||
|
environment:
|
||||||
|
- DATABASE_PATH=/app/data/tracker.db
|
||||||
|
networks:
|
||||||
|
- collaboration_collaboration
|
||||||
|
labels:
|
||||||
|
- "traefik.enable=true"
|
||||||
|
- "traefik.http.routers.antragstracker.rule=Host(`antraege.toppyr.de`)"
|
||||||
|
- "traefik.http.routers.antragstracker.entrypoints=websecure"
|
||||||
|
- "traefik.http.routers.antragstracker.tls.certresolver=letsencrypt"
|
||||||
|
- "traefik.http.services.antragstracker.loadbalancer.server.port=8000"
|
||||||
|
|
||||||
|
networks:
|
||||||
|
collaboration_collaboration:
|
||||||
|
external: true
|
||||||
23
frontend/.gitignore
vendored
Normal file
23
frontend/.gitignore
vendored
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
node_modules
|
||||||
|
|
||||||
|
# Output
|
||||||
|
.output
|
||||||
|
.vercel
|
||||||
|
.netlify
|
||||||
|
.wrangler
|
||||||
|
/.svelte-kit
|
||||||
|
/build
|
||||||
|
|
||||||
|
# OS
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
||||||
|
|
||||||
|
# Env
|
||||||
|
.env
|
||||||
|
.env.*
|
||||||
|
!.env.example
|
||||||
|
!.env.test
|
||||||
|
|
||||||
|
# Vite
|
||||||
|
vite.config.js.timestamp-*
|
||||||
|
vite.config.ts.timestamp-*
|
||||||
1
frontend/.npmrc
Normal file
1
frontend/.npmrc
Normal file
@ -0,0 +1 @@
|
|||||||
|
engine-strict=true
|
||||||
3
frontend/.vscode/extensions.json
vendored
Normal file
3
frontend/.vscode/extensions.json
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"recommendations": ["svelte.svelte-vscode"]
|
||||||
|
}
|
||||||
42
frontend/README.md
Normal file
42
frontend/README.md
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
# Antragstracker Hagen — Frontend (SvelteKit)
|
||||||
|
|
||||||
|
Everything you need to build a Svelte project, powered by [`sv`](https://github.com/sveltejs/cli).
|
||||||
|
|
||||||
|
## Creating a project
|
||||||
|
|
||||||
|
If you're seeing this, you've probably already done this step. Congrats!
|
||||||
|
|
||||||
|
```sh
|
||||||
|
# create a new project
|
||||||
|
npx sv create my-app
|
||||||
|
```
|
||||||
|
|
||||||
|
To recreate this project with the same configuration:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
# recreate this project
|
||||||
|
npx sv@0.13.0 create --template minimal --types ts --install npm frontend
|
||||||
|
```
|
||||||
|
|
||||||
|
## Developing
|
||||||
|
|
||||||
|
Once you've created a project and installed dependencies with `npm install` (or `pnpm install` or `yarn`), start a development server:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
npm run dev
|
||||||
|
|
||||||
|
# or start the server and open the app in a new browser tab
|
||||||
|
npm run dev -- --open
|
||||||
|
```
|
||||||
|
|
||||||
|
## Building
|
||||||
|
|
||||||
|
To create a production version of your app:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
npm run build
|
||||||
|
```
|
||||||
|
|
||||||
|
You can preview the production build with `npm run preview`.
|
||||||
|
|
||||||
|
> To deploy your app, you may need to install an [adapter](https://svelte.dev/docs/kit/adapters) for your target environment.
|
||||||
2267
frontend/package-lock.json
generated
Normal file
2267
frontend/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
30
frontend/package.json
Normal file
30
frontend/package.json
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
{
|
||||||
|
"name": "frontend",
|
||||||
|
"private": true,
|
||||||
|
"version": "0.0.1",
|
||||||
|
"type": "module",
|
||||||
|
"scripts": {
|
||||||
|
"dev": "vite dev",
|
||||||
|
"build": "vite build",
|
||||||
|
"preview": "vite preview",
|
||||||
|
"prepare": "svelte-kit sync || echo ''",
|
||||||
|
"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
|
||||||
|
"check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@sveltejs/adapter-auto": "^7.0.0",
|
||||||
|
"@sveltejs/adapter-static": "^3.0.10",
|
||||||
|
"@sveltejs/kit": "^2.50.2",
|
||||||
|
"@sveltejs/vite-plugin-svelte": "^6.2.4",
|
||||||
|
"@tailwindcss/vite": "^4.2.2",
|
||||||
|
"svelte": "^5.54.0",
|
||||||
|
"svelte-check": "^4.4.2",
|
||||||
|
"tailwindcss": "^4.2.2",
|
||||||
|
"typescript": "^5.9.3",
|
||||||
|
"vite": "^7.3.1"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"@types/leaflet": "^1.9.21",
|
||||||
|
"leaflet": "^1.9.4"
|
||||||
|
}
|
||||||
|
}
|
||||||
1
frontend/src/app.css
Normal file
1
frontend/src/app.css
Normal file
@ -0,0 +1 @@
|
|||||||
|
@import "tailwindcss";
|
||||||
13
frontend/src/app.d.ts
vendored
Normal file
13
frontend/src/app.d.ts
vendored
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
// See https://svelte.dev/docs/kit/types#app.d.ts
|
||||||
|
// for information about these interfaces
|
||||||
|
declare global {
|
||||||
|
namespace App {
|
||||||
|
// interface Error {}
|
||||||
|
// interface Locals {}
|
||||||
|
// interface PageData {}
|
||||||
|
// interface PageState {}
|
||||||
|
// interface Platform {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export {};
|
||||||
11
frontend/src/app.html
Normal file
11
frontend/src/app.html
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html lang="de">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||||
|
%sveltekit.head%
|
||||||
|
</head>
|
||||||
|
<body data-sveltekit-preload-data="hover">
|
||||||
|
<div style="display: contents">%sveltekit.body%</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
169
frontend/src/lib/api.ts
Normal file
169
frontend/src/lib/api.ts
Normal file
@ -0,0 +1,169 @@
|
|||||||
|
// API-Base: In Produktion relativ, in Dev mit Port
|
||||||
|
const BASE = typeof window !== 'undefined'
|
||||||
|
? (window.location.port === '5173'
|
||||||
|
? `http://${window.location.hostname}:8099/api` // Dev
|
||||||
|
: '/api') // Produktion
|
||||||
|
: '/api';
|
||||||
|
|
||||||
|
async function get<T>(path: string): Promise<T> {
|
||||||
|
const res = await fetch(`${BASE}${path}`);
|
||||||
|
if (!res.ok) throw new Error(`API error: ${res.status}`);
|
||||||
|
return res.json();
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface VorlageKurz {
|
||||||
|
id: number;
|
||||||
|
aktenzeichen: string | null;
|
||||||
|
typ: string | null;
|
||||||
|
betreff: string | null;
|
||||||
|
datum_eingang: string | null;
|
||||||
|
ist_verwaltungsvorlage: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- API response types (mirror the backend's FastAPI/Pydantic schemas) ---
// Field names are German domain terms from the OParl/ALLRIS data model
// (Vorlage = council document, Kette = chain of related documents,
// Gremium = committee, Partei = party/faction).

/** Party / faction. */
export interface ParteiOut {
	id: number;
	kuerzel: string;          // short code, e.g. party abbreviation
	name: string | null;
	farbe: string | null;     // display colour — presumably a CSS value; TODO confirm format
}

/** Committee (council, committee, district council, …). */
export interface GremiumOut {
	id: number;
	name: string;
	kuerzel: string | null;
	typ: string | null;
}

/** One consultation of a Vorlage in a Gremium (an entry of the Beratungsfolge). */
export interface BeratungOut {
	id: number;
	gremium: GremiumOut | null;
	sitzung_datum: string | null;   // ISO date string of the meeting, if known
	rolle: string | null;
	ergebnis: string | null;        // machine result code
	ergebnis_text: string | null;   // human-readable decision text
}

/** A cross-reference between two Vorlagen (incoming or outgoing). */
export interface ReferenzOut {
	vorlage_id: number;             // the referenced Vorlage on the other end
	aktenzeichen: string | null;
	betreff: string | null;
	vorlage_typ: string | null;
	datum_eingang: string | null;
	ref_typ: string | null;         // kind of reference (e.g. answer, follow-up)
	konfidenz: number | null;       // extraction confidence — range not specified here; TODO confirm
	kontext: string | null;         // text snippet the reference was found in
}

/** Full detail view of a Vorlage; extends the list item `VorlageKurz`. */
export interface VorlageDetail extends VorlageKurz {
	aktenzeichen_basis: string | null;
	aktenzeichen_suffix: string | null;
	volltext_clean: string | null;  // cleaned PDF full text
	pdf_url: string | null;
	web_url: string | null;
	thema_kurz: string | null;      // short AI-generated topic, presumably; verify against backend
	antragsteller: ParteiOut[];
	beratungen: BeratungOut[];
	referenzen_ausgehend: ReferenzOut[];  // references FROM this Vorlage
	referenzen_eingehend: ReferenzOut[];  // references TO this Vorlage
	kette_id: number | null;        // chain this Vorlage belongs to, if any
}

/** List item for a Kette (chain of related Vorlagen). */
export interface KetteKurz {
	id: number;
	ursprung: VorlageKurz | null;   // originating Vorlage of the chain
	typ: string | null;
	thema: string | null;
	status: string | null;          // key into STATUS_CONFIG in $lib/status
	status_seit: string | null;
	letzte_aktivitaet: string | null;
	vertagungen_count: number;      // how often the matter was postponed
	glieder_count: number;          // number of chain links
}

/** One link of a chain: a Vorlage plus its position and role in the chain. */
export interface KettenGliedOut {
	vorlage: VorlageKurz;
	position: number;
	rolle: string | null;
}

/** Full detail view of a Kette, including its reference graph. */
export interface KetteDetail {
	id: number;
	ursprung: VorlageKurz | null;
	typ: string | null;
	thema: string | null;
	status: string | null;
	status_seit: string | null;
	letzte_aktivitaet: string | null;
	vertagungen_count: number;
	glieder: KettenGliedOut[];
	antragsteller: ParteiOut[];
	graph: {
		nodes: GraphNode[];
		edges: GraphEdge[];
	} | null;
}

/** Node of the chain's reference graph (a Vorlage, possibly external to the chain). */
export interface GraphNode {
	id: number;
	aktenzeichen: string | null;
	typ: string | null;
	betreff: string | null;
	datum_eingang: string | null;
	position?: number;              // only set for nodes that are chain links
	rolle?: string;
	ist_verwaltungsvorlage?: boolean;
	extern?: boolean;               // true when the node is outside the chain
	beratungen?: { sitzung_datum: string; rolle: string; ergebnis: string; gremium_name: string }[];
	antragsteller?: { kuerzel: string; name: string; farbe: string }[];
}

/** Directed edge of the reference graph. */
export interface GraphEdge {
	quelle_id: number;   // source node id
	ziel_id: number;     // target node id
	typ: string;
	konfidenz: number;
}

/** Generic paginated list envelope returned by list endpoints. */
export interface Paginated<T> {
	items: T[];
	total: number;
	page: number;
	page_size: number;
}

/** Dashboard statistics payload of GET /stats. */
export interface Stats {
	vorlagen_total: number;
	beratungen_total: number;
	ketten_total: number;
	vorlagen_nach_typ: { typ: string; anzahl: number }[];
	ketten_nach_status: { status: string; anzahl: number }[];
	ketten_nach_typ: { typ: string; anzahl: number }[];
	letzte_vorlagen: VorlageKurz[];
	parteien: { kuerzel: string; name: string; farbe: string | null; anzahl: number }[];
	gremien: { name: string; kuerzel: string | null; typ: string | null; anzahl: number }[];
	timeline: { monat: string; anzahl: number }[];   // per-month counts, `monat` presumably "YYYY-MM"
}

/** Chain statistics payload of GET /stats/ketten-stats. */
export interface KettenStats {
	nach_typ: Record<string, { status: string; anzahl: number }[]>;
	status_detail: { status: string; anzahl: number; avg_tage: number; avg_vertagungen: number }[];
	versandungs_fruehwarnung: number;   // early-warning count for chains about to "versanden" (peter out)
}
|
||||||
|
|
||||||
|
// API functions
|
||||||
|
export const fetchStats = () => get<Stats>('/stats');
|
||||||
|
export const fetchKettenStats = () => get<KettenStats>('/stats/ketten-stats');
|
||||||
|
|
||||||
|
export const fetchVorlagen = (params: Record<string, string>) => {
|
||||||
|
const qs = new URLSearchParams(params).toString();
|
||||||
|
return get<Paginated<VorlageKurz>>(`/vorlagen?${qs}`);
|
||||||
|
};
|
||||||
|
|
||||||
|
export const fetchVorlage = (id: number) => get<VorlageDetail>(`/vorlagen/${id}`);
|
||||||
|
|
||||||
|
export const fetchKetten = (params: Record<string, string>) => {
|
||||||
|
const qs = new URLSearchParams(params).toString();
|
||||||
|
return get<Paginated<KetteKurz>>(`/ketten?${qs}`);
|
||||||
|
};
|
||||||
|
|
||||||
|
export const fetchKette = (id: number) => get<KetteDetail>(`/ketten/${id}`);
|
||||||
1
frontend/src/lib/assets/favicon.svg
Normal file
1
frontend/src/lib/assets/favicon.svg
Normal file
@ -0,0 +1 @@
|
|||||||
|
<svg xmlns="http://www.w3.org/2000/svg" width="107" height="128" viewBox="0 0 107 128"><title>svelte-logo</title><path d="M94.157 22.819c-10.4-14.885-30.94-19.297-45.792-9.835L22.282 29.608A29.92 29.92 0 0 0 8.764 49.65a31.5 31.5 0 0 0 3.108 20.231 30 30 0 0 0-4.477 11.183 31.9 31.9 0 0 0 5.448 24.116c10.402 14.887 30.942 19.297 45.791 9.835l26.083-16.624A29.92 29.92 0 0 0 98.235 78.35a31.53 31.53 0 0 0-3.105-20.232 30 30 0 0 0 4.474-11.182 31.88 31.88 0 0 0-5.447-24.116" style="fill:#ff3e00"/><path d="M45.817 106.582a20.72 20.72 0 0 1-22.237-8.243 19.17 19.17 0 0 1-3.277-14.503 18 18 0 0 1 .624-2.435l.49-1.498 1.337.981a33.6 33.6 0 0 0 10.203 5.098l.97.294-.09.968a5.85 5.85 0 0 0 1.052 3.878 6.24 6.24 0 0 0 6.695 2.485 5.8 5.8 0 0 0 1.603-.704L69.27 76.28a5.43 5.43 0 0 0 2.45-3.631 5.8 5.8 0 0 0-.987-4.371 6.24 6.24 0 0 0-6.698-2.487 5.7 5.7 0 0 0-1.6.704l-9.953 6.345a19 19 0 0 1-5.296 2.326 20.72 20.72 0 0 1-22.237-8.243 19.17 19.17 0 0 1-3.277-14.502 17.99 17.99 0 0 1 8.13-12.052l26.081-16.623a19 19 0 0 1 5.3-2.329 20.72 20.72 0 0 1 22.237 8.243 19.17 19.17 0 0 1 3.277 14.503 18 18 0 0 1-.624 2.435l-.49 1.498-1.337-.98a33.6 33.6 0 0 0-10.203-5.1l-.97-.294.09-.968a5.86 5.86 0 0 0-1.052-3.878 6.24 6.24 0 0 0-6.696-2.485 5.8 5.8 0 0 0-1.602.704L37.73 51.72a5.42 5.42 0 0 0-2.449 3.63 5.79 5.79 0 0 0 .986 4.372 6.24 6.24 0 0 0 6.698 2.486 5.8 5.8 0 0 0 1.602-.704l9.952-6.342a19 19 0 0 1 5.295-2.328 20.72 20.72 0 0 1 22.237 8.242 19.17 19.17 0 0 1 3.277 14.503 18 18 0 0 1-8.13 12.053l-26.081 16.622a19 19 0 0 1-5.3 2.328" style="fill:#fff"/></svg>
|
||||||
|
After Width: | Height: | Size: 1.5 KiB |
11
frontend/src/lib/components/KpiCard.svelte
Normal file
11
frontend/src/lib/components/KpiCard.svelte
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
<script lang="ts">
	// KPI stat card: a title, a large value, and an optional subtitle line.
	let { title, value, subtitle = '' }: { title: string; value: string | number; subtitle?: string } = $props();
</script>

<div class="bg-white rounded-xl shadow-sm border border-gray-200 p-6">
	<dt class="text-sm font-medium text-gray-500 truncate">{title}</dt>
	<!-- toLocaleString('de-DE') adds German thousands separators for numeric values;
	     for string values String.prototype.toLocaleString effectively returns the string unchanged. -->
	<dd class="mt-1 text-3xl font-bold text-gray-900">{value.toLocaleString('de-DE')}</dd>
	{#if subtitle}
		<dd class="mt-1 text-sm text-gray-500">{subtitle}</dd>
	{/if}
</div>
|
||||||
55
frontend/src/lib/components/Perlenschnur.svelte
Normal file
55
frontend/src/lib/components/Perlenschnur.svelte
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
<script lang="ts">
	// "Perlenschnur" (string of pearls): vertical timeline of the Vorlagen
	// that make up one Kette. First dot shows the chain's status emoji,
	// later dots show an icon per role of the link.
	import type { KettenGliedOut } from '$lib/api';
	import { statusInfo, typLabel, formatDate } from '$lib/status';

	let { glieder, status }: { glieder: KettenGliedOut[]; status: string | null } = $props();
	const info = $derived(statusInfo(status));
</script>

<div class="relative">
	<!-- Vertical connector line behind the dots -->
	<div class="absolute left-6 top-0 bottom-0 w-0.5 bg-gray-200"></div>

	<div class="space-y-6">
		{#each glieder as glied, i}
			{@const isFirst = i === 0}
			{@const isLast = i === glieder.length - 1}
			<div class="relative flex items-start group">
				<!-- Timeline dot: green ring for the first link, blue for the last -->
				<div class="flex-shrink-0 relative z-10">
					<div class="w-12 h-12 rounded-full flex items-center justify-center text-lg
						{isFirst ? 'bg-green-100 ring-2 ring-green-500' : isLast ? 'bg-blue-100 ring-2 ring-blue-500' : 'bg-white ring-2 ring-gray-300'}">
						{#if isFirst}
							{info.emoji}
						{:else if glied.rolle === 'stellungnahme'}
							📄
						{:else if glied.rolle === 'bericht'}
							📊
						{:else}
							📎
						{/if}
					</div>
				</div>

				<!-- Content card linking to the Vorlage detail page -->
				<a href="/vorlagen/{glied.vorlage.id}" class="ml-4 flex-1 bg-white rounded-lg border border-gray-200 p-4 hover:shadow-md transition-shadow">
					<div class="flex items-center justify-between">
						<div class="flex items-center space-x-2">
							{#if glied.vorlage.aktenzeichen}
								<span class="font-mono text-sm font-medium text-gray-900">{glied.vorlage.aktenzeichen}</span>
							{/if}
							<span class="text-xs px-2 py-0.5 rounded bg-gray-100 text-gray-600">{typLabel(glied.vorlage.typ)}</span>
							{#if glied.rolle}
								<span class="text-xs text-gray-400">{glied.rolle}</span>
							{/if}
						</div>
						<span class="text-xs text-gray-500">{formatDate(glied.vorlage.datum_eingang)}</span>
					</div>
					{#if glied.vorlage.betreff}
						<p class="mt-1 text-sm text-gray-700 line-clamp-2">{glied.vorlage.betreff}</p>
					{/if}
				</a>
			</div>
		{/each}
	</div>
</div>
|
||||||
16
frontend/src/lib/components/StatusBadge.svelte
Normal file
16
frontend/src/lib/components/StatusBadge.svelte
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
<script lang="ts">
	// Pill badge showing a Kette status (emoji + label, colour from STATUS_CONFIG).
	// With `linked`, the badge links to the Ketten list filtered by that status.
	import { statusInfo } from '$lib/status';

	let { status, linked = false }: { status: string | null; linked?: boolean } = $props();
	const info = $derived(statusInfo(status));
</script>

{#if linked && status}
	<a href="/ketten?status={status}" class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium {info.color} hover:opacity-80 transition-opacity">
		<span class="mr-1">{info.emoji}</span> {info.label}
	</a>
{:else}
	<span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium {info.color}">
		<span class="mr-1">{info.emoji}</span> {info.label}
	</span>
{/if}
|
||||||
1
frontend/src/lib/index.ts
Normal file
1
frontend/src/lib/index.ts
Normal file
@ -0,0 +1 @@
|
|||||||
|
// place files you want to import through the `$lib` alias in this folder.
|
||||||
43
frontend/src/lib/status.ts
Normal file
43
frontend/src/lib/status.ts
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
/**
 * Display configuration per chain/Vorlage status: emoji, German label,
 * and Tailwind colour classes for the badge. Keys are the raw status
 * strings delivered by the API (see `statusInfo` for the fallback).
 */
export const STATUS_CONFIG: Record<string, { emoji: string; label: string; color: string }> = {
	angefragt: { emoji: '\ud83d\udcdd', label: 'Angefragt', color: 'bg-blue-100 text-blue-800' },
	beantwortet: { emoji: '\u2705', label: 'Beantwortet', color: 'bg-green-100 text-green-800' },
	offen: { emoji: '\u23f3', label: 'Offen', color: 'bg-yellow-100 text-yellow-800' },
	abgewiegelt: { emoji: '\u26a0\ufe0f', label: 'Abgewiegelt', color: 'bg-orange-100 text-orange-800' },
	versandet: { emoji: '\ud83d\udc80', label: 'Versandet', color: 'bg-red-100 text-red-800' },
	// NOTE: identifier key with a unicode escape — equivalent to "zurückgezogen".
	zur\u00fcckgezogen: { emoji: '\ud83d\udd19', label: 'Zur\u00fcckgezogen', color: 'bg-gray-100 text-gray-800' },
	eingereicht: { emoji: '\ud83d\udcdd', label: 'Eingereicht', color: 'bg-blue-100 text-blue-800' },
	in_beratung: { emoji: '\ud83d\udd04', label: 'In Beratung', color: 'bg-indigo-100 text-indigo-800' },
	vertagt: { emoji: '\u23f8\ufe0f', label: 'Vertagt', color: 'bg-amber-100 text-amber-800' },
	verwiesen: { emoji: '\u21aa\ufe0f', label: 'Verwiesen', color: 'bg-purple-100 text-purple-800' },
	beschlossen: { emoji: '\ud83d\udccb', label: 'Beschlossen', color: 'bg-teal-100 text-teal-800' },
	umgesetzt: { emoji: '\u2705', label: 'Umgesetzt', color: 'bg-green-100 text-green-800' },
	teilweise_umgesetzt: { emoji: '\ud83d\udd36', label: 'Teilw. umgesetzt', color: 'bg-lime-100 text-lime-800' },
	abgelehnt: { emoji: '\u274c', label: 'Abgelehnt', color: 'bg-red-100 text-red-800' },
	still_uebernommen: { emoji: '\ud83d\udd04\u2728', label: 'Still \u00fcbernommen', color: 'bg-pink-100 text-pink-800' },
};
|
||||||
|
|
||||||
|
export function statusInfo(status: string | null) {
|
||||||
|
if (!status) return { emoji: '\u2753', label: 'Unbekannt', color: 'bg-gray-100 text-gray-600' };
|
||||||
|
return STATUS_CONFIG[status] ?? { emoji: '\u2753', label: status, color: 'bg-gray-100 text-gray-600' };
|
||||||
|
}
|
||||||
|
|
||||||
|
/** German display labels for the known Vorlage types (raw API value → label). */
export const TYP_LABELS: Record<string, string> = {
	antrag: 'Antrag',
	anfrage: 'Anfrage',
	stellungnahme: 'Stellungnahme',
	bericht: 'Bericht',
};
|
||||||
|
|
||||||
|
export function typLabel(typ: string | null): string {
|
||||||
|
if (!typ) return 'Unbekannt';
|
||||||
|
return TYP_LABELS[typ] ?? typ;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function formatDate(d: string | null): string {
|
||||||
|
if (!d) return '\u2013';
|
||||||
|
try {
|
||||||
|
return new Date(d).toLocaleDateString('de-DE', { day: '2-digit', month: '2-digit', year: 'numeric' });
|
||||||
|
} catch {
|
||||||
|
return d;
|
||||||
|
}
|
||||||
|
}
|
||||||
31
frontend/src/routes/+layout.svelte
Normal file
31
frontend/src/routes/+layout.svelte
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
<script lang="ts">
	// Root layout: global styles, top navigation bar, and the page slot.
	import '../app.css';
	let { children } = $props();
</script>

<div class="min-h-screen bg-gray-50">
	<!-- Top navigation (links hidden on small screens via sm:flex) -->
	<nav class="bg-white border-b border-gray-200 shadow-sm">
		<div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
			<div class="flex justify-between h-16">
				<div class="flex items-center space-x-8">
					<a href="/" class="text-xl font-bold text-gray-900">
						Antragstracker <span class="text-green-600">Hagen</span>
					</a>
					<div class="hidden sm:flex space-x-4">
						<a href="/" class="text-gray-600 hover:text-gray-900 px-3 py-2 rounded-md text-sm font-medium">Dashboard</a>
						<a href="/ketten" class="text-gray-600 hover:text-gray-900 px-3 py-2 rounded-md text-sm font-medium">Ketten</a>
						<a href="/vorlagen" class="text-gray-600 hover:text-gray-900 px-3 py-2 rounded-md text-sm font-medium">Vorlagen</a>
						<a href="/abstimmungen" class="text-gray-600 hover:text-gray-900 px-3 py-2 rounded-md text-sm font-medium">Abstimmungen</a>
						<a href="/karte" class="text-gray-600 hover:text-gray-900 px-3 py-2 rounded-md text-sm font-medium">Karte</a>
					</div>
				</div>
			</div>
		</div>
	</nav>

	<!-- Routed page content -->
	<main class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-8">
		{@render children()}
	</main>
</div>
|
||||||
3
frontend/src/routes/+layout.ts
Normal file
3
frontend/src/routes/+layout.ts
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
// SPA mode for the static adapter: no prerendering and no SSR, so the
// adapter emits a single client-rendered shell and all data is fetched
// in the browser.
export const prerender = false;
export const ssr = false;
|
||||||
145
frontend/src/routes/+page.svelte
Normal file
145
frontend/src/routes/+page.svelte
Normal file
@ -0,0 +1,145 @@
|
|||||||
|
<script lang="ts">
	// Dashboard landing page: stat cards + list of the 10 latest Anträge.
	// NOTE(review): this page fetches the API directly instead of using the
	// typed helpers in $lib/api — consider unifying.
	import '../app.css';

	// Local list-item shape (subset of the API's VorlageKurz).
	interface Vorlage {
		id: number;
		aktenzeichen: string;
		typ: string;
		betreff: string;
		datum_eingang: string;
		ist_verwaltungsvorlage: boolean;
	}

	// NOTE(review): `beratungen` and `gremien` are declared but never populated
	// below — only `vorlagen` and `ketten` are ever set.
	interface Stats {
		vorlagen: number;
		beratungen: number;
		ketten: number;
		gremien: number;
	}

	let stats = $state<Stats>({ vorlagen: 0, beratungen: 0, ketten: 0, gremien: 0 });
	let antraege = $state<Vorlage[]>([]);
	let loading = $state(true);
	let error = $state('');

	// API base: relative path in production; in dev (Vite on :5173) talk to the
	// backend on port 8099 directly.
	const API_BASE = typeof window !== 'undefined'
		? (window.location.port === '5173'
			? `http://${window.location.hostname}:8099/api`
			: '/api')
		: '/api';

	// Fetch counts and the latest Anträge.
	// NOTE(review): the console.log calls throughout are leftover debug output —
	// consider removing before release.
	async function loadData() {
		console.log('API_BASE:', API_BASE);
		try {
			// Health check first; counts are only loaded when it succeeds.
			console.log('Fetching health...');
			const statsRes = await fetch(`${API_BASE}/health`);
			console.log('Health response:', statsRes.status);
			if (statsRes.ok) {
				// Count Vorlagen via the paginated endpoint (page_size=1, read `total`).
				console.log('Fetching vorlagen...');
				const vorlagenRes = await fetch(`${API_BASE}/vorlagen?page_size=1`);
				console.log('Vorlagen response:', vorlagenRes.status);
				const vorlagenData = await vorlagenRes.json();
				stats.vorlagen = vorlagenData.total;

				// Count Ketten the same way.
				const kettenRes = await fetch(`${API_BASE}/ketten?page_size=1`);
				const kettenData = await kettenRes.json();
				stats.ketten = kettenData.total;
			} else {
				error = `Health check failed: ${statsRes.status}`;
			}

			// Load the 10 most recent Anträge (fetched even if the health check failed).
			const antraegeRes = await fetch(`${API_BASE}/vorlagen?typ=antrag&page_size=10`);
			console.log('Antraege response:', antraegeRes.status);
			if (antraegeRes.ok) {
				const data = await antraegeRes.json();
				console.log('Antraege data:', data.items.length);
				antraege = data.items;
			}
		} catch (e) {
			console.error('API Fehler:', e);
			error = `Fehler: ${e}`;
		} finally {
			loading = false;
			console.log('Loading done, antraege:', antraege.length);
		}
	}

	// Kick off data loading once at component init.
	loadData();
</script>

<svelte:head>
	<title>Antragstracker Hagen</title>
</svelte:head>

<main class="min-h-screen bg-gray-50">
	<!-- Page header -->
	<header class="bg-green-700 text-white py-6 shadow-lg">
		<div class="max-w-6xl mx-auto px-4">
			<h1 class="text-3xl font-bold">🏛️ Antragstracker Hagen</h1>
			<p class="text-green-100 mt-1">Kommunale Anträge & Anfragen nachverfolgen</p>
		</div>
	</header>

	<div class="max-w-6xl mx-auto px-4 py-8">
		<!-- Stat cards.
		     NOTE(review): "41" Gremien and "2004–2026" are hard-coded, not fetched. -->
		<div class="grid grid-cols-2 md:grid-cols-4 gap-4 mb-8">
			<div class="bg-white rounded-lg shadow p-4 text-center">
				<div class="text-3xl font-bold text-green-600">{stats.vorlagen.toLocaleString()}</div>
				<div class="text-gray-500 text-sm">Vorlagen</div>
			</div>
			<div class="bg-white rounded-lg shadow p-4 text-center">
				<div class="text-3xl font-bold text-blue-600">{stats.ketten.toLocaleString()}</div>
				<div class="text-gray-500 text-sm">Ketten</div>
			</div>
			<div class="bg-white rounded-lg shadow p-4 text-center">
				<div class="text-3xl font-bold text-purple-600">41</div>
				<div class="text-gray-500 text-sm">Gremien</div>
			</div>
			<div class="bg-white rounded-lg shadow p-4 text-center">
				<div class="text-3xl font-bold text-orange-600">2004–2026</div>
				<div class="text-gray-500 text-sm">Zeitraum</div>
			</div>
		</div>

		<!-- Latest Anträge list -->
		<section class="bg-white rounded-lg shadow">
			<div class="px-6 py-4 border-b border-gray-200">
				<h2 class="text-xl font-semibold text-gray-800">📋 Aktuelle Anträge</h2>
			</div>

			{#if error}
				<div class="p-6 text-center text-red-500">{error}</div>
			{:else if loading}
				<div class="p-6 text-center text-gray-500">Lade Daten... (API: {API_BASE})</div>
			{:else if antraege.length === 0}
				<div class="p-6 text-center text-gray-500">Keine Anträge gefunden</div>
			{:else}
				<ul class="divide-y divide-gray-100">
					{#each antraege as antrag}
						<li class="px-6 py-4 hover:bg-gray-50 cursor-pointer">
							<div class="flex justify-between items-start">
								<div class="flex-1">
									<div class="flex items-center gap-2">
										<span class="font-mono text-sm text-green-700 bg-green-50 px-2 py-0.5 rounded">
											{antrag.aktenzeichen}
										</span>
										<span class="text-xs text-gray-400">{antrag.datum_eingang}</span>
									</div>
									<p class="mt-1 text-gray-700 line-clamp-2">{antrag.betreff}</p>
								</div>
								<!-- NOTE(review): static "offen" badge — not derived from the
								     Vorlage's actual status. -->
								<span class="text-xs px-2 py-1 rounded-full bg-yellow-100 text-yellow-800">
									⏳ offen
								</span>
							</div>
						</li>
					{/each}
				</ul>
			{/if}
		</section>
	</div>
</main>
|
||||||
185
frontend/src/routes/abstimmungen/+page.svelte
Normal file
185
frontend/src/routes/abstimmungen/+page.svelte
Normal file
@ -0,0 +1,185 @@
|
|||||||
|
<script lang="ts">
	// Voting-behaviour page: per-faction Ja/Nein/Enthaltung table plus a
	// pairwise "coalition matrix" heat map of agreement quotes.
	import { onMount } from 'svelte';

	// Per-faction vote totals as delivered by /abstimmungen/fraktionen.
	interface FraktionStats {
		fraktion: string;
		ja: number;
		nein: number;
		enthaltung: number;
		gesamt: number;
		ja_quote: number;   // approval rate — presumably in percent (0–100); verify against backend
	}

	// Pairwise agreement between two factions.
	interface Uebereinstimmung {
		quote: number;      // percent agreement
		gleich: number;     // votes with identical outcome
		gesamt: number;     // votes compared
	}

	// One matrix row: a faction and its agreement with every other faction.
	interface KoalitionsRow {
		fraktion: string;
		uebereinstimmung: Record<string, Uebereinstimmung>;
	}

	let fraktionen = $state<FraktionStats[]>([]);
	let koalitionsmatrix = $state<KoalitionsRow[]>([]);
	let loading = $state(true);
	let error = $state('');

	// API base: relative in production, explicit port 8099 in Vite dev (:5173).
	const API_BASE = typeof window !== 'undefined'
		? (window.location.port === '5173'
			? `http://${window.location.hostname}:8099/api`
			: '/api')
		: '/api';

	// Load both datasets in parallel; non-OK responses are silently skipped
	// (the corresponding section then just stays empty).
	onMount(async () => {
		try {
			const [frakRes, koalRes] = await Promise.all([
				fetch(`${API_BASE}/abstimmungen/fraktionen`),
				fetch(`${API_BASE}/abstimmungen/koalitionsmatrix`)
			]);

			if (frakRes.ok) fraktionen = await frakRes.json();
			if (koalRes.ok) koalitionsmatrix = await koalRes.json();
		} catch (e) {
			error = `Fehler: ${e}`;
		} finally {
			loading = false;
		}
	});

	// Heat-map cell background by agreement quote (percent buckets).
	function getColor(quote: number): string {
		if (quote >= 90) return 'bg-green-500';
		if (quote >= 70) return 'bg-green-400';
		if (quote >= 50) return 'bg-yellow-400';
		if (quote >= 30) return 'bg-orange-400';
		return 'bg-red-400';
	}

	// White text on the darker (>= 50%) cells, dark text otherwise.
	function getTextColor(quote: number): string {
		return quote >= 50 ? 'text-white' : 'text-gray-900';
	}
</script>

<svelte:head>
	<title>Abstimmungen - Antragstracker Hagen</title>
</svelte:head>

<div class="mb-6">
	<h1 class="text-2xl font-bold text-gray-900">Abstimmungsverhalten</h1>
	<p class="text-gray-500 text-sm mt-1">Analyse des Stimmverhaltens der Ratsfraktionen</p>
</div>

{#if error}
	<div class="bg-red-50 text-red-700 p-4 rounded-lg mb-6">{error}</div>
{/if}

{#if loading}
	<div class="flex justify-center py-20">
		<div class="animate-spin rounded-full h-12 w-12 border-b-2 border-green-600"></div>
	</div>
{:else}
	<!-- Per-faction vote overview -->
	<section class="bg-white rounded-xl shadow-sm border border-gray-200 p-6 mb-8">
		<h2 class="text-lg font-semibold text-gray-900 mb-4">📊 Stimmverhalten nach Fraktion</h2>

		<div class="overflow-x-auto">
			<table class="w-full">
				<thead class="bg-gray-50">
					<tr>
						<th class="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Fraktion</th>
						<th class="px-4 py-3 text-center text-xs font-medium text-gray-500 uppercase">Ja</th>
						<th class="px-4 py-3 text-center text-xs font-medium text-gray-500 uppercase">Nein</th>
						<th class="px-4 py-3 text-center text-xs font-medium text-gray-500 uppercase">Enthaltung</th>
						<th class="px-4 py-3 text-center text-xs font-medium text-gray-500 uppercase">Gesamt</th>
						<th class="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Zustimmungsquote</th>
					</tr>
				</thead>
				<tbody class="divide-y divide-gray-100">
					{#each fraktionen as f}
						<tr class="hover:bg-gray-50">
							<td class="px-4 py-3 font-medium text-gray-900">{f.fraktion}</td>
							<td class="px-4 py-3 text-center text-green-600 font-medium">{f.ja}</td>
							<td class="px-4 py-3 text-center text-red-600 font-medium">{f.nein}</td>
							<td class="px-4 py-3 text-center text-yellow-600 font-medium">{f.enthaltung}</td>
							<td class="px-4 py-3 text-center text-gray-600">{f.gesamt}</td>
							<td class="px-4 py-3">
								<!-- Approval rate as a small progress bar plus the numeric value -->
								<div class="flex items-center gap-2">
									<div class="flex-1 bg-gray-200 rounded-full h-2 max-w-24">
										<div class="bg-green-500 h-2 rounded-full" style="width: {f.ja_quote}%"></div>
									</div>
									<span class="text-sm text-gray-600">{f.ja_quote}%</span>
								</div>
							</td>
						</tr>
					{/each}
				</tbody>
			</table>
		</div>
	</section>

	<!-- Coalition matrix: pairwise agreement heat map -->
	<section class="bg-white rounded-xl shadow-sm border border-gray-200 p-6">
		<h2 class="text-lg font-semibold text-gray-900 mb-4">🤝 Koalitionsmatrix</h2>
		<p class="text-sm text-gray-500 mb-4">Wie oft stimmen Fraktionen gleich ab? (nur Ja/Nein-Stimmen)</p>

		{#if koalitionsmatrix.length > 0}
			{@const allFraktionen = koalitionsmatrix.map(r => r.fraktion).sort()}
			<div class="overflow-x-auto">
				<table class="text-xs">
					<thead>
						<tr>
							<th class="p-2 text-left"></th>
							{#each allFraktionen as f}
								<!-- Vertical column headers (rotated 180° so text reads bottom-up) -->
								<th class="p-2 text-center writing-mode-vertical" style="writing-mode: vertical-lr; transform: rotate(180deg); height: 100px;">
									{f}
								</th>
							{/each}
						</tr>
					</thead>
					<tbody>
						{#each koalitionsmatrix as row}
							<tr>
								<td class="p-2 font-medium text-right pr-3 whitespace-nowrap">{row.fraktion}</td>
								{#each allFraktionen as f2}
									{#if row.fraktion === f2}
										<!-- Diagonal: faction compared with itself -->
										<td class="p-1">
											<div class="w-10 h-10 bg-gray-300 rounded flex items-center justify-center text-gray-500">
												—
											</div>
										</td>
									{:else if row.uebereinstimmung[f2]}
										{@const data = row.uebereinstimmung[f2]}
										<td class="p-1">
											<div class="w-10 h-10 {getColor(data.quote)} {getTextColor(data.quote)} rounded flex items-center justify-center font-medium"
												title="{row.fraktion} & {f2}: {data.gleich}/{data.gesamt} ({data.quote}%)">
												{Math.round(data.quote)}
											</div>
										</td>
									{:else}
										<!-- No overlapping votes for this pair -->
										<td class="p-1">
											<div class="w-10 h-10 bg-gray-100 rounded flex items-center justify-center text-gray-400">
												-
											</div>
										</td>
									{/if}
								{/each}
							</tr>
						{/each}
					</tbody>
				</table>
			</div>

			<!-- Colour legend for the heat map buckets (matches getColor) -->
			<div class="mt-4 flex items-center gap-4 text-xs text-gray-500">
				<span>Legende:</span>
				<span class="flex items-center gap-1"><span class="w-4 h-4 bg-green-500 rounded"></span> 90-100%</span>
				<span class="flex items-center gap-1"><span class="w-4 h-4 bg-green-400 rounded"></span> 70-90%</span>
				<span class="flex items-center gap-1"><span class="w-4 h-4 bg-yellow-400 rounded"></span> 50-70%</span>
				<span class="flex items-center gap-1"><span class="w-4 h-4 bg-orange-400 rounded"></span> 30-50%</span>
				<span class="flex items-center gap-1"><span class="w-4 h-4 bg-red-400 rounded"></span> <30%</span>
			</div>
		{:else}
			<p class="text-gray-500">Noch keine Koalitionsdaten verfügbar.</p>
		{/if}
	</section>
{/if}
|
||||||
178
frontend/src/routes/karte/+page.svelte
Normal file
178
frontend/src/routes/karte/+page.svelte
Normal file
@ -0,0 +1,178 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { onMount } from 'svelte';
|
||||||
|
import { browser } from '$app/environment';
|
||||||
|
|
||||||
|
interface Ort {
|
||||||
|
id: number;
|
||||||
|
name: string;
|
||||||
|
lat: number;
|
||||||
|
lon: number;
|
||||||
|
vorlage_count: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface Vorlage {
|
||||||
|
id: number;
|
||||||
|
aktenzeichen: string;
|
||||||
|
typ: string;
|
||||||
|
betreff: string;
|
||||||
|
datum_eingang: string;
|
||||||
|
kontext: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
let orte = $state<Ort[]>([]);
|
||||||
|
let selectedOrt = $state<Ort | null>(null);
|
||||||
|
let selectedVorlagen = $state<Vorlage[]>([]);
|
||||||
|
let loading = $state(true);
|
||||||
|
let map: any = null;
|
||||||
|
|
||||||
|
const API_BASE = typeof window !== 'undefined'
|
||||||
|
? (window.location.port === '5173'
|
||||||
|
? `http://${window.location.hostname}:8099/api`
|
||||||
|
: '/api')
|
||||||
|
: '/api';
|
||||||
|
|
||||||
|
// Hagen Zentrum
|
||||||
|
const HAGEN_CENTER: [number, number] = [51.361, 7.476];
|
||||||
|
const HAGEN_ZOOM = 12;
|
||||||
|
|
||||||
|
async function loadOrte() {
|
||||||
|
try {
|
||||||
|
const res = await fetch(`${API_BASE}/orte`);
|
||||||
|
if (res.ok) {
|
||||||
|
orte = await res.json();
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.error('Fehler beim Laden der Orte:', e);
|
||||||
|
} finally {
|
||||||
|
loading = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function selectOrt(ort: Ort) {
|
||||||
|
selectedOrt = ort;
|
||||||
|
try {
|
||||||
|
const res = await fetch(`${API_BASE}/orte/${ort.id}/vorlagen`);
|
||||||
|
if (res.ok) {
|
||||||
|
selectedVorlagen = await res.json();
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.error('Fehler:', e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
onMount(async () => {
|
||||||
|
if (!browser) return;
|
||||||
|
|
||||||
|
await loadOrte();
|
||||||
|
|
||||||
|
// Leaflet dynamisch laden
|
||||||
|
const L = await import('leaflet');
|
||||||
|
await import('leaflet/dist/leaflet.css');
|
||||||
|
|
||||||
|
// Map initialisieren
|
||||||
|
map = L.map('map').setView(HAGEN_CENTER, HAGEN_ZOOM);
|
||||||
|
|
||||||
|
L.tileLayer('https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', {
|
||||||
|
attribution: '© <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a>'
|
||||||
|
}).addTo(map);
|
||||||
|
|
||||||
|
// Marker hinzufügen
|
||||||
|
for (const ort of orte) {
|
||||||
|
const marker = L.circleMarker([ort.lat, ort.lon], {
|
||||||
|
radius: Math.min(8 + ort.vorlage_count * 2, 20),
|
||||||
|
fillColor: '#16a34a',
|
||||||
|
color: '#166534',
|
||||||
|
weight: 2,
|
||||||
|
opacity: 1,
|
||||||
|
fillOpacity: 0.7
|
||||||
|
}).addTo(map);
|
||||||
|
|
||||||
|
marker.bindPopup(`
|
||||||
|
<strong>${ort.name}</strong><br>
|
||||||
|
${ort.vorlage_count} Vorlage(n)
|
||||||
|
`);
|
||||||
|
|
||||||
|
marker.on('click', () => selectOrt(ort));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<svelte:head>
|
||||||
|
<title>Karte - Antragstracker Hagen</title>
|
||||||
|
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" />
|
||||||
|
</svelte:head>
|
||||||
|
|
||||||
|
<div class="mb-6">
|
||||||
|
<h1 class="text-2xl font-bold text-gray-900">📍 Anträge auf der Karte</h1>
|
||||||
|
<p class="text-gray-500 text-sm mt-1">Orte aus Anträgen und Anfragen in Hagen</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="grid grid-cols-1 lg:grid-cols-3 gap-6">
|
||||||
|
<!-- Karte -->
|
||||||
|
<div class="lg:col-span-2">
|
||||||
|
<div class="bg-white rounded-xl shadow-sm border border-gray-200 overflow-hidden">
|
||||||
|
{#if loading}
|
||||||
|
<div class="h-[500px] flex items-center justify-center">
|
||||||
|
<div class="animate-spin rounded-full h-12 w-12 border-b-2 border-green-600"></div>
|
||||||
|
</div>
|
||||||
|
{:else}
|
||||||
|
<div id="map" class="h-[500px]"></div>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="mt-4 text-sm text-gray-500">
|
||||||
|
{orte.length} Orte geocodiert • Marker-Größe = Anzahl Vorlagen
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Sidebar -->
|
||||||
|
<div class="space-y-6">
|
||||||
|
<!-- Ausgewählter Ort -->
|
||||||
|
{#if selectedOrt}
|
||||||
|
<div class="bg-white rounded-xl shadow-sm border border-gray-200 p-6">
|
||||||
|
<h2 class="text-lg font-semibold text-gray-900 mb-2">{selectedOrt.name}</h2>
|
||||||
|
<p class="text-sm text-gray-500 mb-4">
|
||||||
|
{selectedOrt.vorlage_count} Vorlage(n) betreffen diesen Ort
|
||||||
|
</p>
|
||||||
|
|
||||||
|
{#if selectedVorlagen.length > 0}
|
||||||
|
<ul class="space-y-2">
|
||||||
|
{#each selectedVorlagen as v}
|
||||||
|
<li>
|
||||||
|
<a href="/vorlagen/{v.id}" class="block p-2 rounded-lg border border-gray-100 hover:bg-gray-50 transition-colors">
|
||||||
|
<div class="flex items-center gap-2">
|
||||||
|
<span class="font-mono text-xs font-medium text-green-700">{v.aktenzeichen}</span>
|
||||||
|
<span class="text-xs px-1.5 py-0.5 rounded bg-gray-100 text-gray-500">{v.typ}</span>
|
||||||
|
</div>
|
||||||
|
<p class="text-xs text-gray-600 mt-1 line-clamp-2">{v.betreff}</p>
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
{/each}
|
||||||
|
</ul>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
{:else}
|
||||||
|
<div class="bg-white rounded-xl shadow-sm border border-gray-200 p-6">
|
||||||
|
<p class="text-gray-500 text-sm">Klicke auf einen Marker um die zugehörigen Vorlagen zu sehen.</p>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
|
||||||
|
<!-- Orte-Liste -->
|
||||||
|
<div class="bg-white rounded-xl shadow-sm border border-gray-200 p-6">
|
||||||
|
<h2 class="text-lg font-semibold text-gray-900 mb-4">🗺️ Alle Orte</h2>
|
||||||
|
<ul class="space-y-1 max-h-80 overflow-y-auto">
|
||||||
|
{#each orte as ort}
|
||||||
|
<li>
|
||||||
|
<button
|
||||||
|
onclick={() => selectOrt(ort)}
|
||||||
|
class="w-full text-left px-3 py-2 rounded-lg hover:bg-gray-50 transition-colors text-sm
|
||||||
|
{selectedOrt?.id === ort.id ? 'bg-green-50 text-green-700' : 'text-gray-700'}">
|
||||||
|
<span class="font-medium">{ort.name}</span>
|
||||||
|
<span class="text-gray-400 ml-2">({ort.vorlage_count})</span>
|
||||||
|
</button>
|
||||||
|
</li>
|
||||||
|
{/each}
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
174
frontend/src/routes/ketten/+page.svelte
Normal file
174
frontend/src/routes/ketten/+page.svelte
Normal file
@ -0,0 +1,174 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { onMount } from 'svelte';
|
||||||
|
import { page } from '$app/stores';
|
||||||
|
import { goto } from '$app/navigation';
|
||||||
|
import { fetchKetten, type KetteKurz, type Paginated } from '$lib/api';
|
||||||
|
import { formatDate } from '$lib/status';
|
||||||
|
import StatusBadge from '$lib/components/StatusBadge.svelte';
|
||||||
|
|
||||||
|
let data: Paginated<KetteKurz> | null = $state(null);
|
||||||
|
let error: string | null = $state(null);
|
||||||
|
let loading = $state(false);
|
||||||
|
|
||||||
|
// Filters from URL
|
||||||
|
let filterStatus = $state('');
|
||||||
|
let filterTyp = $state('');
|
||||||
|
let filterSuche = $state('');
|
||||||
|
let currentPage = $state(1);
|
||||||
|
|
||||||
|
function syncFromUrl() {
|
||||||
|
const p = new URL(window.location.href).searchParams;
|
||||||
|
filterStatus = p.get('status') || '';
|
||||||
|
filterTyp = p.get('typ') || '';
|
||||||
|
filterSuche = p.get('suche') || '';
|
||||||
|
currentPage = parseInt(p.get('page') || '1');
|
||||||
|
}
|
||||||
|
|
||||||
|
async function load() {
|
||||||
|
loading = true;
|
||||||
|
try {
|
||||||
|
const params: Record<string, string> = { page: String(currentPage), page_size: '30' };
|
||||||
|
if (filterStatus) params.status = filterStatus;
|
||||||
|
if (filterTyp) params.typ = filterTyp;
|
||||||
|
if (filterSuche) params.suche = filterSuche;
|
||||||
|
data = await fetchKetten(params);
|
||||||
|
} catch (e) {
|
||||||
|
error = e instanceof Error ? e.message : 'Fehler';
|
||||||
|
} finally {
|
||||||
|
loading = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function applyFilters() {
|
||||||
|
const params = new URLSearchParams();
|
||||||
|
if (filterStatus) params.set('status', filterStatus);
|
||||||
|
if (filterTyp) params.set('typ', filterTyp);
|
||||||
|
if (filterSuche) params.set('suche', filterSuche);
|
||||||
|
currentPage = 1;
|
||||||
|
params.set('page', '1');
|
||||||
|
goto(`/ketten?${params.toString()}`, { replaceState: true });
|
||||||
|
load();
|
||||||
|
}
|
||||||
|
|
||||||
|
function goPage(p: number) {
|
||||||
|
currentPage = p;
|
||||||
|
const params = new URLSearchParams(window.location.search);
|
||||||
|
params.set('page', String(p));
|
||||||
|
goto(`/ketten?${params.toString()}`, { replaceState: true });
|
||||||
|
load();
|
||||||
|
}
|
||||||
|
|
||||||
|
onMount(() => {
|
||||||
|
syncFromUrl();
|
||||||
|
load();
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<svelte:head>
|
||||||
|
<title>Ketten - Antragstracker Hagen</title>
|
||||||
|
</svelte:head>
|
||||||
|
|
||||||
|
<div class="mb-6">
|
||||||
|
<h1 class="text-2xl font-bold text-gray-900">Ketten</h1>
|
||||||
|
<p class="text-gray-500 text-sm mt-1">Zusammengehörige Vorlagen als Ketten nachverfolgen</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Filters -->
|
||||||
|
<div class="bg-white rounded-xl shadow-sm border border-gray-200 p-4 mb-6">
|
||||||
|
<div class="flex flex-wrap gap-3 items-end">
|
||||||
|
<div>
|
||||||
|
<label for="suche" class="block text-xs font-medium text-gray-500 mb-1">Suche</label>
|
||||||
|
<input id="suche" type="text" bind:value={filterSuche} placeholder="Thema suchen..."
|
||||||
|
class="border border-gray-300 rounded-lg px-3 py-2 text-sm focus:ring-2 focus:ring-green-500 focus:border-green-500"
|
||||||
|
onkeydown={(e) => { if (e.key === 'Enter') applyFilters(); }} />
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label for="status" class="block text-xs font-medium text-gray-500 mb-1">Status</label>
|
||||||
|
<select id="status" bind:value={filterStatus} onchange={applyFilters}
|
||||||
|
class="border border-gray-300 rounded-lg px-3 py-2 text-sm focus:ring-2 focus:ring-green-500">
|
||||||
|
<option value="">Alle</option>
|
||||||
|
<option value="eingereicht">Eingereicht</option>
|
||||||
|
<option value="in_beratung">In Beratung</option>
|
||||||
|
<option value="vertagt">Vertagt</option>
|
||||||
|
<option value="beschlossen">Beschlossen</option>
|
||||||
|
<option value="umgesetzt">Umgesetzt</option>
|
||||||
|
<option value="abgelehnt">Abgelehnt</option>
|
||||||
|
<option value="versandet">Versandet</option>
|
||||||
|
<option value="angefragt">Angefragt</option>
|
||||||
|
<option value="beantwortet">Beantwortet</option>
|
||||||
|
<option value="offen">Offen</option>
|
||||||
|
<option value="abgewiegelt">Abgewiegelt</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label for="typ" class="block text-xs font-medium text-gray-500 mb-1">Typ</label>
|
||||||
|
<select id="typ" bind:value={filterTyp} onchange={applyFilters}
|
||||||
|
class="border border-gray-300 rounded-lg px-3 py-2 text-sm focus:ring-2 focus:ring-green-500">
|
||||||
|
<option value="">Alle</option>
|
||||||
|
<option value="antrag">Antrag</option>
|
||||||
|
<option value="anfrage">Anfrage</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
<button onclick={applyFilters}
|
||||||
|
class="bg-green-600 text-white px-4 py-2 rounded-lg text-sm font-medium hover:bg-green-700 transition-colors">
|
||||||
|
Filtern
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{#if error}
|
||||||
|
<div class="bg-red-50 text-red-700 p-4 rounded-lg">{error}</div>
|
||||||
|
{:else if loading && !data}
|
||||||
|
<div class="flex justify-center py-20">
|
||||||
|
<div class="animate-spin rounded-full h-12 w-12 border-b-2 border-green-600"></div>
|
||||||
|
</div>
|
||||||
|
{:else if data}
|
||||||
|
<div class="text-sm text-gray-500 mb-3">{data.total} Ketten gefunden</div>
|
||||||
|
|
||||||
|
<div class="bg-white rounded-xl shadow-sm border border-gray-200 overflow-hidden">
|
||||||
|
<table class="w-full">
|
||||||
|
<thead class="bg-gray-50">
|
||||||
|
<tr>
|
||||||
|
<th class="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Aktenzeichen</th>
|
||||||
|
<th class="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Thema</th>
|
||||||
|
<th class="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Typ</th>
|
||||||
|
<th class="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Status</th>
|
||||||
|
<th class="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Glieder</th>
|
||||||
|
<th class="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Letzte Akt.</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody class="divide-y divide-gray-100">
|
||||||
|
{#each data.items as kette}
|
||||||
|
<tr class="hover:bg-gray-50 transition-colors cursor-pointer" onclick={() => goto(`/ketten/${kette.id}`)}>
|
||||||
|
<td class="px-4 py-3">
|
||||||
|
<a href="/ketten/{kette.id}" class="font-mono text-sm font-medium text-green-700 hover:underline">
|
||||||
|
{kette.ursprung?.aktenzeichen || `#${kette.id}`}
|
||||||
|
</a>
|
||||||
|
</td>
|
||||||
|
<td class="px-4 py-3 text-sm text-gray-700 max-w-md truncate">{kette.thema || '-'}</td>
|
||||||
|
<td class="px-4 py-3 text-sm text-gray-600 capitalize">{kette.typ || '-'}</td>
|
||||||
|
<td class="px-4 py-3"><StatusBadge status={kette.status} /></td>
|
||||||
|
<td class="px-4 py-3 text-sm text-gray-600">{kette.glieder_count}</td>
|
||||||
|
<td class="px-4 py-3 text-sm text-gray-500">{formatDate(kette.letzte_aktivitaet)}</td>
|
||||||
|
</tr>
|
||||||
|
{/each}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Pagination -->
|
||||||
|
{#if data.total > data.page_size}
|
||||||
|
{@const totalPages = Math.ceil(data.total / data.page_size)}
|
||||||
|
<div class="flex justify-center mt-6 space-x-2">
|
||||||
|
<button disabled={currentPage <= 1} onclick={() => goPage(currentPage - 1)}
|
||||||
|
class="px-3 py-2 rounded-lg text-sm border border-gray-300 hover:bg-gray-50 disabled:opacity-50 disabled:cursor-not-allowed">
|
||||||
|
« Zurück
|
||||||
|
</button>
|
||||||
|
<span class="px-3 py-2 text-sm text-gray-600">Seite {currentPage} von {totalPages}</span>
|
||||||
|
<button disabled={currentPage >= totalPages} onclick={() => goPage(currentPage + 1)}
|
||||||
|
class="px-3 py-2 rounded-lg text-sm border border-gray-300 hover:bg-gray-50 disabled:opacity-50 disabled:cursor-not-allowed">
|
||||||
|
Weiter »
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
{/if}
|
||||||
114
frontend/src/routes/ketten/[id]/+page.svelte
Normal file
114
frontend/src/routes/ketten/[id]/+page.svelte
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { onMount } from 'svelte';
|
||||||
|
import { page } from '$app/stores';
|
||||||
|
import { fetchKette, type KetteDetail } from '$lib/api';
|
||||||
|
import { statusInfo, typLabel, formatDate } from '$lib/status';
|
||||||
|
import StatusBadge from '$lib/components/StatusBadge.svelte';
|
||||||
|
import Perlenschnur from '$lib/components/Perlenschnur.svelte';
|
||||||
|
|
||||||
|
let kette: KetteDetail | null = $state(null);
|
||||||
|
let error: string | null = $state(null);
|
||||||
|
|
||||||
|
onMount(async () => {
|
||||||
|
try {
|
||||||
|
const id = parseInt($page.params.id);
|
||||||
|
kette = await fetchKette(id);
|
||||||
|
} catch (e) {
|
||||||
|
error = e instanceof Error ? e.message : 'Fehler';
|
||||||
|
}
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<svelte:head>
|
||||||
|
<title>{kette?.ursprung?.aktenzeichen || 'Kette'} - Antragstracker Hagen</title>
|
||||||
|
</svelte:head>
|
||||||
|
|
||||||
|
{#if error}
|
||||||
|
<div class="bg-red-50 text-red-700 p-4 rounded-lg">{error}</div>
|
||||||
|
{:else if !kette}
|
||||||
|
<div class="flex justify-center py-20">
|
||||||
|
<div class="animate-spin rounded-full h-12 w-12 border-b-2 border-green-600"></div>
|
||||||
|
</div>
|
||||||
|
{:else}
|
||||||
|
<!-- Back link -->
|
||||||
|
<a href="/ketten" class="text-sm text-gray-500 hover:text-gray-700 mb-4 inline-block">← Zurück zur Liste</a>
|
||||||
|
|
||||||
|
<!-- Header -->
|
||||||
|
<div class="bg-white rounded-xl shadow-sm border border-gray-200 p-6 mb-6">
|
||||||
|
<div class="flex flex-wrap items-start justify-between gap-4">
|
||||||
|
<div>
|
||||||
|
<div class="flex items-center space-x-3 mb-2">
|
||||||
|
{#if kette.ursprung?.aktenzeichen}
|
||||||
|
<h1 class="text-2xl font-bold text-gray-900 font-mono">{kette.ursprung.aktenzeichen}</h1>
|
||||||
|
{/if}
|
||||||
|
<StatusBadge status={kette.status} />
|
||||||
|
{#if kette.typ}
|
||||||
|
<span class="text-sm px-2 py-0.5 rounded bg-gray-100 text-gray-600 capitalize">{typLabel(kette.typ)}</span>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
{#if kette.thema}
|
||||||
|
<p class="text-gray-700">{kette.thema}</p>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
<div class="text-right text-sm text-gray-500 space-y-1">
|
||||||
|
{#if kette.status_seit}
|
||||||
|
<div>Status seit: <strong>{formatDate(kette.status_seit)}</strong></div>
|
||||||
|
{/if}
|
||||||
|
{#if kette.letzte_aktivitaet}
|
||||||
|
<div>Letzte Aktivität: <strong>{formatDate(kette.letzte_aktivitaet)}</strong></div>
|
||||||
|
{/if}
|
||||||
|
{#if kette.vertagungen_count > 0}
|
||||||
|
<div class="text-amber-600">Vertagungen: <strong>{kette.vertagungen_count}</strong></div>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Antragsteller -->
|
||||||
|
{#if kette.antragsteller.length > 0}
|
||||||
|
<div class="mt-4 flex items-center space-x-2">
|
||||||
|
<span class="text-sm text-gray-500">Antragsteller:</span>
|
||||||
|
{#each kette.antragsteller as p}
|
||||||
|
<span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium"
|
||||||
|
style="background-color: {p.farbe || '#e5e7eb'}20; color: {p.farbe || '#4b5563'}; border: 1px solid {p.farbe || '#d1d5db'}">
|
||||||
|
{p.kuerzel}
|
||||||
|
</span>
|
||||||
|
{/each}
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Perlenschnur Timeline -->
|
||||||
|
<div class="bg-white rounded-xl shadow-sm border border-gray-200 p-6 mb-6">
|
||||||
|
<h2 class="text-lg font-semibold text-gray-900 mb-6">Perlenschnur</h2>
|
||||||
|
{#if kette.glieder.length > 0}
|
||||||
|
<Perlenschnur glieder={kette.glieder} status={kette.status} />
|
||||||
|
{:else}
|
||||||
|
<p class="text-gray-500 text-sm">Keine Glieder in dieser Kette.</p>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- External references from graph -->
|
||||||
|
{#if kette.graph && kette.graph.nodes.filter(n => n.extern).length > 0}
|
||||||
|
<div class="bg-white rounded-xl shadow-sm border border-gray-200 p-6">
|
||||||
|
<h2 class="text-lg font-semibold text-gray-900 mb-4">Verknüpfte Vorlagen</h2>
|
||||||
|
<div class="space-y-2">
|
||||||
|
{#each kette.graph.nodes.filter(n => n.extern) as ext}
|
||||||
|
<a href="/vorlagen/{ext.id}" class="flex items-center justify-between p-3 rounded-lg border border-gray-100 hover:bg-gray-50 transition-colors">
|
||||||
|
<div class="flex items-center space-x-3">
|
||||||
|
{#if ext.aktenzeichen}
|
||||||
|
<span class="font-mono text-sm font-medium text-green-700">{ext.aktenzeichen}</span>
|
||||||
|
{/if}
|
||||||
|
{#if ext.typ}
|
||||||
|
<span class="text-xs px-2 py-0.5 rounded bg-gray-100 text-gray-600 capitalize">{ext.typ}</span>
|
||||||
|
{/if}
|
||||||
|
{#if ext.betreff}
|
||||||
|
<span class="text-sm text-gray-600 truncate max-w-md">{ext.betreff}</span>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
<span class="text-xs text-gray-500">{formatDate(ext.datum_eingang)}</span>
|
||||||
|
</a>
|
||||||
|
{/each}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
{/if}
|
||||||
147
frontend/src/routes/vorlagen/+page.svelte
Normal file
147
frontend/src/routes/vorlagen/+page.svelte
Normal file
@ -0,0 +1,147 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { onMount } from 'svelte';
|
||||||
|
import { goto } from '$app/navigation';
|
||||||
|
import { fetchVorlagen, type VorlageKurz, type Paginated } from '$lib/api';
|
||||||
|
import { formatDate } from '$lib/status';
|
||||||
|
|
||||||
|
let data: Paginated<VorlageKurz> | null = $state(null);
|
||||||
|
let error: string | null = $state(null);
|
||||||
|
let loading = $state(false);
|
||||||
|
|
||||||
|
let filterTyp = $state('');
|
||||||
|
let filterSuche = $state('');
|
||||||
|
let currentPage = $state(1);
|
||||||
|
|
||||||
|
function syncFromUrl() {
|
||||||
|
const p = new URL(window.location.href).searchParams;
|
||||||
|
filterTyp = p.get('typ') || '';
|
||||||
|
filterSuche = p.get('suche') || '';
|
||||||
|
currentPage = parseInt(p.get('page') || '1');
|
||||||
|
}
|
||||||
|
|
||||||
|
async function load() {
|
||||||
|
loading = true;
|
||||||
|
try {
|
||||||
|
const params: Record<string, string> = { page: String(currentPage), page_size: '50' };
|
||||||
|
if (filterTyp) params.typ = filterTyp;
|
||||||
|
if (filterSuche) params.suche = filterSuche;
|
||||||
|
data = await fetchVorlagen(params);
|
||||||
|
} catch (e) {
|
||||||
|
error = e instanceof Error ? e.message : 'Fehler';
|
||||||
|
} finally {
|
||||||
|
loading = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function applyFilters() {
|
||||||
|
const params = new URLSearchParams();
|
||||||
|
if (filterTyp) params.set('typ', filterTyp);
|
||||||
|
if (filterSuche) params.set('suche', filterSuche);
|
||||||
|
currentPage = 1;
|
||||||
|
params.set('page', '1');
|
||||||
|
goto(`/vorlagen?${params.toString()}`, { replaceState: true });
|
||||||
|
load();
|
||||||
|
}
|
||||||
|
|
||||||
|
function goPage(p: number) {
|
||||||
|
currentPage = p;
|
||||||
|
const params = new URLSearchParams(window.location.search);
|
||||||
|
params.set('page', String(p));
|
||||||
|
goto(`/vorlagen?${params.toString()}`, { replaceState: true });
|
||||||
|
load();
|
||||||
|
}
|
||||||
|
|
||||||
|
onMount(() => {
|
||||||
|
syncFromUrl();
|
||||||
|
load();
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<svelte:head>
|
||||||
|
<title>Vorlagen - Antragstracker Hagen</title>
|
||||||
|
</svelte:head>
|
||||||
|
|
||||||
|
<div class="mb-6">
|
||||||
|
<h1 class="text-2xl font-bold text-gray-900">Vorlagen</h1>
|
||||||
|
<p class="text-gray-500 text-sm mt-1">Alle importierten Vorlagen aus dem ALLRIS-System</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Filters -->
|
||||||
|
<div class="bg-white rounded-xl shadow-sm border border-gray-200 p-4 mb-6">
|
||||||
|
<div class="flex flex-wrap gap-3 items-end">
|
||||||
|
<div>
|
||||||
|
<label for="suche" class="block text-xs font-medium text-gray-500 mb-1">Suche</label>
|
||||||
|
<input id="suche" type="text" bind:value={filterSuche} placeholder="Betreff oder Aktenzeichen..."
|
||||||
|
class="border border-gray-300 rounded-lg px-3 py-2 text-sm focus:ring-2 focus:ring-green-500 focus:border-green-500"
|
||||||
|
onkeydown={(e) => { if (e.key === 'Enter') applyFilters(); }} />
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label for="typ" class="block text-xs font-medium text-gray-500 mb-1">Typ</label>
|
||||||
|
<select id="typ" bind:value={filterTyp} onchange={applyFilters}
|
||||||
|
class="border border-gray-300 rounded-lg px-3 py-2 text-sm focus:ring-2 focus:ring-green-500">
|
||||||
|
<option value="">Alle</option>
|
||||||
|
<option value="antrag">Antrag</option>
|
||||||
|
<option value="anfrage">Anfrage</option>
|
||||||
|
<option value="stellungnahme">Stellungnahme</option>
|
||||||
|
<option value="bericht">Bericht</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
<button onclick={applyFilters}
|
||||||
|
class="bg-green-600 text-white px-4 py-2 rounded-lg text-sm font-medium hover:bg-green-700 transition-colors">
|
||||||
|
Filtern
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{#if error}
|
||||||
|
<div class="bg-red-50 text-red-700 p-4 rounded-lg">{error}</div>
|
||||||
|
{:else if loading && !data}
|
||||||
|
<div class="flex justify-center py-20">
|
||||||
|
<div class="animate-spin rounded-full h-12 w-12 border-b-2 border-green-600"></div>
|
||||||
|
</div>
|
||||||
|
{:else if data}
|
||||||
|
<div class="text-sm text-gray-500 mb-3">{data.total} Vorlagen gefunden</div>
|
||||||
|
|
||||||
|
<div class="bg-white rounded-xl shadow-sm border border-gray-200 overflow-hidden">
|
||||||
|
<table class="w-full">
|
||||||
|
<thead class="bg-gray-50">
|
||||||
|
<tr>
|
||||||
|
<th class="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Aktenzeichen</th>
|
||||||
|
<th class="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Betreff</th>
|
||||||
|
<th class="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Typ</th>
|
||||||
|
<th class="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Datum</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody class="divide-y divide-gray-100">
|
||||||
|
{#each data.items as v}
|
||||||
|
<tr class="hover:bg-gray-50 transition-colors cursor-pointer" onclick={() => goto(`/vorlagen/${v.id}`)}>
|
||||||
|
<td class="px-4 py-3">
|
||||||
|
<a href="/vorlagen/{v.id}" class="font-mono text-sm font-medium text-green-700 hover:underline">
|
||||||
|
{v.aktenzeichen || `#${v.id}`}
|
||||||
|
</a>
|
||||||
|
</td>
|
||||||
|
<td class="px-4 py-3 text-sm text-gray-700 max-w-lg truncate">{v.betreff || '-'}</td>
|
||||||
|
<td class="px-4 py-3 text-sm text-gray-600 capitalize">{v.typ || '-'}</td>
|
||||||
|
<td class="px-4 py-3 text-sm text-gray-500">{formatDate(v.datum_eingang)}</td>
|
||||||
|
</tr>
|
||||||
|
{/each}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Pagination -->
|
||||||
|
{#if data.total > data.page_size}
|
||||||
|
{@const totalPages = Math.ceil(data.total / data.page_size)}
|
||||||
|
<div class="flex justify-center mt-6 space-x-2">
|
||||||
|
<button disabled={currentPage <= 1} onclick={() => goPage(currentPage - 1)}
|
||||||
|
class="px-3 py-2 rounded-lg text-sm border border-gray-300 hover:bg-gray-50 disabled:opacity-50 disabled:cursor-not-allowed">
|
||||||
|
« Zurück
|
||||||
|
</button>
|
||||||
|
<span class="px-3 py-2 text-sm text-gray-600">Seite {currentPage} von {totalPages}</span>
|
||||||
|
<button disabled={currentPage >= totalPages} onclick={() => goPage(currentPage + 1)}
|
||||||
|
class="px-3 py-2 rounded-lg text-sm border border-gray-300 hover:bg-gray-50 disabled:opacity-50 disabled:cursor-not-allowed">
|
||||||
|
Weiter »
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
{/if}
|
||||||
259
frontend/src/routes/vorlagen/[id]/+page.svelte
Normal file
259
frontend/src/routes/vorlagen/[id]/+page.svelte
Normal file
@ -0,0 +1,259 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { onMount } from 'svelte';
|
||||||
|
import { page } from '$app/stores';
|
||||||
|
import { fetchVorlage, type VorlageDetail } from '$lib/api';
|
||||||
|
import { typLabel, formatDate } from '$lib/status';
|
||||||
|
|
||||||
|
let vorlage: VorlageDetail | null = $state(null);
|
||||||
|
let error: string | null = $state(null);
|
||||||
|
let showVolltext = $state(false);
|
||||||
|
|
||||||
|
onMount(async () => {
|
||||||
|
try {
|
||||||
|
const id = parseInt($page.params.id);
|
||||||
|
vorlage = await fetchVorlage(id);
|
||||||
|
} catch (e) {
|
||||||
|
error = e instanceof Error ? e.message : 'Fehler';
|
||||||
|
}
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<svelte:head>
|
||||||
|
<title>{vorlage?.aktenzeichen || 'Vorlage'} - Antragstracker Hagen</title>
|
||||||
|
</svelte:head>
|
||||||
|
|
||||||
|
{#if error}
|
||||||
|
<div class="bg-red-50 text-red-700 p-4 rounded-lg">{error}</div>
|
||||||
|
{:else if !vorlage}
|
||||||
|
<div class="flex justify-center py-20">
|
||||||
|
<div class="animate-spin rounded-full h-12 w-12 border-b-2 border-green-600"></div>
|
||||||
|
</div>
|
||||||
|
{:else}
|
||||||
|
<!-- Back link -->
|
||||||
|
<a href="/vorlagen" class="text-sm text-gray-500 hover:text-gray-700 mb-4 inline-block">← Zurück zur Liste</a>
|
||||||
|
|
||||||
|
<!-- Header -->
|
||||||
|
<div class="bg-white rounded-xl shadow-sm border border-gray-200 p-6 mb-6">
|
||||||
|
<div class="flex flex-wrap items-start justify-between gap-4">
|
||||||
|
<div>
|
||||||
|
<div class="flex items-center space-x-3 mb-2">
|
||||||
|
{#if vorlage.aktenzeichen}
|
||||||
|
<h1 class="text-2xl font-bold text-gray-900 font-mono">{vorlage.aktenzeichen}</h1>
|
||||||
|
{/if}
|
||||||
|
{#if vorlage.typ}
|
||||||
|
<span class="text-sm px-2 py-0.5 rounded bg-gray-100 text-gray-600">{typLabel(vorlage.typ)}</span>
|
||||||
|
{/if}
|
||||||
|
{#if vorlage.ist_verwaltungsvorlage}
|
||||||
|
<span class="text-xs px-2 py-0.5 rounded bg-blue-100 text-blue-700">Verwaltungsvorlage</span>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
{#if vorlage.betreff}
|
||||||
|
<p class="text-gray-700 text-lg">{vorlage.betreff}</p>
|
||||||
|
{/if}
|
||||||
|
{#if vorlage.thema_kurz}
|
||||||
|
<p class="text-sm text-gray-500 mt-1">Thema: {vorlage.thema_kurz}</p>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
<div class="text-right text-sm text-gray-500 space-y-1">
|
||||||
|
{#if vorlage.datum_eingang}
|
||||||
|
<div>Eingegangen: <strong>{formatDate(vorlage.datum_eingang)}</strong></div>
|
||||||
|
{/if}
|
||||||
|
{#if vorlage.kette_id}
|
||||||
|
<a href="/ketten/{vorlage.kette_id}" class="text-green-600 hover:underline block">Zur Kette →</a>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Antragsteller -->
|
||||||
|
{#if vorlage.antragsteller.length > 0}
|
||||||
|
<div class="mt-4 flex items-center space-x-2">
|
||||||
|
<span class="text-sm text-gray-500">Antragsteller:</span>
|
||||||
|
{#each vorlage.antragsteller as p}
|
||||||
|
<span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium"
|
||||||
|
style="background-color: {p.farbe || '#e5e7eb'}20; color: {p.farbe || '#4b5563'}; border: 1px solid {p.farbe || '#d1d5db'}">
|
||||||
|
{p.kuerzel}
|
||||||
|
</span>
|
||||||
|
{/each}
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
|
||||||
|
<!-- External links -->
|
||||||
|
<div class="mt-4 flex space-x-4">
|
||||||
|
{#if vorlage.web_url}
|
||||||
|
<a href={vorlage.web_url} target="_blank" rel="noopener"
|
||||||
|
class="text-sm text-green-600 hover:underline">ALLRIS ↗</a>
|
||||||
|
{/if}
|
||||||
|
{#if vorlage.pdf_url}
|
||||||
|
<a href={vorlage.pdf_url} target="_blank" rel="noopener"
|
||||||
|
class="text-sm text-green-600 hover:underline">PDF ↗</a>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="grid grid-cols-1 lg:grid-cols-3 gap-6">
|
||||||
|
<!-- Main content -->
|
||||||
|
<div class="lg:col-span-2 space-y-6">
|
||||||
|
<!-- KI-Zusammenfassung -->
|
||||||
|
{#if vorlage.ki_zusammenfassung}
|
||||||
|
<div class="bg-gradient-to-r from-green-50 to-emerald-50 rounded-xl shadow-sm border border-green-200 p-6">
|
||||||
|
<h2 class="text-lg font-semibold text-green-800 mb-3 flex items-center gap-2">
|
||||||
|
<span>🤖</span> KI-Zusammenfassung
|
||||||
|
</h2>
|
||||||
|
<p class="text-gray-700 mb-4">{vorlage.ki_zusammenfassung.zusammenfassung}</p>
|
||||||
|
|
||||||
|
{#if vorlage.ki_zusammenfassung.kernforderung}
|
||||||
|
<div class="mb-3">
|
||||||
|
<span class="text-xs font-medium text-green-700 uppercase">Kernforderung:</span>
|
||||||
|
<p class="text-gray-800 font-medium">{vorlage.ki_zusammenfassung.kernforderung}</p>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
|
||||||
|
{#if vorlage.ki_zusammenfassung.begruendung}
|
||||||
|
<div class="mb-3">
|
||||||
|
<span class="text-xs font-medium text-green-700 uppercase">Begründung:</span>
|
||||||
|
<p class="text-gray-600 text-sm">{vorlage.ki_zusammenfassung.begruendung}</p>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
|
||||||
|
<div class="flex flex-wrap gap-2 mt-4">
|
||||||
|
{#if vorlage.ki_zusammenfassung.thema}
|
||||||
|
<span class="text-xs px-2 py-1 rounded-full bg-green-100 text-green-800">
|
||||||
|
📂 {vorlage.ki_zusammenfassung.thema}
|
||||||
|
</span>
|
||||||
|
{/if}
|
||||||
|
{#if vorlage.ki_zusammenfassung.partei}
|
||||||
|
<span class="text-xs px-2 py-1 rounded-full bg-purple-100 text-purple-800">
|
||||||
|
🏛️ {vorlage.ki_zusammenfassung.partei}
|
||||||
|
</span>
|
||||||
|
{/if}
|
||||||
|
{#each vorlage.ki_zusammenfassung.betroffene_orte || [] as ort}
|
||||||
|
<span class="text-xs px-2 py-1 rounded-full bg-blue-100 text-blue-800">
|
||||||
|
📍 {ort}
|
||||||
|
</span>
|
||||||
|
{/each}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
|
||||||
|
<!-- Volltext -->
|
||||||
|
{#if vorlage.volltext_clean}
|
||||||
|
<div class="bg-white rounded-xl shadow-sm border border-gray-200 p-6">
|
||||||
|
<div class="flex items-center justify-between mb-4">
|
||||||
|
<h2 class="text-lg font-semibold text-gray-900">Volltext</h2>
|
||||||
|
<button onclick={() => showVolltext = !showVolltext}
|
||||||
|
class="text-sm text-green-600 hover:underline">
|
||||||
|
{showVolltext ? 'Einklappen' : 'Aufklappen'}
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
{#if showVolltext}
|
||||||
|
<div class="prose prose-sm max-w-none text-gray-700 whitespace-pre-wrap">{vorlage.volltext_clean}</div>
|
||||||
|
{:else}
|
||||||
|
<p class="text-sm text-gray-500 line-clamp-4">{vorlage.volltext_clean}</p>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
|
||||||
|
<!-- Beratungen -->
|
||||||
|
{#if vorlage.beratungen.length > 0}
|
||||||
|
<div class="bg-white rounded-xl shadow-sm border border-gray-200 p-6">
|
||||||
|
<h2 class="text-lg font-semibold text-gray-900 mb-4">Beratungsfolge</h2>
|
||||||
|
<div class="space-y-3">
|
||||||
|
{#each vorlage.beratungen as b}
|
||||||
|
<div class="flex items-start justify-between p-3 rounded-lg border border-gray-100">
|
||||||
|
<div>
|
||||||
|
{#if b.gremium}
|
||||||
|
<span class="text-sm font-medium text-gray-900">{b.gremium.name}</span>
|
||||||
|
{/if}
|
||||||
|
{#if b.rolle}
|
||||||
|
<span class="text-xs ml-2 text-gray-500">({b.rolle})</span>
|
||||||
|
{/if}
|
||||||
|
{#if b.ergebnis}
|
||||||
|
<div class="mt-1">
|
||||||
|
<span class="text-xs px-2 py-0.5 rounded
|
||||||
|
{b.ergebnis.includes('angenommen') || b.ergebnis.includes('empfohlen') ? 'bg-green-100 text-green-700' :
|
||||||
|
b.ergebnis.includes('abgelehnt') ? 'bg-red-100 text-red-700' :
|
||||||
|
b.ergebnis.includes('vertagt') ? 'bg-amber-100 text-amber-700' :
|
||||||
|
'bg-gray-100 text-gray-700'}">
|
||||||
|
{b.ergebnis}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
{#if b.ergebnis_text}
|
||||||
|
<p class="text-xs text-gray-500 mt-1">{b.ergebnis_text}</p>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
<span class="text-xs text-gray-500 flex-shrink-0 ml-4">{formatDate(b.sitzung_datum)}</span>
|
||||||
|
</div>
|
||||||
|
{/each}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Sidebar -->
|
||||||
|
<div class="space-y-6">
|
||||||
|
<!-- Referenzen ausgehend -->
|
||||||
|
{#if vorlage.referenzen_ausgehend.length > 0}
|
||||||
|
<div class="bg-white rounded-xl shadow-sm border border-gray-200 p-6">
|
||||||
|
<h2 class="text-base font-semibold text-gray-900 mb-3">Verweist auf</h2>
|
||||||
|
<div class="space-y-2">
|
||||||
|
{#each vorlage.referenzen_ausgehend as ref}
|
||||||
|
<a href="/vorlagen/{ref.vorlage_id}" class="block p-2 rounded-lg border border-gray-100 hover:bg-gray-50 transition-colors">
|
||||||
|
<div class="flex items-center space-x-2">
|
||||||
|
<span class="font-mono text-xs font-medium text-green-700">{ref.aktenzeichen || `#${ref.vorlage_id}`}</span>
|
||||||
|
<span class="text-xs px-1.5 py-0.5 rounded bg-gray-100 text-gray-500">{ref.ref_typ}</span>
|
||||||
|
</div>
|
||||||
|
{#if ref.betreff}
|
||||||
|
<p class="text-xs text-gray-600 truncate mt-0.5">{ref.betreff}</p>
|
||||||
|
{/if}
|
||||||
|
</a>
|
||||||
|
{/each}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
|
||||||
|
<!-- Referenzen eingehend -->
|
||||||
|
{#if vorlage.referenzen_eingehend.length > 0}
|
||||||
|
<div class="bg-white rounded-xl shadow-sm border border-gray-200 p-6">
|
||||||
|
<h2 class="text-base font-semibold text-gray-900 mb-3">Referenziert von</h2>
|
||||||
|
<div class="space-y-2">
|
||||||
|
{#each vorlage.referenzen_eingehend as ref}
|
||||||
|
<a href="/vorlagen/{ref.vorlage_id}" class="block p-2 rounded-lg border border-gray-100 hover:bg-gray-50 transition-colors">
|
||||||
|
<div class="flex items-center space-x-2">
|
||||||
|
<span class="font-mono text-xs font-medium text-green-700">{ref.aktenzeichen || `#${ref.vorlage_id}`}</span>
|
||||||
|
<span class="text-xs px-1.5 py-0.5 rounded bg-gray-100 text-gray-500">{ref.ref_typ}</span>
|
||||||
|
</div>
|
||||||
|
{#if ref.betreff}
|
||||||
|
<p class="text-xs text-gray-600 truncate mt-0.5">{ref.betreff}</p>
|
||||||
|
{/if}
|
||||||
|
</a>
|
||||||
|
{/each}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
|
||||||
|
<!-- Metadaten -->
|
||||||
|
<div class="bg-white rounded-xl shadow-sm border border-gray-200 p-6">
|
||||||
|
<h2 class="text-base font-semibold text-gray-900 mb-3">Details</h2>
|
||||||
|
<dl class="space-y-2 text-sm">
|
||||||
|
{#if vorlage.aktenzeichen_basis}
|
||||||
|
<div class="flex justify-between">
|
||||||
|
<dt class="text-gray-500">Basis</dt>
|
||||||
|
<dd class="font-mono text-gray-900">{vorlage.aktenzeichen_basis}</dd>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
{#if vorlage.aktenzeichen_suffix}
|
||||||
|
<div class="flex justify-between">
|
||||||
|
<dt class="text-gray-500">Suffix</dt>
|
||||||
|
<dd class="font-mono text-gray-900">{vorlage.aktenzeichen_suffix}</dd>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
<div class="flex justify-between">
|
||||||
|
<dt class="text-gray-500">ID</dt>
|
||||||
|
<dd class="text-gray-900">{vorlage.id}</dd>
|
||||||
|
</div>
|
||||||
|
</dl>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
3
frontend/static/robots.txt
Normal file
3
frontend/static/robots.txt
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# allow crawling everything by default
|
||||||
|
User-agent: *
|
||||||
|
Disallow:
|
||||||
30
frontend/svelte.config.js
Normal file
30
frontend/svelte.config.js
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
import adapter from '@sveltejs/adapter-static';
|
||||||
|
import { relative, sep } from 'node:path';
|
||||||
|
|
||||||
|
/** @type {import('@sveltejs/kit').Config} */
const config = {
	compilerOptions: {
		// defaults to rune mode for the project, except for `node_modules`. Can be removed in svelte 6.
		runes: ({ filename }) => {
			const relativePath = relative(import.meta.dirname, filename);
			const pathSegments = relativePath.toLowerCase().split(sep);
			// third-party code under node_modules keeps its own (legacy) compile mode
			const isExternalLibrary = pathSegments.includes('node_modules');

			return isExternalLibrary ? undefined : true;
		}
	},
	kit: {
		// adapter-auto only supports some environments, see https://svelte.dev/docs/kit/adapter-auto for a list.
		// If your environment is not supported, or you settled on a specific environment, switch out the adapter.
		// See https://svelte.dev/docs/kit/adapters for more information about adapters.
		adapter: adapter({
			pages: 'build',
			assets: 'build',
			// SPA fallback page so client-side routes still resolve when served statically
			fallback: 'index.html',
			precompress: false,
			strict: true
		})
	}
};
|
||||||
|
|
||||||
|
export default config;
|
||||||
20
frontend/tsconfig.json
Normal file
20
frontend/tsconfig.json
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
{
|
||||||
|
"extends": "./.svelte-kit/tsconfig.json",
|
||||||
|
"compilerOptions": {
|
||||||
|
"rewriteRelativeImportExtensions": true,
|
||||||
|
"allowJs": true,
|
||||||
|
"checkJs": true,
|
||||||
|
"esModuleInterop": true,
|
||||||
|
"forceConsistentCasingInFileNames": true,
|
||||||
|
"resolveJsonModule": true,
|
||||||
|
"skipLibCheck": true,
|
||||||
|
"sourceMap": true,
|
||||||
|
"strict": true,
|
||||||
|
"moduleResolution": "bundler"
|
||||||
|
}
|
||||||
|
// Path aliases are handled by https://svelte.dev/docs/kit/configuration#alias
|
||||||
|
// except $lib which is handled by https://svelte.dev/docs/kit/configuration#files
|
||||||
|
//
|
||||||
|
// To make changes to top-level options such as include and exclude, we recommend extending
|
||||||
|
// the generated config; see https://svelte.dev/docs/kit/configuration#typescript
|
||||||
|
}
|
||||||
10
frontend/vite.config.ts
Normal file
10
frontend/vite.config.ts
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
import { sveltekit } from '@sveltejs/kit/vite';
|
||||||
|
import tailwindcss from '@tailwindcss/vite';
|
||||||
|
import { defineConfig } from 'vite';
|
||||||
|
|
||||||
|
// Vite config: Tailwind + SvelteKit plugins; extra dev-server hostnames below.
export default defineConfig({
	plugins: [tailwindcss(), sveltekit()],
	server: {
		// hostnames the dev server accepts requests from (local dev machines)
		allowedHosts: ['mac-mini-von-dotty.local', 'Mac.wideopen.space', 'localhost']
	}
});
|
||||||
27
pyproject.toml
Normal file
27
pyproject.toml
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
[project]
|
||||||
|
name = "antragstracker-hagen"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "Automatisierte Nachverfolgung kommunaler Anträge und Anfragen in Hagen"
|
||||||
|
requires-python = ">=3.11"
|
||||||
|
dependencies = [
|
||||||
|
"fastapi>=0.110",
|
||||||
|
"uvicorn>=0.29",
|
||||||
|
"httpx>=0.27",
|
||||||
|
"aiosqlite>=0.20",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.optional-dependencies]
|
||||||
|
dev = [
|
||||||
|
"pytest>=8.0",
|
||||||
|
"ruff>=0.4",
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.pytest.ini_options]
|
||||||
|
pythonpath = ["backend/src"]
|
||||||
|
filterwarnings = [
|
||||||
|
"ignore::DeprecationWarning:tracker.core.chains",
|
||||||
|
]
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["setuptools>=68"]
|
||||||
|
build-backend = "setuptools.build_meta"
|
||||||
527
scripts/extract_adaptive.py
Normal file
527
scripts/extract_adaptive.py
Normal file
@ -0,0 +1,527 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Adaptive PDF-Extraktion mit Throttle-Detection.
|
||||||
|
|
||||||
|
Startet konservativ und erhöht Geschwindigkeit bis zum Limit.
|
||||||
|
Robustes Logging für Wiederaufnahme nach Abbruch.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import tempfile
|
||||||
|
import time
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from threading import Lock
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import pymupdf
|
||||||
|
|
||||||
|
# Netdata metrics HTTP endpoint (VServer); overridable via the METRICS_URL env var.
METRICS_URL = os.environ.get("METRICS_URL", "http://152.53.119.77:8127")

# All runtime artefacts live under <repo>/data (excluded from the repo via .gitignore).
PROJECT_ROOT = Path(__file__).resolve().parent.parent
DB_PATH = PROJECT_ROOT / "data" / "tracker_remote.db"           # SQLite DB (vorlagen / anlagen tables)
STATE_FILE = PROJECT_ROOT / "data" / "extract_state.json"       # resume state, see State dataclass
LOG_FILE = PROJECT_ROOT / "data" / "extract.log"                # append-only run log
METRICS_FILE = PROJECT_ROOT / "data" / "extract_metrics.jsonl"  # per-batch metrics (JSON Lines)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class AdaptiveConfig:
    """Adaptive throttling configuration.

    Holds the current request pacing (delay between submissions, worker
    count) plus the exploration state that ``_speedup``/``_slowdown`` use
    to find the fastest stable rate the remote server tolerates.
    """
    delay: float = 0.2                  # start close to the observed optimum
    workers: int = 4                    # start at ~optimal-35% (based on data: optimum ~6)
    min_delay: float = 0.1              # lower bound for delay
    max_workers: int = 15               # hard maximum for workers
    success_streak: int = 0             # consecutive successes so far
    streak_threshold: int = 30          # successes required before a speedup step
    cooldown_until: float = 0           # unix timestamp until which the cooldown lasts
    best_delay_per_worker: dict = field(default_factory=dict)  # worker_count -> min stable delay
    delay_fully_explored: bool = False  # True once delay is at min for the current worker level
    throughput_per_worker: dict = field(default_factory=dict)  # worker_count -> best throughput
    saturation_detected: bool = False   # True when adding workers yields no gain
    saturation_threshold: float = 0.1   # 10% improvement required to justify another worker
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class State:
    """Persistent state for resuming after an abort (JSON-serialized to STATE_FILE)."""
    processed: set = field(default_factory=set)         # vorlage_ids with extracted fulltext
    failed: dict = field(default_factory=dict)          # vorlage_id (as str) -> retry_count
    failed_permanent: set = field(default_factory=set)  # vorlage_ids given up after 3 retries
    started_at: str = ""                                # ISO timestamp of first run
    last_update: str = ""                               # ISO timestamp of the last save
    stats: dict = field(default_factory=lambda: {
        "success": 0, "failed": 0, "retried": 0, "total": 0
    })
|
||||||
|
|
||||||
|
|
||||||
|
class AdaptiveExtractor:
    """Downloads attachment PDFs, extracts their text and stores it in SQLite.

    Throughput is adapted at runtime: after ``streak_threshold`` consecutive
    successes the delay shrinks or a worker is added (``_speedup``); HTTP
    429/503 responses trigger a cooldown and a scale-down (``_slowdown``).
    Progress is persisted to ``STATE_FILE`` after every batch so an aborted
    run can be resumed.
    """

    def __init__(self, state_file: Path = STATE_FILE, notify: bool = True):
        self.state_file = state_file
        self.config = AdaptiveConfig()
        self.db_lock = Lock()            # serializes all SQLite writes
        self.log_lock = Lock()           # serializes appends to LOG_FILE
        self.notify = notify
        self.last_notify = 0
        self.notify_interval = 300       # 5 minutes
        self.batch_start_time = None
        self.batch_metrics = []          # per-batch metric dicts, see _record_metric
        self.state = self._load_state()  # Must be after log_lock init (_load_state logs)

    def _load_state(self) -> State:
        """Load resume state from disk, or start fresh if missing/unreadable."""
        if self.state_file.exists():
            try:
                data = json.loads(self.state_file.read_text())
                state = State(
                    processed=set(data.get("processed", [])),
                    failed=data.get("failed", {}),
                    failed_permanent=set(data.get("failed_permanent", [])),
                    started_at=data.get("started_at", ""),
                    last_update=data.get("last_update", ""),
                    stats=data.get("stats", State().stats)
                )
                self._log(f"State geladen: {len(state.processed)} verarbeitet, {len(state.failed)} pending retries")
                return state
            except Exception as e:
                # corrupt state file: log and fall through to a fresh state
                self._log(f"State-Laden fehlgeschlagen: {e}")

        return State(started_at=datetime.now().isoformat())

    def _save_state(self):
        """Persist resume state to STATE_FILE as pretty-printed JSON."""
        self.state.last_update = datetime.now().isoformat()
        data = {
            "processed": list(self.state.processed),
            "failed": self.state.failed,
            "failed_permanent": list(self.state.failed_permanent),
            "started_at": self.state.started_at,
            "last_update": self.state.last_update,
            "stats": self.state.stats
        }
        self.state_file.write_text(json.dumps(data, indent=2))

    def _log(self, msg: str):
        """Thread-safe logging to stdout and LOG_FILE."""
        timestamp = datetime.now().strftime("%H:%M:%S")
        line = f"[{timestamp}] {msg}"
        print(line)
        with self.log_lock:
            with open(LOG_FILE, "a") as f:
                f.write(line + "\n")

    def _record_metric(self, batch_num: int, batch_time: float, success: int, failed: int, bytes_downloaded: int = 0):
        """Record per-batch metrics for later visualization (appended to METRICS_FILE as JSONL)."""
        mb_downloaded = bytes_downloaded / (1024 * 1024)
        mb_per_sec = mb_downloaded / max(batch_time, 0.1)

        metric = {
            "timestamp": datetime.now().isoformat(),
            "batch": batch_num,
            "batch_time_sec": round(batch_time, 2),
            "success": success,
            "failed": failed,
            "delay": round(self.config.delay, 3),
            "workers": self.config.workers,
            "throughput": round(success / max(batch_time, 0.1), 2),  # docs/sec
            "total_success": self.state.stats["success"],
            "total_failed": len(self.state.failed_permanent),
            "pending_retries": len(self.state.failed),
            "mb_downloaded": round(mb_downloaded, 2),
            "mb_per_sec": round(mb_per_sec, 2),
        }
        self.batch_metrics.append(metric)

        with open(METRICS_FILE, "a") as f:
            f.write(json.dumps(metric) + "\n")

        return metric

    def _push_metrics(self, metric: dict):
        """Push metrics via HTTP to the VServer (Netdata statsd bridge); failures are ignored."""
        try:
            payload = {
                "throughput": metric["throughput"],
                "delay": metric["delay"],
                "workers": metric["workers"],
                "success_total": metric["total_success"],
                "failed_total": metric["total_failed"],
                "batch_time": metric["batch_time_sec"],
                "pending_retries": metric["pending_retries"],
                "items_per_sec": metric["throughput"],  # Alias
                "mb_per_sec": metric.get("mb_per_sec", 0),
                "mb_downloaded": metric.get("mb_downloaded", 0),
            }
            httpx.post(METRICS_URL, json=payload, timeout=5)
        except Exception as e:
            pass  # Silent fail, don't block extraction

    def _send_telegram(self, message: str):
        """Log the notification text (no actual Telegram API call is made here)."""
        self._log(f"[NOTIFY] {message[:100]}...")

    def _maybe_notify(self, force: bool = False):
        """Emit a periodic status update, at most every ``notify_interval`` seconds."""
        if not self.notify:
            return

        now = time.time()
        if not force and (now - self.last_notify) < self.notify_interval:
            return

        self.last_notify = now

        # latest metrics
        if not self.batch_metrics:
            return

        recent = self.batch_metrics[-1]
        elapsed = (datetime.now() - datetime.fromisoformat(self.state.started_at)).total_seconds() / 60

        # throughput trend (last 5 batches)
        recent_throughputs = [m["throughput"] for m in self.batch_metrics[-5:]]
        avg_throughput = sum(recent_throughputs) / len(recent_throughputs)

        # ETA
        remaining = self.state.stats["total"] - self.state.stats["success"] - len(self.state.failed_permanent)
        eta_min = remaining / max(avg_throughput * 60, 0.1)

        msg = f"""📊 *PDF-Extraktion Update*

✓ Erfolg: {self.state.stats['success']:,}
✗ Fehler: {len(self.state.failed_permanent)}
↻ Retries: {len(self.state.failed)}

⚡ Config: {self.config.workers} workers, {self.config.delay:.2f}s delay
📈 Throughput: {avg_throughput:.1f} docs/sec
⏱️ Laufzeit: {elapsed:.0f} min
🎯 ETA: ~{eta_min:.0f} min

Batch {recent['batch']}: {recent['success']}✓ {recent['failed']}✗ in {recent['batch_time_sec']}s"""

        self._send_telegram(msg)

    def _get_db(self):
        """Open a new SQLite connection with dict-like row access."""
        conn = sqlite3.connect(str(DB_PATH), check_same_thread=False)
        conn.row_factory = sqlite3.Row
        return conn

    def _download_and_extract(self, vorlage_id: int, url: str) -> tuple[int, str | None, str | None, int]:
        """Download a PDF and extract its text.

        Returns:
            (vorlage_id, text, error, bytes_downloaded) — exactly one of
            ``text``/``error`` is non-None; errors are short strings.
        """
        try:
            resp = httpx.get(url, timeout=60, follow_redirects=True)

            # throttling detection: 429/503 get special handling upstream
            if resp.status_code in (429, 503):
                return (vorlage_id, None, f"THROTTLED:{resp.status_code}", 0)

            resp.raise_for_status()

            content_size = len(resp.content)

            if content_size < 100:
                return (vorlage_id, None, "PDF zu klein", content_size)

            with tempfile.NamedTemporaryFile(suffix=".pdf", delete=True) as tmp:
                tmp.write(resp.content)
                tmp.flush()

                doc = pymupdf.open(tmp.name)
                text_parts = []
                for page in doc:
                    text_parts.append(page.get_text())
                doc.close()

            text = "\n".join(text_parts).strip()

            if len(text) < 50:
                return (vorlage_id, None, "Kein Text", content_size)

            # clean up: collapse excessive blank lines and runs of spaces
            import re
            text = re.sub(r'\n{3,}', '\n\n', text)
            text = re.sub(r' {2,}', ' ', text)

            return (vorlage_id, text, None, content_size)

        except httpx.HTTPStatusError as e:
            return (vorlage_id, None, f"HTTP:{e.response.status_code}", 0)
        except Exception as e:
            return (vorlage_id, None, str(e)[:80], 0)

    def _handle_success(self, vorlage_id: int, text: str):
        """Persist extracted text and advance the adaptive speedup streak."""
        with self.db_lock:
            conn = self._get_db()
            conn.execute("""
                UPDATE vorlagen SET volltext = ?, volltext_clean = ?
                WHERE id = ?
            """, (text, text, vorlage_id))
            conn.execute("UPDATE anlagen SET downloaded = 1 WHERE vorlage_id = ?", (vorlage_id,))
            conn.commit()
            conn.close()

        self.state.processed.add(vorlage_id)
        self.state.stats["success"] += 1
        self.config.success_streak += 1

        # adaptive speedup once enough consecutive successes accumulate
        if self.config.success_streak >= self.config.streak_threshold:
            self._speedup()
            self.config.success_streak = 0

    def _handle_failure(self, vorlage_id: int, error: str):
        """Handle an extraction error with retry bookkeeping (max 3 attempts)."""
        retry_count = self.state.failed.get(str(vorlage_id), 0) + 1

        if "THROTTLED" in error:
            # activate cooldown; keep the item queued for retry
            self._slowdown(severe=True)
            self.state.failed[str(vorlage_id)] = retry_count
            return

        if retry_count >= 3:
            self.state.failed_permanent.add(vorlage_id)
            if str(vorlage_id) in self.state.failed:
                del self.state.failed[str(vorlage_id)]
            self._log(f" ✗ #{vorlage_id} permanent failed: {error}")
        else:
            self.state.failed[str(vorlage_id)] = retry_count
            self._log(f" ↻ #{vorlage_id} retry {retry_count}/3: {error}")

        self.state.stats["failed"] += 1
        self.config.success_streak = 0

    def _speedup(self):
        """Increase speed using per-worker-level exploration and saturation detection."""
        old_delay = self.config.delay
        old_workers = self.config.workers

        # remember the current delay as stable for this worker level
        w = self.config.workers
        if w not in self.config.best_delay_per_worker:
            self.config.best_delay_per_worker[w] = self.config.delay
        else:
            self.config.best_delay_per_worker[w] = min(
                self.config.best_delay_per_worker[w],
                self.config.delay
            )

        # track current throughput for this worker level
        if self.batch_metrics:
            recent_throughput = sum(m["throughput"] for m in self.batch_metrics[-3:]) / min(3, len(self.batch_metrics))
            if w not in self.config.throughput_per_worker:
                self.config.throughput_per_worker[w] = recent_throughput
            else:
                # exponential moving average
                self.config.throughput_per_worker[w] = (
                    self.config.throughput_per_worker[w] * 0.7 + recent_throughput * 0.3
                )

        if self.config.delay > self.config.min_delay:
            # delay not yet at the minimum -> keep reducing
            self.config.delay = max(self.config.min_delay, self.config.delay * 0.8)
            self.config.delay_fully_explored = False
        elif self.config.saturation_detected:
            # saturation detected -> stop scaling up
            self._log(f"📊 Sättigung bei {self.config.workers} Workers — mehr bringt nichts")
        elif self.config.workers < self.config.max_workers:
            # check whether the last worker increase actually improved throughput
            prev_throughput = self.config.throughput_per_worker.get(w - 1, 0)
            curr_throughput = self.config.throughput_per_worker.get(w, 0)

            if prev_throughput > 0 and curr_throughput > 0:
                improvement = (curr_throughput - prev_throughput) / prev_throughput
                if improvement < self.config.saturation_threshold:
                    # less than 10% improvement -> saturation
                    self.config.saturation_detected = True
                    self._log(f"📊 Sättigung erkannt: {w-1}→{w} Workers nur +{improvement*100:.1f}% Throughput")
                    return

            # add a worker; reset delay so the new level gets explored from a safe value
            self.config.workers += 1
            prev_best = self.config.best_delay_per_worker.get(w, 0.5)
            self.config.delay = max(prev_best, 0.3)
            self.config.delay_fully_explored = False
            self._log(f"🔄 Neuer Worker-Level: reset delay auf {self.config.delay:.2f}s für Exploration")
        else:
            self.config.delay_fully_explored = True

        if old_delay != self.config.delay or old_workers != self.config.workers:
            self._log(f"⚡ Speedup: delay={self.config.delay:.2f}s, workers={self.config.workers}")

    def _slowdown(self, severe: bool = False):
        """Slow down after problems; ``severe`` (throttled) also triggers a 30s cooldown."""
        if severe:
            self.config.cooldown_until = time.time() + 30  # 30s pause
            self.config.delay = min(2.0, self.config.delay * 2)
            self.config.workers = max(1, self.config.workers - 1)
            self.config.delay_fully_explored = False  # reset exploration
            self._log(f"🛑 Throttled! Cooldown 30s, delay={self.config.delay:.2f}s, workers={self.config.workers}")
        else:
            self.config.delay = min(2.0, self.config.delay * 1.2)
            self._log(f"⚠️ Slowdown: delay={self.config.delay:.2f}s")

    def _wait_cooldown(self):
        """Sleep until any active cooldown has passed."""
        if self.config.cooldown_until > time.time():
            wait = self.config.cooldown_until - time.time()
            self._log(f"⏳ Cooldown: {wait:.0f}s warten...")
            time.sleep(wait)

    def get_pending(self, limit: int) -> list[dict]:
        """Fetch up to ``limit`` pending items: fresh ones first, then queued retries."""
        conn = self._get_db()

        # everything with a URL but no fulltext, that is not already handled
        processed_ids = self.state.processed | self.state.failed_permanent

        query = """
            SELECT a.vorlage_id, a.url
            FROM anlagen a
            JOIN vorlagen v ON a.vorlage_id = v.id
            WHERE a.url IS NOT NULL
            AND a.downloaded = 0
            AND (v.volltext_clean IS NULL OR v.volltext_clean = '')
            ORDER BY v.datum_eingang DESC
        """

        all_pending = conn.execute(query).fetchall()
        conn.close()

        # filter out already-processed / permanently failed ids in Python
        result = []
        for row in all_pending:
            if row['vorlage_id'] not in processed_ids:
                result.append(dict(row))
                if len(result) >= limit:
                    break

        # append queued retries (at the end)
        for vid_str, count in list(self.state.failed.items()):
            if len(result) >= limit:
                break
            vid = int(vid_str)
            # fetch the URL again
            conn = self._get_db()
            row = conn.execute("SELECT vorlage_id, url FROM anlagen WHERE vorlage_id = ?", (vid,)).fetchone()
            conn.close()
            if row:
                result.append(dict(row))
                self.state.stats["retried"] += 1

        return result

    def run(self, limit: int = 1000):
        """Main loop: process pending items in batches of 50 with adaptive pacing."""
        self._log(f"=== Adaptive Extraktion gestartet ===")
        self._log(f"Limit: {limit}, Start-Config: delay={self.config.delay}s, workers={self.config.workers}")

        pending = self.get_pending(limit)
        self.state.stats["total"] = len(pending)
        self._log(f"Zu verarbeiten: {len(pending)}")

        if not pending:
            self._log("Nichts zu tun!")
            return

        batch_size = 50
        processed_count = 0
        batch_num = 0

        for i in range(0, len(pending), batch_size):
            self._wait_cooldown()

            batch = pending[i:i+batch_size]
            batch_num += 1
            batch_start = time.time()
            batch_success = 0
            batch_failed = 0
            batch_bytes = 0

            self._log(f"\n--- Batch {batch_num}: {len(batch)} Vorlagen ---")
            self._log(f"Config: delay={self.config.delay:.2f}s, workers={self.config.workers}")

            with ThreadPoolExecutor(max_workers=self.config.workers) as executor:
                futures = {}
                for item in batch:
                    # delay between submissions is the primary throttle knob
                    time.sleep(self.config.delay)
                    future = executor.submit(
                        self._download_and_extract,
                        item['vorlage_id'],
                        item['url']
                    )
                    futures[future] = item

                for future in as_completed(futures):
                    vorlage_id, text, error, bytes_dl = future.result()
                    batch_bytes += bytes_dl

                    if text:
                        self._handle_success(vorlage_id, text)
                        self._log(f" ✓ #{vorlage_id}: {len(text)} Zeichen")
                        batch_success += 1
                    else:
                        self._handle_failure(vorlage_id, error)
                        batch_failed += 1

                    processed_count += 1

            # record metrics for this batch
            batch_time = time.time() - batch_start
            metric = self._record_metric(batch_num, batch_time, batch_success, batch_failed, batch_bytes)

            # push to Netdata statsd
            self._push_metrics(metric)

            # save state after every batch (enables resume after abort)
            self._save_state()

            # progress line
            stats = self.state.stats
            self._log(f"Progress: {processed_count}/{len(pending)} | ✓{stats['success']} ✗{len(self.state.failed_permanent)} | {batch_success/max(batch_time,0.1):.1f} docs/sec")

            # periodic notification (every 5 min by default)
            self._maybe_notify()

        self._log(f"\n=== Fertig ===")
        self._log(f"Erfolgreich: {self.state.stats['success']}")
        self._log(f"Fehlgeschlagen: {len(self.state.failed_permanent)}")
        self._log(f"State gespeichert: {self.state_file}")

        # final notification
        self._maybe_notify(force=True)

        if self.notify:
            self._send_telegram(f"✅ *PDF-Extraktion abgeschlossen*\n\n✓ {self.state.stats['success']:,} erfolgreich\n✗ {len(self.state.failed_permanent)} fehlgeschlagen")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: parse arguments, optionally reset state, run the extraction."""
    cli = argparse.ArgumentParser(description="Adaptive PDF-Extraktion")
    cli.add_argument("--limit", type=int, default=1000, help="Max. Anzahl")
    cli.add_argument("--reset", action="store_true", help="State zurücksetzen")
    cli.add_argument("--no-notify", action="store_true", help="Keine Telegram-Updates")
    cli.add_argument("--notify-interval", type=int, default=300, help="Sekunden zwischen Updates")
    opts = cli.parse_args()

    if opts.reset:
        # Drop resume state and recorded metrics for a completely fresh run.
        if STATE_FILE.exists():
            STATE_FILE.unlink()
            print("State zurückgesetzt")
        if METRICS_FILE.exists():
            METRICS_FILE.unlink()

    runner = AdaptiveExtractor(notify=not opts.no_notify)
    runner.notify_interval = opts.notify_interval
    runner.run(limit=opts.limit)
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point — the guard keeps imports of this module side-effect free.
if __name__ == "__main__":
    main()
|
||||||
279
scripts/extract_orte_ki.py
Normal file
279
scripts/extract_orte_ki.py
Normal file
@ -0,0 +1,279 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
KI-gestützte Ortsextraktion aus Volltexten.
|
||||||
|
Zweistufiger Prozess:
|
||||||
|
1. Extraktion aller Ortsangaben mit Kontext
|
||||||
|
2. Intelligente Georeferenzierung mit Kontextverständnis
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
PROJECT_ROOT = Path(__file__).resolve().parent.parent
|
||||||
|
DB_PATH = PROJECT_ROOT / "data" / "tracker_remote.db"
|
||||||
|
|
||||||
|
DASHSCOPE_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions"
|
||||||
|
DASHSCOPE_KEY = os.environ.get("QWEN_API_KEY") or os.popen("security find-generic-password -s qwen-api -w 2>/dev/null").read().strip()
|
||||||
|
|
||||||
|
# Nominatim für Geocoding
|
||||||
|
NOMINATIM_URL = "https://nominatim.openstreetmap.org/search"
|
||||||
|
USER_AGENT = "Antragstracker-Hagen/1.0"
|
||||||
|
HAGEN_BBOX = "7.35,51.30,7.65,51.45"
|
||||||
|
|
||||||
|
EXTRACTION_PROMPT = """Extrahiere ALLE geografischen Ortsangaben aus diesem kommunalpolitischen Dokument aus Hagen.
|
||||||
|
|
||||||
|
DOKUMENT:
|
||||||
|
{volltext}
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
Gib eine Liste aller Orte zurück, die im Text erwähnt werden. Für jeden Ort:
|
||||||
|
- rohtext: Die genaue Formulierung im Text
|
||||||
|
- kontext: Der Satz oder Absatz, in dem der Ort erwähnt wird
|
||||||
|
- typ: strasse|platz|stadtteil|gebaeude|sonstiges
|
||||||
|
- geocodierbar: true/false (kann man das auf einer Karte finden?)
|
||||||
|
- geocode_query: Falls geocodierbar, der beste Suchbegriff für Nominatim (z.B. bei "Polizeiwache an der Boeler Straße" → "Boeler Straße")
|
||||||
|
|
||||||
|
JSON-Format:
|
||||||
|
{{
|
||||||
|
"orte": [
|
||||||
|
{{
|
||||||
|
"rohtext": "Altenhagener Brücke",
|
||||||
|
"kontext": "Der Abschnitt ab der Altenhagener Brücke bis zum Aldi",
|
||||||
|
"typ": "strasse",
|
||||||
|
"geocodierbar": true,
|
||||||
|
"geocode_query": "Altenhagener Brücke, Hagen"
|
||||||
|
}},
|
||||||
|
{{
|
||||||
|
"rohtext": "Spielplatz",
|
||||||
|
"kontext": "Darüber hinaus befindet sich ein Spielplatz",
|
||||||
|
"typ": "gebaeude",
|
||||||
|
"geocodierbar": false,
|
||||||
|
"geocode_query": null
|
||||||
|
}}
|
||||||
|
]
|
||||||
|
}}
|
||||||
|
|
||||||
|
WICHTIG:
|
||||||
|
- Extrahiere ALLE Orte, auch generische
|
||||||
|
- Bei "X an der Y-Straße" ist Y-Straße der geocode_query
|
||||||
|
- Stadtteile wie "Altenhagen", "Haspe" sind geocodierbar
|
||||||
|
- Generische Begriffe wie "Schule", "Spielplatz" ohne Straßenangabe sind NICHT geocodierbar
|
||||||
|
|
||||||
|
NUR JSON, keine Erklärungen."""
|
||||||
|
|
||||||
|
|
||||||
|
GEOCODE_REFINEMENT_PROMPT = """Du bist ein Geocoding-Experte für die Stadt Hagen (NRW).
|
||||||
|
|
||||||
|
Ich habe folgende Ortsangaben aus einem kommunalpolitischen Dokument extrahiert:
|
||||||
|
{orte_json}
|
||||||
|
|
||||||
|
Der Volltext-Kontext war:
|
||||||
|
{kontext}
|
||||||
|
|
||||||
|
Nominatim hat für "{query}" folgende Ergebnisse in Hagen gefunden:
|
||||||
|
{nominatim_results}
|
||||||
|
|
||||||
|
Welches Ergebnis passt am besten zum Kontext? Antworte mit der Nummer (1, 2, 3...) oder "keins" wenn keins passt.
|
||||||
|
Nur die Nummer oder "keins", keine Erklärung."""
|
||||||
|
|
||||||
|
|
||||||
|
def get_db():
    """Open the tracker SQLite database with dict-like row access."""
    connection = sqlite3.connect(str(DB_PATH))
    connection.row_factory = sqlite3.Row
    return connection
|
||||||
|
|
||||||
|
|
||||||
|
def call_qwen(prompt: str, model: str = "qwen-turbo-latest") -> dict | str | None:
    """Send a single-turn prompt to the Qwen chat API.

    Returns the parsed JSON (dict) when the reply is valid JSON, the raw
    reply string otherwise, or None when no API key is set or any error
    occurs (best-effort: errors are printed, not raised).
    """
    if not DASHSCOPE_KEY:
        return None

    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.1,
    }
    auth_headers = {"Authorization": f"Bearer {DASHSCOPE_KEY}", "Content-Type": "application/json"}

    try:
        response = httpx.post(DASHSCOPE_URL, headers=auth_headers, json=payload, timeout=60)
        response.raise_for_status()
        content = response.json()["choices"][0]["message"]["content"]

        # Strip a Markdown code fence around the JSON, if present.
        if "```json" in content:
            content = content.split("```json")[1].split("```")[0]
        elif "```" in content:
            fence_parts = content.split("```")
            if len(fence_parts) >= 2:
                content = fence_parts[1]

        stripped = content.strip()
        try:
            return json.loads(stripped)
        except json.JSONDecodeError:
            return stripped
    except Exception as e:
        print(f"  API-Fehler: {e}")
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def geocode_nominatim(client: httpx.Client, query: str) -> list[dict]:
    """Query Nominatim for a place, restricted to the Hagen bounding box.

    Returns up to three raw Nominatim result dicts; [] on any error.
    """
    search_params = {
        "q": f"{query}, Hagen, Germany",
        "format": "json",
        "limit": 3,
        "viewbox": HAGEN_BBOX,
        "bounded": 1,
    }
    try:
        response = client.get(
            NOMINATIM_URL,
            params=search_params,
            headers={"User-Agent": USER_AGENT},
            timeout=10,
        )
        response.raise_for_status()
        return response.json()
    except Exception as e:
        print(f"  Nominatim-Fehler: {e}")
        return []
|
||||||
|
|
||||||
|
|
||||||
|
def process_vorlage(conn: sqlite3.Connection, client: httpx.Client, vorlage: dict) -> int:
    """Extract place references from one Vorlage and geocode them.

    Two stages: (1) LLM extraction of place mentions from the full text,
    (2) Nominatim geocoding for mentions the LLM flagged as geocodable.
    New places are inserted into `orte`; every mention is linked via
    `vorlagen_orte`. Returns the number of newly geocoded places.
    """
    vid = vorlage['id']
    akz = vorlage['aktenzeichen'] or f"#{vid}"
    volltext = vorlage['volltext_clean']

    # Too little text to contain useful place references.
    if not volltext or len(volltext) < 100:
        return 0

    # Truncate the full text to keep the LLM prompt within budget.
    volltext_short = volltext[:6000] if len(volltext) > 6000 else volltext

    # Stage 1: LLM extraction of place mentions.
    prompt = EXTRACTION_PROMPT.format(volltext=volltext_short)
    result = call_qwen(prompt)

    # call_qwen may return None, a raw string, or a dict — only a dict
    # with an "orte" key is usable here.
    if not result or not isinstance(result, dict) or 'orte' not in result:
        print(f"  {akz}: Keine Orte extrahiert")
        return 0

    orte = result['orte']
    print(f"  {akz}: {len(orte)} Orte gefunden")

    # Stage 2: geocoding of the geocodable mentions.
    success = 0
    for ort in orte:
        rohtext = ort.get('rohtext', '')
        kontext = ort.get('kontext', '')
        typ = ort.get('typ', 'sonstiges')
        geocodierbar = ort.get('geocodierbar', False)
        geocode_query = ort.get('geocode_query')

        if not rohtext:
            continue

        # Does this place already exist (by canonical name or raw text)?
        existing = conn.execute(
            "SELECT id FROM orte WHERE name = ? OR rohtext = ?",
            (rohtext, rohtext)
        ).fetchone()

        if existing:
            # Only create the link and bump the usage counter.
            conn.execute("""
                INSERT OR IGNORE INTO vorlagen_orte (vorlage_id, ort_id, kontext)
                VALUES (?, ?, ?)
            """, (vid, existing['id'], kontext[:500]))
            conn.execute("UPDATE orte SET vorlage_count = vorlage_count + 1 WHERE id = ?", (existing['id'],))
            conn.commit()
            continue

        # Create a new place record.
        lat, lon = None, None
        status = 'skipped'

        if geocodierbar and geocode_query:
            time.sleep(1.1)  # Nominatim rate limit: max 1 request/second
            results = geocode_nominatim(client, geocode_query)

            if results:
                # Take the first hit (could be refined with the LLM).
                lat = float(results[0]['lat'])
                lon = float(results[0]['lon'])
                status = 'success'
                print(f"    ✓ {rohtext} → ({lat:.4f}, {lon:.4f})")
            else:
                status = 'failed'
                print(f"    ✗ {rohtext} (nicht gefunden)")
        else:
            print(f"    ⊘ {rohtext} (nicht geocodierbar)")

        # Insert even failed/skipped places so they are not re-queried.
        cursor = conn.execute("""
            INSERT INTO orte (name, typ, lat, lon, rohtext, kontext_satz, geocode_status, vorlage_count)
            VALUES (?, ?, ?, ?, ?, ?, ?, 1)
        """, (geocode_query or rohtext, typ, lat, lon, rohtext, kontext[:500], status))

        ort_id = cursor.lastrowid
        conn.execute("""
            INSERT OR IGNORE INTO vorlagen_orte (vorlage_id, ort_id, kontext)
            VALUES (?, ?, ?)
        """, (vid, ort_id, kontext[:500]))
        conn.commit()

        if lat:
            success += 1

    return success
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: select unprocessed Vorlagen, extract and geocode places."""
    parser = argparse.ArgumentParser(description="KI-gestützte Ortsextraktion")
    parser.add_argument("--limit", type=int, default=10, help="Max. Anzahl Vorlagen")
    parser.add_argument("--vorlage", type=int, help="Einzelne Vorlage-ID")
    args = parser.parse_args()

    print(f"=== KI-Ortsextraktion ===\n")

    conn = get_db()
    client = httpx.Client()

    if args.vorlage:
        # Explicit single Vorlage requested.
        query = "SELECT id, aktenzeichen, volltext_clean FROM vorlagen WHERE id = ?"
        params = [args.vorlage]
    else:
        # Vorlagen with full text that have not been processed yet.
        query = """
            SELECT v.id, v.aktenzeichen, v.volltext_clean
            FROM vorlagen v
            WHERE v.volltext_clean IS NOT NULL
              AND v.id NOT IN (SELECT DISTINCT vorlage_id FROM vorlagen_orte)
            ORDER BY v.datum_eingang DESC
            LIMIT ?
        """
        params = [args.limit]

    vorlagen = conn.execute(query, params).fetchall()
    print(f"Verarbeite {len(vorlagen)} Vorlagen\n")

    total_success = 0
    for v in vorlagen:
        total_success += process_vorlage(conn, client, dict(v))

    client.close()

    # Summary statistics.
    total_orte = conn.execute("SELECT COUNT(*) FROM orte").fetchone()[0]
    geocoded = conn.execute("SELECT COUNT(*) FROM orte WHERE lat IS NOT NULL").fetchone()[0]
    conn.close()

    print(f"\n=== Fertig ===")
    print(f"Orte gesamt: {total_orte}")
    print(f"Geocodiert: {geocoded}")
    print(f"Diese Runde: {total_success} neue geocodiert")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
155
scripts/extract_pdfs.py
Normal file
155
scripts/extract_pdfs.py
Normal file
@ -0,0 +1,155 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Lädt PDFs von URLs und extrahiert Text mit PyMuPDF.
|
||||||
|
Parallelisiert für Geschwindigkeit.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import sqlite3
|
||||||
|
import tempfile
|
||||||
|
import time
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import pymupdf # PyMuPDF
|
||||||
|
|
||||||
|
PROJECT_ROOT = Path(__file__).resolve().parent.parent
|
||||||
|
DB_PATH = PROJECT_ROOT / "data" / "tracker_remote.db"
|
||||||
|
|
||||||
|
# Rate limiting
|
||||||
|
REQUESTS_PER_SECOND = 5
|
||||||
|
MIN_DELAY = 1.0 / REQUESTS_PER_SECOND
|
||||||
|
|
||||||
|
|
||||||
|
def get_db():
    """Open the tracker DB; check_same_thread=False allows thread-pool use."""
    db = sqlite3.connect(str(DB_PATH), check_same_thread=False)
    db.row_factory = sqlite3.Row
    return db
|
||||||
|
|
||||||
|
|
||||||
|
def download_and_extract(vorlage_id: int, url: str) -> tuple[int, str | None, str | None]:
    """Download a PDF and extract its plain text.

    Args:
        vorlage_id: DB id of the Vorlage the attachment belongs to.
        url: Direct download URL of the PDF.

    Returns:
        (vorlage_id, text, error) — exactly one of text/error is None.
    """
    try:
        resp = httpx.get(url, timeout=60, follow_redirects=True)
        resp.raise_for_status()

        # Tiny responses are error pages or truncated downloads.
        if len(resp.content) < 100:
            return (vorlage_id, None, "PDF zu klein")

        # Open straight from memory — no temp-file round-trip needed
        # (PyMuPDF supports stream input; mirrors extract_volltext.py).
        doc = pymupdf.open(stream=resp.content, filetype="pdf")
        try:
            text = "\n".join(page.get_text() for page in doc).strip()
        finally:
            # Always release the document, even if extraction raises.
            doc.close()

        if len(text) < 50:
            return (vorlage_id, None, "Kein Text extrahiert")

        return (vorlage_id, text, None)

    except httpx.HTTPStatusError as e:
        return (vorlage_id, None, f"HTTP {e.response.status_code}")
    except Exception as e:
        # Best-effort: report the error string, truncated for the DB/log.
        return (vorlage_id, None, str(e)[:100])
|
||||||
|
|
||||||
|
|
||||||
|
def clean_text(text: str) -> str:
    """Normalize extracted PDF text.

    Collapses runs of 3+ newlines to a single blank line and runs of
    spaces to one space, then trims surrounding whitespace.
    """
    import re
    collapsed = re.sub(r'\n{3,}', '\n\n', text)
    collapsed = re.sub(r' {2,}', ' ', collapsed)
    return collapsed.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def process_batch(vorlagen: list[dict], workers: int = 5) -> dict:
    """Download and extract a batch of PDFs in parallel, persisting results.

    Downloads run on a thread pool; all DB writes happen on the calling
    thread inside the as_completed() loop, so the shared connection is
    never used concurrently (check_same_thread=False is still required
    because the connection crosses the with-block scope).

    Returns a dict with "success"/"failed" counters and an "errors" list
    of (vorlage_id, error) tuples.
    """
    results = {"success": 0, "failed": 0, "errors": []}
    conn = get_db()

    with ThreadPoolExecutor(max_workers=workers) as executor:
        futures = {}
        for v in vorlagen:
            time.sleep(MIN_DELAY)  # rate limiting: stagger request submission
            future = executor.submit(download_and_extract, v['vorlage_id'], v['url'])
            futures[future] = v

        for future in as_completed(futures):
            v = futures[future]
            vorlage_id, text, error = future.result()

            if text:
                clean = clean_text(text)
                # Store raw + cleaned text, then mark the attachment fetched.
                conn.execute("""
                    UPDATE vorlagen SET volltext = ?, volltext_clean = ?
                    WHERE id = ?
                """, (text, clean, vorlage_id))
                conn.execute("""
                    UPDATE anlagen SET downloaded = 1 WHERE vorlage_id = ?
                """, (vorlage_id,))
                conn.commit()
                results["success"] += 1
                print(f"  ✓ #{vorlage_id}: {len(clean)} Zeichen")
            else:
                results["failed"] += 1
                results["errors"].append((vorlage_id, error))
                print(f"  ✗ #{vorlage_id}: {error}")

    conn.close()
    return results
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: select pending attachments and extract them in parallel."""
    parser = argparse.ArgumentParser(description="PDF-Extraktion")
    parser.add_argument("--limit", type=int, default=100, help="Max. Anzahl")
    parser.add_argument("--workers", type=int, default=5, help="Parallele Downloads")
    parser.add_argument("--offset", type=int, default=0, help="Start-Offset")
    args = parser.parse_args()

    print(f"=== PDF-Extraktion ===")
    print(f"Limit: {args.limit}, Workers: {args.workers}\n")

    conn = get_db()

    # Attachments with a PDF URL whose Vorlage has no full text yet.
    vorlagen = conn.execute("""
        SELECT a.vorlage_id, a.url
        FROM anlagen a
        JOIN vorlagen v ON a.vorlage_id = v.id
        WHERE a.url IS NOT NULL
          AND a.downloaded = 0
          AND (v.volltext_clean IS NULL OR v.volltext_clean = '')
        ORDER BY v.datum_eingang DESC
        LIMIT ? OFFSET ?
    """, (args.limit, args.offset)).fetchall()

    conn.close()  # process_batch opens its own connection

    print(f"Zu verarbeiten: {len(vorlagen)}\n")

    if not vorlagen:
        print("Nichts zu tun!")
        return

    results = process_batch([dict(v) for v in vorlagen], args.workers)

    print(f"\n=== Fertig ===")
    print(f"Erfolgreich: {results['success']}")
    print(f"Fehlgeschlagen: {results['failed']}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
132
scripts/extract_volltext.py
Normal file
132
scripts/extract_volltext.py
Normal file
@ -0,0 +1,132 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
PDF-Volltext-Extraktion für Antragstracker Hagen.
|
||||||
|
Lädt PDFs von ALLRIS und extrahiert den Text.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import sqlite3
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import fitz # PyMuPDF
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
PROJECT_ROOT = Path(__file__).resolve().parent.parent
|
||||||
|
DB_PATH = PROJECT_ROOT / "data" / "tracker_remote.db"
|
||||||
|
|
||||||
|
|
||||||
|
def get_db():
    """Open the tracker SQLite database with dict-like row access."""
    database = sqlite3.connect(str(DB_PATH))
    database.row_factory = sqlite3.Row
    return database
|
||||||
|
|
||||||
|
|
||||||
|
def extract_text_from_pdf(pdf_bytes: bytes) -> str:
    """Extract plain text from an in-memory PDF; returns '' on failure."""
    try:
        document = fitz.open(stream=pdf_bytes, filetype="pdf")
        pages = [page.get_text() for page in document]
        document.close()
        return "\n".join(pages).strip()
    except Exception as e:
        print(f"  PDF-Fehler: {e}")
        return ""
|
||||||
|
|
||||||
|
|
||||||
|
def clean_text(text: str) -> str:
    """Normalize extracted text.

    Collapses runs of 3+ newlines to a single blank line, strips leading
    and trailing whitespace from every line, and trims the result.
    """
    import re
    text = re.sub(r'\n{3,}', '\n\n', text)
    stripped_lines = (line.strip() for line in text.split('\n'))
    return '\n'.join(stripped_lines).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def process_vorlage(conn: sqlite3.Connection, client: httpx.Client, vorlage: dict) -> bool:
    """Download the Vorlage's PDF, extract its text, and store both versions.

    Returns True when text was extracted and saved, False otherwise.
    """
    vid = vorlage['id']
    akz = vorlage['aktenzeichen'] or f"#{vid}"
    pdf_url = vorlage['pdf_url']

    if not pdf_url:
        print(f"  {akz}: Keine PDF-URL")
        return False

    try:
        resp = client.get(pdf_url, timeout=30, follow_redirects=True)
        resp.raise_for_status()

        # Guard against HTML error pages served with status 200.
        if 'application/pdf' not in resp.headers.get('content-type', ''):
            print(f"  {akz}: Kein PDF ({resp.headers.get('content-type')})")
            return False

        text = extract_text_from_pdf(resp.content)
        if not text:
            print(f"  {akz}: Kein Text extrahiert")
            return False

        text_clean = clean_text(text)

        # Persist raw and cleaned full text for this Vorlage.
        conn.execute(
            "UPDATE vorlagen SET volltext = ?, volltext_clean = ? WHERE id = ?",
            (text, text_clean, vid)
        )
        conn.commit()

        print(f"  {akz}: {len(text_clean)} Zeichen")
        return True

    except httpx.TimeoutException:
        print(f"  {akz}: Timeout")
        return False
    except Exception as e:
        # Best-effort batch processing: log and continue with the next one.
        print(f"  {akz}: Fehler {e}")
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: fetch PDFs for Vorlagen that have no full text yet."""
    parser = argparse.ArgumentParser(description="PDF-Volltext-Extraktion")
    parser.add_argument("--limit", type=int, default=10, help="Max. Anzahl (default: 10)")
    parser.add_argument("--typ", type=str, default="antrag", help="Vorlagen-Typ (default: antrag)")
    parser.add_argument("--all", action="store_true", help="Alle ohne Volltext")
    args = parser.parse_args()

    print(f"=== PDF-Volltext-Extraktion ===\n")

    conn = get_db()
    client = httpx.Client()

    # Find Vorlagen without full text. Filters are bound as parameters —
    # the previous version interpolated --typ and --limit directly into
    # the SQL string (injection-prone, and broke on values with quotes).
    query = """
        SELECT id, aktenzeichen, pdf_url
        FROM vorlagen
        WHERE volltext IS NULL
          AND pdf_url IS NOT NULL
    """
    params = []
    if args.typ:
        query += " AND typ = ?"
        params.append(args.typ)
    query += " ORDER BY datum_eingang DESC"
    if not args.all:
        query += " LIMIT ?"
        params.append(args.limit)

    vorlagen = conn.execute(query, params).fetchall()
    print(f"Verarbeite {len(vorlagen)} Vorlagen (Typ: {args.typ or 'alle'})\n")

    success = 0
    for v in vorlagen:
        if process_vorlage(conn, client, dict(v)):
            success += 1
        time.sleep(0.5)  # rate limiting towards the ALLRIS server

    client.close()
    conn.close()

    print(f"\n=== Fertig: {success}/{len(vorlagen)} erfolgreich ===")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
222
scripts/geocode_orte.py
Normal file
222
scripts/geocode_orte.py
Normal file
@ -0,0 +1,222 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Geocodierung von Orten aus KI-Zusammenfassungen.
|
||||||
|
Nutzt Nominatim (OpenStreetMap) für Hagen-spezifische Orte.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import sqlite3
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
PROJECT_ROOT = Path(__file__).resolve().parent.parent
|
||||||
|
DB_PATH = PROJECT_ROOT / "data" / "tracker_remote.db"
|
||||||
|
|
||||||
|
# Nominatim API (OpenStreetMap)
|
||||||
|
NOMINATIM_URL = "https://nominatim.openstreetmap.org/search"
|
||||||
|
USER_AGENT = "Antragstracker-Hagen/1.0 (tobias.roedel@econgood.org)"
|
||||||
|
|
||||||
|
# Hagen Bounding Box (ungefähr)
|
||||||
|
HAGEN_BBOX = "7.35,51.30,7.65,51.45" # minLon,minLat,maxLon,maxLat
|
||||||
|
|
||||||
|
# Generische Begriffe die nicht geocodiert werden sollten
|
||||||
|
BLACKLIST = {
|
||||||
|
"polizeiwache", "polizei", "feuerwehr", "krankenhaus", "rathaus",
|
||||||
|
"aldi", "aldi-markt", "lidl", "rewe", "edeka", "penny", "netto",
|
||||||
|
"schule", "grundschule", "gymnasium", "kindergarten", "kita",
|
||||||
|
"spielplatz", "kirche", "friedhof", "sportplatz", "schwimmbad",
|
||||||
|
"bushaltestelle", "bahnhof", "parkplatz", "parkhaus",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def get_db():
    """Open the tracker SQLite database with dict-like row access."""
    handle = sqlite3.connect(str(DB_PATH))
    handle.row_factory = sqlite3.Row
    return handle
|
||||||
|
|
||||||
|
|
||||||
|
def geocode_ort(client: httpx.Client, name: str) -> tuple[float, float] | None:
    """Geocode a place inside Hagen, trying several query variants.

    Returns (lat, lon) for the first variant with a hit, else None.
    Sleeps between attempts to honor Nominatim's rate limit.
    """
    variants = (
        f"{name}, Hagen, Germany",
        f"{name}, Hagen",
        f"{name} Hagen",
    )

    for q in variants:
        try:
            resp = client.get(
                NOMINATIM_URL,
                params={
                    "q": q,
                    "format": "json",
                    "limit": 1,
                    "viewbox": HAGEN_BBOX,
                    "bounded": 1,
                },
                headers={"User-Agent": USER_AGENT},
                timeout=10,
            )
            resp.raise_for_status()

            hits = resp.json()
            if hits:
                return (float(hits[0]["lat"]), float(hits[0]["lon"]))
        except Exception as e:
            print(f"  Geocoding-Fehler für '{q}': {e}")

        time.sleep(1.1)  # Nominatim rate limit: 1 req/s

    return None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_orte_from_ki(conn: sqlite3.Connection) -> list[tuple[int, str, str]]:
    """Collect place mentions from KI summaries (ki_bewertungen.anmerkungen).

    Each summary's `anmerkungen` column holds a JSON blob with a
    `betroffene_orte` list and a `kernforderung` string; rows with
    malformed JSON or an unexpected structure are skipped.

    Returns:
        List of (vorlage_id, ort_name, kontext) tuples.
    """
    rows = conn.execute("""
        SELECT vorlage_id, anmerkungen
        FROM ki_bewertungen
        WHERE typ = 'zusammenfassung'
          AND anmerkungen IS NOT NULL
    """).fetchall()

    orte = []
    for vorlage_id, anmerkungen in rows:
        # Previously a bare `except:` — narrowed to the exceptions that
        # malformed data can actually raise, so real bugs surface.
        try:
            data = json.loads(anmerkungen)
            for ort in data.get("betroffene_orte", []):
                # Ignore empty / ultra-short names.
                if ort and len(ort) > 2:
                    orte.append((vorlage_id, ort, data.get("kernforderung", "")))
        except (json.JSONDecodeError, TypeError, AttributeError):
            continue

    return orte
|
||||||
|
|
||||||
|
|
||||||
|
def process_ort(conn: sqlite3.Connection, client: httpx.Client,
                vorlage_id: int, ort_name: str, kontext: str) -> bool:
    """Link one place mention to its Vorlage, geocoding the place when needed.

    Returns True when the place ends up with coordinates (cached or freshly
    geocoded), False when it was skipped or could not be found.
    """
    # Normalize the raw name.
    ort_name_clean = ort_name.strip()

    # Skip generic terms that cannot be meaningfully geocoded.
    if ort_name_clean.lower() in BLACKLIST:
        print(f"  ⊘ {ort_name_clean} (generisch, übersprungen)")
        return False

    # Names this short are unlikely to be real places.
    if len(ort_name_clean) < 4:
        return False

    # Check whether the place already exists.
    existing = conn.execute(
        "SELECT id, lat, lon FROM orte WHERE name = ?",
        (ort_name_clean,)
    ).fetchone()

    if existing:
        ort_id = existing["id"]
        # Create the link if missing and bump the usage counter.
        conn.execute("""
            INSERT OR IGNORE INTO vorlagen_orte (vorlage_id, ort_id, kontext)
            VALUES (?, ?, ?)
        """, (vorlage_id, ort_id, kontext[:500] if kontext else None))
        conn.execute("UPDATE orte SET vorlage_count = vorlage_count + 1 WHERE id = ?", (ort_id,))
        conn.commit()

        if existing["lat"]:
            print(f"  ✓ {ort_name_clean} (cached)")
            return True
        else:
            # Known place without coordinates yet — fall through and retry.
            pass
    else:
        # Insert a new place plus its link to the Vorlage.
        cursor = conn.execute(
            "INSERT INTO orte (name, vorlage_count) VALUES (?, 1)",
            (ort_name_clean,)
        )
        ort_id = cursor.lastrowid
        conn.execute("""
            INSERT OR IGNORE INTO vorlagen_orte (vorlage_id, ort_id, kontext)
            VALUES (?, ?, ?)
        """, (vorlage_id, ort_id, kontext[:500] if kontext else None))
        conn.commit()

    # Geocode (new places and previously un-geocoded ones).
    coords = geocode_ort(client, ort_name_clean)

    if coords:
        lat, lon = coords
        conn.execute(
            "UPDATE orte SET lat = ?, lon = ?, typ = 'geocoded' WHERE id = ?",
            (lat, lon, ort_id)
        )
        conn.commit()
        print(f"  ✓ {ort_name_clean} → ({lat:.5f}, {lon:.5f})")
        return True
    else:
        print(f"  ✗ {ort_name_clean} (nicht gefunden)")
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: extract places from KI summaries and geocode them."""
    parser = argparse.ArgumentParser(description="Geocodierung von Orten")
    parser.add_argument("--limit", type=int, default=50, help="Max. Anzahl neuer Orte")
    args = parser.parse_args()

    print(f"=== Geocodierung von Orten ===\n")

    conn = get_db()
    client = httpx.Client()

    # Collect place mentions from the KI summaries.
    orte = extract_orte_from_ki(conn)
    print(f"Gefunden: {len(orte)} Ort-Erwähnungen\n")

    # Deduplicate (vorlage, name) pairs, preserving order.
    seen = set()
    unique_orte = []
    for vorlage_id, ort, kontext in orte:
        key = (vorlage_id, ort)
        if key not in seen:
            seen.add(key)
            unique_orte.append((vorlage_id, ort, kontext))

    print(f"Unique: {len(unique_orte)} Verknüpfungen\n")

    # Process at most --limit mentions per run.
    success = 0
    processed = 0
    for vorlage_id, ort, kontext in unique_orte:
        if processed >= args.limit:
            break
        if process_ort(conn, client, vorlage_id, ort, kontext):
            success += 1
        processed += 1

    client.close()
    conn.close()

    # Reopen for the final statistics.
    conn = get_db()
    total_orte = conn.execute("SELECT COUNT(*) FROM orte").fetchone()[0]
    geocoded = conn.execute("SELECT COUNT(*) FROM orte WHERE lat IS NOT NULL").fetchone()[0]
    conn.close()

    print(f"\n=== Fertig ===")
    print(f"Orte gesamt: {total_orte}")
    print(f"Geocodiert: {geocoded}")
    print(f"Diese Runde: {success}/{processed}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
503
scripts/import_oparl.py
Normal file
503
scripts/import_oparl.py
Normal file
@ -0,0 +1,503 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
OParl-Importer für den Antragstracker Hagen.
|
||||||
|
Liest alle Papers von der ALLRIS OParl-API und speichert sie in die SQLite-DB.
|
||||||
|
|
||||||
|
Nutzung:
|
||||||
|
python scripts/import_oparl.py # Voll-Import (alle Seiten)
|
||||||
|
python scripts/import_oparl.py --resume 337 # Ab Seite 337 weitermachen
|
||||||
|
python scripts/import_oparl.py --incremental # Nur neue Papers (stoppt bei bekannten)
|
||||||
|
python scripts/import_oparl.py --limit 100 # Nur 100 Papers (Test)
|
||||||
|
python scripts/import_oparl.py --resolve-gremien # Nur Gremien-Namen auflösen
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import functools
|
||||||
|
import re
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
# Unbuffered print für Live-Fortschritt
|
||||||
|
print = functools.partial(print, flush=True)
|
||||||
|
|
||||||
|
OPARL_BASE = "https://allris.hagen.de/public/oparl"
|
||||||
|
PAPERS_URL = f"{OPARL_BASE}/papers"
|
||||||
|
ORGS_URL = f"{OPARL_BASE}/organizations"
|
||||||
|
PROJECT_ROOT = Path(__file__).resolve().parent.parent
|
||||||
|
SCHEMA_PATH = PROJECT_ROOT / "backend" / "src" / "tracker" / "db" / "schema.sql"
|
||||||
|
DB_PATH = PROJECT_ROOT / "data" / "tracker_remote.db"
|
||||||
|
|
||||||
|
# OParl paperType → interner Typ
|
||||||
|
PAPER_TYPE_MAP = {
|
||||||
|
"Anfrage": "anfrage",
|
||||||
|
"Antrag": "antrag",
|
||||||
|
"Beschlussvorlage": "beschlussvorlage",
|
||||||
|
"Beschlussvorlage BBM": "beschlussvorlage",
|
||||||
|
"Beschlussvorlage WBH": "beschlussvorlage",
|
||||||
|
"Mitteilungsvorlage": "mitteilungsvorlage",
|
||||||
|
"Mitteilung": "mitteilungsvorlage",
|
||||||
|
"Mitteilung WBH": "mitteilungsvorlage",
|
||||||
|
"Stellungnahme": "stellungnahme",
|
||||||
|
"Berichtsvorlage": "bericht",
|
||||||
|
"Vorschlag zur Tagesordnung": "antrag",
|
||||||
|
"Dringlichkeitsantrag": "antrag",
|
||||||
|
"Dringlichkeitsanfrage": "anfrage",
|
||||||
|
"Änderungsantrag": "aenderungsantrag",
|
||||||
|
"Ergänzungsantrag": "ergaenzungsantrag",
|
||||||
|
"Bericht": "bericht",
|
||||||
|
"Resolution": "resolution",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Regex für Aktenzeichen-Parsing
|
||||||
|
AKZ_RE = re.compile(r"^(\d+/\d+)(?:-(\d+))?$")
|
||||||
|
|
||||||
|
# Gremien-Typ aus OParl classification ableiten
|
||||||
|
GREMIUM_TYP_MAP = {
|
||||||
|
"Rat": "rat",
|
||||||
|
"Bezirksvertretung": "bv",
|
||||||
|
"Ausschuss": "ausschuss",
|
||||||
|
"Beirat": "beirat",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def init_db() -> sqlite3.Connection:
    """Create/open the DB, apply schema.sql, and run in-place migrations.

    Returns an open connection with WAL journaling, foreign keys enabled,
    and Row access.
    """
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(str(DB_PATH))
    conn.execute("PRAGMA journal_mode = WAL")  # better concurrent read/write behavior
    conn.execute("PRAGMA foreign_keys = ON")
    conn.row_factory = sqlite3.Row
    # schema.sql is expected to be idempotent (CREATE ... IF NOT EXISTS).
    schema = SCHEMA_PATH.read_text(encoding="utf-8")
    conn.executescript(schema)

    # Schema migration: add oparl_id to beratungen (used for dedup).
    cols = {r[1] for r in conn.execute("PRAGMA table_info(beratungen)").fetchall()}
    if "oparl_id" not in cols:
        conn.execute("ALTER TABLE beratungen ADD COLUMN oparl_id TEXT")
        conn.execute("CREATE UNIQUE INDEX IF NOT EXISTS idx_beratungen_oparl ON beratungen(oparl_id)")
        print("  Migration: oparl_id zu beratungen hinzugefügt")

    conn.commit()
    return conn
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_page(client: httpx.Client, url: str, params: dict,
               max_retries: int = 3) -> dict | None:
    """Fetch one API page as JSON, retrying on failure.

    Timeouts back off linearly (5s, 10s, ...); other errors wait a flat 3s.
    Returns the decoded JSON document, or None once all retries are spent.
    """
    attempt = 0
    while attempt < max_retries:
        try:
            response = client.get(url, params=params, timeout=30)
            response.raise_for_status()
            return response.json()
        except httpx.TimeoutException:
            print(f" Timeout, Versuch {attempt + 1}/{max_retries}")
            if attempt < max_retries - 1:
                time.sleep(5 * (attempt + 1))
        except Exception as e:
            print(f" Fehler: {e}")
            if attempt < max_retries - 1:
                time.sleep(3)
        attempt += 1
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def parse_aktenzeichen(reference: str | None) -> tuple[str | None, str | None, str | None]:
    """Split a reference into (full aktenzeichen, base, suffix).

    "123/24-2" -> ("123/24-2", "123/24", "-2"); a reference that does not
    match the expected pattern is returned as both the full value and the
    base, with no suffix. Empty/None input yields (None, None, None).
    """
    if not reference:
        return None, None, None
    cleaned = reference.strip()
    match = AKZ_RE.match(cleaned)
    if not match:
        return cleaned, cleaned, None
    base, raw_suffix = match.groups()
    return cleaned, base, f"-{raw_suffix}" if raw_suffix else None
|
||||||
|
|
||||||
|
|
||||||
|
def map_paper_type(oparl_type: str | None) -> str:
    """Translate an OParl paperType label to the internal type key.

    Unknown or missing labels map to "sonstig".
    """
    if oparl_type:
        return PAPER_TYPE_MAP.get(oparl_type, "sonstig")
    return "sonstig"
|
||||||
|
|
||||||
|
|
||||||
|
def is_verwaltungsvorlage(paper_type: str | None) -> bool:
|
||||||
|
if not paper_type:
|
||||||
|
return False
|
||||||
|
return paper_type.startswith("Beschlussvorlage") or paper_type.startswith("Mitteilung")
|
||||||
|
|
||||||
|
|
||||||
|
def upsert_paper(conn: sqlite3.Connection, paper: dict) -> tuple[int | None, bool]:
    """Insert an OParl paper or update it if its OParl id already exists.

    Returns (vorlage_id, is_new) — is_new=False means it already existed.
    Returns (None, False) when the paper has no OParl id or the DB write fails.
    """
    oparl_id = paper.get("id")
    # Without an OParl id the paper cannot be deduplicated — skip it.
    if not oparl_id:
        return None, False

    # Check if already exists
    existing = conn.execute(
        "SELECT id FROM vorlagen WHERE oparl_id = ?", (oparl_id,)
    ).fetchone()

    reference = paper.get("reference")
    aktenzeichen, basis, suffix = parse_aktenzeichen(reference)
    oparl_type = paper.get("paperType")
    typ = map_paper_type(oparl_type)
    betreff = paper.get("name", "")
    datum = paper.get("date")
    web_url = paper.get("web")

    # mainFile may be an embedded dict or something else (e.g. a URL string);
    # only the dict form carries the PDF URL we can use directly.
    main_file = paper.get("mainFile")
    pdf_url = None
    if isinstance(main_file, dict):
        pdf_url = main_file.get("accessUrl") or main_file.get("downloadUrl")

    try:
        if existing:
            # Refresh all mutable fields and bump scraped_at.
            vorlage_id = existing["id"]
            conn.execute(
                """UPDATE vorlagen SET
                    aktenzeichen = ?, aktenzeichen_basis = ?, aktenzeichen_suffix = ?,
                    typ = ?, betreff = ?, datum_eingang = ?,
                    pdf_url = ?, web_url = ?, ist_verwaltungsvorlage = ?,
                    scraped_at = CURRENT_TIMESTAMP
                WHERE id = ?""",
                (aktenzeichen, basis, suffix, typ, betreff, datum,
                 pdf_url, web_url, is_verwaltungsvorlage(oparl_type), vorlage_id),
            )
            return vorlage_id, False
        else:
            cur = conn.execute(
                """INSERT INTO vorlagen
                    (oparl_id, aktenzeichen, aktenzeichen_basis, aktenzeichen_suffix,
                     typ, betreff, datum_eingang, pdf_url, web_url, ist_verwaltungsvorlage)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                (oparl_id, aktenzeichen, basis, suffix,
                 typ, betreff, datum, pdf_url, web_url,
                 is_verwaltungsvorlage(oparl_type)),
            )
            return cur.lastrowid, True
    except sqlite3.Error as e:
        # Report and continue — one bad paper must not abort the import run.
        print(f" DB-Fehler bei {reference}: {e}")
        return None, False
|
||||||
|
|
||||||
|
|
||||||
|
def upsert_consultations(conn: sqlite3.Connection, vorlage_id: int, paper: dict):
    """Store a paper's consultation sequence, deduplicated via oparl_id.

    For each consultation the referenced committee (gremium) is created with
    a placeholder name if unknown; real names are filled in later by
    resolve_gremien().
    """
    consultations = paper.get("consultation") or []
    for cons in consultations:
        if not isinstance(cons, dict):
            continue

        cons_oparl_id = cons.get("id")

        # Resolve the committee. If several qualifying URLs are listed, the
        # LAST match wins (gremium_id is overwritten each iteration).
        orgs = cons.get("organization") or []
        gremium_id = None
        for org_url in orgs:
            if not isinstance(org_url, str):
                continue
            # Only typ=gr URLs are real committees
            if "typ=gr" not in org_url:
                continue
            conn.execute(
                "INSERT OR IGNORE INTO gremien (oparl_id, name) VALUES (?, ?)",
                (org_url, _org_placeholder_name(org_url)),
            )
            row = conn.execute(
                "SELECT id FROM gremien WHERE oparl_id = ?", (org_url,)
            ).fetchone()
            if row:
                gremium_id = row["id"]

        rolle = cons.get("role")
        # NOTE(review): `authoritative` is read but never used below — confirm
        # whether it should be persisted.
        authoritative = cons.get("authoritative", False)

        if cons_oparl_id:
            # Upsert keyed on the consultation's OParl id (unique index).
            conn.execute(
                """INSERT INTO beratungen (oparl_id, vorlage_id, gremium_id, rolle)
                VALUES (?, ?, ?, ?)
                ON CONFLICT(oparl_id) DO UPDATE SET
                    vorlage_id = excluded.vorlage_id,
                    gremium_id = excluded.gremium_id,
                    rolle = excluded.rolle""",
                (cons_oparl_id, vorlage_id, gremium_id, rolle),
            )
        else:
            # Fallback without oparl_id: probe for a duplicate first. `IS ?`
            # makes NULL compare equal to NULL (unlike `=`).
            exists = conn.execute(
                """SELECT 1 FROM beratungen
                WHERE vorlage_id = ? AND gremium_id IS ? AND rolle IS ?
                LIMIT 1""",
                (vorlage_id, gremium_id, rolle),
            ).fetchone()
            if not exists:
                conn.execute(
                    "INSERT INTO beratungen (vorlage_id, gremium_id, rolle) VALUES (?, ?, ?)",
                    (vorlage_id, gremium_id, rolle),
                )
|
||||||
|
|
||||||
|
|
||||||
|
def insert_files(conn: sqlite3.Connection, vorlage_id: int, paper: dict):
    """Store a paper's attachments (auxiliaryFile), skipping known URLs."""
    for attachment in paper.get("auxiliaryFile") or []:
        if not isinstance(attachment, dict):
            continue
        url = attachment.get("accessUrl") or attachment.get("downloadUrl")
        if not url:
            continue
        # Dedup on (vorlage, url): only insert attachments we have not seen.
        already_known = conn.execute(
            "SELECT 1 FROM anlagen WHERE vorlage_id = ? AND url = ?",
            (vorlage_id, url),
        ).fetchone()
        if already_known:
            continue
        filename = attachment.get("name") or attachment.get("fileName", "")
        conn.execute(
            "INSERT INTO anlagen (vorlage_id, dateiname, url) VALUES (?, ?, ?)",
            (vorlage_id, filename, url),
        )
|
||||||
|
|
||||||
|
|
||||||
|
def _org_placeholder_name(org_url: str) -> str:
|
||||||
|
"""Extrahiere Platzhalter-Name aus URL."""
|
||||||
|
org_id = org_url.split("id=")[-1] if "id=" in org_url else org_url
|
||||||
|
return f"Gremium {org_id}"
|
||||||
|
|
||||||
|
|
||||||
|
def build_suffix_references(conn: sqlite3.Connection):
    """Create automatic 'suffix' references between related Vorlagen.

    Two edge kinds are derived from the aktenzeichen:
      * parent -> child   (e.g. "100/24"   -> "100/24-1")
      * sequential        (e.g. "100/24-1" -> "100/24-2")
    Existing edges are preserved (INSERT OR IGNORE).

    Bug fix: the old code reported the count via ``SELECT changes()``, which
    only reflects the MOST RECENT statement (the sequential pass). The tally
    now sums the rowcounts of both INSERTs.
    """
    print("\nErstelle Suffix-Referenzen...")
    # Parent → Child
    cur_parent = conn.execute(
        """INSERT OR IGNORE INTO referenzen (quelle_id, ziel_id, typ, konfidenz)
        SELECT parent.id, child.id, 'suffix', 1.0
        FROM vorlagen child
        JOIN vorlagen parent ON child.aktenzeichen_basis = parent.aktenzeichen_basis
        WHERE child.aktenzeichen_suffix IS NOT NULL
          AND parent.aktenzeichen_suffix IS NULL
          AND child.id != parent.id"""
    )
    # Sequential: -1 → -2, -2 → -3, etc. (numeric compare of the suffix)
    cur_seq = conn.execute(
        """INSERT OR IGNORE INTO referenzen (quelle_id, ziel_id, typ, konfidenz)
        SELECT earlier.id, later.id, 'suffix', 1.0
        FROM vorlagen later
        JOIN vorlagen earlier
          ON later.aktenzeichen_basis = earlier.aktenzeichen_basis
          AND later.aktenzeichen_suffix IS NOT NULL
          AND earlier.aktenzeichen_suffix IS NOT NULL
          AND CAST(REPLACE(later.aktenzeichen_suffix, '-', '') AS INTEGER)
              = CAST(REPLACE(earlier.aktenzeichen_suffix, '-', '') AS INTEGER) + 1
        WHERE later.id != earlier.id"""
    )
    # rowcount can be -1 when undetermined; clamp so the tally stays sane.
    count = max(cur_parent.rowcount, 0) + max(cur_seq.rowcount, 0)
    conn.commit()
    print(f" Suffix-Referenzen aktualisiert (letzte Runde: {count} neue)")
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_gremien(conn: sqlite3.Connection, client: httpx.Client):
    """Resolve committee names via the OParl organizations API.

    Updates name, kuerzel and typ for all committees that still carry
    placeholder names.
    """
    print("\nLöse Gremien-Namen auf...")

    # Fetch all organizations from the API into an id -> attributes lookup.
    org_lookup: dict[str, dict] = {}
    page = 1
    while True:
        data = fetch_page(client, ORGS_URL, {"body": 1, "page": page})
        if not data or "data" not in data:
            break
        for org in data["data"]:
            oparl_id = org.get("id")
            if oparl_id:
                classification = org.get("classification", "")
                typ = GREMIUM_TYP_MAP.get(classification, "sonstig")
                org_lookup[oparl_id] = {
                    "name": org.get("name", ""),
                    "kuerzel": org.get("shortName", ""),
                    "typ": typ,
                    "classification": classification,
                }
        # Default totalPages to the current page so a missing pagination
        # block terminates the loop.
        total_pages = data.get("pagination", {}).get("totalPages", page)
        if page >= total_pages:
            break
        page += 1
        time.sleep(0.2)  # be gentle with the API

    print(f" {len(org_lookup)} Organisationen von API geladen")

    # Update committees in the DB.
    updated = 0
    for row in conn.execute("SELECT id, oparl_id, name FROM gremien").fetchall():
        oparl_id = row["oparl_id"]
        if oparl_id in org_lookup:
            org = org_lookup[oparl_id]
            # Only overwrite when the API supplies a non-empty, different name.
            if org["name"] and org["name"] != row["name"]:
                conn.execute(
                    "UPDATE gremien SET name = ?, kuerzel = ?, typ = ? WHERE id = ?",
                    (org["name"], org["kuerzel"] or None, org["typ"], row["id"]),
                )
                updated += 1

    conn.commit()
    print(f" {updated} Gremien aktualisiert")
|
||||||
|
|
||||||
|
|
||||||
|
def import_papers(conn: sqlite3.Connection, client: httpx.Client,
                  start_page: int = 1, limit: int = 0, incremental: bool = False):
    """Import papers from the OParl API.

    Args:
        start_page: first page to fetch (for --resume)
        limit: max number of papers (0 = all)
        incremental: stop once only already-known papers are found

    Returns:
        (total_new, total_updated), or None when the API is unreachable.
    """
    # Determine pagination from a probe request for page 1.
    print("Ermittle Seitenanzahl...")
    first = fetch_page(client, PAPERS_URL, {"body": 1, "page": 1})
    if not first or "pagination" not in first:
        print("FEHLER: Konnte API nicht erreichen")
        return

    total_pages = first["pagination"]["totalPages"]
    total_elements = first["pagination"]["totalElements"]
    print(f" {total_elements} Papers auf {total_pages} Seiten")

    existing_count = conn.execute("SELECT COUNT(*) FROM vorlagen").fetchone()[0]
    print(f" {existing_count} bereits in DB\n")

    total_new = 0
    total_updated = 0
    consecutive_known_pages = 0  # for the --incremental early stop

    for page_num in range(start_page, total_pages + 1):
        # Reuse the probe response for page 1 instead of fetching it twice.
        if page_num == 1 and start_page == 1:
            data = first
        else:
            data = fetch_page(client, PAPERS_URL, {"body": 1, "page": page_num})

        if not data or "data" not in data:
            print(f" Seite {page_num} übersprungen (kein Data)")
            continue

        papers = data["data"]
        page_new = 0
        for paper in papers:
            vorlage_id, is_new = upsert_paper(conn, paper)
            if vorlage_id:
                upsert_consultations(conn, vorlage_id, paper)
                insert_files(conn, vorlage_id, paper)
                if is_new:
                    page_new += 1
                    total_new += 1
                else:
                    total_updated += 1

        # Commit once per page to bound transaction size.
        conn.commit()

        progress = (page_num / total_pages) * 100
        marker = f" (+{page_new} neu)" if page_new > 0 else " (bekannt)"
        print(f" Seite {page_num:4d}/{total_pages} ({progress:5.1f}%)"
              f" — {len(papers)} Papers{marker}"
              f" — neu: {total_new}, aktualisiert: {total_updated}")

        # Incremental: abort after 3 consecutive pages without new papers.
        if incremental:
            if page_new == 0:
                consecutive_known_pages += 1
                if consecutive_known_pages >= 3:
                    print(f"\n Inkrementell: 3 Seiten ohne neue Papers, stoppe.")
                    break
            else:
                consecutive_known_pages = 0

        if limit and (total_new + total_updated) >= limit:
            print(f"\n Limit von {limit} erreicht, stoppe.")
            break

        # Gentle pacing; a longer pause every 100 pages.
        time.sleep(0.3)
        if page_num % 100 == 0:
            print(f" Checkpoint Seite {page_num} — Pause 5s...")
            time.sleep(5)

    return total_new, total_updated
|
||||||
|
|
||||||
|
|
||||||
|
def print_stats(conn: sqlite3.Connection):
    """Print summary statistics about the current database to stdout."""
    def scalar(sql: str):
        # Convenience: run a single-value query.
        return conn.execute(sql).fetchone()[0]

    print(f"\n=== Datenbank-Statistiken ===")
    print(f" Vorlagen: {scalar('SELECT COUNT(*) FROM vorlagen')}")
    print(f" Beratungen: {scalar('SELECT COUNT(*) FROM beratungen')}")
    print(f" Gremien: {scalar('SELECT COUNT(*) FROM gremien')}")
    print(f" Referenzen: {scalar('SELECT COUNT(*) FROM referenzen')}")
    print(f" Anlagen: {scalar('SELECT COUNT(*) FROM anlagen')}")
    print(f"\n Vorlagen nach Typ:")
    for r in conn.execute("SELECT typ, COUNT(*) c FROM vorlagen GROUP BY typ ORDER BY c DESC"):
        print(f" {r['typ']:25s} {r['c']:>6d}")
    print(f"\n Zeitraum: {scalar('SELECT MIN(datum_eingang) FROM vorlagen')}"
          f" bis {scalar('SELECT MAX(datum_eingang) FROM vorlagen')}")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: import papers, resolve committees, build references."""
    parser = argparse.ArgumentParser(description="OParl-Import für Antragstracker Hagen")
    parser.add_argument("--resume", type=int, default=0,
                        help="Ab dieser Seitennummer weitermachen")
    parser.add_argument("--incremental", action="store_true",
                        help="Nur neue Papers (stoppt bei bekannten)")
    parser.add_argument("--limit", type=int, default=0,
                        help="Max. Anzahl Papers (0 = alle)")
    parser.add_argument("--resolve-gremien", action="store_true",
                        help="Nur Gremien-Namen auflösen, kein Paper-Import")
    parser.add_argument("--no-references", action="store_true",
                        help="Suffix-Referenzen nicht neu bauen")
    args = parser.parse_args()

    print("=== Antragstracker Hagen — OParl-Import ===\n")

    conn = init_db()
    print(f" DB: {DB_PATH}\n")

    client = httpx.Client(
        headers={"Accept": "application/json"},
        follow_redirects=True,
    )

    try:
        # Maintenance mode: only refresh committee names, skip the import.
        if args.resolve_gremien:
            resolve_gremien(conn, client)
            print_stats(conn)
            return

        # Paper import
        start_page = args.resume if args.resume > 0 else 1
        if args.resume:
            print(f"Setze Import ab Seite {start_page} fort...\n")
        elif args.incremental:
            print("Inkrementeller Import (nur neue Papers)...\n")
        else:
            print("Voll-Import...\n")

        import_papers(conn, client, start_page, args.limit, args.incremental)

        # Resolve committee names
        resolve_gremien(conn, client)

        # Suffix references
        if not args.no_references:
            build_suffix_references(conn)

        print_stats(conn)

    finally:
        # Always release network and DB resources, even on early return.
        client.close()
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point.
if __name__ == "__main__":
    main()
|
||||||
121
scripts/import_pdf_urls.py
Normal file
121
scripts/import_pdf_urls.py
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Importiert PDF-URLs aus OParl in die anlagen-Tabelle.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sqlite3
|
||||||
|
from pathlib import Path
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
# Project root = parent of the scripts/ directory this file lives in.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
DB_PATH = PROJECT_ROOT / "data" / "tracker_remote.db"
# OParl papers endpoint; "body=1" presumably selects the Hagen corporation
# body — confirm against the API's /bodies listing.
OPARL_BASE = "https://allris.hagen.de/public/oparl/papers?body=1"
|
||||||
|
|
||||||
|
|
||||||
|
def get_db():
    """Open the tracker database with dict-like row access."""
    connection = sqlite3.connect(str(DB_PATH))
    connection.row_factory = sqlite3.Row
    return connection
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_anlagen_table(conn):
    """Create the anlagen table and its vorlage_id index if missing."""
    ddl = """
        CREATE TABLE IF NOT EXISTS anlagen (
            id INTEGER PRIMARY KEY,
            vorlage_id INTEGER NOT NULL,
            name TEXT,
            url TEXT,
            mime_type TEXT,
            size INTEGER,
            downloaded INTEGER DEFAULT 0,
            FOREIGN KEY (vorlage_id) REFERENCES vorlagen(id)
        )
    """
    conn.execute(ddl)
    conn.execute("CREATE INDEX IF NOT EXISTS idx_anlagen_vorlage ON anlagen(vorlage_id)")
    conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def import_pdf_urls():
    """Walk all OParl papers and record each mainFile PDF URL in `anlagen`."""
    conn = get_db()
    ensure_anlagen_table(conn)

    # Mapping oparl_id -> vorlagen.id for papers already imported.
    vorlage_map = {}
    for row in conn.execute("SELECT id, oparl_id FROM vorlagen WHERE oparl_id IS NOT NULL"):
        vorlage_map[row['oparl_id']] = row['id']

    print(f"Vorlagen mit OParl-ID: {len(vorlage_map)}")

    page = 1
    imported = 0
    skipped = 0

    while True:
        print(f"Seite {page}...", end=" ", flush=True)

        try:
            resp = httpx.get(f"{OPARL_BASE}&page={page}", timeout=30)
            resp.raise_for_status()
            data = resp.json()
        except Exception as e:
            # Any network/HTTP/JSON error ends the run at the current page.
            print(f"Fehler: {e}")
            break

        if not data.get('data'):
            print("keine Daten")
            break

        page_imported = 0
        for paper in data['data']:
            oparl_id = paper.get('id')
            main_file = paper.get('mainFile')

            if not main_file or not oparl_id:
                continue

            vorlage_id = vorlage_map.get(oparl_id)
            if not vorlage_id:
                skipped += 1
                continue

            # NOTE(review): dedup is per vorlage_id only (this assumes at most
            # one mainFile per paper), not per URL — confirm this is intended.
            existing = conn.execute(
                "SELECT id FROM anlagen WHERE vorlage_id = ?", (vorlage_id,)
            ).fetchone()

            if existing:
                continue

            url = main_file.get('accessUrl') or main_file.get('downloadUrl')
            if not url:
                continue

            conn.execute("""
                INSERT INTO anlagen (vorlage_id, name, url, mime_type, size)
                VALUES (?, ?, ?, ?, ?)
            """, (
                vorlage_id,
                main_file.get('name') or main_file.get('fileName'),
                url,
                main_file.get('mimeType'),
                main_file.get('size')
            ))
            imported += 1
            page_imported += 1

        # Commit once per page to bound transaction size.
        conn.commit()
        print(f"{page_imported} importiert")

        # Follow OParl pagination links until there is no next page.
        if not data.get('links', {}).get('next'):
            break
        page += 1

    conn.close()
    print(f"\n=== Fertig ===")
    print(f"Importiert: {imported}")
    print(f"Übersprungen (keine Vorlage): {skipped}")
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point.
if __name__ == "__main__":
    import_pdf_urls()
|
||||||
262
scripts/ki_bulk_remote.py
Executable file
262
scripts/ki_bulk_remote.py
Executable file
@ -0,0 +1,262 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
KI-Zusammenfassung für Anträge via Qwen (DashScope).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
# Nominatim (OpenStreetMap) geocoding endpoint; the usage policy requires an
# identifying User-Agent.
NOMINATIM_URL = "https://nominatim.openstreetmap.org/search"
USER_AGENT = "Antragstracker-Hagen/1.0"
# Bounding box (lon_min,lat_min,lon_max,lat_max) used to confine hits to Hagen.
HAGEN_BBOX = "7.35,51.30,7.65,51.45"

PROJECT_ROOT = Path(__file__).resolve().parent.parent
DB_PATH = PROJECT_ROOT / "data" / "tracker_remote.db"

# DashScope API
DASHSCOPE_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions"
# API key from the environment, falling back to the macOS keychain.
# NOTE(review): os.popen spawns a shell; the command is a fixed string here,
# so there is no injection risk, but subprocess.run would be more robust.
DASHSCOPE_KEY = os.environ.get("QWEN_API_KEY") or os.popen("security find-generic-password -s qwen-api -w 2>/dev/null").read().strip()
|
||||||
|
|
||||||
|
PROMPT_TEMPLATE = """Analysiere diesen kommunalpolitischen Antrag aus Hagen.
|
||||||
|
|
||||||
|
DOKUMENT:
|
||||||
|
{volltext}
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
Erstelle eine strukturierte Zusammenfassung im JSON-Format:
|
||||||
|
|
||||||
|
{{
|
||||||
|
"zusammenfassung": "2-3 Sätze, was gefordert wird",
|
||||||
|
"kernforderung": "Die zentrale Forderung in einem Satz",
|
||||||
|
"begruendung": "Warum wird das gefordert? (kurz)",
|
||||||
|
"thema": "Hauptthema (z.B. Verkehr, Soziales, Umwelt)",
|
||||||
|
"partei": "Antragstellende Fraktion falls erkennbar",
|
||||||
|
"orte": [
|
||||||
|
{{
|
||||||
|
"rohtext": "Die genaue Formulierung im Text (z.B. 'Polizeiwache an der Boeler Straße')",
|
||||||
|
"kontext": "Der Satz in dem der Ort erwähnt wird",
|
||||||
|
"typ": "strasse|platz|stadtteil|gebaeude|sonstiges",
|
||||||
|
"geocodierbar": true/false,
|
||||||
|
"geocode_query": "Suchbegriff für Karte (z.B. 'Boeler Straße' statt 'Polizeiwache an der Boeler Straße')"
|
||||||
|
}}
|
||||||
|
]
|
||||||
|
}}
|
||||||
|
|
||||||
|
WICHTIG für orte:
|
||||||
|
- Extrahiere ALLE geografischen Erwähnungen
|
||||||
|
- Bei "X an der Y-Straße" ist geocode_query = "Y-Straße"
|
||||||
|
- Straßennamen, Plätze, Stadtteile sind geocodierbar=true
|
||||||
|
- "Spielplatz", "Schule", "Bushaltestelle" ohne Straße sind geocodierbar=false
|
||||||
|
- geocode_query nur bei geocodierbar=true setzen
|
||||||
|
|
||||||
|
NUR JSON ausgeben, keine Erklärungen."""
|
||||||
|
|
||||||
|
|
||||||
|
def get_db():
    """Return a connection to the tracker DB with sqlite3.Row rows."""
    db = sqlite3.connect(str(DB_PATH))
    db.row_factory = sqlite3.Row
    return db
|
||||||
|
|
||||||
|
|
||||||
|
def call_qwen(prompt: str) -> dict | None:
    """Call the Qwen chat API and return the model reply parsed as JSON.

    Returns None when no API key is configured, on HTTP/network errors, or
    when the reply is not valid JSON.
    """
    if not DASHSCOPE_KEY:
        print(" FEHLER: Kein QWEN_API_KEY gefunden")
        return None

    # Bug fix: define `content` before the try block so the JSONDecodeError
    # handler can reference it even when resp.json() itself fails to parse
    # (previously that raised NameError inside the handler).
    content = ""
    try:
        resp = httpx.post(
            DASHSCOPE_URL,
            headers={
                "Authorization": f"Bearer {DASHSCOPE_KEY}",
                "Content-Type": "application/json"
            },
            json={
                "model": "qwen-plus-latest",
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0.3
            },
            timeout=180
        )
        resp.raise_for_status()

        content = resp.json()["choices"][0]["message"]["content"]

        # Extract JSON (the model may wrap it in a Markdown code fence).
        if "```json" in content:
            content = content.split("```json")[1].split("```")[0]
        elif "```" in content:
            content = content.split("```")[1].split("```")[0]

        return json.loads(content.strip())

    except json.JSONDecodeError as e:
        print(f" JSON-Parse-Fehler: {e}")
        print(f" Content: {content[:200]}...")
        return None
    except Exception as e:
        print(f" API-Fehler: {e}")
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def process_vorlage(conn: sqlite3.Connection, vorlage: dict) -> bool:
    """Create and persist the AI summary (plus extracted places) for one
    Vorlage.

    Returns True on success, False when the text is too short or the API
    call failed.
    """
    vid = vorlage['id']
    akz = vorlage['aktenzeichen'] or f"#{vid}"
    volltext = vorlage['volltext_clean']

    if not volltext or len(volltext) < 100:
        print(f" {akz}: Volltext zu kurz")
        return False

    # Truncate overly long texts to keep the prompt within budget.
    if len(volltext) > 8000:
        volltext = volltext[:8000] + "\n[...gekürzt...]"

    prompt = PROMPT_TEMPLATE.format(volltext=volltext)
    result = call_qwen(prompt)

    if not result:
        return False

    # Store the summary plus the full raw JSON in ki_bewertungen.
    conn.execute("""
        INSERT INTO ki_bewertungen (vorlage_id, typ, begruendung, anmerkungen, modell, prompt_version)
        VALUES (?, 'zusammenfassung', ?, ?, 'qwen-plus-latest', 'v1')
    """, (vid, result.get('zusammenfassung'), json.dumps(result, ensure_ascii=False)))

    # Mirror the core demand into vorlagen.thema_kurz (capped at 200 chars).
    if result.get('kernforderung'):
        conn.execute("UPDATE vorlagen SET thema_kurz = ? WHERE id = ?",
                     (result['kernforderung'][:200], vid))

    conn.commit()

    # Geocode and store the extracted places.
    orte = result.get('orte', [])
    geocoded_count = 0
    for ort in orte:
        geocoded_count += process_ort(conn, vid, ort)

    print(f" {akz}: ✓ {result.get('thema', '?')} ({len(orte)} Orte, {geocoded_count} geocodiert)")
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def geocode_nominatim(query: str) -> tuple[float, float] | None:
    """Geocode a place inside Hagen via Nominatim.

    Returns (lat, lon) of the best hit, restricted to HAGEN_BBOX via
    viewbox+bounded, or None when nothing was found or the request failed.
    Failures are swallowed deliberately — geocoding is best-effort and must
    never abort the summarization run.
    """
    try:
        resp = httpx.get(
            NOMINATIM_URL,
            params={"q": f"{query}, Hagen, Germany", "format": "json", "limit": 1,
                    "viewbox": HAGEN_BBOX, "bounded": 1},
            headers={"User-Agent": USER_AGENT},
            timeout=10
        )
        resp.raise_for_status()
        results = resp.json()
        if results:
            return (float(results[0]['lat']), float(results[0]['lon']))
    except Exception:
        # Fix: the old code bound the exception to an unused variable `e`.
        pass
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def process_ort(conn: sqlite3.Connection, vorlage_id: int, ort: dict) -> int:
    """Store one extracted place and geocode it if possible.

    Returns 1 when the place ends up with coordinates (newly geocoded, or an
    already-known place with a lat), else 0.
    """
    rohtext = ort.get('rohtext', '')
    kontext = ort.get('kontext', '')
    typ = ort.get('typ', 'sonstiges')
    geocodierbar = ort.get('geocodierbar', False)
    geocode_query = ort.get('geocode_query')

    if not rohtext:
        return 0

    # Check whether this place already exists (matched by name or raw text).
    existing = conn.execute(
        "SELECT id, lat FROM orte WHERE name = ? OR rohtext = ?",
        (geocode_query or rohtext, rohtext)
    ).fetchone()

    if existing:
        # Only create the link and bump the usage counter.
        conn.execute("""
            INSERT OR IGNORE INTO vorlagen_orte (vorlage_id, ort_id, kontext)
            VALUES (?, ?, ?)
        """, (vorlage_id, existing['id'], kontext[:500] if kontext else None))
        conn.execute("UPDATE orte SET vorlage_count = vorlage_count + 1 WHERE id = ?", (existing['id'],))
        conn.commit()
        return 1 if existing['lat'] else 0

    # Create a new place; geocode only when the model marked it geocodable.
    lat, lon = None, None
    status = 'skipped'

    if geocodierbar and geocode_query:
        time.sleep(1.1)  # Nominatim rate limit (max ~1 request/second)
        coords = geocode_nominatim(geocode_query)
        if coords:
            lat, lon = coords
            status = 'success'
        else:
            status = 'failed'

    cursor = conn.execute("""
        INSERT INTO orte (name, typ, lat, lon, rohtext, kontext_satz, geocode_status, vorlage_count)
        VALUES (?, ?, ?, ?, ?, ?, ?, 1)
    """, (geocode_query or rohtext, typ, lat, lon, rohtext, kontext[:500] if kontext else None, status))

    ort_id = cursor.lastrowid
    conn.execute("""
        INSERT OR IGNORE INTO vorlagen_orte (vorlage_id, ort_id, kontext)
        VALUES (?, ?, ?)
    """, (vorlage_id, ort_id, kontext[:500] if kontext else None))
    conn.commit()

    return 1 if lat else 0
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: summarize pending Vorlagen via the Qwen API."""
    parser = argparse.ArgumentParser(description="KI-Zusammenfassung für Anträge")
    parser.add_argument("--limit", type=int, default=5, help="Max. Anzahl (default: 5)")
    parser.add_argument("--typ", type=str, default="", help="Vorlagen-Typ")
    args = parser.parse_args()

    print(f"=== KI-Zusammenfassung ===\n")

    conn = get_db()

    # Vorlagen that have a full text but no AI summary yet.
    query = """
        SELECT v.id, v.aktenzeichen, v.volltext_clean
        FROM vorlagen v
        LEFT JOIN ki_bewertungen kb ON v.id = kb.vorlage_id AND kb.typ = 'zusammenfassung'
        WHERE v.volltext_clean IS NOT NULL
          AND kb.id IS NULL
    """
    # Security fix: bind user-supplied values as SQL parameters instead of
    # interpolating them into the query string (the old f-string allowed
    # SQL injection via --typ).
    params: list = []
    if args.typ:
        query += " AND v.typ = ?"
        params.append(args.typ)
    query += " ORDER BY v.datum_eingang DESC LIMIT ?"
    params.append(args.limit)

    vorlagen = conn.execute(query, params).fetchall()
    print(f"Verarbeite {len(vorlagen)} Vorlagen\n")

    success = 0
    for v in vorlagen:
        if process_vorlage(conn, dict(v)):
            success += 1

    conn.close()
    print(f"\n=== Fertig: {success}/{len(vorlagen)} erfolgreich ===")
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point.
if __name__ == "__main__":
    main()
|
||||||
293
scripts/ki_parallel.py
Normal file
293
scripts/ki_parallel.py
Normal file
@ -0,0 +1,293 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Parallelisierte KI-Zusammenfassungen via Qwen/DashScope.
|
||||||
|
Nutzt ThreadPoolExecutor für parallele API-Calls.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
from pathlib import Path
|
||||||
|
from threading import Lock
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
PROJECT_ROOT = Path(__file__).resolve().parent.parent
|
||||||
|
DB_PATH = PROJECT_ROOT / "data" / "tracker_remote.db"
|
||||||
|
LOG_FILE = PROJECT_ROOT / "data" / "ki_parallel.log"
|
||||||
|
STATE_FILE = PROJECT_ROOT / "data" / "ki_parallel_state.json"
|
||||||
|
|
||||||
|
# DashScope API
|
||||||
|
DASHSCOPE_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions"
|
||||||
|
DASHSCOPE_KEY = os.environ.get("QWEN_API_KEY") or os.popen("security find-generic-password -s qwen-api -w 2>/dev/null").read().strip()
|
||||||
|
|
||||||
|
# Nominatim für Geocoding
|
||||||
|
NOMINATIM_URL = "https://nominatim.openstreetmap.org/search"
|
||||||
|
USER_AGENT = "Antragstracker-Hagen/1.0"
|
||||||
|
HAGEN_BBOX = "7.35,51.30,7.65,51.45"
|
||||||
|
|
||||||
|
PROMPT_TEMPLATE = """Analysiere diesen kommunalpolitischen Antrag aus Hagen.
|
||||||
|
|
||||||
|
DOKUMENT:
|
||||||
|
{volltext}
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
Erstelle eine strukturierte Zusammenfassung im JSON-Format:
|
||||||
|
|
||||||
|
{{
|
||||||
|
"zusammenfassung": "2-3 Sätze, was gefordert wird",
|
||||||
|
"kernforderung": "Die zentrale Forderung in einem Satz",
|
||||||
|
"begruendung": "Warum wird das gefordert? (kurz)",
|
||||||
|
"thema": "Hauptthema (z.B. Verkehr, Soziales, Umwelt)",
|
||||||
|
"partei": "Antragstellende Fraktion falls erkennbar",
|
||||||
|
"orte": [
|
||||||
|
{{
|
||||||
|
"rohtext": "Die genaue Formulierung im Text",
|
||||||
|
"kontext": "Der Satz in dem der Ort erwähnt wird",
|
||||||
|
"typ": "strasse|platz|stadtteil|gebaeude|sonstiges",
|
||||||
|
"geocodierbar": true/false,
|
||||||
|
"geocode_query": "Suchbegriff für Karte"
|
||||||
|
}}
|
||||||
|
]
|
||||||
|
}}
|
||||||
|
|
||||||
|
NUR JSON ausgeben, keine Erklärungen."""
|
||||||
|
|
||||||
|
db_lock = Lock()
|
||||||
|
log_lock = Lock()
|
||||||
|
stats = {"success": 0, "failed": 0, "throttled": 0}
|
||||||
|
|
||||||
|
|
||||||
|
def log(msg: str):
    """Print *msg* with a timestamp and append it to the shared log file.

    File writes are serialized via ``log_lock`` because worker threads
    log concurrently.
    """
    stamped = f"[{time.strftime('%H:%M:%S')}] {msg}"
    print(stamped)
    with log_lock:
        with open(LOG_FILE, "a") as fh:
            fh.write(stamped + "\n")
|
||||||
|
|
||||||
|
|
||||||
|
def get_db():
    """Open the tracker database with dict-like row access.

    ``check_same_thread=False`` allows handing connections across the
    worker threads; writes are still guarded by ``db_lock`` elsewhere.
    """
    connection = sqlite3.connect(str(DB_PATH), check_same_thread=False)
    connection.row_factory = sqlite3.Row
    return connection
|
||||||
|
|
||||||
|
|
||||||
|
def call_qwen(prompt: str, max_retries: int = 5) -> dict | None:
    """Call the Qwen chat-completions API with fast, frequent retries.

    Returns the parsed JSON object from the model, or ``None`` after
    *max_retries* failed attempts. HTTP 429 responses count as throttling
    (tracked in ``stats``) and are retried after a short, bounded back-off.
    """
    last_error: Exception | None = None
    for attempt in range(max_retries):
        try:
            resp = httpx.post(
                DASHSCOPE_URL,
                headers={
                    "Authorization": f"Bearer {DASHSCOPE_KEY}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": "qwen-plus-latest",
                    "messages": [{"role": "user", "content": prompt}],
                    "temperature": 0.1,
                    "max_tokens": 2000,
                    "response_format": {"type": "json_object"},
                },
                timeout=30
            )

            if resp.status_code == 429:
                # Rate limited: back off a little longer each attempt, capped at 5s.
                stats["throttled"] += 1
                time.sleep(min(5, attempt + 1))
                continue

            resp.raise_for_status()

            content = resp.json()["choices"][0]["message"]["content"].strip()
            # Strip an optional Markdown code fence around the JSON payload.
            if content.startswith("```"):
                content = re.sub(r'^```\w*\n?', '', content)
                content = re.sub(r'\n?```$', '', content)

            return json.loads(content)

        except json.JSONDecodeError as e:
            last_error = e
            time.sleep(0.5)
        except httpx.HTTPStatusError as e:
            last_error = e
            time.sleep(1)
        except httpx.TimeoutException as e:
            last_error = e
            time.sleep(0.5)
        except Exception as e:  # keep the worker thread alive on unexpected errors
            last_error = e
            time.sleep(0.5)

    # Was a hard-coded "5"; report the actual retry budget and the last error.
    log(f" ✗ {max_retries} Retries fehlgeschlagen ({last_error})")
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def geocode_nominatim(query: str) -> tuple | None:
    """Resolve *query* to (lat, lon) within the Hagen bounding box.

    Returns ``None`` when the request fails or yields no match. Best
    effort by design: a geocoding failure must never abort processing.
    """
    try:
        resp = httpx.get(NOMINATIM_URL, params={
            "q": f"{query}, Hagen",
            "format": "json",
            "limit": 1,
            "viewbox": HAGEN_BBOX,
            "bounded": 1,
        }, headers={"User-Agent": USER_AGENT}, timeout=10)

        results = resp.json()
        if results:
            return (float(results[0]["lat"]), float(results[0]["lon"]))
    except Exception:
        # Was a bare `except:` — at least let SystemExit/KeyboardInterrupt through.
        pass
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def process_vorlage(vorlage_id: int, aktenzeichen: str, volltext: str) -> bool:
    """Summarize one Vorlage via Qwen and persist the result.

    Returns ``True`` on success. All DB work happens under ``db_lock`` so
    worker threads never interleave writes on the SQLite file.
    """
    text = volltext[:30000]  # Qwen Plus has 128k context, but >30k rarely adds value
    prompt = PROMPT_TEMPLATE.format(volltext=text)

    result = call_qwen(prompt)
    if not result:
        stats["failed"] += 1
        return False

    # Persist summary, status flag and any extracted places.
    with db_lock:
        conn = get_db()
        try:
            conn.execute("""
                INSERT INTO ki_bewertungen
                (vorlage_id, typ, begruendung, anmerkungen, modell, prompt_version)
                VALUES (?, 'zusammenfassung', ?, ?, 'qwen-plus-latest', 'v2-parallel')
            """, (
                vorlage_id,
                result.get("zusammenfassung", ""),
                json.dumps(result, ensure_ascii=False),
            ))

            # Mirror topic + completion status onto the vorlagen row.
            conn.execute("""
                UPDATE vorlagen SET thema_kurz = ?, ki_status = 'done' WHERE id = ?
            """, (result.get("thema"), vorlage_id))

            # Places are stored now but geocoded by a separate job
            # (Nominatim allows only 1 req/s), so `geocoded` stays 0 here.
            geocoded = 0
            for ort in result.get("orte", []):
                rohtext = ort.get("rohtext", "")
                geocodierbar = ort.get("geocodierbar", False)
                geocode_query = ort.get("geocode_query")
                typ = ort.get("typ", "sonstiges")
                kontext = ort.get("kontext", "")

                lat, lon = None, None
                status = 'pending' if geocodierbar else 'skipped'

                cursor = conn.execute("""
                    INSERT INTO orte (name, typ, lat, lon, rohtext, kontext_satz, geocode_status, vorlage_count)
                    VALUES (?, ?, ?, ?, ?, ?, ?, 1)
                """, (geocode_query or rohtext, typ, lat, lon, rohtext, kontext[:500], status))

                conn.execute("""
                    INSERT OR IGNORE INTO vorlagen_orte (vorlage_id, ort_id, kontext)
                    VALUES (?, ?, ?)
                """, (vorlage_id, cursor.lastrowid, kontext[:500]))

            conn.commit()

            stats["success"] += 1
            orte_count = len(result.get("orte", []))
            log(f" ✓ {aktenzeichen}: {result.get('thema', '?')} ({orte_count} Orte, {geocoded} geocodiert)")
            return True

        except Exception as e:
            log(f" DB-Fehler {aktenzeichen}: {e}")
            stats["failed"] += 1
            return False
        finally:
            # Previously closed by hand on each path; `finally` also covers
            # an exception inside log(), so the connection can never leak.
            conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Entry point: summarize one batch of Vorlagen with parallel workers.

    Exit code 0 means everything is done, 1 means more batches remain
    (the shell batch-runner loops until it sees 0).
    """
    parser = argparse.ArgumentParser(description="Parallele KI-Zusammenfassungen")
    parser.add_argument("--workers", type=int, default=5, help="Parallele API-Calls")
    parser.add_argument("--batch-size", type=int, default=100, help="Batch-Größe")
    args = parser.parse_args()

    log(f"=== Parallele KI-Zusammenfassung ===")
    log(f"Workers: {args.workers}, Batch: {args.batch_size}")

    if not DASHSCOPE_KEY:
        log("FEHLER: Kein API-Key!")
        return 1

    conn = get_db()

    # Count what is still open (uses the ki_status flag instead of a JOIN).
    remaining = conn.execute("""
        SELECT COUNT(*) FROM vorlagen
        WHERE volltext_clean IS NOT NULL AND volltext_clean != ''
        AND ki_status IS NULL
    """).fetchone()[0]

    log(f"Noch zu verarbeiten: {remaining}")

    if remaining == 0:
        log("Alle fertig!")
        conn.close()
        return 0

    # Fetch the next batch, newest first.
    vorlagen = conn.execute("""
        SELECT id, aktenzeichen, volltext_clean
        FROM vorlagen
        WHERE volltext_clean IS NOT NULL AND volltext_clean != ''
        AND ki_status IS NULL
        ORDER BY datum_eingang DESC
        LIMIT ?
    """, (args.batch_size,)).fetchall()

    conn.close()

    log(f"Batch: {len(vorlagen)} Vorlagen\n")

    start_time = time.time()

    with ThreadPoolExecutor(max_workers=args.workers) as executor:
        pending = {
            executor.submit(
                process_vorlage,
                row['id'], row['aktenzeichen'], row['volltext_clean']
            ): row['aktenzeichen']
            for row in vorlagen
        }

        for done in as_completed(pending):
            akz = pending[done]
            try:
                done.result()
            except Exception as e:
                log(f" ✗ {akz}: {e}")

    elapsed = time.time() - start_time
    docs_per_sec = stats["success"] / max(elapsed, 1)
    remaining_after = remaining - stats["success"]

    log(f"\n=== Batch fertig ===")
    log(f"✓ {stats['success']} | ✗ {stats['failed']} | ⏳ {stats['throttled']} throttled")
    log(f"Dauer: {elapsed:.0f}s | {docs_per_sec:.2f} docs/sec")
    log(f"Verbleibend: {remaining_after}")
    log(f"ETA: {remaining_after / max(docs_per_sec, 0.01) / 3600:.1f}h")

    return 0 if remaining_after == 0 else 1


if __name__ == "__main__":
    sys.exit(main())
|
||||||
285
scripts/ki_zusammenfassung.py
Normal file
285
scripts/ki_zusammenfassung.py
Normal file
@ -0,0 +1,285 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
KI-Zusammenfassung für Anträge via Qwen (DashScope).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
# Nominatim für Geocoding
|
||||||
|
NOMINATIM_URL = "https://nominatim.openstreetmap.org/search"
|
||||||
|
USER_AGENT = "Antragstracker-Hagen/1.0"
|
||||||
|
HAGEN_BBOX = "7.35,51.30,7.65,51.45"
|
||||||
|
|
||||||
|
PROJECT_ROOT = Path(__file__).resolve().parent.parent
|
||||||
|
DB_PATH = PROJECT_ROOT / "data" / "tracker_remote.db"
|
||||||
|
|
||||||
|
# DashScope API
|
||||||
|
DASHSCOPE_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions"
|
||||||
|
DASHSCOPE_KEY = os.environ.get("QWEN_API_KEY") or os.popen("security find-generic-password -s qwen-api -w 2>/dev/null").read().strip()
|
||||||
|
|
||||||
|
PROMPT_TEMPLATE = """Analysiere diesen kommunalpolitischen Antrag aus Hagen.
|
||||||
|
|
||||||
|
DOKUMENT:
|
||||||
|
{volltext}
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
Erstelle eine strukturierte Zusammenfassung im JSON-Format:
|
||||||
|
|
||||||
|
{{
|
||||||
|
"zusammenfassung": "2-3 Sätze, was gefordert wird",
|
||||||
|
"kernforderung": "Die zentrale Forderung in einem Satz",
|
||||||
|
"begruendung": "Warum wird das gefordert? (kurz)",
|
||||||
|
"thema": "Hauptthema (z.B. Verkehr, Soziales, Umwelt)",
|
||||||
|
"partei": "Antragstellende Fraktion falls erkennbar",
|
||||||
|
"orte": [
|
||||||
|
{{
|
||||||
|
"rohtext": "Die genaue Formulierung im Text (z.B. 'Polizeiwache an der Boeler Straße')",
|
||||||
|
"kontext": "Der Satz in dem der Ort erwähnt wird",
|
||||||
|
"typ": "strasse|platz|stadtteil|gebaeude|sonstiges",
|
||||||
|
"geocodierbar": true/false,
|
||||||
|
"geocode_query": "Suchbegriff für Karte (z.B. 'Boeler Straße' statt 'Polizeiwache an der Boeler Straße')"
|
||||||
|
}}
|
||||||
|
]
|
||||||
|
}}
|
||||||
|
|
||||||
|
WICHTIG für orte:
|
||||||
|
- Extrahiere ALLE geografischen Erwähnungen
|
||||||
|
- Bei "X an der Y-Straße" ist geocode_query = "Y-Straße"
|
||||||
|
- Straßennamen, Plätze, Stadtteile sind geocodierbar=true
|
||||||
|
- "Spielplatz", "Schule", "Bushaltestelle" ohne Straße sind geocodierbar=false
|
||||||
|
- geocode_query nur bei geocodierbar=true setzen
|
||||||
|
|
||||||
|
NUR JSON ausgeben, keine Erklärungen."""
|
||||||
|
|
||||||
|
|
||||||
|
def get_db():
    """Return a connection to the tracker DB with name-based row access."""
    db = sqlite3.connect(str(DB_PATH))
    db.row_factory = sqlite3.Row
    return db
|
||||||
|
|
||||||
|
|
||||||
|
def call_qwen(prompt: str) -> dict | None:
    """Send *prompt* to the Qwen chat API and return the parsed JSON reply.

    Returns ``None`` when no API key is configured, the request fails, or
    the model's answer is not valid JSON.
    """
    if not DASHSCOPE_KEY:
        print(" FEHLER: Kein QWEN_API_KEY gefunden")
        return None

    payload = {
        "model": "qwen-plus-latest",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3
    }
    auth_headers = {
        "Authorization": f"Bearer {DASHSCOPE_KEY}",
        "Content-Type": "application/json"
    }

    try:
        resp = httpx.post(DASHSCOPE_URL, headers=auth_headers, json=payload, timeout=180)
        resp.raise_for_status()

        content = resp.json()["choices"][0]["message"]["content"]

        # Peel a Markdown code fence off the answer if the model added one.
        for fence in ("```json", "```"):
            if fence in content:
                content = content.split(fence)[1].split("```")[0]
                break

        return json.loads(content.strip())

    except json.JSONDecodeError as e:
        print(f" JSON-Parse-Fehler: {e}")
        print(f" Content: {content[:200]}...")
        return None
    except Exception as e:
        print(f" API-Fehler: {e}")
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def process_vorlage(conn: sqlite3.Connection, vorlage: dict) -> bool:
    """Create and store the AI summary for a single Vorlage.

    Writes one row into ki_bewertungen, refreshes thema_kurz on the
    vorlagen row and links every extracted place. Returns ``True`` when a
    summary was stored.
    """
    vid = vorlage['id']
    akz = vorlage['aktenzeichen'] or f"#{vid}"
    volltext = vorlage['volltext_clean']

    if not volltext or len(volltext) < 100:
        print(f" {akz}: Volltext zu kurz")
        return False

    # Truncate over-long documents before prompting.
    if len(volltext) > 8000:
        volltext = volltext[:8000] + "\n[...gekürzt...]"

    result = call_qwen(PROMPT_TEMPLATE.format(volltext=volltext))
    if not result:
        return False

    # Store the summary plus the raw JSON for later inspection.
    conn.execute("""
        INSERT INTO ki_bewertungen (vorlage_id, typ, begruendung, anmerkungen, modell, prompt_version)
        VALUES (?, 'zusammenfassung', ?, ?, 'qwen-plus-latest', 'v1')
    """, (vid, result.get('zusammenfassung'), json.dumps(result, ensure_ascii=False)))

    # Keep the short topic on the vorlagen row in sync.
    kernforderung = result.get('kernforderung')
    if kernforderung:
        conn.execute("UPDATE vorlagen SET thema_kurz = ? WHERE id = ?",
                     (kernforderung[:200], vid))

    conn.commit()

    # Geocode and persist every extracted place.
    orte = result.get('orte', [])
    geocoded_count = sum(process_ort(conn, vid, ort) for ort in orte)

    print(f" {akz}: ✓ {result.get('thema', '?')} ({len(orte)} Orte, {geocoded_count} geocodiert)")
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def geocode_nominatim(query: str) -> tuple[float, float] | None:
    """Resolve *query* (restricted to the Hagen bounding box) to (lat, lon).

    Returns ``None`` when the request fails or Nominatim finds no match;
    a failed lookup must never abort the surrounding batch.
    """
    try:
        resp = httpx.get(
            NOMINATIM_URL,
            params={"q": f"{query}, Hagen, Germany", "format": "json", "limit": 1,
                    "viewbox": HAGEN_BBOX, "bounded": 1},
            headers={"User-Agent": USER_AGENT},
            timeout=10
        )
        resp.raise_for_status()
        results = resp.json()
        if results:
            return (float(results[0]['lat']), float(results[0]['lon']))
    except Exception:
        # Was `except Exception as e: pass` with the bound name never used.
        pass
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def process_ort(conn: sqlite3.Connection, vorlage_id: int, ort: dict) -> int:
    """Store one extracted place and geocode it when possible.

    Returns 1 when the place ends up with coordinates, else 0. Re-uses an
    existing ``orte`` row when the name or raw text already exists and then
    only adds the vorlage link plus a reference-count bump.
    """
    rohtext = ort.get('rohtext', '')
    kontext = ort.get('kontext', '')
    typ = ort.get('typ', 'sonstiges')
    geocodierbar = ort.get('geocodierbar', False)
    geocode_query = ort.get('geocode_query')

    if not rohtext:
        return 0

    # Deduplicate: link to an already known place instead of inserting twice.
    existing = conn.execute(
        "SELECT id, lat FROM orte WHERE name = ? OR rohtext = ?",
        (geocode_query or rohtext, rohtext)
    ).fetchone()

    if existing:
        conn.execute("""
            INSERT OR IGNORE INTO vorlagen_orte (vorlage_id, ort_id, kontext)
            VALUES (?, ?, ?)
        """, (vorlage_id, existing['id'], kontext[:500] if kontext else None))
        conn.execute("UPDATE orte SET vorlage_count = vorlage_count + 1 WHERE id = ?", (existing['id'],))
        conn.commit()
        # `is not None` instead of truthiness: lat == 0.0 is a valid coordinate.
        return 1 if existing['lat'] is not None else 0

    # New place: geocode now when the model marked it as resolvable.
    lat, lon = None, None
    status = 'skipped'

    if geocodierbar and geocode_query:
        time.sleep(1.1)  # Nominatim rate limit: max 1 request/s
        coords = geocode_nominatim(geocode_query)
        if coords:
            lat, lon = coords
            status = 'success'
        else:
            status = 'failed'

    cursor = conn.execute("""
        INSERT INTO orte (name, typ, lat, lon, rohtext, kontext_satz, geocode_status, vorlage_count)
        VALUES (?, ?, ?, ?, ?, ?, ?, 1)
    """, (geocode_query or rohtext, typ, lat, lon, rohtext, kontext[:500] if kontext else None, status))

    ort_id = cursor.lastrowid
    conn.execute("""
        INSERT OR IGNORE INTO vorlagen_orte (vorlage_id, ort_id, kontext)
        VALUES (?, ?, ?)
    """, (vorlage_id, ort_id, kontext[:500] if kontext else None))
    conn.commit()

    return 1 if lat is not None else 0
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run one batch of AI summaries; exit 0 when nothing is left, else 1.

    The shell batch-runner relies on the exit code to decide whether to
    start another batch.
    """
    parser = argparse.ArgumentParser(description="KI-Zusammenfassung für Anträge")
    parser.add_argument("--limit", type=int, default=100, help="Max. Anzahl pro Batch (default: 100)")
    parser.add_argument("--typ", type=str, default=None, help="Vorlagen-Typ Filter")
    args = parser.parse_args()

    print(f"=== KI-Zusammenfassung ===\n")

    conn = get_db()

    # Count remaining work first so the batch runner can report progress.
    count_query = """
        SELECT COUNT(*)
        FROM vorlagen v
        LEFT JOIN ki_bewertungen kb ON v.id = kb.vorlage_id AND kb.typ = 'zusammenfassung'
        WHERE v.volltext_clean IS NOT NULL AND v.volltext_clean != ''
        AND kb.id IS NULL
    """
    remaining = conn.execute(count_query).fetchone()[0]
    print(f"Noch zu verarbeiten: {remaining}")

    if remaining == 0:
        print("Alle fertig!")
        conn.close()
        return 0  # Exit-Code 0 = fertig

    # Vorlagen with full text but no AI summary yet.
    query = """
        SELECT v.id, v.aktenzeichen, v.volltext_clean
        FROM vorlagen v
        LEFT JOIN ki_bewertungen kb ON v.id = kb.vorlage_id AND kb.typ = 'zusammenfassung'
        WHERE v.volltext_clean IS NOT NULL AND v.volltext_clean != ''
        AND kb.id IS NULL
    """
    # Parameterized instead of f-string interpolation: a --typ value
    # containing a quote must not be able to alter the SQL.
    params = []
    if args.typ:
        query += " AND v.typ = ?"
        params.append(args.typ)
    query += " ORDER BY v.datum_eingang DESC LIMIT ?"
    params.append(args.limit)

    vorlagen = conn.execute(query, params).fetchall()
    print(f"Verarbeite {len(vorlagen)} Vorlagen (Batch)\n")

    success = 0
    for v in vorlagen:
        if process_vorlage(conn, dict(v)):
            success += 1

    conn.close()

    remaining_after = remaining - success
    print(f"\n=== Batch fertig: {success}/{len(vorlagen)} erfolgreich ===")
    print(f"Verbleibend: {remaining_after}")

    # Exit-Code: 0 = alles fertig, 1 = noch mehr zu tun
    return 0 if remaining_after == 0 else 1


if __name__ == "__main__":
    import sys
    sys.exit(main())
|
||||||
149
scripts/ocr_scans.py
Normal file
149
scripts/ocr_scans.py
Normal file
@ -0,0 +1,149 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
OCR für PDFs ohne extrahierbaren Text.
|
||||||
|
Nutzt Apple Vision Framework via ocrmac.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import sqlite3
|
||||||
|
import tempfile
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import pymupdf
|
||||||
|
from ocrmac import ocrmac
|
||||||
|
|
||||||
|
PROJECT_ROOT = Path(__file__).resolve().parent.parent
|
||||||
|
DB_PATH = PROJECT_ROOT / "data" / "tracker_remote.db"
|
||||||
|
LOG_FILE = PROJECT_ROOT / "data" / "ocr.log"
|
||||||
|
|
||||||
|
|
||||||
|
def log(msg: str):
    """Echo *msg* to stdout and append it, timestamped, to the OCR log file."""
    entry = f"[{time.strftime('%H:%M:%S')}] {msg}"
    print(entry)
    with open(LOG_FILE, "a") as fh:
        fh.write(entry + "\n")
|
||||||
|
|
||||||
|
|
||||||
|
def get_db():
    """Open the tracker database; rows are accessible by column name."""
    db = sqlite3.connect(str(DB_PATH))
    db.row_factory = sqlite3.Row
    return db
|
||||||
|
|
||||||
|
|
||||||
|
def get_pdfs_without_text(limit: int) -> list[dict]:
    """Return up to *limit* Vorlagen whose downloaded PDF yielded (almost) no text.

    Each item carries the vorlage ``id`` and the attachment ``url``.
    """
    conn = get_db()
    try:
        # Vorlagen with a downloaded attachment URL but <50 chars of clean text.
        rows = conn.execute("""
            SELECT v.id, a.url
            FROM vorlagen v
            JOIN anlagen a ON v.id = a.vorlage_id
            WHERE a.url IS NOT NULL
            AND a.downloaded = 1
            AND (v.volltext_clean IS NULL OR v.volltext_clean = '' OR LENGTH(v.volltext_clean) < 50)
            ORDER BY v.datum_eingang DESC
            LIMIT ?
        """, (limit,)).fetchall()
    finally:
        # Previously the connection leaked if the query raised.
        conn.close()
    return [dict(r) for r in rows]
|
||||||
|
|
||||||
|
|
||||||
|
def ocr_pdf(url: str) -> tuple[str | None, str | None]:
    """Download the PDF at *url*, render each page and OCR it via Apple Vision.

    Returns ``(text, None)`` on success or ``(None, error_message)`` on
    failure. All temporary files are removed even when an exception occurs
    mid-way (previously they leaked on early errors).
    """
    tmp_pdf_path = None
    try:
        # Download
        resp = httpx.get(url, timeout=60, follow_redirects=True)
        resp.raise_for_status()

        if len(resp.content) < 100:
            return None, "PDF zu klein"

        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp_pdf:
            tmp_pdf.write(resp.content)
            tmp_pdf.flush()
            tmp_pdf_path = tmp_pdf.name

        doc = pymupdf.open(tmp_pdf_path)
        all_text = []
        try:
            for page_num, page in enumerate(doc):
                # Render the page as an image (higher DPI for better OCR).
                pix = page.get_pixmap(dpi=200)

                with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_img:
                    img_path = tmp_img.name
                try:
                    pix.save(img_path)
                    # Apple Vision OCR
                    results = ocrmac.OCR(img_path).recognize()
                    page_text = " ".join(r[0] for r in results)
                    all_text.append(page_text)
                except Exception as e:
                    log(f" OCR-Fehler Seite {page_num}: {e}")
                finally:
                    Path(img_path).unlink(missing_ok=True)
        finally:
            doc.close()

        text = "\n\n".join(all_text).strip()

        if len(text) < 50:
            return None, "Kein Text erkannt"

        return text, None

    except Exception as e:
        return None, str(e)[:100]
    finally:
        # The temp PDF must go even when download/render failed halfway.
        if tmp_pdf_path:
            Path(tmp_pdf_path).unlink(missing_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Find scan-only PDFs, OCR them and write recovered text back to the DB."""
    parser = argparse.ArgumentParser(description="OCR für Scan-PDFs")
    parser.add_argument("--limit", type=int, default=100, help="Max. Anzahl")
    args = parser.parse_args()

    log(f"=== OCR für Scans gestartet ===")
    log(f"Limit: {args.limit}")

    pdfs = get_pdfs_without_text(args.limit)
    log(f"Gefunden: {len(pdfs)} PDFs ohne Text")

    if not pdfs:
        log("Nichts zu tun!")
        return

    conn = get_db()
    success, failed = 0, 0

    for index, pdf in enumerate(pdfs, start=1):
        log(f"[{index}/{len(pdfs)}] Vorlage #{pdf['id']}...")

        text, error = ocr_pdf(pdf['url'])

        if text:
            # Persist the recovered text in both full-text columns.
            conn.execute("""
                UPDATE vorlagen SET volltext = ?, volltext_clean = ?
                WHERE id = ?
            """, (text, text, pdf['id']))
            conn.commit()
            success += 1
            log(f" ✓ {len(text)} Zeichen via OCR")
        else:
            failed += 1
            log(f" ✗ {error}")

    conn.close()

    log(f"\n=== Fertig ===")
    log(f"Erfolgreich: {success}")
    log(f"Fehlgeschlagen: {failed}")


if __name__ == "__main__":
    main()
|
||||||
87
scripts/push_system_metrics.py
Normal file
87
scripts/push_system_metrics.py
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Pusht Mac Mini System-Metriken zum VServer für Netdata.
|
||||||
|
Läuft als Hintergrund-Daemon.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
METRICS_URL = "http://152.53.119.77:8127"
|
||||||
|
INTERVAL = 10 # Sekunden
|
||||||
|
|
||||||
|
def get_cpu_percent():
    """Total CPU usage in percent (user + sys), parsed from macOS ``top``.

    Returns 0 when the line cannot be found or parsed.
    """
    import re  # hoisted out of the loop body where it sat before

    result = subprocess.run(
        ["top", "-l", "1", "-n", "0"],
        capture_output=True, text=True
    )
    for line in result.stdout.split("\n"):
        if "CPU usage" in line:
            # e.g. "CPU usage: 2.53% user, 10.52% sys, 84.21% idle"
            numbers = re.findall(r'([\d.]+)%', line)
            if len(numbers) >= 2:
                return float(numbers[0]) + float(numbers[1])  # user + sys
    return 0
|
||||||
|
|
||||||
|
def get_memory_percent():
    """Memory usage in percent derived from macOS ``vm_stat`` page counters.

    Returns 0 when no counters could be parsed.
    """
    result = subprocess.run(["vm_stat"], capture_output=True, text=True)
    stats = {}
    for line in result.stdout.split("\n"):
        if ":" in line:
            # partition() tolerates extra colons; split(":") would raise.
            key, _, val = line.partition(":")
            val = val.strip().rstrip(".")
            try:
                stats[key.strip()] = int(val)
            except ValueError:
                # Header/footer lines carry no page counter; skip them.
                pass

    # The used/total ratio needs only page counts, so the absolute page
    # size (16384 on Apple Silicon) is not required here.
    pages_free = stats.get("Pages free", 0)
    pages_active = stats.get("Pages active", 0)
    pages_inactive = stats.get("Pages inactive", 0)
    pages_wired = stats.get("Pages wired down", 0)

    total_pages = pages_free + pages_active + pages_inactive + pages_wired
    used_pages = pages_active + pages_wired

    if total_pages > 0:
        return (used_pages / total_pages) * 100
    return 0
|
||||||
|
|
||||||
|
def get_load_avg():
    """1-minute load average, or 0 when unavailable."""
    import os
    try:
        # os.getloadavg() reads the same kernel value as `sysctl vm.loadavg`
        # without spawning a subprocess and parsing its text output.
        return os.getloadavg()[0]
    except OSError:
        return 0
|
||||||
|
|
||||||
|
def push_metrics():
    """Collect the three Mac-mini metrics and POST them to the VServer."""
    payload = {
        "macmini.cpu_percent": get_cpu_percent(),
        "macmini.memory_percent": get_memory_percent(),
        "macmini.load_1min": get_load_avg(),
    }

    try:
        httpx.post(METRICS_URL, json=payload, timeout=5)
        print(f"Pushed: {payload}")
    except Exception as e:
        print(f"Push failed: {e}")
|
||||||
|
|
||||||
|
def main():
    """Announce the target, then push metrics forever at a fixed interval."""
    print(f"Mac Mini metrics pusher → {METRICS_URL}")
    print(f"Interval: {INTERVAL}s")

    while True:
        push_metrics()
        time.sleep(INTERVAL)


if __name__ == "__main__":
    main()
|
||||||
44
scripts/run_beratungen_batches.sh
Executable file
44
scripts/run_beratungen_batches.sh
Executable file
@ -0,0 +1,44 @@
|
|||||||
|
#!/bin/bash
# Runs the Beratungsfolge scraper in batches until no Vorlage is left.
# Every batch's output is appended to $LOG_FILE.

cd "$(dirname "$0")/.."
source .venv/bin/activate

LOG_FILE="data/beratungen_batches.log"
BATCH_SIZE=200
PAUSE_SECONDS=10

echo "=== Beratungsfolge-Batch-Runner gestartet $(date) ===" | tee -a "$LOG_FILE"

while true; do
    echo "" | tee -a "$LOG_FILE"
    echo "--- Starte Batch $(date +%H:%M:%S) ---" | tee -a "$LOG_FILE"

    python scripts/scrape_beratungsfolge.py --limit $BATCH_SIZE 2>&1 | tee -a "$LOG_FILE"

    # Count Vorlagen that still lack a Beschlusstext.
    REMAINING=$(python -c "
import sqlite3
conn = sqlite3.connect('data/tracker_remote.db')
r = conn.execute('''
    SELECT COUNT(DISTINCT v.id)
    FROM vorlagen v
    LEFT JOIN beratungen b ON v.id = b.vorlage_id AND b.beschlusstext IS NOT NULL
    WHERE v.web_url IS NOT NULL AND b.id IS NULL
''').fetchone()[0]
print(r)
")

    # Abort instead of looping forever when the count query itself failed
    # (an empty $REMAINING previously made the -eq test error out silently).
    if ! [ "$REMAINING" -ge 0 ] 2>/dev/null; then
        echo "FEHLER: Zaehlabfrage fehlgeschlagen (REMAINING='$REMAINING')" | tee -a "$LOG_FILE"
        break
    fi

    echo "Verbleibend: $REMAINING" | tee -a "$LOG_FILE"

    if [ "$REMAINING" -eq 0 ]; then
        echo "" | tee -a "$LOG_FILE"
        echo "=== ALLE FERTIG $(date) ===" | tee -a "$LOG_FILE"
        break
    fi

    echo "Pause ${PAUSE_SECONDS}s..." | tee -a "$LOG_FILE"
    sleep $PAUSE_SECONDS
done

echo "Runner beendet." | tee -a "$LOG_FILE"
|
||||||
30
scripts/run_ki_batches.sh
Executable file
30
scripts/run_ki_batches.sh
Executable file
@ -0,0 +1,30 @@
|
|||||||
|
#!/bin/bash
# Drives scripts/ki_zusammenfassung.py in batches until it reports
# exit code 0 (= nothing left to summarize).

cd "$(dirname "$0")/.."
source .venv/bin/activate

LOG_FILE="data/ki_batches.log"
BATCH_SIZE=100
PAUSE_SECONDS=5

# One batch run; tee would mask the python exit code, so recover it
# from PIPESTATUS and hand it on as the function's return value.
run_batch() {
    python scripts/ki_zusammenfassung.py --limit $BATCH_SIZE 2>&1 | tee -a "$LOG_FILE"
    return ${PIPESTATUS[0]}
}

echo "=== KI-Batch-Runner gestartet $(date) ===" | tee -a "$LOG_FILE"

while true; do
    echo "" | tee -a "$LOG_FILE"
    echo "--- Starte Batch $(date +%H:%M:%S) ---" | tee -a "$LOG_FILE"

    if run_batch; then
        echo "" | tee -a "$LOG_FILE"
        echo "=== ALLE FERTIG $(date) ===" | tee -a "$LOG_FILE"
        break
    fi

    echo "Pause ${PAUSE_SECONDS}s vor nächstem Batch..." | tee -a "$LOG_FILE"
    sleep $PAUSE_SECONDS
done

echo "Runner beendet." | tee -a "$LOG_FILE"
|
||||||
32
scripts/run_ki_parallel.sh
Executable file
32
scripts/run_ki_parallel.sh
Executable file
@ -0,0 +1,32 @@
|
|||||||
|
#!/bin/bash
# Runs parallel KI summarisation in batches until everything is done
# (worker exits 0). Output is mirrored into the log file.

cd "$(dirname "$0")/.."
source .venv/bin/activate

LOG_FILE="data/ki_parallel_batches.log"
WORKERS=15
BATCH_SIZE=100
PAUSE_SECONDS=5

# Mirror stdin to the terminal and append it to the log file.
log() { tee -a "$LOG_FILE"; }

echo "=== KI-Parallel-Runner gestartet $(date) ===" | log
echo "Workers: $WORKERS, Batch: $BATCH_SIZE" | log

while :; do
    echo "" | log
    echo "--- Starte Batch $(date +%H:%M:%S) ---" | log

    python scripts/ki_parallel.py --workers "$WORKERS" --batch-size "$BATCH_SIZE" 2>&1 | log
    rc=${PIPESTATUS[0]}

    # Exit code 0 from the worker means there is nothing left to process.
    if [ "$rc" -eq 0 ]; then
        echo "" | log
        echo "=== ALLE FERTIG $(date) ===" | log
        break
    fi

    echo "Pause ${PAUSE_SECONDS}s..." | log
    sleep "$PAUSE_SECONDS"
done

echo "Runner beendet." | log
|
||||||
341
scripts/scrape_abstimmungen.py
Normal file
341
scripts/scrape_abstimmungen.py
Normal file
@ -0,0 +1,341 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
ALLRIS Abstimmungs-Scraper für Antragstracker Hagen.
|
||||||
|
Lädt Abstimmungsergebnisse von to020-Seiten und extrahiert fraktionsweises Stimmverhalten.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sqlite3
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
# Resolve paths relative to the repository root so the script works from any CWD.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
DB_PATH = PROJECT_ROOT / "data" / "tracker_remote.db"

# DashScope API (Qwen models, OpenAI-compatible chat endpoint).
DASHSCOPE_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions"
# API key: environment variable first, macOS keychain as fallback.
# NOTE(review): os.popen runs at import time; on non-macOS hosts the command
# fails silently (2>/dev/null) and the key ends up empty — confirm intended.
DASHSCOPE_KEY = os.environ.get("QWEN_API_KEY") or os.popen("security find-generic-password -s qwen-api -w 2>/dev/null").read().strip()


# Prompt template sent to Qwen; {text} is filled with the scraped voting text.
# The doubled braces ({{ }}) are literal braces surviving str.format().
EXTRACTION_PROMPT = """Analysiere dieses Abstimmungsergebnis aus einem kommunalen Ratsinformationssystem.

TEXT:
{text}

---

Extrahiere das Stimmverhalten aller Fraktionen. Gib NUR valides JSON zurück:

{{
"ergebnis": "beschlossen/abgelehnt/vertagt/zurückgezogen",
"ergebnis_typ": "einstimmig/mehrheitlich/mit_gegenstimmen",
"fraktionen": [
{{"name": "SPD", "stimme": "ja", "anzahl": null, "bemerkung": null}},
{{"name": "AfD", "stimme": "enthaltung", "anzahl": null, "bemerkung": null}},
{{"name": "Hagen Aktiv", "stimme": "enthaltung", "anzahl": 1, "bemerkung": "1 Mitglied"}}
]
}}

Regeln:
- stimme: "ja", "nein", oder "enthaltung"
- anzahl: nur wenn explizit Teilmenge genannt ("1 Mitglied"), sonst null
- Bei "einstimmig" oder "ungeändert beschlossen" ohne Gegenstimmen: alle bekannten Fraktionen als "ja"
- Bekannte Hagener Fraktionen: SPD, CDU, Grüne, FDP, AfD, HAK/Die Linke, Hagen Aktiv, Freie Wähler

NUR JSON, keine Erklärungen."""
|
||||||
|
|
||||||
|
|
||||||
|
def get_db():
    """Open the project SQLite database with dict-like row access."""
    connection = sqlite3.connect(str(DB_PATH))
    connection.row_factory = sqlite3.Row
    return connection
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_to020_page(client: httpx.Client, tolfdnr: int) -> str | None:
    """Download the to020 page (agenda-item detail) for one TOLFDNR.

    Returns the HTML body, or None when the request fails.
    """
    page_url = f"https://allris.hagen.de/public/to020?TOLFDNR={tolfdnr}"
    try:
        response = client.get(page_url, timeout=30, follow_redirects=True)
        response.raise_for_status()
    except Exception as exc:  # best effort: report and skip this page
        print(f" Fehler beim Laden von {page_url}: {exc}")
        return None
    return response.text
|
||||||
|
|
||||||
|
|
||||||
|
def extract_abstimmung_text(html: str) -> tuple[str | None, str | None]:
    """Pull the voting-result block out of a to020 HTML page.

    Returns: (ergebnis_kurz, volltext)
    """
    soup = BeautifulSoup(html, 'html.parser')

    # Short form, e.g. "ungeändert beschlossen".
    ergebnis_kurz = None
    beschluss = soup.find('span', id='toBeschlussart')
    if beschluss is not None:
        ergebnis_kurz = beschluss.get_text(strip=True)

    doc_parts = [div.get_text() for div in soup.find_all('div', class_='docPart')]

    # Primary pass: a docPart explicitly mentioning the voting result.
    volltext = None
    for part in doc_parts:
        if 'Abstimmungsergebnis' in part or 'einstimmig' in part.lower() or 'Enthaltung' in part:
            volltext = part.strip()
            break

    # Fallback: scan the verbatim minutes for any voting-related keyword.
    if not volltext:
        vote_keywords = ('abstimmung', 'ja-stimm', 'nein-stimm', 'enthält sich', 'mehrheitlich', 'einstimmig')
        for part in doc_parts:
            lowered = part.lower()
            if any(kw in lowered for kw in vote_keywords):
                volltext = part.strip()
                break

    return ergebnis_kurz, volltext
|
||||||
|
|
||||||
|
|
||||||
|
def call_qwen_turbo(prompt: str) -> dict | None:
    """Send a prompt to the Qwen-Turbo chat endpoint and parse its JSON answer.

    Returns the decoded dict, or None on any API or parse failure.
    """
    if not DASHSCOPE_KEY:
        print(" FEHLER: Kein QWEN_API_KEY gefunden")
        return None

    request_headers = {
        "Authorization": f"Bearer {DASHSCOPE_KEY}",
        "Content-Type": "application/json"
    }
    request_body = {
        "model": "qwen-turbo-latest",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.1
    }

    try:
        response = httpx.post(DASHSCOPE_URL, headers=request_headers, json=request_body, timeout=30)
        response.raise_for_status()

        answer = response.json()["choices"][0]["message"]["content"]

        # The model sometimes wraps its JSON in a Markdown code fence - unwrap.
        if "```json" in answer:
            answer = answer.split("```json")[1].split("```")[0]
        elif "```" in answer:
            answer = answer.split("```")[1].split("```")[0]

        return json.loads(answer.strip())

    except json.JSONDecodeError as e:
        print(f" JSON-Parse-Fehler: {e}")
        return None
    except Exception as e:
        print(f" API-Fehler: {e}")
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_tolfdnrs_from_vorlage(client: httpx.Client, volfdnr: int) -> list[tuple[int, str, str]]:
    """Collect the TOLFDNRs referenced from a vo020 (Vorlage) page.

    Returns: deduplicated list of (tolfdnr, datum, gremium_name).
    """
    page_url = f"https://allris.hagen.de/public/vo020?VOLFDNR={volfdnr}"
    try:
        response = client.get(page_url, timeout=30, follow_redirects=True)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        found = []
        # Every to020 link carries the TOLFDNR as a query parameter.
        for anchor in soup.find_all('a', href=re.compile(r'TOLFDNR=\d+')):
            id_match = re.search(r'TOLFDNR=(\d+)', anchor.get('href', ''))
            if not id_match:
                continue
            tolfdnr = int(id_match.group(1))

            # Best effort: pull date and committee name from the table row.
            datum = ''
            gremium = ''
            table_row = anchor.find_parent('tr')
            if table_row:
                for cell in table_row.find_all('td'):
                    cell_text = cell.get_text(strip=True)
                    if re.match(r'\d{2}\.\d{2}\.\d{4}', cell_text):
                        datum = cell_text
                    elif 'Sitzung' in cell_text or 'Rat' in cell_text or 'Ausschuss' in cell_text:
                        gremium = cell_text
            found.append((tolfdnr, datum, gremium))

        return list(set(found))  # deduplicate
    except Exception as e:
        print(f" Fehler beim Laden von vo020: {e}")
        return []
|
||||||
|
|
||||||
|
|
||||||
|
def process_vorlage(conn: sqlite3.Connection, client: httpx.Client, vorlage: dict) -> int:
    """Process every agenda item (TOLFDNR) linked from one Vorlage.

    Returns: number of successfully extracted votes.
    """
    vid = vorlage['id']
    volfdnr = vorlage.get('volfdnr')

    # Fall back to parsing the VOLFDNR out of the web_url.
    if not volfdnr:
        found = re.search(r'VOLFDNR=(\d+)', vorlage.get('web_url', ''))
        if not found:
            return 0
        volfdnr = int(found.group(1))

    # Collect the agenda items referenced from the Vorlage page.
    agenda_items = get_tolfdnrs_from_vorlage(client, volfdnr)
    if not agenda_items:
        return 0

    extracted = 0
    for tolfdnr, datum, gremium in agenda_items:
        # Skip agenda items that are already stored (TOLFDNR is recorded
        # inside ergebnis_detail).
        already_done = conn.execute(
            "SELECT id FROM abstimmungen WHERE vorlage_id = ? AND ergebnis_detail LIKE ?",
            (vid, f'%{tolfdnr}%')
        ).fetchone()
        if already_done:
            continue

        if process_tolfdnr(conn, client, vid, tolfdnr, datum, gremium):
            extracted += 1
        time.sleep(0.3)

    return extracted
|
||||||
|
|
||||||
|
|
||||||
|
def process_tolfdnr(conn: sqlite3.Connection, client: httpx.Client,
                    vorlage_id: int, tolfdnr: int, datum: str, gremium: str) -> bool:
    """Extract and store the voting result of a single agenda item (TOLFDNR).

    Loads the to020 page, pulls the voting text, optionally runs the LLM
    extraction, and writes one abstimmungen row plus per-fraction rows.

    Args:
        conn: Open SQLite connection.
        client: Shared HTTP client.
        vorlage_id: DB id of the parent Vorlage.
        tolfdnr: ALLRIS agenda-item number.
        datum: Session date as "DD.MM.YYYY" (may be empty).
        gremium: Committee name (may be empty).

    Returns:
        True if a result was stored.
    """

    # Load the agenda-item page.
    html = fetch_to020_page(client, tolfdnr)
    if not html:
        return False

    # Extract the voting-result text.
    ergebnis_kurz, volltext = extract_abstimmung_text(html)

    if not ergebnis_kurz and not volltext:
        return False

    # LLM analysis when a usable full text is available (skip trivially short text).
    fraktionen_data = []
    if volltext and len(volltext) > 20:
        prompt = EXTRACTION_PROMPT.format(text=volltext[:3000])
        result = call_qwen_turbo(prompt)
        if result and 'fraktionen' in result:
            fraktionen_data = result['fraktionen']
            if result.get('ergebnis'):
                ergebnis_kurz = result['ergebnis']

    # Resolve the committee id by fuzzy name match.
    gremium_id = None
    if gremium:
        row = conn.execute("SELECT id FROM gremien WHERE name LIKE ?", (f"%{gremium[:20]}%",)).fetchone()
        if row:
            gremium_id = row['id']

    # Parse DD.MM.YYYY into ISO YYYY-MM-DD. The original wrapped this in a
    # bare `try/except: pass`; neither split nor the guarded indexing can
    # raise here, so the dead guard was removed.
    sitzung_datum = None
    if datum:
        parts = datum.split('.')
        if len(parts) == 3:
            sitzung_datum = f"{parts[2]}-{parts[1]}-{parts[0]}"

    # ergebnis_detail always records the TOLFDNR so process_vorlage() can
    # detect already-scraped items via its LIKE check.
    detail_json = json.dumps({
        "tolfdnr": tolfdnr,
        "fraktionen": fraktionen_data
    }, ensure_ascii=False) if fraktionen_data else json.dumps({"tolfdnr": tolfdnr})

    cursor = conn.execute("""
        INSERT INTO abstimmungen (beratung_id, vorlage_id, gremium_id, sitzung_datum, ergebnis, ergebnis_detail, volltext)
        VALUES (?, ?, ?, ?, ?, ?, ?)
    """, (None, vorlage_id, gremium_id, sitzung_datum, ergebnis_kurz, detail_json, volltext))

    abstimmung_id = cursor.lastrowid

    # Per-fraction vote rows.
    for f in fraktionen_data:
        conn.execute("""
            INSERT INTO abstimmungen_fraktionen (abstimmung_id, fraktion, stimme, anzahl, bemerkung)
            VALUES (?, ?, ?, ?, ?)
        """, (abstimmung_id, f.get('name'), f.get('stimme'), f.get('anzahl'), f.get('bemerkung')))

    conn.commit()

    print(f" ✓ TOLFDNR {tolfdnr}: {ergebnis_kurz or 'OK'} ({len(fraktionen_data)} Fraktionen)")
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: scrape voting results for Vorlagen without any Abstimmung.

    Selects Vorlagen that have a web_url but no abstimmungen row, optionally
    filtered by --typ and --vorlage, and processes up to --limit of them.
    """
    parser = argparse.ArgumentParser(description="ALLRIS Abstimmungs-Scraper")
    parser.add_argument("--limit", type=int, default=10, help="Max. Anzahl Vorlagen")
    parser.add_argument("--typ", type=str, default="antrag", help="Vorlagen-Typ (antrag/anfrage)")
    parser.add_argument("--vorlage", type=int, help="Nur bestimmte Vorlage-ID")
    args = parser.parse_args()

    print("=== ALLRIS Abstimmungs-Scraper ===\n")

    conn = get_db()
    client = httpx.Client()

    # Vorlagen with a web_url that have no Abstimmung row yet.
    query = """
        SELECT v.id, v.aktenzeichen, v.web_url
        FROM vorlagen v
        LEFT JOIN abstimmungen a ON v.id = a.vorlage_id
        WHERE v.web_url IS NOT NULL
        AND a.id IS NULL
    """
    params = []

    if args.typ:
        query += " AND v.typ = ?"
        params.append(args.typ)

    if args.vorlage:
        query += " AND v.id = ?"
        params.append(args.vorlage)

    # LIMIT is bound as a parameter instead of being interpolated into the
    # SQL string (the original used an f-string here).
    query += " ORDER BY v.datum_eingang DESC LIMIT ?"
    params.append(args.limit)

    vorlagen = conn.execute(query, params).fetchall()
    print(f"Verarbeite {len(vorlagen)} Vorlagen\n")

    total_success = 0
    for v in vorlagen:
        vd = dict(v)
        print(f"Vorlage {vd.get('aktenzeichen', vd['id'])}...")
        success = process_vorlage(conn, client, vd)
        total_success += success
        if success == 0:
            print(" (keine Abstimmungsdaten)")
        time.sleep(0.5)

    client.close()
    conn.close()

    print(f"\n=== Fertig: {total_success} Abstimmungen extrahiert ===")
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point.
if __name__ == "__main__":
    main()
|
||||||
259
scripts/scrape_beratungsfolge.py
Normal file
259
scripts/scrape_beratungsfolge.py
Normal file
@ -0,0 +1,259 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Scraped Beratungsfolge und Beschlüsse von ALLRIS Vorlagen-Seiten.
|
||||||
|
Extrahiert: Sitzungen, Beschlussart, Beschlusstext aus verlinkten TOs.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import re
|
||||||
|
import sqlite3
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
# Resolve paths relative to the repository root so the script works from any CWD.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
DB_PATH = PROJECT_ROOT / "data" / "tracker_remote.db"
LOG_FILE = PROJECT_ROOT / "data" / "beratungsfolge.log"

# Rate limiting: pause (seconds) between requests to the ALLRIS server.
DELAY_SECONDS = 1.0
|
||||||
|
|
||||||
|
|
||||||
|
def log(msg: str):
    """Print *msg* with a timestamp and append it to LOG_FILE."""
    timestamp = time.strftime("%H:%M:%S")
    line = f"[{timestamp}] {msg}"
    print(line)
    # Explicit UTF-8: log lines contain non-ASCII (e.g. "✓"), which would
    # fail to encode under a C/POSIX locale with the platform default.
    with open(LOG_FILE, "a", encoding="utf-8") as f:
        f.write(line + "\n")
|
||||||
|
|
||||||
|
|
||||||
|
def get_db():
    """Return a connection to the tracker database (rows behave like dicts)."""
    db = sqlite3.connect(str(DB_PATH))
    db.row_factory = sqlite3.Row
    return db
|
||||||
|
|
||||||
|
|
||||||
|
def init_tables(conn):
    """Add any missing scraper columns to the beratungen table (idempotent)."""
    present = {row[1] for row in conn.execute('PRAGMA table_info(beratungen)').fetchall()}

    for col in ['to_url', 'tolfdnr', 'beschlussart', 'beschlusstext', 'wortprotokoll', 'scraped_at']:
        if col in present:
            continue
        conn.execute(f'ALTER TABLE beratungen ADD COLUMN {col} TEXT')
        log(f" Schema: +{col}")

    conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def scrape_vorlage_page(url: str) -> list[dict]:
    """Scrape the consultation sequence (Beratungsfolge) of one Vorlage page.

    Each entry links an agenda item (to020) to its preceding session (to010).
    """
    base = 'https://allris.hagen.de'
    try:
        resp = httpx.get(url, timeout=30, follow_redirects=True)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, 'html.parser')

        entries = []
        for anchor in soup.find_all('a', href=True):
            href = anchor['href']

            # Only agenda-item links (to020 with TOLFDNR) carry the decision.
            if 'to020' not in href or 'TOLFDNR=' not in href:
                continue
            id_match = re.search(r'TOLFDNR=(\d+)', href)
            if not id_match:
                continue

            # The nearest preceding to010 link identifies the session.
            sitzung_name = None
            sitzung_url = None
            session_link = anchor.find_previous('a', href=re.compile(r'to010.*SILFDNR='))
            if session_link:
                sitzung_name = session_link.get_text(strip=True)
                sitzung_url = session_link['href']
                if not sitzung_url.startswith('http'):
                    sitzung_url = base + sitzung_url

            entries.append({
                'tolfdnr': id_match.group(1),
                'beschlussart': anchor.get_text(strip=True),
                'sitzung_name': sitzung_name,
                'sitzung_url': sitzung_url,
                'to_url': href if href.startswith('http') else base + href,
            })

        return entries

    except Exception as e:
        log(f" Fehler beim Scrapen: {e}")
        return []
|
||||||
|
|
||||||
|
|
||||||
|
def scrape_to_page(url: str) -> dict:
    """Scrape decision text and verbatim minutes from an agenda-item (TO) page.

    Returns an info dict, or {} when the request fails.
    """
    try:
        resp = httpx.get(url, timeout=30, follow_redirects=True)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, 'html.parser')

        info = {
            'beschlusstext': None,
            'wortprotokoll': None,
            'sitzung_datum': None,
        }

        # Session date from the page title, e.g. "30.01.2025 - 6.4 Mündlicher...".
        heading = soup.find('h1', class_='title')
        if heading:
            date_hit = re.search(r'(\d{2}\.\d{2}\.\d{4})', heading.get_text())
            if date_hit:
                info['sitzung_datum'] = date_hit.group(1)

        # Decision text and minutes live in Arial-styled <span> tags.
        spans = soup.find_all('span', style=re.compile(r'font-family.*Arial'))
        texts = [t for t in (s.get_text(strip=True) for s in spans) if t]

        if texts:
            # Page convention: the last span is the decision text,
            # everything before it belongs to the verbatim protocol.
            info['beschlusstext'] = texts[-1]
            if len(texts) > 1:
                info['wortprotokoll'] = '\n\n'.join(texts[:-1])

        return info

    except Exception as e:
        log(f" TO-Fehler: {e}")
        return {}
|
||||||
|
|
||||||
|
|
||||||
|
def process_vorlage(conn, vorlage: dict) -> int:
    """Scrape and persist the Beratungsfolge of one Vorlage.

    Args:
        conn: Open SQLite connection.
        vorlage: Row dict with at least 'id' and 'web_url'.

    Returns:
        Number of beratungen rows written (updated or inserted).
    """
    vorlage_id = vorlage['id']
    web_url = vorlage['web_url']

    if not web_url:
        return 0

    # Scrape the consultation sequence from the Vorlage page.
    beratungen = scrape_vorlage_page(web_url)

    if not beratungen:
        return 0

    saved = 0
    for b in beratungen:
        time.sleep(DELAY_SECONDS)

        # Fetch decision text / minutes from the linked TO page.
        to_details = scrape_to_page(b['to_url'])

        try:
            # Try to update an existing row first.
            cursor = conn.execute("""
                UPDATE beratungen
                SET to_url = ?, tolfdnr = ?, beschlussart = ?,
                    beschlusstext = ?, wortprotokoll = ?, scraped_at = CURRENT_TIMESTAMP
                WHERE vorlage_id = ? AND (tolfdnr = ? OR tolfdnr IS NULL)
            """, (
                b['to_url'],
                b['tolfdnr'],
                b['beschlussart'],
                to_details.get('beschlusstext'),
                to_details.get('wortprotokoll'),
                vorlage_id,
                b['tolfdnr'],
            ))

            # BUG FIX: the original tested `conn.total_changes == 0`, but
            # total_changes is cumulative over the connection's lifetime —
            # after the first successful change it is never 0 again, so
            # rows not matched by the UPDATE were silently dropped.
            # Cursor.rowcount reports the rows affected by THIS statement.
            if cursor.rowcount == 0:
                # No existing row matched: insert a new one.
                conn.execute("""
                    INSERT INTO beratungen
                    (vorlage_id, to_url, tolfdnr, beschlussart, beschlusstext, wortprotokoll, scraped_at)
                    VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
                """, (
                    vorlage_id,
                    b['to_url'],
                    b['tolfdnr'],
                    b['beschlussart'],
                    to_details.get('beschlusstext'),
                    to_details.get('wortprotokoll'),
                ))

            conn.commit()
            saved += 1
        except Exception as e:
            log(f" DB-Fehler: {e}")

    return saved
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: scrape the Beratungsfolge for unprocessed Vorlagen.

    Processes up to --limit Vorlagen that have a web_url and no
    beratung_status flag yet. Always returns 0.
    """
    parser = argparse.ArgumentParser(description="Beratungsfolge scrapen")
    parser.add_argument("--limit", type=int, default=50, help="Max. Vorlagen")
    args = parser.parse_args()

    log("=== Beratungsfolge-Scraper ===")
    log(f"Limit: {args.limit}")

    conn = get_db()
    init_tables(conn)

    # Vorlagen with a web_url that were not scraped yet
    # (tracked via the beratung_status flag).
    vorlagen = conn.execute("""
        SELECT id, aktenzeichen, web_url
        FROM vorlagen
        WHERE web_url IS NOT NULL
        AND beratung_status IS NULL
        ORDER BY datum_eingang DESC
        LIMIT ?
    """, (args.limit,)).fetchall()

    log(f"Zu verarbeiten: {len(vorlagen)}")

    total_beratungen = 0
    for i, v in enumerate(vorlagen):
        log(f"[{i+1}/{len(vorlagen)}] {v['aktenzeichen']}...")
        time.sleep(DELAY_SECONDS)

        count = process_vorlage(conn, dict(v))
        total_beratungen += count

        # Mark the Vorlage as done even when no Beratung was found,
        # so it is not re-scraped on the next run.
        conn.execute("UPDATE vorlagen SET beratung_status = 'done' WHERE id = ?", (v['id'],))
        conn.commit()

        if count > 0:
            log(f" ✓ {count} Beratungen")
        else:
            log(" - Keine Beratungsfolge")

    conn.close()

    log("\n=== Fertig ===")
    log(f"Beratungen gespeichert: {total_beratungen}")

    # NOTE: the original ended with a "remaining" query guarded by
    # `if False` — dead code that would also have used the already-closed
    # connection. Removed; the exit code is always 0.
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: propagate main()'s return value as the process
# exit code (consumed by the shell batch runners).
if __name__ == "__main__":
    import sys
    sys.exit(main())
|
||||||
BIN
tracker.db
Normal file
BIN
tracker.db
Normal file
Binary file not shown.
BIN
tracker.db-shm
Normal file
BIN
tracker.db-shm
Normal file
Binary file not shown.
0
tracker.db-wal
Normal file
0
tracker.db-wal
Normal file
0
tracker.db.broken
Normal file
0
tracker.db.broken
Normal file
Loading…
Reference in New Issue
Block a user