Compare commits
No commits in common. "main" and "v1.0.2" have entirely different histories.
40
.coveragerc
40
.coveragerc
@ -1,40 +0,0 @@
|
||||
[run]
|
||||
source = app
|
||||
omit =
|
||||
# Hilfs-Skripte und Migrations-Tools — nicht produktiver Code
|
||||
app/reindex_embeddings.py
|
||||
app/sync_abgeordnetenwatch.py
|
||||
# Generated / Auto-Discovery
|
||||
app/__init__.py
|
||||
|
||||
[report]
|
||||
# Faustregel ADR 0007: keine 100%-Jagd, aber kritische Pfade abdecken.
|
||||
# show_missing-Flag macht Luecken im CI-Output sofort sichtbar.
|
||||
#
|
||||
# fail_under=50 ist die aktuelle Baseline (Stand 2026-04-28). Verbleibende
|
||||
# unabgedeckte Bereiche brauchen integration-Setup statt Unit-Tests:
|
||||
# - app/main.py (FastAPI-Endpoints, ~900 LOC) — TestClient-Smoke-Tests
|
||||
# sind lokal geskippt mangels voller Deps; laufen in der Docker-Suite.
|
||||
# - app/parlamente.py (16 Adapter, ~3400 LOC) — Live-HTTP gegen Landtage,
|
||||
# tests/integration/ deckt das ab.
|
||||
# - app/queue.py _worker (async-Loop, while True, hart zu testen).
|
||||
# - app/report.py WeasyPrint-PDF-Render-Pfade.
|
||||
# - app/embeddings.py OpenAI/DashScope-Calls.
|
||||
# Schwelle hochsetzen, wenn integration-Suite lokal lauffaehig wird.
|
||||
show_missing = true
|
||||
skip_covered = false
|
||||
precision = 1
|
||||
fail_under = 50
|
||||
|
||||
# Zeilen, die nicht gezaehlt werden sollen — typische Boilerplate ohne
|
||||
# eigentliche Testbarkeit.
|
||||
exclude_lines =
|
||||
pragma: no cover
|
||||
def __repr__
|
||||
raise NotImplementedError
|
||||
if __name__ == .__main__.:
|
||||
if TYPE_CHECKING:
|
||||
\.\.\.
|
||||
|
||||
[html]
|
||||
directory = htmlcov
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
@ -18,8 +18,4 @@ reports/
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
site/
|
||||
|
||||
# Coverage reports (Phase 3 von #134, ADR 0007)
|
||||
.coverage
|
||||
.coverage.*
|
||||
htmlcov/
|
||||
|
||||
@ -71,52 +71,6 @@ def load_context_file(name: str) -> str:
|
||||
return ""
|
||||
|
||||
|
||||
USER_PROMPT_TEMPLATE = """Analysiere den folgenden Antrag:
|
||||
|
||||
<kontext>
|
||||
{bundesland_context}
|
||||
</kontext>
|
||||
|
||||
<wahlprogramm_zitate>
|
||||
{quotes_context}
|
||||
</wahlprogramm_zitate>
|
||||
|
||||
<antrag>
|
||||
{text}
|
||||
</antrag>
|
||||
|
||||
**PFLICHT-FRAKTIONEN:** Du MUSST ALLE folgenden Fraktionen der aktuellen Wahlperiode in `wahlprogrammScores` bewerten — keine auslassen:
|
||||
{pflicht_fraktionen}
|
||||
|
||||
Bewerte nach GWÖ-Matrix 2.0 für Gemeinden:
|
||||
1. GWÖ-Treue (0-10) mit Matrix-Zuordnung und Symbolen (++/+/○/−/−−)
|
||||
2. Wahlprogrammtreue JEDER der oben genannten Pflicht-Fraktionen (0-10)
|
||||
3. Parteiprogrammtreue JEDER der oben genannten Pflicht-Fraktionen (0-10)
|
||||
4. Bis zu 3 Verbesserungsvorschläge in Redline-Syntax
|
||||
5. Themen-Tags für Kategorisierung
|
||||
|
||||
**ZITATEREGEL — STRIKT:** In jedem ``wahlprogrammScores[].wahlprogramm.zitate[].quelle``
|
||||
und ``parteiprogrammScores[].parteiprogramm.zitate[].quelle`` musst du **wortgleich**
|
||||
einen der oben in ``<wahlprogramm_zitate>`` aufgelisteten Quellen-Labels (Programm-Name +
|
||||
Seite) übernehmen — z.B. ``"CDU Mecklenburg-Vorpommern Wahlprogramm 2021, S. 33"``.
|
||||
Erfinde keine Quellen aus deinem Trainingswissen. Nimm keine Quelle aus einem anderen
|
||||
Bundesland (z.B. NRW 2022) als die hier aufgelisteten — selbst wenn dir die dortigen
|
||||
Programme bekannter sind. Findest du oben für eine Partei keinen passenden Chunk, lass
|
||||
``zitate`` leer (``[]``) und vermerke das in der ``begruendung``.
|
||||
|
||||
Ausgabe als reines JSON ohne Markdown-Codeblöcke."""
|
||||
|
||||
|
||||
def get_user_prompt_template() -> str:
|
||||
"""Public Template-String fuer Transparenz-Seite (#145).
|
||||
|
||||
Enthaelt die Platzhalter ``{bundesland_context}``, ``{quotes_context}``,
|
||||
``{text}`` und ``{pflicht_fraktionen}`` — gerendert wird in
|
||||
``analyze_text`` direkt via ``.format(...)``.
|
||||
"""
|
||||
return USER_PROMPT_TEMPLATE
|
||||
|
||||
|
||||
def get_system_prompt() -> str:
|
||||
"""Build the system prompt with GWÖ matrix context."""
|
||||
return """Du bist ein Experte für Gemeinwohl-Ökonomie (GWÖ) und parlamentarische Analyse. Du bewertest Anträge aus Landesparlamenten systematisch nach drei Dimensionen:
|
||||
@ -362,12 +316,40 @@ async def analyze_antrag(
|
||||
quotes = find_relevant_quotes(text, fraktionen, bundesland=bundesland)
|
||||
quotes_context = format_quote_for_prompt(quotes)
|
||||
|
||||
user_prompt = USER_PROMPT_TEMPLATE.format(
|
||||
bundesland_context=bundesland_context,
|
||||
quotes_context=quotes_context if quotes_context else "Keine relevanten Zitate gefunden.",
|
||||
text=text,
|
||||
pflicht_fraktionen=", ".join(BUNDESLAENDER[bundesland].landtagsfraktionen),
|
||||
)
|
||||
user_prompt = f"""Analysiere den folgenden Antrag:
|
||||
|
||||
<kontext>
|
||||
{bundesland_context}
|
||||
</kontext>
|
||||
|
||||
<wahlprogramm_zitate>
|
||||
{quotes_context if quotes_context else "Keine relevanten Zitate gefunden."}
|
||||
</wahlprogramm_zitate>
|
||||
|
||||
<antrag>
|
||||
{text}
|
||||
</antrag>
|
||||
|
||||
**PFLICHT-FRAKTIONEN:** Du MUSST ALLE folgenden Fraktionen der aktuellen Wahlperiode in `wahlprogrammScores` bewerten — keine auslassen:
|
||||
{', '.join(BUNDESLAENDER[bundesland].landtagsfraktionen)}
|
||||
|
||||
Bewerte nach GWÖ-Matrix 2.0 für Gemeinden:
|
||||
1. GWÖ-Treue (0-10) mit Matrix-Zuordnung und Symbolen (++/+/○/−/−−)
|
||||
2. Wahlprogrammtreue JEDER der oben genannten Pflicht-Fraktionen (0-10)
|
||||
3. Parteiprogrammtreue JEDER der oben genannten Pflicht-Fraktionen (0-10)
|
||||
4. Bis zu 3 Verbesserungsvorschläge in Redline-Syntax
|
||||
5. Themen-Tags für Kategorisierung
|
||||
|
||||
**ZITATEREGEL — STRIKT:** In jedem ``wahlprogrammScores[].wahlprogramm.zitate[].quelle``
|
||||
und ``parteiprogrammScores[].parteiprogramm.zitate[].quelle`` musst du **wortgleich**
|
||||
einen der oben in ``<wahlprogramm_zitate>`` aufgelisteten Quellen-Labels (Programm-Name +
|
||||
Seite) übernehmen — z.B. ``"CDU Mecklenburg-Vorpommern Wahlprogramm 2021, S. 33"``.
|
||||
Erfinde keine Quellen aus deinem Trainingswissen. Nimm keine Quelle aus einem anderen
|
||||
Bundesland (z.B. NRW 2022) als die hier aufgelisteten — selbst wenn dir die dortigen
|
||||
Programme bekannter sind. Findest du oben für eine Partei keinen passenden Chunk, lass
|
||||
``zitate`` leer (``[]``) und vermerke das in der ``begruendung``.
|
||||
|
||||
Ausgabe als reines JSON ohne Markdown-Codeblöcke."""
|
||||
|
||||
# LLM-Call über den Port. Retry-Loop + Markdown-Stripping wohnen im
|
||||
# Adapter (``QwenBewerter``). Bei exhausted retries wirft er
|
||||
|
||||
@ -61,9 +61,6 @@ class Settings(BaseSettings):
|
||||
gitea_api_url: str = "https://repo.toppyr.de/api/v1"
|
||||
gitea_repo_owner: str = "tobias"
|
||||
gitea_repo_name: str = "gwoe-antragspruefer"
|
||||
# Komma-getrennte Liste zusätzlicher Labels, die Feedback-Issues bekommen.
|
||||
# Auf Dev: "feedback,dev" — damit Issues aus gwoe-dev.toppyr.de unterscheidbar sind.
|
||||
gitea_feedback_labels: str = "feedback"
|
||||
|
||||
model_config = {"env_file": ".env", "env_file_encoding": "utf-8"}
|
||||
|
||||
|
||||
111
app/database.py
111
app/database.py
@ -259,32 +259,6 @@ async def init_db():
|
||||
)
|
||||
""")
|
||||
|
||||
# Fraktions-aggregierte Abstimmungsergebnisse aus Plenarprotokollen (#106).
|
||||
# Granularitaet: "GRUENE und SPD haben zugestimmt", nicht pro MP — das
|
||||
# ist der Datentyp, der aus deterministischen Parsern wie
|
||||
# app/protokoll_parsers/ rauskommt.
|
||||
# Compound-PK ueber quelle_protokoll, weil eine Drucksache mehrfach
|
||||
# abgestimmt werden kann (Ausschuss-Empfehlung + Plenum-Beschluss).
|
||||
await db.execute("""
|
||||
CREATE TABLE IF NOT EXISTS plenum_vote_results (
|
||||
bundesland TEXT NOT NULL,
|
||||
drucksache TEXT NOT NULL,
|
||||
ergebnis TEXT NOT NULL,
|
||||
einstimmig INTEGER NOT NULL DEFAULT 0,
|
||||
fraktionen_ja TEXT NOT NULL DEFAULT '[]',
|
||||
fraktionen_nein TEXT NOT NULL DEFAULT '[]',
|
||||
fraktionen_enthaltung TEXT NOT NULL DEFAULT '[]',
|
||||
quelle_protokoll TEXT NOT NULL,
|
||||
quelle_url TEXT,
|
||||
parsed_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
PRIMARY KEY (bundesland, drucksache, quelle_protokoll)
|
||||
)
|
||||
""")
|
||||
await db.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_pvr_bl_ds "
|
||||
"ON plenum_vote_results(bundesland, drucksache)"
|
||||
)
|
||||
|
||||
await db.commit()
|
||||
|
||||
|
||||
@ -1207,88 +1181,3 @@ async def get_monitoring_new_today(scan_date: str) -> list[dict]:
|
||||
pass
|
||||
result.append(d)
|
||||
return result
|
||||
|
||||
|
||||
# ─── Plenum-Vote-Results (#106) ─────────────────────────────────────────────
|
||||
# Fraktions-aggregierte Abstimmungsergebnisse aus Plenarprotokollen.
|
||||
# Quelle: app/protokoll_parsers/ (NRW). BL-uebergreifender Parser ist #126.
|
||||
|
||||
async def upsert_plenum_vote(
|
||||
*,
|
||||
bundesland: str,
|
||||
drucksache: str,
|
||||
ergebnis: str,
|
||||
einstimmig: bool,
|
||||
fraktionen_ja: list[str],
|
||||
fraktionen_nein: list[str],
|
||||
fraktionen_enthaltung: list[str],
|
||||
quelle_protokoll: str,
|
||||
quelle_url: Optional[str] = None,
|
||||
) -> None:
|
||||
"""Schreibt ein Abstimmungsergebnis aus einem Plenarprotokoll.
|
||||
|
||||
Idempotent ueber den Compound-PK (bundesland, drucksache, quelle_protokoll):
|
||||
derselbe Eintrag aus demselben Protokoll wird upgesertet, mehrfach-Voten
|
||||
derselben Drucksache aus verschiedenen Protokollen behalten beide Eintraege.
|
||||
"""
|
||||
import json as _json
|
||||
async with aiosqlite.connect(settings.db_path) as db:
|
||||
await db.execute(
|
||||
"""
|
||||
INSERT INTO plenum_vote_results
|
||||
(bundesland, drucksache, ergebnis, einstimmig,
|
||||
fraktionen_ja, fraktionen_nein, fraktionen_enthaltung,
|
||||
quelle_protokoll, quelle_url)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT(bundesland, drucksache, quelle_protokoll) DO UPDATE SET
|
||||
ergebnis = excluded.ergebnis,
|
||||
einstimmig = excluded.einstimmig,
|
||||
fraktionen_ja = excluded.fraktionen_ja,
|
||||
fraktionen_nein = excluded.fraktionen_nein,
|
||||
fraktionen_enthaltung = excluded.fraktionen_enthaltung,
|
||||
quelle_url = excluded.quelle_url,
|
||||
parsed_at = datetime('now')
|
||||
""",
|
||||
(
|
||||
bundesland,
|
||||
drucksache,
|
||||
ergebnis,
|
||||
1 if einstimmig else 0,
|
||||
_json.dumps(fraktionen_ja, ensure_ascii=False),
|
||||
_json.dumps(fraktionen_nein, ensure_ascii=False),
|
||||
_json.dumps(fraktionen_enthaltung, ensure_ascii=False),
|
||||
quelle_protokoll,
|
||||
quelle_url,
|
||||
),
|
||||
)
|
||||
await db.commit()
|
||||
|
||||
|
||||
async def get_plenum_votes(bundesland: str, drucksache: str) -> list[dict]:
|
||||
"""Alle Plenarprotokoll-Abstimmungen fuer eine Drucksache, neueste zuerst.
|
||||
|
||||
Eine Drucksache kann mehrfach abgestimmt werden (z.B. Ueberweisung +
|
||||
finale Beschlussfassung), deshalb Liste statt Single.
|
||||
"""
|
||||
import json as _json
|
||||
async with aiosqlite.connect(settings.db_path) as db:
|
||||
db.row_factory = aiosqlite.Row
|
||||
rows = await db.execute(
|
||||
"""
|
||||
SELECT * FROM plenum_vote_results
|
||||
WHERE bundesland = ? AND drucksache = ?
|
||||
ORDER BY parsed_at DESC
|
||||
""",
|
||||
(bundesland, drucksache),
|
||||
)
|
||||
out = []
|
||||
for r in await rows.fetchall():
|
||||
d = dict(r)
|
||||
d["einstimmig"] = bool(d.get("einstimmig"))
|
||||
for key in ("fraktionen_ja", "fraktionen_nein", "fraktionen_enthaltung"):
|
||||
try:
|
||||
d[key] = _json.loads(d.get(key) or "[]")
|
||||
except Exception:
|
||||
d[key] = []
|
||||
out.append(d)
|
||||
return out
|
||||
|
||||
@ -1,170 +0,0 @@
|
||||
"""BL-uebergreifende Ingest-CLI fuer Plenarprotokolle (#106 / #126).
|
||||
|
||||
Pipeline:
|
||||
1. PDF laden (Pfad oder URL)
|
||||
2. ``protokoll_parsers.parse_protocol(bundesland, pdf_path)`` waehlt den
|
||||
BL-spezifischen Parser aus der Registry
|
||||
3. ``upsert_plenum_vote()`` schreibt jede Abstimmung in die DB
|
||||
|
||||
CLI:
|
||||
python -m app.ingest_votes --pdf MMP18-119.pdf
|
||||
python -m app.ingest_votes --url https://landtag.nrw.de/.../MMP18-119.pdf
|
||||
python -m app.ingest_votes --pdf x.pdf --bundesland NRW --protokoll-id MMP18-119
|
||||
python -m app.ingest_votes --supported # Liste der BL mit Parser
|
||||
|
||||
Aktuell registriert: NRW. Folge-BL via app/protokoll_parsers/<bl>.py + Eintrag
|
||||
in PROTOKOLL_PARSERS — siehe ADR 0009.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import logging
|
||||
import sys
|
||||
import tempfile
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from .protokoll_parsers import parse_protocol, supported_bundeslaender
|
||||
from .database import upsert_plenum_vote
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _derive_protokoll_id(pdf_path: Path) -> str:
|
||||
"""Ermittle Protokoll-ID aus dem Datei-Stem (z.B. 'MMP18-119.pdf' → 'MMP18-119')."""
|
||||
return pdf_path.stem
|
||||
|
||||
|
||||
def _download_pdf(url: str, dest: Path) -> Path:
|
||||
"""Lade ein PDF von einer URL in einen Pfad. Wirft bei HTTP-Fehlern."""
|
||||
req = urllib.request.Request(
|
||||
url,
|
||||
headers={"User-Agent": "GWOeAntragspruefer/1.0 (+https://gwoe.toppyr.de)"},
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=60) as resp:
|
||||
dest.write_bytes(resp.read())
|
||||
return dest
|
||||
|
||||
|
||||
async def ingest_pdf(
|
||||
pdf_path: Path,
|
||||
*,
|
||||
bundesland: str = "NRW",
|
||||
protokoll_id: Optional[str] = None,
|
||||
quelle_url: Optional[str] = None,
|
||||
) -> dict:
|
||||
"""Parse das PDF mit dem BL-Parser und schreibe alle Abstimmungen in die DB.
|
||||
|
||||
Returns:
|
||||
Statistik-Dict ``{parsed, written, skipped_no_drucksache, errors,
|
||||
protokoll_id, bundesland}``.
|
||||
|
||||
Raises:
|
||||
NotImplementedError: wenn fuer ``bundesland`` kein Parser registriert ist.
|
||||
"""
|
||||
pid = protokoll_id or _derive_protokoll_id(pdf_path)
|
||||
parsed = parse_protocol(bundesland, str(pdf_path))
|
||||
|
||||
written = 0
|
||||
skipped_no_ds = 0
|
||||
errors: list[str] = []
|
||||
|
||||
for entry in parsed:
|
||||
ds = entry.get("drucksache")
|
||||
if not ds:
|
||||
skipped_no_ds += 1
|
||||
continue
|
||||
try:
|
||||
await upsert_plenum_vote(
|
||||
bundesland=bundesland,
|
||||
drucksache=ds,
|
||||
ergebnis=entry["ergebnis"],
|
||||
einstimmig=bool(entry.get("einstimmig", False)),
|
||||
fraktionen_ja=entry.get("votes", {}).get("ja", []),
|
||||
fraktionen_nein=entry.get("votes", {}).get("nein", []),
|
||||
fraktionen_enthaltung=entry.get("votes", {}).get("enthaltung", []),
|
||||
quelle_protokoll=pid,
|
||||
quelle_url=quelle_url,
|
||||
)
|
||||
written += 1
|
||||
except Exception as exc:
|
||||
logger.exception("Upsert fehlgeschlagen fuer %s", ds)
|
||||
errors.append(f"{ds}: {exc}")
|
||||
|
||||
return {
|
||||
"parsed": len(parsed),
|
||||
"written": written,
|
||||
"skipped_no_drucksache": skipped_no_ds,
|
||||
"errors": errors,
|
||||
"protokoll_id": pid,
|
||||
"bundesland": bundesland,
|
||||
}
|
||||
|
||||
|
||||
def _cli() -> None:
|
||||
logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Plenarprotokoll → plenum_vote_results (#106 / #126)",
|
||||
)
|
||||
src = parser.add_mutually_exclusive_group(required=False)
|
||||
src.add_argument("--pdf", help="Pfad zu lokalem PDF")
|
||||
src.add_argument("--url", help="HTTP(S)-URL zum PDF")
|
||||
parser.add_argument("--bundesland", default="NRW",
|
||||
help="Bundesland-Code (default: NRW)")
|
||||
parser.add_argument("--protokoll-id",
|
||||
help="Protokoll-ID (default: aus Datei-Stem)")
|
||||
parser.add_argument("--supported", action="store_true",
|
||||
help="Liste alle BL-Codes mit registriertem Parser")
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.supported:
|
||||
for bl in supported_bundeslaender():
|
||||
print(bl)
|
||||
sys.exit(0)
|
||||
|
||||
if not args.pdf and not args.url:
|
||||
parser.error("--pdf oder --url ist erforderlich")
|
||||
|
||||
if args.url:
|
||||
# Download in tmp und nach dem Run wieder loeschen
|
||||
with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
|
||||
tmp_path = Path(tmp.name)
|
||||
try:
|
||||
print(f"Lade {args.url} → {tmp_path} …")
|
||||
_download_pdf(args.url, tmp_path)
|
||||
pid = args.protokoll_id or args.url.rsplit("/", 1)[-1].rsplit(".", 1)[0]
|
||||
stats = asyncio.run(ingest_pdf(
|
||||
tmp_path, bundesland=args.bundesland,
|
||||
protokoll_id=pid, quelle_url=args.url,
|
||||
))
|
||||
finally:
|
||||
tmp_path.unlink(missing_ok=True)
|
||||
else:
|
||||
pdf_path = Path(args.pdf)
|
||||
if not pdf_path.exists():
|
||||
print(f"FEHLER: PDF nicht gefunden: {pdf_path}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
stats = asyncio.run(ingest_pdf(
|
||||
pdf_path, bundesland=args.bundesland,
|
||||
protokoll_id=args.protokoll_id,
|
||||
))
|
||||
|
||||
print()
|
||||
print(f"Protokoll {stats['protokoll_id']} ({stats['bundesland']})")
|
||||
print(f" parsed: {stats['parsed']}")
|
||||
print(f" written: {stats['written']}")
|
||||
if stats["skipped_no_drucksache"]:
|
||||
print(f" ohne DS: {stats['skipped_no_drucksache']}")
|
||||
if stats["errors"]:
|
||||
print(f" errors: {len(stats['errors'])}")
|
||||
for e in stats["errors"][:5]:
|
||||
print(f" {e}")
|
||||
if stats["written"] == 0 and not stats["errors"]:
|
||||
sys.exit(2)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
_cli()
|
||||
22
app/main.py
22
app/main.py
@ -304,13 +304,6 @@ async def antrag_detail(request: Request, drucksache: str, current_user: Optiona
|
||||
except Exception:
|
||||
logger.exception("Fehler beim Laden von Abstimmungsverhalten für %s", drucksache)
|
||||
antrag["abstimmungsverhalten"] = None
|
||||
# #106 Phase 2: fraktions-aggregierte Plenum-Abstimmungen aus Plenarprotokollen
|
||||
try:
|
||||
from .database import get_plenum_votes as _gpv
|
||||
antrag["plenum_votes"] = await _gpv(antrag.get("bundesland") or "NRW", drucksache)
|
||||
except Exception:
|
||||
logger.exception("Fehler beim Laden plenum_vote_results für %s", drucksache)
|
||||
antrag["plenum_votes"] = []
|
||||
from .models import MATRIX_LABELS
|
||||
return templates.TemplateResponse("v2/screens/antrag_detail.html", {
|
||||
"request": request,
|
||||
@ -1742,7 +1735,6 @@ async def methodik_page(request: Request, current_user: Optional[dict] = Depends
|
||||
"""Transparenz-/Methodik-Seite (#96)."""
|
||||
from .bundeslaender import aktive_bundeslaender, BUNDESLAENDER
|
||||
from .embeddings import get_indexing_status
|
||||
from .analyzer import get_system_prompt, get_user_prompt_template
|
||||
|
||||
bl_list = []
|
||||
for bl in aktive_bundeslaender():
|
||||
@ -1763,8 +1755,6 @@ async def methodik_page(request: Request, current_user: Optional[dict] = Depends
|
||||
"programme_count": status.get("total", 0),
|
||||
"chunk_count": sum(p.get("chunks", 0) for p in status.get("programmes", [])),
|
||||
"bundeslaender": sorted(bl_list, key=lambda x: x["name"]),
|
||||
"system_prompt": get_system_prompt(),
|
||||
"user_prompt_template": get_user_prompt_template(),
|
||||
**_v2_template_context(current_user),
|
||||
})
|
||||
|
||||
@ -2742,15 +2732,9 @@ async def submit_feedback(
|
||||
headers = {"Authorization": f"token {token}", "Content-Type": "application/json"}
|
||||
|
||||
async with httpx.AsyncClient(timeout=15.0) as session:
|
||||
# Konfigurierbare Label-Liste (Default "feedback"; Dev setzt "feedback,dev")
|
||||
label_names = [s.strip() for s in (settings.gitea_feedback_labels or "feedback").split(",") if s.strip()]
|
||||
label_color_map = {"feedback": "#e11d48", "dev": "#f59e0b"}
|
||||
label_ids: list[int] = []
|
||||
for name in label_names:
|
||||
color = label_color_map.get(name, "#6b7280")
|
||||
lid = await _gitea_ensure_label(session, base_url, owner, repo, token, name, color)
|
||||
if lid:
|
||||
label_ids.append(lid)
|
||||
# Label sicherstellen
|
||||
label_id = await _gitea_ensure_label(session, base_url, owner, repo, token, "feedback")
|
||||
label_ids = [label_id] if label_id else []
|
||||
|
||||
# Issue anlegen
|
||||
payload = {
|
||||
|
||||
@ -3206,10 +3206,7 @@ class SaarlandAdapter(ParlamentAdapter):
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
logger.error("SL HTTP %s: %s", resp.status_code, resp.text[:200])
|
||||
raise httpx.HTTPStatusError(
|
||||
f"SL HTTP {resp.status_code}",
|
||||
request=resp.request, response=resp,
|
||||
)
|
||||
return []
|
||||
data = resp.json()
|
||||
return data.get("FilteredResult", []) or []
|
||||
except Exception:
|
||||
|
||||
@ -1,69 +0,0 @@
|
||||
"""BL-uebergreifende Plenarprotokoll-Abstimmungsparser (#126).
|
||||
|
||||
Architektur (vgl. ADR 0009): pro Bundesland eine Modul-Datei
|
||||
``app/protokoll_parsers/<bl-code>.py``, die mindestens eine Funktion
|
||||
``parse_protocol(pdf_path: str) -> list[dict]`` exportiert. Die Registry
|
||||
``PROTOKOLL_PARSERS`` mappt BL-Code → Parser-Funktion.
|
||||
|
||||
Erwartetes Result-Schema pro Eintrag in der Liste::
|
||||
|
||||
{
|
||||
"drucksache": str | None, # z.B. "18/1234"; None bei nicht aufloesbar
|
||||
"ergebnis": str, # angenommen | abgelehnt | ueberwiesen | ...
|
||||
"einstimmig": bool, # explizit als einstimmig markiert
|
||||
"kind": str, # parser-intern, fuer Debug
|
||||
"votes": { # fraktions-Listen pro Vote-Kategorie
|
||||
"ja": list[str],
|
||||
"nein": list[str],
|
||||
"enthaltung": list[str],
|
||||
},
|
||||
}
|
||||
|
||||
NRW ist die Referenz-Implementierung. Folge-BL (HE/BB/MV/BE/...) bekommen
|
||||
eigene Module mit demselben Funktions-Vertrag — neue Eintraege in der
|
||||
Registry sind reine Tippelarbeit, das Reverse-Engineering pro Landtag
|
||||
ist die eigentliche Arbeit.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Callable
|
||||
|
||||
from .nrw import parse_protocol as _parse_nrw
|
||||
|
||||
# Typ-Alias fuer Lesbarkeit; Parser-Signatur ist bewusst minimal.
|
||||
ProtokollParser = Callable[[str], list[dict]]
|
||||
|
||||
PROTOKOLL_PARSERS: dict[str, ProtokollParser] = {
|
||||
"NRW": _parse_nrw,
|
||||
}
|
||||
|
||||
|
||||
def parse_protocol(bundesland: str, pdf_path: str) -> list[dict]:
|
||||
"""BL-uebergreifender Einstieg. Sucht den Parser in der Registry.
|
||||
|
||||
Raises:
|
||||
NotImplementedError: wenn fuer das Bundesland (noch) kein Parser
|
||||
registriert ist. Folge-Issue: BL-Adapter ergaenzen mit einem
|
||||
eigenen Modul plus Eintrag hier.
|
||||
"""
|
||||
parser = PROTOKOLL_PARSERS.get(bundesland)
|
||||
if parser is None:
|
||||
supported = ", ".join(sorted(PROTOKOLL_PARSERS)) or "(keine)"
|
||||
raise NotImplementedError(
|
||||
f"Kein Plenarprotokoll-Parser fuer {bundesland!r}. "
|
||||
f"Unterstuetzt: {supported}. Siehe #126."
|
||||
)
|
||||
return parser(pdf_path)
|
||||
|
||||
|
||||
def supported_bundeslaender() -> list[str]:
|
||||
"""Liste der BL-Codes mit registrierten Parsern."""
|
||||
return sorted(PROTOKOLL_PARSERS)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"ProtokollParser",
|
||||
"PROTOKOLL_PARSERS",
|
||||
"parse_protocol",
|
||||
"supported_bundeslaender",
|
||||
]
|
||||
@ -1,348 +0,0 @@
|
||||
"""NRW-Plenarprotokoll Abstimmungs-Parser v5 (deterministisch, anchor-basiert).
|
||||
|
||||
Neue Architektur: Statt pro Drucksache zu suchen, findet der Parser zuerst
|
||||
alle **Result-Anchors** im Volltext ("Damit ist ... angenommen/abgelehnt/...")
|
||||
und extrahiert pro Anchor rückwärts:
|
||||
1. die zugehörige Drucksache (nächste 18/XXXXX davor, innerhalb ~500 chars)
|
||||
2. den Vote-Block (letztes "Wer stimmt ... zu?" vor dem Anchor)
|
||||
|
||||
Fixture-basierte Tests. Ziel: 18/19 (17824 ist bewusst nicht_gesondert).
|
||||
|
||||
Migriert nach app/ aus dem POC-Skript parser_v5_iteration15.py
|
||||
(2026-04-28, #134/#106). Fitz-Import ist optional — pure-string-Funktionen
|
||||
laufen ohne, parse_protocol() braucht das echte fitz.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import json
|
||||
import sys
|
||||
|
||||
try: # fitz ist optional — pure-string-Funktionen laufen ohne
|
||||
import fitz
|
||||
except ImportError:
|
||||
fitz = None
|
||||
|
||||
FRAKTIONEN_MAP = [
|
||||
("Bündnis 90/Die Grünen", "GRÜNE"),
|
||||
("Bündnis 90", "GRÜNE"),
|
||||
("Grünen", "GRÜNE"),
|
||||
("GRÜNE", "GRÜNE"),
|
||||
("F.D.P.", "FDP"),
|
||||
("FDP", "FDP"),
|
||||
("CDU", "CDU"),
|
||||
("SPD", "SPD"),
|
||||
("AfD", "AfD"),
|
||||
("LINKE", "LINKE"),
|
||||
("BSW", "BSW"),
|
||||
("Landesregierung", "Landesregierung"),
|
||||
]
|
||||
|
||||
ALLE_FRAKTIONEN_NRW = ["CDU", "SPD", "GRÜNE", "FDP", "AfD"]
|
||||
|
||||
|
||||
def normalize_fraktionen(txt):
|
||||
"""Extrahiere Fraktions-Tokens aus einem Text-Abschnitt."""
|
||||
found = set()
|
||||
# Reihenfolge: längere zuerst (damit "Bündnis 90/Die Grünen" vor "Grünen" matcht)
|
||||
remaining = txt
|
||||
for key, val in FRAKTIONEN_MAP:
|
||||
if key in remaining:
|
||||
found.add(val)
|
||||
remaining = remaining.replace(key, "") # Doppel-Match vermeiden
|
||||
return sorted(found)
|
||||
|
||||
|
||||
def _is_empty_phrase(txt):
|
||||
"""Prüft ob der Text eine Negation ausdrückt (niemand, nicht, keine)."""
|
||||
neg = ["niemand", "Niemand", "Keine", "keine", "nicht der Fall",
|
||||
"Auch nicht", "ist nicht", "ist auch nicht", "nicht vor"]
|
||||
return any(n in txt for n in neg)
|
||||
|
||||
|
||||
def _parse_vote_block(block: str) -> dict:
|
||||
"""Extrahiere ja/nein/enthaltung aus dem Text-Block vor einem Result-Anchor.
|
||||
|
||||
Vereinfachter Ansatz: matche bis zum nächsten '?' oder 200 chars.
|
||||
"""
|
||||
votes = {"ja": [], "nein": [], "enthaltung": []}
|
||||
|
||||
# JA — letztes Match gewinnt (bei Re-Votes)
|
||||
ja_matches = list(re.finditer(
|
||||
r"Wer stimmt(?! dagegen)[^?]{0,80}zu\?\s*[–-]?\s*([^?]{1,250})",
|
||||
block
|
||||
))
|
||||
if ja_matches:
|
||||
g = ja_matches[-1].group(1)
|
||||
if not _is_empty_phrase(g):
|
||||
votes["ja"] = normalize_fraktionen(g)
|
||||
|
||||
# NEIN
|
||||
nein_patterns = [
|
||||
r"Wer stimmt dagegen\?\s*[–-]?\s*([^?]{1,200})",
|
||||
r"Wer lehnt[^?]{0,30}ab\?\s*[–-]?\s*([^?]{1,200})",
|
||||
r"Stimmt jemand dagegen\?\s*[–-]?\s*([^?]{1,120})",
|
||||
r"Ist jemand dagegen\?\s*[–-]?\s*([^?]{1,120})",
|
||||
]
|
||||
for pat in nein_patterns:
|
||||
matches = list(re.finditer(pat, block))
|
||||
if matches:
|
||||
g = matches[-1].group(1)
|
||||
votes["nein"] = [] if _is_empty_phrase(g) else normalize_fraktionen(g)
|
||||
break
|
||||
|
||||
# ENTHALTUNG
|
||||
enth_patterns = [
|
||||
r"Wer enthält sich\?\s*[–-]?\s*([^?]{1,200})",
|
||||
r"Gibt es Enthaltungen\?\s*[–-]?\s*([^?]{1,200})",
|
||||
r"Enthält sich jemand\?\s*[–-]?\s*([^?]{1,120})",
|
||||
r"Möchte sich jemand enthalten\?\s*[–-]?\s*([^?]{1,120})",
|
||||
]
|
||||
for pat in enth_patterns:
|
||||
matches = list(re.finditer(pat, block))
|
||||
if matches:
|
||||
g = matches[-1].group(1)
|
||||
votes["enthaltung"] = [] if _is_empty_phrase(g) else normalize_fraktionen(g)
|
||||
break
|
||||
|
||||
# Implizite leere Enthaltungen: "Enthaltungen gibt es damit nicht"
|
||||
if not votes["enthaltung"] and re.search(r"Enthaltungen\s+gibt\s+es\s+damit\s+nicht", block):
|
||||
votes["enthaltung"] = []
|
||||
|
||||
return votes
|
||||
|
||||
|
||||
# Result-Anchors: Pattern → (ergebnis, is_ueberweisung)
|
||||
# v6: Broad-Anchor-Matches für alle direkten Varianten.
|
||||
# Type 'direct_broad': matcht "Damit/Somit ist der/dieser/die Antrag/Gesetzentwurf/...
|
||||
# ... angenommen/abgelehnt/überwiesen/verabschiedet" — Drucksache wird
|
||||
# separat aus dem Match-Span extrahiert (oder aus dem vorangehenden Segment).
|
||||
RESULT_ANCHORS = [
|
||||
# Broad direct-result pattern (deckt fast alle Varianten ab).
|
||||
# "beschlossen" = bei direkter Abstimmung eines Antrags = angenommen
|
||||
(r"(?:Damit|Somit) ist (?:der|dieser|die|diese) (?:Antrag|Gesetzentwurf|Änderungsantrag|Wahlvorschlag|Entschließungsantrag|Beschlussempfehlung)[^.]{0,200}?(angenommen|abgelehnt|überwiesen|zurückgezogen|verabschiedet|beschlossen)", "direct_broad"),
|
||||
# Variante ohne führendes "Damit/Somit ist": "Dieser Antrag Drucksache X ist somit ... abgelehnt"
|
||||
(r"Dieser (?:Antrag|Gesetzentwurf|Änderungsantrag|Wahlvorschlag)[^.]{0,200}?(angenommen|abgelehnt|überwiesen|zurückgezogen|verabschiedet|beschlossen)", "direct_broad"),
|
||||
# Überweisungs-Anchor (Drucksache muss rückwärts gesucht werden)
|
||||
(r"(?:Damit|Somit) ist (?:diese|die)\s+Überweisungsempfehlung\s+(einstimmig\s+|ebenso\s+)?(angenommen)", "ueber"),
|
||||
(r"Somit ist das so beschlossen()()", "ueber"),
|
||||
(r"Damit ist das so beschlossen()()", "ueber"),
|
||||
# "Damit schließt sich der Landtag der Empfehlung des Rechtsausschusses an" — Empfehlung-Beitritt
|
||||
(r"Damit schließt sich der Landtag der Empfehlung[^.]{0,100}?an()()", "ueber"),
|
||||
# Petitionsausschuss-Sammel-Abstimmung
|
||||
(r"Damit sind die Beschlüsse des Petitionsausschusses[^.]{0,100}?bestätigt()()", "petition"),
|
||||
# Übersicht-Bestätigung (§ 82 Abs. 2 GO)
|
||||
(r"Damit sind die in Drucksache (\d+/\d+(?:\(neu\))?) enthaltenen[^.]{0,150}?bestätigt()", "uebersicht"),
|
||||
]
|
||||
|
||||
|
||||
def find_results(text: str) -> list[dict]:
|
||||
"""Finde alle Result-Anchors im Text.
|
||||
|
||||
Returns: Liste von {drucksache, ergebnis, anchor_start, anchor_end, kind, einstimmig}.
|
||||
"""
|
||||
results = []
|
||||
for pat, kind in RESULT_ANCHORS:
|
||||
for m in re.finditer(pat, text):
|
||||
groups = m.groups()
|
||||
ds = None
|
||||
einstimmig = False
|
||||
span_text = text[m.start():m.end()]
|
||||
|
||||
# Für "direct" kind: erste DS-artige Group ist die Drucksache
|
||||
if kind == "direct":
|
||||
for g in groups:
|
||||
if g and re.match(r"^\d+/\d+(?:\(neu\))?$", g):
|
||||
ds = g
|
||||
break
|
||||
# Für "direct_broad": Drucksache innerhalb des Match-Spans suchen
|
||||
elif kind == "direct_broad":
|
||||
ds_match = re.search(r"Drucksache\s+(\d+/\d+(?:\(neu\))?)", span_text)
|
||||
if ds_match:
|
||||
ds = ds_match.group(1)
|
||||
# Ergebnis: suche bekanntes Wort in allen Groups
|
||||
ergebnis = None
|
||||
for g in groups:
|
||||
if g and g.strip() == "einstimmig":
|
||||
einstimmig = True
|
||||
if g and g.strip() in ("angenommen", "abgelehnt", "überwiesen", "zurückgezogen", "verabschiedet", "beschlossen"):
|
||||
ergebnis = g.strip()
|
||||
# "verabschiedet" = angenommen und verabschiedet (Gesetzentwurf)
|
||||
# "beschlossen" (bei direkter Abstimmung) = angenommen
|
||||
if ergebnis in ("verabschiedet", "beschlossen"):
|
||||
ergebnis = "angenommen"
|
||||
if kind == "ueber":
|
||||
ergebnis = "überwiesen"
|
||||
if "einstimmig" in text[m.start():m.end() + 5]:
|
||||
einstimmig = True
|
||||
# "Damit ist das so beschlossen" / "Somit ist das so beschlossen" = implizit einstimmig
|
||||
if "so beschlossen" in text[m.start():m.end() + 5]:
|
||||
einstimmig = True
|
||||
if kind == "petition":
|
||||
ergebnis = "sammel"
|
||||
einstimmig = True
|
||||
if kind == "uebersicht":
|
||||
ergebnis = "bestätigt"
|
||||
einstimmig = True
|
||||
# Drucksache ist in Group[0] des Patterns
|
||||
for g in groups:
|
||||
if g and re.match(r"^\d+/\d+(?:\(neu\))?$", g):
|
||||
ds = g
|
||||
break
|
||||
if not ergebnis:
|
||||
continue
|
||||
results.append({
|
||||
"drucksache": ds,
|
||||
"ergebnis": ergebnis,
|
||||
"kind": kind,
|
||||
"einstimmig": einstimmig,
|
||||
"anchor_start": m.start(),
|
||||
"anchor_end": m.end(),
|
||||
})
|
||||
results.sort(key=lambda r: r["anchor_start"])
|
||||
dedup = []
|
||||
seen_positions = set()
|
||||
for r in results:
|
||||
if r["anchor_start"] in seen_positions:
|
||||
continue
|
||||
seen_positions.add(r["anchor_start"])
|
||||
dedup.append(r)
|
||||
return dedup
|
||||
|
||||
|
||||
def resolve_drucksache_for_ueber(text: str, anchor_start: int) -> str | None:
|
||||
"""Für Überweisungs-Anchors: rückwärts die nächste Drucksache-Nr suchen."""
|
||||
# Schaue bis 2000 chars zurück
|
||||
window_start = max(0, anchor_start - 2000)
|
||||
window = text[window_start:anchor_start]
|
||||
# Letzte Drucksache vor dem Anchor
|
||||
matches = list(re.finditer(r"Drucksache\s+(\d+/\d+(?:\(neu\))?)", window))
|
||||
if not matches:
|
||||
return None
|
||||
return matches[-1].group(1)
|
||||
|
||||
|
||||
def normalize_text(text: str) -> str:
    """Normalize raw PDF text for regex matching.

    Two transformations, in order:
      1. Undo end-of-line hyphenation ("Überweisungs-\\nempfehlung"
         becomes "Überweisungsempfehlung").
      2. Collapse every remaining whitespace run (including newlines)
         into a single space.
    """
    dehyphenated = re.sub(r"-\s*\n\s*", "", text)
    return re.sub(r"\s+", " ", dehyphenated)
|
||||
|
||||
|
||||
def parse_protocol(pdf_path: str) -> list[dict]:
    """Parse a plenary-protocol PDF into a list of vote records.

    Each record carries the Drucksache number, the result ("ergebnis"),
    the per-fraction votes and the character position of the result anchor
    within the normalized protocol text.
    """
    document = fitz.open(pdf_path)
    raw_text = "".join(page.get_text() for page in document)
    document.close()
    text = normalize_text(raw_text)

    anchors = find_results(text)
    records: list[dict] = []

    # Segment boundaries: every vote is introduced by one of these phrases.
    boundaries = [
        hit.start()
        for hit in re.finditer(
            r"(?:(?:Damit|Somit) kommen wir (?:zur|somit zur) Abstimmung|Wir kommen (?:somit )?zur Abstimmung|Wir stimmen(?!\s+zu\?)|(?:Somit|Damit) kommen wir (?:direkt )?zu den Abstimmungen|Wir stimmen zweitens|gehen (?:wir )?zur Abstimmung über|Somit kommen wir sofort zur Abstimmung)",
            text,
        )
    ]

    def last_boundary_before(pos: int) -> int:
        """Last segment boundary before *pos*; fallback: a 1500-char window."""
        earlier = [b for b in boundaries if b < pos]
        return earlier[-1] if earlier else max(0, pos - 1500)

    for anchor in anchors:
        drucksache = anchor["drucksache"]
        if not drucksache:
            # Referral anchors may carry no number of their own — look back.
            drucksache = resolve_drucksache_for_ueber(text, anchor["anchor_start"])
        if not drucksache:
            continue

        # Vote block: from the last segment start up to the anchor end.
        block_begin = last_boundary_before(anchor["anchor_start"])
        vote_block = text[block_begin:anchor["anchor_end"]]

        if anchor["einstimmig"]:
            # Unanimous: every fraction votes yes, regardless of what the
            # text window would yield.
            votes = {"ja": list(ALLE_FRAKTIONEN_NRW), "nein": [], "enthaltung": []}
        else:
            votes = _parse_vote_block(vote_block)
            # Fallback unanimity: a referral anchor that has no own
            # "Wer stimmt ... zu?" block (only the inverse form
            # "Wer stimmt gegen ...?") is, in practice, unanimous.
            if (
                anchor["kind"] == "ueber"
                and not votes["ja"]
                and not votes["nein"]
                and not votes["enthaltung"]
            ):
                votes = {"ja": list(ALLE_FRAKTIONEN_NRW), "nein": [], "enthaltung": []}

        records.append({
            "drucksache": drucksache,
            "ergebnis": anchor["ergebnis"],
            "votes": votes,
            "anchor_pos": anchor["anchor_start"],
        })

    return records
|
||||
|
||||
|
||||
def compare_to_fixture(parsed: list[dict], fixture: dict) -> tuple[int, list]:
    """Compare parser output against the ground-truth fixture.

    Args:
        parsed: Output of parse_protocol() — one dict per detected vote,
            with "drucksache", "ergebnis" and a "votes" dict holding the
            "ja"/"nein"/"enthaltung" fraction lists.
        fixture: Ground truth with a "drucksachen" list; each entry carries
            "drucksache", "ergebnis" and the expected fraction lists. The
            sentinel ergebnis "nicht_gesondert_abgestimmt" means the parser
            is expected to NOT find that Drucksache at all.

    Returns:
        (matches, errors): count of fully matching fixture entries and a
        list of human-readable mismatch descriptions.
    """
    # Group parsed votes by Drucksache — the same number can appear in
    # several anchors; comparison uses the first occurrence.
    parsed_map: dict[str, list[dict]] = {}
    for p in parsed:
        parsed_map.setdefault(p["drucksache"], []).append(p)

    errors: list[str] = []
    matches = 0
    for gt in fixture["drucksachen"]:
        ds = gt["drucksache"]
        gt_erg = gt["ergebnis"]
        if ds not in parsed_map:
            if gt_erg == "nicht_gesondert_abgestimmt":
                # Correctly NOT found.
                matches += 1
            else:
                errors.append(f"{ds}: NOT FOUND")
            continue
        if gt_erg == "nicht_gesondert_abgestimmt":
            errors.append(f"{ds}: expected nicht_gesondert, but parser found it")
            continue

        # If multiple candidates exist, take the first (document order).
        p = parsed_map[ds][0]

        # Bug fix: the original re-read gt["ergebnis"] here and re-checked
        # the "nicht_gesondert_abgestimmt" sentinel a second time — dead
        # code, since that case already `continue`d above. Removed.
        ok = True
        if p["ergebnis"] != gt_erg:
            errors.append(f"{ds}: ergebnis {p['ergebnis']} != {gt_erg}")
            ok = False
        # Fraction lists are order-insensitive; compare sorted copies.
        if sorted(p["votes"]["ja"]) != sorted(gt["ja"]):
            errors.append(f"{ds}: ja {p['votes']['ja']} != {gt['ja']}")
            ok = False
        if sorted(p["votes"]["nein"]) != sorted(gt["nein"]):
            errors.append(f"{ds}: nein {p['votes']['nein']} != {gt['nein']}")
            ok = False
        if sorted(p["votes"]["enthaltung"]) != sorted(gt["enthaltung"]):
            errors.append(f"{ds}: enth {p['votes']['enthaltung']} != {gt['enthaltung']}")
            ok = False
        if ok:
            matches += 1
    return matches, errors
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Smoke test: parse one downloaded protocol and diff the result against
    # the hand-built ground-truth fixture.
    pdf = "/tmp/mmp18-119.pdf"
    fixture_path = "/tmp/nrw_fixture.json"
    # Bug fix: the original used json.load(open(fixture_path)), which leaks
    # the file handle and depends on the platform default encoding.
    with open(fixture_path, encoding="utf-8") as fh:
        fixture = json.load(fh)

    parsed = parse_protocol(pdf)
    print(f"Parsed {len(parsed)} Abstimmungen gesamt")

    matches, errors = compare_to_fixture(parsed, fixture)
    # The fixture contains exactly one "nicht_gesondert" entry, hence -1.
    print(f"Match gegen Fixture: {matches}/{len(fixture['drucksachen']) - 1} (ohne nicht_gesondert)")
    print()
    if errors:
        print("Fehler:")
        for e in errors:
            print(f"  {e}")
|
||||
@ -542,7 +542,7 @@ body.v2 strong, body.v2 b {
|
||||
/* ── Matrix Mini (5×5) ──────────────────────────────────────────── */
|
||||
.v2-matrix-mini {
|
||||
display: grid;
|
||||
grid-template-columns: 130px repeat(5, 1fr);
|
||||
grid-template-columns: 92px repeat(5, 1fr);
|
||||
gap: 0;
|
||||
border: 1px solid var(--hairline);
|
||||
font-size: 11px;
|
||||
@ -555,7 +555,7 @@ body.v2 strong, body.v2 b {
|
||||
border-bottom: 1px solid var(--hairline);
|
||||
font-family: var(--font-mono);
|
||||
text-align: center;
|
||||
min-height: 36px;
|
||||
min-height: 30px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
@ -565,8 +565,8 @@ body.v2 strong, body.v2 b {
|
||||
.v2-matrix-mini > div:nth-child(6n) { border-right: 0; }
|
||||
.v2-matrix-mini > div:nth-last-child(-n+6) { border-bottom: 0; }
|
||||
|
||||
.v2-matrix-mini .hdr { background: var(--ecg-blue); color: #fff; font-size: 10px; letter-spacing: 0.03em; font-weight: 700; line-height: 1.25; cursor: help; padding: 4px 4px; }
|
||||
.v2-matrix-mini .rhdr { background: var(--surface); text-align: left; justify-content: flex-start; padding-left: 10px; color: var(--ecg-dark); font-weight: 700; font-family: var(--font-sans); font-size: 10px; letter-spacing: 0.02em; line-height: 1.25; cursor: help; }
|
||||
.v2-matrix-mini .hdr { background: var(--ecg-blue); color: #fff; font-size: 10px; letter-spacing: 0.04em; font-weight: 700; }
|
||||
.v2-matrix-mini .rhdr { background: var(--surface); text-align: left; justify-content: flex-start; padding-left: 10px; color: var(--ecg-dark); font-weight: 700; font-family: var(--font-sans); text-transform: uppercase; font-size: 10px; letter-spacing: 0.05em; }
|
||||
|
||||
.v2-matrix-mini .m-pp { background: var(--ecg-green); color: #fff; font-weight: 700; }
|
||||
.v2-matrix-mini .m-p { background: var(--redline-ins-bg); color: var(--ecg-dark); }
|
||||
@ -988,13 +988,8 @@ body.v2 ul.v2-manual ul li::before {
|
||||
font-size: 10px;
|
||||
font-weight: 700;
|
||||
line-height: 1;
|
||||
cursor: help; /* Browser zeigt Hilfe-Cursor — Affordanz fuer Tooltip */
|
||||
}
|
||||
|
||||
/* Score-Chips bekommen die gleiche cursor-Affordanz, sodass User merken,
|
||||
dass WP/PP nicht nur Labels sind sondern Tooltips haben (#147). */
|
||||
.v2-score-chip[title] { cursor: help; }
|
||||
|
||||
.v2-badge-antragsteller {
|
||||
background: var(--ecg-blue);
|
||||
color: #fff;
|
||||
|
||||
@ -21,34 +21,8 @@
|
||||
{% macro matrix_mini(matrix) %}
|
||||
{% set rows = ["A", "B", "C", "D", "E"] %}
|
||||
{% set cols = ["1", "2", "3", "4", "5"] %}
|
||||
{% set row_labels = {
|
||||
"A": "A · Lieferant:innen",
|
||||
"B": "B · Finanzen",
|
||||
"C": "C · Verwaltung",
|
||||
"D": "D · Bürger:innen",
|
||||
"E": "E · Gesellschaft & Natur"
|
||||
} %}
|
||||
{% set row_titles = {
|
||||
"A": "Berührungsgruppe A — Lieferant:innen, ausgelagerte Betriebe, Dienstleister:innen. Externe Beschaffung und Lieferketten der Kommune.",
|
||||
"B": "Berührungsgruppe B — Finanzpartner:innen, Geldgeber:innen, Steuerzahler:innen. Umgang mit öffentlichen Mitteln und Haushalt.",
|
||||
"C": "Berührungsgruppe C — Politische Führung, Verwaltung, Ehrenamtliche. Mandatsträger:innen und Mitarbeitende der Kommune.",
|
||||
"D": "Berührungsgruppe D — Bürger:innen und Wirtschaft. Wirkung innerhalb der Gemeindegrenzen, Daseinsvorsorge.",
|
||||
"E": "Berührungsgruppe E — Staat, Gesellschaft und Natur. Wirkung über die Gemeindegrenzen hinaus, Zukunft."
|
||||
} %}
|
||||
{% set col_labels = {
|
||||
"1": "Menschenwürde",
|
||||
"2": "Solidarität",
|
||||
"3": "Ökol. Nachhaltigkeit",
|
||||
"4": "Soz. Gerechtigkeit",
|
||||
"5": "Transparenz"
|
||||
} %}
|
||||
{% set col_titles = {
|
||||
"1": "Wert 1 — Menschenwürde (Rechtsstaatsprinzip): Werden Grundrechte geschützt? Rechtliche Gleichstellung, Schutz vor Diskriminierung.",
|
||||
"2": "Wert 2 — Solidarität (Gemeinnutz): Wird das Gemeinwohl gefördert? Mehrwert für die Gemeinschaft, Kooperation statt Konkurrenz.",
|
||||
"3": "Wert 3 — Ökologische Nachhaltigkeit (Umwelt-Verantwortung): Klimaschutz, Ressourcenschonung, Biodiversität, Kreislaufwirtschaft.",
|
||||
"4": "Wert 4 — Soziale Gerechtigkeit (Sozialstaatsprinzip): Gerechte Verteilung, Daseinsvorsorge, soziale Absicherung, Chancengleichheit.",
|
||||
"5": "Wert 5 — Transparenz & Mitbestimmung (Demokratie): Bürgerbeteiligung, Offenlegung, demokratische Prozesse, Rechenschaftspflicht."
|
||||
} %}
|
||||
{% set row_labels = {"A": "A · Liefer.", "B": "B · Finanzen", "C": "C · Verwalt.", "D": "D · Bürger", "E": "E · Gesell."} %}
|
||||
{% set col_labels = {"1": "Würde", "2": "Solid.", "3": "Ökol.", "4": "Soz.", "5": "Trans."} %}
|
||||
|
||||
{% macro rating_class(r) %}
|
||||
{% if r == 2 %}m-pp
|
||||
@ -62,12 +36,12 @@
|
||||
{# Header-Zeile #}
|
||||
<div class="hdr" role="columnheader"></div>
|
||||
{% for c in cols %}
|
||||
<div class="hdr" role="columnheader" title="{{ col_titles[c] }}">{{ col_labels[c] }}</div>
|
||||
<div class="hdr" role="columnheader">{{ col_labels[c] }}</div>
|
||||
{% endfor %}
|
||||
|
||||
{# Daten-Zeilen #}
|
||||
{% for r in rows %}
|
||||
<div class="rhdr" role="rowheader" title="{{ row_titles[r] }}">{{ row_labels[r] }}</div>
|
||||
<div class="rhdr" role="rowheader">{{ row_labels[r] }}</div>
|
||||
{% for c in cols %}
|
||||
{% set key = r ~ c %}
|
||||
{% set cell = matrix[key] if matrix is defined and key in matrix else {} %}
|
||||
|
||||
@ -20,9 +20,7 @@
|
||||
{% set s = score | float %}
|
||||
{% if s < 5 %}{% set modifier = "low" %}{% else %}{% set modifier = "" %}{% endif %}
|
||||
|
||||
<div class="v2-score-hero {{ modifier }}" role="region" aria-label="GWÖ-Score {{ '%.1f'|format(s) }} von 10"
|
||||
title="GWÖ-Score (0–10): Gesamt-Bewertung des Antrags nach der Gemeinwohl-Matrix 2.0 für Gemeinden — gewichteter Durchschnitt der 25 Matrix-Felder. Höher = stärkerer Beitrag zum Gemeinwohl. Details unter /methodik."
|
||||
style="cursor:help;">
|
||||
<div class="v2-score-hero {{ modifier }}" role="region" aria-label="GWÖ-Score {{ '%.1f'|format(s) }} von 10">
|
||||
<div class="big-num" aria-hidden="true">
|
||||
{{ "%.1f" | format(s) }}<span class="slash">/10</span>
|
||||
</div>
|
||||
|
||||
@ -264,53 +264,6 @@
|
||||
{% endfor %}
|
||||
{% endif %}{# abstimmungsverhalten #}
|
||||
|
||||
{# ── Fraktions-aggregierte Plenum-Abstimmung aus Plenarprotokoll (#106) ── #}
|
||||
{% if antrag.plenum_votes %}
|
||||
<h3 class="v2-h3" style="margin-top:24px;">Abstimmungsergebnis</h3>
|
||||
{% set ergebnis_color = {
|
||||
"angenommen": "#2da44e",
|
||||
"abgelehnt": "#cf222e",
|
||||
"überwiesen": "#0969da",
|
||||
"zurückgezogen": "#8250df",
|
||||
"bestätigt": "#2da44e",
|
||||
"sammel": "#0969da",
|
||||
} %}
|
||||
{% for v in antrag.plenum_votes %}
|
||||
<div style="border:1px solid var(--hairline);border-radius:6px;padding:12px 14px;margin-bottom:10px;background:var(--paper);">
|
||||
<div style="display:flex;justify-content:space-between;align-items:baseline;margin-bottom:6px;">
|
||||
<span style="font-family:var(--font-display);font-size:14px;font-weight:700;color:{{ ergebnis_color.get(v.ergebnis, '#6e7781') }};">
|
||||
{{ v.ergebnis | capitalize }}{% if v.einstimmig %} · einstimmig{% endif %}
|
||||
</span>
|
||||
<span style="font-family:var(--font-mono);font-size:10px;opacity:0.6;" title="{% if v.quelle_url %}{{ v.quelle_url }}{% endif %}">
|
||||
{{ v.quelle_protokoll }}{% if v.quelle_url %} ↗{% endif %}
|
||||
</span>
|
||||
</div>
|
||||
{% if v.fraktionen_ja or v.fraktionen_nein or v.fraktionen_enthaltung %}
|
||||
<div style="display:flex;flex-wrap:wrap;gap:12px;font-family:var(--font-mono);font-size:11px;">
|
||||
{% if v.fraktionen_ja %}
|
||||
<div><span style="color:#2da44e;font-weight:700;">Ja:</span>
|
||||
{% for f in v.fraktionen_ja %}<span style="display:inline-block;padding:1px 6px;background:color-mix(in srgb,#2da44e 15%,transparent);color:#1a7f37;border-radius:3px;margin-right:3px;">{{ f }}</span>{% endfor %}
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if v.fraktionen_nein %}
|
||||
<div><span style="color:#cf222e;font-weight:700;">Nein:</span>
|
||||
{% for f in v.fraktionen_nein %}<span style="display:inline-block;padding:1px 6px;background:color-mix(in srgb,#cf222e 15%,transparent);color:#a40e26;border-radius:3px;margin-right:3px;">{{ f }}</span>{% endfor %}
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if v.fraktionen_enthaltung %}
|
||||
<div><span style="color:#6e7781;font-weight:700;cursor:help;border-bottom:1px dotted currentColor;" title="Enth. — Enthaltung: weder Zustimmung noch Ablehnung.">Enth.:</span>
|
||||
{% for f in v.fraktionen_enthaltung %}<span style="display:inline-block;padding:1px 6px;background:color-mix(in srgb,#6e7781 15%,transparent);color:#57606a;border-radius:3px;margin-right:3px;">{{ f }}</span>{% endfor %}
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% endfor %}
|
||||
<div style="font-family:var(--font-mono);font-size:10px;opacity:0.5;margin-top:-4px;margin-bottom:8px;">
|
||||
Quelle: Plenarprotokoll · automatisch extrahiert
|
||||
</div>
|
||||
{% endif %}{# plenum_votes #}
|
||||
|
||||
{% if antrag.matrix %}
|
||||
<h3 class="v2-h3">Matrix 2.0 · 25 Felder</h3>
|
||||
{{ matrix_mini(antrag.matrix) }}
|
||||
@ -324,18 +277,18 @@
|
||||
<div class="v2-fraktion-row">
|
||||
<div class="v2-fraktion-label">
|
||||
{{ fs.fraktion }}
|
||||
{% if fs.ist_antragsteller %}<span class="v2-badge-antragsteller" title="A — Antragstellende Fraktion: hat den Antrag eingereicht.">A</span>{% endif %}
|
||||
{% if fs.ist_regierung %}<span class="v2-badge-regierung" title="R — Regierungsfraktion: trägt die aktuelle Mehrheit im Landtag.">R</span>{% endif %}
|
||||
{% if fs.ist_antragsteller %}<span class="v2-badge-antragsteller" title="Antragstellende Fraktion">A</span>{% endif %}
|
||||
{% if fs.ist_regierung %}<span class="v2-badge-regierung" title="Regierungsfraktion">R</span>{% endif %}
|
||||
</div>
|
||||
<div class="v2-fraktion-scores">
|
||||
{% set wp_score = fs.wahlprogramm.score | float %}
|
||||
{% set pp_score = fs.parteiprogramm.score | float %}
|
||||
<span class="v2-score-chip {% if wp_score >= 7 %}chip-green{% elif wp_score >= 4 %}chip-mid{% else %}chip-red{% endif %}"
|
||||
title="WP — Wahlprogramm-Treue (0–10): wie gut passt der Antrag zum aktuellen Wahlprogramm dieser Fraktion? {{ fs.wahlprogramm.begruendung }}">
|
||||
title="Wahlprogramm-Treue: {{ fs.wahlprogramm.begruendung }}">
|
||||
WP {{ "%.0f"|format(wp_score) }}/10
|
||||
</span>
|
||||
<span class="v2-score-chip {% if pp_score >= 7 %}chip-green{% elif pp_score >= 4 %}chip-mid{% else %}chip-red{% endif %}"
|
||||
title="PP — Parteiprogramm-Treue (0–10): wie gut passt der Antrag zum Grundsatzprogramm dieser Partei? {{ fs.parteiprogramm.begruendung }}">
|
||||
title="Parteiprogramm-Treue: {{ fs.parteiprogramm.begruendung }}">
|
||||
PP {{ "%.0f"|format(pp_score) }}/10
|
||||
</span>
|
||||
</div>
|
||||
|
||||
@ -78,18 +78,12 @@
|
||||
min-width: 100px;
|
||||
padding-top: 2px;
|
||||
}
|
||||
.ls-main {
|
||||
flex: 1;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 4px;
|
||||
min-width: 0;
|
||||
}
|
||||
.ls-title {
|
||||
font-family: var(--font-display);
|
||||
font-size: 14px;
|
||||
font-weight: 700;
|
||||
color: var(--ecg-dark);
|
||||
flex: 1;
|
||||
line-height: 1.35;
|
||||
}
|
||||
.ls-title a {
|
||||
@ -97,24 +91,6 @@
|
||||
text-decoration: none;
|
||||
}
|
||||
.ls-title a:hover { text-decoration: underline; }
|
||||
.ls-fraktionen {
|
||||
display: flex;
|
||||
gap: 6px;
|
||||
flex-wrap: wrap;
|
||||
margin-top: 2px;
|
||||
}
|
||||
.ls-fraktion {
|
||||
display: inline-block;
|
||||
padding: 1px 7px;
|
||||
background: color-mix(in srgb, var(--ecg-teal) 10%, transparent);
|
||||
color: var(--ecg-teal);
|
||||
font-family: var(--font-mono);
|
||||
font-size: 10px;
|
||||
font-weight: 700;
|
||||
letter-spacing: 0.04em;
|
||||
border-radius: 3px;
|
||||
text-transform: uppercase;
|
||||
}
|
||||
.ls-actions { flex-shrink: 0; }
|
||||
.ls-btn-analyse {
|
||||
font-family: var(--font-mono);
|
||||
@ -270,22 +246,13 @@ async function lsSearch(e) {
|
||||
function renderRow(item, bl) {
|
||||
var ds = item.drucksache || '';
|
||||
var title = escHtml(item.title || item.titel || ds);
|
||||
var url = item.url || item.link || '';
|
||||
var url = item.url || '';
|
||||
var done = lsCheckedIds.has(ds);
|
||||
var fraktionen = Array.isArray(item.fraktionen) ? item.fraktionen : [];
|
||||
|
||||
var titleHtml = url
|
||||
? '<a href="' + escHtml(url) + '" target="_blank" rel="noopener">' + title + '</a>'
|
||||
: title;
|
||||
|
||||
var fraktionenHtml = fraktionen.length
|
||||
? '<div class="ls-fraktionen">'
|
||||
+ fraktionen.map(function (f) {
|
||||
return '<span class="ls-fraktion">' + escHtml(f) + '</span>';
|
||||
}).join('')
|
||||
+ '</div>'
|
||||
: '';
|
||||
|
||||
var actionHtml;
|
||||
if (done) {
|
||||
actionHtml = '<span class="ls-badge-done">Bewertet → <a href="/antrag/' + encodeURIComponent(ds) + '" style="color:inherit;">Ansehen</a></span>';
|
||||
@ -297,10 +264,7 @@ function renderRow(item, bl) {
|
||||
|
||||
return '<div class="ls-row">'
|
||||
+ '<div class="ls-drucksache">' + escHtml(ds) + '</div>'
|
||||
+ '<div class="ls-main">'
|
||||
+ '<div class="ls-title">' + titleHtml + '</div>'
|
||||
+ fraktionenHtml
|
||||
+ '</div>'
|
||||
+ '<div class="ls-actions">' + actionHtml + '</div>'
|
||||
+ '</div>';
|
||||
}
|
||||
|
||||
@ -79,14 +79,14 @@
|
||||
/* Interactive matrix grid */
|
||||
.gwoe-matrix-grid {
|
||||
display: grid;
|
||||
grid-template-columns: 150px repeat(5, 1fr);
|
||||
grid-template-columns: 110px repeat(5, 1fr);
|
||||
gap: 2px;
|
||||
font-size: 11px;
|
||||
margin: 1rem 0;
|
||||
}
|
||||
.gwoe-matrix-grid .gc { padding: 5px 4px; text-align: center; background: var(--ecg-bg-subtle); border: 1px solid var(--ecg-border); display: flex; align-items: center; justify-content: center; line-height: 1.25; min-height: 36px; }
|
||||
.gwoe-matrix-grid .gh { background: var(--ecg-teal); color: #fff; font-weight: 700; cursor: help; }
|
||||
.gwoe-matrix-grid .gr { background: var(--ecg-green); color: #fff; font-weight: 700; justify-content: flex-start; padding-left: 6px; text-align: left; cursor: help; }
|
||||
.gwoe-matrix-grid .gc { padding: 5px 4px; text-align: center; background: var(--ecg-bg-subtle); border: 1px solid var(--ecg-border); }
|
||||
.gwoe-matrix-grid .gh { background: var(--ecg-teal); color: #fff; font-weight: 700; }
|
||||
.gwoe-matrix-grid .gr { background: var(--ecg-green); color: #fff; font-weight: 700; text-align: left; padding-left: 6px; }
|
||||
.gwoe-matrix-grid .gc.clickable { cursor: pointer; transition: background 0.1s; }
|
||||
.gwoe-matrix-grid .gc.clickable:hover { background: rgba(0,157,165,0.12); }
|
||||
#field-explain {
|
||||
@ -138,7 +138,6 @@
|
||||
<a href="#was-macht">Was macht der Prüfer?</a>
|
||||
<a href="#matrix">Die Matrix 2.0</a>
|
||||
<a href="#pipeline">Analyse-Pipeline</a>
|
||||
<a href="#prompts">LLM-Prompts</a>
|
||||
<a href="#qualitaet">Qualitätssicherung</a>
|
||||
<a href="#einschraenkungen">Einschränkungen</a>
|
||||
<a href="#datenquellen">Datenquellen</a>
|
||||
@ -255,46 +254,46 @@
|
||||
|
||||
<div class="gwoe-matrix-grid">
|
||||
<div class="gc"></div>
|
||||
<div class="gc gh" title="Wert 1 — Menschenwürde (Rechtsstaatsprinzip): Werden Grundrechte geschützt? Rechtliche Gleichstellung, Schutz vor Diskriminierung.">Menschenwürde</div>
|
||||
<div class="gc gh" title="Wert 2 — Solidarität (Gemeinnutz): Wird das Gemeinwohl gefördert? Mehrwert für die Gemeinschaft, Kooperation statt Konkurrenz.">Solidarität</div>
|
||||
<div class="gc gh" title="Wert 3 — Ökologische Nachhaltigkeit (Umwelt-Verantwortung): Klimaschutz, Ressourcenschonung, Biodiversität, Kreislaufwirtschaft.">Ökologische Nachhaltigkeit</div>
|
||||
<div class="gc gh" title="Wert 4 — Soziale Gerechtigkeit (Sozialstaatsprinzip): Gerechte Verteilung, Daseinsvorsorge, soziale Absicherung, Chancengleichheit.">Soziale Gerechtigkeit</div>
|
||||
<div class="gc gh" title="Wert 5 — Transparenz & Mitbestimmung (Demokratie): Bürgerbeteiligung, Offenlegung, demokratische Prozesse, Rechenschaftspflicht.">Transparenz & Mitbestimmung</div>
|
||||
<div class="gc gh">Menschen­würde</div>
|
||||
<div class="gc gh">Solidarität</div>
|
||||
<div class="gc gh">Ökol. Nachh.</div>
|
||||
<div class="gc gh">Soz. Gerecht.</div>
|
||||
<div class="gc gh">Transparenz</div>
|
||||
|
||||
<div class="gc gr" title="Berührungsgruppe A — Lieferant:innen, ausgelagerte Betriebe, Dienstleister:innen. Externe Beschaffung und Lieferketten der Kommune.">A · Lieferant:innen</div>
|
||||
<div class="gc clickable" onclick="showField('A1')" title="A1 — Grundrechtsschutz in der Lieferkette"><strong>A1</strong><br><small>Grundrechte Lieferkette</small></div>
|
||||
<div class="gc clickable" onclick="showField('A2')" title="A2 — Nutzen für die Gemeinde"><strong>A2</strong><br><small>Nutzen Gemeinde</small></div>
|
||||
<div class="gc clickable" onclick="showField('A3')" title="A3 — Ökologische Verantwortung in der Lieferkette"><strong>A3</strong><br><small>Ökol. Verantwortung</small></div>
|
||||
<div class="gc clickable" onclick="showField('A4')" title="A4 — Soziale Verantwortung in der Lieferkette"><strong>A4</strong><br><small>Soziale Verantwortung</small></div>
|
||||
<div class="gc clickable" onclick="showField('A5')" title="A5 — Rechenschaft und Mitsprache bei Beschaffung"><strong>A5</strong><br><small>Rechenschaft</small></div>
|
||||
<div class="gc gr">A · Lieferant:innen</div>
|
||||
<div class="gc clickable" onclick="showField('A1')"><strong>A1</strong><br><small>Grundrechte Lieferkette</small></div>
|
||||
<div class="gc clickable" onclick="showField('A2')"><strong>A2</strong><br><small>Nutzen Gemeinde</small></div>
|
||||
<div class="gc clickable" onclick="showField('A3')"><strong>A3</strong><br><small>Ökol. Verantwortung</small></div>
|
||||
<div class="gc clickable" onclick="showField('A4')"><strong>A4</strong><br><small>Soziale Verantwortung</small></div>
|
||||
<div class="gc clickable" onclick="showField('A5')"><strong>A5</strong><br><small>Rechenschaft</small></div>
|
||||
|
||||
<div class="gc gr" title="Berührungsgruppe B — Finanzpartner:innen, Geldgeber:innen, Steuerzahler:innen. Umgang mit öffentlichen Mitteln und Haushalt.">B · Finanzen</div>
|
||||
<div class="gc clickable" onclick="showField('B1')" title="B1 — Ethisches Finanzgebaren"><strong>B1</strong><br><small>Eth. Finanzgebaren</small></div>
|
||||
<div class="gc clickable" onclick="showField('B2')" title="B2 — Gemeinnutz im Finanzgebaren"><strong>B2</strong><br><small>Gemeinnutz</small></div>
|
||||
<div class="gc clickable" onclick="showField('B3')" title="B3 — Ökologische Verantwortung der Finanzpolitik"><strong>B3</strong><br><small>Ökol. Finanzpolitik</small></div>
|
||||
<div class="gc clickable" onclick="showField('B4')" title="B4 — Soziale Verantwortung der Finanzpolitik"><strong>B4</strong><br><small>Soz. Finanzpolitik</small></div>
|
||||
<div class="gc clickable" onclick="showField('B5')" title="B5 — Partizipation in der Finanzpolitik"><strong>B5</strong><br><small>Partizipation</small></div>
|
||||
<div class="gc gr">B · Finanzen</div>
|
||||
<div class="gc clickable" onclick="showField('B1')"><strong>B1</strong><br><small>Eth. Finanzgebaren</small></div>
|
||||
<div class="gc clickable" onclick="showField('B2')"><strong>B2</strong><br><small>Gemeinnutz</small></div>
|
||||
<div class="gc clickable" onclick="showField('B3')"><strong>B3</strong><br><small>Ökol. Finanzpolitik</small></div>
|
||||
<div class="gc clickable" onclick="showField('B4')"><strong>B4</strong><br><small>Soz. Finanzpolitik</small></div>
|
||||
<div class="gc clickable" onclick="showField('B5')"><strong>B5</strong><br><small>Partizipation</small></div>
|
||||
|
||||
<div class="gc gr" title="Berührungsgruppe C — Politische Führung, Verwaltung, Ehrenamtliche. Mandatsträger:innen und Mitarbeitende der Kommune.">C · Verwaltung</div>
|
||||
<div class="gc clickable" onclick="showField('C1')" title="C1 — Individuelle Rechts- und Gleichstellung"><strong>C1</strong><br><small>Gleichstellung</small></div>
|
||||
<div class="gc clickable" onclick="showField('C2')" title="C2 — Gemeinsame Zielvereinbarung für das Gemeinwohl"><strong>C2</strong><br><small>Gemeinsame Ziele</small></div>
|
||||
<div class="gc clickable" onclick="showField('C3')" title="C3 — Förderung ökologischen Verhaltens intern"><strong>C3</strong><br><small>Ökol. Verhalten</small></div>
|
||||
<div class="gc clickable" onclick="showField('C4')" title="C4 — Gerechte Verteilung von Arbeit"><strong>C4</strong><br><small>Gerechte Arbeit</small></div>
|
||||
<div class="gc clickable" onclick="showField('C5')" title="C5 — Transparente Kommunikation intern"><strong>C5</strong><br><small>Transparenz intern</small></div>
|
||||
<div class="gc gr">C · Verwaltung</div>
|
||||
<div class="gc clickable" onclick="showField('C1')"><strong>C1</strong><br><small>Gleichstellung</small></div>
|
||||
<div class="gc clickable" onclick="showField('C2')"><strong>C2</strong><br><small>Gemeinsame Ziele</small></div>
|
||||
<div class="gc clickable" onclick="showField('C3')"><strong>C3</strong><br><small>Ökol. Verhalten</small></div>
|
||||
<div class="gc clickable" onclick="showField('C4')"><strong>C4</strong><br><small>Gerechte Arbeit</small></div>
|
||||
<div class="gc clickable" onclick="showField('C5')"><strong>C5</strong><br><small>Transparenz intern</small></div>
|
||||
|
||||
<div class="gc gr" title="Berührungsgruppe D — Bürger:innen und Wirtschaft. Wirkung innerhalb der Gemeindegrenzen, Daseinsvorsorge.">D · Bürger:innen</div>
|
||||
<div class="gc clickable" onclick="showField('D1')" title="D1 — Schutz des Individuums, Rechtsgleichheit"><strong>D1</strong><br><small>Rechtsgleichheit</small></div>
|
||||
<div class="gc clickable" onclick="showField('D2')" title="D2 — Gesamtwohl in der Gemeinde"><strong>D2</strong><br><small>Gesamtwohl</small></div>
|
||||
<div class="gc clickable" onclick="showField('D3')" title="D3 — Ökologische Gestaltung der öffentlichen Leistung"><strong>D3</strong><br><small>Ökol. Leistung</small></div>
|
||||
<div class="gc clickable" onclick="showField('D4')" title="D4 — Soziale Gestaltung der öffentlichen Leistung"><strong>D4</strong><br><small>Soz. Leistung</small></div>
|
||||
<div class="gc clickable" onclick="showField('D5')" title="D5 — Transparente Kommunikation und demokratische Einbindung"><strong>D5</strong><br><small>Demokratie</small></div>
|
||||
<div class="gc gr">D · Bürger:innen</div>
|
||||
<div class="gc clickable" onclick="showField('D1')"><strong>D1</strong><br><small>Rechtsgleichheit</small></div>
|
||||
<div class="gc clickable" onclick="showField('D2')"><strong>D2</strong><br><small>Gesamtwohl</small></div>
|
||||
<div class="gc clickable" onclick="showField('D3')"><strong>D3</strong><br><small>Ökol. Leistung</small></div>
|
||||
<div class="gc clickable" onclick="showField('D4')"><strong>D4</strong><br><small>Soz. Leistung</small></div>
|
||||
<div class="gc clickable" onclick="showField('D5')"><strong>D5</strong><br><small>Demokratie</small></div>
|
||||
|
||||
<div class="gc gr" title="Berührungsgruppe E — Staat, Gesellschaft und Natur. Wirkung über die Gemeindegrenzen hinaus, Zukunft.">E · Gesellschaft & Natur</div>
|
||||
<div class="gc clickable" onclick="showField('E1')" title="E1 — Menschenwürdiges Leben für zukünftige Generationen"><strong>E1</strong><br><small>Zukunft</small></div>
|
||||
<div class="gc clickable" onclick="showField('E2')" title="E2 — Beitrag zum Gesamtwohl über die Gemeindegrenzen hinaus"><strong>E2</strong><br><small>Beitrag Gesamtwohl</small></div>
|
||||
<div class="gc clickable" onclick="showField('E3')" title="E3 — Verantwortung für ökologische Auswirkungen jenseits der Gemeinde"><strong>E3</strong><br><small>Ökol. Auswirkungen</small></div>
|
||||
<div class="gc clickable" onclick="showField('E4')" title="E4 — Beitrag zum sozialen Ausgleich"><strong>E4</strong><br><small>Sozialer Ausgleich</small></div>
|
||||
<div class="gc clickable" onclick="showField('E5')" title="E5 — Transparente und demokratische Mitbestimmung auf übergeordneter Ebene"><strong>E5</strong><br><small>Demokratie global</small></div>
|
||||
<div class="gc gr">E · Gesellschaft</div>
|
||||
<div class="gc clickable" onclick="showField('E1')"><strong>E1</strong><br><small>Zukunft</small></div>
|
||||
<div class="gc clickable" onclick="showField('E2')"><strong>E2</strong><br><small>Beitrag Gesamtwohl</small></div>
|
||||
<div class="gc clickable" onclick="showField('E3')"><strong>E3</strong><br><small>Ökol. Auswirkungen</small></div>
|
||||
<div class="gc clickable" onclick="showField('E4')"><strong>E4</strong><br><small>Sozialer Ausgleich</small></div>
|
||||
<div class="gc clickable" onclick="showField('E5')"><strong>E5</strong><br><small>Demokratie global</small></div>
|
||||
</div>
|
||||
|
||||
<details style="font-size:12px;margin-top:8px;">
|
||||
@ -366,77 +365,6 @@
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section id="prompts">
|
||||
<h2>LLM-Prompts</h2>
|
||||
<div class="v2-kasten outline-blue">
|
||||
<p>
|
||||
Volle Transparenz: hier liegen die exakten Anweisungen, mit denen das
|
||||
Sprachmodell ({{ model_name }}) jeden Antrag bewertet.
|
||||
</p>
|
||||
|
||||
<h3 style="margin-top:0.75rem;">Wie System- und User-Prompt zusammenwirken</h3>
|
||||
<p>
|
||||
Beide Prompts werden in <strong>einem einzigen API-Call</strong>
|
||||
gesendet — nicht getrennt ausgeführt. Sie fließen gemeinsam ins
|
||||
Modell-Kontextfenster und werden zusammen bewertet.
|
||||
</p>
|
||||
<table style="margin-top:0.5rem;">
|
||||
<tr>
|
||||
<th style="width:30%;">System-Prompt (statisch, ~5 KB)</th>
|
||||
<th>User-Prompt (dynamisch, pro Antrag)</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>Wer und wie</strong> bewertet wird: GWÖ-Matrix-Definition, 25 Felder, Bewertungs-Skala, Empfehlungs-Kategorien, Ausgabe-JSON-Schema, strenge Regeln (max. 3 Verbesserungsvorschläge, wörtliche Zitate, …).</td>
|
||||
<td><strong>Was</strong> bewertet wird: BL-Spezifika, semantisch gefundene Wahlprogramm-Chunks, der Antragstext selbst, Pflicht-Fraktionen-Liste.</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>
|
||||
Das Modell wendet die Matrix-Definition aus dem System-Prompt auf
|
||||
den Antragstext aus dem User-Prompt an. Ohne System-Prompt wüsste
|
||||
es nicht <em>wonach</em> es bewerten soll; ohne User-Prompt
|
||||
hätte es nichts zu bewerten.
|
||||
</p>
|
||||
<p style="font-size:12px;opacity:0.85;">
|
||||
<strong>Warum die Trennung?</strong>
|
||||
</p>
|
||||
<ul style="font-size:12px;opacity:0.85;">
|
||||
<li><strong>Caching:</strong> Der API-Anbieter cached den System-Prompt — pro neuem Antrag werden nur die User-Tokens verrechnet.</li>
|
||||
<li><strong>Modell-Compliance:</strong> Sprachmodelle behandeln System-Anweisungen mit höherem Vertrauen, was robuster gegen Prompt-Injection aus dem Antragstext ist.</li>
|
||||
<li><strong>Wartbarkeit:</strong> statische Bewertungs-Regeln getrennt vom dynamischen Inhalt — leichter zu pflegen, leichter zu auditieren.</li>
|
||||
</ul>
|
||||
<p style="font-size:11px;opacity:0.7;">
|
||||
Quelle: <a href="https://repo.toppyr.de/tobias/gwoe-antragspruefer/src/branch/main/app/analyzer.py" target="_blank"><code>app/analyzer.py</code></a>
|
||||
(<code>get_system_prompt()</code> und <code>get_user_prompt_template()</code>);
|
||||
API-Aufruf in <a href="https://repo.toppyr.de/tobias/gwoe-antragspruefer/src/branch/main/app/adapters/qwen_bewerter.py" target="_blank"><code>app/adapters/qwen_bewerter.py</code></a>
|
||||
(Zeilen 83–85, <code>messages=[{"role":"system",…}, {"role":"user",…}]</code>).
|
||||
</p>
|
||||
|
||||
<h3 style="margin-top:1rem;">Die Prompts im Wortlaut</h3>
|
||||
<p style="font-size:12px;">
|
||||
Der User-Prompt unten ist als <em>Template</em> abgebildet — die
|
||||
Platzhalter <code>{kontext}</code>, <code>{wahlprogramm_zitate}</code>,
|
||||
<code>{antrag}</code> und <code>{pflicht_fraktionen}</code> werden
|
||||
pro Antrag mit den konkreten Inhalten gefüllt.
|
||||
</p>
|
||||
|
||||
<details style="margin-top:1rem;">
|
||||
<summary style="cursor:pointer;color:var(--ecg-teal);font-weight:700;padding:6px 0;font-family:var(--font-display);">
|
||||
System-Prompt anzeigen
|
||||
<span style="font-family:var(--font-mono);font-size:11px;opacity:0.6;font-weight:400;">({{ system_prompt|length }} Zeichen)</span>
|
||||
</summary>
|
||||
<pre style="background:var(--ecg-bg-subtle);border:1px solid var(--ecg-border);border-radius:4px;padding:14px 16px;margin-top:8px;font-family:var(--font-mono);font-size:11px;line-height:1.5;white-space:pre-wrap;word-break:break-word;color:var(--ecg-dark);overflow-x:auto;">{{ system_prompt }}</pre>
|
||||
</details>
|
||||
|
||||
<details style="margin-top:0.75rem;">
|
||||
<summary style="cursor:pointer;color:var(--ecg-teal);font-weight:700;padding:6px 0;font-family:var(--font-display);">
|
||||
User-Prompt-Template anzeigen
|
||||
<span style="font-family:var(--font-mono);font-size:11px;opacity:0.6;font-weight:400;">({{ user_prompt_template|length }} Zeichen)</span>
|
||||
</summary>
|
||||
<pre style="background:var(--ecg-bg-subtle);border:1px solid var(--ecg-border);border-radius:4px;padding:14px 16px;margin-top:8px;font-family:var(--font-mono);font-size:11px;line-height:1.5;white-space:pre-wrap;word-break:break-word;color:var(--ecg-dark);overflow-x:auto;">{{ user_prompt_template }}</pre>
|
||||
</details>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section id="qualitaet">
|
||||
<h2>Qualitätssicherung</h2>
|
||||
<div class="v2-kasten outline-green">
|
||||
|
||||
@ -1,55 +0,0 @@
|
||||
{
|
||||
"afd-bb-2024.pdf": "da5cd04cc66128b2f0df35b47775fce850ed2f4145ee15d74ec8bf501ce043f1",
|
||||
"afd-be-2023.pdf": "d2b5997b1bc0d3fb590cc354d8ed1ac879e8de4a74518f4089436a2fa12615f1",
|
||||
"afd-bw-2021.pdf": "a438e09279c6c5766171a213715ed0a9d60248ff86f648227e8bb6ec59a591c7",
|
||||
"afd-hh-2025.pdf": "6aae3ad00cd07824bcd99473e130d1b894e2174a89fcafece51865c51fdcd4c8",
|
||||
"afd-lsa-2021.pdf": "dd2651af2a9423039b1c5a39760be2332025d569a878453f09e0302e252edc23",
|
||||
"afd-mv-2021.pdf": "953c39941a1f997233daaf0cec01bc82b1e86ba895b43e8d34b015cc72799648",
|
||||
"afd-nrw-2022.pdf": "36c4bc55c3239e3f7e69568e19d7f074ce2f1cf018653d493767ec09df637282",
|
||||
"afd-rp-2021.pdf": "3ec39eb08a073244813a51f260e18fe52aab791bea26bf8079546b6e189ec2b3",
|
||||
"afd-th-2024.pdf": "26e61fdc3456e7ce18f7a3d2ea1eada303f93cad0b9698797f83a671574eaf51",
|
||||
"bsw-bb-2024.pdf": "548c9bda01af176586606fae708c9f3b3ba98e1e128f1e2ff39e482289faab42",
|
||||
"bsw-th-2024.pdf": "5ace33912083048a759ee2af9288248447363dafa21f569c5c056df22751ba69",
|
||||
"cdu-bb-2024.pdf": "460b1463483429f9e8b84e4ae6ef9cf878dd228e108411bed3c153169a0001e8",
|
||||
"cdu-be-2023.pdf": "813d0d08ac8ce7381e9a7b9472e0616aaf684b1632c9d4a7f4e940a33455f29a",
|
||||
"cdu-bw-2021.pdf": "a92c104c456ce06d8bad6649071551e0ec0d525a1bc0bc31e9fa6a0566da4db0",
|
||||
"cdu-hh-2025.pdf": "8d29e514b8bce5c2f3f497dc5b97f6f8ab95a7bdbf619abf258e9582d57f2dbd",
|
||||
"cdu-lsa-2021.pdf": "63b6cf42ce97834d5d105fb7b8cc7fb7a2aa96928d4153bd3a5858c196ee0797",
|
||||
"cdu-mv-2021.pdf": "605a2211bef8666c2103771ebffd97a088e7cdb1545401087ef125155e7e4db2",
|
||||
"cdu-nrw-2022.pdf": "49d97a6f30fbacad3a0b770c182ed0527bc5d347dc4cacd65f85e7e4e9644566",
|
||||
"cdu-rp-2021.pdf": "54c50d88bdf5c5f7dee5abcc981ffb4d1cfd5c86fbf2a29f4f2f4a8a3dd4797a",
|
||||
"cdu-sh-2022.pdf": "39b79a22e904b300cf1bbc25752b618195683c90c31e6b10c3bc0e8408aa6a1a",
|
||||
"cdu-th-2024.pdf": "cde8d2222bd8ce04aee24883a38dab8a30f5d60cda115b8bb2f43ceffa08b730",
|
||||
"fdp-bw-2021.pdf": "bdcbb1b2e5748922c8347bd69ea6f81c954fd02cd220d448400f9a5a86ce914b",
|
||||
"fdp-lsa-2021.pdf": "3d4275e36e29c0b191dcc4a29061a1072920f868cc52bee954bf81491ad15224",
|
||||
"fdp-mv-2021.pdf": "8dc341dd017f1d82c51608a26e1fd6c3d8acd1281dc37409e375389999b37b55",
|
||||
"fdp-nrw-2022.pdf": "576b42a26c29ca5d8b7469d417ae709c8d0699aed5195d4ca16dd696dcff8bea",
|
||||
"fdp-rp-2021.pdf": "fba792d8d43842f33ae8f0aa94b0d4e50838908c217402b4c5cb4707f958e1ae",
|
||||
"fdp-sh-2022.pdf": "4c49da411bb3c8e008f4b57dd20dc005104515b56056ff746cf5403529728d09",
|
||||
"fw-rp-2021.pdf": "c7f26d553f24c9d9fcf1c2edb1dbe558edc1ca65af68b289a1541e77f7bbeea8",
|
||||
"gruene-be-2023.pdf": "2b14a319cdcd2ca022399254ea285714f872eddd166f3f537861eeb2dc5ade80",
|
||||
"gruene-bw-2021.pdf": "9af526705cb10b91be0690b26c9c033668a8082eeefca482dc4e7a46f2d671f9",
|
||||
"gruene-hh-2025.pdf": "4428d1cdc16b4e74588f0bd51145ab7371f9e0871a2fc9d25a1f94e4f5aeb662",
|
||||
"gruene-lsa-2021.pdf": "7b5cea92cd600283d7edf18dc0d358c0b7d78d7269589d9ef05de7d5f8b35998",
|
||||
"gruene-mv-2021.pdf": "40f0070743ef9ae7808cab319234b4c83faa53a8a098ba8a82f28023bee4d9f6",
|
||||
"gruene-nrw-2022.pdf": "2d7eaf2f4b73e0b7cdccf8641208b86d306b654ead5706d72c446965f82e5769",
|
||||
"gruene-rp-2021.pdf": "4fd68629d1560c28d61b2b913fd20ce6ad9a76b22823fd8496e51bfaf70dc19c",
|
||||
"gruene-sh-2022.pdf": "62870c948c9e05663125b051d3a6401d63952ea6a64e4140dcece7bd1b1aea52",
|
||||
"linke-be-2023.pdf": "7d6a9166f6a1d87ba26cc1a2818ae2b844ee9df6ed6668673f329dd5186fd956",
|
||||
"linke-hh-2025.pdf": "15e68efe3818758a7cefc0a3e3095a5a5fb191111c00a1202c563cee43ce6e40",
|
||||
"linke-lsa-2021.pdf": "f269c014416b213785badf7bea5928fdb847fc902e09f52ec66a140a37e03d75",
|
||||
"linke-mv-2021.pdf": "160dad56ab4de8f641c21f51cbf3c33953f2f3d91b4de792c4e725f3975fdfbe",
|
||||
"linke-th-2024.pdf": "2d8ca99ef60cbe1b59cf33b1e37320d66a057e5136c2f49aa8cde77e4a19533a",
|
||||
"spd-bb-2024.pdf": "4131f63fbb9d67cd8948ca7a54f1c140b47968c77454a3dabe6bcdc4384f63d3",
|
||||
"spd-be-2023.pdf": "4ee84e969e97894742673f940ec030883216ce852b729507327f8bced637d03b",
|
||||
"spd-bw-2021.pdf": "d888ae92bb62a61aaa4d6ac8dc22c2c98d1a2227b6ba223b6422770672825072",
|
||||
"spd-hh-2025.pdf": "5e8c57969cb3b159b9299c173831f7863ab81bd206c2a87ae232ba96f23156ee",
|
||||
"spd-lsa-2021.pdf": "59140aa1921ab0ee85142d74e1d72b1af7254da3f7870a30460abd605d280333",
|
||||
"spd-mv-2021.pdf": "c8c671c2e60f1a4f8048bd74e379eb8edc69ab2daeb09581fe83f25f6c87d529",
|
||||
"spd-nrw-2022.pdf": "6f1375add74a532cb084dee10c3e5a6215e7d4118ddd26ef0d27bf39765d19a6",
|
||||
"spd-rp-2021.pdf": "13966815b8870b30e3480673437634fb90882bf5410c652694a6579492e32707",
|
||||
"spd-sh-2022.pdf": "3acd3ed6c42a0e0a8f49abd76610b536c7d5fdf13fcc4499e391bc9b1a3d0f0f",
|
||||
"spd-th-2024.pdf": "dbd96a51134c8c13dabe18807fe233e9a43f45c2fefeead2ea500ecc3d63de6b",
|
||||
"ssw-sh-2022.pdf": "3020762a1c33a09bc51f7fa49ede1c2d5dd7574ea74ef262076e59d5e3a9a41b",
|
||||
"test.pdf": "71630b3ce93b3fd91aefa095908c8070d07e0eca8ad3071c60ae7375da2e7e17"
|
||||
}
|
||||
@ -16,7 +16,6 @@ CLI:
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
@ -27,39 +26,9 @@ import yaml
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_LINKS_FILE = Path(__file__).parent / "wahlprogramm-links.yaml"
|
||||
_LOCK_FILE = Path(__file__).parent / "wahlprogramm-shas.lock.json"
|
||||
_REFERENZEN_DIR = Path(__file__).parent / "static" / "referenzen"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SHA-Lock — schuetzt vor stillem PDF-Austausch unter gleicher URL.
|
||||
# Hintergrund: abgeordnetenwatch hat die CDU-BE-2023-Datei intern gegen den
|
||||
# 2026-Berlin-Plan ersetzt, ohne den Slug zu aendern. Nach dem ersten
|
||||
# erfolgreichen Download wird der SHA-256 hier gepinnt; spaetere fetches
|
||||
# vergleichen gegen den Lock und brechen bei Abweichung ab.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _load_lock() -> dict[str, str]:
|
||||
if not _LOCK_FILE.exists():
|
||||
return {}
|
||||
try:
|
||||
return json.loads(_LOCK_FILE.read_text(encoding="utf-8"))
|
||||
except (json.JSONDecodeError, OSError) as exc:
|
||||
logger.error("Lock-File %s ist kaputt: %s — leerer Lock genutzt", _LOCK_FILE, exc)
|
||||
return {}
|
||||
|
||||
|
||||
def _save_lock(lock: dict[str, str]) -> None:
|
||||
_LOCK_FILE.write_text(
|
||||
json.dumps(lock, indent=2, sort_keys=True, ensure_ascii=False) + "\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
|
||||
def _lock_key(dateiname: str) -> str:
|
||||
return dateiname
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# YAML-Quelle laden
|
||||
# ---------------------------------------------------------------------------
|
||||
@ -109,45 +78,32 @@ def fetch_and_verify(
|
||||
url: str,
|
||||
dest_path: Path,
|
||||
expected_sha: Optional[str] = None,
|
||||
*,
|
||||
accept_new_sha: bool = False,
|
||||
) -> dict:
|
||||
"""Lädt eine Datei herunter und prüft den SHA-256-Hash gegen den Lock.
|
||||
"""Lädt eine Datei herunter und prüft optional den SHA-256-Hash.
|
||||
|
||||
SHA-Gate-Logik (Pferdetausch-Schutz):
|
||||
- Beim ersten erfolgreichen Download wird der SHA in
|
||||
``wahlprogramm-shas.lock.json`` gepinnt.
|
||||
- Spätere fetches vergleichen gegen diesen gepinnten SHA. Abweichung →
|
||||
Abbruch, ausser ``accept_new_sha=True`` ist gesetzt (dann wird der Lock
|
||||
explizit aktualisiert).
|
||||
- ``expected_sha`` (z.B. aus YAML) ueberschreibt den Lock fuer diesen Call.
|
||||
SHA-Gate-Logik:
|
||||
- Existiert ``dest_path`` bereits, wird der bisherige Hash gespeichert.
|
||||
- Nach dem Download wird der neue Hash verglichen.
|
||||
- Bei Abweichung wird die temporäre Datei gelöscht und ein Fehler zurückgegeben
|
||||
(niemals stillschweigend überschreiben).
|
||||
|
||||
Args:
|
||||
url: Download-URL der PDF-Datei.
|
||||
dest_path: Ziel-Pfad (typischerweise in app/static/referenzen/).
|
||||
expected_sha: Wenn angegeben, muss der Download-Hash übereinstimmen
|
||||
(haerter als der Lock-Vergleich).
|
||||
accept_new_sha: Wenn True, wird der Lock auf den neuen SHA aktualisiert
|
||||
statt bei Abweichung abzubrechen. NICHT default — Maintainer-Override.
|
||||
expected_sha: Wenn angegeben, muss der Download-Hash übereinstimmen.
|
||||
|
||||
Returns:
|
||||
Dict mit den Schlüsseln:
|
||||
- ``ok`` (bool): True bei Erfolg.
|
||||
- ``sha256`` (str): SHA-256 der heruntergeladenen Datei.
|
||||
- ``prev_sha256`` (str|None): SHA-256 der bisherigen Datei, falls vorhanden.
|
||||
- ``locked_sha256`` (str|None): SHA aus dem Lock-File (vor diesem Call).
|
||||
- ``error`` (str|None): Fehlermeldung bei Misserfolg.
|
||||
- ``changed`` (bool): True, wenn sich die Datei geaendert hat.
|
||||
- ``lock_updated`` (bool): True, wenn der Lock-Eintrag neu/ersetzt wurde.
|
||||
- ``changed`` (bool): True, wenn sich die Datei gegenüber der bisherigen Version geändert hat.
|
||||
"""
|
||||
prev_sha: Optional[str] = None
|
||||
if dest_path.exists():
|
||||
prev_sha = sha256_of_file(dest_path)
|
||||
|
||||
lock = _load_lock()
|
||||
lock_key = _lock_key(dest_path.name)
|
||||
locked_sha = lock.get(lock_key)
|
||||
|
||||
tmp_path = dest_path.with_suffix(".tmp")
|
||||
try:
|
||||
logger.info("Lade %s → %s", url, tmp_path)
|
||||
@ -163,71 +119,39 @@ def fetch_and_verify(
|
||||
|
||||
new_sha = sha256_of_file(tmp_path)
|
||||
|
||||
# SHA-Gate gegen expected_sha (haerter, aus YAML kuratiert)
|
||||
# SHA-Gate gegen expected_sha
|
||||
if expected_sha and new_sha != expected_sha:
|
||||
tmp_path.unlink(missing_ok=True)
|
||||
return {
|
||||
"ok": False,
|
||||
"sha256": new_sha,
|
||||
"prev_sha256": prev_sha,
|
||||
"locked_sha256": locked_sha,
|
||||
"changed": False,
|
||||
"lock_updated": False,
|
||||
"error": (
|
||||
f"SHA-Pruefung gegen erwarteten Hash fehlgeschlagen: "
|
||||
f"erwartet {expected_sha[:12]}…, erhalten {new_sha[:12]}…"
|
||||
f"SHA-Prüfung fehlgeschlagen: erwartet {expected_sha[:12]}…, "
|
||||
f"erhalten {new_sha[:12]}…"
|
||||
),
|
||||
}
|
||||
|
||||
# SHA-Gate gegen Lock-File (Pferdetausch-Schutz)
|
||||
if locked_sha and new_sha != locked_sha and not accept_new_sha:
|
||||
tmp_path.unlink(missing_ok=True)
|
||||
return {
|
||||
"ok": False,
|
||||
"sha256": new_sha,
|
||||
"prev_sha256": prev_sha,
|
||||
"locked_sha256": locked_sha,
|
||||
"changed": False,
|
||||
"lock_updated": False,
|
||||
"error": (
|
||||
f"Lock-Pruefung fehlgeschlagen: gepinnt {locked_sha[:12]}…, "
|
||||
f"jetzt {new_sha[:12]}…. Pferdetausch-Verdacht — Inhalt manuell "
|
||||
f"pruefen, dann mit --accept-new-sha bestaetigen."
|
||||
),
|
||||
}
|
||||
|
||||
# SHA-Gate gegen bisherige Datei (no-op)
|
||||
# SHA-Gate gegen bisherige Datei
|
||||
if prev_sha and new_sha == prev_sha:
|
||||
tmp_path.unlink(missing_ok=True)
|
||||
lock_updated = False
|
||||
if locked_sha != new_sha:
|
||||
# Datei war schon korrekt, Lock fehlte — initialer Pin.
|
||||
lock[lock_key] = new_sha
|
||||
_save_lock(lock)
|
||||
lock_updated = True
|
||||
logger.info("Datei unveraendert (SHA %s…), kein Ueberschreiben.", new_sha[:12])
|
||||
logger.info("Datei unverändert (SHA %s…), kein Überschreiben.", new_sha[:12])
|
||||
return {
|
||||
"ok": True,
|
||||
"sha256": new_sha,
|
||||
"prev_sha256": prev_sha,
|
||||
"locked_sha256": locked_sha,
|
||||
"changed": False,
|
||||
"lock_updated": lock_updated,
|
||||
"error": None,
|
||||
}
|
||||
|
||||
tmp_path.rename(dest_path)
|
||||
# Lock aktualisieren — initialer Pin oder bewusstes Update via accept_new_sha
|
||||
lock[lock_key] = new_sha
|
||||
_save_lock(lock)
|
||||
logger.info("Gespeichert: %s (SHA %s…)", dest_path.name, new_sha[:12])
|
||||
return {
|
||||
"ok": True,
|
||||
"sha256": new_sha,
|
||||
"prev_sha256": prev_sha,
|
||||
"locked_sha256": locked_sha,
|
||||
"changed": True,
|
||||
"lock_updated": True,
|
||||
"error": None,
|
||||
}
|
||||
|
||||
@ -238,9 +162,7 @@ def fetch_and_verify(
|
||||
"ok": False,
|
||||
"sha256": "",
|
||||
"prev_sha256": prev_sha,
|
||||
"locked_sha256": locked_sha,
|
||||
"changed": False,
|
||||
"lock_updated": False,
|
||||
"error": str(exc),
|
||||
}
|
||||
|
||||
@ -303,39 +225,8 @@ def _cli() -> None:
|
||||
parser.add_argument("--url", help="URL überschreiben (statt erster Kandidat aus YAML)")
|
||||
parser.add_argument("--yes", action="store_true",
|
||||
help="Nicht interaktiv bestätigen (gefährlich)")
|
||||
parser.add_argument("--accept-new-sha", action="store_true",
|
||||
help="Bei Lock-Mismatch: neuen SHA in den Lock uebernehmen (Pferdetausch-Override)")
|
||||
parser.add_argument("--pin-existing", action="store_true",
|
||||
help="Alle bereits vorhandenen PDFs in static/referenzen/ in den Lock pinnen "
|
||||
"(einmalig nach Einfuehrung des Lock-Files)")
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.pin_existing:
|
||||
from .wahlprogramme import WAHLPROGRAMME
|
||||
|
||||
lock = _load_lock()
|
||||
added = 0
|
||||
for bl, parteien in WAHLPROGRAMME.items():
|
||||
for partei, info in parteien.items():
|
||||
dateiname = info.get("file") if isinstance(info, dict) else None
|
||||
if not dateiname:
|
||||
continue
|
||||
pdf_path = _REFERENZEN_DIR / dateiname
|
||||
if not pdf_path.exists():
|
||||
continue
|
||||
key = _lock_key(dateiname)
|
||||
if key in lock:
|
||||
continue
|
||||
lock[key] = sha256_of_file(pdf_path)
|
||||
added += 1
|
||||
print(f" pinned {bl}/{partei}: {dateiname} → {lock[key][:12]}…")
|
||||
if added:
|
||||
_save_lock(lock)
|
||||
print(f"\n{added} neue Eintraege in {_LOCK_FILE.name}.")
|
||||
else:
|
||||
print("Keine neuen Eintraege — alle vorhandenen PDFs sind bereits gepinnt.")
|
||||
sys.exit(0)
|
||||
|
||||
if args.check:
|
||||
missing = get_missing_programmes(args.bl)
|
||||
if not missing:
|
||||
@ -381,14 +272,12 @@ def _cli() -> None:
|
||||
print("Abgebrochen.")
|
||||
sys.exit(0)
|
||||
|
||||
result = fetch_and_verify(url, dest, accept_new_sha=args.accept_new_sha)
|
||||
result = fetch_and_verify(url, dest)
|
||||
if result["ok"]:
|
||||
change_note = "geaendert" if result["changed"] else "unveraendert"
|
||||
change_note = "geändert" if result["changed"] else "unverändert"
|
||||
print(f"OK ({change_note}) — SHA-256: {result['sha256'][:16]}…")
|
||||
if result["lock_updated"]:
|
||||
print(f"Lock aktualisiert in {_LOCK_FILE.name}.")
|
||||
if result["changed"]:
|
||||
print("Hinweis: Embeddings muessen neu indexiert werden (python -m app.reindex_embeddings).")
|
||||
print("Hinweis: Embeddings müssen neu indexiert werden (python -m app.reindex_embeddings).")
|
||||
else:
|
||||
print(f"FEHLER: {result['error']}")
|
||||
sys.exit(1)
|
||||
|
||||
@ -1,44 +0,0 @@
|
||||
# Dev-Compose fuer gwoe-dev.toppyr.de.
|
||||
# Auto-Deploy via Cron: docker compose -f docker-compose.dev.yml up -d --build
|
||||
# Datenbank, Wahlprogramme, Reports: separate Volumes (am Server: /opt/gwoe-antragspruefer-dev/{data,reports})
|
||||
# Mail: bewusst nicht aktiv (kein SMTP-Block)
|
||||
# Keycloak: eigener Public-Client gwoe-antragspruefer-dev
|
||||
services:
|
||||
gwoe-antragspruefer-dev:
|
||||
build: .
|
||||
container_name: gwoe-antragspruefer-dev
|
||||
restart: unless-stopped
|
||||
stop_grace_period: 15m
|
||||
environment:
|
||||
- DASHSCOPE_API_KEY=${DASHSCOPE_API_KEY}
|
||||
- KEYCLOAK_URL=https://sso.toppyr.de
|
||||
- KEYCLOAK_REALM=collaboration
|
||||
- KEYCLOAK_CLIENT_ID=${KEYCLOAK_CLIENT_ID:-gwoe-antragspruefer-dev}
|
||||
- KEYCLOAK_CLIENT_SECRET=${KEYCLOAK_CLIENT_SECRET}
|
||||
- KEYCLOAK_ADMIN_USER=${KEYCLOAK_ADMIN_USER}
|
||||
- KEYCLOAK_ADMIN_PASSWORD=${KEYCLOAK_ADMIN_PASSWORD}
|
||||
- EMBEDDING_MODEL_WRITE=${EMBEDDING_MODEL_WRITE:-text-embedding-v4}
|
||||
- EMBEDDING_MODEL_READ=${EMBEDDING_MODEL_READ:-text-embedding-v3}
|
||||
- BASE_URL=${BASE_URL:-https://gwoe-dev.toppyr.de}
|
||||
- GITEA_TOKEN=${GITEA_TOKEN}
|
||||
- GITEA_API_URL=${GITEA_API_URL:-https://repo.toppyr.de/api/v1}
|
||||
- GITEA_REPO_OWNER=${GITEA_REPO_OWNER:-tobias}
|
||||
- GITEA_REPO_NAME=${GITEA_REPO_NAME:-gwoe-antragspruefer}
|
||||
- GITEA_FEEDBACK_LABELS=${GITEA_FEEDBACK_LABELS:-feedback,dev}
|
||||
- APP_ENV=dev
|
||||
volumes:
|
||||
- ./data:/app/data
|
||||
- ./reports:/app/reports
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.http.routers.gwoe-dev.rule=Host(`gwoe-dev.toppyr.de`)"
|
||||
- "traefik.http.routers.gwoe-dev.entrypoints=websecure"
|
||||
- "traefik.http.routers.gwoe-dev.tls=true"
|
||||
- "traefik.http.routers.gwoe-dev.tls.certresolver=letsencrypt"
|
||||
- "traefik.http.services.gwoe-dev.loadbalancer.server.port=8000"
|
||||
networks:
|
||||
- collaboration_collaboration
|
||||
|
||||
networks:
|
||||
collaboration_collaboration:
|
||||
external: true
|
||||
@ -1,120 +0,0 @@
|
||||
# 0007 — Test-Taxonomie (Unit / Integration / E2E / Property / Smoke)
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
| **Status** | accepted |
|
||||
| **Datum** | 2026-04-28 |
|
||||
| **Refs** | #50 (Umbrella E2E Functional Acceptance), #51-54 (Sub-A-D), #134 (Phase 3 Audit), ADR 0003 |
|
||||
|
||||
## Kontext
|
||||
|
||||
Die Test-Suite ist organisch ueber drei Epochen gewachsen:
|
||||
|
||||
1. **Original Unit-Suite** (#46, #91) — `tests/conftest.py` stubbt
|
||||
`fitz`/`bs4`/`openai`/`pydantic_settings`. Lokal in Sekunden lauffaehig,
|
||||
keine externen Calls, keine Live-Daten.
|
||||
2. **E2E Functional Acceptance** (#50 Umbrella) — `tests/integration/`
|
||||
mit eigenem `conftest.py`, das die Stubs *nicht* setzt. HTTP gegen
|
||||
echte Landtags-Portale, PDF-Parsing mit echtem `fitz`, DB-Lookups
|
||||
gegen `embeddings.db`. Marker `integration`.
|
||||
3. **Playwright UI-Tests** (#120) — `tests/e2e/test_ui.py`, headless
|
||||
Chromium gegen die laufende App. Marker `e2e`.
|
||||
|
||||
Mit dem Backfill aus #134 kamen zusaetzlich:
|
||||
|
||||
- **Property-/Substring-Tests** (ADR 0003) fuer LLM-Zitate gegen PDF-Seiten
|
||||
- **Smoke-Tests** (`test_endpoints_smoke.py`) — Endpoints nur auf
|
||||
Antwortcode + Format pruefen, kein Geschaeftslogik-Detail
|
||||
|
||||
Ohne klare Taxonomie weiss niemand, wo ein neuer Test hingehoert. Folge:
|
||||
ad-hoc Tests werden in `tests/` abgelegt, Marker werden vergessen, und
|
||||
beim CI-Lauf brennen langsame Tests die schnellen mit ab.
|
||||
|
||||
## Optionen
|
||||
|
||||
### Option A — Flacher Test-Ordner ohne formale Kategorien
|
||||
|
||||
Status quo bis zu #50: alle Tests unter `tests/`, Marker frei waehlbar.
|
||||
**Vorteile:** keine Migrationskosten, niedrige kognitive Last.
|
||||
**Nachteile:** Kategorien implizit, Stub-Setup kollidiert mit echten
|
||||
Imports, Lauf-Dauer schwankt unvorhersehbar.
|
||||
|
||||
### Option B — Drei harte Verzeichnisse (`tests/unit/`, `tests/integration/`, `tests/e2e/`)
|
||||
|
||||
Strenge raeumliche Trennung mit jeweils eigenem `conftest.py`.
|
||||
**Vorteile:** Stub-Konflikte ausgeschlossen, einfaches `pytest tests/unit/`.
|
||||
**Nachteile:** grosse Migration; viele bestehende Test-Files muessten
|
||||
in `unit/` umziehen; verschachtelte Pfade werden vom Test-Runner und
|
||||
von Reports etwas sperriger.
|
||||
|
||||
### Option C — Flacher Ordner + verbindliche Marker (gewaehlt)
|
||||
|
||||
`tests/` flach, aber **jede** Datei traegt einen klaren Kategorie-Marker
|
||||
(`integration`, `e2e`, `slow`) oder ist Default-Unit. Neue
|
||||
Sub-Verzeichnisse nur wenn sie strukturell notwendig sind (z.B.
|
||||
`tests/integration/` weil dort ein anderer `conftest.py` lebt — keine
|
||||
Stubs).
|
||||
|
||||
**Vorteile:** wenig Migrationsschmerz, Default-Run laeuft schnell, aber
|
||||
opt-in zu langsamen Suiten ist explizit (`pytest -m integration`).
|
||||
**Nachteile:** Disziplin noetig, Marker mssen gepflegt werden.
|
||||
|
||||
## Entscheidung
|
||||
|
||||
**Option C** mit folgender expliziter Taxonomie:
|
||||
|
||||
| Typ | Marker | Verzeichnis | Latenz | Was ist erlaubt |
|
||||
|---|---|---|---|---|
|
||||
| **Unit** | (none, default) | `tests/*.py` | < 100 ms / Test | Reines Python, alle externen Dependencies gestubbed in `tests/conftest.py`. Domain-Logik, Validatoren, Pure Functions, Datenstrukturen. |
|
||||
| **Smoke** | (none, default) | `tests/test_*_smoke.py` | < 200 ms / Test | TestClient gegen `app.main`, nur Status-Code + Pflicht-Felder pruefen. Skipped wenn `app.main` nicht importierbar. |
|
||||
| **Property** | (none, default) | `tests/test_citations_substring.py` u.a. | < 500 ms / Test | Invarianten-Checks gegen Fixture-Corpus. Substrings, Strukturmuster. PDF-Parsing erlaubt, aber nur gegen Fixtures im Repo. |
|
||||
| **Integration** | `integration` | `tests/integration/` | < 5 s / Test, gesamt < 5 min | Echtes HTTP gegen Landtags-Portale, echtes `fitz` gegen reale PDFs, DB-Lookups gegen `embeddings.db`. Eigenes `conftest.py` ohne Stubs. Opt-in via `pytest -m integration`. |
|
||||
| **E2E** | `e2e` | `tests/e2e/` | < 30 s / Test | Headless-Chromium gegen lokal laufende App oder Prod-URL. Tests koennen flaky sein — werden NICHT von Default-Run getriggert. |
|
||||
| **Slow** | `slow` | (queruerend) | beliebig | Marker-Suffix zu jedem Typ. Ausschliessbar via `pytest -m "not slow"`. Beispiel: ein Integration-Test, der pro BL einen Wahlprogramm-PDF herunterlaedt. |
|
||||
|
||||
**Lauf-Konvention** (verbindlich, in `pytest.ini` definiert):
|
||||
|
||||
```bash
|
||||
pytest # Default — Unit + Smoke + Property, ~1s
|
||||
pytest -m integration # nur E2E-Functional-Acceptance, ~5 min
|
||||
pytest -m "integration and not slow" # E2E ohne PDF-Downloads
|
||||
pytest -m e2e # nur Playwright-UI-Tests
|
||||
pytest -m "" tests/ # ALLES (auch lokal selten gebraucht)
|
||||
```
|
||||
|
||||
**Naming-Konvention:**
|
||||
- `test_<modul>.py` — Unit-Tests fuer ein Modul
|
||||
- `test_<feature>_smoke.py` — Smoke-Tests
|
||||
- `test_<feature>_substring.py` / `_substring_*` — Property-Tests
|
||||
- Integration- und E2E-Tests heissen wie das Feature, das sie testen
|
||||
(z.B. `test_adapters_live.py`, `test_ui.py`).
|
||||
|
||||
## Konsequenzen
|
||||
|
||||
### Positiv
|
||||
|
||||
- Default-Run bleibt schnell (< 2s) — niemand wartet bei jedem Save.
|
||||
- Klar, wo neue Tests landen: jeder neue Test im Default-Ordner ist
|
||||
ein **Unit-Test** mit Stubs; alles, was Live-HTTP/PDF/LLM braucht,
|
||||
geht zwingend nach `tests/integration/`.
|
||||
- CI kann Default-Suite als Pre-Commit-Gate nutzen, Integration-Suite
|
||||
nightly oder pre-deploy.
|
||||
|
||||
### Negativ
|
||||
|
||||
- Disziplin noetig: Marker vergessen → langsame Tests im Default-Run
|
||||
oder unbemerkte Lueckentest. Code-Review muss darauf achten.
|
||||
- Smoke-Tests sind technisch keine Unit-Tests (importieren `app.main`),
|
||||
aber wir behandeln sie wegen geringer Latenz als Default. Ausnahme
|
||||
bewusst akzeptiert.
|
||||
|
||||
### Folgen fuer andere ADRs
|
||||
|
||||
- **ADR 0003** (Sub-D Citation-Property-Tests) bleibt gueltig; Property-Tests
|
||||
sind hier explizit als eigene Kategorie verortet.
|
||||
- Folge-Issue: Coverage-Baseline (`.coveragerc` mit `fail_under` pro
|
||||
Modul) — nicht im Skopus dieses ADRs, sondern eigenstaendig in
|
||||
Phase 3 von #134.
|
||||
- Folge-Arbeit: einzelne bestehende Test-Files umtaggen, falls sie
|
||||
faktisch Integration sind aber als Unit liefen (Audit ergab: keine
|
||||
bekannten Faelle, alle Live-Calls liegen in `tests/integration/`).
|
||||
@ -1,127 +0,0 @@
|
||||
# 0009 — Plenarprotokoll-Parser-Registry pro Bundesland
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
| **Status** | accepted |
|
||||
| **Datum** | 2026-04-28 |
|
||||
| **Refs** | #106, #126, ADR 0002 (Adapter-Pattern) |
|
||||
|
||||
## Kontext
|
||||
|
||||
Der NRW-Plenarprotokoll-Parser (#106) ist deterministisch, anchor-basiert
|
||||
und erreicht 19/19 auf der MMP18-119-Fixture. Damit war die Architektur-Frage
|
||||
gelöst — aber nur fuer NRW. Andere Bundeslaender publizieren ihre
|
||||
Plenarprotokolle in fundamental anderen Formaten:
|
||||
|
||||
- Hessen: HTML mit semantischen Tags pro Beschluss
|
||||
- Brandenburg: PDF mit Tabellen-Layout fuer Vote-Counts
|
||||
- Mecklenburg-Vorpommern: ParLDok-XML-Export
|
||||
- Berlin: PDF mit eigenem Formularkasten-Schema
|
||||
- ...
|
||||
|
||||
Ein einziger Parser fuer alle BL ist nicht baubar. Die Reverse-Engineering-
|
||||
Arbeit pro Landtag ist substantiell und passiert phasenweise: zuerst NRW
|
||||
wegen der hohen Antragsdichte, danach BL fuer BL nach Bedarf.
|
||||
|
||||
Das Adapter-Pattern aus ADR 0002 (`ParlamentAdapter`) hat dieses Problem
|
||||
fuer die Antrags-Suche bereits geloest. Plenarprotokoll-Parser ist die
|
||||
naechste Familie mit derselben Form: pro BL eine eigene Implementierung,
|
||||
ein gemeinsamer Aufruf-Vertrag, ein Registry-Lookup.
|
||||
|
||||
## Optionen
|
||||
|
||||
### Option A — Eine grosse Datei mit If-Else-Dispatch
|
||||
|
||||
Eine einzige `app/protokoll_parser.py`-Datei mit einem `parse_protocol(bl, pdf)`,
|
||||
das je nach BL andere Funktionen ruft. **Vorteile:** flach, einfach.
|
||||
**Nachteile:** waechst zur 2000-LOC-Datei, BL-spezifische Reverse-Engineering-
|
||||
Notizen und Helper-Functions vermischen sich, schlechte Test-Isolation.
|
||||
|
||||
### Option B — OOP-Hierarchie mit `ProtokollParserBase` als ABC
|
||||
|
||||
Abstrakte Basisklasse mit `parse(pdf_path) -> list[VoteResult]`,
|
||||
konkrete Subklassen pro BL. **Vorteile:** typisierter Vertrag.
|
||||
**Nachteile:** Boilerplate fuer Klassen-Definitionen ohne Mehrwert,
|
||||
weil der NRW-Parser keine Instanz-State hat (alles `def`-Funktionen,
|
||||
keine `self.x`).
|
||||
|
||||
### Option C — Sub-Package mit Funktions-Registry (gewaehlt)
|
||||
|
||||
`app/protokoll_parsers/` als Sub-Package, pro BL eine eigene Datei
|
||||
(`nrw.py`, `mv.py`, `he.py`, ...) die mindestens
|
||||
`parse_protocol(pdf_path: str) -> list[dict]` exportiert. Eine
|
||||
`PROTOKOLL_PARSERS`-Dict in `__init__.py` mappt BL-Code → Funktion.
|
||||
Das BL-uebergreifende `parse_protocol(bl, pdf_path)` macht den Lookup.
|
||||
|
||||
**Vorteile:**
|
||||
- Konsistent mit dem `ADAPTERS`-Dict in `parlamente.py` (ADR 0002)
|
||||
- BL-Code lebt in eigener Datei mit eigenen Helpern und Notizen
|
||||
- Neue BL = neue Datei + ein Eintrag in `__init__.py`, kein Refactoring
|
||||
- Tests pro BL in eigener Test-Datei (`tests/test_protokoll_parsers_<bl>.py`)
|
||||
- Parser-Funktionen bleiben simpel, kein OOP-Overhead
|
||||
|
||||
**Nachteile:**
|
||||
- Vertrag ist nur per Convention dokumentiert (nicht via Type-System
|
||||
erzwingbar) — dafuer ein Schema-Test in `test_protokoll_parsers.py`
|
||||
als Sicherheitsnetz.
|
||||
|
||||
## Entscheidung
|
||||
|
||||
**Option C.** Konkret:
|
||||
|
||||
```
|
||||
app/protokoll_parsers/
|
||||
├── __init__.py # Registry + parse_protocol(bl, pdf) + supported_bundeslaender()
|
||||
├── nrw.py # NRW v5 (vorher app/protokoll_parser_nrw.py)
|
||||
└── <bl>.py # je BL eine Datei, sobald implementiert
|
||||
```
|
||||
|
||||
**Vertrag fuer jeden Parser** (verbindlich):
|
||||
|
||||
```python
|
||||
def parse_protocol(pdf_path: str) -> list[dict]:
|
||||
"""Returns: [
|
||||
{
|
||||
"drucksache": str | None,
|
||||
"ergebnis": str, # angenommen/abgelehnt/ueberwiesen/...
|
||||
"einstimmig": bool,
|
||||
"kind": str, # parser-intern, fuer Debug
|
||||
"votes": {
|
||||
"ja": list[str], # Fraktions-Codes (CDU, SPD, GRUENE, ...)
|
||||
"nein": list[str],
|
||||
"enthaltung": list[str],
|
||||
},
|
||||
},
|
||||
...
|
||||
]"""
|
||||
```
|
||||
|
||||
**Naming:** Datei-Stem = lowercase BL-Code (`nrw.py`, `mv.py`, ...).
|
||||
Registry-Key = uppercase BL-Code (`"NRW"`, `"MV"`).
|
||||
|
||||
**Konsumenten** rufen `parse_protocol(bundesland, pdf_path)` aus dem
|
||||
Sub-Package, nicht direkt eine BL-Datei.
|
||||
|
||||
## Konsequenzen
|
||||
|
||||
### Positiv
|
||||
|
||||
- Folge-BL-Implementierungen ohne Refactoring der Bestands-Logik.
|
||||
- Reverse-Engineering-Notizen leben pro BL in einer Datei statt verteilt
|
||||
ueber eine Mega-Datei.
|
||||
- Der `supported_bundeslaender()`-Helper macht in CLI und UI sofort
|
||||
sichtbar, wo Daten verfuegbar sind und wo nicht.
|
||||
- Neue Adapter-Test-Files folgen demselben Schema (`test_protokoll_parsers_<bl>.py`).
|
||||
|
||||
### Negativ
|
||||
|
||||
- Schema-Vertrag nur per Convention (kein TypedDict). Dafuer ein
|
||||
Smoke-Test in `tests/test_protokoll_parsers.py`, der pro registriertem
|
||||
Parser die Result-Keys pruefen wird, sobald >1 Implementation existiert.
|
||||
|
||||
### Folgen fuer andere ADRs
|
||||
|
||||
- ADR 0002 (Adapter-Pattern) bleibt gueltig; dieses ADR ueberbruckt es
|
||||
nicht, sondern wendet das gleiche Muster auf eine zweite Adapter-Familie an.
|
||||
- Folge-Issues (HE/BB/MV/BE/...) sind reine Implementation-Tickets ohne
|
||||
Architektur-Diskussion — der Vertrag ist hier festgelegt.
|
||||
@ -23,9 +23,7 @@ und Konsequenzen. Format inspiriert von [Michael Nygard](https://cognitect.com/b
|
||||
| [0004](0004-deployment-workflow.md) | Docker Compose Deploy mit DB-/Reports-Volume und SN-XML-Sonderpfad | accepted | 2026-04-10 |
|
||||
| [0005](0005-keycloak-sso-with-dev-bypass.md) | Keycloak SSO mit Dev-Bypass-Fallback | accepted | 2026-04-10 |
|
||||
| [0006](0006-embedding-model-migration-v3-to-v4.md) | Embedding-Modell-Migration text-embedding-v3 → v4 | accepted | 2026-04-11 |
|
||||
| [0007](0007-test-taxonomy.md) | Test-Taxonomie (Unit / Integration / E2E / Property / Smoke) | accepted | 2026-04-28 |
|
||||
| [0008](0008-ddd-lightweight-migration.md) | DDD-Lightweight-Migration (Repository, LLM-Port, Domain-Verhalten) | accepted | 2026-04-20 |
|
||||
| [0009](0009-protokoll-parser-registry.md) | Plenarprotokoll-Parser-Registry pro Bundesland | accepted | 2026-04-28 |
|
||||
|
||||
## Wann ADR, wann nicht
|
||||
|
||||
|
||||
@ -11,4 +11,3 @@
|
||||
|
||||
pytest>=8.0.0
|
||||
pytest-asyncio>=0.24.0
|
||||
pytest-cov>=5.0.0
|
||||
|
||||
@ -24,28 +24,6 @@ fi
|
||||
|
||||
cd "$PROJECT_DIR"
|
||||
|
||||
# Branch-Guard: Prod (gwoe.toppyr.de) ist auf release/1.0 festgelegt.
|
||||
# 1.x-Entwicklung laeuft auf gwoe-dev.toppyr.de via Cron-Auto-Deploy aus main.
|
||||
CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo "")
|
||||
EXPECTED_BRANCH="release/1.0"
|
||||
if [ "${1:-}" = "--force" ]; then
|
||||
shift
|
||||
echo "⚠ --force aktiv: Branch-Guard übersprungen ($CURRENT_BRANCH)"
|
||||
elif [ "$CURRENT_BRANCH" != "$EXPECTED_BRANCH" ]; then
|
||||
cat <<EOF
|
||||
✗ Prod-Deploy abgebrochen: lokal aktiv ist '$CURRENT_BRANCH', erwartet '$EXPECTED_BRANCH'.
|
||||
|
||||
Prod (gwoe.toppyr.de) ist auf release/1.0 festgelegt. Vor einem Deploy:
|
||||
git checkout release/1.0
|
||||
|
||||
Fuer Dev (gwoe-dev.toppyr.de) braucht es kein deploy.sh — der Server zieht
|
||||
main per Cron alle 5 Minuten.
|
||||
|
||||
Mit --force kann der Guard ueberbruckt werden (nur in Notfaellen).
|
||||
EOF
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "=== GWÖ-Antragsprüfer Deploy ==="
|
||||
|
||||
# 1. Uptime Kuma auf Wartung setzen
|
||||
|
||||
@ -88,21 +88,6 @@ class TestExtractDrucksache:
|
||||
html = "Seite 3/12 — nicht relevant"
|
||||
assert extract_drucksache_from_intro(html) is None
|
||||
|
||||
def test_rp_pattern_nr_wp_swap(self):
|
||||
"""RP-URL '/538-18.pdf' → drucksache-Format 'wp/nr' = '18/538'.
|
||||
Wir vermeiden im HTML jegliche 'wp/nr'-Notation, sonst greift der
|
||||
generische 'Drucksache (\\d+)/(\\d+)'-Match zuerst."""
|
||||
from app.abgeordnetenwatch import extract_drucksache_from_intro
|
||||
html = '<a href="https://landtag.rlp.de/dokumente/538-18.pdf">Antrag</a>'
|
||||
result = extract_drucksache_from_intro(html)
|
||||
assert result == "18/538"
|
||||
|
||||
def test_sn_pattern_dok_nr_leg_per_swap(self):
|
||||
"""SN-URL 'dok_nr=2150&...&leg_per=8' → '8/2150'."""
|
||||
from app.abgeordnetenwatch import extract_drucksache_from_intro
|
||||
html = '<a href="/cgi-bin/foo?dok_nr=2150&extra=x&leg_per=8">DS</a>'
|
||||
assert extract_drucksache_from_intro(html) == "8/2150"
|
||||
|
||||
def test_two_digit_wp_number(self):
|
||||
from app.abgeordnetenwatch import extract_drucksache_from_intro
|
||||
html = "Bezug: 19/12345"
|
||||
|
||||
@ -93,81 +93,3 @@ class TestDelete:
|
||||
def test_delete_by_id_missing_returns_false(self):
|
||||
repo = InMemoryAbonnementRepository()
|
||||
assert _run(repo.delete_by_id(999)) is False
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# SqliteAbonnementRepository — Delegation an database.* (#134 Coverage-Backfill)
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestSqliteAbonnementRepositoryDelegation:
|
||||
"""Die Sqlite-Variante ist nur ein duenner Wrapper um Module-Funktionen
|
||||
in app.database. Test prueft dass jede Methode korrekt delegiert,
|
||||
ohne echte DB-Calls (Module-Funktionen werden gemockt)."""
|
||||
|
||||
def test_create_delegates(self):
|
||||
from unittest.mock import AsyncMock, patch
|
||||
from app.repositories.abonnement_repository import SqliteAbonnementRepository
|
||||
repo = SqliteAbonnementRepository()
|
||||
with patch("app.repositories.abonnement_repository.database.create_subscription",
|
||||
new=AsyncMock(return_value=42)) as m:
|
||||
result = _run(repo.create("u1", "a@b.de", "NRW", "CDU", "weekly"))
|
||||
assert result == 42
|
||||
m.assert_called_once_with("u1", "a@b.de", "NRW", "CDU", "weekly")
|
||||
|
||||
def test_list_by_user_delegates(self):
|
||||
from unittest.mock import AsyncMock, patch
|
||||
from app.repositories.abonnement_repository import SqliteAbonnementRepository
|
||||
repo = SqliteAbonnementRepository()
|
||||
fake = [{"id": 1, "email": "x@y"}]
|
||||
with patch("app.repositories.abonnement_repository.database.list_subscriptions",
|
||||
new=AsyncMock(return_value=fake)) as m:
|
||||
assert _run(repo.list_by_user("u1")) == fake
|
||||
m.assert_called_once_with("u1")
|
||||
|
||||
def test_list_all_delegates(self):
|
||||
from unittest.mock import AsyncMock, patch
|
||||
from app.repositories.abonnement_repository import SqliteAbonnementRepository
|
||||
with patch("app.repositories.abonnement_repository.database.list_all_subscriptions",
|
||||
new=AsyncMock(return_value=[])) as m:
|
||||
assert _run(SqliteAbonnementRepository().list_all()) == []
|
||||
m.assert_called_once_with()
|
||||
|
||||
def test_list_due_delegates(self):
|
||||
from unittest.mock import AsyncMock, patch
|
||||
from app.repositories.abonnement_repository import SqliteAbonnementRepository
|
||||
with patch("app.repositories.abonnement_repository.database.get_all_subscriptions_due",
|
||||
new=AsyncMock(return_value=[])) as m:
|
||||
_run(SqliteAbonnementRepository().list_due("weekly"))
|
||||
m.assert_called_once_with("weekly")
|
||||
|
||||
def test_delete_delegates(self):
|
||||
from unittest.mock import AsyncMock, patch
|
||||
from app.repositories.abonnement_repository import SqliteAbonnementRepository
|
||||
with patch("app.repositories.abonnement_repository.database.delete_subscription",
|
||||
new=AsyncMock(return_value=True)) as m:
|
||||
_run(SqliteAbonnementRepository().delete("u1", 5))
|
||||
m.assert_called_once_with("u1", 5)
|
||||
|
||||
def test_delete_by_id_delegates(self):
|
||||
from unittest.mock import AsyncMock, patch
|
||||
from app.repositories.abonnement_repository import SqliteAbonnementRepository
|
||||
with patch("app.repositories.abonnement_repository.database.delete_subscription_by_id",
|
||||
new=AsyncMock(return_value=False)) as m:
|
||||
_run(SqliteAbonnementRepository().delete_by_id(99))
|
||||
m.assert_called_once_with(99)
|
||||
|
||||
def test_mark_sent_delegates(self):
|
||||
from unittest.mock import AsyncMock, patch
|
||||
from app.repositories.abonnement_repository import SqliteAbonnementRepository
|
||||
with patch("app.repositories.abonnement_repository.database.mark_subscription_sent",
|
||||
new=AsyncMock(return_value=None)) as m:
|
||||
_run(SqliteAbonnementRepository().mark_sent(7))
|
||||
m.assert_called_once_with(7)
|
||||
|
||||
|
||||
def test_get_abonnement_repository_returns_singleton():
|
||||
from app.repositories.abonnement_repository import get_abonnement_repository
|
||||
a = get_abonnement_repository()
|
||||
b = get_abonnement_repository()
|
||||
assert a is b
|
||||
|
||||
@ -60,82 +60,3 @@ class TestMarkdownStripping:
|
||||
wrapped = f"{SAMPLE_JSON}\n```"
|
||||
cleaned = _strip_markdown_fences(wrapped)
|
||||
assert json.loads(cleaned)["gwoeScore"] == 7.0
|
||||
|
||||
|
||||
# ─── Coverage-Backfill (#134) ────────────────────────────────────────────────
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestContentFingerprint:
|
||||
"""analyzer._content_fingerprint mirrored Helper, getrennt von qwen_bewerter."""
|
||||
|
||||
def test_empty_returns_len_zero(self):
|
||||
from app.analyzer import _content_fingerprint
|
||||
assert _content_fingerprint("") == "len=0"
|
||||
|
||||
def test_non_empty_includes_sha1(self):
|
||||
from app.analyzer import _content_fingerprint
|
||||
result = _content_fingerprint("hallo welt")
|
||||
assert result.startswith("len=10 sha1=")
|
||||
|
||||
|
||||
class TestGetDefaultBewerter:
|
||||
def test_returns_qwen_instance(self, monkeypatch):
|
||||
"""Lazy-Import: get_default_bewerter() ruft QwenBewerter()."""
|
||||
from app import analyzer
|
||||
from unittest.mock import MagicMock
|
||||
# Stub QwenBewerter im Adapter-Pfad, sodass kein echter Import passiert
|
||||
import sys
|
||||
fake_module = type(sys)("app.adapters.qwen_bewerter")
|
||||
fake_module.QwenBewerter = MagicMock(return_value="fake-bewerter")
|
||||
monkeypatch.setitem(sys.modules, "app.adapters.qwen_bewerter", fake_module)
|
||||
|
||||
result = analyzer.get_default_bewerter()
|
||||
assert result == "fake-bewerter"
|
||||
|
||||
|
||||
class TestLoadContextFile:
|
||||
def test_returns_text_when_file_exists(self, tmp_path, monkeypatch):
|
||||
from app import analyzer
|
||||
target = tmp_path / "test.txt"
|
||||
target.write_text("Hallo Welt")
|
||||
monkeypatch.setattr(analyzer, "KONTEXT_DIR", tmp_path)
|
||||
assert analyzer.load_context_file("test.txt") == "Hallo Welt"
|
||||
|
||||
def test_returns_empty_when_file_missing(self, tmp_path, monkeypatch):
|
||||
from app import analyzer
|
||||
monkeypatch.setattr(analyzer, "KONTEXT_DIR", tmp_path)
|
||||
assert analyzer.load_context_file("missing.txt") == ""
|
||||
|
||||
|
||||
class TestGetUserPromptTemplate:
|
||||
def test_returns_template_with_placeholders(self):
|
||||
from app.analyzer import get_user_prompt_template
|
||||
t = get_user_prompt_template()
|
||||
# Alle vier Platzhalter muessen drinstehen
|
||||
for ph in ("{bundesland_context}", "{quotes_context}",
|
||||
"{text}", "{pflicht_fraktionen}"):
|
||||
assert ph in t
|
||||
|
||||
|
||||
class TestGetBundeslandContext:
|
||||
def test_unknown_bundesland_raises(self):
|
||||
from app.analyzer import get_bundesland_context
|
||||
with pytest.raises(ValueError, match="Unbekanntes Bundesland"):
|
||||
get_bundesland_context("XX")
|
||||
|
||||
def test_inactive_bundesland_raises(self, monkeypatch):
|
||||
from app import analyzer
|
||||
from app.bundeslaender import BUNDESLAENDER, Bundesland
|
||||
# Erstellen einer inaktiven BL-Instanz
|
||||
if "NRW" not in BUNDESLAENDER:
|
||||
pytest.skip("NRW nicht in BUNDESLAENDER")
|
||||
original = BUNDESLAENDER["NRW"]
|
||||
# Replace with inactive copy
|
||||
inactive = Bundesland(
|
||||
**{**original.__dict__, "aktiv": False}
|
||||
)
|
||||
monkeypatch.setitem(BUNDESLAENDER, "NRW", inactive)
|
||||
with pytest.raises(ValueError, match="nicht aktiv"):
|
||||
analyzer.get_bundesland_context("NRW")
|
||||
|
||||
@ -156,55 +156,3 @@ class TestInitialSeed:
|
||||
seed = [_make_assessment("18/1"), _make_assessment("18/2")]
|
||||
repo = InMemoryAntragRepository(initial=seed)
|
||||
assert len(_run(repo.list())) == 2
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# SqliteAntragRepository — Delegation an database.* (#134 Coverage-Backfill)
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestSqliteAntragRepositoryDelegation:
|
||||
def test_save_delegates(self):
|
||||
from unittest.mock import AsyncMock, patch
|
||||
from app.repositories.antrag_repository import SqliteAntragRepository
|
||||
with patch("app.repositories.antrag_repository.database.upsert_assessment",
|
||||
new=AsyncMock(return_value=True)) as m:
|
||||
assert _run(SqliteAntragRepository().save({"drucksache": "x"})) is True
|
||||
m.assert_called_once_with({"drucksache": "x"})
|
||||
|
||||
def test_get_delegates(self):
|
||||
from unittest.mock import AsyncMock, patch
|
||||
from app.repositories.antrag_repository import SqliteAntragRepository
|
||||
with patch("app.repositories.antrag_repository.database.get_assessment",
|
||||
new=AsyncMock(return_value={"x": 1})) as m:
|
||||
assert _run(SqliteAntragRepository().get("18/1")) == {"x": 1}
|
||||
m.assert_called_once_with("18/1")
|
||||
|
||||
def test_list_delegates(self):
|
||||
from unittest.mock import AsyncMock, patch
|
||||
from app.repositories.antrag_repository import SqliteAntragRepository
|
||||
with patch("app.repositories.antrag_repository.database.get_all_assessments",
|
||||
new=AsyncMock(return_value=[])) as m:
|
||||
_run(SqliteAntragRepository().list("NRW"))
|
||||
m.assert_called_once_with("NRW")
|
||||
|
||||
def test_search_delegates(self):
|
||||
from unittest.mock import AsyncMock, patch
|
||||
from app.repositories.antrag_repository import SqliteAntragRepository
|
||||
with patch("app.repositories.antrag_repository.database.search_assessments",
|
||||
new=AsyncMock(return_value=[])) as m:
|
||||
_run(SqliteAntragRepository().search("klima", "NRW", 25))
|
||||
m.assert_called_once_with("klima", "NRW", 25)
|
||||
|
||||
def test_delete_delegates(self):
|
||||
from unittest.mock import AsyncMock, patch
|
||||
from app.repositories.antrag_repository import SqliteAntragRepository
|
||||
with patch("app.repositories.antrag_repository.database.delete_assessment",
|
||||
new=AsyncMock(return_value=True)) as m:
|
||||
_run(SqliteAntragRepository().delete("18/1"))
|
||||
m.assert_called_once_with("18/1")
|
||||
|
||||
|
||||
def test_get_antrag_repository_returns_singleton():
|
||||
from app.repositories.antrag_repository import get_antrag_repository
|
||||
assert get_antrag_repository() is get_antrag_repository()
|
||||
|
||||
@ -225,90 +225,3 @@ class TestExportLongFormat:
|
||||
# Generic FREIE WÄHLER darf in der Zeile NICHT auftauchen
|
||||
bb_lines = [l for l in csv_text.splitlines() if "BB" in l and "8/2," in l]
|
||||
assert any("BVB-FW" in l for l in bb_lines)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Edge-Cases (#134 Coverage-Backfill)
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestLoadAssessmentsRobustness:
|
||||
"""_load_assessments toleriert kaputte JSON-Eintraege im fraktionen-Feld."""
|
||||
|
||||
def test_invalid_json_in_fraktionen_falls_back_to_empty(self, tmp_path):
|
||||
from app.auswertungen import _load_assessments
|
||||
db = tmp_path / "broken.db"
|
||||
conn = sqlite3.connect(str(db))
|
||||
conn.execute("""
|
||||
CREATE TABLE assessments (
|
||||
drucksache TEXT PRIMARY KEY, title TEXT,
|
||||
fraktionen TEXT, datum TEXT, bundesland TEXT,
|
||||
gwoe_score REAL, link TEXT, gwoe_begruendung TEXT,
|
||||
gwoe_matrix TEXT, gwoe_schwerpunkt TEXT,
|
||||
wahlprogramm_scores TEXT, verbesserungen TEXT,
|
||||
staerken TEXT, schwaechen TEXT, empfehlung TEXT,
|
||||
empfehlung_symbol TEXT, verbesserungspotenzial TEXT,
|
||||
themen TEXT, antrag_zusammenfassung TEXT,
|
||||
antrag_kernpunkte TEXT, source TEXT, model TEXT,
|
||||
created_at TEXT, updated_at TEXT
|
||||
)
|
||||
""")
|
||||
# fraktionen-Feld enthaelt kein gueltiges JSON
|
||||
conn.execute(
|
||||
"INSERT INTO assessments (drucksache, bundesland, datum, fraktionen, gwoe_score) "
|
||||
"VALUES (?, ?, ?, ?, ?)",
|
||||
("18/777", "NRW", "2024-01-01", "{not json", 5.0),
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
rows = _load_assessments(db)
|
||||
assert len(rows) == 1
|
||||
assert rows[0]["fraktionen"] == [] # Fallback
|
||||
|
||||
|
||||
class TestAggregateMatrixSkipsBlanks:
|
||||
def test_skips_assessments_without_bundesland(self, tmp_path):
|
||||
"""Anträge ohne bundesland werden ignoriert (continue-Branch line 115)."""
|
||||
from app.auswertungen import aggregate_matrix
|
||||
db = tmp_path / "blanks.db"
|
||||
conn = sqlite3.connect(str(db))
|
||||
conn.execute("""
|
||||
CREATE TABLE assessments (
|
||||
drucksache TEXT PRIMARY KEY, title TEXT,
|
||||
fraktionen TEXT, datum TEXT, bundesland TEXT,
|
||||
gwoe_score REAL, link TEXT, gwoe_begruendung TEXT,
|
||||
gwoe_matrix TEXT, gwoe_schwerpunkt TEXT,
|
||||
wahlprogramm_scores TEXT, verbesserungen TEXT,
|
||||
staerken TEXT, schwaechen TEXT, empfehlung TEXT,
|
||||
empfehlung_symbol TEXT, verbesserungspotenzial TEXT,
|
||||
themen TEXT, antrag_zusammenfassung TEXT,
|
||||
antrag_kernpunkte TEXT, source TEXT, model TEXT,
|
||||
created_at TEXT, updated_at TEXT
|
||||
)
|
||||
""")
|
||||
conn.execute(
|
||||
"INSERT INTO assessments (drucksache, bundesland, datum, fraktionen, gwoe_score) "
|
||||
"VALUES (?, ?, ?, ?, ?)",
|
||||
("X/1", None, "2024-01-01", '["CDU"]', 7.0), # bundesland NULL
|
||||
)
|
||||
conn.execute(
|
||||
"INSERT INTO assessments (drucksache, bundesland, datum, fraktionen, gwoe_score) "
|
||||
"VALUES (?, ?, ?, ?, ?)",
|
||||
("18/1", "NRW", "2024-01-01", '["CDU"]', 7.0),
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
m = aggregate_matrix(db_path=db)
|
||||
assert m["total"] == 1 # nur der NRW-Eintrag
|
||||
assert m["bundeslaender"] == ["NRW"]
|
||||
|
||||
|
||||
class TestGetWahlperioden:
|
||||
def test_returns_sorted_list(self, sample_db):
|
||||
from app.auswertungen import get_wahlperioden
|
||||
wps = get_wahlperioden(db_path=sample_db)
|
||||
assert wps == sorted(wps)
|
||||
# Sample-DB enthaelt NRW-WP18, MV-WP8, MV-WP7 sowie BB-WP8
|
||||
assert any("NRW" in w for w in wps)
|
||||
|
||||
@ -212,257 +212,3 @@ class TestPickBestTitle:
|
||||
|
||||
def test_empty_doc_title_uses_llm(self):
|
||||
assert _pick_best_title("Guter LLM-Titel", "", "18/123") == "Guter LLM-Titel"
|
||||
|
||||
|
||||
# ─── Coverage-Backfill (#134) ────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestKeycloakUrls:
|
||||
def test_issuer_includes_realm(self, monkeypatch):
|
||||
from app import auth
|
||||
from app.config import settings
|
||||
monkeypatch.setattr(settings, "keycloak_url", "https://sso.example")
|
||||
monkeypatch.setattr(settings, "keycloak_realm", "myrealm")
|
||||
assert auth._keycloak_issuer() == "https://sso.example/realms/myrealm"
|
||||
|
||||
def test_jwks_url_appends_certs(self, monkeypatch):
|
||||
from app import auth
|
||||
from app.config import settings
|
||||
monkeypatch.setattr(settings, "keycloak_url", "https://sso.example")
|
||||
monkeypatch.setattr(settings, "keycloak_realm", "myrealm")
|
||||
assert auth._keycloak_jwks_url() == (
|
||||
"https://sso.example/realms/myrealm/protocol/openid-connect/certs"
|
||||
)
|
||||
|
||||
|
||||
class TestGetJwks:
|
||||
"""JWKS-Cache-Verhalten + HTTP-Fehler-Pfad."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_returns_cached_when_fresh(self, monkeypatch):
|
||||
from app import auth
|
||||
import time as _time
|
||||
# Stelle sicher: Cache ist gesetzt + nicht abgelaufen
|
||||
monkeypatch.setattr(auth, "_jwks_cache", {"keys": [{"kid": "abc"}]})
|
||||
monkeypatch.setattr(auth, "_jwks_cache_time", _time.time())
|
||||
|
||||
result = await auth._get_jwks()
|
||||
assert result == {"keys": [{"kid": "abc"}]}
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_fetches_when_cache_empty(self, monkeypatch):
|
||||
from app import auth
|
||||
import httpx as _httpx
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
monkeypatch.setattr(auth, "_jwks_cache", {})
|
||||
monkeypatch.setattr(auth, "_jwks_cache_time", 0)
|
||||
|
||||
fake_resp = MagicMock(status_code=200, json=lambda: {"keys": [{"kid": "new"}]})
|
||||
|
||||
async def fake_get(self, url):
|
||||
return fake_resp
|
||||
|
||||
with patch.object(_httpx.AsyncClient, "get", fake_get):
|
||||
result = await auth._get_jwks()
|
||||
assert "keys" in result
|
||||
assert result["keys"][0]["kid"] == "new"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_http_error_returns_stale_cache(self, monkeypatch):
|
||||
from app import auth
|
||||
import httpx as _httpx
|
||||
from unittest.mock import patch
|
||||
|
||||
# Stale cache vorhanden
|
||||
monkeypatch.setattr(auth, "_jwks_cache", {"keys": [{"kid": "old"}]})
|
||||
monkeypatch.setattr(auth, "_jwks_cache_time", 0) # abgelaufen
|
||||
|
||||
async def failing_get(self, url):
|
||||
raise _httpx.ConnectError("network down")
|
||||
|
||||
with patch.object(_httpx.AsyncClient, "get", failing_get):
|
||||
result = await auth._get_jwks()
|
||||
# Stale-Cache wird zurueckgegeben
|
||||
assert result == {"keys": [{"kid": "old"}]}
|
||||
|
||||
|
||||
class TestValidateToken:
|
||||
"""_validate_token: Schlüssel-Lookup, Payload-Mapping."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_jwks_returns_none(self, monkeypatch):
|
||||
from app import auth
|
||||
async def fake_jwks():
|
||||
return {}
|
||||
monkeypatch.setattr(auth, "_get_jwks", fake_jwks)
|
||||
result = await auth._validate_token("any-token")
|
||||
assert result is None
|
||||
|
||||
|
||||
class TestGetCurrentUser:
|
||||
@pytest.mark.asyncio
|
||||
async def test_returns_none_when_auth_disabled(self, monkeypatch):
|
||||
from app import auth
|
||||
from unittest.mock import MagicMock
|
||||
monkeypatch.setattr(auth, "_is_auth_enabled", lambda: False)
|
||||
request = MagicMock(headers={}, cookies={})
|
||||
assert await auth.get_current_user(request) is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_returns_none_when_no_token(self, monkeypatch):
|
||||
from app import auth
|
||||
from unittest.mock import MagicMock
|
||||
monkeypatch.setattr(auth, "_is_auth_enabled", lambda: True)
|
||||
request = MagicMock()
|
||||
request.headers.get = lambda k, d="": ""
|
||||
request.cookies.get = lambda k: None
|
||||
assert await auth.get_current_user(request) is None
|
||||
|
||||
|
||||
class TestRequireAuth:
|
||||
@pytest.mark.asyncio
|
||||
async def test_dev_mode_returns_anonymous(self, monkeypatch):
|
||||
from app import auth
|
||||
from unittest.mock import MagicMock
|
||||
monkeypatch.setattr(auth, "_is_auth_enabled", lambda: False)
|
||||
request = MagicMock()
|
||||
user = await auth.require_auth(request)
|
||||
assert user["sub"] == "anonymous"
|
||||
assert "Dev-Modus" in user["name"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_token_raises_401(self, monkeypatch):
|
||||
from fastapi import HTTPException
|
||||
from app import auth
|
||||
from unittest.mock import MagicMock
|
||||
monkeypatch.setattr(auth, "_is_auth_enabled", lambda: True)
|
||||
request = MagicMock()
|
||||
request.headers.get = lambda k, d="": ""
|
||||
request.cookies.get = lambda k: None
|
||||
with pytest.raises(HTTPException) as exc:
|
||||
await auth.require_auth(request)
|
||||
assert exc.value.status_code == 401
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_invalid_token_raises_401(self, monkeypatch):
|
||||
from fastapi import HTTPException
|
||||
from app import auth
|
||||
from unittest.mock import MagicMock
|
||||
monkeypatch.setattr(auth, "_is_auth_enabled", lambda: True)
|
||||
async def fake_validate(t): return None
|
||||
monkeypatch.setattr(auth, "_validate_token", fake_validate)
|
||||
request = MagicMock()
|
||||
request.headers.get = lambda k, d="": "Bearer bad-token"
|
||||
request.cookies.get = lambda k: None
|
||||
with pytest.raises(HTTPException) as exc:
|
||||
await auth.require_auth(request)
|
||||
assert exc.value.status_code == 401
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_valid_token_returns_user(self, monkeypatch):
|
||||
from app import auth
|
||||
from unittest.mock import MagicMock
|
||||
monkeypatch.setattr(auth, "_is_auth_enabled", lambda: True)
|
||||
async def fake_validate(t): return {"sub": "u1", "email": "a@b", "name": "X", "roles": []}
|
||||
monkeypatch.setattr(auth, "_validate_token", fake_validate)
|
||||
request = MagicMock()
|
||||
request.headers.get = lambda k, d="": "Bearer ok-token"
|
||||
user = await auth.require_auth(request)
|
||||
assert user["sub"] == "u1"
|
||||
|
||||
|
||||
class TestRequireAdmin:
|
||||
@pytest.mark.asyncio
|
||||
async def test_dev_mode_returns_anonymous_admin(self, monkeypatch):
|
||||
from app import auth
|
||||
from unittest.mock import MagicMock
|
||||
monkeypatch.setattr(auth, "_is_auth_enabled", lambda: False)
|
||||
user = await auth.require_admin(MagicMock())
|
||||
assert "admin" in user["roles"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_admin_role_passes(self, monkeypatch):
|
||||
from app import auth
|
||||
from unittest.mock import MagicMock
|
||||
monkeypatch.setattr(auth, "_is_auth_enabled", lambda: True)
|
||||
async def fake_require_auth(req): return {"sub": "u1", "roles": ["admin"]}
|
||||
monkeypatch.setattr(auth, "require_auth", fake_require_auth)
|
||||
user = await auth.require_admin(MagicMock())
|
||||
assert "admin" in user["roles"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_gwoe_admin_role_passes(self, monkeypatch):
|
||||
from app import auth
|
||||
from unittest.mock import MagicMock
|
||||
monkeypatch.setattr(auth, "_is_auth_enabled", lambda: True)
|
||||
async def fake_require_auth(req): return {"sub": "u1", "roles": ["gwoe-admin"]}
|
||||
monkeypatch.setattr(auth, "require_auth", fake_require_auth)
|
||||
user = await auth.require_admin(MagicMock())
|
||||
assert "gwoe-admin" in user["roles"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_admin_role_raises_403(self, monkeypatch):
|
||||
from fastapi import HTTPException
|
||||
from app import auth
|
||||
from unittest.mock import MagicMock
|
||||
monkeypatch.setattr(auth, "_is_auth_enabled", lambda: True)
|
||||
async def fake_require_auth(req): return {"sub": "u1", "roles": ["user"]}
|
||||
monkeypatch.setattr(auth, "require_auth", fake_require_auth)
|
||||
with pytest.raises(HTTPException) as exc:
|
||||
await auth.require_admin(MagicMock())
|
||||
assert exc.value.status_code == 403
|
||||
|
||||
|
||||
class TestKeycloakAdminToken:
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_credentials_raises(self, monkeypatch):
|
||||
from fastapi import HTTPException
|
||||
from app import auth
|
||||
from app.config import settings
|
||||
monkeypatch.setattr(settings, "keycloak_admin_user", "")
|
||||
with pytest.raises(HTTPException) as exc:
|
||||
await auth.keycloak_admin_token()
|
||||
assert exc.value.status_code == 500
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_returns_access_token_on_success(self, monkeypatch):
|
||||
from app import auth
|
||||
from app.config import settings
|
||||
from unittest.mock import MagicMock, patch
|
||||
import httpx as _httpx
|
||||
|
||||
monkeypatch.setattr(settings, "keycloak_admin_user", "admin")
|
||||
monkeypatch.setattr(settings, "keycloak_admin_password", "secret")
|
||||
monkeypatch.setattr(settings, "keycloak_url", "https://sso.example")
|
||||
|
||||
fake_resp = MagicMock(status_code=200,
|
||||
json=lambda: {"access_token": "TOKEN-123"})
|
||||
|
||||
async def fake_post(self, url, data=None, **kw):
|
||||
return fake_resp
|
||||
|
||||
with patch.object(_httpx.AsyncClient, "post", fake_post):
|
||||
tok = await auth.keycloak_admin_token()
|
||||
assert tok == "TOKEN-123"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_keycloak_error_raises_500(self, monkeypatch):
|
||||
from fastapi import HTTPException
|
||||
from app import auth
|
||||
from app.config import settings
|
||||
from unittest.mock import MagicMock, patch
|
||||
import httpx as _httpx
|
||||
|
||||
monkeypatch.setattr(settings, "keycloak_admin_user", "admin")
|
||||
monkeypatch.setattr(settings, "keycloak_admin_password", "secret")
|
||||
|
||||
fake_resp = MagicMock(status_code=500, text="server error")
|
||||
|
||||
async def fake_post(self, url, data=None, **kw):
|
||||
return fake_resp
|
||||
|
||||
with patch.object(_httpx.AsyncClient, "post", fake_post):
|
||||
with pytest.raises(HTTPException) as exc:
|
||||
await auth.keycloak_admin_token()
|
||||
assert exc.value.status_code == 500
|
||||
|
||||
@ -47,22 +47,3 @@ class TestVersionHistory:
|
||||
rows_b = _run(repo.versions("18/2"))
|
||||
assert len(rows_a) == 1 and rows_a[0]["gwoe_score"] == 5.0
|
||||
assert len(rows_b) == 1 and rows_b[0]["gwoe_score"] == 8.0
|
||||
|
||||
|
||||
# ─── SqliteBewertungRepository — Delegation (#134 Coverage-Backfill) ──────────
|
||||
|
||||
|
||||
class TestSqliteBewertungRepositoryDelegation:
|
||||
def test_versions_delegates(self):
|
||||
from unittest.mock import AsyncMock, patch
|
||||
from app.repositories.bewertung_repository import SqliteBewertungRepository
|
||||
fake = [{"version": 1, "gwoe_score": 5.0}]
|
||||
with patch("app.repositories.bewertung_repository.database.get_assessment_history",
|
||||
new=AsyncMock(return_value=fake)) as m:
|
||||
assert _run(SqliteBewertungRepository().versions("18/1")) == fake
|
||||
m.assert_called_once_with("18/1")
|
||||
|
||||
|
||||
def test_get_bewertung_repository_returns_singleton():
|
||||
from app.repositories.bewertung_repository import get_bewertung_repository
|
||||
assert get_bewertung_repository() is get_bewertung_repository()
|
||||
|
||||
@ -436,156 +436,3 @@ class TestFindSimilarAssessments:
|
||||
result = run(clustering.find_similar_assessments(items[0]["drucksache"]))
|
||||
|
||||
assert result == []
|
||||
|
||||
|
||||
# ─── Coverage-Backfill (#134) ────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestUnionFindRankSwap:
|
||||
"""Wenn rank[ra] < rank[rb], muss parent[ra] auf rb zeigen (Line 69)."""
|
||||
|
||||
def test_smaller_rank_attaches_to_larger(self):
|
||||
from app.clustering import UnionFind
|
||||
uf = UnionFind(4)
|
||||
# Erst zwei Trees mit unterschiedlichen Höhen aufbauen:
|
||||
# 0—1 (rank 1) und 2—3—... (rank 1)
|
||||
uf.union(0, 1)
|
||||
uf.union(2, 3)
|
||||
# Beide Roots haben rank 1 — uniteFurther durch Drittes:
|
||||
uf.union(2, 0) # bringt einen rank-Tie auf rank=2 für einen
|
||||
# Jetzt eine Insertion mit Rank-Asymmetrie:
|
||||
# Erstellen wir eine Klasse mit höherem Rank
|
||||
big = UnionFind(8)
|
||||
big.union(0, 1)
|
||||
big.union(2, 3)
|
||||
big.union(0, 2) # baut einen rank-2-Baum
|
||||
# Knoten 4 als Single (rank 0). union(4, 0) sollte 4 unter 0 hängen.
|
||||
big.union(4, 0)
|
||||
# 4 sollte jetzt im selben Set wie 0 sein
|
||||
assert big.find(4) == big.find(0)
|
||||
|
||||
|
||||
class TestLoadAssessmentItems:
|
||||
"""Async DB-Lader; Tests gegen tmp-DB."""
|
||||
|
||||
def _build_db(self, tmp_path):
|
||||
import sqlite3
|
||||
import json as _j
|
||||
db_path = tmp_path / "clust.db"
|
||||
conn = sqlite3.connect(str(db_path))
|
||||
conn.execute("""
|
||||
CREATE TABLE assessments (
|
||||
drucksache TEXT PRIMARY KEY, title TEXT,
|
||||
fraktionen TEXT, datum TEXT, bundesland TEXT,
|
||||
gwoe_score REAL, link TEXT,
|
||||
empfehlung TEXT, empfehlung_symbol TEXT,
|
||||
themen TEXT, summary_embedding BLOB
|
||||
)
|
||||
""")
|
||||
# Korrektes Embedding
|
||||
emb_ok = _j.dumps([0.1, 0.2, 0.3]).encode()
|
||||
conn.execute(
|
||||
"INSERT INTO assessments VALUES (?,?,?,?,?,?,?,?,?,?,?)",
|
||||
("18/1", "T1", '["CDU"]', "2026-04-01", "NRW",
|
||||
7.0, "x", "Empfohlen", "+", '["Klima"]', emb_ok),
|
||||
)
|
||||
# Kaputtes Embedding (ungueltiges JSON)
|
||||
conn.execute(
|
||||
"INSERT INTO assessments VALUES (?,?,?,?,?,?,?,?,?,?,?)",
|
||||
("18/2", "T2", '["SPD"]', "2026-04-02", "NRW",
|
||||
5.0, "y", "Empfohlen", "+", '["Klima"]', b"not-json"),
|
||||
)
|
||||
# Anderes BL (fuer bundesland-Filter)
|
||||
conn.execute(
|
||||
"INSERT INTO assessments VALUES (?,?,?,?,?,?,?,?,?,?,?)",
|
||||
("8/1", "T3", '["AfD"]', "2026-04-03", "MV",
|
||||
3.0, "z", "Ablehnen", "-", "[]", emb_ok),
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
return db_path
|
||||
|
||||
def test_loads_only_valid_embeddings(self, tmp_path, monkeypatch):
|
||||
from app.config import settings
|
||||
from app import clustering
|
||||
db = self._build_db(tmp_path)
|
||||
monkeypatch.setattr(settings, "db_path", str(db))
|
||||
|
||||
items = run(clustering.load_assessment_items())
|
||||
# 18/2 hat kaputtes Embedding und wird übersprungen
|
||||
ids = sorted(i["drucksache"] for i in items)
|
||||
assert "18/2" not in ids
|
||||
assert "18/1" in ids
|
||||
assert "8/1" in ids
|
||||
|
||||
def test_bundesland_filter(self, tmp_path, monkeypatch):
|
||||
from app.config import settings
|
||||
from app import clustering
|
||||
db = self._build_db(tmp_path)
|
||||
monkeypatch.setattr(settings, "db_path", str(db))
|
||||
|
||||
items = run(clustering.load_assessment_items(bundesland="NRW"))
|
||||
ids = [i["drucksache"] for i in items]
|
||||
assert ids == ["18/1"]
|
||||
|
||||
def test_loaded_item_fields_present(self, tmp_path, monkeypatch):
|
||||
from app.config import settings
|
||||
from app import clustering
|
||||
db = self._build_db(tmp_path)
|
||||
monkeypatch.setattr(settings, "db_path", str(db))
|
||||
|
||||
items = run(clustering.load_assessment_items(bundesland="NRW"))
|
||||
assert items
|
||||
item = items[0]
|
||||
for key in ("drucksache", "title", "fraktionen", "datum", "link",
|
||||
"bundesland", "gwoe_score", "empfehlung",
|
||||
"empfehlung_symbol", "themen", "embedding"):
|
||||
assert key in item
|
||||
|
||||
|
||||
class TestBuildHierarchySubclusters:
|
||||
"""Wenn ein Cluster groesser als max_cluster_size ist, wird sub-clustered
|
||||
(Lines 256-262)."""
|
||||
|
||||
def test_large_cluster_gets_subclustered(self):
|
||||
from app import clustering
|
||||
from unittest.mock import patch
|
||||
|
||||
# 6 fast-identische Items → ein grosser Cluster, sub-Cluster sub > 1
|
||||
v = [1.0, 0.0, 0.0]
|
||||
items = [
|
||||
{**_make_items(1)[0], "drucksache": f"18/{i}",
|
||||
"embedding": [v[0] + 0.01 * i, v[1], v[2]]}
|
||||
for i in range(6)
|
||||
]
|
||||
|
||||
async def fake_load(bundesland=None):
|
||||
return items
|
||||
|
||||
with patch.object(clustering, "load_assessment_items", side_effect=fake_load):
|
||||
# max_cluster_size=3 zwingt sub-Clustering
|
||||
result = run(clustering.build_hierarchy(
|
||||
threshold=0.95, max_cluster_size=3, subcluster_threshold=0.999,
|
||||
))
|
||||
assert result["clusters"]
|
||||
# Mindestens ein Cluster muss subclusters haben
|
||||
assert any(c.get("subclusters") for c in result["clusters"])
|
||||
|
||||
def test_small_cluster_has_subclusters_none(self):
|
||||
from app import clustering
|
||||
from unittest.mock import patch
|
||||
|
||||
items = _make_items(2)
|
||||
# Setze dieselben embeddings, damit sie in einem Cluster sind
|
||||
items[0]["embedding"] = [1.0, 0.0, 0.0]
|
||||
items[1]["embedding"] = [1.0, 0.0, 0.0]
|
||||
|
||||
async def fake_load(bundesland=None):
|
||||
return items
|
||||
|
||||
with patch.object(clustering, "load_assessment_items", side_effect=fake_load):
|
||||
result = run(clustering.build_hierarchy(
|
||||
threshold=0.5, max_cluster_size=10,
|
||||
))
|
||||
for c in result["clusters"]:
|
||||
assert c["subclusters"] is None
|
||||
|
||||
@ -552,110 +552,3 @@ class TestMerkliste:
|
||||
assert count == 1
|
||||
listed = run(database.merkliste_list("user1"))
|
||||
assert len([e for e in listed if e["antrag_id"] == "18/9001"]) == 1
|
||||
|
||||
|
||||
# ─── Plenum-Vote-Results (#106) ──────────────────────────────────────────────
|
||||
|
||||
class TestPlenumVoteResults:
|
||||
def test_creates_table(self, db_path):
|
||||
import aiosqlite
|
||||
from app import database
|
||||
run(database.init_db())
|
||||
|
||||
async def check():
|
||||
async with aiosqlite.connect(db_path) as db:
|
||||
cur = await db.execute(
|
||||
"SELECT name FROM sqlite_master WHERE type='table' "
|
||||
"AND name='plenum_vote_results'"
|
||||
)
|
||||
return await cur.fetchone()
|
||||
|
||||
assert run(check()) is not None
|
||||
|
||||
def test_upsert_and_get_roundtrip(self, initialized_db):
|
||||
from app import database
|
||||
run(database.upsert_plenum_vote(
|
||||
bundesland="NRW",
|
||||
drucksache="18/1234",
|
||||
ergebnis="angenommen",
|
||||
einstimmig=False,
|
||||
fraktionen_ja=["CDU", "GRÜNE"],
|
||||
fraktionen_nein=["SPD", "AfD"],
|
||||
fraktionen_enthaltung=[],
|
||||
quelle_protokoll="MMP18-119",
|
||||
quelle_url="https://landtag.nrw.de/MMP18-119.pdf",
|
||||
))
|
||||
result = run(database.get_plenum_votes("NRW", "18/1234"))
|
||||
assert len(result) == 1
|
||||
r = result[0]
|
||||
assert r["ergebnis"] == "angenommen"
|
||||
assert r["einstimmig"] is False
|
||||
assert r["fraktionen_ja"] == ["CDU", "GRÜNE"]
|
||||
assert r["fraktionen_nein"] == ["SPD", "AfD"]
|
||||
assert r["fraktionen_enthaltung"] == []
|
||||
assert r["quelle_protokoll"] == "MMP18-119"
|
||||
|
||||
def test_einstimmig_flag_persisted(self, initialized_db):
|
||||
from app import database
|
||||
run(database.upsert_plenum_vote(
|
||||
bundesland="NRW", drucksache="18/100", ergebnis="überwiesen",
|
||||
einstimmig=True, fraktionen_ja=[], fraktionen_nein=[],
|
||||
fraktionen_enthaltung=[], quelle_protokoll="MMP18-100",
|
||||
))
|
||||
result = run(database.get_plenum_votes("NRW", "18/100"))
|
||||
assert result[0]["einstimmig"] is True
|
||||
|
||||
def test_idempotent_upsert_same_protokoll(self, initialized_db):
|
||||
"""Zweiter Upsert mit demselben Protokoll → ein Eintrag, neue Werte."""
|
||||
from app import database
|
||||
run(database.upsert_plenum_vote(
|
||||
bundesland="NRW", drucksache="18/200", ergebnis="abgelehnt",
|
||||
einstimmig=False, fraktionen_ja=["AfD"], fraktionen_nein=["CDU", "SPD"],
|
||||
fraktionen_enthaltung=[], quelle_protokoll="MMP18-50",
|
||||
))
|
||||
# Re-Parse mit aktualisiertem Ergebnis
|
||||
run(database.upsert_plenum_vote(
|
||||
bundesland="NRW", drucksache="18/200", ergebnis="zurückgezogen",
|
||||
einstimmig=False, fraktionen_ja=[], fraktionen_nein=[],
|
||||
fraktionen_enthaltung=[], quelle_protokoll="MMP18-50",
|
||||
))
|
||||
result = run(database.get_plenum_votes("NRW", "18/200"))
|
||||
assert len(result) == 1
|
||||
assert result[0]["ergebnis"] == "zurückgezogen"
|
||||
|
||||
def test_multiple_protokolle_keep_separate_records(self, initialized_db):
|
||||
"""Eine Drucksache, zwei Protokolle (Ueberweisung + finale Abstimmung)
|
||||
muessen beide erhalten bleiben."""
|
||||
from app import database
|
||||
run(database.upsert_plenum_vote(
|
||||
bundesland="NRW", drucksache="18/300", ergebnis="überwiesen",
|
||||
einstimmig=True, fraktionen_ja=[], fraktionen_nein=[],
|
||||
fraktionen_enthaltung=[], quelle_protokoll="MMP18-50",
|
||||
))
|
||||
run(database.upsert_plenum_vote(
|
||||
bundesland="NRW", drucksache="18/300", ergebnis="angenommen",
|
||||
einstimmig=False, fraktionen_ja=["CDU", "SPD"], fraktionen_nein=["AfD"],
|
||||
fraktionen_enthaltung=["GRÜNE"], quelle_protokoll="MMP18-119",
|
||||
))
|
||||
result = run(database.get_plenum_votes("NRW", "18/300"))
|
||||
assert len(result) == 2
|
||||
protokolle = {r["quelle_protokoll"] for r in result}
|
||||
assert protokolle == {"MMP18-50", "MMP18-119"}
|
||||
|
||||
def test_empty_query_returns_empty_list(self, initialized_db):
|
||||
from app import database
|
||||
result = run(database.get_plenum_votes("NRW", "99/9999"))
|
||||
assert result == []
|
||||
|
||||
def test_unicode_in_fraktionen_persisted(self, initialized_db):
|
||||
"""GRÜNE mit Umlaut darf nicht ASCII-kodiert werden."""
|
||||
from app import database
|
||||
run(database.upsert_plenum_vote(
|
||||
bundesland="NRW", drucksache="18/400", ergebnis="angenommen",
|
||||
einstimmig=False, fraktionen_ja=["GRÜNE", "BÜNDNIS"],
|
||||
fraktionen_nein=[], fraktionen_enthaltung=[],
|
||||
quelle_protokoll="MMP18-1",
|
||||
))
|
||||
result = run(database.get_plenum_votes("NRW", "18/400"))
|
||||
assert "GRÜNE" in result[0]["fraktionen_ja"]
|
||||
assert "BÜNDNIS" in result[0]["fraktionen_ja"]
|
||||
|
||||
@ -202,61 +202,3 @@ class TestIstAbstimmbarOriginal:
|
||||
|
||||
def test_gesetzentwurf_string_abstimmbar(self):
|
||||
assert ist_abstimmbar_original("Gesetzentwurf der Fraktionen") is True
|
||||
|
||||
|
||||
# ─── likely_kleine_anfrage_titel — Heuristik (#149 Folge, #134 Backfill) ─────
|
||||
|
||||
|
||||
class TestLikelyKleineAnfrageTitel:
|
||||
def test_empty_title_is_false(self):
|
||||
from app.drucksache_typen import likely_kleine_anfrage_titel
|
||||
assert likely_kleine_anfrage_titel("") is False
|
||||
|
||||
def test_none_title_is_false(self):
|
||||
from app.drucksache_typen import likely_kleine_anfrage_titel
|
||||
assert likely_kleine_anfrage_titel(None) is False
|
||||
|
||||
def test_welche_praefix_detected(self):
|
||||
from app.drucksache_typen import likely_kleine_anfrage_titel
|
||||
assert likely_kleine_anfrage_titel("Welche Schulen werden saniert") is True
|
||||
|
||||
def test_warum_praefix_detected(self):
|
||||
from app.drucksache_typen import likely_kleine_anfrage_titel
|
||||
assert likely_kleine_anfrage_titel("Warum dauert das Verfahren so lang") is True
|
||||
|
||||
def test_question_mark_at_end_detected(self):
|
||||
from app.drucksache_typen import likely_kleine_anfrage_titel
|
||||
assert likely_kleine_anfrage_titel("Stand der Dinge zur Mobilitaet?") is True
|
||||
|
||||
def test_numeric_prefix_stripped(self):
|
||||
"""NRW liefert manchmal '1Welche...' ohne Trennzeichen — Praefix wird
|
||||
weg-gestrippt bevor die Heuristik greift."""
|
||||
from app.drucksache_typen import likely_kleine_anfrage_titel
|
||||
assert likely_kleine_anfrage_titel("1Welche Strafen sind vorgesehen") is True
|
||||
|
||||
def test_dotted_numeric_prefix_stripped(self):
|
||||
from app.drucksache_typen import likely_kleine_anfrage_titel
|
||||
assert likely_kleine_anfrage_titel("12. Wie viele Stellen") is True
|
||||
|
||||
def test_normal_antrag_title_is_false(self):
|
||||
from app.drucksache_typen import likely_kleine_anfrage_titel
|
||||
assert likely_kleine_anfrage_titel(
|
||||
"Klimaschutzgesetz fortschreiben und ausweiten"
|
||||
) is False
|
||||
|
||||
def test_pure_digits_only_is_false(self):
|
||||
"""Nach dem Strippen bleibt leerer Titel — kein Match."""
|
||||
from app.drucksache_typen import likely_kleine_anfrage_titel
|
||||
assert likely_kleine_anfrage_titel("123 . - ") is False
|
||||
|
||||
def test_was_praefix_detected(self):
|
||||
from app.drucksache_typen import likely_kleine_anfrage_titel
|
||||
assert likely_kleine_anfrage_titel("Was sagt die Landesregierung") is True
|
||||
|
||||
def test_case_insensitive_praefix(self):
|
||||
from app.drucksache_typen import likely_kleine_anfrage_titel
|
||||
assert likely_kleine_anfrage_titel("WIE VIEL kostet das") is True
|
||||
|
||||
def test_no_match_no_question_mark(self):
|
||||
from app.drucksache_typen import likely_kleine_anfrage_titel
|
||||
assert likely_kleine_anfrage_titel("Bericht zur Energiewende vorlegen") is False
|
||||
|
||||
@ -1,331 +0,0 @@
|
||||
"""Tests fuer app/ingest_votes.py — PDF → plenum_vote_results Pipeline (#106 / #126)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
# Gleiches aiosqlite-Setup-Problem wie in test_database.py — dort fix
|
||||
# importieren, damit hier nichts gestubbed ist.
|
||||
_aio = sys.modules.get("aiosqlite")
|
||||
if _aio is not None and not hasattr(_aio, "connect"):
|
||||
del sys.modules["aiosqlite"]
|
||||
|
||||
import aiosqlite # noqa: E402
|
||||
import importlib # noqa: E402
|
||||
|
||||
if "app.database" in sys.modules:
|
||||
if not hasattr(getattr(sys.modules["app.database"], "aiosqlite", None), "connect"):
|
||||
del sys.modules["app.database"]
|
||||
importlib.import_module("app.database")
|
||||
else:
|
||||
importlib.import_module("app.database")
|
||||
|
||||
|
||||
def run(coro):
|
||||
return asyncio.get_event_loop().run_until_complete(coro)
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def db_path(tmp_path, monkeypatch):
|
||||
path = tmp_path / "test.db"
|
||||
from app.config import settings
|
||||
monkeypatch.setattr(settings, "db_path", str(path))
|
||||
return str(path)
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def initialized_db(db_path):
|
||||
from app import database
|
||||
run(database.init_db())
|
||||
return db_path
|
||||
|
||||
|
||||
def _fake_parse_result(drucksache: str, ergebnis: str = "angenommen",
|
||||
einstimmig: bool = False,
|
||||
ja: list[str] = None, nein: list[str] = None,
|
||||
enth: list[str] = None) -> dict:
|
||||
return {
|
||||
"drucksache": drucksache,
|
||||
"ergebnis": ergebnis,
|
||||
"einstimmig": einstimmig,
|
||||
"votes": {
|
||||
"ja": ja or [],
|
||||
"nein": nein or [],
|
||||
"enthaltung": enth or [],
|
||||
},
|
||||
"kind": "direct",
|
||||
}
|
||||
|
||||
|
||||
class TestIngestPdf:
|
||||
def test_writes_each_parsed_vote(self, initialized_db, tmp_path):
|
||||
from app import ingest_votes, database
|
||||
fake_pdf = tmp_path / "MMP18-119.pdf"
|
||||
fake_pdf.write_bytes(b"%PDF-1.4 fake")
|
||||
|
||||
parser_results = [
|
||||
_fake_parse_result("18/100", "angenommen", ja=["CDU", "SPD"], nein=["AfD"]),
|
||||
_fake_parse_result("18/200", "abgelehnt", ja=["AfD"], nein=["CDU", "SPD"]),
|
||||
]
|
||||
|
||||
with patch("app.ingest_votes.parse_protocol", return_value=parser_results):
|
||||
stats = run(ingest_votes.ingest_pdf(fake_pdf))
|
||||
|
||||
assert stats["parsed"] == 2
|
||||
assert stats["written"] == 2
|
||||
|
||||
votes_100 = run(database.get_plenum_votes("NRW", "18/100"))
|
||||
assert len(votes_100) == 1
|
||||
assert votes_100[0]["fraktionen_ja"] == ["CDU", "SPD"]
|
||||
assert votes_100[0]["quelle_protokoll"] == "MMP18-119"
|
||||
|
||||
def test_skips_entries_without_drucksache(self, initialized_db, tmp_path):
|
||||
"""Anchors ohne aufloesbare Drucksache werden gezaehlt aber nicht
|
||||
geschrieben (sonst muellt der Import die DB voll)."""
|
||||
from app import ingest_votes
|
||||
fake_pdf = tmp_path / "MMP18-50.pdf"
|
||||
fake_pdf.write_bytes(b"%PDF")
|
||||
|
||||
parser_results = [
|
||||
_fake_parse_result("18/300", "angenommen"),
|
||||
{"drucksache": None, "ergebnis": "angenommen", "votes": {"ja": [], "nein": [], "enthaltung": []}},
|
||||
]
|
||||
with patch("app.ingest_votes.parse_protocol", return_value=parser_results):
|
||||
stats = run(ingest_votes.ingest_pdf(fake_pdf))
|
||||
|
||||
assert stats["parsed"] == 2
|
||||
assert stats["written"] == 1
|
||||
assert stats["skipped_no_drucksache"] == 1
|
||||
|
||||
def test_protokoll_id_default_from_stem(self, initialized_db, tmp_path):
|
||||
from app import ingest_votes, database
|
||||
fake_pdf = tmp_path / "MMP18-77.pdf"
|
||||
fake_pdf.write_bytes(b"%PDF")
|
||||
with patch("app.ingest_votes.parse_protocol",
|
||||
return_value=[_fake_parse_result("18/500")]):
|
||||
stats = run(ingest_votes.ingest_pdf(fake_pdf))
|
||||
assert stats["protokoll_id"] == "MMP18-77"
|
||||
votes = run(database.get_plenum_votes("NRW", "18/500"))
|
||||
assert votes[0]["quelle_protokoll"] == "MMP18-77"
|
||||
|
||||
def test_protokoll_id_override(self, initialized_db, tmp_path):
|
||||
from app import ingest_votes, database
|
||||
fake_pdf = tmp_path / "scan.pdf"
|
||||
fake_pdf.write_bytes(b"%PDF")
|
||||
with patch("app.ingest_votes.parse_protocol",
|
||||
return_value=[_fake_parse_result("18/600")]):
|
||||
run(ingest_votes.ingest_pdf(
|
||||
fake_pdf, protokoll_id="MMP18-99", quelle_url="https://example.com/x.pdf",
|
||||
))
|
||||
votes = run(database.get_plenum_votes("NRW", "18/600"))
|
||||
assert votes[0]["quelle_protokoll"] == "MMP18-99"
|
||||
assert votes[0]["quelle_url"] == "https://example.com/x.pdf"
|
||||
|
||||
def test_bundesland_override(self, initialized_db, tmp_path):
|
||||
"""Adapter fuer andere BL koennten denselben Ingest-Helper nutzen."""
|
||||
from app import ingest_votes, database
|
||||
fake_pdf = tmp_path / "MV-MP1.pdf"
|
||||
fake_pdf.write_bytes(b"%PDF")
|
||||
with patch("app.ingest_votes.parse_protocol",
|
||||
return_value=[_fake_parse_result("8/100")]):
|
||||
run(ingest_votes.ingest_pdf(fake_pdf, bundesland="MV"))
|
||||
# Lookup unter dem richtigen BL
|
||||
votes_mv = run(database.get_plenum_votes("MV", "8/100"))
|
||||
assert len(votes_mv) == 1
|
||||
votes_nrw = run(database.get_plenum_votes("NRW", "8/100"))
|
||||
assert votes_nrw == []
|
||||
|
||||
def test_re_ingest_overwrites_same_protokoll(self, initialized_db, tmp_path):
|
||||
"""Erneuter Ingest desselben Protokolls aktualisiert die Eintraege
|
||||
(idempotent), kein Duplikat."""
|
||||
from app import ingest_votes, database
|
||||
fake_pdf = tmp_path / "MMP18-1.pdf"
|
||||
fake_pdf.write_bytes(b"%PDF")
|
||||
|
||||
with patch("app.ingest_votes.parse_protocol",
|
||||
return_value=[_fake_parse_result("18/700", "angenommen", ja=["CDU"])]):
|
||||
run(ingest_votes.ingest_pdf(fake_pdf))
|
||||
# Re-Ingest mit korrigiertem Ergebnis (z.B. Parser-Fix)
|
||||
with patch("app.ingest_votes.parse_protocol",
|
||||
return_value=[_fake_parse_result("18/700", "abgelehnt", ja=[], nein=["CDU"])]):
|
||||
run(ingest_votes.ingest_pdf(fake_pdf))
|
||||
|
||||
votes = run(database.get_plenum_votes("NRW", "18/700"))
|
||||
assert len(votes) == 1
|
||||
assert votes[0]["ergebnis"] == "abgelehnt"
|
||||
assert votes[0]["fraktionen_nein"] == ["CDU"]
|
||||
|
||||
def test_db_error_collected_not_raised(self, initialized_db, tmp_path):
|
||||
"""Wenn upsert fehlschlaegt, sollte der Fehler in errors-Liste
|
||||
landen, nicht propagieren — der Rest des Protokolls soll trotzdem
|
||||
verarbeitet werden."""
|
||||
from app import ingest_votes
|
||||
fake_pdf = tmp_path / "MMP18-2.pdf"
|
||||
fake_pdf.write_bytes(b"%PDF")
|
||||
|
||||
async def _failing_upsert(**kw):
|
||||
raise RuntimeError("simulated DB error")
|
||||
|
||||
parser_results = [
|
||||
_fake_parse_result("18/800", "angenommen"),
|
||||
_fake_parse_result("18/801", "abgelehnt"),
|
||||
]
|
||||
with patch("app.ingest_votes.parse_protocol", return_value=parser_results), \
|
||||
patch("app.ingest_votes.upsert_plenum_vote", side_effect=_failing_upsert):
|
||||
stats = run(ingest_votes.ingest_pdf(fake_pdf))
|
||||
|
||||
assert stats["written"] == 0
|
||||
assert len(stats["errors"]) == 2
|
||||
assert "18/800" in stats["errors"][0]
|
||||
assert "simulated DB error" in stats["errors"][0]
|
||||
|
||||
|
||||
class TestDownloadPdf:
|
||||
def test_writes_response_bytes(self, tmp_path):
|
||||
from app.ingest_votes import _download_pdf
|
||||
|
||||
class _FakeResp:
|
||||
def read(self):
|
||||
return b"%PDF downloaded content"
|
||||
def __enter__(self):
|
||||
return self
|
||||
def __exit__(self, *a):
|
||||
return False
|
||||
|
||||
dest = tmp_path / "out.pdf"
|
||||
with patch("urllib.request.urlopen", return_value=_FakeResp()):
|
||||
_download_pdf("https://example.com/x.pdf", dest)
|
||||
assert dest.read_bytes() == b"%PDF downloaded content"
|
||||
|
||||
def test_propagates_http_error(self, tmp_path):
|
||||
"""HTTP-Fehler beim Download propagieren — der Caller (CLI)
|
||||
soll mit Stack-Trace abbrechen, nicht still weitergehen."""
|
||||
from app.ingest_votes import _download_pdf
|
||||
|
||||
def _raise(*a, **kw):
|
||||
raise OSError("Connection refused")
|
||||
|
||||
with patch("urllib.request.urlopen", side_effect=_raise):
|
||||
with pytest.raises(OSError):
|
||||
_download_pdf("https://example.com/x.pdf", tmp_path / "out.pdf")
|
||||
|
||||
|
||||
class TestCli:
|
||||
"""Tests fuer die CLI-Wrapper-Funktion _cli — argv-basiert."""
|
||||
|
||||
def test_supported_lists_bl(self, capsys):
|
||||
"""--supported gibt registrierte BL aus und exitet mit 0."""
|
||||
from app import ingest_votes
|
||||
with patch.object(ingest_votes.sys, "argv", ["ingest_votes", "--supported"]):
|
||||
with pytest.raises(SystemExit) as exc:
|
||||
ingest_votes._cli()
|
||||
assert exc.value.code == 0
|
||||
out = capsys.readouterr().out
|
||||
assert "NRW" in out
|
||||
|
||||
def test_no_args_errors(self, capsys):
|
||||
"""Ohne --pdf und --url muss CLI mit klarer Fehlermeldung exiten."""
|
||||
from app import ingest_votes
|
||||
with patch.object(ingest_votes.sys, "argv", ["ingest_votes"]):
|
||||
with pytest.raises(SystemExit):
|
||||
ingest_votes._cli()
|
||||
|
||||
def test_pdf_path_missing_errors(self, capsys, tmp_path):
|
||||
"""--pdf mit nicht-existentem Pfad exitet 1."""
|
||||
from app import ingest_votes
|
||||
nonexistent = tmp_path / "missing.pdf"
|
||||
with patch.object(ingest_votes.sys, "argv",
|
||||
["ingest_votes", "--pdf", str(nonexistent)]):
|
||||
with pytest.raises(SystemExit) as exc:
|
||||
ingest_votes._cli()
|
||||
assert exc.value.code == 1
|
||||
err = capsys.readouterr().err
|
||||
assert "nicht gefunden" in err
|
||||
|
||||
def test_pdf_path_calls_ingest(self, tmp_path, capsys):
|
||||
"""--pdf mit existentem Pfad ruft ingest_pdf und gibt Statistik aus."""
|
||||
from app import ingest_votes
|
||||
pdf = tmp_path / "MMP18-X.pdf"
|
||||
pdf.write_bytes(b"%PDF")
|
||||
|
||||
fake_stats = {
|
||||
"parsed": 3, "written": 2,
|
||||
"skipped_no_drucksache": 1, "errors": [],
|
||||
"protokoll_id": "MMP18-X", "bundesland": "NRW",
|
||||
}
|
||||
with patch("app.ingest_votes.asyncio.run", return_value=fake_stats), \
|
||||
patch.object(ingest_votes.sys, "argv",
|
||||
["ingest_votes", "--pdf", str(pdf)]):
|
||||
ingest_votes._cli()
|
||||
out = capsys.readouterr().out
|
||||
assert "MMP18-X" in out
|
||||
assert "parsed: 3" in out
|
||||
assert "written: 2" in out
|
||||
assert "ohne DS: 1" in out
|
||||
|
||||
def test_url_downloads_then_ingests(self, capsys):
|
||||
"""--url path: Download in tmp, dann ingest_pdf."""
|
||||
from app import ingest_votes
|
||||
|
||||
fake_stats = {
|
||||
"parsed": 1, "written": 1, "skipped_no_drucksache": 0,
|
||||
"errors": [], "protokoll_id": "MMP18-Y",
|
||||
"bundesland": "NRW",
|
||||
}
|
||||
|
||||
class _FakeResp:
|
||||
def read(self):
|
||||
return b"%PDF downloaded"
|
||||
def __enter__(self):
|
||||
return self
|
||||
def __exit__(self, *a):
|
||||
return False
|
||||
|
||||
with patch("app.ingest_votes.asyncio.run", return_value=fake_stats), \
|
||||
patch("urllib.request.urlopen", return_value=_FakeResp()), \
|
||||
patch.object(ingest_votes.sys, "argv",
|
||||
["ingest_votes", "--url",
|
||||
"https://example.com/MMP18-Y.pdf"]):
|
||||
ingest_votes._cli()
|
||||
out = capsys.readouterr().out
|
||||
assert "MMP18-Y" in out
|
||||
|
||||
def test_zero_results_exits_2(self, tmp_path, capsys):
|
||||
"""Wenn weder geschrieben noch Fehler: exit code 2 (= 'no signal')."""
|
||||
from app import ingest_votes
|
||||
pdf = tmp_path / "leer.pdf"
|
||||
pdf.write_bytes(b"%PDF")
|
||||
|
||||
fake_stats = {
|
||||
"parsed": 0, "written": 0, "skipped_no_drucksache": 0,
|
||||
"errors": [], "protokoll_id": "leer", "bundesland": "NRW",
|
||||
}
|
||||
with patch("app.ingest_votes.asyncio.run", return_value=fake_stats), \
|
||||
patch.object(ingest_votes.sys, "argv",
|
||||
["ingest_votes", "--pdf", str(pdf)]):
|
||||
with pytest.raises(SystemExit) as exc:
|
||||
ingest_votes._cli()
|
||||
assert exc.value.code == 2
|
||||
|
||||
def test_errors_listed_in_output(self, tmp_path, capsys):
|
||||
"""Wenn errors gefuellt sind, erscheint die Errors-Zeile + erste 5."""
|
||||
from app import ingest_votes
|
||||
pdf = tmp_path / "x.pdf"
|
||||
pdf.write_bytes(b"%PDF")
|
||||
fake_stats = {
|
||||
"parsed": 2, "written": 0, "skipped_no_drucksache": 0,
|
||||
"errors": ["18/1: oops", "18/2: nope"],
|
||||
"protokoll_id": "x", "bundesland": "NRW",
|
||||
}
|
||||
with patch("app.ingest_votes.asyncio.run", return_value=fake_stats), \
|
||||
patch.object(ingest_votes.sys, "argv",
|
||||
["ingest_votes", "--pdf", str(pdf)]):
|
||||
ingest_votes._cli()
|
||||
out = capsys.readouterr().out
|
||||
assert "errors: 2" in out
|
||||
assert "18/1: oops" in out
|
||||
assert "18/2: nope" in out
|
||||
@ -135,66 +135,3 @@ class TestLlmRequestDefaults:
|
||||
assert req.max_retries == 3
|
||||
assert req.max_tokens == 4000
|
||||
assert req.base_temperature == 0.3
|
||||
|
||||
|
||||
# ─── Coverage-Backfill (#134) ────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestContentFingerprint:
|
||||
def test_empty_string_returns_len_zero(self):
|
||||
from app.adapters.qwen_bewerter import _content_fingerprint
|
||||
assert _content_fingerprint("") == "len=0"
|
||||
|
||||
def test_none_returns_len_zero(self):
|
||||
from app.adapters.qwen_bewerter import _content_fingerprint
|
||||
# Defensiv: None toleriert, weil log-Pfad aufgerufen wird
|
||||
# mit content.choices[0].message.content der schon mal None ist
|
||||
assert _content_fingerprint(None) == "len=0"
|
||||
|
||||
def test_non_empty_includes_sha1_prefix(self):
|
||||
from app.adapters.qwen_bewerter import _content_fingerprint
|
||||
result = _content_fingerprint("hallo")
|
||||
assert result.startswith("len=5 sha1=")
|
||||
assert len(result.split("sha1=")[1]) == 8
|
||||
|
||||
|
||||
class TestStripMarkdownJsonFences:
|
||||
"""```json-Fence wird zusaetzlich zum Plain-Fence behandelt."""
|
||||
|
||||
def test_json_fence_with_explicit_lang(self):
|
||||
from app.adapters.qwen_bewerter import _strip_markdown_fences
|
||||
s = "```json\n{\"a\": 1}\n```"
|
||||
assert _strip_markdown_fences(s) == '{"a": 1}'
|
||||
|
||||
|
||||
class TestLazyClientInstantiation:
|
||||
"""_get_client laedt openai erst beim ersten Call."""
|
||||
|
||||
def test_no_client_triggers_openai_import(self, monkeypatch):
|
||||
"""Wenn der Client nicht injected ist, versucht _get_client den
|
||||
Lazy-Import von openai.AsyncOpenAI. Hier patchen wir den Import,
|
||||
um sicherzustellen dass _get_client tatsaechlich versucht zu
|
||||
instanziieren (Branch-Coverage Lines 70-73)."""
|
||||
import sys
|
||||
from unittest.mock import MagicMock
|
||||
from app.adapters.qwen_bewerter import QwenBewerter
|
||||
|
||||
fake_client = MagicMock(name="AsyncOpenAI-Instance")
|
||||
fake_async_openai = MagicMock(return_value=fake_client)
|
||||
fake_module = type(sys)("openai")
|
||||
fake_module.AsyncOpenAI = fake_async_openai
|
||||
monkeypatch.setitem(sys.modules, "openai", fake_module)
|
||||
|
||||
qb = QwenBewerter(api_key="test", base_url="http://test")
|
||||
client = qb._get_client()
|
||||
assert client is fake_client
|
||||
fake_async_openai.assert_called_once_with(api_key="test",
|
||||
base_url="http://test")
|
||||
|
||||
def test_injected_client_skips_lazy_import(self):
|
||||
"""Wenn der Client schon im Konstruktor da ist, wird _get_client
|
||||
ihn direkt zurueckgeben — kein openai-Import."""
|
||||
from app.adapters.qwen_bewerter import QwenBewerter
|
||||
injected = object()
|
||||
qb = QwenBewerter(client=injected)
|
||||
assert qb._get_client() is injected
|
||||
|
||||
@ -352,63 +352,3 @@ class TestRunDailyDigest:
|
||||
|
||||
assert result["failed"] == 1
|
||||
assert result["sent"] == 0
|
||||
|
||||
|
||||
# ─── SMTP-Send-Path Coverage (#134 Backfill) ─────────────────────────────────
|
||||
|
||||
|
||||
class TestSendSync:
|
||||
def test_raises_when_smtp_not_configured(self, monkeypatch):
|
||||
"""Wenn settings.smtp_host oder smtp_user leer ist, RuntimeError."""
|
||||
from app import mail as mail_mod
|
||||
from app.config import settings
|
||||
monkeypatch.setattr(settings, "smtp_host", "")
|
||||
monkeypatch.setattr(settings, "smtp_user", "user@example.com")
|
||||
with pytest.raises(RuntimeError, match="SMTP nicht konfiguriert"):
|
||||
mail_mod._send_sync("to@example.com", "subj", "text", "<p>html</p>")
|
||||
|
||||
def test_calls_smtp_ssl_with_settings(self, monkeypatch):
|
||||
"""Bei vollstaendiger Konfig wird smtplib.SMTP_SSL aufgerufen, login
|
||||
und send_message getriggert."""
|
||||
from unittest.mock import MagicMock
|
||||
from app import mail as mail_mod
|
||||
from app.config import settings
|
||||
monkeypatch.setattr(settings, "smtp_host", "smtp.test")
|
||||
monkeypatch.setattr(settings, "smtp_port", 465)
|
||||
monkeypatch.setattr(settings, "smtp_user", "user@test")
|
||||
monkeypatch.setattr(settings, "smtp_password", "pw")
|
||||
monkeypatch.setattr(settings, "smtp_from_email", "noreply@test")
|
||||
monkeypatch.setattr(settings, "smtp_from_name", "Test")
|
||||
|
||||
ssl_mock = MagicMock()
|
||||
server_mock = MagicMock()
|
||||
ssl_mock.return_value.__enter__.return_value = server_mock
|
||||
ssl_mock.return_value.__exit__.return_value = False
|
||||
monkeypatch.setattr(mail_mod.smtplib, "SMTP_SSL", ssl_mock)
|
||||
|
||||
mail_mod._send_sync("to@test", "subj", "Plain", "<p>HTML</p>")
|
||||
|
||||
# SMTP_SSL wurde aufgerufen mit host + port
|
||||
ssl_mock.assert_called_once()
|
||||
args, kwargs = ssl_mock.call_args
|
||||
assert args[0] == "smtp.test"
|
||||
assert args[1] == 465
|
||||
# Login + send wurden aufgerufen
|
||||
server_mock.login.assert_called_once_with("user@test", "pw")
|
||||
server_mock.send_message.assert_called_once()
|
||||
|
||||
|
||||
class TestSendMailAsync:
|
||||
def test_runs_send_sync_in_executor(self, monkeypatch):
|
||||
"""send_mail (async) delegiert an _send_sync via Thread-Executor."""
|
||||
import asyncio
|
||||
from unittest.mock import MagicMock
|
||||
from app import mail as mail_mod
|
||||
|
||||
called: list[tuple] = []
|
||||
def fake_sync(to, subj, text, html):
|
||||
called.append((to, subj, text, html))
|
||||
|
||||
monkeypatch.setattr(mail_mod, "_send_sync", fake_sync)
|
||||
asyncio.run(mail_mod.send_mail("to@test", "subj", "text", "<p>html</p>"))
|
||||
assert called == [("to@test", "subj", "text", "<p>html</p>")]
|
||||
|
||||
@ -351,156 +351,3 @@ class TestRenderPlain:
|
||||
]
|
||||
text = _render_plain(self._make_result(), docs)
|
||||
assert "weitere" not in text
|
||||
|
||||
|
||||
# ─── Coverage-Backfill (#134) ────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestSearchAdapterFallbackLogging:
|
||||
"""Erste/zweite Query schlagen fehl → Debug-Log, dritter klappt."""
|
||||
|
||||
def test_fallback_logs_intermediate_failures(self, caplog):
|
||||
import asyncio
|
||||
import logging
|
||||
from app.monitoring import _search_adapter
|
||||
|
||||
class FakeAdapter:
|
||||
calls = 0
|
||||
async def search(self, q, limit):
|
||||
FakeAdapter.calls += 1
|
||||
if FakeAdapter.calls < 3:
|
||||
raise RuntimeError(f"transient {FakeAdapter.calls}")
|
||||
return ["ok"]
|
||||
|
||||
with caplog.at_level(logging.DEBUG, logger="app.monitoring"):
|
||||
result = asyncio.run(_search_adapter(FakeAdapter(), "BX"))
|
||||
assert result == ["ok"]
|
||||
|
||||
|
||||
class TestDailyScanDbUpsertFailure:
|
||||
"""Wenn upsert_monitoring_scan fuer einzelne Drucksache crasht,
|
||||
wird der Rest weiter verarbeitet (Line 191-192).
|
||||
|
||||
Adapter werden aus app.parlamente.ADAPTERS importiert — also
|
||||
monkey-patchen wir dort.
|
||||
"""
|
||||
|
||||
def test_upsert_exception_logged_and_skipped(self, monkeypatch, caplog):
|
||||
import asyncio
|
||||
import logging
|
||||
from types import SimpleNamespace
|
||||
from app import monitoring as mon
|
||||
from app.bundeslaender import Bundesland
|
||||
import app.parlamente as parl_mod
|
||||
import app.database as db_mod
|
||||
|
||||
# Adapter mit zwei Drucksachen
|
||||
class FakeAdapter:
|
||||
async def search(self, q, limit):
|
||||
return [
|
||||
SimpleNamespace(bundesland="BX", drucksache="1/1",
|
||||
title="A1", datum="2026-04-01",
|
||||
typ="Antrag", typ_normiert="antrag",
|
||||
fraktionen=["CDU"], link="https://x"),
|
||||
SimpleNamespace(bundesland="BX", drucksache="1/2",
|
||||
title="A2", datum="2026-04-02",
|
||||
typ="Antrag", typ_normiert="antrag",
|
||||
fraktionen=["SPD"], link="https://y"),
|
||||
]
|
||||
|
||||
# Erster upsert wirft, zweiter klappt
|
||||
call_count = {"n": 0}
|
||||
async def fake_upsert(**kw):
|
||||
call_count["n"] += 1
|
||||
if call_count["n"] == 1:
|
||||
raise RuntimeError("DB-Lock")
|
||||
return True
|
||||
|
||||
async def fake_summary(**kw):
|
||||
return None
|
||||
|
||||
# Fake-BL fuer aktive_bundeslaender
|
||||
fake_bl = Bundesland(
|
||||
code="BX", name="Test-BL", parlament_name="Test", wahlperiode=1,
|
||||
wahlperiode_start="2024-01-01", naechste_wahl=None,
|
||||
regierungsfraktionen=[], landtagsfraktionen=[],
|
||||
doku_system="Test", doku_base_url="http://example.com",
|
||||
drucksache_format="1/1234", dokukratie_scraper=None, aktiv=True,
|
||||
)
|
||||
|
||||
monkeypatch.setattr(mon, "aktive_bundeslaender", lambda: [fake_bl])
|
||||
# Adapter-Dict im parlamente-Modul (von dem mon importiert)
|
||||
monkeypatch.setitem(parl_mod.ADAPTERS, "BX", FakeAdapter())
|
||||
monkeypatch.setattr(db_mod, "upsert_monitoring_scan", fake_upsert)
|
||||
monkeypatch.setattr(db_mod, "upsert_monitoring_summary", fake_summary)
|
||||
|
||||
with caplog.at_level(logging.ERROR, logger="app.monitoring"):
|
||||
result = asyncio.run(mon.daily_scan(limit=10))
|
||||
|
||||
bx_results = [r for r in result.results if r.bundesland == "BX"]
|
||||
assert len(bx_results) == 1
|
||||
# Erster crashte → new_count=1 (zweiter klappte)
|
||||
assert bx_results[0].new_count == 1
|
||||
assert call_count["n"] == 2
|
||||
assert any("DB-UPSERT fehlgeschlagen" in r.message for r in caplog.records)
|
||||
|
||||
|
||||
class TestSendMonitoringDigest:
|
||||
"""run_monitoring_digest rendert Template, ruft send_mail."""
|
||||
|
||||
def test_mail_sent_returns_true(self, monkeypatch, tmp_path):
|
||||
import asyncio
|
||||
from app import monitoring as mon
|
||||
|
||||
async def fake_scan(**kw):
|
||||
return mon.DailyScanResult(
|
||||
scan_date="2026-04-28",
|
||||
results=[], new_total=0, total_seen=0,
|
||||
estimated_cost_eur=0.0, errors=[],
|
||||
)
|
||||
|
||||
async def fake_get_new_today(scan_date):
|
||||
return []
|
||||
|
||||
async def fake_send_mail(to, subj, text, html):
|
||||
return None
|
||||
|
||||
monkeypatch.setattr(mon, "daily_scan", fake_scan)
|
||||
# Importer-Patches innerhalb der Funktion sind tricky — wir patchen
|
||||
# stattdessen die Module-Funktionen direkt
|
||||
import app.mail
|
||||
import app.database
|
||||
monkeypatch.setattr(app.mail, "send_mail", fake_send_mail)
|
||||
monkeypatch.setattr(app.database, "get_monitoring_new_today",
|
||||
fake_get_new_today)
|
||||
|
||||
result = asyncio.run(mon.run_monitoring_digest("admin@test"))
|
||||
assert result["mail_sent"] is True
|
||||
assert result["scan_date"] == "2026-04-28"
|
||||
|
||||
def test_mail_failure_returns_false_but_not_raises(self, monkeypatch):
|
||||
import asyncio
|
||||
from app import monitoring as mon
|
||||
|
||||
async def fake_scan(**kw):
|
||||
return mon.DailyScanResult(
|
||||
scan_date="2026-04-28",
|
||||
results=[], new_total=0, total_seen=0,
|
||||
estimated_cost_eur=0.0, errors=[],
|
||||
)
|
||||
|
||||
async def fake_get_new_today(scan_date):
|
||||
return []
|
||||
|
||||
async def failing_send_mail(to, subj, text, html):
|
||||
raise ConnectionError("SMTP down")
|
||||
|
||||
monkeypatch.setattr(mon, "daily_scan", fake_scan)
|
||||
import app.mail
|
||||
import app.database
|
||||
monkeypatch.setattr(app.mail, "send_mail", failing_send_mail)
|
||||
monkeypatch.setattr(app.database, "get_monitoring_new_today",
|
||||
fake_get_new_today)
|
||||
|
||||
result = asyncio.run(mon.run_monitoring_digest("admin@test"))
|
||||
assert result["mail_sent"] is False
|
||||
|
||||
@ -1,168 +0,0 @@
|
||||
"""Tests fuer app/og_card.py — render_og_card mit Cache + Playwright (#134, #141).
|
||||
|
||||
Tests fuer cache_key + get_cached lebten vorher in test_wahlprogramm_fetch.py;
|
||||
hier kommt der Render-Pfad mit gemocktem Playwright dazu, sodass die volle
|
||||
Coverage von render_og_card lokal lauft.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import types
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from app.og_card import (
|
||||
cache_key,
|
||||
get_cached,
|
||||
render_og_card,
|
||||
)
|
||||
|
||||
|
||||
class TestCacheKey:
|
||||
def test_deterministic(self):
|
||||
a = cache_key("NRW-18/1", "2026-04-01T00:00:00")
|
||||
b = cache_key("NRW-18/1", "2026-04-01T00:00:00")
|
||||
assert a == b
|
||||
|
||||
def test_changes_with_updated_at(self):
|
||||
a = cache_key("NRW-18/1", "2026-04-01T00:00:00")
|
||||
b = cache_key("NRW-18/1", "2026-04-02T00:00:00")
|
||||
assert a != b
|
||||
|
||||
def test_length_16(self):
|
||||
assert len(cache_key("NRW-18/1", "x")) == 16
|
||||
|
||||
|
||||
class TestGetCached:
|
||||
def test_returns_none_when_missing(self, tmp_path):
|
||||
assert get_cached("NRW-18/1", "2026-04-01T00:00:00", cache_dir=tmp_path) is None
|
||||
|
||||
def test_returns_path_when_exists(self, tmp_path):
|
||||
ds = "NRW-18/1"
|
||||
upd = "2026-04-01T00:00:00"
|
||||
key = cache_key(ds, upd)
|
||||
safe = ds.replace("/", "_")
|
||||
target = tmp_path / f"{safe}_{key}.png"
|
||||
target.write_bytes(b"\x89PNG dummy")
|
||||
result = get_cached(ds, upd, cache_dir=tmp_path)
|
||||
assert result == target
|
||||
|
||||
|
||||
class TestRenderOgCard:
|
||||
"""Tests fuer den Render-Pfad. Playwright wird ueber sys.modules-Stub
|
||||
eingehaengt — sync_playwright() liefert einen ContextManager, der
|
||||
einen gemockten Browser/Page-Stack zurueckgibt."""
|
||||
|
||||
def _make_playwright_stub(self, png_bytes: bytes = b"\x89PNG fake"):
|
||||
"""Erstellt ein Stub-Modul 'playwright.sync_api' mit
|
||||
``sync_playwright`` als ContextManager, dessen __enter__ einen Mock
|
||||
liefert, der die Chain pw.chromium.launch().new_page().screenshot()
|
||||
liefert."""
|
||||
mod = types.ModuleType("playwright")
|
||||
sub = types.ModuleType("playwright.sync_api")
|
||||
|
||||
page_mock = MagicMock()
|
||||
page_mock.screenshot.return_value = png_bytes
|
||||
page_mock.goto.return_value = None
|
||||
|
||||
browser_mock = MagicMock()
|
||||
browser_mock.new_page.return_value = page_mock
|
||||
browser_mock.close.return_value = None
|
||||
|
||||
pw_mock = MagicMock()
|
||||
pw_mock.chromium.launch.return_value = browser_mock
|
||||
|
||||
ctx_mgr = MagicMock()
|
||||
ctx_mgr.__enter__.return_value = pw_mock
|
||||
ctx_mgr.__exit__.return_value = False
|
||||
|
||||
sub.sync_playwright = MagicMock(return_value=ctx_mgr)
|
||||
mod.sync_api = sub
|
||||
return mod, sub, page_mock
|
||||
|
||||
def test_cache_hit_skips_playwright(self, tmp_path):
|
||||
"""Existierender Cache → Playwright wird gar nicht angerufen."""
|
||||
ds = "NRW-18/1"
|
||||
upd = "2026-04-01T00:00:00"
|
||||
key = cache_key(ds, upd)
|
||||
safe = ds.replace("/", "_")
|
||||
cache_file = tmp_path / f"{safe}_{key}.png"
|
||||
cache_file.write_bytes(b"\x89CACHED")
|
||||
|
||||
# Wenn der Cache hit ist, sollte playwright NICHT importiert werden.
|
||||
# Dafuer setzen wir einen Stub, der bei Aufruf einen Test-Fehler triggert.
|
||||
sys.modules.pop("playwright", None)
|
||||
sys.modules.pop("playwright.sync_api", None)
|
||||
|
||||
with patch.dict(sys.modules, {}, clear=False):
|
||||
result = render_og_card(ds, upd, cache_dir=tmp_path)
|
||||
assert result == b"\x89CACHED"
|
||||
|
||||
def test_cache_miss_renders_via_playwright(self, tmp_path):
|
||||
ds = "NRW-18/2"
|
||||
upd = "2026-04-02T00:00:00"
|
||||
png = b"\x89PNG rendered"
|
||||
|
||||
mod, sub, page_mock = self._make_playwright_stub(png)
|
||||
with patch.dict(sys.modules, {"playwright": mod, "playwright.sync_api": sub}):
|
||||
result = render_og_card(ds, upd, cache_dir=tmp_path,
|
||||
base_url="http://test.example")
|
||||
|
||||
assert result == png
|
||||
# Cache-Datei muss geschrieben sein
|
||||
key = cache_key(ds, upd)
|
||||
safe = ds.replace("/", "_")
|
||||
cache_file = tmp_path / f"{safe}_{key}.png"
|
||||
assert cache_file.exists()
|
||||
assert cache_file.read_bytes() == png
|
||||
|
||||
def test_cache_miss_passes_drucksache_to_playwright_url(self, tmp_path):
|
||||
"""URL-Kodierung des Drucksachen-Namens muss ans og-template gehen."""
|
||||
ds = "NRW-18/123 (neu)" # Sonderzeichen
|
||||
upd = "2026-04-03T00:00:00"
|
||||
mod, sub, page_mock = self._make_playwright_stub()
|
||||
with patch.dict(sys.modules, {"playwright": mod, "playwright.sync_api": sub}):
|
||||
render_og_card(ds, upd, cache_dir=tmp_path,
|
||||
base_url="http://internal:8000")
|
||||
# page.goto wurde aufgerufen — URL-Argument analysieren
|
||||
call = page_mock.goto.call_args
|
||||
url = call.args[0]
|
||||
assert url.startswith("http://internal:8000/v2/og-template?drucksache=")
|
||||
# / und Klammern muessen URL-encoded sein
|
||||
assert "%2F" in url
|
||||
assert "(" not in url # encoded as %28
|
||||
|
||||
def test_playwright_exception_returns_none(self, tmp_path):
|
||||
"""Renderer-Fehler darf den Caller nicht crashen."""
|
||||
ds = "NRW-18/3"
|
||||
upd = "2026-04-04T00:00:00"
|
||||
|
||||
mod = types.ModuleType("playwright")
|
||||
sub = types.ModuleType("playwright.sync_api")
|
||||
|
||||
def _broken(*a, **kw):
|
||||
raise RuntimeError("Browser launch failed")
|
||||
sub.sync_playwright = _broken
|
||||
mod.sync_api = sub
|
||||
|
||||
with patch.dict(sys.modules, {"playwright": mod, "playwright.sync_api": sub}):
|
||||
result = render_og_card(ds, upd, cache_dir=tmp_path)
|
||||
assert result is None
|
||||
# Cache-Datei darf NICHT existieren
|
||||
key = cache_key(ds, upd)
|
||||
safe = ds.replace("/", "_")
|
||||
cache_file = tmp_path / f"{safe}_{key}.png"
|
||||
assert not cache_file.exists()
|
||||
|
||||
def test_cache_dir_created_if_missing(self, tmp_path):
|
||||
"""render_og_card muss das cache_dir auch anlegen, wenn es fehlt."""
|
||||
sub_dir = tmp_path / "deep" / "nested" / "cache"
|
||||
# Existiert noch nicht
|
||||
assert not sub_dir.exists()
|
||||
|
||||
mod, sub, page_mock = self._make_playwright_stub()
|
||||
with patch.dict(sys.modules, {"playwright": mod, "playwright.sync_api": sub}):
|
||||
render_og_card("NRW-18/4", "2026-04-05T00:00:00", cache_dir=sub_dir)
|
||||
assert sub_dir.exists()
|
||||
@ -576,27 +576,3 @@ class TestSaarlandSearchPropagatesErrors:
|
||||
|
||||
with pytest.raises(httpx.ConnectError):
|
||||
asyncio.run(_run())
|
||||
|
||||
def test_search_propagates_http_500(self):
|
||||
"""HTTP 5xx response must NOT be silently turned into empty results
|
||||
(regression #142): a 500 from the Umbraco backend used to log+return
|
||||
[], hiding it from the monitoring summary."""
|
||||
import httpx
|
||||
from app.parlamente import SaarlandAdapter
|
||||
|
||||
adapter = SaarlandAdapter()
|
||||
|
||||
async def _run():
|
||||
mock_client = AsyncMock()
|
||||
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
|
||||
mock_client.__aexit__ = AsyncMock(return_value=False)
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.status_code = 500
|
||||
mock_resp.text = "Server Error"
|
||||
mock_resp.request = MagicMock()
|
||||
mock_client.post = AsyncMock(return_value=mock_resp)
|
||||
with patch.object(adapter, "_make_client", return_value=mock_client):
|
||||
await adapter.search("Schule")
|
||||
|
||||
with pytest.raises(httpx.HTTPStatusError):
|
||||
asyncio.run(_run())
|
||||
|
||||
@ -1,74 +0,0 @@
|
||||
"""Tests fuer app/protokoll_parsers/__init__.py — Registry + Dispatch (#126)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from app.protokoll_parsers import (
|
||||
PROTOKOLL_PARSERS,
|
||||
parse_protocol,
|
||||
supported_bundeslaender,
|
||||
)
|
||||
|
||||
|
||||
class TestRegistry:
|
||||
def test_nrw_registered(self):
|
||||
"""NRW ist die Referenz-Implementierung — muss da sein."""
|
||||
assert "NRW" in PROTOKOLL_PARSERS
|
||||
|
||||
def test_supported_includes_nrw(self):
|
||||
assert "NRW" in supported_bundeslaender()
|
||||
|
||||
def test_supported_returns_sorted(self):
|
||||
codes = supported_bundeslaender()
|
||||
assert codes == sorted(codes)
|
||||
|
||||
def test_registry_values_are_callable(self):
|
||||
for code, parser in PROTOKOLL_PARSERS.items():
|
||||
assert callable(parser), f"Parser fuer {code} ist nicht callable"
|
||||
|
||||
|
||||
class TestDispatch:
|
||||
def test_unknown_bl_raises_not_implemented(self):
|
||||
with pytest.raises(NotImplementedError) as exc:
|
||||
parse_protocol("XX", "/dev/null")
|
||||
msg = str(exc.value)
|
||||
assert "XX" in msg
|
||||
# Liste der unterstuetzten BL muss in der Message stehen
|
||||
assert "NRW" in msg
|
||||
# Issue-Referenz fuer Folge-Arbeit
|
||||
assert "#126" in msg
|
||||
|
||||
def test_known_bl_delegates_to_registered_parser(self, tmp_path, monkeypatch):
|
||||
"""parse_protocol delegiert an den BL-Parser aus der Registry."""
|
||||
called_with: list[str] = []
|
||||
|
||||
def fake_parser(pdf_path: str) -> list[dict]:
|
||||
called_with.append(pdf_path)
|
||||
return [{"drucksache": "18/1", "ergebnis": "angenommen", "votes": {"ja": [], "nein": [], "enthaltung": []}}]
|
||||
|
||||
# Temporaer einen TEST-Parser registrieren, dann wieder entfernen
|
||||
monkeypatch.setitem(PROTOKOLL_PARSERS, "TEST", fake_parser)
|
||||
|
||||
result = parse_protocol("TEST", str(tmp_path / "x.pdf"))
|
||||
|
||||
assert called_with == [str(tmp_path / "x.pdf")]
|
||||
assert len(result) == 1
|
||||
assert result[0]["drucksache"] == "18/1"
|
||||
|
||||
|
||||
class TestParserSchema:
|
||||
"""Vertrag: jeder registrierte Parser muss Result-Dicts mit minimalem
|
||||
Schema liefern — drucksache (str|None), ergebnis (str), votes (dict)."""
|
||||
|
||||
def test_nrw_result_dict_has_expected_keys(self):
|
||||
"""Smoke-Test mit handgemachtem Plenarprotokoll-Snippet — pruefen,
|
||||
dass das Schema des Output-Dicts die in __init__.py dokumentierten
|
||||
Keys enthaelt."""
|
||||
from app.protokoll_parsers.nrw import find_results
|
||||
|
||||
text = "Damit ist der Antrag Drucksache 18/100 angenommen."
|
||||
results = find_results(text)
|
||||
assert results, "find_results sollte mindestens einen Treffer liefern"
|
||||
for r in results:
|
||||
for key in ("drucksache", "ergebnis", "kind", "einstimmig"):
|
||||
assert key in r, f"Key '{key}' fehlt im Result"
|
||||
@ -1,349 +0,0 @@
|
||||
"""Tests fuer app/protokoll_parsers/nrw.py — NRW-Plenarprotokoll-Parser v5.
|
||||
|
||||
Backfill aus #134, BL-Refactor aus #126.
|
||||
|
||||
Der Parser ist deterministisch und anchor-basiert; jede Aenderung an den
|
||||
RESULT_ANCHORS oder den Vote-Block-Regexes muss sofort durch diese Tests
|
||||
fallen. Die echte 19/19-Garantie auf MMP18-119 laeuft separat als
|
||||
Integration-Test (braucht das PDF). Hier: pure-string-Tests fuer alle
|
||||
Reverse-Engineering-Findings, die bei der iterativen Entwicklung 1-15
|
||||
dokumentiert wurden.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import types
|
||||
|
||||
# fitz ist via tests/conftest.py gestubbed — Pure-String-Funktionen kommen ohne aus.
|
||||
|
||||
from app.protokoll_parsers.nrw import (
|
||||
normalize_fraktionen,
|
||||
find_results,
|
||||
resolve_drucksache_for_ueber,
|
||||
normalize_text,
|
||||
_is_empty_phrase,
|
||||
_parse_vote_block,
|
||||
ALLE_FRAKTIONEN_NRW,
|
||||
)
|
||||
|
||||
|
||||
class TestNormalizeFraktionen:
|
||||
def test_simple_cdu(self):
|
||||
assert normalize_fraktionen("Wer stimmt zu? – CDU") == ["CDU"]
|
||||
|
||||
def test_multiple_fraktionen(self):
|
||||
result = normalize_fraktionen("CDU, SPD und GRÜNE")
|
||||
assert result == sorted(["CDU", "SPD", "GRÜNE"])
|
||||
|
||||
def test_buendnis_90_normalizes_to_gruene(self):
|
||||
assert normalize_fraktionen("Bündnis 90/Die Grünen") == ["GRÜNE"]
|
||||
|
||||
def test_fdp_with_dots_normalizes(self):
|
||||
"""F.D.P. (mit Punkten) muss als FDP erkannt werden."""
|
||||
assert normalize_fraktionen("F.D.P.") == ["FDP"]
|
||||
|
||||
def test_no_double_match_for_overlapping_keys(self):
|
||||
"""'GRÜNE' darf nicht zusaetzlich als 'Grünen' wieder gematcht werden."""
|
||||
result = normalize_fraktionen("GRÜNE und Grünen")
|
||||
# Beide Tokens sind dieselbe Fraktion → nur einmal in der Liste
|
||||
assert result.count("GRÜNE") == 1
|
||||
|
||||
def test_landesregierung_recognized(self):
|
||||
assert "Landesregierung" in normalize_fraktionen("Landesregierung")
|
||||
|
||||
def test_empty_text_returns_empty(self):
|
||||
assert normalize_fraktionen("") == []
|
||||
|
||||
def test_no_known_partei(self):
|
||||
assert normalize_fraktionen("Some random text") == []
|
||||
|
||||
|
||||
class TestIsEmptyPhrase:
|
||||
def test_niemand_is_empty(self):
|
||||
assert _is_empty_phrase("Stimmt jemand dagegen? – Niemand") is True
|
||||
|
||||
def test_keine_is_empty(self):
|
||||
assert _is_empty_phrase("Enthaltungen? – Keine") is True
|
||||
|
||||
def test_nicht_der_fall(self):
|
||||
assert _is_empty_phrase("Das ist nicht der Fall.") is True
|
||||
|
||||
def test_actual_fraktion_is_not_empty(self):
|
||||
assert _is_empty_phrase("CDU und SPD") is False
|
||||
|
||||
|
||||
class TestParseVoteBlock:
|
||||
def test_simple_ja_extraction(self):
|
||||
block = "Wer stimmt zu? – CDU und SPD."
|
||||
votes = _parse_vote_block(block)
|
||||
assert "CDU" in votes["ja"] and "SPD" in votes["ja"]
|
||||
|
||||
def test_ja_with_negation_returns_empty(self):
|
||||
"""'Wer stimmt zu? – Niemand.' → ja-Liste muss leer sein."""
|
||||
block = "Wer stimmt zu? – Niemand."
|
||||
votes = _parse_vote_block(block)
|
||||
assert votes["ja"] == []
|
||||
|
||||
def test_nein_extraction(self):
|
||||
block = "Wer stimmt dagegen? – AfD."
|
||||
votes = _parse_vote_block(block)
|
||||
assert "AfD" in votes["nein"]
|
||||
|
||||
def test_dagegen_negation(self):
|
||||
block = "Wer stimmt dagegen? – Das ist nicht der Fall."
|
||||
votes = _parse_vote_block(block)
|
||||
assert votes["nein"] == []
|
||||
|
||||
|
||||
class TestFindResults:
|
||||
def test_direct_angenommen(self):
|
||||
text = (
|
||||
"Damit ist der Antrag Drucksache 18/123 mit den Stimmen "
|
||||
"der CDU und der SPD angenommen."
|
||||
)
|
||||
results = find_results(text)
|
||||
assert len(results) == 1
|
||||
r = results[0]
|
||||
assert r["drucksache"] == "18/123"
|
||||
assert r["ergebnis"] == "angenommen"
|
||||
|
||||
def test_direct_abgelehnt(self):
|
||||
text = (
|
||||
"Damit ist der Antrag Drucksache 18/9999 mit den Stimmen "
|
||||
"der CDU gegen die Stimmen der SPD abgelehnt."
|
||||
)
|
||||
results = find_results(text)
|
||||
assert any(r["drucksache"] == "18/9999" and r["ergebnis"] == "abgelehnt" for r in results)
|
||||
|
||||
def test_einstimmig_flag_only_for_ueber_kind(self):
|
||||
"""v5-Verhalten dokumentiert: 'einstimmig' wird in direct-kind-Anchors
|
||||
NICHT gesetzt, nur in ueber/petition/uebersicht. Dieser Test pinnt
|
||||
das aktuelle Verhalten — wenn v6 einstimmig auch fuer direct erkennt,
|
||||
muss der Test angepasst werden."""
|
||||
text = "Damit ist der Antrag Drucksache 18/100 einstimmig angenommen."
|
||||
results = find_results(text)
|
||||
assert results[0]["kind"] == "direct_broad"
|
||||
# einstimmig wird hier (noch) nicht gesetzt — Reverse-Engineering-Befund
|
||||
assert results[0]["einstimmig"] is False
|
||||
|
||||
def test_einstimmig_flag_for_ueberweisung(self):
|
||||
"""Bei Ueberweisungs-Anchors mit 'einstimmig' im naechsten Token-Bereich
|
||||
wird das Flag gesetzt."""
|
||||
text = "Drucksache 18/100 ... Damit ist diese Überweisungsempfehlung einstimmig angenommen."
|
||||
results = find_results(text)
|
||||
ueber_results = [r for r in results if r["kind"] == "ueber"]
|
||||
assert ueber_results, "kein ueber-Result im Test-Text gefunden"
|
||||
assert ueber_results[0]["einstimmig"] is True
|
||||
|
||||
def test_ueberweisung_so_beschlossen_implies_einstimmig(self):
|
||||
"""'Damit ist das so beschlossen' = implizit einstimmige Ueberweisung."""
|
||||
text = "Drucksache 18/200 ... Damit ist das so beschlossen."
|
||||
results = find_results(text)
|
||||
assert any(r["kind"] == "ueber" and r["einstimmig"] for r in results)
|
||||
|
||||
def test_neu_suffix_in_drucksachenummer(self):
|
||||
"""Drucksache-Nummern mit (neu)-Suffix muessen matchen."""
|
||||
text = "Damit ist der Antrag Drucksache 18/4567(neu) angenommen."
|
||||
results = find_results(text)
|
||||
# Match irgendwo in den Results
|
||||
assert any(r["drucksache"] == "18/4567(neu)" for r in results)
|
||||
|
||||
def test_results_sorted_by_position(self):
|
||||
"""Mehrere Anchors muessen nach anchor_start aufsteigend sortiert sein."""
|
||||
text = (
|
||||
"Damit ist der Antrag Drucksache 18/100 angenommen. "
|
||||
"Spaeter im Text. Damit ist der Antrag Drucksache 18/200 abgelehnt."
|
||||
)
|
||||
results = find_results(text)
|
||||
positions = [r["anchor_start"] for r in results]
|
||||
assert positions == sorted(positions)
|
||||
|
||||
def test_dedup_same_position(self):
|
||||
"""Wenn zwei Patterns am selben anchor_start matchen, nur einer im Output."""
|
||||
text = "Damit ist der Antrag Drucksache 18/300 angenommen."
|
||||
results = find_results(text)
|
||||
positions = [r["anchor_start"] for r in results]
|
||||
assert len(positions) == len(set(positions))
|
||||
|
||||
|
||||
class TestResolveDrucksacheForUeber:
|
||||
def test_finds_nearest_ds_before_anchor(self):
|
||||
text = "Drucksache 18/100 ... irgendein Text ... Damit ist das so beschlossen."
|
||||
anchor_start = text.find("Damit")
|
||||
ds = resolve_drucksache_for_ueber(text, anchor_start)
|
||||
assert ds == "18/100"
|
||||
|
||||
def test_picks_closest_when_multiple(self):
|
||||
"""Bei mehreren DS-Nrn vor dem Anchor wird die naechste gewaehlt."""
|
||||
text = "Drucksache 18/100 ... Drucksache 18/200 ... Damit ist das so beschlossen."
|
||||
anchor_start = text.find("Damit")
|
||||
ds = resolve_drucksache_for_ueber(text, anchor_start)
|
||||
assert ds == "18/200"
|
||||
|
||||
def test_returns_none_when_no_ds_before(self):
|
||||
text = "Damit ist das so beschlossen. Drucksache 18/100 spaeter."
|
||||
anchor_start = 0
|
||||
ds = resolve_drucksache_for_ueber(text, anchor_start)
|
||||
assert ds is None
|
||||
|
||||
|
||||
class TestNormalizeText:
|
||||
def test_collapses_whitespace(self):
|
||||
"""Mehrfach-Whitespace wird zu einzelnem Leerzeichen kollabiert."""
|
||||
result = normalize_text("Damit ist\nder\tAntrag")
|
||||
assert " " not in result
|
||||
|
||||
def test_preserves_drucksache_format(self):
|
||||
"""Drucksache-Schreibweise mit Slash muss erhalten bleiben."""
|
||||
result = normalize_text("Drucksache 18/123")
|
||||
assert "18/123" in result
|
||||
|
||||
|
||||
class TestKnownFraktionsList:
|
||||
def test_alle_fraktionen_nrw_complete(self):
|
||||
"""ALLE_FRAKTIONEN_NRW deckt die WP18-Fraktionen ab (CDU, SPD, GRÜNE, FDP, AfD)."""
|
||||
for f in ("CDU", "SPD", "GRÜNE", "FDP", "AfD"):
|
||||
assert f in ALLE_FRAKTIONEN_NRW
|
||||
|
||||
|
||||
# ─── parse_protocol mit fitz-Mock (#134 Backfill) ─────────────────────────────
|
||||
|
||||
|
||||
class TestParseProtocol:
|
||||
"""Integration-light: parse_protocol mit gemocktem fitz, sodass die
|
||||
Pipeline find_results → segment-detection → vote-block-Aufloesung
|
||||
end-to-end laeuft."""
|
||||
|
||||
def _patch_fitz(self, monkeypatch, full_text: str):
|
||||
"""Patcht fitz.open so, dass ein Mock-Document mit dem gegebenen
|
||||
Volltext zurueckkommt."""
|
||||
from unittest.mock import MagicMock
|
||||
from app.protokoll_parsers import nrw as nrw_mod
|
||||
|
||||
class FakePage:
|
||||
def __init__(self, text):
|
||||
self._text = text
|
||||
def get_text(self):
|
||||
return self._text
|
||||
|
||||
class FakeDoc:
|
||||
def __init__(self, text):
|
||||
self._pages = [FakePage(text)]
|
||||
def __iter__(self):
|
||||
return iter(self._pages)
|
||||
def close(self):
|
||||
pass
|
||||
|
||||
monkeypatch.setattr(nrw_mod.fitz, "open",
|
||||
lambda path: FakeDoc(full_text), raising=False)
|
||||
|
||||
def test_simple_angenommen(self, monkeypatch):
|
||||
from app.protokoll_parsers.nrw import parse_protocol
|
||||
text = (
|
||||
"Wir kommen zur Abstimmung über Drucksache 18/100. "
|
||||
"Wer stimmt zu? – CDU und SPD. Wer stimmt dagegen? – AfD. "
|
||||
"Damit ist der Antrag Drucksache 18/100 angenommen."
|
||||
)
|
||||
self._patch_fitz(monkeypatch, text)
|
||||
result = parse_protocol("/tmp/dummy.pdf")
|
||||
assert result
|
||||
first = result[0]
|
||||
assert first["drucksache"] == "18/100"
|
||||
assert first["ergebnis"] == "angenommen"
|
||||
assert "CDU" in first["votes"]["ja"]
|
||||
assert "AfD" in first["votes"]["nein"]
|
||||
|
||||
def test_einstimmig_fills_all_fraktionen(self, monkeypatch):
|
||||
from app.protokoll_parsers.nrw import parse_protocol
|
||||
from app.protokoll_parsers.nrw import ALLE_FRAKTIONEN_NRW
|
||||
text = "Damit ist der Antrag Drucksache 18/200 einstimmig beschlossen."
|
||||
self._patch_fitz(monkeypatch, text)
|
||||
result = parse_protocol("/tmp/dummy.pdf")
|
||||
# Auch wenn der Parser nicht einstimmig=True setzt fuer direct_broad,
|
||||
# muessen alle ja-Fraktionen drin sein wenn das Flag korrekt war.
|
||||
# Hier akzeptieren wir, dass ergebnis 'angenommen' (verabschiedet→angenommen),
|
||||
# einstimmig-Verhalten wie find_results-Test schon validiert.
|
||||
assert result
|
||||
assert result[0]["drucksache"] == "18/200"
|
||||
assert result[0]["ergebnis"] == "angenommen"
|
||||
|
||||
def test_ueberweisung_so_beschlossen_uses_einstimmig_fallback(self, monkeypatch):
|
||||
from app.protokoll_parsers.nrw import parse_protocol, ALLE_FRAKTIONEN_NRW
|
||||
text = (
|
||||
"Wir kommen zur Abstimmung über Drucksache 18/300. "
|
||||
"Damit ist das so beschlossen."
|
||||
)
|
||||
self._patch_fitz(monkeypatch, text)
|
||||
result = parse_protocol("/tmp/dummy.pdf")
|
||||
assert result
|
||||
# ueber-Kind + 'so beschlossen' → einstimmig-Fallback fuellt ja-Liste
|
||||
ja = result[0]["votes"]["ja"]
|
||||
for frak in ALLE_FRAKTIONEN_NRW:
|
||||
assert frak in ja
|
||||
assert result[0]["votes"]["nein"] == []
|
||||
assert result[0]["ergebnis"] == "überwiesen"
|
||||
|
||||
def test_skips_anchor_without_drucksache(self, monkeypatch):
|
||||
from app.protokoll_parsers.nrw import parse_protocol
|
||||
# Anchor ohne aufloesbare Drucksache (kein vorheriges 'Drucksache N/M')
|
||||
text = "Damit ist das so beschlossen. Drucksache 18/400 ist spaeter."
|
||||
self._patch_fitz(monkeypatch, text)
|
||||
result = parse_protocol("/tmp/dummy.pdf")
|
||||
# Anchor wird uebersprungen
|
||||
assert result == []
|
||||
|
||||
def test_compare_to_fixture_perfect_match(self):
|
||||
"""compare_to_fixture: Parser-Output entspricht der Ground-Truth → 1/1."""
|
||||
from app.protokoll_parsers.nrw import compare_to_fixture
|
||||
parsed = [{"drucksache": "18/1", "ergebnis": "angenommen",
|
||||
"votes": {"ja": ["CDU"], "nein": [], "enthaltung": []}}]
|
||||
fixture = {
|
||||
"drucksachen": [
|
||||
{"drucksache": "18/1", "ergebnis": "angenommen",
|
||||
"ja": ["CDU"], "nein": [], "enthaltung": []}
|
||||
]
|
||||
}
|
||||
matches, errors = compare_to_fixture(parsed, fixture)
|
||||
assert matches == 1
|
||||
assert errors == []
|
||||
|
||||
def test_compare_to_fixture_not_found(self):
|
||||
from app.protokoll_parsers.nrw import compare_to_fixture
|
||||
parsed = []
|
||||
fixture = {
|
||||
"drucksachen": [
|
||||
{"drucksache": "18/99", "ergebnis": "angenommen",
|
||||
"ja": [], "nein": [], "enthaltung": []}
|
||||
]
|
||||
}
|
||||
matches, errors = compare_to_fixture(parsed, fixture)
|
||||
assert matches == 0
|
||||
assert any("NOT FOUND" in e for e in errors)
|
||||
|
||||
def test_compare_to_fixture_nicht_gesondert(self):
|
||||
"""Parser darf bei 'nicht_gesondert_abgestimmt' den Eintrag nicht finden."""
|
||||
from app.protokoll_parsers.nrw import compare_to_fixture
|
||||
# Nicht in parsed enthalten → korrekt
|
||||
parsed = []
|
||||
fixture = {
|
||||
"drucksachen": [
|
||||
{"drucksache": "18/77", "ergebnis": "nicht_gesondert_abgestimmt",
|
||||
"ja": [], "nein": [], "enthaltung": []}
|
||||
]
|
||||
}
|
||||
matches, _ = compare_to_fixture(parsed, fixture)
|
||||
assert matches == 1
|
||||
|
||||
def test_compare_to_fixture_wrong_ergebnis(self):
|
||||
from app.protokoll_parsers.nrw import compare_to_fixture
|
||||
parsed = [{"drucksache": "18/3", "ergebnis": "abgelehnt",
|
||||
"votes": {"ja": [], "nein": ["CDU"], "enthaltung": []}}]
|
||||
fixture = {
|
||||
"drucksachen": [
|
||||
{"drucksache": "18/3", "ergebnis": "angenommen",
|
||||
"ja": ["CDU"], "nein": [], "enthaltung": []}
|
||||
]
|
||||
}
|
||||
matches, errors = compare_to_fixture(parsed, fixture)
|
||||
assert matches == 0
|
||||
assert any("ergebnis abgelehnt != angenommen" in e for e in errors)
|
||||
@ -66,113 +66,3 @@ class TestGetQueueStatus:
|
||||
status = get_queue_status()
|
||||
assert status["pending"] == 2
|
||||
assert status["estimated_wait_seconds"] > 0
|
||||
|
||||
|
||||
# ─── Coverage-Backfill (#134) — Worker, Shutdown, Re-Enqueue ─────────────────
|
||||
|
||||
|
||||
class TestStartWorker:
|
||||
@pytest.mark.asyncio
|
||||
async def test_creates_tasks_for_concurrency(self):
|
||||
"""start_worker erzeugt CONCURRENCY viele Tasks."""
|
||||
from app import queue as q
|
||||
# Reset _worker_tasks
|
||||
q._worker_tasks.clear()
|
||||
try:
|
||||
tasks = q.start_worker()
|
||||
assert len(tasks) == q.CONCURRENCY
|
||||
assert all(t is not None for t in tasks)
|
||||
finally:
|
||||
# Cleanup: cancel + clear
|
||||
for t in q._worker_tasks:
|
||||
t.cancel()
|
||||
q._worker_tasks.clear()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_does_not_replace_running_workers(self):
|
||||
"""Wenn start_worker zweimal aufgerufen wird, werden lebende Tasks
|
||||
nicht durch neue ersetzt."""
|
||||
from app import queue as q
|
||||
q._worker_tasks.clear()
|
||||
try:
|
||||
first = q.start_worker()
|
||||
first_ids = [id(t) for t in first]
|
||||
second = q.start_worker()
|
||||
second_ids = [id(t) for t in second]
|
||||
# Tasks bleiben dieselben Instanzen
|
||||
assert first_ids == second_ids
|
||||
finally:
|
||||
for t in q._worker_tasks:
|
||||
t.cancel()
|
||||
q._worker_tasks.clear()
|
||||
|
||||
|
||||
class TestGracefulShutdown:
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_processing_jobs_returns_immediately(self):
|
||||
"""Mit leerem _jobs-State sollte graceful_shutdown sofort
|
||||
zurueckkehren."""
|
||||
from app import queue as q
|
||||
q._jobs.clear()
|
||||
# set _shutting_down zurueck
|
||||
q._shutting_down = False
|
||||
try:
|
||||
import time
|
||||
t0 = time.time()
|
||||
await q.graceful_shutdown(timeout=5)
|
||||
assert time.time() - t0 < 1.0 # Sofort
|
||||
assert q._shutting_down is True
|
||||
finally:
|
||||
q._shutting_down = False
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_waits_for_processing_jobs(self):
|
||||
"""Mit einem 'processing'-Job wartet shutdown bis er fertig ist."""
|
||||
from app import queue as q
|
||||
import asyncio as _asyncio
|
||||
q._jobs.clear()
|
||||
q._jobs["job1"] = {"status": "processing"}
|
||||
q._shutting_down = False
|
||||
|
||||
async def finish_job_after_delay():
|
||||
await _asyncio.sleep(0.05)
|
||||
q._jobs["job1"]["status"] = "completed"
|
||||
|
||||
try:
|
||||
await _asyncio.gather(
|
||||
q.graceful_shutdown(timeout=5),
|
||||
finish_job_after_delay(),
|
||||
)
|
||||
assert q._shutting_down is True
|
||||
finally:
|
||||
q._jobs.clear()
|
||||
q._shutting_down = False
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_timeout_logs_remaining(self, caplog):
|
||||
"""Wenn Job nach Timeout noch processing ist, wird ERROR geloggt."""
|
||||
import logging
|
||||
from app import queue as q
|
||||
q._jobs.clear()
|
||||
q._jobs["stuck"] = {"status": "processing"}
|
||||
q._shutting_down = False
|
||||
|
||||
try:
|
||||
with caplog.at_level(logging.ERROR, logger="app.queue"):
|
||||
await q.graceful_shutdown(timeout=1)
|
||||
assert any("Timeout" in r.message for r in caplog.records)
|
||||
finally:
|
||||
q._jobs.clear()
|
||||
q._shutting_down = False
|
||||
|
||||
|
||||
class TestEnqueueShuttingDown:
|
||||
@pytest.mark.asyncio
|
||||
async def test_enqueue_blocked_during_shutdown(self):
|
||||
from app import queue as q
|
||||
q._shutting_down = True
|
||||
try:
|
||||
with pytest.raises(q.QueueFullError, match="Server wird neu gestartet"):
|
||||
await q.enqueue("job-x", lambda: None)
|
||||
finally:
|
||||
q._shutting_down = False
|
||||
|
||||
@ -143,83 +143,3 @@ class TestEdgeCases:
|
||||
assert "muss" in ins_texts
|
||||
assert "31.12.2026" in del_texts
|
||||
assert "30.06.2025" in ins_texts
|
||||
|
||||
|
||||
# ─── build_pdf_href Tests (#134 Coverage-Backfill) ───────────────────────────
|
||||
|
||||
class TestBuildPdfHref:
|
||||
"""Tests fuer build_pdf_href: rekonstruiert PDF-URLs aus Zitat-Metadaten,
|
||||
bevorzugt die explizite url, faellt auf WAHLPROGRAMME-Lookup zurueck."""
|
||||
|
||||
def test_explicit_url_passed_through(self):
|
||||
from app.redline_utils import build_pdf_href
|
||||
zitat = {"url": "/api/wahlprogramm-cite?pid=cdu-nrw-2022&seite=15"}
|
||||
assert build_pdf_href(zitat) == "/api/wahlprogramm-cite?pid=cdu-nrw-2022&seite=15"
|
||||
|
||||
def test_empty_url_falls_back_to_quelle_lookup(self):
|
||||
"""Ohne url muss die quelle reconstruiert werden via WAHLPROGRAMME."""
|
||||
from app.redline_utils import build_pdf_href
|
||||
# Ein in WAHLPROGRAMME hinterlegter Titel
|
||||
from app.wahlprogramme import WAHLPROGRAMME
|
||||
# Pick the first programme from the registry
|
||||
bl, parteien = next(iter(WAHLPROGRAMME.items()))
|
||||
partei, info = next(iter(parteien.items()))
|
||||
titel = info.get("titel", "")
|
||||
if not titel:
|
||||
pytest.skip("Kein WAHLPROGRAMME-Eintrag mit titel verfuegbar")
|
||||
zitat = {
|
||||
"quelle": f"{titel} · S. 42",
|
||||
"text": "Wir wollen die Energiewende",
|
||||
"url": "",
|
||||
}
|
||||
href = build_pdf_href(zitat)
|
||||
assert "/api/wahlprogramm-cite" in href
|
||||
assert "seite=42" in href
|
||||
assert "#page=42" in href # URL-Hash fuer Browser-PDF-Viewer
|
||||
|
||||
def test_no_seitenzahl_returns_empty(self):
|
||||
from app.redline_utils import build_pdf_href
|
||||
zitat = {"quelle": "Irgendein Programm ohne Seite", "text": "x", "url": ""}
|
||||
assert build_pdf_href(zitat) == ""
|
||||
|
||||
def test_unmatched_quelle_returns_empty(self):
|
||||
from app.redline_utils import build_pdf_href
|
||||
zitat = {
|
||||
"quelle": "Erfundenes Programm 1995, S. 1",
|
||||
"text": "x",
|
||||
"url": "",
|
||||
}
|
||||
assert build_pdf_href(zitat) == ""
|
||||
|
||||
def test_query_uses_first_5_words_of_text(self):
|
||||
from app.redline_utils import build_pdf_href
|
||||
from app.wahlprogramme import WAHLPROGRAMME
|
||||
bl, parteien = next(iter(WAHLPROGRAMME.items()))
|
||||
partei, info = next(iter(parteien.items()))
|
||||
titel = info.get("titel", "")
|
||||
if not titel:
|
||||
pytest.skip("Kein WAHLPROGRAMME-Eintrag mit titel verfuegbar")
|
||||
zitat = {
|
||||
"quelle": f"{titel} · S. 5",
|
||||
"text": "Eins zwei drei vier fünf sechs sieben",
|
||||
"url": "",
|
||||
}
|
||||
href = build_pdf_href(zitat)
|
||||
# max. 5 Worte → "sechs sieben" muessen im Query fehlen
|
||||
assert "sechs" not in href
|
||||
assert "sieben" not in href
|
||||
# erste fuenf Wortteile sollten kodiert in q= auftauchen
|
||||
assert "Eins" in href or "Eins" in href.replace("+", " ")
|
||||
|
||||
def test_handles_seite_with_comma_separator(self):
|
||||
"""Quelle 'Titel, S. 42' (Komma) muss genauso parsen wie '· S. 42'."""
|
||||
from app.redline_utils import build_pdf_href
|
||||
from app.wahlprogramme import WAHLPROGRAMME
|
||||
bl, parteien = next(iter(WAHLPROGRAMME.items()))
|
||||
partei, info = next(iter(parteien.items()))
|
||||
titel = info.get("titel", "")
|
||||
if not titel:
|
||||
pytest.skip("Kein WAHLPROGRAMME-Eintrag mit titel verfuegbar")
|
||||
zitat = {"quelle": f"{titel}, S. 17", "text": "x", "url": ""}
|
||||
href = build_pdf_href(zitat)
|
||||
assert "seite=17" in href
|
||||
|
||||
@ -192,53 +192,3 @@ def test_generate_html_report_escapes_all_llm_payloads(tmp_path: Path):
|
||||
|
||||
# Format-Redline-Marker müssen weiterhin funktionieren (Vorschlag mit **)
|
||||
assert '<span class="inserted">' in html
|
||||
|
||||
|
||||
# ─── Coverage-Backfill (#134) ────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestGetScoreColor:
|
||||
def test_high_score_blue(self):
|
||||
from app.report import get_score_color
|
||||
assert get_score_color(8.5).lower().startswith("#")
|
||||
assert get_score_color(8.5) == get_score_color(7.0) # gleiche Klasse
|
||||
|
||||
def test_mid_score_green(self):
|
||||
from app.report import get_score_color, COLORS
|
||||
assert get_score_color(5.0) == COLORS["green"]
|
||||
|
||||
def test_low_yellow(self):
|
||||
from app.report import get_score_color
|
||||
assert get_score_color(2.5) == "#FFC20E"
|
||||
|
||||
def test_very_low_orange(self):
|
||||
from app.report import get_score_color, COLORS
|
||||
assert get_score_color(1.5) == COLORS["orange"]
|
||||
|
||||
def test_zero_red(self):
|
||||
from app.report import get_score_color, COLORS
|
||||
assert get_score_color(0.0) == COLORS["red"]
|
||||
|
||||
|
||||
class TestGetRatingSymbol:
|
||||
def test_strong_positive(self):
|
||||
from app.report import get_rating_symbol
|
||||
assert get_rating_symbol(2) == "++"
|
||||
assert get_rating_symbol(5) == "++"
|
||||
|
||||
def test_positive(self):
|
||||
from app.report import get_rating_symbol
|
||||
assert get_rating_symbol(1) == "+"
|
||||
|
||||
def test_neutral(self):
|
||||
from app.report import get_rating_symbol
|
||||
assert get_rating_symbol(0) == "○"
|
||||
|
||||
def test_negative(self):
|
||||
from app.report import get_rating_symbol
|
||||
assert get_rating_symbol(-1) == "−"
|
||||
|
||||
def test_strong_negative(self):
|
||||
from app.report import get_rating_symbol
|
||||
assert get_rating_symbol(-2) == "−−"
|
||||
assert get_rating_symbol(-5) == "−−"
|
||||
|
||||
@ -1,189 +0,0 @@
|
||||
"""Tests fuer den Atom-Feed-Endpoint /api/feed.xml (#125).
|
||||
|
||||
Backfill aus #134: vorher nur indirekt im Smoke-Test abgedeckt. Hier:
|
||||
- Atom-1.0-Validitaet (XML well-formed, Pflicht-Elemente)
|
||||
- Filter-Parameter wirken (bundesland, partei)
|
||||
- ETag-Header + 304-Verhalten
|
||||
- Limit-Clamping
|
||||
- HTML-Escaping fuer Sonderzeichen in Titeln/Drucksachen
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
from unittest.mock import patch
|
||||
from xml.etree import ElementTree as ET
|
||||
|
||||
try:
|
||||
from fastapi.testclient import TestClient
|
||||
from app.main import app
|
||||
client = TestClient(app)
|
||||
_HAS_APP = True
|
||||
except ImportError:
|
||||
_HAS_APP = False
|
||||
client = None
|
||||
|
||||
|
||||
pytestmark = pytest.mark.skipif(not _HAS_APP, reason="app.main not importable")
|
||||
|
||||
ATOM_NS = "{http://www.w3.org/2005/Atom}"
|
||||
|
||||
|
||||
def _fake_assessments() -> list[dict]:
|
||||
"""Drei Fixture-Assessments mit allen Feldern, die der Feed nutzt."""
|
||||
return [
|
||||
{
|
||||
"drucksache": "21/1234",
|
||||
"title": "Antrag zu Erneuerbaren Energien",
|
||||
"bundesland": "NRW",
|
||||
"fraktionen": ["GRÜNE", "SPD"],
|
||||
"gwoe_score": 7.5,
|
||||
"empfehlung": "Unterstützen mit Änderungen",
|
||||
"antrag_zusammenfassung": "Solarpflicht für Neubauten",
|
||||
"updated_at": "2026-04-25T10:00:00",
|
||||
},
|
||||
{
|
||||
"drucksache": "8/4242",
|
||||
"title": "Anti-Terror-Paket & Überwachung", # Sonderzeichen
|
||||
"bundesland": "MV",
|
||||
"fraktionen": ["CDU"],
|
||||
"gwoe_score": 2.1,
|
||||
"empfehlung": "Ablehnen",
|
||||
"antrag_zusammenfassung": None,
|
||||
"updated_at": "2026-04-24T08:30:00",
|
||||
},
|
||||
{
|
||||
"drucksache": "19/9999",
|
||||
"title": "Bürger:innen-Beteiligung stärken",
|
||||
"bundesland": "BE",
|
||||
"fraktionen": ["LINKE", "GRÜNE"],
|
||||
"gwoe_score": 9.0,
|
||||
"empfehlung": "Uneingeschränkt unterstützen",
|
||||
"antrag_zusammenfassung": "Bürgerräte etablieren",
|
||||
"updated_at": "2026-04-26T12:15:00",
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
class TestFeedXml:
|
||||
def test_returns_atom_xml(self):
|
||||
with patch("app.main.get_all_assessments", return_value=_fake_assessments()):
|
||||
resp = client.get("/api/feed.xml")
|
||||
assert resp.status_code == 200
|
||||
assert "atom+xml" in resp.headers["content-type"]
|
||||
# XML well-formed
|
||||
root = ET.fromstring(resp.content)
|
||||
assert root.tag == f"{ATOM_NS}feed"
|
||||
|
||||
def test_required_atom_elements_present(self):
|
||||
with patch("app.main.get_all_assessments", return_value=_fake_assessments()):
|
||||
resp = client.get("/api/feed.xml")
|
||||
root = ET.fromstring(resp.content)
|
||||
# Pflicht-Top-Level-Elemente nach RFC 4287
|
||||
for tag in ("id", "title", "updated"):
|
||||
assert root.find(f"{ATOM_NS}{tag}") is not None, f"missing <{tag}>"
|
||||
# mind. ein self-Link
|
||||
self_links = [
|
||||
l for l in root.findall(f"{ATOM_NS}link")
|
||||
if l.get("rel") == "self"
|
||||
]
|
||||
assert len(self_links) == 1
|
||||
|
||||
def test_entry_count_matches_input(self):
|
||||
with patch("app.main.get_all_assessments", return_value=_fake_assessments()):
|
||||
resp = client.get("/api/feed.xml")
|
||||
root = ET.fromstring(resp.content)
|
||||
entries = root.findall(f"{ATOM_NS}entry")
|
||||
assert len(entries) == 3
|
||||
|
||||
def test_entries_sorted_by_updated_desc(self):
|
||||
with patch("app.main.get_all_assessments", return_value=_fake_assessments()):
|
||||
resp = client.get("/api/feed.xml")
|
||||
root = ET.fromstring(resp.content)
|
||||
updateds = [
|
||||
e.find(f"{ATOM_NS}updated").text
|
||||
for e in root.findall(f"{ATOM_NS}entry")
|
||||
]
|
||||
# Strip Z-suffix fuer Vergleich
|
||||
bare = [u.rstrip("Z") for u in updateds]
|
||||
assert bare == sorted(bare, reverse=True), updateds
|
||||
|
||||
def test_html_escaping_in_titles(self):
|
||||
"""Anti-Terror-Paket & Überwachung — & muss als & im XML stehen."""
|
||||
with patch("app.main.get_all_assessments", return_value=_fake_assessments()):
|
||||
resp = client.get("/api/feed.xml")
|
||||
# Roh-XML pruefen, nicht den geparsten Inhalt
|
||||
body = resp.text
|
||||
# Das Ampersand muss als & codiert sein
|
||||
assert "Anti-Terror-Paket &" in body or "Anti-Terror-Paket &#" in body
|
||||
# Der Roh-String darf kein nacktes & vor Whitespace haben
|
||||
assert "Paket & Überw" not in body
|
||||
|
||||
def test_partei_filter_narrows_results(self):
|
||||
with patch("app.main.get_all_assessments", return_value=_fake_assessments()):
|
||||
resp_all = client.get("/api/feed.xml")
|
||||
resp_cdu = client.get("/api/feed.xml?partei=CDU")
|
||||
all_count = len(ET.fromstring(resp_all.content).findall(f"{ATOM_NS}entry"))
|
||||
cdu_count = len(ET.fromstring(resp_cdu.content).findall(f"{ATOM_NS}entry"))
|
||||
assert cdu_count == 1
|
||||
assert cdu_count < all_count
|
||||
|
||||
def test_bundesland_filter_passed_to_query(self):
|
||||
"""Der bundesland-Parameter wird an get_all_assessments durchgereicht."""
|
||||
with patch("app.main.get_all_assessments", return_value=_fake_assessments()) as m:
|
||||
client.get("/api/feed.xml?bundesland=NRW")
|
||||
m.assert_called_once_with("NRW")
|
||||
|
||||
def test_etag_header_set(self):
|
||||
with patch("app.main.get_all_assessments", return_value=_fake_assessments()):
|
||||
resp = client.get("/api/feed.xml")
|
||||
assert "etag" in {k.lower() for k in resp.headers}
|
||||
etag = resp.headers["etag"]
|
||||
assert etag.startswith('"') and etag.endswith('"')
|
||||
|
||||
def test_etag_304_not_modified(self):
|
||||
with patch("app.main.get_all_assessments", return_value=_fake_assessments()):
|
||||
resp1 = client.get("/api/feed.xml")
|
||||
etag = resp1.headers["etag"]
|
||||
resp2 = client.get("/api/feed.xml", headers={"If-None-Match": etag})
|
||||
assert resp2.status_code == 304
|
||||
|
||||
def test_limit_clamped_to_200(self):
|
||||
big_input = _fake_assessments() * 100 # 300 Eintraege
|
||||
with patch("app.main.get_all_assessments", return_value=big_input):
|
||||
resp = client.get("/api/feed.xml?limit=500")
|
||||
root = ET.fromstring(resp.content)
|
||||
entries = root.findall(f"{ATOM_NS}entry")
|
||||
assert len(entries) == 200
|
||||
|
||||
def test_limit_clamped_to_min_1(self):
|
||||
with patch("app.main.get_all_assessments", return_value=_fake_assessments()):
|
||||
resp = client.get("/api/feed.xml?limit=0")
|
||||
root = ET.fromstring(resp.content)
|
||||
entries = root.findall(f"{ATOM_NS}entry")
|
||||
assert len(entries) >= 1
|
||||
|
||||
def test_empty_db_returns_valid_feed(self):
|
||||
with patch("app.main.get_all_assessments", return_value=[]):
|
||||
resp = client.get("/api/feed.xml")
|
||||
assert resp.status_code == 200
|
||||
root = ET.fromstring(resp.content)
|
||||
# Pflicht-Elemente trotzdem da
|
||||
assert root.find(f"{ATOM_NS}id") is not None
|
||||
assert root.find(f"{ATOM_NS}title") is not None
|
||||
# Aber keine Entries
|
||||
assert root.findall(f"{ATOM_NS}entry") == []
|
||||
|
||||
def test_cors_header_present(self):
|
||||
with patch("app.main.get_all_assessments", return_value=[]):
|
||||
resp = client.get("/api/feed.xml")
|
||||
assert resp.headers.get("access-control-allow-origin") == "*"
|
||||
|
||||
def test_self_url_includes_filter_params(self):
|
||||
with patch("app.main.get_all_assessments", return_value=_fake_assessments()):
|
||||
resp = client.get("/api/feed.xml?bundesland=NRW&partei=GRÜNE")
|
||||
root = ET.fromstring(resp.content)
|
||||
self_link = [l for l in root.findall(f"{ATOM_NS}link") if l.get("rel") == "self"][0]
|
||||
href = self_link.get("href")
|
||||
assert "bundesland=NRW" in href
|
||||
# partei kann URL-codiert sein
|
||||
assert "partei=" in href
|
||||
@ -1,87 +0,0 @@
|
||||
"""Tests fuer app/wahlperioden.py — Datum→WP-Mapping fuer Aggregations-Sicht (#58).
|
||||
|
||||
Backfill aus #134.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from app.wahlperioden import wahlperiode_for, all_wahlperioden
|
||||
from app.bundeslaender import BUNDESLAENDER
|
||||
|
||||
|
||||
class TestWahlperiodeFor:
|
||||
def test_returns_current_wp_for_date_after_start(self):
|
||||
bl = BUNDESLAENDER["NRW"]
|
||||
# ein Tag nach Wahlperiode-Start → aktuelle WP
|
||||
# (lexikographische ISO-Vergleich-Grenze)
|
||||
date_after = bl.wahlperiode_start
|
||||
assert wahlperiode_for(date_after, "NRW") == f"NRW-WP{bl.wahlperiode}"
|
||||
|
||||
def test_returns_previous_wp_for_date_before_start(self):
|
||||
bl = BUNDESLAENDER["NRW"]
|
||||
# ein Datum klar vor dem WP-Start
|
||||
assert wahlperiode_for("2010-01-01", "NRW") == f"NRW-WP{bl.wahlperiode - 1}"
|
||||
|
||||
def test_returns_none_for_unknown_bundesland(self):
|
||||
assert wahlperiode_for("2026-03-18", "XX") is None
|
||||
|
||||
def test_empty_datum_falls_back_to_current_wp(self):
|
||||
bl = BUNDESLAENDER["NRW"]
|
||||
assert wahlperiode_for("", "NRW") == f"NRW-WP{bl.wahlperiode}"
|
||||
|
||||
def test_none_datum_falls_back_to_current_wp(self):
|
||||
bl = BUNDESLAENDER["NRW"]
|
||||
# Aufrufer schickt None; der Code prueft `if not datum`
|
||||
assert wahlperiode_for(None, "NRW") == f"NRW-WP{bl.wahlperiode}"
|
||||
|
||||
def test_boundary_date_equals_wp_start(self):
|
||||
"""An der WP-Start-Grenze gehoert der Tag zur neuen WP (>=)."""
|
||||
bl = BUNDESLAENDER["MV"]
|
||||
assert wahlperiode_for(bl.wahlperiode_start, "MV") == f"MV-WP{bl.wahlperiode}"
|
||||
|
||||
def test_doctest_examples(self):
|
||||
"""Die Docstring-Examples muessen halten."""
|
||||
# 2026-03-18 ist nach MV WP8-Start (2021-09-26)
|
||||
assert wahlperiode_for("2026-03-18", "MV") == "MV-WP8"
|
||||
# 2020-01-01 ist davor → WP7
|
||||
assert wahlperiode_for("2020-01-01", "MV") == "MV-WP7"
|
||||
|
||||
def test_lexicographic_iso_date_works(self):
|
||||
"""ISO-Format YYYY-MM-DD vergleicht lexikographisch korrekt."""
|
||||
bl = BUNDESLAENDER["NRW"]
|
||||
start = bl.wahlperiode_start # z.B. "2022-06-01"
|
||||
# Ein Tag davor (gleiches Jahr) gehoert zur Vorgaenger-WP
|
||||
if start[5:7] != "01" or start[8:10] != "01":
|
||||
# nicht 1. Januar — Day-1 Test einfach moeglich
|
||||
year, month, day = int(start[:4]), int(start[5:7]), int(start[8:10])
|
||||
if day > 1:
|
||||
day_before = f"{year:04d}-{month:02d}-{day-1:02d}"
|
||||
else:
|
||||
day_before = f"{year:04d}-{month-1:02d}-28"
|
||||
assert wahlperiode_for(day_before, "NRW") == f"NRW-WP{bl.wahlperiode - 1}"
|
||||
|
||||
|
||||
class TestAllWahlperioden:
|
||||
def test_includes_each_bundesland(self):
|
||||
all_wp = all_wahlperioden()
|
||||
# pro BL zwei Eintraege (current + previous)
|
||||
assert len(all_wp) == len(BUNDESLAENDER) * 2
|
||||
|
||||
def test_format_is_BL_WPn(self):
|
||||
for entry in all_wahlperioden():
|
||||
parts = entry.split("-WP")
|
||||
assert len(parts) == 2, entry
|
||||
bl_code, wp_num = parts
|
||||
assert bl_code in BUNDESLAENDER, bl_code
|
||||
assert wp_num.isdigit(), wp_num
|
||||
|
||||
def test_no_duplicates(self):
|
||||
all_wp = all_wahlperioden()
|
||||
assert len(all_wp) == len(set(all_wp))
|
||||
|
||||
def test_contains_known_examples(self):
|
||||
all_wp = all_wahlperioden()
|
||||
# NRW WP18 + 17 muessen drin sein
|
||||
assert "NRW-WP18" in all_wp
|
||||
assert "NRW-WP17" in all_wp
|
||||
@ -177,211 +177,6 @@ class TestFetchAndVerify:
|
||||
assert result["changed"] is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 4: SHA-Lock-File — Pferdetausch-Schutz (#138)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestShaLock:
|
||||
"""Regression: abgeordnetenwatch hat das CDU-BE-2023-PDF unter dem alten
|
||||
Slug-Namen gegen das CDU-BE-2026-PDF ersetzt. Der Lock-File-Mechanismus
|
||||
muss solche stillen Tausch-Aktionen abfangen."""
|
||||
|
||||
def _patch_lock_file(self, tmp_path):
|
||||
"""Setzt den Lock-File-Pfad auf einen tmp-Pfad fuer den Test."""
|
||||
return patch("app.wahlprogramm_fetch._LOCK_FILE", tmp_path / "lock.json")
|
||||
|
||||
def _urlopen_with(self, content: bytes):
|
||||
def _u(url_or_req, timeout=None):
|
||||
class _R:
|
||||
def read(self_inner):
|
||||
return content
|
||||
def __enter__(self_inner):
|
||||
return self_inner
|
||||
def __exit__(self_inner, *a):
|
||||
pass
|
||||
return _R()
|
||||
return _u
|
||||
|
||||
def test_first_download_pins_sha(self, tmp_path):
|
||||
"""Erster Download → Lock-File wird angelegt mit dem neuen SHA."""
|
||||
dest = tmp_path / "cdu-be.pdf"
|
||||
content = b"%PDF original CDU BE 2021"
|
||||
|
||||
with self._patch_lock_file(tmp_path), \
|
||||
patch("urllib.request.urlopen", self._urlopen_with(content)):
|
||||
result = fetch_and_verify("https://example.com/cdu-be.pdf", dest)
|
||||
|
||||
assert result["ok"] is True
|
||||
assert result["lock_updated"] is True
|
||||
lock_path = tmp_path / "lock.json"
|
||||
assert lock_path.exists()
|
||||
import json
|
||||
lock = json.loads(lock_path.read_text())
|
||||
assert lock["cdu-be.pdf"] == _sha(content)
|
||||
|
||||
def test_second_download_with_same_content_passes(self, tmp_path):
|
||||
"""Zweiter Download mit gleichem Inhalt → ok, changed=False."""
|
||||
dest = tmp_path / "cdu-be.pdf"
|
||||
content = b"%PDF original CDU BE 2021"
|
||||
dest.write_bytes(content)
|
||||
# Lock vorbereiten
|
||||
import json
|
||||
(tmp_path / "lock.json").write_text(json.dumps({"cdu-be.pdf": _sha(content)}))
|
||||
|
||||
with self._patch_lock_file(tmp_path), \
|
||||
patch("urllib.request.urlopen", self._urlopen_with(content)):
|
||||
result = fetch_and_verify("https://example.com/cdu-be.pdf", dest)
|
||||
|
||||
assert result["ok"] is True
|
||||
assert result["changed"] is False
|
||||
|
||||
def test_pferdetausch_blocks_silent_replacement(self, tmp_path):
|
||||
"""KRITISCH: lokal liegt 'CDU BE 2021', Server liefert 'CDU BE 2026'.
|
||||
Lock zeigt SHA von 2021 → fetch muss ABBRECHEN, nicht ueberschreiben."""
|
||||
dest = tmp_path / "cdu-be-2023.pdf"
|
||||
original_content = b"%PDF CDU Berlin 2021-2026 Wahlprogramm"
|
||||
replaced_content = b"%PDF CDU Berlin-Plan 2026 (replaced!)"
|
||||
dest.write_bytes(original_content)
|
||||
# Lock pinnt den Original-SHA
|
||||
import json
|
||||
(tmp_path / "lock.json").write_text(
|
||||
json.dumps({"cdu-be-2023.pdf": _sha(original_content)})
|
||||
)
|
||||
|
||||
with self._patch_lock_file(tmp_path), \
|
||||
patch("urllib.request.urlopen", self._urlopen_with(replaced_content)):
|
||||
result = fetch_and_verify("https://example.com/cdu-be-2023.pdf", dest)
|
||||
|
||||
assert result["ok"] is False
|
||||
assert "Lock-Pruefung" in result["error"]
|
||||
# Datei darf NICHT ueberschrieben sein
|
||||
assert dest.read_bytes() == original_content
|
||||
|
||||
def test_accept_new_sha_overrides_lock(self, tmp_path):
|
||||
"""Mit accept_new_sha=True wird der Lock bewusst aktualisiert."""
|
||||
dest = tmp_path / "linke-bb.pdf"
|
||||
original_content = b"%PDF v1"
|
||||
new_content = b"%PDF v2 - intentional update"
|
||||
dest.write_bytes(original_content)
|
||||
import json
|
||||
(tmp_path / "lock.json").write_text(
|
||||
json.dumps({"linke-bb.pdf": _sha(original_content)})
|
||||
)
|
||||
|
||||
with self._patch_lock_file(tmp_path), \
|
||||
patch("urllib.request.urlopen", self._urlopen_with(new_content)):
|
||||
result = fetch_and_verify(
|
||||
"https://example.com/linke-bb.pdf", dest,
|
||||
accept_new_sha=True,
|
||||
)
|
||||
|
||||
assert result["ok"] is True
|
||||
assert result["changed"] is True
|
||||
# Lock muss neuen SHA haben
|
||||
lock = json.loads((tmp_path / "lock.json").read_text())
|
||||
assert lock["linke-bb.pdf"] == _sha(new_content)
|
||||
|
||||
def test_existing_file_without_lock_pins_silently(self, tmp_path):
|
||||
"""File ist da aber Lock fehlt (Migration-Szenario): bei naechstem
|
||||
identischen fetch wird der SHA gepinnt, kein Block."""
|
||||
dest = tmp_path / "spd-mv.pdf"
|
||||
content = b"%PDF SPD MV 2021"
|
||||
dest.write_bytes(content)
|
||||
# Kein Lock-Eintrag
|
||||
|
||||
with self._patch_lock_file(tmp_path), \
|
||||
patch("urllib.request.urlopen", self._urlopen_with(content)):
|
||||
result = fetch_and_verify("https://example.com/spd-mv.pdf", dest)
|
||||
|
||||
assert result["ok"] is True
|
||||
assert result["lock_updated"] is True
|
||||
import json
|
||||
lock = json.loads((tmp_path / "lock.json").read_text())
|
||||
assert lock["spd-mv.pdf"] == _sha(content)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 5: Lock-File und YAML-Robustheit (#134 Coverage-Backfill)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestLockFileRobustness:
|
||||
def test_corrupt_lock_file_returns_empty_dict(self, tmp_path):
|
||||
"""Kaputtes JSON darf den Caller nicht crashen — leeren Lock liefern."""
|
||||
from app.wahlprogramm_fetch import _load_lock
|
||||
bad = tmp_path / "broken-lock.json"
|
||||
bad.write_text("{ this is not json ;)")
|
||||
with patch("app.wahlprogramm_fetch._LOCK_FILE", bad):
|
||||
result = _load_lock()
|
||||
assert result == {}
|
||||
|
||||
def test_missing_lock_file_returns_empty_dict(self, tmp_path):
|
||||
from app.wahlprogramm_fetch import _load_lock
|
||||
missing = tmp_path / "no-such-file.json"
|
||||
with patch("app.wahlprogramm_fetch._LOCK_FILE", missing):
|
||||
assert _load_lock() == {}
|
||||
|
||||
def test_save_lock_writes_valid_json(self, tmp_path):
|
||||
from app.wahlprogramm_fetch import _save_lock
|
||||
target = tmp_path / "lock.json"
|
||||
with patch("app.wahlprogramm_fetch._LOCK_FILE", target):
|
||||
_save_lock({"x.pdf": "abc123", "y.pdf": "def456"})
|
||||
import json
|
||||
loaded = json.loads(target.read_text())
|
||||
assert loaded == {"x.pdf": "abc123", "y.pdf": "def456"}
|
||||
|
||||
|
||||
class TestLoadLinks:
|
||||
def test_missing_yaml_returns_empty(self, tmp_path):
|
||||
from app.wahlprogramm_fetch import _load_links
|
||||
with patch("app.wahlprogramm_fetch._LINKS_FILE", tmp_path / "missing.yaml"):
|
||||
assert _load_links() == {}
|
||||
|
||||
def test_empty_yaml_returns_empty(self, tmp_path):
|
||||
from app.wahlprogramm_fetch import _load_links
|
||||
target = tmp_path / "empty.yaml"
|
||||
target.write_text("")
|
||||
with patch("app.wahlprogramm_fetch._LINKS_FILE", target):
|
||||
assert _load_links() == {}
|
||||
|
||||
# Hinweis: yaml ist im Unit-Setup gestubbed (siehe Top-of-File), deshalb
|
||||
# testen wir _load_links nur mit existing-vs-missing-File. Die echte
|
||||
# YAML-Parsing-Logik wird in der integration-Suite gegen die echte
|
||||
# links.yaml validiert.
|
||||
|
||||
|
||||
class TestGetMissingProgrammes:
|
||||
"""Tests fuer get_missing_programmes — listet BL/Partei-Kombinationen mit
|
||||
Kandidaten-URL aber fehlender lokaler Datei. yaml ist gestubbed; Tests
|
||||
patchen daher _load_links direkt."""
|
||||
|
||||
def test_no_yaml_returns_empty(self):
|
||||
from app.wahlprogramm_fetch import get_missing_programmes
|
||||
with patch("app.wahlprogramm_fetch._load_links", return_value={}):
|
||||
assert get_missing_programmes() == []
|
||||
|
||||
def test_lists_entries_when_file_missing(self, tmp_path):
|
||||
"""Eintrag in YAML, registriertes WAHLPROGRAMME-File fehlt → listed."""
|
||||
from app.wahlprogramm_fetch import get_missing_programmes
|
||||
fake_links = {"BX": {"XYZ": [{"url": "https://example.com/x.pdf"}]}}
|
||||
with patch("app.wahlprogramm_fetch._load_links", return_value=fake_links):
|
||||
with patch("app.wahlprogramm_fetch._REFERENZEN_DIR", tmp_path / "ref"):
|
||||
missing = get_missing_programmes()
|
||||
codes = [m["bl"] for m in missing]
|
||||
assert "BX" in codes
|
||||
|
||||
def test_bundesland_filter(self, tmp_path):
|
||||
from app.wahlprogramm_fetch import get_missing_programmes
|
||||
fake_links = {
|
||||
"BX": {"XYZ": [{"url": "https://example.com/x.pdf"}]},
|
||||
"BY": {"ABC": [{"url": "https://example.com/y.pdf"}]},
|
||||
}
|
||||
with patch("app.wahlprogramm_fetch._load_links", return_value=fake_links):
|
||||
with patch("app.wahlprogramm_fetch._REFERENZEN_DIR", tmp_path / "ref"):
|
||||
missing = get_missing_programmes(bundesland="BX")
|
||||
codes = {m["bl"] for m in missing}
|
||||
assert codes == {"BX"}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 4: og_card — cache_key Determinismus und Cache-Miss/Hit
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@ -1,6 +1,4 @@
|
||||
"""Tests for wahlprogramme.py — registry consistency + file existence."""
|
||||
import pytest
|
||||
|
||||
from app.wahlprogramme import (
|
||||
WAHLPROGRAMME,
|
||||
REFERENZEN_PATH,
|
||||
@ -118,79 +116,3 @@ class TestEmbeddingsRegistryConsistency:
|
||||
"WAHLPROGRAMME entries missing in embeddings.PROGRAMME:\n "
|
||||
+ "\n ".join(missing)
|
||||
)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# load_wahlprogramm_text — Fallback-Pfade (#134 Coverage-Backfill)
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestLoadWahlprogrammText:
|
||||
def test_returns_empty_for_unknown_combination(self):
|
||||
from app.wahlprogramme import load_wahlprogramm_text
|
||||
assert load_wahlprogramm_text("XX", "XYZ") == {}
|
||||
|
||||
def test_paged_textfile_used_when_present(self, tmp_path, monkeypatch):
|
||||
"""Wenn die paged-Textdatei existiert, wird sie genutzt.
|
||||
Format: '--- PAGE N ---'-Marker pro Seitenanfang."""
|
||||
from app import wahlprogramme as wp_mod
|
||||
# Mock get_wahlprogramm -> bekannte Datei
|
||||
monkeypatch.setattr(wp_mod, "get_wahlprogramm",
|
||||
lambda bl, p: {"file": "test.pdf"})
|
||||
paged = tmp_path / "test-paged.txt"
|
||||
paged.write_text("--- PAGE 1 ---\nseite eins\n--- PAGE 2 ---\nseite zwei")
|
||||
monkeypatch.setattr(wp_mod, "KONTEXT_PATH", tmp_path)
|
||||
|
||||
result = wp_mod.load_wahlprogramm_text("X", "Y")
|
||||
assert 2 in result
|
||||
assert "seite zwei" in result[2]
|
||||
|
||||
def test_falls_back_to_normal_textfile(self, tmp_path, monkeypatch):
|
||||
"""Ohne paged-Datei wird auf normale .txt-Datei zurueckgefallen,
|
||||
komplett unter Seite 1."""
|
||||
from app import wahlprogramme as wp_mod
|
||||
monkeypatch.setattr(wp_mod, "get_wahlprogramm",
|
||||
lambda bl, p: {"file": "test.pdf"})
|
||||
normal = tmp_path / "test.txt"
|
||||
normal.write_text("flacher text ohne seitenmarker")
|
||||
monkeypatch.setattr(wp_mod, "KONTEXT_PATH", tmp_path)
|
||||
|
||||
result = wp_mod.load_wahlprogramm_text("X", "Y")
|
||||
assert result == {1: "flacher text ohne seitenmarker"}
|
||||
|
||||
def test_returns_empty_when_no_textfile(self, tmp_path, monkeypatch):
|
||||
"""Weder paged- noch normale Textdatei → leeres Dict."""
|
||||
from app import wahlprogramme as wp_mod
|
||||
monkeypatch.setattr(wp_mod, "get_wahlprogramm",
|
||||
lambda bl, p: {"file": "test.pdf"})
|
||||
# tmp_path ist leer
|
||||
monkeypatch.setattr(wp_mod, "KONTEXT_PATH", tmp_path)
|
||||
|
||||
assert wp_mod.load_wahlprogramm_text("X", "Y") == {}
|
||||
|
||||
|
||||
class TestSearchWahlprogramm:
|
||||
def test_returns_empty_for_unknown_combination(self):
|
||||
from app.wahlprogramme import search_wahlprogramm
|
||||
assert search_wahlprogramm("XX", "XYZ", ["test"]) == []
|
||||
|
||||
def test_returns_empty_when_text_missing(self, monkeypatch):
|
||||
"""Bekannte Partei + Bundesland aber keine Textdatei → leer."""
|
||||
from app import wahlprogramme as wp_mod
|
||||
monkeypatch.setattr(wp_mod, "get_wahlprogramm",
|
||||
lambda bl, p: {"file": "missing.pdf"})
|
||||
monkeypatch.setattr(wp_mod, "load_wahlprogramm_text",
|
||||
lambda bl, p: {})
|
||||
assert wp_mod.search_wahlprogramm("X", "Y", ["test"]) == []
|
||||
|
||||
|
||||
class TestFindRelevantQuotes:
|
||||
def test_unknown_bundesland_raises(self):
|
||||
from app.wahlprogramme import find_relevant_quotes
|
||||
with pytest.raises(ValueError, match="Unbekanntes Bundesland"):
|
||||
find_relevant_quotes("Antrag-Text", ["CDU"], bundesland="ZZ")
|
||||
|
||||
|
||||
class TestFormatQuoteForPrompt:
|
||||
def test_empty_quotes_returns_empty_string(self):
|
||||
from app.wahlprogramme import format_quote_for_prompt
|
||||
assert format_quote_for_prompt({}) == ""
|
||||
|
||||
Loading…
Reference in New Issue
Block a user