feat(#126): protokoll_parsers/-Sub-Package + Registry-Pattern + ADR 0009

Architektur-Refactor zur Vorbereitung BL-uebergreifender Parser:
- app/protokoll_parser_nrw.py → app/protokoll_parsers/nrw.py
- app/ingest_votes_nrw.py → app/ingest_votes.py (BL-uebergreifend)
- Neue app/protokoll_parsers/__init__.py mit:
  - PROTOKOLL_PARSERS-Dict (BL-Code → Parser-Funktion, derzeit nur NRW)
  - parse_protocol(bundesland, pdf_path) als BL-uebergreifender Einstieg
  - supported_bundeslaender()-Helper
  - NotImplementedError mit hilfreicher Message bei unbekanntem BL

CLI bekommt --supported-Flag fuer BL-Discovery:
  python -m app.ingest_votes --supported → 'NRW'

ADR 0009 dokumentiert das Muster (Sub-Package + Funktions-Registry, analog zu ADR 0002 fuer ParlamentAdapter). Folge-BL bekommen je eine eigene Datei und einen Eintrag in PROTOKOLL_PARSERS — kein Refactoring der Bestands-Logik.

Tests:
- 7 neue Tests in test_protokoll_parsers.py fuer Registry und Dispatch
- Bestehende NRW-Tests umbenannt zu test_protokoll_parsers_nrw.py, Imports angepasst — keine Verhaltens-Aenderung
- Bestehende Ingest-Tests umbenannt zu test_ingest_votes.py

642 Tests gruen, kein Verhaltens-Drift.
2026-04-28 08:37:31 +02:00 · 2026-04-28 08:37:31 +02:00 · 7de4df1fef
commit 7de4df1fef
parent a9f0b61c75
9 changed files with 333 additions and 44 deletions
--- a/app/database.py
+++ b/app/database.py
@ -262,7 +262,7 @@ async def init_db():
        # Fraktions-aggregierte Abstimmungsergebnisse aus Plenarprotokollen (#106).
        # Granularitaet: "GRUENE und SPD haben zugestimmt", nicht pro MP — das
        # ist der Datentyp, der aus deterministischen Parsern wie
-        # protokoll_parser_nrw.py rauskommt.
+        # app/protokoll_parsers/ rauskommt.
        # Compound-PK ueber quelle_protokoll, weil eine Drucksache mehrfach
        # abgestimmt werden kann (Ausschuss-Empfehlung + Plenum-Beschluss).
        await db.execute("""
@ -1211,7 +1211,7 @@ async def get_monitoring_new_today(scan_date: str) -> list[dict]:
 # ─── Plenum-Vote-Results (#106) ─────────────────────────────────────────────
 # Fraktions-aggregierte Abstimmungsergebnisse aus Plenarprotokollen.
-# Quelle: protokoll_parser_nrw.py (NRW). BL-uebergreifender Parser ist #126.
+# Quelle: app/protokoll_parsers/ (NRW). BL-uebergreifender Parser ist #126.
 async def upsert_plenum_vote(
    *,
--- a/app/ingest_votes_nrw.py
+++ b/app/ingest_votes_nrw.py
@ -1,16 +1,19 @@
-"""Ingest-CLI fuer NRW-Plenarprotokolle (#106).
+"""BL-uebergreifende Ingest-CLI fuer Plenarprotokolle (#106 / #126).
 Pipeline:
  1. PDF laden (Pfad oder URL)
-  2. protokoll_parser_nrw.parse_protocol() liefert Liste von Abstimmungen
+  2. ``protokoll_parsers.parse_protocol(bundesland, pdf_path)`` waehlt den
-  3. upsert_plenum_vote() schreibt jede Abstimmung in die DB
+     BL-spezifischen Parser aus der Registry
  3. ``upsert_plenum_vote()`` schreibt jede Abstimmung in die DB
 CLI:
-  python -m app.ingest_votes_nrw --pdf /pfad/zu/MMP18-119.pdf
+  python -m app.ingest_votes --pdf MMP18-119.pdf
-  python -m app.ingest_votes_nrw --url https://landtag.nrw.de/.../MMP18-119.pdf
+  python -m app.ingest_votes --url https://landtag.nrw.de/.../MMP18-119.pdf
-  python -m app.ingest_votes_nrw --pdf MMP18-119.pdf --protokoll-id MMP18-119
+  python -m app.ingest_votes --pdf x.pdf --bundesland NRW --protokoll-id MMP18-119
  python -m app.ingest_votes --supported   # Liste der BL mit Parser
-Die Protokoll-ID wird, wenn nicht uebergeben, aus dem Datei-Stem abgeleitet.
+Aktuell registriert: NRW. Folge-BL via app/protokoll_parsers/<bl>.py + Eintrag
 in PROTOKOLL_PARSERS — siehe ADR 0009.
 """
 from __future__ import annotations
@ -23,7 +26,7 @@ import urllib.request
 from pathlib import Path
 from typing import Optional
-from .protokoll_parser_nrw import parse_protocol
+from .protokoll_parsers import parse_protocol, supported_bundeslaender
 from .database import upsert_plenum_vote
 logger = logging.getLogger(__name__)
@ -52,13 +55,17 @@ async def ingest_pdf(
    protokoll_id: Optional[str] = None,
    quelle_url: Optional[str] = None,
 ) -> dict:
-    """Parse das PDF und schreibe alle gefundenen Abstimmungen in die DB.
+    """Parse das PDF mit dem BL-Parser und schreibe alle Abstimmungen in die DB.
    Returns:
-        Statistik-Dict ``{parsed, written, skipped_no_drucksache, errors}``.
+        Statistik-Dict ``{parsed, written, skipped_no_drucksache, errors,
        protokoll_id, bundesland}``.
    Raises:
        NotImplementedError: wenn fuer ``bundesland`` kein Parser registriert ist.
    """
    pid = protokoll_id or _derive_protokoll_id(pdf_path)
-    parsed = parse_protocol(str(pdf_path))
+    parsed = parse_protocol(bundesland, str(pdf_path))
    written = 0
    skipped_no_ds = 0
@ -100,17 +107,27 @@ def _cli() -> None:
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
    parser = argparse.ArgumentParser(
-        description="Plenarprotokoll → plenum_vote_results-Tabelle (#106)",
+        description="Plenarprotokoll → plenum_vote_results (#106 / #126)",
    )
-    src = parser.add_mutually_exclusive_group(required=True)
+    src = parser.add_mutually_exclusive_group(required=False)
    src.add_argument("--pdf", help="Pfad zu lokalem PDF")
    src.add_argument("--url", help="HTTP(S)-URL zum PDF")
    parser.add_argument("--bundesland", default="NRW",
                        help="Bundesland-Code (default: NRW)")
    parser.add_argument("--protokoll-id",
                        help="Protokoll-ID (default: aus Datei-Stem)")
    parser.add_argument("--supported", action="store_true",
                        help="Liste alle BL-Codes mit registriertem Parser")
    args = parser.parse_args()
    if args.supported:
        for bl in supported_bundeslaender():
            print(bl)
        sys.exit(0)
    if not args.pdf and not args.url:
        parser.error("--pdf oder --url ist erforderlich")
    if args.url:
        # Download in tmp und nach dem Run wieder loeschen
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
--- a/app/protokoll_parsers/__init__.py
+++ b/app/protokoll_parsers/__init__.py
@ -0,0 +1,69 @@
 """BL-uebergreifende Plenarprotokoll-Abstimmungsparser (#126).
 Architektur (vgl. ADR 0009): pro Bundesland eine Modul-Datei
 ``app/protokoll_parsers/<bl-code>.py``, die mindestens eine Funktion
 ``parse_protocol(pdf_path: str) -> list[dict]`` exportiert. Die Registry
 ``PROTOKOLL_PARSERS`` mappt BL-Code → Parser-Funktion.
 Erwartetes Result-Schema pro Eintrag in der Liste::
    {
        "drucksache": str | None,    # z.B. "18/1234"; None bei nicht aufloesbar
        "ergebnis": str,             # angenommen | abgelehnt | ueberwiesen | ...
        "einstimmig": bool,          # explizit als einstimmig markiert
        "kind": str,                 # parser-intern, fuer Debug
        "votes": {                   # fraktions-Listen pro Vote-Kategorie
            "ja": list[str],
            "nein": list[str],
            "enthaltung": list[str],
        },
    }
 NRW ist die Referenz-Implementierung. Folge-BL (HE/BB/MV/BE/...) bekommen
 eigene Module mit demselben Funktions-Vertrag — neue Eintraege in der
 Registry sind reine Tippelarbeit, das Reverse-Engineering pro Landtag
 ist die eigentliche Arbeit.
 """
 from __future__ import annotations
 from typing import Callable
 from .nrw import parse_protocol as _parse_nrw
 # Type alias for readability; the parser signature is deliberately minimal:
 # one ``str`` argument in, a ``list[dict]`` out.
 ProtokollParser = Callable[[str], list[dict]]
 # Registry mapping a Bundesland code to its parser function.
 # Currently only NRW is registered.
 PROTOKOLL_PARSERS: dict[str, ProtokollParser] = {
    "NRW": _parse_nrw,
 }
 def parse_protocol(bundesland: str, pdf_path: str) -> list[dict]:
    """Dispatch a plenary-protocol PDF to the parser registered for a state.

    Looks up ``bundesland`` in ``PROTOKOLL_PARSERS`` and returns whatever the
    registered parser returns when called with ``pdf_path``.

    Raises:
        NotImplementedError: if no parser is registered under ``bundesland``.
            The message names the requested code, lists the registered
            codes and points to issue #126.
    """
    handler = PROTOKOLL_PARSERS.get(bundesland)
    if handler is not None:
        return handler(pdf_path)
    # No parser found: tell the caller which codes would have worked.
    known_codes = sorted(PROTOKOLL_PARSERS)
    listing = ", ".join(known_codes) or "(keine)"
    raise NotImplementedError(
        f"Kein Plenarprotokoll-Parser fuer {bundesland!r}. "
        f"Unterstuetzt: {listing}. Siehe #126."
    )
 def supported_bundeslaender() -> list[str]:
    """Return the state codes that have a registered parser, sorted ascending."""
    codes = list(PROTOKOLL_PARSERS)
    codes.sort()
    return codes
 # Names exported by ``from app.protokoll_parsers import *``.
 __all__ = [
    "ProtokollParser",
    "PROTOKOLL_PARSERS",
    "parse_protocol",
    "supported_bundeslaender",
 ]
--- a/app/protokoll_parsers/nrw.py
+++ b/app/protokoll_parsers/nrw.py
--- a/docs/adr/0009-protokoll-parser-registry.md
+++ b/docs/adr/0009-protokoll-parser-registry.md
@ -0,0 +1,127 @@
 # 0009 — Plenarprotokoll-Parser-Registry pro Bundesland
 | | |
 |---|---|
 | **Status** | accepted |
 | **Datum** | 2026-04-28 |
 | **Refs** | #106, #126, ADR 0002 (Adapter-Pattern) |
 ## Kontext
 Der NRW-Plenarprotokoll-Parser (#106) ist deterministisch, anchor-basiert
 und erreicht 19/19 auf der MMP18-119-Fixture. Damit war die Architektur-Frage
 geloest — aber nur fuer NRW. Andere Bundeslaender publizieren ihre
 Plenarprotokolle in fundamental anderen Formaten:
 - Hessen: HTML mit semantischen Tags pro Beschluss
 - Brandenburg: PDF mit Tabellen-Layout fuer Vote-Counts
 - Mecklenburg-Vorpommern: ParLDok-XML-Export
 - Berlin: PDF mit eigenem Formularkasten-Schema
 - ...
 Ein einziger Parser fuer alle BL ist nicht baubar. Die Reverse-Engineering-
 Arbeit pro Landtag ist substantiell und passiert phasenweise: zuerst NRW
 wegen der hohen Antragsdichte, danach BL fuer BL nach Bedarf.
 Das Adapter-Pattern aus ADR 0002 (`ParlamentAdapter`) hat dieses Problem
 fuer die Antrags-Suche bereits geloest. Plenarprotokoll-Parser ist die
 naechste Familie mit derselben Form: pro BL eine eigene Implementierung,
 ein gemeinsamer Aufruf-Vertrag, ein Registry-Lookup.
 ## Optionen
 ### Option A — Eine grosse Datei mit If-Else-Dispatch
 Eine einzige `app/protokoll_parser.py`-Datei mit einem `parse_protocol(bl, pdf)`,
 das je nach BL andere Funktionen ruft. **Vorteile:** flach, einfach.
 **Nachteile:** waechst zur 2000-LOC-Datei, BL-spezifische Reverse-Engineering-
 Notizen und Helper-Functions vermischen sich, schlechte Test-Isolation.
 ### Option B — OOP-Hierarchie mit `ProtokollParserBase` als ABC
 Abstrakte Basisklasse mit `parse(pdf_path) -> list[VoteResult]`,
 konkrete Subklassen pro BL. **Vorteile:** typisierter Vertrag.
 **Nachteile:** Boilerplate fuer Klassen-Definitionen ohne Mehrwert,
 weil der NRW-Parser keinen Instanz-State hat (alles `def`-Funktionen,
 keine `self.x`).
 ### Option C — Sub-Package mit Funktions-Registry (gewaehlt)
 `app/protokoll_parsers/` als Sub-Package, pro BL eine eigene Datei
 (`nrw.py`, `mv.py`, `he.py`, ...) die mindestens
 `parse_protocol(pdf_path: str) -> list[dict]` exportiert. Ein
 `PROTOKOLL_PARSERS`-Dict in `__init__.py` mappt BL-Code → Funktion.
 Das BL-uebergreifende `parse_protocol(bl, pdf_path)` macht den Lookup.
 **Vorteile:**
 - Konsistent mit dem `ADAPTERS`-Dict in `parlamente.py` (ADR 0002)
 - BL-Code lebt in eigener Datei mit eigenen Helpern und Notizen
 - Neue BL = neue Datei + ein Eintrag in `__init__.py`, kein Refactoring
 - Tests pro BL in eigener Test-Datei (`tests/test_protokoll_parsers_<bl>.py`)
 - Parser-Funktionen bleiben simpel, kein OOP-Overhead
 **Nachteile:**
 - Vertrag ist nur per Convention dokumentiert (nicht via Type-System
  erzwingbar) — dafuer ein Schema-Test in `test_protokoll_parsers.py`
  als Sicherheitsnetz.
 ## Entscheidung
 **Option C.** Konkret:
 ```
 app/protokoll_parsers/
 ├── __init__.py     # Registry + parse_protocol(bl, pdf) + supported_bundeslaender()
 ├── nrw.py          # NRW v5 (vorher app/protokoll_parser_nrw.py)
 └── <bl>.py         # je BL eine Datei, sobald implementiert
 ```
 **Vertrag fuer jeden Parser** (verbindlich):
 ```python
 def parse_protocol(pdf_path: str) -> list[dict]:
    """Returns: [
        {
            "drucksache": str | None,
            "ergebnis": str,         # angenommen/abgelehnt/ueberwiesen/...
            "einstimmig": bool,
            "kind": str,             # parser-intern, fuer Debug
            "votes": {
                "ja": list[str],     # Fraktions-Codes (CDU, SPD, GRUENE, ...)
                "nein": list[str],
                "enthaltung": list[str],
            },
        },
        ...
    ]"""
 ```
 **Naming:** Datei-Stem = lowercase BL-Code (`nrw.py`, `mv.py`, ...).
 Registry-Key = uppercase BL-Code (`"NRW"`, `"MV"`).
 **Konsumenten** rufen `parse_protocol(bundesland, pdf_path)` aus dem
 Sub-Package, nicht direkt eine BL-Datei.
 ## Konsequenzen
 ### Positiv
 - Folge-BL-Implementierungen ohne Refactoring der Bestands-Logik.
 - Reverse-Engineering-Notizen leben pro BL in einer Datei statt verteilt
  ueber eine Mega-Datei.
 - Der `supported_bundeslaender()`-Helper macht in CLI und UI sofort
  sichtbar, wo Daten verfuegbar sind und wo nicht.
 - Neue Adapter-Test-Files folgen demselben Schema (`test_protokoll_parsers_<bl>.py`).
 ### Negativ
 - Schema-Vertrag nur per Convention (kein TypedDict). Dafuer ein
  Smoke-Test in `tests/test_protokoll_parsers.py`, der pro registriertem
  Parser die Result-Keys pruefen wird, sobald >1 Implementation existiert.
 ### Folgen fuer andere ADRs
 - ADR 0002 (Adapter-Pattern) bleibt gueltig; dieses ADR ersetzt es
  nicht, sondern wendet das gleiche Muster auf eine zweite Adapter-Familie an.
 - Folge-Issues (HE/BB/MV/BE/...) sind reine Implementation-Tickets ohne
  Architektur-Diskussion — der Vertrag ist hier festgelegt.
--- a/docs/adr/index.md
+++ b/docs/adr/index.md
@ -25,6 +25,7 @@ und Konsequenzen. Format inspiriert von [Michael Nygard](https://cognitect.com/b
 | [0006](0006-embedding-model-migration-v3-to-v4.md) | Embedding-Modell-Migration text-embedding-v3 → v4 | accepted | 2026-04-11 |
 | [0007](0007-test-taxonomy.md) | Test-Taxonomie (Unit / Integration / E2E / Property / Smoke) | accepted | 2026-04-28 |
 | [0008](0008-ddd-lightweight-migration.md) | DDD-Lightweight-Migration (Repository, LLM-Port, Domain-Verhalten) | accepted | 2026-04-20 |
 | [0009](0009-protokoll-parser-registry.md) | Plenarprotokoll-Parser-Registry pro Bundesland | accepted | 2026-04-28 |
 ## Wann ADR, wann nicht
--- a/tests/test_ingest_votes_nrw.py
+++ b/tests/test_ingest_votes_nrw.py
@ -1,4 +1,4 @@
-"""Tests fuer app/ingest_votes_nrw.py — PDF → plenum_vote_results Pipeline (#106)."""
+"""Tests fuer app/ingest_votes.py — PDF → plenum_vote_results Pipeline (#106 / #126)."""
 from __future__ import annotations
 import asyncio
@ -63,7 +63,7 @@ def _fake_parse_result(drucksache: str, ergebnis: str = "angenommen",
 class TestIngestPdf:
    def test_writes_each_parsed_vote(self, initialized_db, tmp_path):
-        from app import ingest_votes_nrw, database
+        from app import ingest_votes, database
        fake_pdf = tmp_path / "MMP18-119.pdf"
        fake_pdf.write_bytes(b"%PDF-1.4 fake")
@ -72,8 +72,8 @@ class TestIngestPdf:
            _fake_parse_result("18/200", "abgelehnt", ja=["AfD"], nein=["CDU", "SPD"]),
        ]
-        with patch("app.ingest_votes_nrw.parse_protocol", return_value=parser_results):
+        with patch("app.ingest_votes.parse_protocol", return_value=parser_results):
-            stats = run(ingest_votes_nrw.ingest_pdf(fake_pdf))
+            stats = run(ingest_votes.ingest_pdf(fake_pdf))
        assert stats["parsed"] == 2
        assert stats["written"] == 2
@ -86,7 +86,7 @@ class TestIngestPdf:
    def test_skips_entries_without_drucksache(self, initialized_db, tmp_path):
        """Anchors ohne aufloesbare Drucksache werden gezaehlt aber nicht
        geschrieben (sonst muellt der Import die DB voll)."""
-        from app import ingest_votes_nrw
+        from app import ingest_votes
        fake_pdf = tmp_path / "MMP18-50.pdf"
        fake_pdf.write_bytes(b"%PDF")
@ -94,31 +94,31 @@ class TestIngestPdf:
            _fake_parse_result("18/300", "angenommen"),
            {"drucksache": None, "ergebnis": "angenommen", "votes": {"ja": [], "nein": [], "enthaltung": []}},
        ]
-        with patch("app.ingest_votes_nrw.parse_protocol", return_value=parser_results):
+        with patch("app.ingest_votes.parse_protocol", return_value=parser_results):
-            stats = run(ingest_votes_nrw.ingest_pdf(fake_pdf))
+            stats = run(ingest_votes.ingest_pdf(fake_pdf))
        assert stats["parsed"] == 2
        assert stats["written"] == 1
        assert stats["skipped_no_drucksache"] == 1
    def test_protokoll_id_default_from_stem(self, initialized_db, tmp_path):
-        from app import ingest_votes_nrw, database
+        from app import ingest_votes, database
        fake_pdf = tmp_path / "MMP18-77.pdf"
        fake_pdf.write_bytes(b"%PDF")
-        with patch("app.ingest_votes_nrw.parse_protocol",
+        with patch("app.ingest_votes.parse_protocol",
                   return_value=[_fake_parse_result("18/500")]):
-            stats = run(ingest_votes_nrw.ingest_pdf(fake_pdf))
+            stats = run(ingest_votes.ingest_pdf(fake_pdf))
        assert stats["protokoll_id"] == "MMP18-77"
        votes = run(database.get_plenum_votes("NRW", "18/500"))
        assert votes[0]["quelle_protokoll"] == "MMP18-77"
    def test_protokoll_id_override(self, initialized_db, tmp_path):
-        from app import ingest_votes_nrw, database
+        from app import ingest_votes, database
        fake_pdf = tmp_path / "scan.pdf"
        fake_pdf.write_bytes(b"%PDF")
-        with patch("app.ingest_votes_nrw.parse_protocol",
+        with patch("app.ingest_votes.parse_protocol",
                   return_value=[_fake_parse_result("18/600")]):
-            run(ingest_votes_nrw.ingest_pdf(
+            run(ingest_votes.ingest_pdf(
                fake_pdf, protokoll_id="MMP18-99", quelle_url="https://example.com/x.pdf",
            ))
        votes = run(database.get_plenum_votes("NRW", "18/600"))
@ -127,12 +127,12 @@ class TestIngestPdf:
    def test_bundesland_override(self, initialized_db, tmp_path):
        """Adapter fuer andere BL koennten denselben Ingest-Helper nutzen."""
-        from app import ingest_votes_nrw, database
+        from app import ingest_votes, database
        fake_pdf = tmp_path / "MV-MP1.pdf"
        fake_pdf.write_bytes(b"%PDF")
-        with patch("app.ingest_votes_nrw.parse_protocol",
+        with patch("app.ingest_votes.parse_protocol",
                   return_value=[_fake_parse_result("8/100")]):
-            run(ingest_votes_nrw.ingest_pdf(fake_pdf, bundesland="MV"))
+            run(ingest_votes.ingest_pdf(fake_pdf, bundesland="MV"))
        # Lookup unter dem richtigen BL
        votes_mv = run(database.get_plenum_votes("MV", "8/100"))
        assert len(votes_mv) == 1
@ -142,17 +142,17 @@ class TestIngestPdf:
    def test_re_ingest_overwrites_same_protokoll(self, initialized_db, tmp_path):
        """Erneuter Ingest desselben Protokolls aktualisiert die Eintraege
        (idempotent), kein Duplikat."""
-        from app import ingest_votes_nrw, database
+        from app import ingest_votes, database
        fake_pdf = tmp_path / "MMP18-1.pdf"
        fake_pdf.write_bytes(b"%PDF")
-        with patch("app.ingest_votes_nrw.parse_protocol",
+        with patch("app.ingest_votes.parse_protocol",
                   return_value=[_fake_parse_result("18/700", "angenommen", ja=["CDU"])]):
-            run(ingest_votes_nrw.ingest_pdf(fake_pdf))
+            run(ingest_votes.ingest_pdf(fake_pdf))
        # Re-Ingest mit korrigiertem Ergebnis (z.B. Parser-Fix)
-        with patch("app.ingest_votes_nrw.parse_protocol",
+        with patch("app.ingest_votes.parse_protocol",
                   return_value=[_fake_parse_result("18/700", "abgelehnt", ja=[], nein=["CDU"])]):
-            run(ingest_votes_nrw.ingest_pdf(fake_pdf))
+            run(ingest_votes.ingest_pdf(fake_pdf))
        votes = run(database.get_plenum_votes("NRW", "18/700"))
        assert len(votes) == 1
--- a/tests/test_protokoll_parsers.py
+++ b/tests/test_protokoll_parsers.py
@ -0,0 +1,74 @@
 """Tests fuer app/protokoll_parsers/__init__.py — Registry + Dispatch (#126)."""
 from __future__ import annotations
 import pytest
 from app.protokoll_parsers import (
    PROTOKOLL_PARSERS,
    parse_protocol,
    supported_bundeslaender,
 )
 class TestRegistry:
    """Checks on the contents of the ``PROTOKOLL_PARSERS`` registry."""

    def test_nrw_registered(self):
        """NRW is the reference implementation and must be registered."""
        registered = PROTOKOLL_PARSERS
        assert "NRW" in registered

    def test_supported_includes_nrw(self):
        """The helper lists NRW among the supported codes."""
        supported = supported_bundeslaender()
        assert "NRW" in supported

    def test_supported_returns_sorted(self):
        """The helper returns its codes in sorted order."""
        listed = supported_bundeslaender()
        expected = sorted(listed)
        assert listed == expected

    def test_registry_values_are_callable(self):
        """Every registry value can be called."""
        for bl_code in PROTOKOLL_PARSERS:
            entry = PROTOKOLL_PARSERS[bl_code]
            assert callable(entry), f"Parser fuer {bl_code} ist nicht callable"
 class TestDispatch:
    """Behaviour of the cross-state ``parse_protocol`` entry point."""

    def test_unknown_bl_raises_not_implemented(self):
        """An unregistered code raises NotImplementedError with a helpful message."""
        with pytest.raises(NotImplementedError) as raised:
            parse_protocol("XX", "/dev/null")
        message = str(raised.value)
        # The message must contain, in this order of checking: the offending
        # code, the list of supported states, and the issue reference for
        # follow-up work.
        for fragment in ("XX", "NRW", "#126"):
            assert fragment in message

    def test_known_bl_delegates_to_registered_parser(self, tmp_path, monkeypatch):
        """parse_protocol hands the path to the parser registered for the code."""
        pdf_arg = str(tmp_path / "x.pdf")
        seen_paths: list[str] = []

        def fake_parser(pdf_path: str) -> list[dict]:
            seen_paths.append(pdf_path)
            empty_votes = {"ja": [], "nein": [], "enthaltung": []}
            return [{"drucksache": "18/1", "ergebnis": "angenommen", "votes": empty_votes}]

        # Temporarily register a parser under "TEST"; it is removed again
        # once the test is over.
        monkeypatch.setitem(PROTOKOLL_PARSERS, "TEST", fake_parser)
        parsed = parse_protocol("TEST", pdf_arg)
        assert seen_paths == [pdf_arg]
        assert len(parsed) == 1
        assert parsed[0]["drucksache"] == "18/1"
 class TestParserSchema:
    """Contract check on the result dicts produced by registered parsers.

    The stated minimum schema is drucksache (str|None), ergebnis (str) and
    votes (dict).

    NOTE(review): ``votes`` is named in the contract but is not asserted by
    the test below — confirm whether ``find_results`` is expected to emit it.
    """

    def test_nrw_result_dict_has_expected_keys(self):
        """Smoke test on a hand-written plenary-protocol snippet.

        Checks that every dict returned by the NRW ``find_results`` carries
        the keys ``drucksache``, ``ergebnis``, ``kind`` and ``einstimmig``.
        """
        from app.protokoll_parsers.nrw import find_results

        required_keys = ("drucksache", "ergebnis", "kind", "einstimmig")
        snippet = "Damit ist der Antrag Drucksache 18/100 angenommen."
        hits = find_results(snippet)
        assert hits, "find_results sollte mindestens einen Treffer liefern"
        for hit in hits:
            for key in required_keys:
                assert key in hit, f"Key '{key}' fehlt im Result"
--- a/tests/test_protokoll_parsers_nrw.py
+++ b/tests/test_protokoll_parsers_nrw.py
@ -1,12 +1,13 @@
-"""Tests fuer app/protokoll_parser_nrw.py — NRW-Plenarprotokoll-Parser v5.
+"""Tests fuer app/protokoll_parsers/nrw.py — NRW-Plenarprotokoll-Parser v5.
-Backfill aus #134. Der Parser ist deterministisch und anchor-basiert;
+Backfill aus #134, BL-Refactor aus #126.
 jede Aenderung an den RESULT_ANCHORS oder den Vote-Block-Regexes muss
 sofort durch diese Tests fallen.
-Die echte 19/19-Garantie auf MMP18-119 laeuft separat als Integration-Test
+Der Parser ist deterministisch und anchor-basiert; jede Aenderung an den
-(braucht das PDF). Hier: pure-string-Tests fuer alle Reverse-Engineering-
+RESULT_ANCHORS oder den Vote-Block-Regexes muss sofort durch diese Tests
-Findings, die bei der iterativen Entwicklung 1-15 dokumentiert wurden.
+fallen. Die echte 19/19-Garantie auf MMP18-119 laeuft separat als
 Integration-Test (braucht das PDF). Hier: pure-string-Tests fuer alle
 Reverse-Engineering-Findings, die bei der iterativen Entwicklung 1-15
 dokumentiert wurden.
 """
 from __future__ import annotations
@ -15,7 +16,7 @@ import types
 # fitz ist via tests/conftest.py gestubbed — Pure-String-Funktionen kommen ohne aus.
-from app.protokoll_parser_nrw import (
+from app.protokoll_parsers.nrw import (
    normalize_fraktionen,
    find_results,
    resolve_drucksache_for_ueber,