feat(#126): protokoll_parsers/-Sub-Package + Registry-Pattern + ADR 0009

Architektur-Refactor zur Vorbereitung BL-uebergreifender Parser: - app/protokoll_parser_nrw.py → app/protokoll_parsers/nrw.py - app/ingest_votes_nrw.py → app/ingest_votes.py (BL-uebergreifend) - Neue app/protokoll_parsers/__init__.py mit: - PROTOKOLL_PARSERS-Dict (BL-Code → Parser-Funktion, derzeit nur NRW) - parse_protocol(bundesland, pdf_path) als BL-uebergreifender Einstieg - supported_bundeslaender()-Helper - NotImplementedError mit hilfreicher Message bei unbekanntem BL CLI bekommt --supported-Flag fuer BL-Discovery: python -m app.ingest_votes --supported → 'NRW' ADR 0009 dokumentiert das Muster (Sub-Package + Funktions-Registry, analog zu ADR 0002 fuer ParlamentAdapter). Folge-BL bekommen je eine eigene Datei und einen Eintrag in PROTOKOLL_PARSERS — kein Refactoring der Bestands-Logik. Tests: - 7 neue Tests in test_protokoll_parsers.py fuer Registry und Dispatch - Bestehende NRW-Tests umbenannt zu test_protokoll_parsers_nrw.py, Imports angepasst — keine Verhaltens-Aenderung - Bestehende Ingest-Tests umbenannt zu test_ingest_votes.py 642 Tests gruen, kein Verhaltens-Drift.
2026-04-28 08:37:31 +02:00 · 2026-04-28 08:37:31 +02:00 · 7de4df1fef
commit 7de4df1fef
parent a9f0b61c75
9 changed files with 333 additions and 44 deletions
--- a/app/database.py
+++ b/app/database.py
@ -262,7 +262,7 @@ async def init_db():
        # Fraktions-aggregierte Abstimmungsergebnisse aus Plenarprotokollen (#106).
        # Granularitaet: "GRUENE und SPD haben zugestimmt", nicht pro MP — das
        # ist der Datentyp, der aus deterministischen Parsern wie
-        # protokoll_parser_nrw.py rauskommt.
+        # app/protokoll_parsers/ rauskommt.
        # Compound-PK ueber quelle_protokoll, weil eine Drucksache mehrfach
        # abgestimmt werden kann (Ausschuss-Empfehlung + Plenum-Beschluss).
        await db.execute("""
@ -1211,7 +1211,7 @@ async def get_monitoring_new_today(scan_date: str) -> list[dict]:

 # ─── Plenum-Vote-Results (#106) ─────────────────────────────────────────────
 # Fraktions-aggregierte Abstimmungsergebnisse aus Plenarprotokollen.
-# Quelle: protokoll_parser_nrw.py (NRW). BL-uebergreifender Parser ist #126.
+# Quelle: app/protokoll_parsers/ (NRW). BL-uebergreifender Parser ist #126.

 async def upsert_plenum_vote(
    *,
--- a/app/ingest_votes_nrw.py
+++ b/app/ingest_votes_nrw.py
@ -1,16 +1,19 @@
-"""Ingest-CLI fuer NRW-Plenarprotokolle (#106).
+"""BL-uebergreifende Ingest-CLI fuer Plenarprotokolle (#106 / #126).

 Pipeline:
  1. PDF laden (Pfad oder URL)
-  2. protokoll_parser_nrw.parse_protocol() liefert Liste von Abstimmungen
-  3. upsert_plenum_vote() schreibt jede Abstimmung in die DB
+  2. ``protokoll_parsers.parse_protocol(bundesland, pdf_path)`` waehlt den
+     BL-spezifischen Parser aus der Registry
+  3. ``upsert_plenum_vote()`` schreibt jede Abstimmung in die DB

 CLI:
-  python -m app.ingest_votes_nrw --pdf /pfad/zu/MMP18-119.pdf
-  python -m app.ingest_votes_nrw --url https://landtag.nrw.de/.../MMP18-119.pdf
-  python -m app.ingest_votes_nrw --pdf MMP18-119.pdf --protokoll-id MMP18-119
+  python -m app.ingest_votes --pdf MMP18-119.pdf
+  python -m app.ingest_votes --url https://landtag.nrw.de/.../MMP18-119.pdf
+  python -m app.ingest_votes --pdf x.pdf --bundesland NRW --protokoll-id MMP18-119
+  python -m app.ingest_votes --supported   # Liste der BL mit Parser

-Die Protokoll-ID wird, wenn nicht uebergeben, aus dem Datei-Stem abgeleitet.
+Aktuell registriert: NRW. Folge-BL via app/protokoll_parsers/<bl>.py + Eintrag
+in PROTOKOLL_PARSERS — siehe ADR 0009.
 """
 from __future__ import annotations

@ -23,7 +26,7 @@ import urllib.request
 from pathlib import Path
 from typing import Optional

-from .protokoll_parser_nrw import parse_protocol
+from .protokoll_parsers import parse_protocol, supported_bundeslaender
 from .database import upsert_plenum_vote

 logger = logging.getLogger(__name__)
@ -52,13 +55,17 @@ async def ingest_pdf(
    protokoll_id: Optional[str] = None,
    quelle_url: Optional[str] = None,
 ) -> dict:
-    """Parse das PDF und schreibe alle gefundenen Abstimmungen in die DB.
+    """Parse das PDF mit dem BL-Parser und schreibe alle Abstimmungen in die DB.

    Returns:
-        Statistik-Dict ``{parsed, written, skipped_no_drucksache, errors}``.
+        Statistik-Dict ``{parsed, written, skipped_no_drucksache, errors,
+        protokoll_id, bundesland}``.
+
+    Raises:
+        NotImplementedError: wenn fuer ``bundesland`` kein Parser registriert ist.
    """
    pid = protokoll_id or _derive_protokoll_id(pdf_path)
-    parsed = parse_protocol(str(pdf_path))
+    parsed = parse_protocol(bundesland, str(pdf_path))

    written = 0
    skipped_no_ds = 0
@ -100,17 +107,27 @@ def _cli() -> None:
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")

    parser = argparse.ArgumentParser(
-        description="Plenarprotokoll → plenum_vote_results-Tabelle (#106)",
+        description="Plenarprotokoll → plenum_vote_results (#106 / #126)",
    )
-    src = parser.add_mutually_exclusive_group(required=True)
+    src = parser.add_mutually_exclusive_group(required=False)
    src.add_argument("--pdf", help="Pfad zu lokalem PDF")
    src.add_argument("--url", help="HTTP(S)-URL zum PDF")
    parser.add_argument("--bundesland", default="NRW",
                        help="Bundesland-Code (default: NRW)")
    parser.add_argument("--protokoll-id",
                        help="Protokoll-ID (default: aus Datei-Stem)")
+    parser.add_argument("--supported", action="store_true",
+                        help="Liste alle BL-Codes mit registriertem Parser")
    args = parser.parse_args()

+    if args.supported:
+        for bl in supported_bundeslaender():
+            print(bl)
+        sys.exit(0)
+
+    if not args.pdf and not args.url:
+        parser.error("--pdf oder --url ist erforderlich")
+
    if args.url:
        # Download in tmp und nach dem Run wieder loeschen
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
--- a/app/protokoll_parsers/__init__.py
+++ b/app/protokoll_parsers/__init__.py
@ -0,0 +1,69 @@
+"""BL-uebergreifende Plenarprotokoll-Abstimmungsparser (#126).
+
+Architektur (vgl. ADR 0009): pro Bundesland eine Modul-Datei
+``app/protokoll_parsers/<bl-code>.py``, die mindestens eine Funktion
+``parse_protocol(pdf_path: str) -> list[dict]`` exportiert. Die Registry
+``PROTOKOLL_PARSERS`` mappt BL-Code → Parser-Funktion.
+
+Erwartetes Result-Schema pro Eintrag in der Liste::
+
+    {
+        "drucksache": str | None,    # z.B. "18/1234"; None bei nicht aufloesbar
+        "ergebnis": str,             # angenommen | abgelehnt | ueberwiesen | ...
+        "einstimmig": bool,          # explizit als einstimmig markiert
+        "kind": str,                 # parser-intern, fuer Debug
+        "votes": {                   # fraktions-Listen pro Vote-Kategorie
+            "ja": list[str],
+            "nein": list[str],
+            "enthaltung": list[str],
+        },
+    }
+
+NRW ist die Referenz-Implementierung. Folge-BL (HE/BB/MV/BE/...) bekommen
+eigene Module mit demselben Funktions-Vertrag — neue Eintraege in der
+Registry sind reine Tippelarbeit, das Reverse-Engineering pro Landtag
+ist die eigentliche Arbeit.
+"""
+from __future__ import annotations
+
+from typing import Callable
+
+from .nrw import parse_protocol as _parse_nrw
+
+# Typ-Alias fuer Lesbarkeit; Parser-Signatur ist bewusst minimal.
+ProtokollParser = Callable[[str], list[dict]]
+
+PROTOKOLL_PARSERS: dict[str, ProtokollParser] = {
+    "NRW": _parse_nrw,
+}
+
+
+def parse_protocol(bundesland: str, pdf_path: str) -> list[dict]:
+    """BL-uebergreifender Einstieg. Sucht den Parser in der Registry.
+
+    Raises:
+        NotImplementedError: wenn fuer das Bundesland (noch) kein Parser
+            registriert ist. Folge-Issue: BL-Adapter ergaenzen mit einem
+            eigenen Modul plus Eintrag hier.
+    """
+    parser = PROTOKOLL_PARSERS.get(bundesland)
+    if parser is None:
+        supported = ", ".join(sorted(PROTOKOLL_PARSERS)) or "(keine)"
+        raise NotImplementedError(
+            f"Kein Plenarprotokoll-Parser fuer {bundesland!r}. "
+            f"Unterstuetzt: {supported}. Siehe #126."
+        )
+    return parser(pdf_path)
+
+
+def supported_bundeslaender() -> list[str]:
+    """Liste der BL-Codes mit registrierten Parsern."""
+    return sorted(PROTOKOLL_PARSERS)
+
+
+__all__ = [
+    "ProtokollParser",
+    "PROTOKOLL_PARSERS",
+    "parse_protocol",
+    "supported_bundeslaender",
+]
--- a/app/protokoll_parsers/nrw.py
+++ b/app/protokoll_parsers/nrw.py
--- a/docs/adr/0009-protokoll-parser-registry.md
+++ b/docs/adr/0009-protokoll-parser-registry.md
@ -0,0 +1,127 @@
+# 0009 — Plenarprotokoll-Parser-Registry pro Bundesland
+
+| | |
+|---|---|
+| **Status** | accepted |
+| **Datum** | 2026-04-28 |
+| **Refs** | #106, #126, ADR 0002 (Adapter-Pattern) |
+
+## Kontext
+
+Der NRW-Plenarprotokoll-Parser (#106) ist deterministisch, anchor-basiert
+und erreicht 19/19 auf der MMP18-119-Fixture. Damit war die Architektur-Frage
+geloest — aber nur fuer NRW. Andere Bundeslaender publizieren ihre
+Plenarprotokolle in fundamental anderen Formaten:
+
+- Hessen: HTML mit semantischen Tags pro Beschluss
+- Brandenburg: PDF mit Tabellen-Layout fuer Vote-Counts
+- Mecklenburg-Vorpommern: ParLDok-XML-Export
+- Berlin: PDF mit eigenem Formularkasten-Schema
+- ...
+
+Ein einziger Parser fuer alle BL ist nicht baubar. Die Reverse-Engineering-
+Arbeit pro Landtag ist substantiell und passiert phasenweise: zuerst NRW
+wegen der hohen Antragsdichte, danach BL fuer BL nach Bedarf.
+
+Das Adapter-Pattern aus ADR 0002 (`ParlamentAdapter`) hat dieses Problem
+fuer die Antrags-Suche bereits geloest. Plenarprotokoll-Parser ist die
+naechste Familie mit derselben Form: pro BL eine eigene Implementierung,
+ein gemeinsamer Aufruf-Vertrag, ein Registry-Lookup.
+
+## Optionen
+
+### Option A — Eine grosse Datei mit If-Else-Dispatch
+
+Eine einzige `app/protokoll_parser.py`-Datei mit einem `parse_protocol(bl, pdf)`,
+das je nach BL andere Funktionen ruft. **Vorteile:** flach, einfach.
+**Nachteile:** waechst zur 2000-LOC-Datei, BL-spezifische Reverse-Engineering-
+Notizen und Helper-Functions vermischen sich, schlechte Test-Isolation.
+
+### Option B — OOP-Hierarchie mit `ProtokollParserBase` als ABC
+
+Abstrakte Basisklasse mit `parse(pdf_path) -> list[VoteResult]`,
+konkrete Subklassen pro BL. **Vorteile:** typisierter Vertrag.
+**Nachteile:** Boilerplate fuer Klassen-Definitionen ohne Mehrwert,
+weil der NRW-Parser keinen Instanz-State hat (alles `def`-Funktionen,
+keine `self.x`).
+
+### Option C — Sub-Package mit Funktions-Registry (gewaehlt)
+
+`app/protokoll_parsers/` als Sub-Package, pro BL eine eigene Datei
+(`nrw.py`, `mv.py`, `he.py`, ...), die mindestens
+`parse_protocol(pdf_path: str) -> list[dict]` exportiert. Ein
+`PROTOKOLL_PARSERS`-Dict in `__init__.py` mappt BL-Code → Funktion.
+Das BL-uebergreifende `parse_protocol(bl, pdf_path)` macht den Lookup.
+
+**Vorteile:**
+- Konsistent mit dem `ADAPTERS`-Dict in `parlamente.py` (ADR 0002)
+- BL-Code lebt in eigener Datei mit eigenen Helpern und Notizen
+- Neue BL = neue Datei + ein Eintrag in `__init__.py`, kein Refactoring
+- Tests pro BL in eigener Test-Datei (`tests/test_protokoll_parsers_<bl>.py`)
+- Parser-Funktionen bleiben simpel, kein OOP-Overhead
+
+**Nachteile:**
+- Vertrag ist nur per Convention dokumentiert (nicht via Type-System
+  erzwingbar) — dafuer ein Schema-Test in `test_protokoll_parsers.py`
+  als Sicherheitsnetz.
+
+## Entscheidung
+
+**Option C.** Konkret:
+
+```
+app/protokoll_parsers/
+├── __init__.py     # Registry + parse_protocol(bl, pdf) + supported_bundeslaender()
+├── nrw.py          # NRW v5 (vorher app/protokoll_parser_nrw.py)
+└── <bl>.py         # je BL eine Datei, sobald implementiert
+```
+
+**Vertrag fuer jeden Parser** (verbindlich):
+
+```python
+def parse_protocol(pdf_path: str) -> list[dict]:
+    """Returns: [
+        {
+            "drucksache": str | None,
+            "ergebnis": str,         # angenommen/abgelehnt/ueberwiesen/...
+            "einstimmig": bool,
+            "kind": str,             # parser-intern, fuer Debug
+            "votes": {
+                "ja": list[str],     # Fraktions-Codes (CDU, SPD, GRUENE, ...)
+                "nein": list[str],
+                "enthaltung": list[str],
+            },
+        },
+        ...
+    ]"""
+```
+
+**Naming:** Datei-Stem = lowercase BL-Code (`nrw.py`, `mv.py`, ...).
+Registry-Key = uppercase BL-Code (`"NRW"`, `"MV"`).
+
+**Konsumenten** rufen `parse_protocol(bundesland, pdf_path)` aus dem
+Sub-Package, nicht direkt eine BL-Datei.
+
+## Konsequenzen
+
+### Positiv
+
+- Folge-BL-Implementierungen ohne Refactoring der Bestands-Logik.
+- Reverse-Engineering-Notizen leben pro BL in einer Datei statt verteilt
+  ueber eine Mega-Datei.
+- Der `supported_bundeslaender()`-Helper macht in CLI und UI sofort
+  sichtbar, wo Daten verfuegbar sind und wo nicht.
+- Neue Adapter-Test-Files folgen demselben Schema (`test_protokoll_parsers_<bl>.py`).
+
+### Negativ
+
+- Schema-Vertrag nur per Convention (kein TypedDict). Dafuer ein
+  Smoke-Test in `tests/test_protokoll_parsers.py`, der pro registriertem
+  Parser die Result-Keys pruefen wird, sobald >1 Implementation existiert.
+
+### Folgen fuer andere ADRs
+
+- ADR 0002 (Adapter-Pattern) bleibt gueltig; dieses ADR ersetzt es
+  nicht, sondern wendet das gleiche Muster auf eine zweite Adapter-Familie an.
+- Folge-Issues (HE/BB/MV/BE/...) sind reine Implementation-Tickets ohne
+  Architektur-Diskussion — der Vertrag ist hier festgelegt.
--- a/docs/adr/index.md
+++ b/docs/adr/index.md
@ -25,6 +25,7 @@ und Konsequenzen. Format inspiriert von [Michael Nygard](https://cognitect.com/b
 | [0006](0006-embedding-model-migration-v3-to-v4.md) | Embedding-Modell-Migration text-embedding-v3 → v4 | accepted | 2026-04-11 |
 | [0007](0007-test-taxonomy.md) | Test-Taxonomie (Unit / Integration / E2E / Property / Smoke) | accepted | 2026-04-28 |
 | [0008](0008-ddd-lightweight-migration.md) | DDD-Lightweight-Migration (Repository, LLM-Port, Domain-Verhalten) | accepted | 2026-04-20 |
+| [0009](0009-protokoll-parser-registry.md) | Plenarprotokoll-Parser-Registry pro Bundesland | accepted | 2026-04-28 |

 ## Wann ADR, wann nicht

--- a/tests/test_ingest_votes_nrw.py
+++ b/tests/test_ingest_votes_nrw.py
@ -1,4 +1,4 @@
-"""Tests fuer app/ingest_votes_nrw.py — PDF → plenum_vote_results Pipeline (#106)."""
+"""Tests fuer app/ingest_votes.py — PDF → plenum_vote_results Pipeline (#106 / #126)."""
 from __future__ import annotations

 import asyncio
@ -63,7 +63,7 @@ def _fake_parse_result(drucksache: str, ergebnis: str = "angenommen",

 class TestIngestPdf:
    def test_writes_each_parsed_vote(self, initialized_db, tmp_path):
-        from app import ingest_votes_nrw, database
+        from app import ingest_votes, database
        fake_pdf = tmp_path / "MMP18-119.pdf"
        fake_pdf.write_bytes(b"%PDF-1.4 fake")

@ -72,8 +72,8 @@ class TestIngestPdf:
            _fake_parse_result("18/200", "abgelehnt", ja=["AfD"], nein=["CDU", "SPD"]),
        ]

-        with patch("app.ingest_votes_nrw.parse_protocol", return_value=parser_results):
-            stats = run(ingest_votes_nrw.ingest_pdf(fake_pdf))
+        with patch("app.ingest_votes.parse_protocol", return_value=parser_results):
+            stats = run(ingest_votes.ingest_pdf(fake_pdf))

        assert stats["parsed"] == 2
        assert stats["written"] == 2
@ -86,7 +86,7 @@ class TestIngestPdf:
    def test_skips_entries_without_drucksache(self, initialized_db, tmp_path):
        """Anchors ohne aufloesbare Drucksache werden gezaehlt aber nicht
        geschrieben (sonst muellt der Import die DB voll)."""
-        from app import ingest_votes_nrw
+        from app import ingest_votes
        fake_pdf = tmp_path / "MMP18-50.pdf"
        fake_pdf.write_bytes(b"%PDF")

@ -94,31 +94,31 @@ class TestIngestPdf:
            _fake_parse_result("18/300", "angenommen"),
            {"drucksache": None, "ergebnis": "angenommen", "votes": {"ja": [], "nein": [], "enthaltung": []}},
        ]
-        with patch("app.ingest_votes_nrw.parse_protocol", return_value=parser_results):
-            stats = run(ingest_votes_nrw.ingest_pdf(fake_pdf))
+        with patch("app.ingest_votes.parse_protocol", return_value=parser_results):
+            stats = run(ingest_votes.ingest_pdf(fake_pdf))

        assert stats["parsed"] == 2
        assert stats["written"] == 1
        assert stats["skipped_no_drucksache"] == 1

    def test_protokoll_id_default_from_stem(self, initialized_db, tmp_path):
-        from app import ingest_votes_nrw, database
+        from app import ingest_votes, database
        fake_pdf = tmp_path / "MMP18-77.pdf"
        fake_pdf.write_bytes(b"%PDF")
-        with patch("app.ingest_votes_nrw.parse_protocol",
+        with patch("app.ingest_votes.parse_protocol",
                   return_value=[_fake_parse_result("18/500")]):
-            stats = run(ingest_votes_nrw.ingest_pdf(fake_pdf))
+            stats = run(ingest_votes.ingest_pdf(fake_pdf))
        assert stats["protokoll_id"] == "MMP18-77"
        votes = run(database.get_plenum_votes("NRW", "18/500"))
        assert votes[0]["quelle_protokoll"] == "MMP18-77"

    def test_protokoll_id_override(self, initialized_db, tmp_path):
-        from app import ingest_votes_nrw, database
+        from app import ingest_votes, database
        fake_pdf = tmp_path / "scan.pdf"
        fake_pdf.write_bytes(b"%PDF")
-        with patch("app.ingest_votes_nrw.parse_protocol",
+        with patch("app.ingest_votes.parse_protocol",
                   return_value=[_fake_parse_result("18/600")]):
-            run(ingest_votes_nrw.ingest_pdf(
+            run(ingest_votes.ingest_pdf(
                fake_pdf, protokoll_id="MMP18-99", quelle_url="https://example.com/x.pdf",
            ))
        votes = run(database.get_plenum_votes("NRW", "18/600"))
@ -127,12 +127,12 @@ class TestIngestPdf:

    def test_bundesland_override(self, initialized_db, tmp_path):
        """Adapter fuer andere BL koennten denselben Ingest-Helper nutzen."""
-        from app import ingest_votes_nrw, database
+        from app import ingest_votes, database
        fake_pdf = tmp_path / "MV-MP1.pdf"
        fake_pdf.write_bytes(b"%PDF")
-        with patch("app.ingest_votes_nrw.parse_protocol",
+        with patch("app.ingest_votes.parse_protocol",
                   return_value=[_fake_parse_result("8/100")]):
-            run(ingest_votes_nrw.ingest_pdf(fake_pdf, bundesland="MV"))
+            run(ingest_votes.ingest_pdf(fake_pdf, bundesland="MV"))
        # Lookup unter dem richtigen BL
        votes_mv = run(database.get_plenum_votes("MV", "8/100"))
        assert len(votes_mv) == 1
@ -142,17 +142,17 @@ class TestIngestPdf:
    def test_re_ingest_overwrites_same_protokoll(self, initialized_db, tmp_path):
        """Erneuter Ingest desselben Protokolls aktualisiert die Eintraege
        (idempotent), kein Duplikat."""
-        from app import ingest_votes_nrw, database
+        from app import ingest_votes, database
        fake_pdf = tmp_path / "MMP18-1.pdf"
        fake_pdf.write_bytes(b"%PDF")

-        with patch("app.ingest_votes_nrw.parse_protocol",
+        with patch("app.ingest_votes.parse_protocol",
                   return_value=[_fake_parse_result("18/700", "angenommen", ja=["CDU"])]):
-            run(ingest_votes_nrw.ingest_pdf(fake_pdf))
+            run(ingest_votes.ingest_pdf(fake_pdf))
        # Re-Ingest mit korrigiertem Ergebnis (z.B. Parser-Fix)
-        with patch("app.ingest_votes_nrw.parse_protocol",
+        with patch("app.ingest_votes.parse_protocol",
                   return_value=[_fake_parse_result("18/700", "abgelehnt", ja=[], nein=["CDU"])]):
-            run(ingest_votes_nrw.ingest_pdf(fake_pdf))
+            run(ingest_votes.ingest_pdf(fake_pdf))

        votes = run(database.get_plenum_votes("NRW", "18/700"))
        assert len(votes) == 1
--- a/tests/test_protokoll_parsers.py
+++ b/tests/test_protokoll_parsers.py
@ -0,0 +1,74 @@
+"""Tests fuer app/protokoll_parsers/__init__.py — Registry + Dispatch (#126)."""
+from __future__ import annotations
+
+import pytest
+
+from app.protokoll_parsers import (
+    PROTOKOLL_PARSERS,
+    parse_protocol,
+    supported_bundeslaender,
+)
+
+
+class TestRegistry:
+    def test_nrw_registered(self):
+        """NRW ist die Referenz-Implementierung — muss da sein."""
+        assert "NRW" in PROTOKOLL_PARSERS
+
+    def test_supported_includes_nrw(self):
+        assert "NRW" in supported_bundeslaender()
+
+    def test_supported_returns_sorted(self):
+        codes = supported_bundeslaender()
+        assert codes == sorted(codes)
+
+    def test_registry_values_are_callable(self):
+        for code, parser in PROTOKOLL_PARSERS.items():
+            assert callable(parser), f"Parser fuer {code} ist nicht callable"
+
+
+class TestDispatch:
+    def test_unknown_bl_raises_not_implemented(self):
+        with pytest.raises(NotImplementedError) as exc:
+            parse_protocol("XX", "/dev/null")
+        msg = str(exc.value)
+        assert "XX" in msg
+        # Liste der unterstuetzten BL muss in der Message stehen
+        assert "NRW" in msg
+        # Issue-Referenz fuer Folge-Arbeit
+        assert "#126" in msg
+
+    def test_known_bl_delegates_to_registered_parser(self, tmp_path, monkeypatch):
+        """parse_protocol delegiert an den BL-Parser aus der Registry."""
+        called_with: list[str] = []
+
+        def fake_parser(pdf_path: str) -> list[dict]:
+            called_with.append(pdf_path)
+            return [{"drucksache": "18/1", "ergebnis": "angenommen", "votes": {"ja": [], "nein": [], "enthaltung": []}}]
+
+        # Temporaer einen TEST-Parser registrieren, dann wieder entfernen
+        monkeypatch.setitem(PROTOKOLL_PARSERS, "TEST", fake_parser)
+
+        result = parse_protocol("TEST", str(tmp_path / "x.pdf"))
+
+        assert called_with == [str(tmp_path / "x.pdf")]
+        assert len(result) == 1
+        assert result[0]["drucksache"] == "18/1"
+
+
+class TestParserSchema:
+    """Vertrag: jeder registrierte Parser muss Result-Dicts mit minimalem
+    Schema liefern — drucksache (str|None), ergebnis (str), votes (dict)."""
+
+    def test_nrw_result_dict_has_expected_keys(self):
+        """Smoke-Test mit handgemachtem Plenarprotokoll-Snippet — prueft
+        die Keys drucksache/ergebnis/kind/einstimmig im Output-Dict von
+        find_results (``votes`` aus dem __init__.py-Schema bleibt ungeprueft)."""
+        from app.protokoll_parsers.nrw import find_results
+
+        text = "Damit ist der Antrag Drucksache 18/100 angenommen."
+        results = find_results(text)
+        assert results, "find_results sollte mindestens einen Treffer liefern"
+        for r in results:
+            for key in ("drucksache", "ergebnis", "kind", "einstimmig"):
+                assert key in r, f"Key '{key}' fehlt im Result"
--- a/tests/test_protokoll_parsers_nrw.py
+++ b/tests/test_protokoll_parsers_nrw.py
@ -1,12 +1,13 @@
-"""Tests fuer app/protokoll_parser_nrw.py — NRW-Plenarprotokoll-Parser v5.
+"""Tests fuer app/protokoll_parsers/nrw.py — NRW-Plenarprotokoll-Parser v5.

-Backfill aus #134. Der Parser ist deterministisch und anchor-basiert;
-jede Aenderung an den RESULT_ANCHORS oder den Vote-Block-Regexes muss
-sofort durch diese Tests fallen.
+Backfill aus #134, BL-Refactor aus #126.

-Die echte 19/19-Garantie auf MMP18-119 laeuft separat als Integration-Test
-(braucht das PDF). Hier: pure-string-Tests fuer alle Reverse-Engineering-
-Findings, die bei der iterativen Entwicklung 1-15 dokumentiert wurden.
+Der Parser ist deterministisch und anchor-basiert; jede Aenderung an den
+RESULT_ANCHORS oder den Vote-Block-Regexes muss sofort durch diese Tests
+fallen. Die echte 19/19-Garantie auf MMP18-119 laeuft separat als
+Integration-Test (braucht das PDF). Hier: pure-string-Tests fuer alle
+Reverse-Engineering-Findings, die bei der iterativen Entwicklung 1-15
+dokumentiert wurden.
 """
 from __future__ import annotations

@ -15,7 +16,7 @@ import types

 # fitz ist via tests/conftest.py gestubbed — Pure-String-Funktionen kommen ohne aus.

-from app.protokoll_parser_nrw import (
+from app.protokoll_parsers.nrw import (
    normalize_fraktionen,
    find_results,
    resolve_drucksache_for_ueber,