gwoe-antragspruefer/tests/test_protokoll_parsers.py

"""Tests fuer app/protokoll_parsers/__init__.py — Registry + Dispatch (#126)."""
from __future__ import annotations

import pytest

from app.protokoll_parsers import (
    PROTOKOLL_PARSERS,
    parse_protocol,
    supported_bundeslaender,
)


class TestRegistry:
    """Checks on the parser registry and the ``supported_bundeslaender`` helper."""

    def test_nrw_registered(self):
        """NRW is the reference implementation, so it must be registered."""
        registry = PROTOKOLL_PARSERS
        assert "NRW" in registry

    def test_supported_includes_nrw(self):
        """The helper's result contains the NRW code."""
        supported = supported_bundeslaender()
        assert "NRW" in supported

    def test_supported_returns_sorted(self):
        """The helper's result compares equal to its own sorted list."""
        supported = supported_bundeslaender()
        expected_order = sorted(supported)
        assert supported == expected_order

    def test_registry_values_are_callable(self):
        """Every value stored in the registry is callable."""
        for code, candidate in PROTOKOLL_PARSERS.items():
            is_callable = callable(candidate)
            assert is_callable, f"Parser fuer {code} ist nicht callable"


class TestDispatch:
    """Checks on how ``parse_protocol`` routes a Bundesland code to a parser."""

    def test_unknown_bl_raises_not_implemented(self):
        """An unregistered code raises NotImplementedError with a helpful message."""
        with pytest.raises(NotImplementedError) as excinfo:
            parse_protocol("XX", "/dev/null")
        message = str(excinfo.value)
        # Required fragments, in order: the rejected code, the list of
        # supported Bundeslaender (NRW), and the issue reference for
        # follow-up work.
        for fragment in ("XX", "NRW", "#126"):
            assert fragment in message

    def test_known_bl_delegates_to_registered_parser(self, tmp_path, monkeypatch):
        """parse_protocol delegates to the parser registered for the code."""
        expected_path = str(tmp_path / "x.pdf")
        received: list[str] = []

        def fake_parser(pdf_path: str) -> list[dict]:
            # Record the argument so the test can verify what was forwarded.
            received.append(pdf_path)
            empty_votes = {"ja": [], "nein": [], "enthaltung": []}
            return [{"drucksache": "18/1", "ergebnis": "angenommen", "votes": empty_votes}]

        # Register a temporary TEST parser; monkeypatch reverts the entry
        # when the test finishes.
        monkeypatch.setitem(PROTOKOLL_PARSERS, "TEST", fake_parser)

        result = parse_protocol("TEST", expected_path)

        assert received == [expected_path]
        assert len(result) == 1
        first = result[0]
        assert first["drucksache"] == "18/1"


class TestParserSchema:
    """Contract: every registered parser must return result dicts with a
    minimal schema — drucksache (str|None), ergebnis (str), votes (dict).

    NOTE(review): the single test below calls ``find_results`` from the NRW
    module and checks for ``kind`` and ``einstimmig`` rather than ``votes`` —
    confirm which key set is the intended contract.
    """

    def test_nrw_result_dict_has_expected_keys(self):
        """Smoke test on a hand-written plenary-protocol snippet.

        Verifies that every result dict contains the required keys, which
        are said to be documented in the package ``__init__.py`` (not
        visible from this file).
        """
        from app.protokoll_parsers.nrw import find_results

        required_keys = ("drucksache", "ergebnis", "kind", "einstimmig")
        snippet = "Damit ist der Antrag Drucksache 18/100 angenommen."

        hits = find_results(snippet)
        assert hits, "find_results sollte mindestens einen Treffer liefern"
        for hit in hits:
            for key in required_keys:
                assert key in hit, f"Key '{key}' fehlt im Result"
feat(#126): protokoll_parsers/-Sub-Package + Registry-Pattern + ADR 0009 Architektur-Refactor zur Vorbereitung BL-uebergreifender Parser: - app/protokoll_parser_nrw.py → app/protokoll_parsers/nrw.py - app/ingest_votes_nrw.py → app/ingest_votes.py (BL-uebergreifend) - Neue app/protokoll_parsers/__init__.py mit: - PROTOKOLL_PARSERS-Dict (BL-Code → Parser-Funktion, derzeit nur NRW) - parse_protocol(bundesland, pdf_path) als BL-uebergreifender Einstieg - supported_bundeslaender()-Helper - NotImplementedError mit hilfreicher Message bei unbekanntem BL CLI bekommt --supported-Flag fuer BL-Discovery: python -m app.ingest_votes --supported → 'NRW' ADR 0009 dokumentiert das Muster (Sub-Package + Funktions-Registry, analog zu ADR 0002 fuer ParlamentAdapter). Folge-BL bekommen je eine eigene Datei und einen Eintrag in PROTOKOLL_PARSERS — kein Refactoring der Bestands-Logik. Tests: - 7 neue Tests in test_protokoll_parsers.py fuer Registry und Dispatch - Bestehende NRW-Tests umbenannt zu test_protokoll_parsers_nrw.py, Imports angepasst — keine Verhaltens-Aenderung - Bestehende Ingest-Tests umbenannt zu test_ingest_votes.py 642 Tests gruen, kein Verhaltens-Drift. 2026-04-28 08:37:31 +02:00			`"""Tests fuer app/protokoll_parsers/__init__.py — Registry + Dispatch (#126)."""`
			`from __future__ import annotations`

			`import pytest`

			`from app.protokoll_parsers import (`
			`PROTOKOLL_PARSERS,`
			`parse_protocol,`
			`supported_bundeslaender,`
			`)`


			`class TestRegistry:`
			`def test_nrw_registered(self):`
			`"""NRW ist die Referenz-Implementierung — muss da sein."""`
			`assert "NRW" in PROTOKOLL_PARSERS`

			`def test_supported_includes_nrw(self):`
			`assert "NRW" in supported_bundeslaender()`

			`def test_supported_returns_sorted(self):`
			`codes = supported_bundeslaender()`
			`assert codes == sorted(codes)`

			`def test_registry_values_are_callable(self):`
			`for code, parser in PROTOKOLL_PARSERS.items():`
			`assert callable(parser), f"Parser fuer {code} ist nicht callable"`


			`class TestDispatch:`
			`def test_unknown_bl_raises_not_implemented(self):`
			`with pytest.raises(NotImplementedError) as exc:`
			`parse_protocol("XX", "/dev/null")`
			`msg = str(exc.value)`
			`assert "XX" in msg`
			`# Liste der unterstuetzten BL muss in der Message stehen`
			`assert "NRW" in msg`
			`# Issue-Referenz fuer Folge-Arbeit`
			`assert "#126" in msg`

			`def test_known_bl_delegates_to_registered_parser(self, tmp_path, monkeypatch):`
			`"""parse_protocol delegiert an den BL-Parser aus der Registry."""`
			`called_with: list[str] = []`

			`def fake_parser(pdf_path: str) -> list[dict]:`
			`called_with.append(pdf_path)`
			`return [{"drucksache": "18/1", "ergebnis": "angenommen", "votes": {"ja": [], "nein": [], "enthaltung": []}}]`

			`# Temporaer einen TEST-Parser registrieren, dann wieder entfernen`
			`monkeypatch.setitem(PROTOKOLL_PARSERS, "TEST", fake_parser)`

			`result = parse_protocol("TEST", str(tmp_path / "x.pdf"))`

			`assert called_with == [str(tmp_path / "x.pdf")]`
			`assert len(result) == 1`
			`assert result[0]["drucksache"] == "18/1"`


			`class TestParserSchema:`
			`"""Vertrag: jeder registrierte Parser muss Result-Dicts mit minimalem`
			`Schema liefern — drucksache (str\|None), ergebnis (str), votes (dict)."""`

			`def test_nrw_result_dict_has_expected_keys(self):`
			`"""Smoke-Test mit handgemachtem Plenarprotokoll-Snippet — pruefen,`
			`dass das Schema des Output-Dicts die in __init__.py dokumentierten`
			`Keys enthaelt."""`
			`from app.protokoll_parsers.nrw import find_results`

			`text = "Damit ist der Antrag Drucksache 18/100 angenommen."`
			`results = find_results(text)`
			`assert results, "find_results sollte mindestens einen Treffer liefern"`
			`for r in results:`
			`for key in ("drucksache", "ergebnis", "kind", "einstimmig"):`
			`assert key in r, f"Key '{key}' fehlt im Result"`