gwoe-antragspruefer/tests/test_embeddings.py

"""Tests for embeddings.py prompt formatting.

Reproduces the LLM hallucination bug from the 2026-04-08 session
(commits 1b5fd96 + bc7f4a6): the original ``format_quotes_for_prompt``
rendered each chunk as ``- S. X: "text"`` without any reference to the
programme name. As a result the LLM hallucinated familiar source labels
("FDP NRW Wahlprogramm 2022") for chunks that actually came from MV/BE,
because that was the strongest training-set prior for budget-policy
citations.

Fix: prepend the fully-qualified PROGRAMME[programm_id]["name"] to each
quote.

Also covers the ``partei_upper`` NameError regression in
``get_relevant_quotes_for_antrag`` (#60), which silently downgraded every
assessment to keyword search.
"""
import sys
import types

# The test environment may lack the ``openai`` package, and these tests never
# make API calls. Register a minimal stand-in module *before* ``embeddings``
# is imported, but only when no ``openai`` entry is registered yet.
if "openai" not in sys.modules:
    openai_stub = sys.modules.setdefault("openai", types.ModuleType("openai"))
    # Constructor replacement: accepts any keyword arguments, yields None.
    openai_stub.OpenAI = lambda **_kwargs: None

from app import embeddings as embeddings_mod
from app.embeddings import (
    _chunk_source_label,
    format_quotes_for_prompt,
    get_relevant_quotes_for_antrag,
)


# ─────────────────────────────────────────────────────────────────────────────
# _chunk_source_label — fully-qualified programme name + page
# ─────────────────────────────────────────────────────────────────────────────

class TestChunkSourceLabel:
    """``_chunk_source_label`` must name the programme and the page."""

    def test_known_programme_id(self):
        rendered = _chunk_source_label(
            {"programm_id": "fdp-mv-2021", "seite": 73, "text": "..."}
        )
        assert "FDP Mecklenburg-Vorpommern" in rendered
        assert "S. 73" in rendered

    def test_known_programme_id_for_be(self):
        rendered = _chunk_source_label(
            {"programm_id": "spd-be-2023", "seite": 24, "text": "..."}
        )
        # The BE-2023 PDF files contain the 2021 programmes, hence the
        # year 2021 is expected in the label.
        for fragment in ("SPD Berlin", "2021", "S. 24"):
            assert fragment in rendered

    def test_unknown_programme_id_falls_back_to_id(self):
        unknown = {"programm_id": "fake-xx-9999", "seite": 1, "text": "..."}
        rendered = _chunk_source_label(unknown)
        # Must not raise; the raw id and the page have to survive.
        for fragment in ("fake-xx-9999", "S. 1"):
            assert fragment in rendered

    def test_missing_seite_uses_questionmark(self):
        # No "seite" key at all in this chunk.
        without_page = {"programm_id": "cdu-mv-2021", "text": "..."}
        assert "?" in _chunk_source_label(without_page)


# ─────────────────────────────────────────────────────────────────────────────
# format_quotes_for_prompt — every chunk must carry programme identification
# ─────────────────────────────────────────────────────────────────────────────

# Fixture passed to ``format_quotes_for_prompt`` by the tests below:
# party -> programme type -> list of chunk dicts. It holds three chunks
# (FDP MV election programme, FDP Grundsatz programme, SPD MV election
# programme) and deliberately none from NRW.
EXAMPLE_QUOTES = {
    "FDP": {
        "wahlprogramm": [{
            "programm_id": "fdp-mv-2021",
            "partei": "FDP", "typ": "wahlprogramm", "seite": 73,
            "text": "Die Grundsätze von Wirtschaftlichkeit und Sparsamkeit",
            "similarity": 0.63,
        }],
        "parteiprogramm": [{
            "programm_id": "fdp-grundsatz",
            "partei": "FDP", "typ": "parteiprogramm", "seite": 93,
            "text": "Liberale Marktwirtschaft erfordert solide Haushalte",
            "similarity": 0.60,
        }],
    },
    "SPD": {
        "wahlprogramm": [{
            "programm_id": "spd-mv-2021",
            "partei": "SPD", "typ": "wahlprogramm", "seite": 22,
            "text": "Verkehrswende weg vom motorisierten Individualverkehr",
            "similarity": 0.58,
        }],
    },
}


class TestFormatQuotesForPrompt:
    """Checks on the prompt text rendered by ``format_quotes_for_prompt``.

    Also hosts the regression test for ``get_relevant_quotes_for_antrag``.
    """

    def test_empty_input_returns_empty_string(self):
        rendered = format_quotes_for_prompt({})
        assert rendered == ""

    def test_renders_party_headings(self):
        rendered = format_quotes_for_prompt(EXAMPLE_QUOTES)
        for heading in ("### FDP", "### SPD"):
            assert heading in rendered

    def test_every_chunk_has_programme_name(self):
        """Regression: before the fix only "S. X:" was emitted, without a
        programme name, and the LLM hallucinated NRW-2022 sources from its
        training data."""
        rendered = format_quotes_for_prompt(EXAMPLE_QUOTES)
        # One expected programme name per chunk in the fixture.
        expected_names = (
            "FDP Mecklenburg-Vorpommern",
            "FDP Grundsatzprogramm",
            "SPD Mecklenburg-Vorpommern",
        )
        for name in expected_names:
            assert name in rendered

    def test_contains_strict_citation_instruction(self):
        """The prompt header has to forbid hallucinated sources explicitly."""
        lowered = format_quotes_for_prompt(EXAMPLE_QUOTES).lower()
        # Any one of these keywords counts as the strict-citation hint.
        keywords = ("ausschließlich", "verbatim", "wörtlich")
        assert any(keyword in lowered for keyword in keywords)

    def test_no_nrw_2022_appears_unless_chunks_are_actually_nrw(self):
        """Sanity: the MV-only fixture must not mention NRW anywhere."""
        rendered = format_quotes_for_prompt(EXAMPLE_QUOTES)
        for forbidden in ("NRW", "Nordrhein-Westfalen"):
            assert forbidden not in rendered

    def test_renders_separate_blocks_for_wahl_and_parteiprogramm(self):
        rendered = format_quotes_for_prompt(EXAMPLE_QUOTES)
        for block_title in ("**Wahlprogramm:**", "**Grundsatzprogramm:**"):
            assert block_title in rendered

    def test_get_relevant_quotes_for_antrag_populates_results(self, monkeypatch):
        """Regression for the ``partei_upper`` NameError (Phase B / #55 / eb045d0).

        After the rest of the function had been renamed to
        ``partei_lookup``, the dict-write line still used ``partei_upper``.
        ``get_relevant_quotes_for_antrag`` therefore raised ``NameError`` on
        every call; the ``except Exception`` in ``analyzer.run_analysis``
        swallowed it and *every* assessment silently fell back to keyword
        search, which caused the LLM hallucinations tracked in #60.

        Strategy: replace ``find_relevant_chunks`` via monkeypatch so no
        real embeddings are required, call the wrapper, and check that it
        returns a populated dict rather than crashing.
        """
        def stub_find_relevant_chunks(query, parteien=None, typ=None,
                                      bundesland=None, top_k=3,
                                      min_similarity=0.5):
            # Always exactly one canned chunk, echoing the requested party
            # and programme type when they are given.
            canned_chunk = {
                "programm_id": "gruene-nrw-2022",
                "partei": parteien[0] if parteien else "GRÜNE",
                "typ": typ or "wahlprogramm",
                "seite": 58,
                "text": "Wahlalter ab 16",
                "similarity": 0.7,
            }
            return [canned_chunk]

        monkeypatch.setattr(
            embeddings_mod, "find_relevant_chunks", stub_find_relevant_chunks
        )

        call_kwargs = {
            "antrag_text": "Wahlalter ab 16",
            "fraktionen": ["GRÜNE"],
            "bundesland": "NRW",
            "top_k_per_partei": 2,
        }
        result = get_relevant_quotes_for_antrag(**call_kwargs)

        assert result, "Expected a non-empty result dict, got empty"
        # Keys are canonical party names: GRÜNE itself or whatever the
        # canonical mapper turns it into.
        assert any("GR" in party.upper() for party in result)
        # Each value must be the {wahlprogramm, parteiprogramm} dict.
        first_entry = next(iter(result.values()))
        for programme_type in ("wahlprogramm", "parteiprogramm"):
            assert programme_type in first_entry

    def test_text_truncated_at_500_chars(self):
        full_text = "A" * 1000  # 1000 chars, expected to be truncated
        oversized_chunk = {
            "programm_id": "fdp-mv-2021",
            "seite": 1,
            "text": full_text,
            "similarity": 0.7,
        }
        rendered = format_quotes_for_prompt(
            {"FDP": {"wahlprogramm": [oversized_chunk]}}
        )
        # The truncation marker is present ...
        assert "..." in rendered
        # ... and the complete 1000-character text is not.
        assert full_text not in rendered
Add pytest suite + fix two regex bugs uncovered by it (#46) Erste Tests für die Codebase. 77 Tests, 0.08s Laufzeit, decken die drei Bug-Klassen aus der April-2026-Adapter-Session ab plus haben schon zwei weitere Bugs in Production-Code aufgedeckt. ## Setup - requirements-dev.txt mit pytest + pytest-asyncio - pytest.ini mit asyncio_mode=auto - tests/conftest.py stubbt fitz/bs4/openai/pydantic_settings, damit die Suite ohne den vollen prod-requirements-Satz läuft (pure unit tests, kein PDF-Parsing, kein HTTP) ## Tests - tests/test_parlamente.py (33 Tests) * PortalaAdapter._parse_hit_list_cards: doctype/doctype_full NameError-Regression aus 1cb030a, plus Title/Drucksache/Fraktion- /Datum/PDF-Extraktion gegen ein BE-Card-Fixture * PortalaAdapter._parse_hit_list_dump: gegen ein LSA-Perl-Dump- Fixture inkl. Hex-Escape-Decoding (\x{fc} → ü) * PortalaAdapter._parse_hit_list_html: Auto-Detection zwischen Card- und Dump-Format * PortalaAdapter._normalize_fraktion: kanonische Fraktion-Codes inkl. F.D.P.-mit-Punkten, BÜNDNIS 90, DIE LINKE, BSW * ParLDokAdapter._hit_to_drucksache: JSON-Hit → Drucksache Mapping inkl. 
/navpanes-Stripping, MdL-mit-Partei-in-Klammern, Landesregierung-Detection * ParLDokAdapter._fulltext_id: bundle.js-mirroring (deferred, aber dokumentiert) * ADAPTERS-Registry-Sanity - tests/test_embeddings.py (11 Tests) * _chunk_source_label: Programm-Name + Seite (Halluzinations- Bug-Regression aus 1b5fd96) * format_quotes_for_prompt: jeder Chunk muss Programm-Name enthalten, strict-citation-Hinweis muss im Output sein, keine NRW-Halluzinationen für MV/BE-Chunk-Sets - tests/test_wahlprogramme.py (14 Tests) * Registry-Struktur (jahr int, seiten int, .pdf-Endung) * File-Existenz: jede registrierte PDF muss in static/referenzen/ liegen — würde Tippfehler in den 22 indexierten Programmen sofort fangen * embeddings.PROGRAMME-Konsistenz-Cross-Check - tests/test_bundeslaender.py (15 Tests) * Sanity über 16-State-Registry * #48-Klassifikations-Regression: TH=ParlDok, HB=StarWeb, SN=Eigensystem * Wahltermine plausibel (zwischen 2026 und 2035) - tests/test_analyzer.py (4 Tests) * Markdown-Codeblock-Stripping aus dem JSON-Retry-Loop ## Bug-Funde während der Test-Schreibphase Zwei Production-Bugs in den _normalize_fraktion-Helfern wurden durch die neuen Tests sofort aufgedeckt und im selben Commit gefixt: 1. PortalaAdapter._normalize_fraktion matched "F.D.P." (mit Punkten, wie historische SH/HB-Drucksachen) nicht — Regex \bFDP\b ist zu strikt. Fix: \bF\.?\sD\.?\sP\.?\b analog zu ParLDokAdapter. 2. ParLDokAdapter._normalize_fraktion (auch PortalaAdapter) matched "Ministerium der Finanzen" nicht als Landesregierung, weil \bMINISTER\b die Wortgrenze auch nach MINISTER verlangt — bei MINISTERIUM steht aber IUM danach, keine Wortgrenze. Fix: \bMINISTER ohne abschließendes \b. Beide Bugs hätten Fraktion-Felder bei Drucksachen der Bremischen Bürgerschaft (FDP-Listen) und bei Landesregierungs-Drucksachen in MV/LSA fälschlich leer gelassen — exakt der "fraktionen=[]"- Befund aus dem MV-Smoke-Test in #4. Phase 0 aus Roadmap-Issue #49. 
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-04-08 23:26:06 +02:00			`"""Tests for embeddings.py prompt formatting.`

			`Reproduces the LLM-Halluzinations-Bug from the 2026-04-08 session`
			(commits 1b5fd96 + bc7f4a6): the original ``format_quotes_for_prompt``
			rendered each chunk as ``- S. X: "text"`` without any reference to the
			`programme name. As a result the LLM hallucinated familiar source labels`
			`("FDP NRW Wahlprogramm 2022") for chunks that actually came from MV/BE,`
			`because that was the strongest training-set prior for budget-policy`
			`citations.`

			`Fix: prepend the fully-qualified PROGRAMME[programm_id]["name"] to each`
			`quote.`
			`"""`
			`import sys`
			`import types`

			`# Stub openai before importing embeddings, since the test environment may`
			`# not have it installed and we don't actually need to make API calls.`
			`if "openai" not in sys.modules:`
			`openai_stub = types.ModuleType("openai")`
			`openai_stub.OpenAI = lambda **kw: None`
			`sys.modules["openai"] = openai_stub`

Fix #60: NameError in get_relevant_quotes_for_antrag (Phase B refactor leftover) Root cause: der #55-Refactor (eb045d0) hat in get_relevant_quotes_for_antrag ``partei_upper`` zu ``partei_lookup`` umbenannt — aber die Dict-Write-Zeile ``results[partei_upper] = ...`` wurde übersehen. Bei jedem Aufruf knallt seither ein NameError, der in analyzer.py vom breiten ``except Exception`` verschluckt und still auf die Keyword-Fallback-Suche umgeleitet wird. Konsequenz: 100% der Assessments seit eb045d0 (inkl. autonomer Roadmap-Run #59) liefen ohne Embedding-Retrieval — daher die LLM-Halluzinationen aus #60. Fix: - embeddings.py:528: partei_upper → partei_lookup - analyzer.py:249: NameError/AttributeError/TypeError/KeyError nicht mehr schlucken. Programmierfehler im Embedding-Pfad sollen hart fehlschlagen, damit die nächste Refactor-Regression nicht wieder 24h still degradiert läuft. Echte Network-/API-Exceptions fallen weiterhin auf den Keyword-Pfad zurück. - tests/test_embeddings.py: Regression-Test, der get_relevant_quotes_for_antrag mit gemockten chunks aufruft und sicherstellt, dass die Funktion nicht crasht und ein populiertes Result liefert. Hätte den Bug bei eb045d0 sofort gefangen. Refs: #60, #55, #59 2026-04-09 21:57:56 +02:00			`from app import embeddings as embeddings_mod`
			`from app.embeddings import (`
			`_chunk_source_label,`
			`format_quotes_for_prompt,`
			`get_relevant_quotes_for_antrag,`
			`)`
Add pytest suite + fix two regex bugs uncovered by it (#46) Erste Tests für die Codebase. 77 Tests, 0.08s Laufzeit, decken die drei Bug-Klassen aus der April-2026-Adapter-Session ab plus haben schon zwei weitere Bugs in Production-Code aufgedeckt. ## Setup - requirements-dev.txt mit pytest + pytest-asyncio - pytest.ini mit asyncio_mode=auto - tests/conftest.py stubbt fitz/bs4/openai/pydantic_settings, damit die Suite ohne den vollen prod-requirements-Satz läuft (pure unit tests, kein PDF-Parsing, kein HTTP) ## Tests - tests/test_parlamente.py (33 Tests) * PortalaAdapter._parse_hit_list_cards: doctype/doctype_full NameError-Regression aus 1cb030a, plus Title/Drucksache/Fraktion- /Datum/PDF-Extraktion gegen ein BE-Card-Fixture * PortalaAdapter._parse_hit_list_dump: gegen ein LSA-Perl-Dump- Fixture inkl. Hex-Escape-Decoding (\x{fc} → ü) * PortalaAdapter._parse_hit_list_html: Auto-Detection zwischen Card- und Dump-Format * PortalaAdapter._normalize_fraktion: kanonische Fraktion-Codes inkl. F.D.P.-mit-Punkten, BÜNDNIS 90, DIE LINKE, BSW * ParLDokAdapter._hit_to_drucksache: JSON-Hit → Drucksache Mapping inkl. 
/navpanes-Stripping, MdL-mit-Partei-in-Klammern, Landesregierung-Detection * ParLDokAdapter._fulltext_id: bundle.js-mirroring (deferred, aber dokumentiert) * ADAPTERS-Registry-Sanity - tests/test_embeddings.py (11 Tests) * _chunk_source_label: Programm-Name + Seite (Halluzinations- Bug-Regression aus 1b5fd96) * format_quotes_for_prompt: jeder Chunk muss Programm-Name enthalten, strict-citation-Hinweis muss im Output sein, keine NRW-Halluzinationen für MV/BE-Chunk-Sets - tests/test_wahlprogramme.py (14 Tests) * Registry-Struktur (jahr int, seiten int, .pdf-Endung) * File-Existenz: jede registrierte PDF muss in static/referenzen/ liegen — würde Tippfehler in den 22 indexierten Programmen sofort fangen * embeddings.PROGRAMME-Konsistenz-Cross-Check - tests/test_bundeslaender.py (15 Tests) * Sanity über 16-State-Registry * #48-Klassifikations-Regression: TH=ParlDok, HB=StarWeb, SN=Eigensystem * Wahltermine plausibel (zwischen 2026 und 2035) - tests/test_analyzer.py (4 Tests) * Markdown-Codeblock-Stripping aus dem JSON-Retry-Loop ## Bug-Funde während der Test-Schreibphase Zwei Production-Bugs in den _normalize_fraktion-Helfern wurden durch die neuen Tests sofort aufgedeckt und im selben Commit gefixt: 1. PortalaAdapter._normalize_fraktion matched "F.D.P." (mit Punkten, wie historische SH/HB-Drucksachen) nicht — Regex \bFDP\b ist zu strikt. Fix: \bF\.?\sD\.?\sP\.?\b analog zu ParLDokAdapter. 2. ParLDokAdapter._normalize_fraktion (auch PortalaAdapter) matched "Ministerium der Finanzen" nicht als Landesregierung, weil \bMINISTER\b die Wortgrenze auch nach MINISTER verlangt — bei MINISTERIUM steht aber IUM danach, keine Wortgrenze. Fix: \bMINISTER ohne abschließendes \b. Beide Bugs hätten Fraktion-Felder bei Drucksachen der Bremischen Bürgerschaft (FDP-Listen) und bei Landesregierungs-Drucksachen in MV/LSA fälschlich leer gelassen — exakt der "fraktionen=[]"- Befund aus dem MV-Smoke-Test in #4. Phase 0 aus Roadmap-Issue #49. 
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-04-08 23:26:06 +02:00

			`# ─────────────────────────────────────────────────────────────────────────────`
			`# _chunk_source_label — fully-qualified programme name + page`
			`# ─────────────────────────────────────────────────────────────────────────────`

			`class TestChunkSourceLabel:`
			`def test_known_programme_id(self):`
			`chunk = {"programm_id": "fdp-mv-2021", "seite": 73, "text": "..."}`
			`label = _chunk_source_label(chunk)`
			`assert "FDP Mecklenburg-Vorpommern" in label`
			`assert "S. 73" in label`

			`def test_known_programme_id_for_be(self):`
			`chunk = {"programm_id": "spd-be-2023", "seite": 24, "text": "..."}`
			`label = _chunk_source_label(chunk)`
			`assert "SPD Berlin" in label`
			`assert "2021" in label # the BE-2023.pdf files contain 2021er programmes`
			`assert "S. 24" in label`

			`def test_unknown_programme_id_falls_back_to_id(self):`
			`chunk = {"programm_id": "fake-xx-9999", "seite": 1, "text": "..."}`
			`label = _chunk_source_label(chunk)`
			`# Should not crash, should at least include the id and the page`
			`assert "fake-xx-9999" in label`
			`assert "S. 1" in label`

			`def test_missing_seite_uses_questionmark(self):`
			`chunk = {"programm_id": "cdu-mv-2021", "text": "..."}`
			`label = _chunk_source_label(chunk)`
			`assert "?" in label`


			`# ─────────────────────────────────────────────────────────────────────────────`
			`# format_quotes_for_prompt — every chunk must carry programme identification`
			`# ─────────────────────────────────────────────────────────────────────────────`

			`EXAMPLE_QUOTES = {`
			`"FDP": {`
			`"wahlprogramm": [`
			`{`
			`"programm_id": "fdp-mv-2021",`
			`"partei": "FDP",`
			`"typ": "wahlprogramm",`
			`"seite": 73,`
			`"text": "Die Grundsätze von Wirtschaftlichkeit und Sparsamkeit",`
			`"similarity": 0.63,`
			`},`
			`],`
			`"parteiprogramm": [`
			`{`
			`"programm_id": "fdp-grundsatz",`
			`"partei": "FDP",`
			`"typ": "parteiprogramm",`
			`"seite": 93,`
			`"text": "Liberale Marktwirtschaft erfordert solide Haushalte",`
			`"similarity": 0.60,`
			`},`
			`],`
			`},`
			`"SPD": {`
			`"wahlprogramm": [`
			`{`
			`"programm_id": "spd-mv-2021",`
			`"partei": "SPD",`
			`"typ": "wahlprogramm",`
			`"seite": 22,`
			`"text": "Verkehrswende weg vom motorisierten Individualverkehr",`
			`"similarity": 0.58,`
			`},`
			`],`
			`},`
			`}`


			`class TestFormatQuotesForPrompt:`
			`def test_empty_input_returns_empty_string(self):`
			`assert format_quotes_for_prompt({}) == ""`

			`def test_renders_party_headings(self):`
			`out = format_quotes_for_prompt(EXAMPLE_QUOTES)`
			`assert "### FDP" in out`
			`assert "### SPD" in out`

			`def test_every_chunk_has_programme_name(self):`
			`"""Regression: pre-fix this used "S. X:" only, no programme name —`
			`the LLM then hallucinated NRW-2022 sources from training data."""`
			`out = format_quotes_for_prompt(EXAMPLE_QUOTES)`
			`# Each of the three chunks must reference its source programme`
			`assert "FDP Mecklenburg-Vorpommern" in out`
			`assert "FDP Grundsatzprogramm" in out`
			`assert "SPD Mecklenburg-Vorpommern" in out`

			`def test_contains_strict_citation_instruction(self):`
			`"""The prompt header must explicitly forbid hallucinated sources."""`
			`out = format_quotes_for_prompt(EXAMPLE_QUOTES)`
			`assert "ausschließlich" in out.lower() or "verbatim" in out.lower() or "wörtlich" in out.lower()`

			`def test_no_nrw_2022_appears_unless_chunks_are_actually_nrw(self):`
			`"""Sanity: a pure MV+SPD chunk set must not mention NRW anywhere."""`
			`out = format_quotes_for_prompt(EXAMPLE_QUOTES)`
			`assert "NRW" not in out`
			`assert "Nordrhein-Westfalen" not in out`

			`def test_renders_separate_blocks_for_wahl_and_parteiprogramm(self):`
			`out = format_quotes_for_prompt(EXAMPLE_QUOTES)`
			`assert "Wahlprogramm:" in out`
			`assert "Grundsatzprogramm:" in out`

Fix #60: NameError in get_relevant_quotes_for_antrag (Phase B refactor leftover) Root cause: der #55-Refactor (eb045d0) hat in get_relevant_quotes_for_antrag ``partei_upper`` zu ``partei_lookup`` umbenannt — aber die Dict-Write-Zeile ``results[partei_upper] = ...`` wurde übersehen. Bei jedem Aufruf knallt seither ein NameError, der in analyzer.py vom breiten ``except Exception`` verschluckt und still auf die Keyword-Fallback-Suche umgeleitet wird. Konsequenz: 100% der Assessments seit eb045d0 (inkl. autonomer Roadmap-Run #59) liefen ohne Embedding-Retrieval — daher die LLM-Halluzinationen aus #60. Fix: - embeddings.py:528: partei_upper → partei_lookup - analyzer.py:249: NameError/AttributeError/TypeError/KeyError nicht mehr schlucken. Programmierfehler im Embedding-Pfad sollen hart fehlschlagen, damit die nächste Refactor-Regression nicht wieder 24h still degradiert läuft. Echte Network-/API-Exceptions fallen weiterhin auf den Keyword-Pfad zurück. - tests/test_embeddings.py: Regression-Test, der get_relevant_quotes_for_antrag mit gemockten chunks aufruft und sicherstellt, dass die Funktion nicht crasht und ein populiertes Result liefert. Hätte den Bug bei eb045d0 sofort gefangen. Refs: #60, #55, #59 2026-04-09 21:57:56 +02:00			`def test_get_relevant_quotes_for_antrag_populates_results(self, monkeypatch):`
			`"""Regression for the partei_upper NameError (Phase B / #55 / eb045d0):`

			The dict-write line still referenced ``partei_upper`` after the
			rest of the function had been renamed to ``partei_lookup``. The
			result was that ``get_relevant_quotes_for_antrag`` raised
			``NameError`` on every call, was silently swallowed by the
			``except Exception`` in ``analyzer.run_analysis``, and silently
			`downgraded every assessment to keyword search — which then`
			`caused the LLM hallucinations tracked in #60.`

			Test strategy: monkeypatch ``find_relevant_chunks`` so we don't
			`need real embeddings, then call the wrapper and assert it`
			`actually returns a populated dict instead of crashing.`
			`"""`
			`def fake_find_relevant_chunks(query, parteien=None, typ=None,`
			`bundesland=None, top_k=3,`
			`min_similarity=0.5):`
			`return [{`
			`"programm_id": "gruene-nrw-2022",`
			`"partei": parteien[0] if parteien else "GRÜNE",`
			`"typ": typ or "wahlprogramm",`
			`"seite": 58,`
			`"text": "Wahlalter ab 16",`
			`"similarity": 0.7,`
			`}]`

			`monkeypatch.setattr(embeddings_mod, "find_relevant_chunks",`
			`fake_find_relevant_chunks)`

			`result = get_relevant_quotes_for_antrag(`
			`antrag_text="Wahlalter ab 16",`
			`fraktionen=["GRÜNE"],`
			`bundesland="NRW",`
			`top_k_per_partei=2,`
			`)`
			`assert result, "Expected a non-empty result dict, got empty"`
			`# The keys are canonical party names; either GRÜNE itself or`
			`# whatever the canonical mapper returns for it.`
			`assert any("GR" in k.upper() for k in result.keys())`
			`# And the structure must be the {wahlprogramm, parteiprogramm} dict`
			`first = next(iter(result.values()))`
			`assert "wahlprogramm" in first`
			`assert "parteiprogramm" in first`

Add pytest suite + fix two regex bugs uncovered by it (#46) Erste Tests für die Codebase. 77 Tests, 0.08s Laufzeit, decken die drei Bug-Klassen aus der April-2026-Adapter-Session ab plus haben schon zwei weitere Bugs in Production-Code aufgedeckt. ## Setup - requirements-dev.txt mit pytest + pytest-asyncio - pytest.ini mit asyncio_mode=auto - tests/conftest.py stubbt fitz/bs4/openai/pydantic_settings, damit die Suite ohne den vollen prod-requirements-Satz läuft (pure unit tests, kein PDF-Parsing, kein HTTP) ## Tests - tests/test_parlamente.py (33 Tests) * PortalaAdapter._parse_hit_list_cards: doctype/doctype_full NameError-Regression aus 1cb030a, plus Title/Drucksache/Fraktion- /Datum/PDF-Extraktion gegen ein BE-Card-Fixture * PortalaAdapter._parse_hit_list_dump: gegen ein LSA-Perl-Dump- Fixture inkl. Hex-Escape-Decoding (\x{fc} → ü) * PortalaAdapter._parse_hit_list_html: Auto-Detection zwischen Card- und Dump-Format * PortalaAdapter._normalize_fraktion: kanonische Fraktion-Codes inkl. F.D.P.-mit-Punkten, BÜNDNIS 90, DIE LINKE, BSW * ParLDokAdapter._hit_to_drucksache: JSON-Hit → Drucksache Mapping inkl. 
/navpanes-Stripping, MdL-mit-Partei-in-Klammern, Landesregierung-Detection * ParLDokAdapter._fulltext_id: bundle.js-mirroring (deferred, aber dokumentiert) * ADAPTERS-Registry-Sanity - tests/test_embeddings.py (11 Tests) * _chunk_source_label: Programm-Name + Seite (Halluzinations- Bug-Regression aus 1b5fd96) * format_quotes_for_prompt: jeder Chunk muss Programm-Name enthalten, strict-citation-Hinweis muss im Output sein, keine NRW-Halluzinationen für MV/BE-Chunk-Sets - tests/test_wahlprogramme.py (14 Tests) * Registry-Struktur (jahr int, seiten int, .pdf-Endung) * File-Existenz: jede registrierte PDF muss in static/referenzen/ liegen — würde Tippfehler in den 22 indexierten Programmen sofort fangen * embeddings.PROGRAMME-Konsistenz-Cross-Check - tests/test_bundeslaender.py (15 Tests) * Sanity über 16-State-Registry * #48-Klassifikations-Regression: TH=ParlDok, HB=StarWeb, SN=Eigensystem * Wahltermine plausibel (zwischen 2026 und 2035) - tests/test_analyzer.py (4 Tests) * Markdown-Codeblock-Stripping aus dem JSON-Retry-Loop ## Bug-Funde während der Test-Schreibphase Zwei Production-Bugs in den _normalize_fraktion-Helfern wurden durch die neuen Tests sofort aufgedeckt und im selben Commit gefixt: 1. PortalaAdapter._normalize_fraktion matched "F.D.P." (mit Punkten, wie historische SH/HB-Drucksachen) nicht — Regex \bFDP\b ist zu strikt. Fix: \bF\.?\sD\.?\sP\.?\b analog zu ParLDokAdapter. 2. ParLDokAdapter._normalize_fraktion (auch PortalaAdapter) matched "Ministerium der Finanzen" nicht als Landesregierung, weil \bMINISTER\b die Wortgrenze auch nach MINISTER verlangt — bei MINISTERIUM steht aber IUM danach, keine Wortgrenze. Fix: \bMINISTER ohne abschließendes \b. Beide Bugs hätten Fraktion-Felder bei Drucksachen der Bremischen Bürgerschaft (FDP-Listen) und bei Landesregierungs-Drucksachen in MV/LSA fälschlich leer gelassen — exakt der "fraktionen=[]"- Befund aus dem MV-Smoke-Test in #4. Phase 0 aus Roadmap-Issue #49. 
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-04-08 23:26:06 +02:00			`def test_text_truncated_at_500_chars(self):`
			`long_chunk = {`
			`"FDP": {`
			`"wahlprogramm": [`
			`{`
			`"programm_id": "fdp-mv-2021",`
			`"seite": 1,`
			`"text": "A" * 1000, # 1000 chars → should be truncated`
			`"similarity": 0.7,`
			`}`
			`],`
			`}`
			`}`
			`out = format_quotes_for_prompt(long_chunk)`
			`# Truncation marker`
			`assert "..." in out`
			`# Original chunk text 1000 chars not present in full`
			`assert "A" * 1000 not in out`