"""Tests for embeddings.py prompt formatting. Reproduces the LLM-Halluzinations-Bug from the 2026-04-08 session (commits 1b5fd96 + bc7f4a6): the original ``format_quotes_for_prompt`` rendered each chunk as ``- S. X: "text"`` without any reference to the programme name. As a result the LLM hallucinated familiar source labels ("FDP NRW Wahlprogramm 2022") for chunks that actually came from MV/BE, because that was the strongest training-set prior for budget-policy citations. Fix: prepend the fully-qualified PROGRAMME[programm_id]["name"] to each quote. """ import sys import types # Stub openai before importing embeddings, since the test environment may # not have it installed and we don't actually need to make API calls. if "openai" not in sys.modules: openai_stub = types.ModuleType("openai") openai_stub.OpenAI = lambda **kw: None sys.modules["openai"] = openai_stub from app.embeddings import _chunk_source_label, format_quotes_for_prompt # ───────────────────────────────────────────────────────────────────────────── # _chunk_source_label — fully-qualified programme name + page # ───────────────────────────────────────────────────────────────────────────── class TestChunkSourceLabel: def test_known_programme_id(self): chunk = {"programm_id": "fdp-mv-2021", "seite": 73, "text": "..."} label = _chunk_source_label(chunk) assert "FDP Mecklenburg-Vorpommern" in label assert "S. 73" in label def test_known_programme_id_for_be(self): chunk = {"programm_id": "spd-be-2023", "seite": 24, "text": "..."} label = _chunk_source_label(chunk) assert "SPD Berlin" in label assert "2021" in label # the BE-2023.pdf files contain 2021er programmes assert "S. 24" in label def test_unknown_programme_id_falls_back_to_id(self): chunk = {"programm_id": "fake-xx-9999", "seite": 1, "text": "..."} label = _chunk_source_label(chunk) # Should not crash, should at least include the id and the page assert "fake-xx-9999" in label assert "S. 1" in label def test_missing_seite_uses_questionmark(self): chunk = {"programm_id": "cdu-mv-2021", "text": "..."} label = _chunk_source_label(chunk) assert "?" in label # ───────────────────────────────────────────────────────────────────────────── # format_quotes_for_prompt — every chunk must carry programme identification # ───────────────────────────────────────────────────────────────────────────── EXAMPLE_QUOTES = { "FDP": { "wahlprogramm": [ { "programm_id": "fdp-mv-2021", "partei": "FDP", "typ": "wahlprogramm", "seite": 73, "text": "Die Grundsätze von Wirtschaftlichkeit und Sparsamkeit", "similarity": 0.63, }, ], "parteiprogramm": [ { "programm_id": "fdp-grundsatz", "partei": "FDP", "typ": "parteiprogramm", "seite": 93, "text": "Liberale Marktwirtschaft erfordert solide Haushalte", "similarity": 0.60, }, ], }, "SPD": { "wahlprogramm": [ { "programm_id": "spd-mv-2021", "partei": "SPD", "typ": "wahlprogramm", "seite": 22, "text": "Verkehrswende weg vom motorisierten Individualverkehr", "similarity": 0.58, }, ], }, } class TestFormatQuotesForPrompt: def test_empty_input_returns_empty_string(self): assert format_quotes_for_prompt({}) == "" def test_renders_party_headings(self): out = format_quotes_for_prompt(EXAMPLE_QUOTES) assert "### FDP" in out assert "### SPD" in out def test_every_chunk_has_programme_name(self): """Regression: pre-fix this used "S. X:" only, no programme name — the LLM then hallucinated NRW-2022 sources from training data.""" out = format_quotes_for_prompt(EXAMPLE_QUOTES) # Each of the three chunks must reference its source programme assert "FDP Mecklenburg-Vorpommern" in out assert "FDP Grundsatzprogramm" in out assert "SPD Mecklenburg-Vorpommern" in out def test_contains_strict_citation_instruction(self): """The prompt header must explicitly forbid hallucinated sources.""" out = format_quotes_for_prompt(EXAMPLE_QUOTES) assert "ausschließlich" in out.lower() or "verbatim" in out.lower() or "wörtlich" in out.lower() def test_no_nrw_2022_appears_unless_chunks_are_actually_nrw(self): """Sanity: a pure MV+SPD chunk set must not mention NRW anywhere.""" out = format_quotes_for_prompt(EXAMPLE_QUOTES) assert "NRW" not in out assert "Nordrhein-Westfalen" not in out def test_renders_separate_blocks_for_wahl_and_parteiprogramm(self): out = format_quotes_for_prompt(EXAMPLE_QUOTES) assert "**Wahlprogramm:**" in out assert "**Grundsatzprogramm:**" in out def test_text_truncated_at_500_chars(self): long_chunk = { "FDP": { "wahlprogramm": [ { "programm_id": "fdp-mv-2021", "seite": 1, "text": "A" * 1000, # 1000 chars → should be truncated "similarity": 0.7, } ], } } out = format_quotes_for_prompt(long_chunk) # Truncation marker assert "..." in out # Original chunk text 1000 chars not present in full assert "A" * 1000 not in out