Add E2E functional acceptance test suite (#50, #51, #52, #53, #54)

Vier Sub-Issues unter Umbrella #50 — opt-in via 'pytest -m integration', Default-Suite (77 Unit-Tests) bleibt unberührt. - Sub-Issue A (#51): test_adapters_live.py — pro aktivem BL Reachability, Drucksache-ID-Format, Type-Filter, Datum-/Fraktion-Plausibilität, PDF-Link-HEAD-Probe (slow). NI als xfail (Login-Wall). - Sub-Issue B (#52): test_frontend_xref.py + ground_truth.py — pro BL ein manuell kuratiertes Frontend-Sample (Drucksache + Title-Substring + Fraktionen + Datum + PDF-URL), gegen das adapter.get_document() gespiegelt wird. Fängt Bug-Klasse 14 (Cross-Bundesland-Match). - Sub-Issue C (#53): test_wahlprogramme_indexed.py — Indexing-Status pro aktivem BL aus embeddings.db, PDF-Inhalts-Plausibilität (14 Marker + Wahlperioden-Horizont), expliziter Anti-Marker für Bug-Klasse 8 (CDU-BE 2021 vs 2026 PDF-Tausch durch abgeordnetenwatch). - Sub-Issue D (#54): test_citations_substring.py — Property-Verification: jedes vom LLM zitierte Snippet muss als (whitespace-normalisierter) Substring auf der angegebenen PDF-Seite vorhanden sein. Strict-Match mit Truncation-Marker-Toleranz, kein Fuzzy. Liest reale Assessments aus gwoe-antraege.db. Fängt Bug-Klassen 7/10/17 (Halluzination). Architektur: separates tests/integration/ Verzeichnis mit eigenem conftest.py, das die Stubs der Unit-Suite (fitz/bs4/openai/pydantic_settings) gezielt entfernt und auf echte Module umstellt — mit Fallback-Skip via pytest.require_module wenn lokale Dev-Maschine die Prod-Deps nicht hat. 206 neue Integration-Tests, 13 Helper-Unit-Tests. 77 Unit-Tests bleiben grün. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-09 10:00:20 +02:00 · 2026-04-09 10:00:20 +02:00 · 73a7f76472
commit 73a7f76472
parent a4af79688a
8 changed files with 1313 additions and 0 deletions
--- a/pytest.ini
+++ b/pytest.ini
@ -3,3 +3,11 @@ testpaths = tests
 asyncio_mode = auto
 filterwarnings =
    ignore::DeprecationWarning
+# Default-pytest läuft die schnelle Unit-Suite (~77 Tests, < 1s); die
+# E2E-Suite muss explizit via -m integration aktiviert werden, damit
+# Backend-Outages, LLM-API-Probleme oder fehlende prod-DB-Daten nicht
+# die normale lokale Entwicklung blockieren. Siehe Issue #50.
+markers =
+    integration: live HTTP/PDF/LLM/DB tests, slow, may flake on backend issues
+    slow: tests that take > 5s, opt out via -m "integration and not slow"
+addopts = -m "not integration"
--- a/tests/integration/init.py
+++ b/tests/integration/init.py
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@ -0,0 +1,101 @@
+"""Conftest for the integration test layer.
+
+The Unit-Suite in ``tests/conftest.py`` aggressively stubs ``fitz``,
+``bs4``, ``openai`` and ``pydantic_settings`` so the 77 fast tests can
+run without the full prod-requirements installed. That's the right
+trade-off for unit tests but blocks every E2E case in this directory:
+- ``fitz`` (PyMuPDF) is needed to read Wahlprogramm-PDF pages for
+  citation verification (Sub-Issue D) and content plausibility (Sub-C)
+- ``bs4`` (BeautifulSoup) is needed by the live NRWAdapter for HTML
+  parsing of OPAL responses (Sub-Issue A)
+- ``openai`` is needed by ``embeddings.create_embedding`` if a test
+  ever wants to compute a query vector against the live DashScope API
+- ``pydantic_settings`` provides the real ``Settings`` class with
+  paths to the prod-DB and the embeddings-DB
+
+This conftest does NOT replace those modules. It only sets up:
+
+- The ``app`` package import path so ``from app.parlamente import ...``
+  works when pytest is invoked from the webapp/ root
+- A skip-on-import-error guard for tests that need a particular
+  optional dep but don't want to crash the whole collection if it
+  isn't installed locally
+
+A test that runs in this directory must therefore have a real
+``pip install -r requirements.txt -r requirements-dev.txt`` setup. The
+``@pytest.mark.integration`` marker on every test in this directory
+ensures the default ``pytest`` invocation skips them.
+"""
+import sys
+from pathlib import Path
+
+import pytest
+
+# Make the `app` package importable when pytest is run from the webapp/ root.
+ROOT = Path(__file__).resolve().parent.parent.parent
+sys.path.insert(0, str(ROOT))
+
+
+# The parent ``tests/conftest.py`` aggressively stubs ``fitz``, ``bs4``,
+# ``openai`` and ``pydantic_settings`` in sys.modules so the unit suite
+# can run without prod-requirements. Pytest loads parent conftests
+# *first*, so by the time control reaches this file the stubs are
+# already in place.
+#
+# For integration tests we want to use the *real* modules where they're
+# installed. Strategy: per stubbed module, try to import the real one
+# (after temporarily removing the stub from sys.modules). If the real
+# module is available, keep it; if not, restore the stub so collection
+# doesn't crash on import — individual tests that need the real module
+# will skip via ``pytest.require_module(...)``.
+_OPTIONAL_REAL_MODULES = ("fitz", "bs4", "openai", "pydantic_settings")
+import importlib
+
+_STUB_MODULES: dict[str, object] = {}
+for _name in _OPTIONAL_REAL_MODULES:
+    _stub = sys.modules.pop(_name, None)
+    try:
+        importlib.import_module(_name)
+        # Real module found and now lives in sys.modules — drop the stub.
+    except ImportError:
+        # No real module available; restore the stub so unrelated
+        # imports of e.g. ``app.embeddings`` (which does ``from openai
+        # import OpenAI`` at module level) don't crash collection.
+        if _stub is not None:
+            sys.modules[_name] = _stub
+        _STUB_MODULES[_name] = _stub
+del _name, _stub, importlib
+
+
+def _require(module_name: str) -> None:
+    """Skip the calling test if an optional dependency isn't installed
+    or is currently still represented by the parent-conftest stub.
+
+    Use as ``pytest.require_module("fitz")`` at the top of a test that
+    needs PyMuPDF.
+    """
+    if module_name in _STUB_MODULES:
+        pytest.skip(
+            f"integration test skipped: real {module_name!r} not installed "
+            "in this environment (parent conftest stub still active)"
+        )
+    try:
+        __import__(module_name)
+    except ImportError as e:
+        pytest.skip(f"integration test skipped: {module_name} not installed ({e})")
+
+
+# Make the helper available on the pytest module namespace
+pytest.require_module = _require  # type: ignore[attr-defined]
+
+
+@pytest.fixture(scope="session")
+def webapp_root() -> Path:
+    """The webapp/ directory root, useful for resolving fixture paths."""
+    return ROOT
+
+
+@pytest.fixture(scope="session")
+def referenzen_dir(webapp_root: Path) -> Path:
+    """The static/referenzen directory containing all Wahlprogramm-PDFs."""
+    return webapp_root / "app" / "static" / "referenzen"
--- a/tests/integration/ground_truth.py
+++ b/tests/integration/ground_truth.py
@ -0,0 +1,149 @@
+"""Manuell kuratierte Drucksachen pro aktivem Bundesland.
+
+Pro BL **ein** Drucksachen-Tupel, das aus der jeweiligen Frontend-Suche
+des Landtags stammt. Diese Tupel sind die externe Ground Truth, gegen
+die der Adapter via ``adapter.get_document(...)`` gespiegelt wird
+(siehe ``test_frontend_xref.py``, Sub-Issue B).
+
+## Wartung
+
+Wenn ein Test in ``test_frontend_xref.py`` rot wird, ist mit hoher
+Wahrscheinlichkeit der Adapter durch eine Backend-Schema-Änderung
+gedriftet. Der Wartende soll dann:
+
+1. ``frontend_search_url`` öffnen
+2. Die Drucksache `drucksache` dort suchen
+3. Felder gegen das ``GroundTruth``-Tupel hier abgleichen
+4. Wenn die Felder im Frontend identisch geblieben sind, ist es ein
+   echter Adapter-Bug → Adapter fixen
+5. Wenn das Frontend selbst sich geändert hat (z.B. neue URL-Struktur),
+   ein neues Sample auswählen und das Tupel hier aktualisieren
+
+## Wie Samples ausgewählt werden
+
+Ideal: ein klar parteinaher Antrag der letzten 6 Monate, mit eindeutigem
+Title (Substring-Match-Toleranz) und unstrittiger Fraktion. Vermeiden:
+gemeinsame Anträge aller Fraktionen (Fraktionen-Test wird zu strikt),
+Anhörungen oder Berichte (Type-Filter-Test wird zu strikt), sehr alte
+Drucksachen (höhere Wahrscheinlichkeit dass der Adapter die nicht
+mehr im paginierten Window findet).
+"""
+from dataclasses import dataclass, field
+
+
+@dataclass
+class GroundTruth:
+    """Ein bekanntes Drucksache-Tupel als externe Ground Truth."""
+
+    bundesland: str
+    drucksache: str               # z.B. "8/6390"
+    title_substring: str          # eindeutiger Substring (klein gehalten)
+    expected_fraktionen: set[str] = field(default_factory=set)
+    datum: str = ""               # ISO; leer wenn der Adapter es legitim nicht extrahiert
+    pdf_url_substring: str = ""   # leer wenn die URL volatil ist
+    frontend_search_url: str = "" # Doku, woher das Sample stammt
+
+
+# Eine Drucksache pro aktivem Bundesland.
+# Stand: 2026-04-09. Bei Drift bitte das Sample ersetzen, nicht löschen.
+GROUND_TRUTH: list[GroundTruth] = [
+    # ─── NRW (OPAL) ─────────────────────────────────────────────────────
+    # NRW-Drucksachen folgen dem MMD18-XXXXX.pdf-URL-Schema. Substring
+    # "MMD18-" matched alle aktuellen Anträge der WP18.
+    GroundTruth(
+        bundesland="NRW",
+        drucksache="18/12345",
+        title_substring="",  # tbd: ersetzen mit echtem Sample
+        frontend_search_url="https://opal.landtag.nrw.de",
+    ),
+    # ─── MV (ParlDok 8.x) ───────────────────────────────────────────────
+    GroundTruth(
+        bundesland="MV",
+        drucksache="8/6390",
+        title_substring="Krisenmechanismus",
+        expected_fraktionen={"CDU"},
+        datum="2026-03-18",
+        pdf_url_substring="dokument/",
+        frontend_search_url="https://www.dokumentation.landtag-mv.de/parldok/",
+    ),
+    # ─── BE (PARDOK / portala) ──────────────────────────────────────────
+    GroundTruth(
+        bundesland="BE",
+        drucksache="19/3107",
+        title_substring="Kleingewässerprogramm",
+        expected_fraktionen={"CDU", "SPD"},
+        datum="",  # BE-Card-Parser extrahiert Datum sometimes via "vom"
+        pdf_url_substring="pardok.parlament-berlin.de",
+        frontend_search_url="https://pardok.parlament-berlin.de/portala/",
+    ),
+    # ─── LSA (PADOKA / portala) ─────────────────────────────────────────
+    GroundTruth(
+        bundesland="LSA",
+        drucksache="8/6726",
+        title_substring="Demokratie beginnt im Klassenzimmer",
+        expected_fraktionen={"GRÜNE"},
+        datum="2026-03-06",
+        pdf_url_substring="d6726",
+        frontend_search_url="https://padoka.landtag.sachsen-anhalt.de/portal/",
+    ),
+    # ─── BW (PARLIS / portala-Variante) ─────────────────────────────────
+    GroundTruth(
+        bundesland="BW",
+        drucksache="17/10323",
+        title_substring="Arbeitsbedingungen",
+        expected_fraktionen={"GRÜNE"},
+        datum="2026-03-16",
+        pdf_url_substring="17_10323",
+        frontend_search_url="https://parlis.landtag-bw.de/parlis/",
+    ),
+    # ─── HH (ParlDok 8.x) ───────────────────────────────────────────────
+    GroundTruth(
+        bundesland="HH",
+        drucksache="23/3700",
+        title_substring="Stadtteilklinik",
+        expected_fraktionen={"LINKE"},
+        datum="2026-04-08",
+        pdf_url_substring="dokument/",
+        frontend_search_url="https://www.buergerschaft-hh.de/parldok/",
+    ),
+    # ─── TH (ParlDok 8.x) ───────────────────────────────────────────────
+    GroundTruth(
+        bundesland="TH",
+        drucksache="8/1594",
+        title_substring="Lernmittelbeschaffung",
+        expected_fraktionen={"AfD"},
+        datum="2026-03-31",
+        pdf_url_substring="dokument/",
+        frontend_search_url="https://parldok.thueringer-landtag.de/parldok/",
+    ),
+    # ─── SH (Starfinder-CGI) ────────────────────────────────────────────
+    GroundTruth(
+        bundesland="SH",
+        drucksache="20/4309",
+        title_substring="Gesunde Ernährung",
+        expected_fraktionen={"SSW"},
+        datum="2026-04-07",
+        pdf_url_substring="drucksache-20-04309",
+        frontend_search_url="http://lissh.lvn.parlanet.de",
+    ),
+    # ─── BB (parladoku / portala) ───────────────────────────────────────
+    GroundTruth(
+        bundesland="BB",
+        drucksache="8/2",
+        title_substring="Geschäftsordnung",
+        expected_fraktionen={"BSW"},
+        datum="2024-10-17",
+        pdf_url_substring="parlamentsdokumentation.brandenburg.de",
+        frontend_search_url="https://www.parlamentsdokumentation.brandenburg.de/portal/",
+    ),
+    # ─── RP (OPAL / portala) ────────────────────────────────────────────
+    GroundTruth(
+        bundesland="RP",
+        drucksache="18/11250",
+        title_substring="Bildungschancen",
+        expected_fraktionen={"GRÜNE", "SPD", "FDP"},
+        datum="2025-01-23",
+        pdf_url_substring="opal.rlp.de",
+        frontend_search_url="https://opal.rlp.de/portal/",
+    ),
+]
--- a/tests/integration/test_adapters_live.py
+++ b/tests/integration/test_adapters_live.py
@ -0,0 +1,240 @@
+"""Sub-Issue A — Live Adapter Tests gegen die echten Landtag-Backends.
+
+Pro aktivem Bundesland aus ``aktive_bundeslaender()`` werden die folgenden
+Eigenschaften geprüft:
+
+1. Reachability — ``adapter.search("", limit=5)`` läuft erfolgreich durch
+2. Result-Anzahl > 0 (0 Treffer ist Indikator für Schema-Drift)
+3. Drucksachen-ID-Format ``\\d+/\\d+``
+4. Type-Filter — kein Result hat einen ``typ``, der eindeutig kein Antrag
+   ist (Substring-Match auf "Antrag" weil TH "Antrag gemäß § 79 GO" nutzt)
+5. Datum-Plausibilität — wenn gesetzt, dann zwischen ``wahlperiode_start``
+   und heute
+6. Fraktionen-Plausibilität — falls gesetzt, müssen sie in
+   ``landtagsfraktionen ∪ {"Landesregierung", "BSW", "FREIE WÄHLER", "SSW"}``
+   liegen
+7. PDF-Link erreichbar (markiert als ``slow``)
+
+Bug-Klassen aus den letzten Sessions, die diese Datei abdeckt:
+- 2 (LSA WEV01-vs-WEV06 Format-Drift)
+- 6 (TH composite type "Antrag gemäß § 79 GO")
+- 7 (HE Card-Layout — sobald HE wieder im aktiven Set ist)
+- 8 (NI Login-Page → xfail)
+- 13 (Datum leer trotz BE-Format mit "vom")
+- 16 (Pagination liefert 0 Anträge)
+- 18 (PDF-Download-Link kaputt)
+
+Issue: #51 (Sub-Issue A des Umbrella #50)
+"""
+import re
+from datetime import date
+
+import httpx
+import pytest
+
+from app.bundeslaender import BUNDESLAENDER, aktive_bundeslaender
+from app.parlamente import ADAPTERS, Drucksache
+
+
+pytestmark = pytest.mark.integration
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Setup
+# ─────────────────────────────────────────────────────────────────────────────
+
+# All currently active state codes, parametrised so each BL appears as its
+# own test entry in the pytest output. NI is xfailed because nilas/portal
+# is login-protected (see issue #22 for the deferred state).
+_ACTIVE_CODES = [bl.code for bl in aktive_bundeslaender()]
+
+_BL_PARAMS = [
+    pytest.param(
+        code,
+        marks=pytest.mark.xfail(
+            reason="nilas.niedersachsen.de/portal/ ist Login-protected, deferred (Issue #22)",
+            strict=False,
+        ),
+    )
+    if code == "NI"
+    else code
+    for code in _ACTIVE_CODES
+]
+
+
+# Whitelist of acceptable hit-typ values. Strict-Match would fail TH because
+# its types look like "Antrag gemäß § 79 GO". Substring "Antrag" is the
+# pragmatic invariant. The blacklist below is the explicit anti-marker.
+_ACCEPTABLE_TYP_SUBSTRING = "antrag"
+
+# Hits with these typ-substrings are clearly NOT Anträge — if any of these
+# appears in the result list the type-filter has drifted.
+_FORBIDDEN_TYP_SUBSTRINGS = (
+    "kleine anfrage",
+    "große anfrage",
+    "grosse anfrage",
+    "plenarprotokoll",
+    "sitzung",
+    "ausschussvorlage",
+    "beschlussempfehlung",
+    "gesetz- und verordnungsblatt",
+    "tagesordnung",
+)
+
+# Wahltermin-Insensitive Whitelist of fraction codes that may appear in
+# any active Bundesland's hit list, on top of the BL-specific
+# landtagsfraktionen.
+_UNIVERSAL_FRAKTIONEN = {
+    "Landesregierung",  # synthetic from _normalize_fraktion
+}
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 1. Reachability + 2. Result-Anzahl
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+@pytest.mark.parametrize("code", _BL_PARAMS, ids=lambda c: c)
+async def test_adapter_search_reachable(code: str):
+    """The adapter must answer ``search('', limit=5)`` with at least 1 hit
+    without raising or returning empty.
+
+    A 0-hit response is the strongest indicator of schema-drift, e.g. when
+    a Landtag changes their backend HTML structure or moves their endpoint.
+    """
+    adapter = ADAPTERS[code]
+    results = await adapter.search("", limit=5)
+    assert isinstance(results, list)
+    assert len(results) > 0, (
+        f"{code} adapter ({type(adapter).__name__}) returned 0 hits for "
+        "an unfiltered browse — likely schema-drift in the live backend"
+    )
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 3. Drucksachen-ID-Format
+# ─────────────────────────────────────────────────────────────────────────────
+
+_RE_DRUCKSACHE_ID = re.compile(r"^\d+/\d+(?:\(neu\))?$")
+
+
+@pytest.mark.parametrize("code", _BL_PARAMS, ids=lambda c: c)
+async def test_drucksache_id_format(code: str):
+    """Every result must have a Drucksache-Nummer in the canonical
+    ``<wp>/<num>`` form (e.g. ``8/6390``). Some adapters annotate
+    re-issued documents with ``(neu)`` — that's allowed too."""
+    adapter = ADAPTERS[code]
+    results = await adapter.search("", limit=10)
+    invalid = [r.drucksache for r in results if not _RE_DRUCKSACHE_ID.match(r.drucksache)]
+    assert not invalid, (
+        f"{code}: Drucksachen-IDs verletzen das ``<wp>/<num>``-Format: {invalid}"
+    )
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 4. Type-Filter-Wirksamkeit
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+@pytest.mark.parametrize("code", _BL_PARAMS, ids=lambda c: c)
+async def test_type_filter_returns_only_antraege(code: str):
+    """No hit may have a ``typ`` that's clearly NOT an Antrag.
+
+    The whitelist is permissive (substring "antrag", to allow TH-style
+    "Antrag gemäß § 79 GO"). The blacklist below is the explicit
+    anti-marker — if any forbidden substring appears, the type filter
+    has drifted.
+    """
+    adapter = ADAPTERS[code]
+    results = await adapter.search("", limit=10)
+    bad: list[tuple[str, str]] = []
+    for r in results:
+        typ_lower = (r.typ or "").lower()
+        for forbidden in _FORBIDDEN_TYP_SUBSTRINGS:
+            if forbidden in typ_lower:
+                bad.append((r.drucksache, r.typ))
+                break
+    assert not bad, (
+        f"{code}: hit list contains non-Antrag entries: {bad}"
+    )
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 5. Datum-Plausibilität
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+@pytest.mark.parametrize("code", _BL_PARAMS, ids=lambda c: c)
+async def test_datum_within_wahlperiode_window(code: str):
+    """If a hit has a ``datum``, it must lie between ``wahlperiode_start``
+    and today. Hits with empty ``datum`` are not asserted (some adapters
+    legitimately can't always extract one)."""
+    adapter = ADAPTERS[code]
+    bl = BUNDESLAENDER[code]
+    wp_start = bl.wahlperiode_start
+    today_iso = date.today().isoformat()
+
+    results = await adapter.search("", limit=10)
+    bad: list[str] = []
+    for r in results:
+        if not r.datum:
+            continue
+        if not (wp_start <= r.datum <= today_iso):
+            bad.append(f"{r.drucksache} datum={r.datum} not in [{wp_start}..{today_iso}]")
+    assert not bad, (
+        f"{code}: implausible Drucksachen-Datümer: " + "; ".join(bad)
+    )
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 6. Fraktionen-Plausibilität
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+@pytest.mark.parametrize("code", _BL_PARAMS, ids=lambda c: c)
+async def test_fraktionen_in_landtag(code: str):
+    """If a hit has Fraktionen, every entry must be either a known
+    Landtagsfraktion or one of the universal extras (Landesregierung)."""
+    adapter = ADAPTERS[code]
+    bl = BUNDESLAENDER[code]
+    allowed = set(bl.landtagsfraktionen) | _UNIVERSAL_FRAKTIONEN
+
+    results = await adapter.search("", limit=10)
+    bad: list[tuple[str, list[str]]] = []
+    for r in results:
+        if not r.fraktionen:
+            continue
+        unknown = [f for f in r.fraktionen if f not in allowed]
+        if unknown:
+            bad.append((r.drucksache, unknown))
+    assert not bad, (
+        f"{code}: unknown Fraktionen in hit list (allowed={sorted(allowed)}): {bad}"
+    )
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 7. PDF-Link erreichbar (slow)
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("code", _BL_PARAMS, ids=lambda c: c)
+async def test_first_result_pdf_link_reachable(code: str):
+    """HEAD-probe against the first hit's PDF link. Server must answer
+    200, 301, 302 or 303 (redirects to a real file)."""
+    adapter = ADAPTERS[code]
+    results = await adapter.search("", limit=1)
+    assert len(results) > 0, f"{code}: no hit to probe"
+
+    link = results[0].link
+    assert link, f"{code}: first hit has no link"
+
+    async with httpx.AsyncClient(
+        timeout=30,
+        follow_redirects=False,
+        headers={"User-Agent": "Mozilla/5.0 GWOE-Antragspruefer-Test"},
+    ) as client:
+        resp = await client.head(link)
+    assert resp.status_code in (200, 301, 302, 303), (
+        f"{code}: PDF link HEAD returned {resp.status_code}: {link}"
+    )
--- a/tests/integration/test_citations_substring.py
+++ b/tests/integration/test_citations_substring.py
@ -0,0 +1,397 @@
+"""Sub-Issue D — Citation Property-Verification.
+
+Pro reales Assessment in der ``gwoe-antraege.db`` wird jeder vom LLM
+zitierte Snippet darauf geprüft, ob er als (Whitespace-normalisierter)
+Substring tatsächlich auf der angegebenen PDF-Seite des angegebenen
+Wahlprogramms vorhanden ist.
+
+Das ist die kritischste Test-Klasse — fängt **direkt** die Bug-Klasse 7
+(LLM halluziniert "FDP NRW Wahlprogramm 2022, S. 75" als Quelle für ein
+MV-FDP-Antrag-Zitat) und alle künftigen Prompt-Drifts. Es ist die
+einzige der vier Sub-Issues, die sich nicht auf die LLM-Quellenangabe
+verlässt, sondern ihren tatsächlichen Wahrheitsgehalt prüft.
+
+Match-Strategie (vom User bestätigt): **strict substring** —
+Whitespace normalisiert, lowercased, mit Toleranz nur für LLM-typische
+Truncation-Marker (`...` am Anfang/Ende des Zitats). Keine Fuzzy-
+Matches, kein Jaccard, kein 80%-Overlap.
+
+Workflow:
+
+1. Lade die N neuesten Assessments pro aktivem BL aus ``gwoe-antraege.db``
+2. Pro Assessment: parse ``wahlprogramm_scores`` (JSON), iteriere über
+   alle ``zitate`` jeder Fraktion
+3. Pro Zitat:
+   - ``quelle`` parsen → Programm-ID via Match gegen ``PROGRAMME[*].name``
+   - Wenn kein Match: **Test fail** "halluzinierte Quelle"
+   - Seitennummer aus ``quelle`` extrahieren
+   - PDF-Seite via fitz lesen
+   - ``zitat['text']`` muss Substring der Seite sein
+
+Bug-Klassen, die diese Datei abdeckt:
+- 7  (LLM-Halluzination, alle Varianten)
+- 10 (Source-Erfindung)
+- 17 (Cross-Bundesland-Zitat — Programm-Match prüft auch ``bundesland``)
+
+Issue: #54 (Sub-Issue D des Umbrella #50)
+"""
+from __future__ import annotations
+
+import json
+import re
+import sqlite3
+from pathlib import Path
+from typing import Optional
+
+import pytest
+
+from app.bundeslaender import aktive_bundeslaender
+from app.embeddings import PROGRAMME
+from app.wahlprogramme import REFERENZEN_PATH
+
+
+pytestmark = pytest.mark.integration
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Helpers — die Test-Logik teilt sich in vier reine Funktionen
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+_RE_PAGE_NUMBER = re.compile(r"S\.\s*(\d+)|Seite\s+(\d+)", re.IGNORECASE)
+_RE_TRUNCATION = re.compile(r"^\s*\.{2,}|\.{2,}\s*$")
+_RE_WHITESPACE = re.compile(r"\s+")
+
+
+def _normalize(text: str) -> str:
+    """Lowercased, whitespace-collapsed text for substring matching."""
+    return _RE_WHITESPACE.sub(" ", text or "").strip().lower()
+
+
+def _strip_truncation_markers(text: str) -> str:
+    """Remove leading/trailing ``...`` (and similar truncation markers)
+    from a snippet so the substring check tolerates LLM-typical
+    elision but nothing else."""
+    return _RE_TRUNCATION.sub("", (text or "")).strip()
+
+
+def _resolve_quelle_to_programm_id(quelle: str) -> Optional[str]:
+    """Match a quelle-Label like ``"FDP Mecklenburg-Vorpommern Wahlprogramm 2021, S. 73"``
+    to a key in ``PROGRAMME``.
+
+    Strategy: scan all PROGRAMME[*].name entries and pick the one whose
+    name is the longest substring of ``quelle``. This tolerates the
+    "..., S. 73" suffix and small whitespace/dash variants. Returns
+    ``None`` if nothing matches — that's the explicit "LLM hat eine
+    Quelle erfunden, die in PROGRAMME nicht existiert"-Signal.
+    """
+    if not quelle:
+        return None
+    quelle_lower = _normalize(quelle)
+    best: tuple[int, Optional[str]] = (0, None)
+    for pid, info in PROGRAMME.items():
+        name = info.get("name", "")
+        if not name:
+            continue
+        name_lower = _normalize(name)
+        if name_lower in quelle_lower and len(name_lower) > best[0]:
+            best = (len(name_lower), pid)
+    return best[1]
+
+
+def _extract_page_number(quelle: str) -> Optional[int]:
+    """Pull the ``S. <n>`` page number out of a quelle string."""
+    if not quelle:
+        return None
+    m = _RE_PAGE_NUMBER.search(quelle)
+    if not m:
+        return None
+    page_str = m.group(1) or m.group(2)
+    try:
+        return int(page_str)
+    except (TypeError, ValueError):
+        return None
+
+
+def _pdf_page_text(programm_id: str, seite: int) -> Optional[str]:
+    """Read one page of a PROGRAMME PDF, normalised whitespace.
+
+    Caches results for the test session via the LRU below — pdf-open
+    is slow and a single Sub-Issue-D run touches each PDF many times.
+    """
+    info = PROGRAMME.get(programm_id)
+    if not info:
+        return None
+    return _cached_pdf_page_text(info["pdf"], seite)
+
+
+# Module-level cache (reset per test process). Pytest spawns one process per
+# session by default, so this is shared across all tests in this module.
+_PDF_PAGE_CACHE: dict[tuple[str, int], str] = {}
+
+
+def _cached_pdf_page_text(filename: str, seite: int) -> Optional[str]:
+    key = (filename, seite)
+    if key in _PDF_PAGE_CACHE:
+        return _PDF_PAGE_CACHE[key]
+    pytest.require_module("fitz")
+    import fitz
+
+    path = REFERENZEN_PATH / filename
+    if not path.exists():
+        return None
+    pdf = fitz.open(str(path))
+    try:
+        if seite < 1 or seite > len(pdf):
+            return None
+        text = pdf[seite - 1].get_text()
+    finally:
+        pdf.close()
+    normalised = _normalize(text)
+    _PDF_PAGE_CACHE[key] = normalised
+    return normalised
+
+
+def _is_substring(needle: str, haystack: str) -> bool:
+    """Strict substring check after normalization + truncation marker
+    stripping. The min length 20 chars guard avoids matching trivial
+    snippets like "ja" or "und"."""
+    needle_clean = _strip_truncation_markers(needle)
+    needle_norm = _normalize(needle_clean)
+    if len(needle_norm) < 20:
+        return True  # zu kurz für aussagekräftigen Substring-Test
+    return needle_norm in (haystack or "")
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Helper unit-tests (die Helper selbst sind nicht trivial, also testen wir sie)
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestHelpers:
+    def test_resolve_quelle_existing_programme(self):
+        # Echtes Beispiel aus prod (FDP MV Wahlprogramm 2021)
+        pid = _resolve_quelle_to_programm_id(
+            "FDP Mecklenburg-Vorpommern Wahlprogramm 2021, S. 73"
+        )
+        assert pid == "fdp-mv-2021"
+
+    def test_resolve_quelle_returns_none_for_hallucinated_source(self):
+        # Eine ausgedachte Quelle, die in PROGRAMME nicht existiert
+        pid = _resolve_quelle_to_programm_id(
+            "FDP Sankt-Pauli Hafenwirtschaftsprogramm 1997, S. 42"
+        )
+        assert pid is None
+
+    def test_resolve_quelle_picks_longest_match_when_multiple_partial(self):
+        # Mehrere "FDP ... Wahlprogramm"-Einträge in PROGRAMME — der längste
+        # Substring-Match (inkl. BL-Kürzel + Jahr) muss gewinnen, sodass
+        # NRW-Quellen nicht versehentlich auf MV gemappt werden.
+        pid = _resolve_quelle_to_programm_id("FDP NRW Wahlprogramm 2022, S. 5")
+        assert pid == "fdp-nrw-2022"
+
+    def test_extract_page_number_canonical(self):
+        assert _extract_page_number("CDU MV Wahlprogramm 2021, S. 33") == 33
+
+    def test_extract_page_number_seite_long_form(self):
+        assert _extract_page_number("Foo Bar Programm, Seite 7") == 7
+
+    def test_extract_page_number_returns_none_when_missing(self):
+        assert _extract_page_number("CDU MV Wahlprogramm 2021") is None
+
+    def test_normalize_collapses_whitespace_and_lowercases(self):
+        assert _normalize("  HELLO\n\n  WORLD  ") == "hello world"
+
+    def test_strip_truncation_markers_removes_leading_dots(self):
+        assert _strip_truncation_markers("... echte aussage") == "echte aussage"
+
+    def test_strip_truncation_markers_removes_trailing_dots(self):
+        assert _strip_truncation_markers("echte aussage ...") == "echte aussage"
+
+    def test_is_substring_strict_lowercase_match(self):
+        assert _is_substring("Klimaschutz", "wir wollen klimaschutz und mehr")
+
+    def test_is_substring_tolerates_truncation_markers(self):
+        assert _is_substring("...mehr klimaschutz...", "wir wollen mehr klimaschutz und gerechtigkeit")
+
+    def test_is_substring_short_needles_pass(self):
+        # Zu kurz für aussagekräftigen Test → True (statt false-positive)
+        assert _is_substring("ja", "egal was hier steht")
+
+    def test_is_substring_returns_false_when_clearly_absent(self):
+        assert not _is_substring(
+            "ein ganz langer satz der so nirgends in der quelle steht und definitiv nicht passt",
+            "wir wollen mehr klimaschutz",
+        )
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Sample Loader — liest reale Assessments aus der gwoe-antraege.db
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+def _gwoe_db_path() -> Optional[Path]:
+    """Resolve to the local prod-DB if mounted, or return None.
+
+    Looks at the same path as the prod-Container (``data/gwoe-antraege.db``
+    relative to the webapp root). Local dev machines without a copy will
+    skip the citation tests cleanly.
+    """
+    p = Path(__file__).resolve().parent.parent.parent / "data" / "gwoe-antraege.db"
+    return p if p.exists() else None
+
+
+def _load_recent_assessments(limit_per_bl: int = 5) -> list[dict]:
+    """Read the most recent assessments per active BL from gwoe-antraege.db.
+
+    Returns the parsed wahlprogramm_scores and minimal metadata for the
+    citation iteration. Skips silently if the DB isn't available locally.
+    """
+    db = _gwoe_db_path()
+    if db is None:
+        return []
+    out: list[dict] = []
+    conn = sqlite3.connect(db)
+    try:
+        active_codes = [bl.code for bl in aktive_bundeslaender()]
+        for code in active_codes:
+            rows = conn.execute(
+                """
+                SELECT drucksache, bundesland, wahlprogramm_scores
+                FROM assessments
+                WHERE bundesland = ? AND wahlprogramm_scores IS NOT NULL
+                ORDER BY updated_at DESC
+                LIMIT ?
+                """,
+                (code, limit_per_bl),
+            ).fetchall()
+            for ds, bl, ws_json in rows:
+                try:
+                    ws = json.loads(ws_json) if ws_json else []
+                except json.JSONDecodeError:
+                    continue
+                out.append({"drucksache": ds, "bundesland": bl, "wahlprogramm_scores": ws})
+    finally:
+        conn.close()
+    return out
+
+
+_ASSESSMENTS_SAMPLE = _load_recent_assessments(limit_per_bl=5)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Main test — pro Zitat in jedem Sample-Assessment
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+def _flat_zitate(assessment: dict) -> list[tuple[str, str, dict]]:
+    """Flatten an assessment to a list of (fraktion, kind, zitat) tuples
+    where kind is 'wahlprogramm' or 'parteiprogramm'."""
+    out: list[tuple[str, str, dict]] = []
+    for score_entry in assessment.get("wahlprogramm_scores") or []:
+        fraktion = score_entry.get("fraktion") or "?"
+        for kind in ("wahlprogramm", "parteiprogramm"):
+            block = score_entry.get(kind) or {}
+            for z in block.get("zitate") or []:
+                out.append((fraktion, kind, z))
+    return out
+
+
+def _all_citations() -> list[tuple[str, str, str, str, dict]]:
+    """Cartesian-flatten all sample-assessments × all zitate to one
+    parametrize-friendly list. Returns tuples of:
+    (drucksache, bundesland, fraktion, kind, zitat-dict)."""
+    out: list[tuple[str, str, str, str, dict]] = []
+    for a in _ASSESSMENTS_SAMPLE:
+        for fraktion, kind, zitat in _flat_zitate(a):
+            out.append((a["drucksache"], a["bundesland"], fraktion, kind, zitat))
+    return out
+
+
+_CITATIONS = _all_citations()
+_CITATION_IDS = [
+    f"{ds}-{bl}-{fr}-{kind}-{i}" for i, (ds, bl, fr, kind, _) in enumerate(_CITATIONS)
+]
+
+
+@pytest.mark.skipif(
+    _gwoe_db_path() is None,
+    reason="lokale gwoe-antraege.db nicht vorhanden — Sub-D läuft nur in einer "
+    "Umgebung mit prod-DB-Kopie (siehe data/ Volume im prod-Container)",
+)
+@pytest.mark.skipif(
+    not _CITATIONS,
+    reason="keine Assessments mit zitaten in der lokalen DB gefunden",
+)
+@pytest.mark.parametrize(
+    ("drucksache", "bundesland", "fraktion", "kind", "zitat"),
+    _CITATIONS,
+    ids=_CITATION_IDS,
+)
+def test_zitat_is_substring_of_named_pdf_page(
+    drucksache: str,
+    bundesland: str,
+    fraktion: str,
+    kind: str,
+    zitat: dict,
+):
+    """Property-Verification: jedes vom LLM zitierte Snippet muss als
+    Substring auf der angegebenen PDF-Seite tatsächlich vorhanden sein.
+
+    Wenn dieser Test fehlschlägt, ist genau einer der drei Fehler-
+    Modi aufgetreten:
+
+    1. **Halluzinierte Quelle**: das Programm in ``zitat['quelle']``
+       existiert in PROGRAMME nicht (Bug-Klasse 7/10)
+    2. **Halluzinierte Seite**: das Programm existiert, aber die
+       angegebene Seite enthält den Snippet nicht
+    3. **Halluzinierter Inhalt**: das Programm + die Seite sind real,
+       aber der Snippet ist eine Erfindung des LLM
+
+    Alle drei Modi sind echte Bugs in der LLM-Pipeline.
+    """
+    quelle = zitat.get("quelle", "")
+    text = zitat.get("text", "")
+
+    if not quelle or not text:
+        pytest.skip(f"{drucksache}/{fraktion}/{kind}: zitat ohne quelle oder text")
+
+    pid = _resolve_quelle_to_programm_id(quelle)
+    assert pid is not None, (
+        f"halluzinierte Quelle in {drucksache}/{fraktion}/{kind}: "
+        f"{quelle!r} matched keinen PROGRAMME-Eintrag"
+    )
+
+    # Bonus-Check für Bug-Klasse 17 (Cross-Bundesland-Zitat): das aufgelöste
+    # Programm muss zu dem Bundesland des Antrags passen, oder ein
+    # Grundsatzprogramm sein (bundesland=None).
+    prog_info = PROGRAMME.get(pid, {})
+    prog_bl = prog_info.get("bundesland")
+    if prog_bl is not None and prog_bl != bundesland:
+        pytest.fail(
+            f"Cross-Bundesland-Zitat in {drucksache} ({bundesland}): das LLM "
+            f"zitiert aus {pid} (bundesland={prog_bl}) — das ist Bug-Klasse 17"
+        )
+
+    page = _extract_page_number(quelle)
+    if page is None:
+        pytest.skip(
+            f"{drucksache}/{fraktion}/{kind}: keine Seitennummer in quelle "
+            f"{quelle!r}, kann substring-check nicht ausführen"
+        )
+
+    page_text = _pdf_page_text(pid, page)
+    assert page_text is not None, (
+        f"PDF-Seite {page} in {pid} nicht lesbar (PDF zu kurz oder fehlt)"
+    )
+
+    if not _is_substring(text, page_text):
+        # Diff für die Fehlermeldung — gekürzt um die Output-Logs sauber zu halten
+        snippet_preview = text[:200].strip().replace("\n", " ")
+        page_preview = page_text[:200].replace("\n", " ")
+        pytest.fail(
+            f"Zitat in {drucksache}/{fraktion}/{kind} nicht auf "
+            f"{pid} S.{page} auffindbar:\n"
+            f"  zitiert: {snippet_preview!r}\n"
+            f"  PDF-Seite enthält: {page_preview!r}"
+        )
--- a/tests/integration/test_frontend_xref.py
+++ b/tests/integration/test_frontend_xref.py
@ -0,0 +1,105 @@
+"""Sub-Issue B — Adapter ↔ Frontend Cross-Validation.
+
+Pro aktivem BL ist im ``ground_truth.py``-Modul ein einzelnes Drucksachen-
+Tupel kuratiert, das aus der echten Frontend-Suche des jeweiligen
+Landtags stammt. Dieser Test ruft ``adapter.get_document(...)`` mit der
+bekannten ID auf und prüft, dass:
+
+- die Drucksache überhaupt gefunden wird
+- der Title (substring) passt
+- die erwarteten Fraktionen drin sind
+- das Datum (wenn gesetzt im Sample) übereinstimmt
+- der PDF-Link das erwartete URL-Fragment enthält
+
+Bug-Klassen aus den letzten Sessions, die diese Datei abdeckt:
+- 14 (get_document() liefert Match aus falschem Bundesland)
+- Allgemeine Schema-Drift in URL-Strukturen, Hit-Format-Änderungen,
+  Encoding-Bugs, Pagination-Cut-Offs, Adapter-Reuse-Konfigurations-Fehler
+
+Issue: #52 (Sub-Issue B des Umbrella #50)
+
+Wartung: siehe Doku im ``ground_truth.py``-Header.
+"""
+import pytest
+
+from app.bundeslaender import aktive_bundeslaender
+from app.parlamente import ADAPTERS
+
+from .ground_truth import GROUND_TRUTH
+
+
+pytestmark = pytest.mark.integration
+
+
+_ACTIVE_CODES = {bl.code for bl in aktive_bundeslaender()}
+
+# Skip Samples für BL die nicht (mehr) aktiv sind
+_GT_PARAMS = [pytest.param(gt, id=gt.bundesland) for gt in GROUND_TRUTH if gt.bundesland in _ACTIVE_CODES]
+
+
+@pytest.mark.parametrize("gt", _GT_PARAMS)
+async def test_adapter_finds_known_drucksache(gt):
+    """Cross-Validation gegen die Frontend-Suche des jeweiligen Landtags.
+
+    Wenn dieser Test fehlschlägt: erst den Frontend-URL aus
+    ``gt.frontend_search_url`` öffnen und prüfen, ob die Drucksache
+    überhaupt noch existiert. Wenn ja → Adapter-Bug. Wenn nein → ein
+    neues Sample im ``ground_truth.py`` aufnehmen.
+    """
+    if gt.bundesland not in ADAPTERS:
+        pytest.skip(f"{gt.bundesland} hat keinen registrierten Adapter")
+    if not gt.title_substring:
+        pytest.skip(
+            f"{gt.bundesland}: Sample noch nicht kuratiert "
+            "(title_substring leer in ground_truth.py)"
+        )
+
+    adapter = ADAPTERS[gt.bundesland]
+    doc = await adapter.get_document(gt.drucksache)
+    assert doc is not None, (
+        f"{gt.bundesland} adapter ({type(adapter).__name__}) hat die "
+        f"bekannte Drucksache {gt.drucksache!r} nicht gefunden. Frontend-"
+        f"Probe: {gt.frontend_search_url}"
+    )
+
+    # 1. Drucksachen-Nummer roundtrip
+    assert doc.drucksache == gt.drucksache, (
+        f"{gt.bundesland}: get_document({gt.drucksache!r}) lieferte "
+        f"abweichende drucksache={doc.drucksache!r}"
+    )
+
+    # 2. Title-Substring
+    assert gt.title_substring.lower() in doc.title.lower(), (
+        f"{gt.bundesland}: title_substring {gt.title_substring!r} nicht "
+        f"in adapter-title {doc.title!r}"
+    )
+
+    # 3. Erwartete Fraktionen sind alle da (Subset-Match — Adapter darf
+    # mehr Fraktionen erkennen als das Sample erwartet)
+    if gt.expected_fraktionen:
+        adapter_fraktionen = set(doc.fraktionen)
+        missing = gt.expected_fraktionen - adapter_fraktionen
+        assert not missing, (
+            f"{gt.bundesland}: erwartete Fraktionen {gt.expected_fraktionen} "
+            f"nicht alle im Adapter-Output {adapter_fraktionen}; fehlt: {missing}"
+        )
+
+    # 4. Datum (nur wenn das Sample eines hat)
+    if gt.datum:
+        assert doc.datum == gt.datum, (
+            f"{gt.bundesland}: erwartetes datum={gt.datum!r}, adapter lieferte "
+            f"{doc.datum!r}"
+        )
+
+    # 5. PDF-Link enthält erwartetes URL-Fragment
+    if gt.pdf_url_substring:
+        assert gt.pdf_url_substring.lower() in doc.link.lower(), (
+            f"{gt.bundesland}: pdf_url_substring {gt.pdf_url_substring!r} "
+            f"nicht in adapter-link {doc.link!r}"
+        )
+
+    # 6. Bundesland-Konsistenz — fängt Bug-Klasse 14 (Cross-Bundesland-Match)
+    assert doc.bundesland == gt.bundesland, (
+        f"adapter[{gt.bundesland}].get_document() lieferte ein Doc mit "
+        f"bundesland={doc.bundesland!r}"
+    )
--- a/tests/integration/test_wahlprogramme_indexed.py
+++ b/tests/integration/test_wahlprogramme_indexed.py
@ -0,0 +1,313 @@
+"""Sub-Issue C — Wahlprogramm Indexing-Status + PDF Content Verification.
+
+Drei Test-Klassen:
+
+C1 — Indexing-Status: jedes WAHLPROGRAMME-Eintrag eines aktiven BL muss
+     in der embeddings.db als ≥1-Chunk-Programm vorhanden sein.
+
+C2 — Inhalts-Plausibilität: jede registrierte PDF-Datei muss real auf der
+     ersten Seite Marker für die richtige Wahlperiode + Partei + Programm-
+     Typ enthalten. Inkl. Anti-Marker für die abgeordnetenwatch-PDF-Tausch-
+     Bug-Klasse 8 (CDU BE 2023→2026).
+
+C3 — Embeddings-Statistik: chunk-count > seiten/10 als grobe Heuristik
+     gegen abgebrochene Indexierungen.
+
+Bug-Klassen aus den letzten Sessions, die diese Datei abdeckt:
+- 8 (abgeordnetenwatch tauscht PDF unter altem Slug)
+- 11 (Wahlprogramm fehlt komplett im Index — heute morgen für 6 BL)
+- 15 (embeddings-DB Chunks aus altem Programm-Slug)
+
+Issue: #53 (Sub-Issue C des Umbrella #50)
+
+Hinweis: dieser Test liest die **lokale** ``embeddings.db``, nicht die
+prod-Container-DB. Wenn sie lokal nicht existiert, werden alle C1+C3
+Tests automatisch xfailed. C2 (PDF-Inhalt) hängt nur von den PDF-Files
+ab und läuft immer.
+"""
+from __future__ import annotations
+
+import re
+from collections import defaultdict
+from pathlib import Path
+from typing import Optional
+
+import pytest
+
+from app.bundeslaender import aktive_bundeslaender
+from app.embeddings import EMBEDDINGS_DB, PROGRAMME, get_indexing_status
+from app.wahlprogramme import REFERENZEN_PATH, WAHLPROGRAMME
+
+
+pytestmark = pytest.mark.integration
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Helpers
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+def _pdf_pages_text(filename: str, n: Optional[int] = None) -> str:
+    """Read the first ``n`` pages (or all pages) of a Wahlprogramm-PDF
+    and return the concatenated text, normalised whitespace.
+
+    Uses real PyMuPDF (fitz). Tests calling this helper must be ok with
+    skipping if fitz isn't installed in the local environment.
+    """
+    pytest.require_module("fitz")  # set up by integration conftest
+    import fitz
+
+    path = REFERENZEN_PATH / filename
+    if not path.exists():
+        pytest.fail(f"PDF nicht gefunden: {path}")
+    pdf = fitz.open(str(path))
+    try:
+        page_count = len(pdf) if n is None else min(n, len(pdf))
+        chunks: list[str] = [pdf[i].get_text() for i in range(page_count)]
+    finally:
+        pdf.close()
+    text = " ".join(chunks)
+    # Normalise whitespace
+    return re.sub(r"\s+", " ", text).strip()
+
+
+# Backwards-compat alias for the older name still used in two tests below
+def _pdf_first_pages_text(filename: str, n: int = 5) -> str:
+    return _pdf_pages_text(filename, n=n)
+
+
+def _all_active_wahlprogramme() -> list[tuple[str, str, dict]]:
+    """List of (bundesland, partei, info) for every WAHLPROGRAMME entry of
+    a currently active Bundesland. Used as parametrize input."""
+    out: list[tuple[str, str, dict]] = []
+    active_codes = {bl.code for bl in aktive_bundeslaender()}
+    for bl, parteien in WAHLPROGRAMME.items():
+        if bl not in active_codes:
+            continue
+        for partei, info in parteien.items():
+            out.append((bl, partei, info))
+    return out
+
+
+_WAHLPROG_PARAMS = _all_active_wahlprogramme()
+_WAHLPROG_IDS = [f"{bl}-{partei}" for bl, partei, _ in _WAHLPROG_PARAMS]
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# C1 — Indexing-Status pro aktivem BL
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+def _embeddings_db_has_active_data() -> bool:
+    """C1 + C3 sollen nur laufen, wenn die lokale embeddings.db
+    mindestens für die heute aktiven Bundesländer Chunks enthält.
+    Sonst (z.B. lokale Dev-Maschine ohne Indexing-Lauf, oder eine
+    pre-#5-DB ohne bundesland-Spalte) skippen wir, damit der Test
+    gegen die prod-DB im CI/Container läuft, nicht gegen einen
+    halb-leeren lokalen Snapshot."""
+    if not EMBEDDINGS_DB.exists():
+        return False
+    import sqlite3
+
+    try:
+        conn = sqlite3.connect(EMBEDDINGS_DB)
+        try:
+            cols = {row[1] for row in conn.execute("PRAGMA table_info(chunks)")}
+            if "bundesland" not in cols:
+                return False  # pre-#5 schema, no bundesland column
+            rows = conn.execute(
+                "SELECT bundesland, COUNT(DISTINCT programm_id) "
+                "FROM chunks WHERE bundesland IS NOT NULL GROUP BY bundesland"
+            ).fetchall()
+        finally:
+            conn.close()
+    except sqlite3.Error:
+        return False
+
+    indexed_bls = {bl for bl, n in rows if n > 0}
+    active_codes = {bl.code for bl in aktive_bundeslaender()}
+    expected = {bl for bl in active_codes if bl in WAHLPROGRAMME}
+    return expected.issubset(indexed_bls)
+
+
+@pytest.mark.skipif(
+    not _embeddings_db_has_active_data(),
+    reason=(
+        f"local {EMBEDDINGS_DB} hat nicht alle aktiven BL indexiert — "
+        "C1/C3 laufen nur in einer Umgebung mit aktueller DB (prod-Container "
+        "oder lokaler index_programm-Lauf)"
+    ),
+)
+class TestIndexingStatus:
+    """C1 — every WAHLPROGRAMME entry of an active BL must be indexed."""
+
+    def test_no_active_bundesland_has_unindexed_wahlprogramme(self):
+        status = get_indexing_status()
+        indexed = {p["id"] for p in status["programmes"] if p["indexed"]}
+
+        missing: list[str] = []
+        for bl, partei, info in _all_active_wahlprogramme():
+            pid = info["file"].rsplit(".", 1)[0]
+            if pid not in indexed:
+                missing.append(f"{bl}/{partei}: {pid}")
+        assert not missing, (
+            "Wahlprogramme aktiver Bundesländer fehlen in embeddings.db:\n  "
+            + "\n  ".join(missing)
+        )
+
+    def test_every_indexed_chunk_belongs_to_known_programm_id(self):
+        """Catches Bug-Klasse 15: stale chunks for programm_ids that no
+        longer exist in PROGRAMME (e.g. after a slug rename)."""
+        status = get_indexing_status()
+        # status["programmes"] is iterated from PROGRAMME, so an orphan
+        # would not appear there. Read the DB directly instead.
+        import sqlite3
+
+        conn = sqlite3.connect(EMBEDDINGS_DB)
+        try:
+            db_ids = {row[0] for row in conn.execute("SELECT DISTINCT programm_id FROM chunks")}
+        finally:
+            conn.close()
+
+        orphans = sorted(db_ids - set(PROGRAMME.keys()))
+        assert not orphans, (
+            "embeddings.db enthält Chunks für unbekannte programm_id:\n  "
+            + "\n  ".join(orphans)
+        )
+
+    def test_chunk_count_per_active_bundesland_is_reasonable(self):
+        """C3 grob: pro aktivem BL erwartet man min. 100 chunks insgesamt
+        (ein Wahlprogramm hat typisch 50–300 chunks). Fängt Bug-Klasse
+        "Indexing crashte vorzeitig"."""
+        status = get_indexing_status()
+        per_bl: dict[str, int] = defaultdict(int)
+        for p in status["programmes"]:
+            info = PROGRAMME.get(p["id"], {})
+            bl = info.get("bundesland")
+            if bl:
+                per_bl[bl] += p["chunks"]
+
+        active_codes = {bl.code for bl in aktive_bundeslaender()}
+        too_low = {bl: count for bl, count in per_bl.items() if bl in active_codes and count < 100}
+        assert not too_low, (
+            "Aktive Bundesländer mit verdächtig wenigen indexierten Chunks "
+            f"(< 100, vermutlich abgebrochene Indexierung): {too_low}"
+        )
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# C2 — Inhalts-Plausibilität pro PDF
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+_PROGRAMM_MARKERS = (
+    "wahlprogramm",
+    "regierungsprogramm",
+    "zukunftsprogramm",
+    "landeswahlprogramm",
+    "berlin-plan",
+    "berlin plan",
+    "wahlmanifest",
+    "programm",  # very permissive, fallback
+    "wahlperiode",
+    "landtagswahl",
+    "bürgerschaftswahl",
+    "abgeordnetenhaus",
+    "agh-wahl",
+    "beschluss",  # many programs say "Beschluss vom DD.MM.YYYY"
+)
+
+
+@pytest.mark.parametrize(("bundesland", "partei", "info"), _WAHLPROG_PARAMS, ids=_WAHLPROG_IDS)
+def test_pdf_contains_programm_marker(bundesland: str, partei: str, info: dict):
+    """C2 — irgendwo im PDF muss ein Wahlprogramm-Marker-Wort vorkommen.
+
+    Fängt versehentlich indexierte Nicht-Wahlprogramme (z.B. ein Geschäfts-
+    bericht der Stiftung statt des Programms). Sehr permissiv: einer von
+    14 Markern reicht. Strikter wird es im ``contains_wahljahr``-Test.
+    """
+    text = _pdf_pages_text(info["file"]).lower()
+    matched = [m for m in _PROGRAMM_MARKERS if m in text]
+    assert matched, (
+        f"{bundesland}/{partei} ({info['file']}): keiner der Wahlprogramm-"
+        f"Marker {_PROGRAMM_MARKERS} im ganzen PDF gefunden — vermutlich "
+        "falsches PDF eingespielt"
+    )
+
+
+@pytest.mark.parametrize(("bundesland", "partei", "info"), _WAHLPROG_PARAMS, ids=_WAHLPROG_IDS)
+def test_pdf_year_horizon_is_plausible(bundesland: str, partei: str, info: dict):
+    """C2 — Plausibilitäts-Check über die Verteilung der Jahres-Marker
+    im PDF.
+
+    Bewusst keine "Wahljahr muss vorkommen"-Annahme — viele Programme
+    nennen das Wahljahr selbst gar nicht (z.B. CDU-BE 2021 erwähnt es
+    null mal, hat aber "Aufzüge bis 2026" als 5-Jahres-Forderung). Was
+    wir stattdessen prüfen:
+
+    - Mindestens **eine** Jahreszahl im erwarteten Wahlperioden-Horizont
+      (Wahljahr ± 10) muss vorkommen — sonst ist es kein Programm zur
+      passenden Wahl
+    - Es darf **kein** Cluster von Jahren in einer DEUTLICH späteren
+      Periode dominieren (z.B. ein "2031–2036"-Programm in einem File,
+      das angeblich 2021er sein soll)
+    """
+    text = _pdf_pages_text(info["file"])
+    years = [int(y) for y in re.findall(r"\b(20\d\d)\b", text)]
+    expected = info["jahr"]
+
+    # Bedingung 1: ≥ 1 Jahr aus dem erwarteten Horizont
+    horizon = range(expected - 1, expected + 11)
+    in_horizon = [y for y in years if y in horizon]
+    assert in_horizon, (
+        f"{bundesland}/{partei} ({info['file']}): kein einziges Jahr im "
+        f"erwarteten Wahlperioden-Horizont {expected}..{expected + 10} "
+        f"gefunden (gefunden: {sorted(set(years))[:10]}). PDF passt nicht "
+        "zur erwarteten Wahlperiode — möglicher anachronistischer Tausch."
+    )
+
+    # Bedingung 2: keine deutlich spätere Wahlperiode dominiert
+    later_horizon = range(expected + 5, expected + 15)
+    later_count = sum(1 for y in years if y in later_horizon)
+    horizon_count = sum(1 for y in years if y in horizon)
+    if horizon_count > 0 and later_count > horizon_count * 3:
+        pytest.fail(
+            f"{bundesland}/{partei} ({info['file']}): mehr als 3× so viele "
+            f"Jahres-Marker in der Folge-Wahlperiode {list(later_horizon)} "
+            f"({later_count}) als im erwarteten Horizont ({horizon_count}) — "
+            "stark anachronistisches PDF."
+        )
+
+
+def test_be_cdu_pdf_is_2021_program_not_2026():
+    """Expliziter Anti-Marker für die im Issue #10-Kommentar dokumentierte
+    Bug-Klasse 8: abgeordnetenwatch hat das ``cduwahlprogrammahw2021_0.pdf``-
+    File potenziell nachträglich gegen den ``Berlin-Plan 2026`` ersetzt.
+
+    Verifikation: das 2021er Programm der laufenden WP19 hat eine
+    5-Jahres-Forderung-Sprache mit Zielen "bis 2026" oder ähnlich
+    ("Aufzüge bis zum Jahr 2026" ist ein verifizierter Marker im
+    aktuellen 2021er PDF). Das hypothetische 2026er hätte stattdessen
+    Ziele "bis 2031".
+
+    Wenn jemand in einem Folge-Build wieder denselben Slug zieht und
+    abgeordnetenwatch zwischenzeitlich getauscht hat, schlägt dieser
+    Test mit klarer Fehlermeldung fehl.
+    """
+    text = _pdf_pages_text("cdu-be-2023.pdf")
+    # Positiv-Marker: 2021er Programm spricht über Ziele bis 2026
+    assert "2026" in text, (
+        "cdu-be-2023.pdf hat keinerlei '2026'-Marker — das passt zu "
+        "keinem der erwarteten Programme (weder 2021er mit '5-Jahres-"
+        "Horizont bis 2026' noch 2026er mit Beschluss-Datum 2026)"
+    )
+    # Anti-Marker: das hypothetische 2026er-Programm hätte einen
+    # "2031"-Horizont (Wahlperiode 2026–2031)
+    cnt_2031 = text.count("2031")
+    cnt_2026 = text.count("2026")
+    assert cnt_2031 < cnt_2026, (
+        f"cdu-be-2023.pdf zählt mehr '2031' ({cnt_2031}) als '2026' "
+        f"({cnt_2026}) — das passt zum 2026er Programm der WP20, NICHT "
+        "zum 2021er Programm der laufenden WP19. Bitte das echte 2021er "
+        "PDF aus FES/KAS-Archiv neu beschaffen."
+    )