Add pytest suite + fix two regex bugs uncovered by it (#46)
Erste Tests für die Codebase. 77 Tests, 0.08s Laufzeit, decken die
drei Bug-Klassen aus der April-2026-Adapter-Session ab und haben
bereits zwei weitere Bugs im Production-Code aufgedeckt.
## Setup
- requirements-dev.txt mit pytest + pytest-asyncio
- pytest.ini mit asyncio_mode=auto
- tests/conftest.py stubbt fitz/bs4/openai/pydantic_settings, damit
die Suite ohne den vollen prod-requirements-Satz läuft (pure unit
tests, kein PDF-Parsing, kein HTTP)
## Tests
- tests/test_parlamente.py (33 Tests)
* PortalaAdapter._parse_hit_list_cards: doctype/doctype_full
NameError-Regression aus 1cb030a, plus Title/Drucksache/Fraktion-
/Datum/PDF-Extraktion gegen ein BE-Card-Fixture
* PortalaAdapter._parse_hit_list_dump: gegen ein LSA-Perl-Dump-
Fixture inkl. Hex-Escape-Decoding (\x{fc} → ü)
* PortalaAdapter._parse_hit_list_html: Auto-Detection zwischen
Card- und Dump-Format
* PortalaAdapter._normalize_fraktion: kanonische Fraktion-Codes
inkl. F.D.P.-mit-Punkten, BÜNDNIS 90, DIE LINKE, BSW
* ParLDokAdapter._hit_to_drucksache: JSON-Hit → Drucksache
Mapping inkl. /navpanes-Stripping, MdL-mit-Partei-in-Klammern,
Landesregierung-Detection
* ParLDokAdapter._fulltext_id: bundle.js-mirroring (deferred,
aber dokumentiert)
* ADAPTERS-Registry-Sanity
- tests/test_embeddings.py (11 Tests)
* _chunk_source_label: Programm-Name + Seite (Halluzinations-
Bug-Regression aus 1b5fd96)
* format_quotes_for_prompt: jeder Chunk muss Programm-Name
enthalten, strict-citation-Hinweis muss im Output sein,
keine NRW-Halluzinationen für MV/BE-Chunk-Sets
- tests/test_wahlprogramme.py (14 Tests)
* Registry-Struktur (jahr int, seiten int, .pdf-Endung)
* File-Existenz: jede registrierte PDF muss in
static/referenzen/ liegen — würde Tippfehler in den 22
indexierten Programmen sofort fangen
* embeddings.PROGRAMME-Konsistenz-Cross-Check
- tests/test_bundeslaender.py (15 Tests)
* Sanity über 16-State-Registry
* #48-Klassifikations-Regression: TH=ParlDok, HB=StarWeb,
SN=Eigensystem
* Wahltermine plausibel (zwischen 2026 und 2035)
- tests/test_analyzer.py (4 Tests)
* Markdown-Codeblock-Stripping aus dem JSON-Retry-Loop
## Bug-Funde während der Test-Schreibphase
Zwei Production-Bugs in den _normalize_fraktion-Helfern wurden
durch die neuen Tests sofort aufgedeckt und im selben Commit gefixt:
1. PortalaAdapter._normalize_fraktion matched "F.D.P." (mit Punkten,
wie historische SH/HB-Drucksachen) nicht — Regex \bFDP\b ist zu
strikt. Fix: \bF\.?\s*D\.?\s*P\.?\b analog zu ParLDokAdapter.
2. ParLDokAdapter._normalize_fraktion (auch PortalaAdapter) matched
"Ministerium der Finanzen" nicht als Landesregierung, weil
\bMINISTER\b die Wortgrenze auch nach MINISTER verlangt — bei
MINISTERIUM steht aber IUM danach, keine Wortgrenze. Fix:
\bMINISTER ohne abschließendes \b.
Beide Bugs hätten Fraktion-Felder bei Drucksachen der Bremischen
Bürgerschaft (FDP-Listen) und bei Landesregierungs-Drucksachen
in MV/LSA fälschlich leer gelassen — exakt der "fraktionen=[]"-
Befund aus dem MV-Smoke-Test in #4.
Phase 0 aus Roadmap-Issue #49.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-08 23:26:06 +02:00
|
|
|
"""Tests for analyzer.py JSON-stripping logic.
|
|
|
|
|
|
|
|
|
|
Reproduces the markdown-codeblock-stripping in the LLM retry loop. Real
|
|
|
|
|
Qwen responses sometimes wrap their JSON in ```json …``` fences (despite
|
|
|
|
|
the prompt asking for raw JSON), and the analyzer must tolerate that
|
|
|
|
|
without resorting to retries.
|
|
|
|
|
"""
|
|
|
|
|
import json
|
|
|
|
|
import sys
|
|
|
|
|
import types
|
|
|
|
|
|
|
|
|
|
# Install a throwaway ``openai`` module before analyzer is imported, so the
# suite never needs the real SDK (and never opens a network connection).
if "openai" not in sys.modules:
    openai_stub = types.ModuleType("openai")
    # The analyzer only constructs a client; any kwargs are accepted and ignored.
    openai_stub.OpenAI = lambda **_kwargs: None
    sys.modules["openai"] = openai_stub
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _strip_markdown_fences(content: str) -> str:
|
|
|
|
|
"""Mirror the analyzer's markdown-stripping snippet so we can unit-test
|
|
|
|
|
the parsing rules without actually invoking the LLM.
|
|
|
|
|
|
|
|
|
|
Keep this in sync with analyzer.py around the `if content.startswith("```")`
|
|
|
|
|
branch — if the analyzer changes, this helper changes too. The point of
|
|
|
|
|
the duplication is that the analyzer's stripping is buried in an async
|
|
|
|
|
LLM call that we cannot easily unit-test directly.
|
|
|
|
|
"""
|
|
|
|
|
content = content.strip()
|
|
|
|
|
if content.startswith("```"):
|
|
|
|
|
content = content.split("\n", 1)[1]
|
|
|
|
|
if content.endswith("```"):
|
|
|
|
|
content = content.rsplit("```", 1)[0]
|
|
|
|
|
if content.startswith("```json"):
|
|
|
|
|
content = content[7:]
|
|
|
|
|
return content.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Minimal analyzer-shaped JSON payload shared by the fence-stripping tests.
SAMPLE_JSON = '{"gwoeScore": 7.0, "title": "Test"}'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestMarkdownStripping:
    """The stripper must accept raw JSON as well as every fence variant."""

    def test_plain_json_unchanged(self):
        # Unfenced input passes through untouched.
        assert _strip_markdown_fences(SAMPLE_JSON) == SAMPLE_JSON

    def test_json_in_markdown_fence(self):
        fenced = f"```json\n{SAMPLE_JSON}\n```"
        parsed = json.loads(_strip_markdown_fences(fenced))
        assert parsed["gwoeScore"] == 7.0

    def test_json_in_plain_fence(self):
        fenced = f"```\n{SAMPLE_JSON}\n```"
        parsed = json.loads(_strip_markdown_fences(fenced))
        assert parsed["gwoeScore"] == 7.0

    def test_leading_whitespace_stripped(self):
        padded = f" \n {SAMPLE_JSON} \n "
        assert json.loads(_strip_markdown_fences(padded))["gwoeScore"] == 7.0

    def test_trailing_fence_stripped(self):
        dangling = f"{SAMPLE_JSON}\n```"
        parsed = json.loads(_strip_markdown_fences(dangling))
        assert parsed["gwoeScore"] == 7.0
|
2026-04-28 11:06:24 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# ─── Coverage-Backfill (#134) ────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestContentFingerprint:
    """analyzer._content_fingerprint mirrored helper, separate from qwen_bewerter."""

    def test_empty_returns_len_zero(self):
        from app.analyzer import _content_fingerprint

        # Empty input short-circuits to a length-only fingerprint.
        assert _content_fingerprint("") == "len=0"

    def test_non_empty_includes_sha1(self):
        from app.analyzer import _content_fingerprint

        fingerprint = _content_fingerprint("hallo welt")
        assert fingerprint.startswith("len=10 sha1=")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestGetDefaultBewerter:
    def test_returns_qwen_instance(self, monkeypatch):
        """Lazy import: get_default_bewerter() must instantiate QwenBewerter."""
        import sys
        from unittest.mock import MagicMock

        from app import analyzer

        # Plant a fake adapter module so the real QwenBewerter is never imported.
        fake_module = type(sys)("app.adapters.qwen_bewerter")
        fake_module.QwenBewerter = MagicMock(return_value="fake-bewerter")
        monkeypatch.setitem(sys.modules, "app.adapters.qwen_bewerter", fake_module)

        assert analyzer.get_default_bewerter() == "fake-bewerter"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestLoadContextFile:
    """load_context_file resolves names against KONTEXT_DIR, missing files → ''."""

    def test_returns_text_when_file_exists(self, tmp_path, monkeypatch):
        from app import analyzer

        context_file = tmp_path / "test.txt"
        context_file.write_text("Hallo Welt")
        monkeypatch.setattr(analyzer, "KONTEXT_DIR", tmp_path)

        assert analyzer.load_context_file("test.txt") == "Hallo Welt"

    def test_returns_empty_when_file_missing(self, tmp_path, monkeypatch):
        from app import analyzer

        monkeypatch.setattr(analyzer, "KONTEXT_DIR", tmp_path)

        assert analyzer.load_context_file("missing.txt") == ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestGetUserPromptTemplate:
    def test_returns_template_with_placeholders(self):
        from app.analyzer import get_user_prompt_template

        template = get_user_prompt_template()
        # All four format placeholders must be present in the template.
        required = (
            "{bundesland_context}",
            "{quotes_context}",
            "{text}",
            "{pflicht_fraktionen}",
        )
        for placeholder in required:
            assert placeholder in template
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestGetBundeslandContext:
    """get_bundesland_context rejects unknown codes and inactive states."""

    def test_unknown_bundesland_raises(self):
        from app.analyzer import get_bundesland_context

        with pytest.raises(ValueError, match="Unbekanntes Bundesland"):
            get_bundesland_context("XX")

    def test_inactive_bundesland_raises(self, monkeypatch):
        from app import analyzer
        from app.bundeslaender import BUNDESLAENDER, Bundesland

        if "NRW" not in BUNDESLAENDER:
            pytest.skip("NRW nicht in BUNDESLAENDER")

        # Swap in an inactive copy of the NRW entry for this test only.
        original = BUNDESLAENDER["NRW"]
        attrs = dict(original.__dict__)
        attrs["aktiv"] = False
        monkeypatch.setitem(BUNDESLAENDER, "NRW", Bundesland(**attrs))

        with pytest.raises(ValueError, match="nicht aktiv"):
            analyzer.get_bundesland_context("NRW")
|