# gwoe-antragspruefer/tests/test_analyzer.py

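"""Tests for analyzer.py: JSON-stripping logic and small helper functions.

Reproduces the markdown-codeblock-stripping in the LLM retry loop. Real
Qwen responses sometimes wrap their JSON in ```json …``` fences (despite
the prompt asking for raw JSON), and the analyzer must tolerate that
without resorting to retries.

The second half of the file (coverage backfill, #134) exercises helpers
imported from app.analyzer: _content_fingerprint, get_default_bewerter,
load_context_file, get_user_prompt_template and get_bundesland_context.
"""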
import json
import sys
import types

# Stub openai before importing analyzer.
# The stand-in is only installed when sys.modules has no "openai" entry yet,
# so a module that is already registered there is left untouched.
if "openai" not in sys.modules:
    openai_stub = types.ModuleType("openai")
    # Placeholder for the client class: accepts any keyword arguments and
    # returns None instead of building a client.
    openai_stub.OpenAI = lambda **kw: None
    sys.modules["openai"] = openai_stub


def _strip_markdown_fences(content: str) -> str:
    """Return *content* with a surrounding markdown code fence removed.

    This is a hand-maintained copy of the fence handling that analyzer.py
    performs around its `if content.startswith("```")` branch. The copy
    exists because the original sits inside an async LLM call that is hard
    to unit-test directly, so any change to the analyzer's stripping rules
    has to be repeated here.

    Steps, in order: trim surrounding whitespace, drop the whole first line
    when the text opens with a fence, cut a trailing fence, remove a leading
    "```json" marker if one is still present, and trim whitespace again.
    """
    fence = "```"
    text = content.strip()

    if text.startswith(fence):
        # Discard the complete opener line (fence plus any language tag).
        # NOTE(review): an opener with no newline after it raises IndexError
        # here; presumably analyzer.py behaves the same way - confirm before
        # changing either side.
        text = text.split("\n", 1)[1]

    if text.endswith(fence):
        text = text[: -len(fence)]

    json_opener = "```json"
    if text.startswith(json_opener):
        text = text[len(json_opener):]

    return text.strip()


# Minimal JSON payload shared by the fence-stripping tests below; they either
# compare the cleaned text against it or parse it and check "gwoeScore".
SAMPLE_JSON = '{"gwoeScore": 7.0, "title": "Test"}'


class TestMarkdownStripping:
    """Fence-stripping rules, checked against the local mirror helper."""

    def test_plain_json_unchanged(self):
        # No fence and no padding: the text must come back exactly as given.
        result = _strip_markdown_fences(SAMPLE_JSON)
        assert result == SAMPLE_JSON

    def test_json_in_markdown_fence(self):
        # Opening fence carries a "json" language tag.
        fenced = "```json\n" + SAMPLE_JSON + "\n```"
        parsed = json.loads(_strip_markdown_fences(fenced))
        assert parsed["gwoeScore"] == 7.0

    def test_json_in_plain_fence(self):
        # Opening fence without any language tag.
        fenced = "```\n" + SAMPLE_JSON + "\n```"
        parsed = json.loads(_strip_markdown_fences(fenced))
        assert parsed["gwoeScore"] == 7.0

    def test_leading_whitespace_stripped(self):
        # Whitespace and newlines on both sides, no fence at all.
        padded = "   \n  " + SAMPLE_JSON + "  \n  "
        parsed = json.loads(_strip_markdown_fences(padded))
        assert parsed["gwoeScore"] == 7.0

    def test_trailing_fence_stripped(self):
        # Only a closing fence is present.
        half_fenced = SAMPLE_JSON + "\n```"
        parsed = json.loads(_strip_markdown_fences(half_fenced))
        assert parsed["gwoeScore"] == 7.0


# ─── Coverage-Backfill (#134) ────────────────────────────────────────────────

import pytest


class TestContentFingerprint:
    """analyzer._content_fingerprint: mirrored helper, separate from qwen_bewerter."""

    def test_empty_returns_len_zero(self):
        from app import analyzer

        fingerprint = analyzer._content_fingerprint("")
        assert fingerprint == "len=0"

    def test_non_empty_includes_sha1(self):
        from app import analyzer

        # "hallo welt" is 10 characters long, hence the expected "len=10".
        fingerprint = analyzer._content_fingerprint("hallo welt")
        assert fingerprint.startswith("len=10 sha1=")


class TestGetDefaultBewerter:
    def test_returns_qwen_instance(self, monkeypatch):
        """Lazy import: get_default_bewerter() is expected to call QwenBewerter()."""
        from app import analyzer
        from unittest.mock import MagicMock

        adapter_name = "app.adapters.qwen_bewerter"
        # Register a stand-in adapter module under the adapter path so that
        # no real import of the adapter takes place.
        stub_module = types.ModuleType(adapter_name)
        stub_module.QwenBewerter = MagicMock(return_value="fake-bewerter")
        monkeypatch.setitem(sys.modules, adapter_name, stub_module)

        assert analyzer.get_default_bewerter() == "fake-bewerter"


class TestLoadContextFile:
    """load_context_file with KONTEXT_DIR redirected to a temporary directory."""

    def test_returns_text_when_file_exists(self, tmp_path, monkeypatch):
        from app import analyzer

        (tmp_path / "test.txt").write_text("Hallo Welt")
        monkeypatch.setattr(analyzer, "KONTEXT_DIR", tmp_path)

        loaded = analyzer.load_context_file("test.txt")
        assert loaded == "Hallo Welt"

    def test_returns_empty_when_file_missing(self, tmp_path, monkeypatch):
        from app import analyzer

        monkeypatch.setattr(analyzer, "KONTEXT_DIR", tmp_path)

        # Nothing was written into tmp_path, so "missing.txt" does not exist.
        loaded = analyzer.load_context_file("missing.txt")
        assert loaded == ""


class TestGetUserPromptTemplate:
    def test_returns_template_with_placeholders(self):
        from app.analyzer import get_user_prompt_template

        template = get_user_prompt_template()
        # All four placeholders must be present in the template.
        required = (
            "{bundesland_context}",
            "{quotes_context}",
            "{text}",
            "{pflicht_fraktionen}",
        )
        missing = [name for name in required if name not in template]
        assert missing == []


class TestGetBundeslandContext:
    """Error paths of analyzer.get_bundesland_context."""

    def test_unknown_bundesland_raises(self):
        from app import analyzer

        with pytest.raises(ValueError, match="Unbekanntes Bundesland"):
            analyzer.get_bundesland_context("XX")

    def test_inactive_bundesland_raises(self, monkeypatch):
        from app import analyzer
        from app.bundeslaender import BUNDESLAENDER, Bundesland

        # The test needs an existing NRW entry to derive an inactive copy from.
        if "NRW" not in BUNDESLAENDER:
            pytest.skip("NRW nicht in BUNDESLAENDER")

        # Copy the entry's attributes, switch "aktiv" off and register the
        # copy in place of the original for the duration of this test.
        attributes = dict(BUNDESLAENDER["NRW"].__dict__)
        attributes["aktiv"] = False
        monkeypatch.setitem(BUNDESLAENDER, "NRW", Bundesland(**attributes))

        with pytest.raises(ValueError, match="nicht aktiv"):
            analyzer.get_bundesland_context("NRW")
Add pytest suite + fix two regex bugs uncovered by it (#46)

Erste Tests für die Codebase. 77 Tests, 0.08s Laufzeit, decken die drei
Bug-Klassen aus der April-2026-Adapter-Session ab plus haben schon zwei
weitere Bugs in Production-Code aufgedeckt.

## Setup

- requirements-dev.txt mit pytest + pytest-asyncio
- pytest.ini mit asyncio_mode=auto
- tests/conftest.py stubbt fitz/bs4/openai/pydantic_settings, damit die
  Suite ohne den vollen prod-requirements-Satz läuft (pure unit tests,
  kein PDF-Parsing, kein HTTP)

## Tests

- tests/test_parlamente.py (33 Tests)
  * PortalaAdapter._parse_hit_list_cards: doctype/doctype_full
    NameError-Regression aus 1cb030a, plus Title/Drucksache/Fraktion-
    /Datum/PDF-Extraktion gegen ein BE-Card-Fixture
  * PortalaAdapter._parse_hit_list_dump: gegen ein LSA-Perl-Dump-
    Fixture inkl. Hex-Escape-Decoding (\x{fc} → ü)
  * PortalaAdapter._parse_hit_list_html: Auto-Detection zwischen Card-
    und Dump-Format
  * PortalaAdapter._normalize_fraktion: kanonische Fraktion-Codes inkl.
    F.D.P.-mit-Punkten, BÜNDNIS 90, DIE LINKE, BSW
  * ParLDokAdapter._hit_to_drucksache: JSON-Hit → Drucksache Mapping inkl.
    /navpanes-Stripping, MdL-mit-Partei-in-Klammern,
    Landesregierung-Detection
  * ParLDokAdapter._fulltext_id: bundle.js-mirroring (deferred, aber
    dokumentiert)
  * ADAPTERS-Registry-Sanity
- tests/test_embeddings.py (11 Tests)
  * _chunk_source_label: Programm-Name + Seite (Halluzinations-
    Bug-Regression aus 1b5fd96)
  * format_quotes_for_prompt: jeder Chunk muss Programm-Name enthalten,
    strict-citation-Hinweis muss im Output sein, keine
    NRW-Halluzinationen für MV/BE-Chunk-Sets
- tests/test_wahlprogramme.py (14 Tests)
  * Registry-Struktur (jahr int, seiten int, .pdf-Endung)
  * File-Existenz: jede registrierte PDF muss in static/referenzen/
    liegen — würde Tippfehler in den 22 indexierten Programmen sofort
    fangen
  * embeddings.PROGRAMME-Konsistenz-Cross-Check
- tests/test_bundeslaender.py (15 Tests)
  * Sanity über 16-State-Registry
  * #48-Klassifikations-Regression: TH=ParlDok, HB=StarWeb,
    SN=Eigensystem
  * Wahltermine plausibel (zwischen 2026 und 2035)
- tests/test_analyzer.py (4 Tests)
  * Markdown-Codeblock-Stripping aus dem JSON-Retry-Loop

## Bug-Funde während der Test-Schreibphase

Zwei Production-Bugs in den _normalize_fraktion-Helfern wurden durch die
neuen Tests sofort aufgedeckt und im selben Commit gefixt:

1. PortalaAdapter._normalize_fraktion matched "F.D.P." (mit Punkten, wie
   historische SH/HB-Drucksachen) nicht — Regex \bFDP\b ist zu strikt.
   Fix: \bF\.?\s*D\.?\s*P\.?\b analog zu ParLDokAdapter.
2. ParLDokAdapter._normalize_fraktion (auch PortalaAdapter) matched
   "Ministerium der Finanzen" nicht als Landesregierung, weil
   \bMINISTER\b die Wortgrenze auch nach MINISTER verlangt — bei
   MINISTERIUM steht aber IUM danach, keine Wortgrenze.
   Fix: \bMINISTER ohne abschließendes \b.

Beide Bugs hätten Fraktion-Felder bei Drucksachen der Bremischen
Bürgerschaft (FDP-Listen) und bei Landesregierungs-Drucksachen in MV/LSA
fälschlich leer gelassen — exakt der "fraktionen=[]"-Befund aus dem
MV-Smoke-Test in #4.

Phase 0 aus Roadmap-Issue #49.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-04-08 23:26:06 +02:00			`"""Tests for analyzer.py JSON-stripping logic.`

			`Reproduces the markdown-codeblock-stripping in the LLM retry loop. Real`
			Qwen responses sometimes wrap their JSON in ```json …``` fences (despite
			`the prompt asking for raw JSON), and the analyzer must tolerate that`
			`without resorting to retries.`
			`"""`
			`import json`
			`import sys`
			`import types`

			`# Stub openai before importing analyzer`
			`if "openai" not in sys.modules:`
			`openai_stub = types.ModuleType("openai")`
			`openai_stub.OpenAI = lambda **kw: None`
			`sys.modules["openai"] = openai_stub`


			`def _strip_markdown_fences(content: str) -> str:`
			`"""Mirror the analyzer's markdown-stripping snippet so we can unit-test`
			`the parsing rules without actually invoking the LLM.`

			Keep this in sync with analyzer.py around the `if content.startswith("```")`
			`branch — if the analyzer changes, this helper changes too. The point of`
			`the duplication is that the analyzer's stripping is buried in an async`
			`LLM call that we cannot easily unit-test directly.`
			`"""`
			`content = content.strip()`
			if content.startswith("```"):
			`content = content.split("\n", 1)[1]`
			if content.endswith("```"):
			content = content.rsplit("```", 1)[0]
			if content.startswith("```json"):
			`content = content[7:]`
			`return content.strip()`


			`SAMPLE_JSON = '{"gwoeScore": 7.0, "title": "Test"}'`


			`class TestMarkdownStripping:`
			`def test_plain_json_unchanged(self):`
			`assert _strip_markdown_fences(SAMPLE_JSON) == SAMPLE_JSON`

			`def test_json_in_markdown_fence(self):`
			wrapped = f"```json\n{SAMPLE_JSON}\n```"
			`cleaned = _strip_markdown_fences(wrapped)`
			`assert json.loads(cleaned)["gwoeScore"] == 7.0`

			`def test_json_in_plain_fence(self):`
			wrapped = f"```\n{SAMPLE_JSON}\n```"
			`cleaned = _strip_markdown_fences(wrapped)`
			`assert json.loads(cleaned)["gwoeScore"] == 7.0`

			`def test_leading_whitespace_stripped(self):`
			`wrapped = f" \n {SAMPLE_JSON} \n "`
			`assert json.loads(_strip_markdown_fences(wrapped))["gwoeScore"] == 7.0`

			`def test_trailing_fence_stripped(self):`
			wrapped = f"{SAMPLE_JSON}\n```"
			`cleaned = _strip_markdown_fences(wrapped)`
			`assert json.loads(cleaned)["gwoeScore"] == 7.0`
test(#134): analyzer Coverage 70.1% → 83.1%

- TestContentFingerprint: empty/non-empty cases (Lines 45-48)
- TestGetDefaultBewerter: lazy-Import liefert QwenBewerter (Lines 58-60)
- TestLoadContextFile: existierende + fehlende Datei (Line 71)
- TestGetUserPromptTemplate: alle 4 Platzhalter im Template
- TestGetBundeslandContext:
  - unbekanntes BL → ValueError 'Unbekanntes Bundesland' (Line 263)
  - inaktives BL → ValueError 'nicht aktiv' (Line 265)

Verbleibend (alles im analyze_text LLM-Pfad): Embeddings-Fallback,
reconstruct_zitate-Branch, missing-Programme-Logging — wuerde End-to-End
Mock-Setup brauchen, Aufwand vs. Nutzen unguenstig.

Total: 50.6% → 50.8%, 736 → 744 Tests.

2026-04-28 11:06:24 +02:00

			`# ─── Coverage-Backfill (#134) ────────────────────────────────────────────────`

			`import pytest`


			`class TestContentFingerprint:`
			`"""analyzer._content_fingerprint mirrored Helper, getrennt von qwen_bewerter."""`

			`def test_empty_returns_len_zero(self):`
			`from app.analyzer import _content_fingerprint`
			`assert _content_fingerprint("") == "len=0"`

			`def test_non_empty_includes_sha1(self):`
			`from app.analyzer import _content_fingerprint`
			`result = _content_fingerprint("hallo welt")`
			`assert result.startswith("len=10 sha1=")`


			`class TestGetDefaultBewerter:`
			`def test_returns_qwen_instance(self, monkeypatch):`
			`"""Lazy-Import: get_default_bewerter() ruft QwenBewerter()."""`
			`from app import analyzer`
			`from unittest.mock import MagicMock`
			`# Stub QwenBewerter im Adapter-Pfad, sodass kein echter Import passiert`
			`import sys`
			`fake_module = type(sys)("app.adapters.qwen_bewerter")`
			`fake_module.QwenBewerter = MagicMock(return_value="fake-bewerter")`
			`monkeypatch.setitem(sys.modules, "app.adapters.qwen_bewerter", fake_module)`

			`result = analyzer.get_default_bewerter()`
			`assert result == "fake-bewerter"`


			`class TestLoadContextFile:`
			`def test_returns_text_when_file_exists(self, tmp_path, monkeypatch):`
			`from app import analyzer`
			`target = tmp_path / "test.txt"`
			`target.write_text("Hallo Welt")`
			`monkeypatch.setattr(analyzer, "KONTEXT_DIR", tmp_path)`
			`assert analyzer.load_context_file("test.txt") == "Hallo Welt"`

			`def test_returns_empty_when_file_missing(self, tmp_path, monkeypatch):`
			`from app import analyzer`
			`monkeypatch.setattr(analyzer, "KONTEXT_DIR", tmp_path)`
			`assert analyzer.load_context_file("missing.txt") == ""`


			`class TestGetUserPromptTemplate:`
			`def test_returns_template_with_placeholders(self):`
			`from app.analyzer import get_user_prompt_template`
			`t = get_user_prompt_template()`
			`# Alle vier Platzhalter muessen drinstehen`
			`for ph in ("{bundesland_context}", "{quotes_context}",`
			`"{text}", "{pflicht_fraktionen}"):`
			`assert ph in t`


			`class TestGetBundeslandContext:`
			`def test_unknown_bundesland_raises(self):`
			`from app.analyzer import get_bundesland_context`
			`with pytest.raises(ValueError, match="Unbekanntes Bundesland"):`
			`get_bundesland_context("XX")`

			`def test_inactive_bundesland_raises(self, monkeypatch):`
			`from app import analyzer`
			`from app.bundeslaender import BUNDESLAENDER, Bundesland`
			`# Erstellen einer inaktiven BL-Instanz`
			`if "NRW" not in BUNDESLAENDER:`
			`pytest.skip("NRW nicht in BUNDESLAENDER")`
			`original = BUNDESLAENDER["NRW"]`
			`# Replace with inactive copy`
			`inactive = Bundesland(`
			`{original.__dict__, "aktiv": False}`
			`)`
			`monkeypatch.setitem(BUNDESLAENDER, "NRW", inactive)`
			`with pytest.raises(ValueError, match="nicht aktiv"):`
			`analyzer.get_bundesland_context("NRW")`