gwoe-antragspruefer/tests/test_analyzer.py
Dotty Dotter 8e6f435b94 test(#134): analyzer Coverage 70.1% → 83.1%
- TestContentFingerprint: empty/non-empty cases (Lines 45-48)
- TestGetDefaultBewerter: lazy-Import liefert QwenBewerter (Lines 58-60)
- TestLoadContextFile: existierende + fehlende Datei (Line 71)
- TestGetUserPromptTemplate: alle 4 Platzhalter im Template
- TestGetBundeslandContext:
  - unbekanntes BL → ValueError 'Unbekanntes Bundesland' (Line 263)
  - inaktives BL → ValueError 'nicht aktiv' (Line 265)

Verbleibend (alles im analyze_text LLM-Pfad): Embeddings-Fallback,
reconstruct_zitate-Branch, missing-Programme-Logging — wuerde End-to-End
Mock-Setup brauchen, Aufwand vs. Nutzen unguenstig.

Total: 50.6% → 50.8%, 736 → 744 Tests.
2026-04-28 11:06:24 +02:00

142 lines
5.3 KiB
Python

"""Tests for analyzer.py JSON-stripping logic.
Reproduces the markdown code-block stripping in the LLM retry loop. Real
Qwen responses sometimes wrap their JSON in ```json …``` fences (despite
the prompt asking for raw JSON), and the analyzer must tolerate that
without resorting to retries.

The second half of this file holds the coverage-backfill tests (#134) for
small analyzer helpers.
"""
import json
import sys
import types
# Register a minimal stand-in for the ``openai`` package before the analyzer
# gets imported. An ``openai`` entry that is already present in sys.modules is
# left untouched.
if "openai" not in sys.modules:
    openai_stub = types.ModuleType("openai")
    # The stand-in exposes only ``OpenAI``: a callable that accepts arbitrary
    # keyword arguments and returns None.
    openai_stub.OpenAI = lambda **kw: None
    sys.modules[openai_stub.__name__] = openai_stub
def _strip_markdown_fences(content: str) -> str:
    """Return *content* with surrounding markdown code fences removed.

    This is a copy of the analyzer's markdown-stripping snippet, so the
    parsing rules can be unit-tested without invoking the LLM. Keep it in
    sync with analyzer.py around the `if content.startswith("```")` branch:
    when the analyzer changes, this helper has to change with it. The
    duplication exists because the analyzer's stripping is buried in an
    async LLM call that cannot easily be unit-tested directly.

    The three checks run independently and in this order on the
    whitespace-trimmed text:

    1. a leading fence drops everything up to and including the first newline,
    2. a trailing fence drops the final three backticks,
    3. a remaining "```json" prefix is cut off.

    Raises:
        IndexError: if the trimmed text starts with a fence but contains no
            newline at all (step 1 has nothing after the fence line).
    """
    fence = "```"
    json_fence = fence + "json"

    text = content.strip()
    if text.startswith(fence):
        # Discard the whole opening line, including any language tag on it.
        text = text.split("\n", 1)[1]
    if text.endswith(fence):
        # The text ends with the fence, so cutting its length off the end
        # removes exactly that last fence.
        text = text[: -len(fence)]
    if text.startswith(json_fence):
        text = text[len(json_fence):]
    return text.strip()
# Small JSON document used as the payload in every fence-stripping test below.
SAMPLE_JSON = '{"gwoeScore": 7.0, "title": "Test"}'


class TestMarkdownStripping:
    """Fence-stripping rules, exercised through ``_strip_markdown_fences``."""

    def test_plain_json_unchanged(self):
        """Input without fences or padding comes back verbatim."""
        stripped = _strip_markdown_fences(SAMPLE_JSON)
        assert stripped == SAMPLE_JSON

    def test_json_in_markdown_fence(self):
        """A fence tagged ``json`` is removed and the payload still parses."""
        fenced = f"```json\n{SAMPLE_JSON}\n```"
        payload = json.loads(_strip_markdown_fences(fenced))
        assert payload["gwoeScore"] == 7.0

    def test_json_in_plain_fence(self):
        """An untagged fence is removed and the payload still parses."""
        fenced = f"```\n{SAMPLE_JSON}\n```"
        payload = json.loads(_strip_markdown_fences(fenced))
        assert payload["gwoeScore"] == 7.0

    def test_leading_whitespace_stripped(self):
        """Whitespace and newlines around the payload are trimmed."""
        padded = f" \n {SAMPLE_JSON} \n "
        payload = json.loads(_strip_markdown_fences(padded))
        assert payload["gwoeScore"] == 7.0

    def test_trailing_fence_stripped(self):
        """A closing fence without a matching opening fence is removed."""
        dangling = f"{SAMPLE_JSON}\n```"
        payload = json.loads(_strip_markdown_fences(dangling))
        assert payload["gwoeScore"] == 7.0
# ─── Coverage-Backfill (#134) ────────────────────────────────────────────────
import pytest
class TestContentFingerprint:
    """analyzer._content_fingerprint: mirrored helper, kept separate from qwen_bewerter."""

    def test_empty_returns_len_zero(self):
        """The empty string is fingerprinted as exactly ``len=0``."""
        from app import analyzer

        fingerprint = analyzer._content_fingerprint("")
        assert fingerprint == "len=0"

    def test_non_empty_includes_sha1(self):
        """A ten-character input yields the length followed by a sha1 field."""
        from app import analyzer

        fingerprint = analyzer._content_fingerprint("hallo welt")
        assert fingerprint.startswith("len=10 sha1=")
class TestGetDefaultBewerter:
    """Checks the object handed back by ``analyzer.get_default_bewerter``."""

    def test_returns_qwen_instance(self, monkeypatch):
        """Lazy import: get_default_bewerter() calls QwenBewerter()."""
        from unittest.mock import MagicMock

        from app import analyzer

        # Stub QwenBewerter at the adapter module path so that no real import
        # happens; monkeypatch undoes the sys.modules entry after the test.
        module_name = "app.adapters.qwen_bewerter"
        stub_module = types.ModuleType(module_name)
        stub_module.QwenBewerter = MagicMock(return_value="fake-bewerter")
        monkeypatch.setitem(sys.modules, module_name, stub_module)

        bewerter = analyzer.get_default_bewerter()
        assert bewerter == "fake-bewerter"
class TestLoadContextFile:
    """``analyzer.load_context_file`` with ``KONTEXT_DIR`` pointed at a temp dir."""

    def test_returns_text_when_file_exists(self, tmp_path, monkeypatch):
        """An existing file's text is returned unchanged."""
        from app import analyzer

        (tmp_path / "test.txt").write_text("Hallo Welt")
        monkeypatch.setattr(analyzer, "KONTEXT_DIR", tmp_path)

        loaded = analyzer.load_context_file("test.txt")
        assert loaded == "Hallo Welt"

    def test_returns_empty_when_file_missing(self, tmp_path, monkeypatch):
        """A file name that does not exist yields the empty string."""
        from app import analyzer

        monkeypatch.setattr(analyzer, "KONTEXT_DIR", tmp_path)

        loaded = analyzer.load_context_file("missing.txt")
        assert loaded == ""
class TestGetUserPromptTemplate:
    """Shape check for the template from ``analyzer.get_user_prompt_template``."""

    def test_returns_template_with_placeholders(self):
        """All four placeholders must be present in the template."""
        from app import analyzer

        template = analyzer.get_user_prompt_template()
        expected_placeholders = (
            "{bundesland_context}",
            "{quotes_context}",
            "{text}",
            "{pflicht_fraktionen}",
        )
        # Checked one at a time so a failure names the missing placeholder.
        for placeholder in expected_placeholders:
            assert placeholder in template
class TestGetBundeslandContext:
    """Error paths of ``analyzer.get_bundesland_context``."""

    def test_unknown_bundesland_raises(self):
        """An unknown code raises ValueError mentioning 'Unbekanntes Bundesland'."""
        from app import analyzer

        with pytest.raises(ValueError, match="Unbekanntes Bundesland"):
            analyzer.get_bundesland_context("XX")

    def test_inactive_bundesland_raises(self, monkeypatch):
        """An entry with ``aktiv=False`` raises ValueError mentioning 'nicht aktiv'."""
        from app import analyzer
        from app.bundeslaender import BUNDESLAENDER, Bundesland

        if "NRW" not in BUNDESLAENDER:
            pytest.skip("NRW nicht in BUNDESLAENDER")

        # Build an inactive copy of the NRW entry: same attributes, only
        # ``aktiv`` switched off.
        fields = dict(BUNDESLAENDER["NRW"].__dict__)
        fields["aktiv"] = False
        inactive = Bundesland(**fields)

        # Swap the copy in for this test only; monkeypatch restores the
        # original registry entry afterwards.
        monkeypatch.setitem(BUNDESLAENDER, "NRW", inactive)

        with pytest.raises(ValueError, match="nicht aktiv"):
            analyzer.get_bundesland_context("NRW")