test: 467 -> 574 Tests (+107) — DDD, abgeordnetenwatch, monitoring, v2, Bug-Regressions
Neue Tests in dieser Migration:
- test_database.py (Merkliste-CRUD, Subscriptions, abgeordnetenwatch-Joins)
- test_clustering.py (82% Coverage)
- test_drucksache_typen.py (100%)
- test_mail.py (86%)
- test_monitoring.py (23 Tests)
- test_abgeordnetenwatch.py (23 Tests, inkl. Drucksache-Extraction)
- test_redline_parser.py (20 Tests fuer §INS§/§DEL§-Marker)
- test_bug_regressions.py (PRAGMA, JWT-azp, CDU-PDF, PFLICHT-FRAKTIONEN, NRW-Titel)
- test_embeddings_v3_v4.py (WRITE/READ-Pattern)
- test_wahlprogramm_check.py (#128)
- test_wahlprogramm_fetch.py (#138)
- test_antrag/bewertung/abonnement_repository.py + test_llm_bewerter.py (DDD)
- test_domain_behavior.py (5 Domain-Methoden boundary tests)
- tests/e2e/test_ui.py (Playwright)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-25 20:55:57 +02:00
|
|
|
"""Tests für LlmBewerter-Port und QwenBewerter-Adapter (ADR 0008).
|
|
|
|
|
|
|
|
|
|
Der Adapter wird mit einem Fake-Client getestet — kein Netzwerk, kein
|
|
|
|
|
``openai``-Paket. Retry-Semantik (Temperatur steigt um 0.1 pro Versuch)
|
|
|
|
|
ist hier explizit getestet, damit die Migration die Semantik nicht
|
|
|
|
|
still verändert.
|
|
|
|
|
"""
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
import asyncio
|
|
|
|
|
import json
|
|
|
|
|
import types
|
|
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
|
|
from app.adapters.qwen_bewerter import QwenBewerter, _strip_markdown_fences
|
|
|
|
|
from app.ports.llm_bewerter import LlmBewerter, LlmRequest
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _run(coro):
    """Run *coro* to completion on a private event loop and return its result.

    A fresh loop is created and closed for every call instead of relying on
    ``asyncio.get_event_loop()``: that call is deprecated when no loop is
    current and raises ``RuntimeError`` once an earlier ``asyncio.run()``
    (or ``set_event_loop(None)``) has cleared the thread's loop. The global
    event-loop state is left untouched, so other helpers are not affected.
    """
    loop = asyncio.new_event_loop()
    try:
        return loop.run_until_complete(coro)
    finally:
        # Always release the loop's resources, even if the coroutine raised.
        loop.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _make_fake_client(responses: list[str]):
    """Build a stand-in for the OpenAI client plus a log of received calls.

    Each ``client.chat.completions.create(**kwargs)`` call appends a copy of
    its keyword arguments to the returned ``calls`` list and answers with the
    entry of *responses* at the call's position; once the list is exhausted
    the last entry is repeated.

    Returns:
        A ``(client, calls)`` tuple.
    """
    calls: list[dict] = []

    def _as_completion(text):
        # Mimic the ``response.choices[0].message.content`` access path.
        message = types.SimpleNamespace(content=text)
        choice = types.SimpleNamespace(message=message)
        return types.SimpleNamespace(choices=[choice])

    class FakeCompletions:
        async def create(self, **kwargs):
            position = len(calls)
            calls.append(dict(kwargs))
            last = len(responses) - 1
            return _as_completion(responses[min(position, last)])

    class FakeChat:
        completions = FakeCompletions()

    class FakeClient:
        chat = FakeChat()

    client = FakeClient()
    return client, calls
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ─── Strip-Fences ──────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
class TestStripMarkdownFences:
    """``_strip_markdown_fences`` removes a surrounding code fence, if any."""

    def test_plain_json_unchanged(self):
        # Input without a fence must come back untouched.
        payload = '{"a": 1}'
        assert _strip_markdown_fences(payload) == payload

    def test_json_fence(self):
        # Fence that carries an explicit ``json`` language tag.
        fenced = '```json\n{"a": 1}\n```'
        stripped = _strip_markdown_fences(fenced)
        assert stripped == '{"a": 1}'

    def test_plain_fence(self):
        # Fence without any language tag.
        fenced = '```\n{"a": 1}\n```'
        stripped = _strip_markdown_fences(fenced)
        assert stripped == '{"a": 1}'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ─── Protocol-Konformität ──────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
class TestProtocol:
    """Structural conformance of the adapter to the port."""

    def test_qwen_implements_llm_bewerter(self):
        # The isinstance check relies on LlmBewerter being a
        # runtime-checkable Protocol, i.e. it verifies that the adapter
        # exposes the ``bewerte`` method.
        adapter = QwenBewerter(api_key="x", base_url="y", client=object())
        assert isinstance(adapter, LlmBewerter)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ─── QwenBewerter mit FakeClient ───────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
class TestQwenBewerterHappyPath:
    """First response is valid, so a single API call is enough."""

    def test_single_successful_call(self):
        client, recorded = _make_fake_client(['{"gwoeScore": 7.0}'])
        adapter = QwenBewerter(api_key="x", base_url="y", client=client)
        request = LlmRequest(system_prompt="sys", user_prompt="usr")

        outcome = _run(adapter.bewerte(request))

        assert outcome == {"gwoeScore": 7.0}
        # Exactly one call, issued with the base temperature.
        assert len(recorded) == 1
        first_call = recorded[0]
        assert first_call["temperature"] == pytest.approx(0.3)

    def test_markdown_fence_is_stripped(self):
        client, _ = _make_fake_client(['```json\n{"gwoeScore": 8.0}\n```'])
        adapter = QwenBewerter(client=client)

        outcome = _run(adapter.bewerte(LlmRequest("sys", "usr")))

        assert outcome == {"gwoeScore": 8.0}

    def test_passes_model_through(self):
        client, recorded = _make_fake_client(['{"a": 1}'])
        adapter = QwenBewerter(client=client)
        request = LlmRequest("sys", "usr", model="qwen-turbo")

        _run(adapter.bewerte(request))

        # The model named in the request must reach the client unchanged.
        assert recorded[0]["model"] == "qwen-turbo"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestQwenBewerterRetries:
    """Retry behaviour when the response cannot be parsed as JSON."""

    def test_retry_raises_temperature(self):
        """On a JSON parse error the temperature rises by 0.1 per attempt."""
        replies = [
            "nicht valides JSON",
            "immer noch kaputt",
            '{"gwoeScore": 6.0}',  # third attempt succeeds
        ]
        client, recorded = _make_fake_client(replies)
        adapter = QwenBewerter(client=client)

        outcome = _run(adapter.bewerte(LlmRequest("sys", "usr", max_retries=3)))

        assert outcome == {"gwoeScore": 6.0}
        assert len(recorded) == 3
        for call, expected in zip(recorded, (0.3, 0.4, 0.5)):
            assert call["temperature"] == pytest.approx(expected)

    def test_exhausted_retries_raise(self):
        # Every attempt yields unparsable text, so the decode error surfaces.
        client, _ = _make_fake_client(["kaputt"] * 3)
        adapter = QwenBewerter(client=client)
        request = LlmRequest("sys", "usr", max_retries=3)

        with pytest.raises(json.JSONDecodeError):
            _run(adapter.bewerte(request))

    def test_single_retry_is_respected(self):
        """max_retries=1 means exactly one attempt and no retry."""
        client, recorded = _make_fake_client(["kaputt"])
        adapter = QwenBewerter(client=client)
        request = LlmRequest("sys", "usr", max_retries=1)

        with pytest.raises(json.JSONDecodeError):
            _run(adapter.bewerte(request))

        assert len(recorded) == 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestLlmRequestDefaults:
    """Pin the default values of ``LlmRequest``."""

    def test_defaults_match_legacy_analyzer(self):
        # Only the two prompts are supplied; everything else is a default.
        request = LlmRequest("s", "u")
        observed = (
            request.model,
            request.max_retries,
            request.max_tokens,
            request.base_temperature,
        )
        assert observed == ("qwen-plus", 3, 4000, 0.3)
|
2026-04-28 10:56:56 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# ─── Coverage-Backfill (#134) ────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestContentFingerprint:
    """Output format of the ``_content_fingerprint`` helper."""

    def test_empty_string_returns_len_zero(self):
        from app.adapters.qwen_bewerter import _content_fingerprint

        fingerprint = _content_fingerprint("")
        assert fingerprint == "len=0"

    def test_none_returns_len_zero(self):
        from app.adapters.qwen_bewerter import _content_fingerprint

        # None is tolerated defensively: the logging path passes
        # ``choices[0].message.content``, which has been None before.
        fingerprint = _content_fingerprint(None)
        assert fingerprint == "len=0"

    def test_non_empty_includes_sha1_prefix(self):
        from app.adapters.qwen_bewerter import _content_fingerprint

        fingerprint = _content_fingerprint("hallo")
        assert fingerprint.startswith("len=5 sha1=")
        # The part after the ``sha1=`` marker is eight characters long.
        digest = fingerprint.split("sha1=")[1]
        assert len(digest) == 8
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestStripMarkdownJsonFences:
    """A ```json fence is handled in addition to the plain fence."""

    def test_json_fence_with_explicit_lang(self):
        from app.adapters.qwen_bewerter import _strip_markdown_fences

        fenced = '```json\n{"a": 1}\n```'
        stripped = _strip_markdown_fences(fenced)
        assert stripped == '{"a": 1}'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestLazyClientInstantiation:
    """``_get_client`` only reaches for ``openai`` on first use."""

    def test_no_client_triggers_openai_import(self, monkeypatch):
        """Without an injected client, ``_get_client`` builds one itself.

        A stub ``openai`` module is registered in ``sys.modules`` so that the
        import inside ``_get_client`` resolves to it; the test then checks
        that ``AsyncOpenAI`` was instantiated with the adapter's credentials
        (covers the lazy-construction branch).
        """
        import sys
        from unittest.mock import MagicMock

        from app.adapters.qwen_bewerter import QwenBewerter

        client_instance = MagicMock(name="AsyncOpenAI-Instance")
        client_factory = MagicMock(return_value=client_instance)

        stub_module = types.ModuleType("openai")
        stub_module.AsyncOpenAI = client_factory
        monkeypatch.setitem(sys.modules, "openai", stub_module)

        adapter = QwenBewerter(api_key="test", base_url="http://test")

        assert adapter._get_client() is client_instance
        client_factory.assert_called_once_with(
            api_key="test",
            base_url="http://test",
        )

    def test_injected_client_skips_lazy_import(self):
        """A client passed to the constructor is returned as-is."""
        from app.adapters.qwen_bewerter import QwenBewerter

        sentinel = object()
        adapter = QwenBewerter(client=sentinel)
        assert adapter._get_client() is sentinel
|
feat: Antrag-Detail News-Match-Box + Test-Coverage fuer aktuelle-themen
**News-Match-Box im Antrag-Detail:**
Reverse-Sicht zur /aktuelle-themen-Seite — pro Antrag-Detail-Page eine
Box "Aktuelle News passend zu diesem Antrag" mit den Top-5 Matches der
letzten 90 Tage. Pro News-Card direkter "PM-Vorschlag generieren"-Button
mit Idempotenz-Check (bestehender Draft wird ohne LLM-Call zurueckgegeben).
Loest das User-Feedback "ich oeffne ja meist Antrags-Detail, nicht den
News-Tab — da fehlt mir die News-Sicht". Box laedt lazy via fetch und
bleibt komplett versteckt wenn keine Matches existieren (kein Noise).
**Test-Coverage fuer die heutigen Backend-Aenderungen:**
`tests/test_llm_bewerter.py`:
- 6 Tests fuer `_recover_unescaped_newlines` (clean, raw newline, tab+cr,
outside-string, makes-invalid-valid, preserves-already-escaped)
- 2 Tests fuer `json_object_mode` pass-through (off → kein Param,
on → response_format={"type":"json_object"})
- 1 Integration: Recovery greift im bewerte()-Loop ohne Retry
`tests/test_endpoints_smoke.py`:
- Vote-Orphans-Endpoint (GET) Smoke
- Vote-Orphans-Auto-Rate Auth-Wall
- Batch-Analyze Auth-Wall (incl. ALL-Modus)
- Aktuelle-Themen-Endpoints (top, zeitreihe, top-antraege, cluster,
drafts-list, drafts-versions) — 8 Tests
`tests/test_batch_helpers.py`:
- 4 Unit-Tests fuer _enqueue_for_bl-Logik via Inline-Repro mit Mocks
(already-rated skip, no-adapter, limit-cap, empty-text-skip)
Suite: 1084 passed, 50 skipped (Smoke-Tests skippen lokal weil
FastAPI nicht importbar, greifen aber gegen dev/CI).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-06 02:22:22 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# ─── _recover_unescaped_newlines (Issue #170 Followup) ─────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestRecoverUnescapedNewlines:
    """``_recover_unescaped_newlines`` escapes raw control chars in strings."""

    def test_no_change_when_clean(self):
        from app.adapters.qwen_bewerter import _recover_unescaped_newlines

        # The payload holds the two characters backslash + n, no raw newline.
        payload = '{"body": "Zeile1\\nZeile2"}'
        assert _recover_unescaped_newlines(payload) == payload

    def test_replaces_raw_newline_in_string(self):
        from app.adapters.qwen_bewerter import _recover_unescaped_newlines

        # The payload holds a real newline inside the JSON string value.
        payload = '{"body": "Zeile1\nZeile2"}'
        fixed = _recover_unescaped_newlines(payload)

        # The raw newline became the two-character escape sequence ...
        assert "\\n" in fixed
        # ... and no raw newline is left once the escapes are removed.
        remainder = fixed.replace("\\n", "")
        assert "\n" not in remainder

    def test_replaces_tab_and_cr(self):
        from app.adapters.qwen_bewerter import _recover_unescaped_newlines

        fixed = _recover_unescaped_newlines('{"x": "a\tb\rc"}')
        assert "\\t" in fixed
        assert "\\r" in fixed

    def test_does_not_touch_outside_string(self):
        from app.adapters.qwen_bewerter import _recover_unescaped_newlines

        # Newlines in the JSON whitespace between tokens must survive.
        payload = '{\n "x": 1\n}'
        fixed = _recover_unescaped_newlines(payload)

        assert "\n" in fixed
        # Still parses: a newline is valid JSON whitespace.
        assert json.loads(fixed) == {"x": 1}

    def test_recovery_makes_invalid_json_valid(self):
        """Concrete use case: qwen output with real newlines in the body."""
        from app.adapters.qwen_bewerter import _recover_unescaped_newlines

        payload = '{"titel": "T", "body": "Para1\n\nPara2"}'
        with pytest.raises(json.JSONDecodeError):
            json.loads(payload)

        parsed = json.loads(_recover_unescaped_newlines(payload))
        body = parsed["body"]
        assert "Para1" in body
        assert "Para2" in body
        # After decoding, the Python string contains real newlines again.
        assert "\n\n" in body

    def test_recovery_preserves_already_escaped(self):
        """An already escaped newline (backslash + n) stays untouched."""
        from app.adapters.qwen_bewerter import _recover_unescaped_newlines

        payload = '{"body": "Zeile1\\nZeile2"}'
        fixed = _recover_unescaped_newlines(payload)
        # Identical output means the escape was not doubled.
        assert fixed == payload
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ─── json_object_mode pass-through (#170 Followup) ─────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestJsonObjectMode:
    """Pass-through of ``json_object_mode`` to the API call."""

    def test_disabled_default_no_response_format(self):
        """By default no ``response_format`` is sent to the API."""
        client, recorded = _make_fake_client(['{"x": 1}'])
        adapter = QwenBewerter(client=client)
        request = LlmRequest(
            system_prompt="s",
            user_prompt="u",
            model="qwen-plus",
            max_retries=1,
        )

        _run(adapter.bewerte(request))

        assert "response_format" not in recorded[0]

    def test_enabled_sends_response_format(self):
        """json_object_mode=True sends response_format of type json_object."""
        client, recorded = _make_fake_client(['{"x": 1}'])
        adapter = QwenBewerter(client=client)
        request = LlmRequest(
            system_prompt="s",
            user_prompt="u",
            model="qwen-max",
            max_retries=1,
            json_object_mode=True,
        )

        _run(adapter.bewerte(request))

        sent_format = recorded[0].get("response_format")
        assert sent_format == {"type": "json_object"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ─── Recovery integriert: full bewerte()-Loop ──────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestBewerteWithRecovery:
    """Newline recovery as part of the full ``bewerte()`` loop."""

    def test_bewerte_uses_recovery_when_first_parse_fails(self):
        """JSON with raw newlines is recovered, so no retry is needed."""
        payload = '{"titel": "T", "body": "Lead\n\nPara2"}'
        client, recorded = _make_fake_client([payload])
        adapter = QwenBewerter(client=client)
        request = LlmRequest(
            system_prompt="s",
            user_prompt="u",
            model="qwen-plus",
            max_retries=2,
        )

        outcome = _run(adapter.bewerte(request))

        # A second attempt was allowed, but one API call was sufficient.
        assert len(recorded) == 1
        assert outcome["titel"] == "T"
        assert "Para2" in outcome["body"]
|