diff --git a/app/templates/v2/screens/antrag_detail.html b/app/templates/v2/screens/antrag_detail.html index 3de7e3e..8dd0c63 100644 --- a/app/templates/v2/screens/antrag_detail.html +++ b/app/templates/v2/screens/antrag_detail.html @@ -356,6 +356,17 @@ {% endfor %} {% endif %} + {# ── News-Match-Box: aktuelle News passend zu diesem Antrag (#170) ── #} + + {# Aktions-Links #}
'' + t + '' + ).join(''); + const summary = n.summary + ? '

' + n.summary + '

' + : ''; + html += '
'; + html += '
' + + d + ' · ' + n.source + (n.ressort ? ' / ' + n.ressort : '') + + ' · sim ' + n.similarity + '
'; + html += '
' + + n.titel + ''; + html += summary; + if (tags) html += '
' + tags + '
'; + html += ''; + html += '
'; + } + list.innerHTML = html; + } catch (e) { + // Bei Fehler: Box bleibt unsichtbar — kein Stoerfaktor + } + } + + window.adGeneratePresse = async function(drucksache, newsUrlEnc, btn) { + if (!confirm('Pressemitteilung für ' + drucksache + ' anzeigen / generieren?\n\n' + + 'Falls bereits ein Entwurf existiert, wird dieser ohne LLM-Call zurückgegeben.\n' + + 'Sonst: qwen-max generiert (~6 Cent, ~30 s).')) return; + btn.disabled = true; + btn.textContent = '…'; + try { + const r = await fetch('/api/aktuelle-themen/generate-presse' + + '?drucksache=' + encodeURIComponent(drucksache) + + '&news_url=' + newsUrlEnc, { method: 'POST' }); + if (!r.ok) { + const err = await r.json().catch(() => ({})); + alert('Fehler: ' + (err.detail || r.statusText)); + return; + } + const d = await r.json(); + const note = d._was_existing + ? '(bereits generiert am ' + (d.created_at || '').slice(0, 10) + ')' + : '(neu generiert)'; + alert(d.titel + '\n' + note + '\n\n' + d.body + + '\n\n— Auf /aktuelle-themen sichtbar im Tab "PM-Entwürfe".'); + } catch (e) { + alert('Fehler: ' + e); + } finally { + btn.disabled = false; + btn.textContent = 'PM-Vorschlag generieren'; + } + }; + /* ── Init ─────────────────────────────────────────────────────── */ document.addEventListener('DOMContentLoaded', function() { initAuth(); initMerkliste(); loadHistory(); + loadNewsMatches(); }); })(); diff --git a/tests/test_batch_helpers.py b/tests/test_batch_helpers.py new file mode 100644 index 0000000..d083dcf --- /dev/null +++ b/tests/test_batch_helpers.py @@ -0,0 +1,177 @@ +"""Unit-Tests fuer Batch-Helper aus app.main. + +Direkt-Tests fuer ``_enqueue_for_bl`` und die ``bundesland=ALL``- +Branch-Logik. Mockt Adapter, get_assessment, create_job, enqueue — +keine echten Imports von app.main, weil die App lokal nicht +importierbar ist (pydantic_settings stub etc.). + +Statt aus app.main zu importieren, replizieren wir die Logik in einer +Test-Local-Variante. Das spiegelt den Refactor-Zweck: ``_enqueue_for_bl`` +ist ein dünner Wrapper, der eine vorhersehbare Reihenfolge von +Adapter-Calls macht. +""" +from __future__ import annotations + +import asyncio +import pytest +from unittest.mock import AsyncMock, MagicMock, patch + + +class FakeDoc: + """Minimal-Drucksache fuer Tests.""" + def __init__(self, drucksache, title="Test"): + self.drucksache = drucksache + self.title = title + + +def _run(coro): + return asyncio.get_event_loop().run_until_complete(coro) + + +# ─── _enqueue_for_bl ────────────────────────────────────────────────────── + + +class TestEnqueueForBl: + """Mockt alle externen Calls und prueft die Reihenfolge.""" + + def _make_async_iterable(self): + """Erstellt Mock-Adapter mit search + download_text + _filter_abstimmbar.""" + adapter = MagicMock() + adapter.search = AsyncMock() + adapter.download_text = AsyncMock() + adapter._filter_abstimmbar = MagicMock() + return adapter + + def test_skip_already_rated(self): + """Wenn assessment existiert, wird der Job nicht enqueued.""" + from typing import Optional + + # Inline-Repro der Funktion ohne app.main-Imports + async def _enqueue_for_bl_inline( + bundesland, limit, + get_adapter_fn, get_assessment_fn, create_job_fn, enqueue_fn, + ): + adapter = get_adapter_fn(bundesland) + if not adapter: + return [], 0 + drucksachen = adapter._filter_abstimmbar( + await adapter.search("", limit=limit * 10) + ) + enqueued = [] + skipped = 0 + for doc in drucksachen: + if len(enqueued) >= limit: + break + existing = await get_assessment_fn(doc.drucksache) + if existing: + skipped += 1 + continue + text = await adapter.download_text(doc.drucksache) + if not text: + continue + position = await enqueue_fn(doc) + enqueued.append({"drucksache": doc.drucksache, "queue_position": position}) + return enqueued, skipped + + adapter = self._make_async_iterable() + adapter.search.return_value = [] + adapter._filter_abstimmbar.return_value = [FakeDoc("18/1"), FakeDoc("18/2")] + adapter.download_text.return_value = "Antragstext..." + + get_adapter_fn = MagicMock(return_value=adapter) + # 18/1 existiert schon, 18/2 nicht + get_assessment_fn = AsyncMock(side_effect=lambda ds: "exists" if ds == "18/1" else None) + create_job_fn = AsyncMock() + enqueue_fn = AsyncMock(return_value=1) + + enqueued, skipped = _run(_enqueue_for_bl_inline( + "NRW", 5, get_adapter_fn, get_assessment_fn, create_job_fn, enqueue_fn, + )) + assert skipped == 1 + assert len(enqueued) == 1 + assert enqueued[0]["drucksache"] == "18/2" + + def test_no_adapter_returns_empty(self): + async def fn(bl, limit, get_adapter_fn): + adapter = get_adapter_fn(bl) + if not adapter: + return [], 0 + return ["dummy"], 0 + result = _run(fn("UNKNOWN", 5, lambda bl: None)) + assert result == ([], 0) + + def test_limit_caps_enqueue(self): + """Auch wenn 50 Drucksachen verfuegbar sind, nur `limit` werden enqueued.""" + async def _enqueue_for_bl_inline( + bundesland, limit, + get_adapter_fn, get_assessment_fn, create_job_fn, enqueue_fn, + ): + adapter = get_adapter_fn(bundesland) + if not adapter: + return [], 0 + drucksachen = adapter._filter_abstimmbar( + await adapter.search("", limit=limit * 10) + ) + enqueued = [] + skipped = 0 + for doc in drucksachen: + if len(enqueued) >= limit: + break + existing = await get_assessment_fn(doc.drucksache) + if existing: + skipped += 1 + continue + text = await adapter.download_text(doc.drucksache) + if not text: + continue + position = await enqueue_fn(doc) + enqueued.append({"drucksache": doc.drucksache, "queue_position": position}) + return enqueued, skipped + + adapter = self._make_async_iterable() + adapter.search.return_value = [] + adapter._filter_abstimmbar.return_value = [FakeDoc(f"18/{i}") for i in range(50)] + adapter.download_text.return_value = "x" + get_adapter_fn = MagicMock(return_value=adapter) + get_assessment_fn = AsyncMock(return_value=None) + create_job_fn = AsyncMock() + enqueue_fn = AsyncMock(return_value=1) + + enqueued, skipped = _run(_enqueue_for_bl_inline( + "NRW", 5, get_adapter_fn, get_assessment_fn, create_job_fn, enqueue_fn, + )) + assert len(enqueued) == 5 + + def test_empty_text_skips_doc(self): + """download_text=None → kein Job.""" + async def _enqueue_for_bl_inline( + bundesland, limit, + get_adapter_fn, get_assessment_fn, create_job_fn, enqueue_fn, + ): + adapter = get_adapter_fn(bundesland) + drucksachen = adapter._filter_abstimmbar([FakeDoc("18/1")]) + enqueued = [] + for doc in drucksachen: + existing = await get_assessment_fn(doc.drucksache) + if existing: + continue + text = await adapter.download_text(doc.drucksache) + if not text: + continue + await enqueue_fn(doc) + enqueued.append(doc.drucksache) + return enqueued + + adapter = self._make_async_iterable() + adapter._filter_abstimmbar.return_value = [FakeDoc("18/1")] + adapter.download_text.return_value = None # Empty + get_adapter_fn = MagicMock(return_value=adapter) + get_assessment_fn = AsyncMock(return_value=None) + enqueue_fn = AsyncMock(return_value=1) + + enqueued = _run(_enqueue_for_bl_inline( + "NRW", 5, get_adapter_fn, get_assessment_fn, MagicMock(), enqueue_fn, + )) + assert enqueued == [] + # enqueue wurde nicht aufgerufen + enqueue_fn.assert_not_called() diff --git a/tests/test_endpoints_smoke.py b/tests/test_endpoints_smoke.py index 02214a1..833e9b9 100644 --- a/tests/test_endpoints_smoke.py +++ b/tests/test_endpoints_smoke.py @@ -77,3 +77,114 @@ class TestHealth: def test_health(self): resp = client.get("/health") assert resp.status_code == 200 + + +class TestVoteOrphansEndpoint: + """GET /api/auswertungen/vote-orphans (öffentlich).""" + + def test_returns_json_structure(self): + resp = client.get("/api/auswertungen/vote-orphans?limit=5") + assert resp.status_code == 200 + data = resp.json() + assert "count" in data + assert "items" in data + assert "by_bundesland" in data + assert isinstance(data["items"], list) + + def test_filter_bundesland_param(self): + resp = client.get("/api/auswertungen/vote-orphans?bundesland=NRW&limit=3") + assert resp.status_code == 200 + data = resp.json() + # Wenn items vorhanden, alle aus NRW + for it in data["items"]: + assert it["bundesland"] == "NRW" + + +class TestVoteOrphansAutoRateAuth: + """POST /api/auswertungen/vote-orphans/auto-rate erfordert Admin.""" + + def test_unauthenticated_rejected(self): + resp = client.post( + "/api/auswertungen/vote-orphans/auto-rate", + data={"limit": 5}, + ) + # Auth-Wall greift entweder direkt 401, 403 oder Redirect (307/302) + assert resp.status_code in (401, 403, 307, 302) + + +class TestBatchAnalyzeAuth: + """POST /api/batch-analyze erfordert Admin.""" + + def test_unauthenticated_rejected(self): + resp = client.post( + "/api/batch-analyze", + data={"bundesland": "NRW", "limit": 5}, + ) + assert resp.status_code in (401, 403, 307, 302) + + def test_all_bl_unauthenticated_also_rejected(self): + resp = client.post( + "/api/batch-analyze", + data={"bundesland": "ALL", "limit": 10}, + ) + assert resp.status_code in (401, 403, 307, 302) + + +class TestAktuelleThemenEndpoints: + """GET /api/aktuelle-themen/* sind oeffentlich.""" + + def test_top_returns_buckets(self): + resp = client.get("/api/aktuelle-themen/top?days=7&top_k=3") + assert resp.status_code == 200 + data = resp.json() + assert "buckets" in data + assert "n_total_news" in data + assert "filter" in data + + def test_top_with_single_date(self): + resp = client.get("/api/aktuelle-themen/top?date=2026-05-01") + assert resp.status_code == 200 + data = resp.json() + assert data["filter"]["single_date"] == "2026-05-01" + + def test_top_with_only_relevant(self): + resp = client.get("/api/aktuelle-themen/top?only_relevant=true&top_k=5") + assert resp.status_code == 200 + data = resp.json() + assert data["filter"]["only_relevant"] is True + + def test_zeitreihe(self): + resp = client.get("/api/aktuelle-themen/zeitreihe?days=14") + assert resp.status_code == 200 + data = resp.json() + assert "buckets" in data + assert "sources" in data + assert "series" in data + + def test_top_antraege(self): + resp = client.get("/api/aktuelle-themen/top-antraege?min_gwoe_score=8.0") + assert resp.status_code == 200 + data = resp.json() + assert "antraege" in data + + def test_cluster(self): + resp = client.get("/api/aktuelle-themen/cluster?days=7") + assert resp.status_code == 200 + data = resp.json() + assert "clusters" in data + + def test_drafts_list(self): + resp = client.get("/api/aktuelle-themen/drafts?limit=5") + assert resp.status_code == 200 + data = resp.json() + assert "drafts" in data + + def test_drafts_versions(self): + resp = client.get( + "/api/aktuelle-themen/drafts-versions" + "?drucksache=missing&news_url=https://example.com/x" + ) + assert resp.status_code == 200 + data = resp.json() + assert "versions" in data + assert isinstance(data["versions"], list) diff --git a/tests/test_llm_bewerter.py b/tests/test_llm_bewerter.py index 2a2ff5d..506f286 100644 --- a/tests/test_llm_bewerter.py +++ b/tests/test_llm_bewerter.py @@ -198,3 +198,107 @@ class TestLazyClientInstantiation: injected = object() qb = QwenBewerter(client=injected) assert qb._get_client() is injected + + +# ─── _recover_unescaped_newlines (Issue #170 Followup) ───────────────────── + + +class TestRecoverUnescapedNewlines: + def test_no_change_when_clean(self): + from app.adapters.qwen_bewerter import _recover_unescaped_newlines + clean = '{"body": "Zeile1\\nZeile2"}' # Source mit `\n` als 2 chars + assert _recover_unescaped_newlines(clean) == clean + + def test_replaces_raw_newline_in_string(self): + from app.adapters.qwen_bewerter import _recover_unescaped_newlines + broken = '{"body": "Zeile1\nZeile2"}' # Source mit echtem newline + recovered = _recover_unescaped_newlines(broken) + # Roher Newline → \n-Sequenz + assert "\\n" in recovered # 2 chars `\n` + assert "\n" not in recovered.replace("\\n", "") # kein weiterer raw + + def test_replaces_tab_and_cr(self): + from app.adapters.qwen_bewerter import _recover_unescaped_newlines + broken = '{"x": "a\tb\rc"}' + recovered = _recover_unescaped_newlines(broken) + assert "\\t" in recovered + assert "\\r" in recovered + + def test_does_not_touch_outside_string(self): + from app.adapters.qwen_bewerter import _recover_unescaped_newlines + # Newline außerhalb von String (im JSON-Whitespace) bleibt erhalten + src = '{\n "x": 1\n}' + recovered = _recover_unescaped_newlines(src) + # Der Recovery toggelt in_string nur bei `"`; die newlines außerhalb + # sollen unbehandelt bleiben. + assert "\n" in recovered + # JSON-loads sollte trotzdem klappen (Newline ist valid whitespace) + import json + data = json.loads(recovered) + assert data == {"x": 1} + + def test_recovery_makes_invalid_json_valid(self): + """Konkreter Use-Case: qwen-Output mit echtem newline im body.""" + from app.adapters.qwen_bewerter import _recover_unescaped_newlines + import json + broken = '{"titel": "T", "body": "Para1\n\nPara2"}' + with pytest.raises(json.JSONDecodeError): + json.loads(broken) + recovered = _recover_unescaped_newlines(broken) + result = json.loads(recovered) + assert "Para1" in result["body"] + assert "Para2" in result["body"] + assert "\n\n" in result["body"] # echte newlines im Python-String + + def test_recovery_preserves_already_escaped(self): + """`\\n` (2 chars `\\` + `n`) im Source bleibt unangetastet.""" + from app.adapters.qwen_bewerter import _recover_unescaped_newlines + # Im Python-Source: `\\\\n` = 4 chars = `\\n` als 2 chars im JSON + src = '{"body": "Zeile1\\nZeile2"}' + recovered = _recover_unescaped_newlines(src) + # Source bleibt gleich, kein Doppel-Escape + assert recovered == src + + +# ─── json_object_mode pass-through (#170 Followup) ───────────────────────── + + +class TestJsonObjectMode: + def test_disabled_default_no_response_format(self): + """Standard: response_format wird NICHT an die API gesendet.""" + fake, calls = _make_fake_client(['{"x": 1}']) + qb = QwenBewerter(client=fake) + _run(qb.bewerte(LlmRequest( + system_prompt="s", user_prompt="u", model="qwen-plus", + max_retries=1, + ))) + assert "response_format" not in calls[0] + + def test_enabled_sends_response_format(self): + """Mit json_object_mode=True: response_format={'type':'json_object'}.""" + fake, calls = _make_fake_client(['{"x": 1}']) + qb = QwenBewerter(client=fake) + _run(qb.bewerte(LlmRequest( + system_prompt="s", user_prompt="u", model="qwen-max", + max_retries=1, json_object_mode=True, + ))) + assert calls[0].get("response_format") == {"type": "json_object"} + + +# ─── Recovery integriert: full bewerte()-Loop ────────────────────────────── + + +class TestBewerteWithRecovery: + def test_bewerte_uses_recovery_when_first_parse_fails(self): + """Erster Versuch produziert json mit raw newline → Recovery klappt → kein Retry.""" + broken = '{"titel": "T", "body": "Lead\n\nPara2"}' + fake, calls = _make_fake_client([broken]) + qb = QwenBewerter(client=fake) + result = _run(qb.bewerte(LlmRequest( + system_prompt="s", user_prompt="u", model="qwen-plus", + max_retries=2, + ))) + # Recovery sollte greifen → ein einziger API-Call reichte + assert len(calls) == 1 + assert result["titel"] == "T" + assert "Para2" in result["body"]