From b13b46a44469aae295a534e7eb12babf0c943082 Mon Sep 17 00:00:00 2001 From: Dotty Dotter Date: Tue, 28 Apr 2026 10:50:26 +0200 Subject: [PATCH] test(#134): Coverage-Backfill drei Module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - app/ingest_votes.py 39.2% → 100% - TestDownloadPdf: schreibt Bytes, propagiert HTTP-Fehler - TestCli: --supported, kein-arg-error, fehlender PDF-Pfad, pdf-Pfad-Run, --url-Download-Pfad, exit-Code 2 bei null Resultaten, Errors-Liste im Output - DB-Error-Collection in ingest_pdf - app/wahlprogramme.py 90.7% → 100% - TestLoadWahlprogrammText: paged-Datei, Normal-Datei-Fallback, fehlende Datei - TestSearchWahlprogramm: leere Returns - TestFindRelevantQuotes: ValueError bei unbekanntem BL - TestFormatQuoteForPrompt: leeres Dict - app/abgeordnetenwatch.py 95.2% → 97.6% - test_rp_pattern_nr_wp_swap: '/538-18.pdf' → '18/538' - test_sn_pattern_dok_nr_leg_per_swap: 'dok_nr=2150&leg_per=8' → '8/2150' Total: 47.59% → 48.69%, 666 → 686 Tests, 0 Failures. --- tests/test_abgeordnetenwatch.py | 15 +++ tests/test_ingest_votes.py | 171 ++++++++++++++++++++++++++++++++ tests/test_wahlprogramme.py | 78 +++++++++++++++ 3 files changed, 264 insertions(+) diff --git a/tests/test_abgeordnetenwatch.py b/tests/test_abgeordnetenwatch.py index 145e51e..889661d 100644 --- a/tests/test_abgeordnetenwatch.py +++ b/tests/test_abgeordnetenwatch.py @@ -88,6 +88,21 @@ class TestExtractDrucksache: html = "Seite 3/12 — nicht relevant" assert extract_drucksache_from_intro(html) is None + def test_rp_pattern_nr_wp_swap(self): + """RP-URL '/538-18.pdf' → drucksache-Format 'wp/nr' = '18/538'. + Wir vermeiden im HTML jegliche 'wp/nr'-Notation, sonst greift der + generische 'Drucksache (\\d+)/(\\d+)'-Match zuerst.""" + from app.abgeordnetenwatch import extract_drucksache_from_intro + html = 'Antrag' + result = extract_drucksache_from_intro(html) + assert result == "18/538" + + def test_sn_pattern_dok_nr_leg_per_swap(self): + """SN-URL 'dok_nr=2150&...&leg_per=8' → '8/2150'.""" + from app.abgeordnetenwatch import extract_drucksache_from_intro + html = 'DS' + assert extract_drucksache_from_intro(html) == "8/2150" + def test_two_digit_wp_number(self): from app.abgeordnetenwatch import extract_drucksache_from_intro html = "Bezug: 19/12345" diff --git a/tests/test_ingest_votes.py b/tests/test_ingest_votes.py index d98dea2..b60e7c6 100644 --- a/tests/test_ingest_votes.py +++ b/tests/test_ingest_votes.py @@ -158,3 +158,174 @@ class TestIngestPdf: assert len(votes) == 1 assert votes[0]["ergebnis"] == "abgelehnt" assert votes[0]["fraktionen_nein"] == ["CDU"] + + def test_db_error_collected_not_raised(self, initialized_db, tmp_path): + """Wenn upsert fehlschlaegt, sollte der Fehler in errors-Liste + landen, nicht propagieren — der Rest des Protokolls soll trotzdem + verarbeitet werden.""" + from app import ingest_votes + fake_pdf = tmp_path / "MMP18-2.pdf" + fake_pdf.write_bytes(b"%PDF") + + async def _failing_upsert(**kw): + raise RuntimeError("simulated DB error") + + parser_results = [ + _fake_parse_result("18/800", "angenommen"), + _fake_parse_result("18/801", "abgelehnt"), + ] + with patch("app.ingest_votes.parse_protocol", return_value=parser_results), \ + patch("app.ingest_votes.upsert_plenum_vote", side_effect=_failing_upsert): + stats = run(ingest_votes.ingest_pdf(fake_pdf)) + + assert stats["written"] == 0 + assert len(stats["errors"]) == 2 + assert "18/800" in stats["errors"][0] + assert "simulated DB error" in stats["errors"][0] + + +class TestDownloadPdf: + def test_writes_response_bytes(self, tmp_path): + from app.ingest_votes import _download_pdf + + class _FakeResp: + def read(self): + return b"%PDF downloaded content" + def __enter__(self): + return self + def __exit__(self, *a): + return False + + dest = tmp_path / "out.pdf" + with patch("urllib.request.urlopen", return_value=_FakeResp()): + _download_pdf("https://example.com/x.pdf", dest) + assert dest.read_bytes() == b"%PDF downloaded content" + + def test_propagates_http_error(self, tmp_path): + """HTTP-Fehler beim Download propagieren — der Caller (CLI) + soll mit Stack-Trace abbrechen, nicht still weitergehen.""" + from app.ingest_votes import _download_pdf + + def _raise(*a, **kw): + raise OSError("Connection refused") + + with patch("urllib.request.urlopen", side_effect=_raise): + with pytest.raises(OSError): + _download_pdf("https://example.com/x.pdf", tmp_path / "out.pdf") + + +class TestCli: + """Tests fuer die CLI-Wrapper-Funktion _cli — argv-basiert.""" + + def test_supported_lists_bl(self, capsys): + """--supported gibt registrierte BL aus und exitet mit 0.""" + from app import ingest_votes + with patch.object(ingest_votes.sys, "argv", ["ingest_votes", "--supported"]): + with pytest.raises(SystemExit) as exc: + ingest_votes._cli() + assert exc.value.code == 0 + out = capsys.readouterr().out + assert "NRW" in out + + def test_no_args_errors(self, capsys): + """Ohne --pdf und --url muss CLI mit klarer Fehlermeldung exiten.""" + from app import ingest_votes + with patch.object(ingest_votes.sys, "argv", ["ingest_votes"]): + with pytest.raises(SystemExit): + ingest_votes._cli() + + def test_pdf_path_missing_errors(self, capsys, tmp_path): + """--pdf mit nicht-existentem Pfad exitet 1.""" + from app import ingest_votes + nonexistent = tmp_path / "missing.pdf" + with patch.object(ingest_votes.sys, "argv", + ["ingest_votes", "--pdf", str(nonexistent)]): + with pytest.raises(SystemExit) as exc: + ingest_votes._cli() + assert exc.value.code == 1 + err = capsys.readouterr().err + assert "nicht gefunden" in err + + def test_pdf_path_calls_ingest(self, tmp_path, capsys): + """--pdf mit existentem Pfad ruft ingest_pdf und gibt Statistik aus.""" + from app import ingest_votes + pdf = tmp_path / "MMP18-X.pdf" + pdf.write_bytes(b"%PDF") + + fake_stats = { + "parsed": 3, "written": 2, + "skipped_no_drucksache": 1, "errors": [], + "protokoll_id": "MMP18-X", "bundesland": "NRW", + } + with patch("app.ingest_votes.asyncio.run", return_value=fake_stats), \ + patch.object(ingest_votes.sys, "argv", + ["ingest_votes", "--pdf", str(pdf)]): + ingest_votes._cli() + out = capsys.readouterr().out + assert "MMP18-X" in out + assert "parsed: 3" in out + assert "written: 2" in out + assert "ohne DS: 1" in out + + def test_url_downloads_then_ingests(self, capsys): + """--url path: Download in tmp, dann ingest_pdf.""" + from app import ingest_votes + + fake_stats = { + "parsed": 1, "written": 1, "skipped_no_drucksache": 0, + "errors": [], "protokoll_id": "MMP18-Y", + "bundesland": "NRW", + } + + class _FakeResp: + def read(self): + return b"%PDF downloaded" + def __enter__(self): + return self + def __exit__(self, *a): + return False + + with patch("app.ingest_votes.asyncio.run", return_value=fake_stats), \ + patch("urllib.request.urlopen", return_value=_FakeResp()), \ + patch.object(ingest_votes.sys, "argv", + ["ingest_votes", "--url", + "https://example.com/MMP18-Y.pdf"]): + ingest_votes._cli() + out = capsys.readouterr().out + assert "MMP18-Y" in out + + def test_zero_results_exits_2(self, tmp_path, capsys): + """Wenn weder geschrieben noch Fehler: exit code 2 (= 'no signal').""" + from app import ingest_votes + pdf = tmp_path / "leer.pdf" + pdf.write_bytes(b"%PDF") + + fake_stats = { + "parsed": 0, "written": 0, "skipped_no_drucksache": 0, + "errors": [], "protokoll_id": "leer", "bundesland": "NRW", + } + with patch("app.ingest_votes.asyncio.run", return_value=fake_stats), \ + patch.object(ingest_votes.sys, "argv", + ["ingest_votes", "--pdf", str(pdf)]): + with pytest.raises(SystemExit) as exc: + ingest_votes._cli() + assert exc.value.code == 2 + + def test_errors_listed_in_output(self, tmp_path, capsys): + """Wenn errors gefuellt sind, erscheint die Errors-Zeile + erste 5.""" + from app import ingest_votes + pdf = tmp_path / "x.pdf" + pdf.write_bytes(b"%PDF") + fake_stats = { + "parsed": 2, "written": 0, "skipped_no_drucksache": 0, + "errors": ["18/1: oops", "18/2: nope"], + "protokoll_id": "x", "bundesland": "NRW", + } + with patch("app.ingest_votes.asyncio.run", return_value=fake_stats), \ + patch.object(ingest_votes.sys, "argv", + ["ingest_votes", "--pdf", str(pdf)]): + ingest_votes._cli() + out = capsys.readouterr().out + assert "errors: 2" in out + assert "18/1: oops" in out + assert "18/2: nope" in out diff --git a/tests/test_wahlprogramme.py b/tests/test_wahlprogramme.py index dedbfd2..b6e35a2 100644 --- a/tests/test_wahlprogramme.py +++ b/tests/test_wahlprogramme.py @@ -1,4 +1,6 @@ """Tests for wahlprogramme.py — registry consistency + file existence.""" +import pytest + from app.wahlprogramme import ( WAHLPROGRAMME, REFERENZEN_PATH, @@ -116,3 +118,79 @@ class TestEmbeddingsRegistryConsistency: "WAHLPROGRAMME entries missing in embeddings.PROGRAMME:\n " + "\n ".join(missing) ) + + +# ───────────────────────────────────────────────────────────────────────────── +# load_wahlprogramm_text — Fallback-Pfade (#134 Coverage-Backfill) +# ───────────────────────────────────────────────────────────────────────────── + +class TestLoadWahlprogrammText: + def test_returns_empty_for_unknown_combination(self): + from app.wahlprogramme import load_wahlprogramm_text + assert load_wahlprogramm_text("XX", "XYZ") == {} + + def test_paged_textfile_used_when_present(self, tmp_path, monkeypatch): + """Wenn die paged-Textdatei existiert, wird sie genutzt. + Format: '--- PAGE N ---'-Marker pro Seitenanfang.""" + from app import wahlprogramme as wp_mod + # Mock get_wahlprogramm -> bekannte Datei + monkeypatch.setattr(wp_mod, "get_wahlprogramm", + lambda bl, p: {"file": "test.pdf"}) + paged = tmp_path / "test-paged.txt" + paged.write_text("--- PAGE 1 ---\nseite eins\n--- PAGE 2 ---\nseite zwei") + monkeypatch.setattr(wp_mod, "KONTEXT_PATH", tmp_path) + + result = wp_mod.load_wahlprogramm_text("X", "Y") + assert 2 in result + assert "seite zwei" in result[2] + + def test_falls_back_to_normal_textfile(self, tmp_path, monkeypatch): + """Ohne paged-Datei wird auf normale .txt-Datei zurueckgefallen, + komplett unter Seite 1.""" + from app import wahlprogramme as wp_mod + monkeypatch.setattr(wp_mod, "get_wahlprogramm", + lambda bl, p: {"file": "test.pdf"}) + normal = tmp_path / "test.txt" + normal.write_text("flacher text ohne seitenmarker") + monkeypatch.setattr(wp_mod, "KONTEXT_PATH", tmp_path) + + result = wp_mod.load_wahlprogramm_text("X", "Y") + assert result == {1: "flacher text ohne seitenmarker"} + + def test_returns_empty_when_no_textfile(self, tmp_path, monkeypatch): + """Weder paged- noch normale Textdatei → leeres Dict.""" + from app import wahlprogramme as wp_mod + monkeypatch.setattr(wp_mod, "get_wahlprogramm", + lambda bl, p: {"file": "test.pdf"}) + # tmp_path ist leer + monkeypatch.setattr(wp_mod, "KONTEXT_PATH", tmp_path) + + assert wp_mod.load_wahlprogramm_text("X", "Y") == {} + + +class TestSearchWahlprogramm: + def test_returns_empty_for_unknown_combination(self): + from app.wahlprogramme import search_wahlprogramm + assert search_wahlprogramm("XX", "XYZ", ["test"]) == [] + + def test_returns_empty_when_text_missing(self, monkeypatch): + """Bekannte Partei + Bundesland aber keine Textdatei → leer.""" + from app import wahlprogramme as wp_mod + monkeypatch.setattr(wp_mod, "get_wahlprogramm", + lambda bl, p: {"file": "missing.pdf"}) + monkeypatch.setattr(wp_mod, "load_wahlprogramm_text", + lambda bl, p: {}) + assert wp_mod.search_wahlprogramm("X", "Y", ["test"]) == [] + + +class TestFindRelevantQuotes: + def test_unknown_bundesland_raises(self): + from app.wahlprogramme import find_relevant_quotes + with pytest.raises(ValueError, match="Unbekanntes Bundesland"): + find_relevant_quotes("Antrag-Text", ["CDU"], bundesland="ZZ") + + +class TestFormatQuoteForPrompt: + def test_empty_quotes_returns_empty_string(self): + from app.wahlprogramme import format_quote_for_prompt + assert format_quote_for_prompt({}) == ""