test(#134): Coverage-Backfill drei Module
- app/ingest_votes.py 39.2% → 100%
- TestDownloadPdf: schreibt Bytes, propagiert HTTP-Fehler
- TestCli: --supported, kein-arg-error, fehlender PDF-Pfad,
pdf-Pfad-Run, --url-Download-Pfad, exit-Code 2 bei null Resultaten,
Errors-Liste im Output
- DB-Error-Collection in ingest_pdf
- app/wahlprogramme.py 90.7% → 100%
- TestLoadWahlprogrammText: paged-Datei, Normal-Datei-Fallback,
fehlende Datei
- TestSearchWahlprogramm: leere Returns
- TestFindRelevantQuotes: ValueError bei unbekanntem BL
- TestFormatQuoteForPrompt: leeres Dict
- app/abgeordnetenwatch.py 95.2% → 97.6%
- test_rp_pattern_nr_wp_swap: '/538-18.pdf' → '18/538'
- test_sn_pattern_dok_nr_leg_per_swap: 'dok_nr=2150&leg_per=8' → '8/2150'
Total: 47.59% → 48.69%, 666 → 686 Tests, 0 Failures.
This commit is contained in:
parent
145ad1e8d4
commit
b13b46a444
@ -88,6 +88,21 @@ class TestExtractDrucksache:
|
|||||||
html = "Seite 3/12 — nicht relevant"
|
html = "Seite 3/12 — nicht relevant"
|
||||||
assert extract_drucksache_from_intro(html) is None
|
assert extract_drucksache_from_intro(html) is None
|
||||||
|
|
||||||
|
def test_rp_pattern_nr_wp_swap(self):
    """RP URL '/538-18.pdf' must yield the 'wp/nr' form, i.e. '18/538'.

    The HTML deliberately contains no 'wp/nr' notation, because otherwise
    the generic 'Drucksache (\\d+)/(\\d+)' match would fire first.
    """
    from app.abgeordnetenwatch import extract_drucksache_from_intro

    rp_link = '<a href="https://landtag.rlp.de/dokumente/538-18.pdf">Antrag</a>'
    assert extract_drucksache_from_intro(rp_link) == "18/538"
|
||||||
|
|
||||||
|
def test_sn_pattern_dok_nr_leg_per_swap(self):
    """SN URL with 'dok_nr=2150&...&leg_per=8' must yield '8/2150'."""
    from app.abgeordnetenwatch import extract_drucksache_from_intro

    sn_link = '<a href="/cgi-bin/foo?dok_nr=2150&extra=x&leg_per=8">DS</a>'
    drucksache = extract_drucksache_from_intro(sn_link)
    assert drucksache == "8/2150"
|
||||||
|
|
||||||
def test_two_digit_wp_number(self):
|
def test_two_digit_wp_number(self):
|
||||||
from app.abgeordnetenwatch import extract_drucksache_from_intro
|
from app.abgeordnetenwatch import extract_drucksache_from_intro
|
||||||
html = "Bezug: 19/12345"
|
html = "Bezug: 19/12345"
|
||||||
|
|||||||
@ -158,3 +158,174 @@ class TestIngestPdf:
|
|||||||
assert len(votes) == 1
|
assert len(votes) == 1
|
||||||
assert votes[0]["ergebnis"] == "abgelehnt"
|
assert votes[0]["ergebnis"] == "abgelehnt"
|
||||||
assert votes[0]["fraktionen_nein"] == ["CDU"]
|
assert votes[0]["fraktionen_nein"] == ["CDU"]
|
||||||
|
|
||||||
|
def test_db_error_collected_not_raised(self, initialized_db, tmp_path):
    """A failing upsert must land in ``stats["errors"]`` instead of propagating,
    so that the rest of the protocol is still processed."""
    from app import ingest_votes

    protocol_pdf = tmp_path / "MMP18-2.pdf"
    protocol_pdf.write_bytes(b"%PDF")

    async def _upsert_that_raises(**kwargs):
        # Stand-in for the DB write: every call fails.
        raise RuntimeError("simulated DB error")

    parsed_votes = [
        _fake_parse_result("18/800", "angenommen"),
        _fake_parse_result("18/801", "abgelehnt"),
    ]
    parser_patch = patch(
        "app.ingest_votes.parse_protocol", return_value=parsed_votes
    )
    upsert_patch = patch(
        "app.ingest_votes.upsert_plenum_vote", side_effect=_upsert_that_raises
    )
    with parser_patch, upsert_patch:
        stats = run(ingest_votes.ingest_pdf(protocol_pdf))

    assert stats["written"] == 0
    # Both parsed votes hit the failing upsert, so both must be reported.
    assert len(stats["errors"]) == 2
    first_error = stats["errors"][0]
    assert "18/800" in first_error
    assert "simulated DB error" in first_error
|
||||||
|
|
||||||
|
|
||||||
|
class TestDownloadPdf:
    """Tests for ``_download_pdf`` with ``urllib.request.urlopen`` patched out."""

    def test_writes_response_bytes(self, tmp_path):
        """The bytes read from the response end up in the destination file."""
        from app.ingest_votes import _download_pdf

        payload = b"%PDF downloaded content"

        class _StubResponse:
            """Minimal context-manager stand-in for the urlopen response."""

            def __enter__(self):
                return self

            def __exit__(self, *exc_info):
                return False

            def read(self):
                return payload

        target = tmp_path / "out.pdf"
        with patch("urllib.request.urlopen", return_value=_StubResponse()):
            _download_pdf("https://example.com/x.pdf", target)
        assert target.read_bytes() == payload

    def test_propagates_http_error(self, tmp_path):
        """Download failures propagate: the caller (CLI) is meant to abort
        with a stack trace rather than silently carry on."""
        from app.ingest_votes import _download_pdf

        target = tmp_path / "out.pdf"
        # An exception instance as side_effect is raised on every call.
        failing_urlopen = patch(
            "urllib.request.urlopen", side_effect=OSError("Connection refused")
        )
        with failing_urlopen, pytest.raises(OSError):
            _download_pdf("https://example.com/x.pdf", target)
|
||||||
|
|
||||||
|
|
||||||
|
class TestCli:
    """Tests for the CLI wrapper function ``_cli``, driven through ``sys.argv``.

    Where ``asyncio.run`` is stubbed, whatever ``_cli`` hands to it is never
    executed; those tests only exercise how the returned stats are reported.
    """

    def test_supported_lists_bl(self, capsys):
        """``--supported`` prints the registered Bundeslaender and exits with 0."""
        from app import ingest_votes

        argv = ["ingest_votes", "--supported"]
        with patch.object(ingest_votes.sys, "argv", argv), \
                pytest.raises(SystemExit) as exc:
            ingest_votes._cli()
        assert exc.value.code == 0
        captured = capsys.readouterr()
        assert "NRW" in captured.out

    def test_no_args_errors(self, capsys):
        """Without ``--pdf`` and ``--url`` the CLI must exit with an error."""
        from app import ingest_votes

        argv = ["ingest_votes"]
        with patch.object(ingest_votes.sys, "argv", argv), \
                pytest.raises(SystemExit):
            ingest_votes._cli()

    def test_pdf_path_missing_errors(self, capsys, tmp_path):
        """``--pdf`` pointing at a non-existent path exits with code 1."""
        from app import ingest_votes

        missing_pdf = tmp_path / "missing.pdf"
        argv = ["ingest_votes", "--pdf", str(missing_pdf)]
        with patch.object(ingest_votes.sys, "argv", argv), \
                pytest.raises(SystemExit) as exc:
            ingest_votes._cli()
        assert exc.value.code == 1
        captured = capsys.readouterr()
        assert "nicht gefunden" in captured.err

    def test_pdf_path_calls_ingest(self, tmp_path, capsys):
        """``--pdf`` with an existing path runs the ingest and prints the stats."""
        from app import ingest_votes

        pdf_file = tmp_path / "MMP18-X.pdf"
        pdf_file.write_bytes(b"%PDF")

        canned_stats = {
            "parsed": 3,
            "written": 2,
            "skipped_no_drucksache": 1,
            "errors": [],
            "protokoll_id": "MMP18-X",
            "bundesland": "NRW",
        }
        argv = ["ingest_votes", "--pdf", str(pdf_file)]
        with patch("app.ingest_votes.asyncio.run", return_value=canned_stats):
            with patch.object(ingest_votes.sys, "argv", argv):
                ingest_votes._cli()

        printed = capsys.readouterr().out
        for expected in ("MMP18-X", "parsed: 3", "written: 2", "ohne DS: 1"):
            assert expected in printed

    def test_url_downloads_then_ingests(self, capsys):
        """``--url`` path: download first, then ingest the downloaded PDF."""
        from app import ingest_votes

        canned_stats = {
            "parsed": 1,
            "written": 1,
            "skipped_no_drucksache": 0,
            "errors": [],
            "protokoll_id": "MMP18-Y",
            "bundesland": "NRW",
        }

        class _StubResponse:
            """Minimal context-manager stand-in for the urlopen response."""

            def __enter__(self):
                return self

            def __exit__(self, *exc_info):
                return False

            def read(self):
                return b"%PDF downloaded"

        argv = ["ingest_votes", "--url", "https://example.com/MMP18-Y.pdf"]
        with patch("app.ingest_votes.asyncio.run", return_value=canned_stats):
            with patch("urllib.request.urlopen", return_value=_StubResponse()):
                with patch.object(ingest_votes.sys, "argv", argv):
                    ingest_votes._cli()

        printed = capsys.readouterr().out
        assert "MMP18-Y" in printed

    def test_zero_results_exits_2(self, tmp_path, capsys):
        """Nothing written and no errors: exit code 2 (= 'no signal')."""
        from app import ingest_votes

        pdf_file = tmp_path / "leer.pdf"
        pdf_file.write_bytes(b"%PDF")

        canned_stats = {
            "parsed": 0,
            "written": 0,
            "skipped_no_drucksache": 0,
            "errors": [],
            "protokoll_id": "leer",
            "bundesland": "NRW",
        }
        argv = ["ingest_votes", "--pdf", str(pdf_file)]
        with patch("app.ingest_votes.asyncio.run", return_value=canned_stats):
            with patch.object(ingest_votes.sys, "argv", argv), \
                    pytest.raises(SystemExit) as exc:
                ingest_votes._cli()
        assert exc.value.code == 2

    def test_errors_listed_in_output(self, tmp_path, capsys):
        """With errors present, the errors line and the individual entries
        show up in the output (the CLI is described as printing the first 5)."""
        from app import ingest_votes

        pdf_file = tmp_path / "x.pdf"
        pdf_file.write_bytes(b"%PDF")

        canned_stats = {
            "parsed": 2,
            "written": 0,
            "skipped_no_drucksache": 0,
            "errors": ["18/1: oops", "18/2: nope"],
            "protokoll_id": "x",
            "bundesland": "NRW",
        }
        argv = ["ingest_votes", "--pdf", str(pdf_file)]
        with patch("app.ingest_votes.asyncio.run", return_value=canned_stats):
            with patch.object(ingest_votes.sys, "argv", argv):
                ingest_votes._cli()

        printed = capsys.readouterr().out
        for expected in ("errors: 2", "18/1: oops", "18/2: nope"):
            assert expected in printed
|
||||||
|
|||||||
@ -1,4 +1,6 @@
|
|||||||
"""Tests for wahlprogramme.py — registry consistency + file existence."""
|
"""Tests for wahlprogramme.py — registry consistency + file existence."""
|
||||||
|
import pytest
|
||||||
|
|
||||||
from app.wahlprogramme import (
|
from app.wahlprogramme import (
|
||||||
WAHLPROGRAMME,
|
WAHLPROGRAMME,
|
||||||
REFERENZEN_PATH,
|
REFERENZEN_PATH,
|
||||||
@ -116,3 +118,79 @@ class TestEmbeddingsRegistryConsistency:
|
|||||||
"WAHLPROGRAMME entries missing in embeddings.PROGRAMME:\n "
|
"WAHLPROGRAMME entries missing in embeddings.PROGRAMME:\n "
|
||||||
+ "\n ".join(missing)
|
+ "\n ".join(missing)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
# load_wahlprogramm_text — Fallback-Pfade (#134 Coverage-Backfill)
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestLoadWahlprogrammText:
    """Fallback paths of ``load_wahlprogramm_text`` (paged file, flat file, none)."""

    def test_returns_empty_for_unknown_combination(self):
        """An unknown Bundesland/party pair yields an empty dict."""
        from app.wahlprogramme import load_wahlprogramm_text

        pages = load_wahlprogramm_text("XX", "XYZ")
        assert pages == {}

    def test_paged_textfile_used_when_present(self, tmp_path, monkeypatch):
        """If the paged text file exists, it is the one that gets used.

        File format: one '--- PAGE N ---' marker at the start of each page.
        """
        from app import wahlprogramme as wp_mod

        def _known_programme(bl, p):
            # Pretend the registry knows this combination.
            return {"file": "test.pdf"}

        (tmp_path / "test-paged.txt").write_text(
            "--- PAGE 1 ---\nseite eins\n--- PAGE 2 ---\nseite zwei"
        )
        monkeypatch.setattr(wp_mod, "get_wahlprogramm", _known_programme)
        monkeypatch.setattr(wp_mod, "KONTEXT_PATH", tmp_path)

        pages = wp_mod.load_wahlprogramm_text("X", "Y")
        assert 2 in pages
        assert "seite zwei" in pages[2]

    def test_falls_back_to_normal_textfile(self, tmp_path, monkeypatch):
        """Without a paged file the plain .txt file is used, with its whole
        content filed under page 1."""
        from app import wahlprogramme as wp_mod

        def _known_programme(bl, p):
            return {"file": "test.pdf"}

        flat_text = "flacher text ohne seitenmarker"
        (tmp_path / "test.txt").write_text(flat_text)
        monkeypatch.setattr(wp_mod, "get_wahlprogramm", _known_programme)
        monkeypatch.setattr(wp_mod, "KONTEXT_PATH", tmp_path)

        pages = wp_mod.load_wahlprogramm_text("X", "Y")
        assert pages == {1: flat_text}

    def test_returns_empty_when_no_textfile(self, tmp_path, monkeypatch):
        """Neither a paged nor a plain text file present: empty dict."""
        from app import wahlprogramme as wp_mod

        def _known_programme(bl, p):
            return {"file": "test.pdf"}

        monkeypatch.setattr(wp_mod, "get_wahlprogramm", _known_programme)
        # tmp_path is left empty on purpose.
        monkeypatch.setattr(wp_mod, "KONTEXT_PATH", tmp_path)

        pages = wp_mod.load_wahlprogramm_text("X", "Y")
        assert pages == {}
|
||||||
|
|
||||||
|
|
||||||
|
class TestSearchWahlprogramm:
    """Empty-result paths of ``search_wahlprogramm``."""

    def test_returns_empty_for_unknown_combination(self):
        """An unknown Bundesland/party pair yields an empty list."""
        from app.wahlprogramme import search_wahlprogramm

        hits = search_wahlprogramm("XX", "XYZ", ["test"])
        assert hits == []

    def test_returns_empty_when_text_missing(self, monkeypatch):
        """Known party and Bundesland, but no text available: empty list."""
        from app import wahlprogramme as wp_mod

        def _known_programme(bl, p):
            return {"file": "missing.pdf"}

        def _no_text(bl, p):
            return {}

        monkeypatch.setattr(wp_mod, "get_wahlprogramm", _known_programme)
        monkeypatch.setattr(wp_mod, "load_wahlprogramm_text", _no_text)

        hits = wp_mod.search_wahlprogramm("X", "Y", ["test"])
        assert hits == []
|
||||||
|
|
||||||
|
|
||||||
|
class TestFindRelevantQuotes:
    """Error handling of ``find_relevant_quotes``."""

    def test_unknown_bundesland_raises(self):
        """An unknown Bundesland code raises ValueError with a matching message."""
        from app.wahlprogramme import find_relevant_quotes

        expect_error = pytest.raises(ValueError, match="Unbekanntes Bundesland")
        with expect_error:
            find_relevant_quotes("Antrag-Text", ["CDU"], bundesland="ZZ")
|
||||||
|
|
||||||
|
|
||||||
|
class TestFormatQuoteForPrompt:
    """Edge cases of ``format_quote_for_prompt``."""

    def test_empty_quotes_returns_empty_string(self):
        """An empty quotes dict formats to the empty string."""
        from app.wahlprogramme import format_quote_for_prompt

        formatted = format_quote_for_prompt({})
        assert formatted == ""
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user