# gwoe-antragspruefer/tests/test_ingest_votes.py
#
# 161 lines
# 6.2 KiB
# Python
# Raw Normal View History

"""Tests fuer app/ingest_votes.py — PDF → plenum_vote_results Pipeline (#106 / #126)."""
from __future__ import annotations
import asyncio
import sys
from pathlib import Path
from unittest.mock import patch
import pytest
# Same aiosqlite setup problem as in test_database.py: import the real package
# here so that nothing in this module runs against a stub.
# An "aiosqlite" entry in sys.modules without a ``connect`` attribute is treated
# as a stub (presumably injected by another test module) and is dropped before
# the real import.
_aio = sys.modules.get("aiosqlite")
if _aio is not None and not hasattr(_aio, "connect"):
    del sys.modules["aiosqlite"]
import aiosqlite  # noqa: E402
import importlib  # noqa: E402

# If app.database was already imported while bound to such a stub (its own
# ``aiosqlite`` attribute lacks ``connect``), discard the cached module so the
# import below rebuilds it against the real package. When a healthy module is
# already cached, import_module() just returns it unchanged.
_db_mod = sys.modules.get("app.database")
if _db_mod is not None and not hasattr(
    getattr(_db_mod, "aiosqlite", None), "connect"
):
    del sys.modules["app.database"]
importlib.import_module("app.database")
def run(coro):
    """Run *coro* to completion on the thread's event loop and return its result.

    The current event loop is reused when one is available. If the thread has
    no current loop (``asyncio.get_event_loop()`` raises ``RuntimeError``) or
    the current loop has already been closed, a new loop is created and
    installed as the current one, so the helper keeps working regardless of
    what earlier code did to the loop state.
    """
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # No current loop for this thread (e.g. it was reset to None).
        loop = None
    if loop is None or loop.is_closed():
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    return loop.run_until_complete(coro)
@pytest.fixture()
def db_path(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> str:
    """Point ``settings.db_path`` at a per-test database file and return its path.

    The file ``test.db`` lives inside pytest's ``tmp_path``; the setting is
    patched via ``monkeypatch`` so it is restored after the test. The fixture
    only computes the path, it does not create the file itself.
    """
    # Imported lazily so the module-level aiosqlite fix-up has already run.
    from app.config import settings

    path = tmp_path / "test.db"
    monkeypatch.setattr(settings, "db_path", str(path))
    return str(path)
@pytest.fixture()
def initialized_db(db_path: str) -> str:
    """Run ``database.init_db()`` against the temporary database and return its path.

    Depends on ``db_path`` so that the settings already point at the
    per-test file before the initialisation coroutine is executed.
    """
    from app import database

    run(database.init_db())
    return db_path
def _fake_parse_result(drucksache: str, ergebnis: str = "angenommen",
einstimmig: bool = False,
ja: list[str] = None, nein: list[str] = None,
enth: list[str] = None) -> dict:
return {
"drucksache": drucksache,
"ergebnis": ergebnis,
"einstimmig": einstimmig,
"votes": {
"ja": ja or [],
"nein": nein or [],
"enthaltung": enth or [],
},
"kind": "direct",
}
class TestIngestPdf:
    """Tests for ``ingest_votes.ingest_pdf`` with ``parse_protocol`` patched out.

    Every test writes a placeholder PDF into ``tmp_path``, replaces
    ``app.ingest_votes.parse_protocol`` with canned results and then inspects
    the returned stats and/or the rows read back via
    ``database.get_plenum_votes``.
    """

    def test_writes_each_parsed_vote(self, initialized_db, tmp_path):
        """Each parsed entry is written and can be read back by Drucksache."""
        from app import ingest_votes, database

        fake_pdf = tmp_path / "MMP18-119.pdf"
        fake_pdf.write_bytes(b"%PDF-1.4 fake")
        parser_results = [
            _fake_parse_result("18/100", "angenommen", ja=["CDU", "SPD"], nein=["AfD"]),
            _fake_parse_result("18/200", "abgelehnt", ja=["AfD"], nein=["CDU", "SPD"]),
        ]
        with patch("app.ingest_votes.parse_protocol", return_value=parser_results):
            stats = run(ingest_votes.ingest_pdf(fake_pdf))

        assert stats["parsed"] == 2
        assert stats["written"] == 2

        votes_100 = run(database.get_plenum_votes("NRW", "18/100"))
        assert len(votes_100) == 1
        assert votes_100[0]["fraktionen_ja"] == ["CDU", "SPD"]
        assert votes_100[0]["quelle_protokoll"] == "MMP18-119"

        # The second entry must have been persisted as well, not just counted.
        votes_200 = run(database.get_plenum_votes("NRW", "18/200"))
        assert len(votes_200) == 1
        assert votes_200[0]["ergebnis"] == "abgelehnt"
        assert votes_200[0]["fraktionen_nein"] == ["CDU", "SPD"]

    def test_skips_entries_without_drucksache(self, initialized_db, tmp_path):
        """Anchors without a resolvable Drucksache are counted but not
        written (otherwise the import would clutter the DB)."""
        from app import ingest_votes

        fake_pdf = tmp_path / "MMP18-50.pdf"
        fake_pdf.write_bytes(b"%PDF")
        parser_results = [
            _fake_parse_result("18/300", "angenommen"),
            {"drucksache": None, "ergebnis": "angenommen", "votes": {"ja": [], "nein": [], "enthaltung": []}},
        ]
        with patch("app.ingest_votes.parse_protocol", return_value=parser_results):
            stats = run(ingest_votes.ingest_pdf(fake_pdf))

        assert stats["parsed"] == 2
        assert stats["written"] == 1
        assert stats["skipped_no_drucksache"] == 1

    def test_protokoll_id_default_from_stem(self, initialized_db, tmp_path):
        """Without an explicit id, the PDF's file stem is used as protokoll_id."""
        from app import ingest_votes, database

        fake_pdf = tmp_path / "MMP18-77.pdf"
        fake_pdf.write_bytes(b"%PDF")
        with patch("app.ingest_votes.parse_protocol",
                   return_value=[_fake_parse_result("18/500")]):
            stats = run(ingest_votes.ingest_pdf(fake_pdf))

        assert stats["protokoll_id"] == "MMP18-77"
        votes = run(database.get_plenum_votes("NRW", "18/500"))
        assert len(votes) == 1
        assert votes[0]["quelle_protokoll"] == "MMP18-77"

    def test_protokoll_id_override(self, initialized_db, tmp_path):
        """Explicit ``protokoll_id`` and ``quelle_url`` end up in the stored row."""
        from app import ingest_votes, database

        fake_pdf = tmp_path / "scan.pdf"
        fake_pdf.write_bytes(b"%PDF")
        with patch("app.ingest_votes.parse_protocol",
                   return_value=[_fake_parse_result("18/600")]):
            run(ingest_votes.ingest_pdf(
                fake_pdf, protokoll_id="MMP18-99", quelle_url="https://example.com/x.pdf",
            ))

        votes = run(database.get_plenum_votes("NRW", "18/600"))
        assert len(votes) == 1
        assert votes[0]["quelle_protokoll"] == "MMP18-99"
        assert votes[0]["quelle_url"] == "https://example.com/x.pdf"

    def test_bundesland_override(self, initialized_db, tmp_path):
        """Adapters for other federal states could reuse the same ingest helper."""
        from app import ingest_votes, database

        fake_pdf = tmp_path / "MV-MP1.pdf"
        fake_pdf.write_bytes(b"%PDF")
        with patch("app.ingest_votes.parse_protocol",
                   return_value=[_fake_parse_result("8/100")]):
            run(ingest_votes.ingest_pdf(fake_pdf, bundesland="MV"))

        # Lookup under the correct Bundesland
        votes_mv = run(database.get_plenum_votes("MV", "8/100"))
        assert len(votes_mv) == 1
        votes_nrw = run(database.get_plenum_votes("NRW", "8/100"))
        assert votes_nrw == []

    def test_re_ingest_overwrites_same_protokoll(self, initialized_db, tmp_path):
        """Re-ingesting the same protocol updates the existing entries
        (idempotent) instead of creating a duplicate."""
        from app import ingest_votes, database

        fake_pdf = tmp_path / "MMP18-1.pdf"
        fake_pdf.write_bytes(b"%PDF")
        with patch("app.ingest_votes.parse_protocol",
                   return_value=[_fake_parse_result("18/700", "angenommen", ja=["CDU"])]):
            run(ingest_votes.ingest_pdf(fake_pdf))

        # Re-ingest with a corrected result (e.g. after a parser fix)
        with patch("app.ingest_votes.parse_protocol",
                   return_value=[_fake_parse_result("18/700", "abgelehnt", ja=[], nein=["CDU"])]):
            run(ingest_votes.ingest_pdf(fake_pdf))

        votes = run(database.get_plenum_votes("NRW", "18/700"))
        assert len(votes) == 1
        assert votes[0]["ergebnis"] == "abgelehnt"
        assert votes[0]["fraktionen_nein"] == ["CDU"]