# gwoe-antragspruefer/tests/test_auswertungen.py

"""Tests für app.wahlperioden und app.auswertungen.

Issue #58 + Roadmap #59 Phase C. Verifiziert die Aggregations-Logik
gegen eine in-memory SQLite-DB mit kontrollierten Sample-Assessments.
"""
from __future__ import annotations

import json
import sqlite3
from datetime import datetime
from pathlib import Path

import pytest

from app.auswertungen import (
    aggregate_matrix,
    aggregate_zeitreihe,
    export_long_format,
)
from app.wahlperioden import all_wahlperioden, wahlperiode_for


# ─────────────────────────────────────────────────────────────────────────────
# wahlperioden helper
# ─────────────────────────────────────────────────────────────────────────────


class TestWahlperiodeFor:
    """Expectations for the date-to-legislative-period helpers."""

    def test_current_wp_for_recent_date(self):
        resolved = wahlperiode_for("2026-03-18", "MV")
        assert resolved == "MV-WP8"

    def test_previous_wp_for_old_date(self):
        # MV WP8 started on 2021-10-26 — anything before that is WP7.
        resolved = wahlperiode_for("2020-01-01", "MV")
        assert resolved == "MV-WP7"

    def test_unknown_bl_returns_none(self):
        resolved = wahlperiode_for("2026-01-01", "XX")
        assert resolved is None

    def test_empty_datum_returns_current_wp(self):
        # Without a known date the current period is assumed, because
        # that is the only sensible default.
        resolved = wahlperiode_for("", "NRW")
        assert resolved == "NRW-WP18"

    def test_all_wahlperioden_lists_each_bl_twice(self):
        periods = all_wahlperioden()
        # 16 federal states + BUND, two periods each = 34 entries
        # (#56 adds BUND).
        assert len(periods) == 34
        # Current and previous period for NRW, plus the federal level.
        for expected in ("NRW-WP18", "NRW-WP17", "BUND-WP21"):
            assert expected in periods


# ─────────────────────────────────────────────────────────────────────────────
# Test-DB-Fixture
# ─────────────────────────────────────────────────────────────────────────────


@pytest.fixture
def sample_db(tmp_path: Path) -> Path:
    """Create a small assessments database that covers the typical cases.

    The SQLite file is written to ``tmp_path / "test_assessments.db"`` and
    holds seven rows: three NRW motions, two MV motions dated 2024/2025
    (one of them filed by two parties), one MV motion dated 2020, and one
    BB motion filed by "FREIE WÄHLER".

    Args:
        tmp_path: pytest's per-test temporary directory.

    Returns:
        Path to the populated database file.
    """
    # Local imports keep this fixture self-contained.
    from contextlib import closing
    from datetime import timezone

    db = tmp_path / "test_assessments.db"

    # Tuple layout: (drucksache, bundesland, datum, fraktionen-JSON, gwoe_score)
    samples = [
        # NRW WP18 — three motions, two parties
        ("18/100", "NRW", "2024-01-15", '["CDU"]', 7.0),
        ("18/101", "NRW", "2024-02-15", '["SPD"]', 8.0),
        ("18/102", "NRW", "2024-03-15", '["CDU"]', 5.0),
        # MV WP8 — coalition motion (both parties are meant to count)
        ("8/200", "MV", "2024-04-01", '["SPD","LINKE"]', 6.0),
        ("8/201", "MV", "2025-01-10", '["AfD"]', 2.0),
        # MV WP7 — historical motion before the WP8 start (2021-10-26)
        ("7/100", "MV", "2020-05-01", '["CDU"]', 4.0),
        # BB — FREIE WÄHLER is meant to be canonicalised to BVB-FW
        ("8/2", "BB", "2024-10-17", '["FREIE WÄHLER"]', 6.5),
    ]

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # "now" and drop the tzinfo so the stored ISO string keeps the exact
    # naive format the old call produced.
    now = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    rows = [
        (ds, f"Test {ds}", fr, dat, bl, sc, now, now)
        for ds, bl, dat, fr, sc in samples
    ]

    # closing() releases the connection even when a statement raises, so a
    # failing setup does not leave an open handle on the temp file.
    with closing(sqlite3.connect(str(db))) as conn:
        conn.execute("""
        CREATE TABLE assessments (
            drucksache TEXT PRIMARY KEY,
            title TEXT,
            fraktionen TEXT,
            datum TEXT,
            bundesland TEXT,
            gwoe_score REAL,
            link TEXT,
            gwoe_begruendung TEXT,
            gwoe_matrix TEXT,
            gwoe_schwerpunkt TEXT,
            wahlprogramm_scores TEXT,
            verbesserungen TEXT,
            staerken TEXT,
            schwaechen TEXT,
            empfehlung TEXT,
            empfehlung_symbol TEXT,
            verbesserungspotenzial TEXT,
            themen TEXT,
            antrag_zusammenfassung TEXT,
            antrag_kernpunkte TEXT,
            source TEXT,
            model TEXT,
            created_at TEXT,
            updated_at TEXT
        )
    """)
        conn.executemany(
            "INSERT INTO assessments (drucksache, title, fraktionen, datum, bundesland, "
            "gwoe_score, source, model, created_at, updated_at) VALUES "
            "(?, ?, ?, ?, ?, ?, 'test', 'test', ?, ?)",
            rows,
        )
        conn.commit()
    return db


# ─────────────────────────────────────────────────────────────────────────────
# aggregate_matrix
# ─────────────────────────────────────────────────────────────────────────────


class TestAggregateMatrix:
    """Expectations for ``aggregate_matrix`` against the sample database."""

    def test_total_count(self, sample_db):
        matrix = aggregate_matrix(db_path=sample_db)
        assert matrix["total"] == 7

    def test_bundeslaender_listed(self, sample_db):
        matrix = aggregate_matrix(db_path=sample_db)
        listed = set(matrix["bundeslaender"])
        assert listed == {"NRW", "MV", "BB"}

    def test_nrw_cdu_average(self, sample_db):
        # NRW-CDU: 7.0 + 5.0 → average 6.0, n=2
        matrix = aggregate_matrix(db_path=sample_db)
        nrw_cdu = matrix["cells"]["NRW"]["CDU"]
        assert nrw_cdu["n"] == 2
        assert nrw_cdu["avg"] == 6.0

    def test_koalition_counts_both_parties(self, sample_db):
        # MV-SPD and MV-LINKE both receive the score 6.0 (n=1)
        matrix = aggregate_matrix(db_path=sample_db)
        mv_cells = matrix["cells"]["MV"]
        assert mv_cells["SPD"]["n"] == 1
        assert mv_cells["LINKE"]["n"] == 1
        assert mv_cells["SPD"]["avg"] == 6.0

    def test_filter_by_wahlperiode(self, sample_db):
        # NRW-WP18 filter → only the 3 NRW motions
        matrix = aggregate_matrix(filter_wp="NRW-WP18", db_path=sample_db)
        assert matrix["total"] == 3
        assert set(matrix["bundeslaender"]) == {"NRW"}

    def test_filter_excludes_old_wp(self, sample_db):
        # MV-WP8 must NOT contain the 7/100 motion (datum=2020)
        matrix = aggregate_matrix(filter_wp="MV-WP8", db_path=sample_db)
        assert matrix["total"] == 2  # only 8/200 and 8/201
        # CDU must NOT show up, because the CDU motion was in WP7
        mv_cells = matrix["cells"].get("MV", {})
        assert "CDU" not in mv_cells

    def test_bb_freie_waehler_normalized_to_bvb(self, sample_db):
        # The BB-FW motion has to be counted as BVB-FW, NOT as the generic
        # FREIE WÄHLER — that is the actual added value of the party-name
        # mapper (#55)
        matrix = aggregate_matrix(db_path=sample_db)
        bb_cells = matrix["cells"]["BB"]
        assert "BVB-FW" in bb_cells
        assert bb_cells["BVB-FW"]["n"] == 1
        assert "FREIE WÄHLER" not in bb_cells

    def test_empty_db_returns_empty_matrix(self, tmp_path):
        # Points at a path where no database file has been created.
        missing = tmp_path / "missing.db"
        matrix = aggregate_matrix(db_path=missing)
        assert matrix["total"] == 0
        assert matrix["bundeslaender"] == []


# ─────────────────────────────────────────────────────────────────────────────
# aggregate_zeitreihe
# ─────────────────────────────────────────────────────────────────────────────


class TestAggregateZeitreihe:
    """Expectations for ``aggregate_zeitreihe`` against the sample database."""

    def test_mv_cdu_two_wps(self, sample_db):
        # MV-CDU has one entry in WP7 (4.0) and none in WP8
        series = aggregate_zeitreihe("MV", "CDU", db_path=sample_db)
        by_wp = {}
        for entry in series["wahlperioden"]:
            by_wp[entry["wp"]] = entry
        assert "MV-WP7" in by_wp
        wp7 = by_wp["MV-WP7"]
        assert wp7["avg"] == 4.0
        assert wp7["n"] == 1

    def test_nrw_cdu_one_wp(self, sample_db):
        series = aggregate_zeitreihe("NRW", "CDU", db_path=sample_db)
        periods = series["wahlperioden"]
        assert len(periods) == 1
        assert periods[0]["avg"] == 6.0

    def test_unknown_combination_empty(self, sample_db):
        # The sample data contains no AfD motion for NRW.
        series = aggregate_zeitreihe("NRW", "AfD", db_path=sample_db)
        assert series["wahlperioden"] == []


# ─────────────────────────────────────────────────────────────────────────────
# export_long_format
# ─────────────────────────────────────────────────────────────────────────────


class TestExportLongFormat:
    """Expectations for the CSV text returned by ``export_long_format``."""

    def test_csv_has_header(self, sample_db):
        csv_text = export_long_format(db_path=sample_db)
        header = csv_text.splitlines()[0]
        for column in (
            "drucksache",
            "bundesland",
            "wahlperiode",
            "partei",
            "gwoe_score",
        ):
            assert column in header

    def test_koalition_yields_two_rows(self, sample_db):
        csv_text = export_long_format(db_path=sample_db)
        data_lines = csv_text.splitlines()[1:]  # skip the header
        # 8/200 is a coalition motion (SPD+LINKE) → 2 rows
        coalition_rows = [
            line for line in data_lines if line.startswith("8/200,")
        ]
        assert len(coalition_rows) == 2

    def test_bb_fw_normalized_in_csv(self, sample_db):
        csv_text = export_long_format(db_path=sample_db)
        assert "BVB-FW" in csv_text
        bb_lines = [
            line
            for line in csv_text.splitlines()
            if "BB" in line and "8/2," in line
        ]
        assert any("BVB-FW" in line for line in bb_lines)
        # The generic FREIE WÄHLER must NOT appear in the BB row. The
        # original test stated this in a comment but never asserted it.
        # NOTE(review): assumes the export has no raw fraktionen column —
        # confirm against export_long_format.
        assert all("FREIE WÄHLER" not in line for line in bb_lines)