"""Tests for parlamente.py adapter parsers — pure functions over fixture HTML.

Reproduces the three regression scenarios from the 2026-04-08 adapter session:

1. PortalaAdapter `_parse_hit_list_cards` had a `doctype` vs. `doctype_full`
   NameError that was hot-fixed live on the prod server (commit 1cb030a).
2. ParLDokAdapter `_hit_to_drucksache` needs to map ParlDok 8.x JSON hit
   dicts to Drucksache objects without losing fraction or date info.
3. PortalaAdapter `_normalize_fraktion` and ParLDokAdapter same-named method
   must yield canonical fraction codes for both comma-lists and embedded
   "MdL (Partei)" patterns.
"""
import asyncio

import pytest
from unittest.mock import AsyncMock, patch, MagicMock

from app.parlamente import ParLDokAdapter, PortalaAdapter, NRWAdapter, Drucksache


# ─────────────────────────────────────────────────────────────────────────────
# PortalaAdapter — Berlin-style HTML cards
# ─────────────────────────────────────────────────────────────────────────────

# Berlin-style result fragment: one unrelated prelude <div> followed by two
# "record-card" entries. Card 1 (19/3104) has a parenthesised suffix after the
# type and a page range before the date; card 2 (19/3107) lists two parties
# ("CDU, SPD") in the same <span class="h6"> line as the type.
BE_CARD_FIXTURE = """
<div class="other-prelude">ignored</div>
<div class="record-card efxRecordRepeater">
  <h3 class="h5"><span>Schwimmstatistik für die dritten Klassen der Berliner Schulen</span></h3>
  <span class="h6">Antrag (Eilantrag)&nbsp;<a href="/files/drs19-3104.pdf">Drucksache 19/3104</a> S. 1 bis 24 vom 31.03.2026</span>
</div>
<div class="record-card efxRecordRepeater">
  <h3 class="h5"><span>Klimaneutrales Bauen im Bestand</span></h3>
  <span class="h6">Antrag CDU, SPD&nbsp;<a href="/files/drs19-3107.pdf">Drucksache 19/3107</a> vom 02.04.2026</span>
</div>
"""


def _make_be_adapter():
    """Build a PortalaAdapter configured for the Berlin (BE) test scenario."""
    settings = {
        "bundesland": "BE",
        "name": "test BE",
        "base_url": "https://pardok.parlament-berlin.de",
        "db_id": "lah.lissh",
        "wahlperiode": 19,
        "portala_path": "/portala",
        "document_type": None,
    }
    return PortalaAdapter(**settings)


class TestPortalaAdapterCardParser:
    """Regression coverage for the doctype/doctype_full NameError (hot-fix 1cb030a)."""

    @staticmethod
    def _parse_fixture(query_filter=""):
        """Run a fresh BE adapter's card parser over BE_CARD_FIXTURE."""
        be_adapter = _make_be_adapter()
        return be_adapter._parse_hit_list_cards(
            BE_CARD_FIXTURE, query_filter=query_filter
        )

    def test_parses_two_cards_without_nameerror(self):
        """Smoke test: parsing must finish without NameError or any other error.

        Before the fix the query-filter branch built its haystack from an
        undefined ``doctype`` name; the fix renamed it to ``doctype_full``.
        This call passes an empty filter and checks that both cards come back.
        """
        cards = self._parse_fixture()
        assert len(cards) == 2

    def test_first_card_extracts_drucksache_and_title(self):
        """Card 1 yields number, title, ISO date and the adapter's Bundesland."""
        first = self._parse_fixture()[0]
        assert first.drucksache == "19/3104"
        assert "Schwimmstatistik" in first.title
        assert first.datum == "2026-03-31"
        assert first.bundesland == "BE"

    def test_second_card_extracts_fraktionen_from_h6(self):
        """Card 2 packs CDU+SPD into the type line; both must be split out."""
        second = self._parse_fixture()[1]
        assert second.drucksache == "19/3107"
        assert second.fraktionen == ["CDU", "SPD"]
        # After extracting the parties only the bare document type may remain.
        assert second.typ.strip() == "Antrag"

    def test_pdf_link_is_absolute_url(self):
        """The relative href from the card is resolved against the base URL."""
        url = self._parse_fixture()[0].link
        assert url.startswith("https://pardok.parlament-berlin.de/")
        assert url.endswith(".pdf")

    def test_query_filter_uses_doctype_full_not_doctype(self):
        """Regression: the filter branch must reference doctype_full.

        Before the fix a NameError was raised as soon as a query was passed.
        """
        # "Schwimm" only occurs in card 1; card 2 is about "Klimaneutrales Bauen".
        matches = self._parse_fixture(query_filter="Schwimm")
        assert len(matches) == 1
        assert matches[0].drucksache == "19/3104"


# ─────────────────────────────────────────────────────────────────────────────
# PortalaAdapter — LSA-style Perl-Dump records
# ─────────────────────────────────────────────────────────────────────────────

# Two <pre> blocks, each holding a Perl "$VAR1 = {...}" dump of one record.
# 'WEV06' carries the title; 'WEV32' carries a 'main' line (type, originator,
# date, Drucksache number in <b> tags) and, under key '5', a relative PDF path.
# The doubled backslashes mean the runtime text contains literal \x{fc}
# sequences (Perl-style escapes for "ü") rather than decoded characters.
LSA_DUMP_FIXTURE = """
<pre>$VAR1 = {
  'WEV06' => [{ 'main' => 'Demokratie beginnt im Klassenzimmer' }],
  'WEV32' => [{
    'main' => 'Antrag B\\x{fc}ndnis 90/Die Gr\\x{fc}nen 06.03.2026 Drucksache <b>8/6726</b> ...',
    '5' => 'drs/wp8/drs/d6726lan.pdf'
  }]
}</pre>
<pre>$VAR1 = {
  'WEV06' => [{ 'main' => 'Andere Drucksache ohne Schul-Bezug' }],
  'WEV32' => [{
    'main' => 'Antrag CDU, SPD 14.01.2026 Drucksache <b>8/6171</b> ...',
    '5' => 'drs/wp8/drs/d6171lan.pdf'
  }]
}</pre>
"""


def _make_lsa_adapter():
    """Build a PortalaAdapter configured for the Sachsen-Anhalt (LSA) test scenario."""
    settings = {
        "bundesland": "LSA",
        "name": "test LSA",
        "base_url": "https://padoka.landtag.sachsen-anhalt.de",
        "db_id": "lsa.lissh",
        "wahlperiode": 8,
        "portala_path": "/portal",
        "document_type": "Antrag",
    }
    return PortalaAdapter(**settings)


class TestPortalaAdapterDumpParser:
    """Parsing of LSA-style Perl-dump hit lists via ``_parse_hit_list_dump``."""

    @staticmethod
    def _parse_fixture(query_filter=""):
        """Run a fresh LSA adapter's dump parser over LSA_DUMP_FIXTURE."""
        lsa_adapter = _make_lsa_adapter()
        return lsa_adapter._parse_hit_list_dump(
            LSA_DUMP_FIXTURE, query_filter=query_filter
        )

    def test_parses_two_dump_records(self):
        """Both <pre> dumps in the fixture become one record each."""
        records = self._parse_fixture()
        assert len(records) == 2

    def test_extracts_drucksache_from_perl_dump(self):
        """The Drucksache numbers are taken from the WEV32 line, in order."""
        records = self._parse_fixture()
        assert records[0].drucksache == "8/6726"
        assert records[1].drucksache == "8/6171"

    def test_decodes_perl_hex_escapes_in_urheber(self):
        """The first record's WEV32 line contains \\x{fc} (ü) escapes."""
        records = self._parse_fixture()
        # "Bündnis 90/Die Grünen" must be recognised once the escapes are decoded.
        assert "GRÜNE" in records[0].fraktionen

    def test_extracts_date_iso(self):
        """German dd.mm.yyyy dates are returned as ISO yyyy-mm-dd strings."""
        records = self._parse_fixture()
        assert records[0].datum == "2026-03-06"
        assert records[1].datum == "2026-01-14"

    def test_pdf_url_uses_pdf_url_prefix(self):
        """The relative PDF path is joined onto the base URL under /files/."""
        expected = (
            "https://padoka.landtag.sachsen-anhalt.de/files/drs/wp8/drs/d6726lan.pdf"
        )
        records = self._parse_fixture()
        assert records[0].link == expected

    def test_client_side_query_filter(self):
        """A query filter keeps only the record whose text matches it."""
        matches = self._parse_fixture(query_filter="Demokratie")
        assert len(matches) == 1
        assert matches[0].drucksache == "8/6726"


# ─────────────────────────────────────────────────────────────────────────────
# PortalaAdapter — Auto-detection between dump and card formats
# ─────────────────────────────────────────────────────────────────────────────

class TestPortalaAdapterAutoDetect:
    """``_parse_hit_list_html`` must handle both the dump and the card format."""

    def test_dump_html_routes_to_dump_parser(self):
        """Perl-dump input yields both LSA records."""
        records = _make_lsa_adapter()._parse_hit_list_html(
            LSA_DUMP_FIXTURE, query_filter=""
        )
        assert len(records) == 2

    def test_card_html_routes_to_card_parser(self):
        """Record-card input yields both BE cards."""
        cards = _make_be_adapter()._parse_hit_list_html(
            BE_CARD_FIXTURE, query_filter=""
        )
        assert len(cards) == 2


# ─────────────────────────────────────────────────────────────────────────────
# Adapter._normalize_fraktion — Roundtrip-Test über eine echte Instanz
#
# Die ausführliche Pattern-Sammlung lebt nach #55 in tests/test_parteien.py.
# Hier verifizieren wir nur, dass der Adapter-Shim die zentrale Funktion
# tatsächlich aufruft und das Bundesland korrekt durchreicht.
# ─────────────────────────────────────────────────────────────────────────────

class TestAdapterNormalizeFraktionRoundtrip:
    """Round-trip checks of ``_normalize_fraktion`` through real adapter instances."""

    def test_portala_lsa_adapter_instance(self):
        """An LSA adapter maps plain and long-form party names to canonical codes."""
        lsa = _make_lsa_adapter()
        assert "CDU" in lsa._normalize_fraktion("CDU")
        assert lsa._normalize_fraktion("BÜNDNIS 90/DIE GRÜNEN") == ["GRÜNE"]

    def test_portala_be_adapter_instance(self):
        """A BE adapter maps the Berlin Senate to "Landesregierung"."""
        normalized = _make_be_adapter()._normalize_fraktion("Senat von Berlin")
        assert "Landesregierung" in normalized

    def test_empty_string(self):
        """Empty input produces an empty list."""
        lsa = _make_lsa_adapter()
        assert lsa._normalize_fraktion("") == []

    def test_freie_waehler_disambiguates_by_adapter_bundesland(self):
        """The same input string resolves differently depending on the Bundesland."""
        # BB adapter -> BVB-FW, RP adapter -> FREIE WÄHLER. This is the actual
        # added value of #55, round-tripped here through the adapter.
        brandenburg = PortalaAdapter(
            bundesland="BB",
            name="test BB",
            base_url="https://www.parlamentsdokumentation.brandenburg.de",
            db_id="lap.lap8",
            wahlperiode=8,
            portala_path="/portal",
            document_type="Antrag",
        )
        rheinland_pfalz = PortalaAdapter(
            bundesland="RP",
            name="test RP",
            base_url="https://opal.rlp.de",
            db_id="rlp.opal",
            wahlperiode=18,
            portala_path="/portal",
            document_type="Antrag",
        )
        assert brandenburg._normalize_fraktion("FREIE WÄHLER") == ["BVB-FW"]
        assert rheinland_pfalz._normalize_fraktion("FREIE WÄHLER") == ["FREIE WÄHLER"]


# ─────────────────────────────────────────────────────────────────────────────
# ParLDokAdapter — JSON hit dict → Drucksache mapping
# ─────────────────────────────────────────────────────────────────────────────

def _make_mv_adapter():
    """Build a ParLDokAdapter configured for the Mecklenburg-Vorpommern (MV) test scenario."""
    settings = {
        "bundesland": "MV",
        "name": "test MV",
        "base_url": "https://www.dokumentation.landtag-mv.de",
        "wahlperiode": 8,
        "prefix": "/parldok",
        "document_typ": "Antrag",
    }
    return ParLDokAdapter(**settings)


# One JSON hit dict as fed to ParLDokAdapter._hit_to_drucksache by the tests
# below. Those tests expect "lp" + "number" to surface as "8/6409", "date" as
# an ISO date, and the "#navpanes=0" fragment of "link" to be absent from the
# resulting PDF URL.
SAMPLE_PARLDOK_HIT = {
    "id": 70748,
    "title": "Zweckentfremdung von Sondervermögen des Bundes beenden",
    "date": "18.03.2026",
    "prelink": "/dokument/70748",
    "link": "/dokument/70748#navpanes=0",
    "authorhtml": "FDP",
    "kind": "Drucksache",
    "type": "Antrag",
    "lp": 8,
    "number": "6409",
}


class TestParLDokAdapterHitMapping:
    """Mapping of ParlDok JSON hit dicts onto Drucksache objects."""

    @staticmethod
    def _convert(hit):
        """Feed *hit* through a fresh MV adapter's ``_hit_to_drucksache``."""
        mv_adapter = _make_mv_adapter()
        return mv_adapter._hit_to_drucksache(hit)

    def test_hit_to_drucksache_basic(self):
        """The sample hit maps to number, title, ISO date, party, type and state."""
        doc = self._convert(SAMPLE_PARLDOK_HIT)
        assert doc is not None
        assert doc.drucksache == "8/6409"
        assert doc.title == "Zweckentfremdung von Sondervermögen des Bundes beenden"
        assert doc.datum == "2026-03-18"
        assert doc.fraktionen == ["FDP"]
        assert doc.typ == "Antrag"
        assert doc.bundesland == "MV"

    def test_pdf_link_strips_navpanes_fragment_and_prepends_prefix(self):
        """The link is base URL + prefix + path, without the viewer fragment."""
        doc = self._convert(SAMPLE_PARLDOK_HIT)
        assert doc.link == "https://www.dokumentation.landtag-mv.de/parldok/dokument/70748"
        assert "#navpanes" not in doc.link

    def test_missing_lp_returns_none(self):
        """A hit without the "lp" key is rejected with ``None``."""
        without_lp = {
            key: value for key, value in SAMPLE_PARLDOK_HIT.items() if key != "lp"
        }
        assert self._convert(without_lp) is None

    def test_mdl_with_party_in_parens(self):
        """MV often packs the MdL into authorhtml: 'Thomas X (AfD)'."""
        hit = {**SAMPLE_PARLDOK_HIT, "authorhtml": "Thomas de Jesus Fernandes (AfD)"}
        doc = self._convert(hit)
        assert "AfD" in doc.fraktionen

    def test_landesregierung_detection(self):
        """A ministry as author is attributed to "Landesregierung"."""
        hit = {**SAMPLE_PARLDOK_HIT, "authorhtml": "Ministerium der Finanzen"}
        doc = self._convert(hit)
        assert "Landesregierung" in doc.fraktionen


class TestParLDokFulltextIdSanitization:
    """Reverse-engineered from bundle.js pd.getFulltextId — must mirror exactly.

    Server-side fulltext is currently disabled (#18), but the helper is kept
    in the code as documentation. Should it ever be re-activated, its
    sanitization must still match the SPA's behavior 1:1.
    """

    @staticmethod
    def _sanitized(raw):
        """Return ``ParLDokAdapter._fulltext_id(raw)``."""
        return ParLDokAdapter._fulltext_id(raw)

    def test_simple_word_unchanged(self):
        """A plain ASCII word passes through untouched."""
        assert self._sanitized("Schule") == "Schule"

    def test_whitespace_becomes_dash(self):
        """A space between words is replaced by a dash."""
        assert self._sanitized("Klima Schutz") == "Klima-Schutz"

    def test_umlauts_become_dashes(self):
        """Non-ASCII letters are replaced by a dash."""
        # The JS regex is /[^a-zA-z0-9]/ — note the lowercase z in "A-z",
        # which is deliberate. Umlauts are non-ASCII, so they get replaced.
        assert self._sanitized("Bürger") == "B-rger"

    def test_punctuation_becomes_dashes(self):
        """A hyphen in the input comes out as a hyphen.

        The only punctuation in this input is already "-", so this pins that
        the string is returned unchanged.
        """
        assert self._sanitized("CO2-Emission") == "CO2-Emission"


# ─────────────────────────────────────────────────────────────────────────────
# Adapter registry sanity
# ─────────────────────────────────────────────────────────────────────────────

class TestAdapterRegistry:
    """Sanity checks on the ``ADAPTERS`` registry and ``get_adapter`` lookup."""

    @staticmethod
    def _registry():
        """Import and return the ``ADAPTERS`` mapping at call time."""
        from app.parlamente import ADAPTERS
        return ADAPTERS

    def test_active_adapters_present(self):
        """Every active Bundesland code has a registry entry."""
        registry = self._registry()
        for code in ("NRW", "MV", "BE", "LSA"):
            assert code in registry, f"missing adapter for {code}"

    def test_get_adapter_returns_none_for_unknown(self):
        """An unknown code yields ``None`` rather than an exception."""
        from app.parlamente import get_adapter
        lookup = get_adapter("XX")
        assert lookup is None

    def test_mv_adapter_is_parldok_instance(self):
        """MV is served by a ParLDokAdapter."""
        mv_entry = self._registry()["MV"]
        assert isinstance(mv_entry, ParLDokAdapter)

    def test_be_adapter_is_portala_instance(self):
        """BE is served by a PortalaAdapter."""
        be_entry = self._registry()["BE"]
        assert isinstance(be_entry, PortalaAdapter)

    def test_lsa_adapter_is_portala_instance(self):
        """LSA is served by a PortalaAdapter."""
        lsa_entry = self._registry()["LSA"]
        assert isinstance(lsa_entry, PortalaAdapter)


# ─────────────────────────────────────────────────────────────────────────────
# Bug #135 — NRW: empty query returns results (monitoring path)
# ─────────────────────────────────────────────────────────────────────────────

# Minimal OPAL HTML fixture with one valid Drucksache result: a title link,
# a PDF download link (MMD18-1234.pdf), a category span ("Antrag"), a <time>
# element with a German-format date and an "Urheber: SPD" paragraph.
_NRW_RESULT_HTML = """
<li>
  <article>
    <a class="e-document-result-item__title" href="#">Klimaschutz im Ruhrgebiet</a>
    <a href="/portal/WWW/dokumentenarchiv/Dokument/MMD18-1234.pdf">Download</a>
    <span class="e-document-result-item__category">Antrag</span>
    <time>15.04.2026</time>
    <p>Urheber: SPD</p>
  </article>
</li>
"""


class TestNRWEmptyQueryMonitoringPath:
    """Regression: search("") must return ≥1 Drucksachen, not 0.

    Before the fix OPAL rejected an empty dokNum and returned 0 hits. The
    adapter now substitutes the current year so OPAL returns recent documents.
    """

    @staticmethod
    def _klimaschutz_doc():
        """Return the Drucksache used by the ``_matches_all_terms`` checks."""
        return Drucksache(
            drucksache="18/1234",
            title="Klimaschutz",
            fraktionen=["SPD"],
            datum="2026-04-15",
            link="https://example.com/x.pdf",
            bundesland="NRW",
            typ="Antrag",
        )

    def _make_mock_responses(self, html=_NRW_RESULT_HTML):
        """Return two mock httpx.Response objects: initial GET + search POST."""
        landing_page = MagicMock(
            status_code=200,
            text='<html><form id="docSearchByItem" action="/search"></form></html>',
            cookies={},
        )
        search_page = MagicMock(status_code=200, text=html)
        return landing_page, search_page

    def test_empty_query_uses_year_as_api_query(self):
        """_parse_query("") itself still yields an empty api_query.

        The year substitution happens inside search(), not in _parse_query,
        so only the unchanged parser result is pinned here.
        """
        nrw = NRWAdapter()
        api_query, _terms, _is_exact = nrw._parse_query("")
        assert api_query == ""

    def test_matches_all_terms_with_empty_terms_is_true(self):
        """With filter_terms=[""], every document matches (wildcard semantics)."""
        nrw = NRWAdapter()
        doc = self._klimaschutz_doc()
        assert nrw._matches_all_terms(doc, [""], is_exact=False) is True

    def test_matches_all_terms_with_wildcard_star_is_true(self):
        """filter_terms=["*"] is treated as match-all."""
        nrw = NRWAdapter()
        doc = self._klimaschutz_doc()
        assert nrw._matches_all_terms(doc, ["*"], is_exact=False) is True

    def test_matches_all_terms_with_real_term_filters_correctly(self):
        """Normal search terms still filter as before."""
        nrw = NRWAdapter()
        doc = self._klimaschutz_doc()
        assert nrw._matches_all_terms(doc, ["klimaschutz"], is_exact=False) is True
        assert nrw._matches_all_terms(doc, ["haushalt"], is_exact=False) is False


# ─────────────────────────────────────────────────────────────────────────────
# Bug #135 — SL: timeout must propagate, not be swallowed as []
# ─────────────────────────────────────────────────────────────────────────────

class TestSaarlandTimeoutPropagates:
    """Regression: ReadTimeout in _post_search must not be caught and returned as [].

    Before the fix the except-block silently returned [], so monitoring
    reported errors='ok' instead of surfacing the failure. After the fix the
    except-block re-raises so callers can see the error.
    """

    @staticmethod
    def _call_post_search(adapter, failure):
        """Run ``adapter._post_search`` against a client whose POST raises *failure*."""

        async def _invoke():
            client = AsyncMock()
            client.post.side_effect = failure
            await adapter._post_search(client, "Schule")

        asyncio.run(_invoke())

    def test_post_search_propagates_timeout(self):
        """A ReadTimeout from httpx must propagate out of _post_search."""
        import httpx
        from app.parlamente import SaarlandAdapter

        # Built outside pytest.raises so a constructor failure cannot be
        # mistaken for the expected exception.
        saarland = SaarlandAdapter()
        failure = httpx.ReadTimeout("timeout", request=None)

        with pytest.raises(httpx.ReadTimeout):
            self._call_post_search(saarland, failure)

    def test_post_search_propagates_connection_error(self):
        """Generic network errors also propagate."""
        import httpx
        from app.parlamente import SaarlandAdapter

        saarland = SaarlandAdapter()
        failure = httpx.ConnectError("refused")

        with pytest.raises(httpx.ConnectError):
            self._call_post_search(saarland, failure)


# ─────────────────────────────────────────────────────────────────────────────
# Bug #135 — NI: excluded from monitoring scan (login-protected portal)
# ─────────────────────────────────────────────────────────────────────────────

class TestNIMonitoringSkip:
    """NI is in _MONITORING_SKIP because NILAS requires login (#22).

    Unauthenticated requests return login-page HTML that the JSON-comment
    parser misreads as ~50 junk records. Until a valid HAR-Capture is
    available, NI must be excluded from daily_scan().
    """

    def test_ni_in_monitoring_skip_set(self):
        """NI must appear in the _MONITORING_SKIP constant."""
        from app.monitoring import _MONITORING_SKIP
        assert "NI" in _MONITORING_SKIP

    def test_daily_scan_skips_ni(self):
        """daily_scan() must not call the NI adapter at all.

        The scan runs with NI as the only active Bundesland and the only
        registered adapter, with the two monitoring upsert functions mocked.
        """
        import importlib.util
        import sys
        import types

        # Stub heavy deps only when they are genuinely not installed. Checking
        # ``sys.modules`` alone would also replace an installed package that
        # simply has not been imported yet with an empty module.
        # NOTE(review): as before, an inserted stub stays in sys.modules after
        # the test.
        for mod in ("aiosqlite", "fitz"):
            if mod in sys.modules:
                continue
            if importlib.util.find_spec(mod) is None:
                sys.modules[mod] = types.ModuleType(mod)

        from app.bundeslaender import Bundesland

        ni_adapter = MagicMock()
        ni_adapter.search = AsyncMock(return_value=[])

        fake_bls = [
            Bundesland(
                code="NI", name="NI", parlament_name="NI", wahlperiode=19,
                wahlperiode_start="2022-01-01", naechste_wahl=None,
                regierungsfraktionen=[], landtagsfraktionen=[],
                doku_system="Test", doku_base_url="http://example.com",
                drucksache_format="19/1234", dokukratie_scraper=None,
                aktiv=True,
            )
        ]

        import app.monitoring as mon_mod
        import app.database as db_mod
        import app.parlamente as parl_mod

        # patch.object restores (or removes) ADAPTERS on exit, including when
        # the scan raises; it is entered first so it is undone last.
        with (
            patch.object(parl_mod, "ADAPTERS", {"NI": ni_adapter}, create=True),
            patch("app.monitoring.aktive_bundeslaender", return_value=fake_bls),
            patch.object(db_mod, "upsert_monitoring_scan", new_callable=AsyncMock),
            patch.object(db_mod, "upsert_monitoring_summary", new_callable=AsyncMock),
        ):
            asyncio.run(mon_mod.daily_scan())

        ni_adapter.search.assert_not_called()


# ─────────────────────────────────────────────────────────────────────────────
# Fix #142 — SL: search() must propagate network errors (not swallow as [])
# ─────────────────────────────────────────────────────────────────────────────

class TestSaarlandSearchPropagatesErrors:
    """Regression: network errors in SaarlandAdapter.search() must not be
    caught at the search()-level — they must propagate so the monitoring
    layer records them as errors instead of seeing ``seen=0 errors=None``."""

    @staticmethod
    def _run_search(adapter, arm_client):
        """Run ``adapter.search("Schule")`` with ``_make_client`` patched.

        A mock async-context-manager client is created, handed to
        *arm_client* so the test can set up its ``post`` behavior, and then
        returned from the patched ``_make_client``.
        """

        async def _invoke():
            client = AsyncMock()
            client.__aenter__ = AsyncMock(return_value=client)
            client.__aexit__ = AsyncMock(return_value=False)
            arm_client(client)
            with patch.object(adapter, "_make_client", return_value=client):
                await adapter.search("Schule")

        asyncio.run(_invoke())

    def test_search_propagates_read_timeout(self):
        """ReadTimeout from _post_search must propagate out of search()."""
        import httpx
        from app.parlamente import SaarlandAdapter

        saarland = SaarlandAdapter()

        def _arm(client):
            client.post.side_effect = httpx.ReadTimeout("timeout", request=None)

        with pytest.raises(httpx.ReadTimeout):
            self._run_search(saarland, _arm)

    def test_search_propagates_connect_error(self):
        """ConnectError from _post_search must propagate out of search()."""
        import httpx
        from app.parlamente import SaarlandAdapter

        saarland = SaarlandAdapter()

        def _arm(client):
            client.post.side_effect = httpx.ConnectError("refused")

        with pytest.raises(httpx.ConnectError):
            self._run_search(saarland, _arm)

    def test_search_propagates_http_500(self):
        """HTTP 5xx response must NOT be silently turned into empty results
        (regression #142): a 500 from the Umbraco backend used to log+return
        [], hiding it from the monitoring summary."""
        import httpx
        from app.parlamente import SaarlandAdapter

        saarland = SaarlandAdapter()

        def _arm(client):
            # A plain MagicMock response carrying only the attributes set here.
            response = MagicMock()
            response.status_code = 500
            response.text = "Server Error"
            response.request = MagicMock()
            client.post = AsyncMock(return_value=response)

        with pytest.raises(httpx.HTTPStatusError):
            self._run_search(saarland, _arm)