From b5ae8894d46d459d829a71de747a56f62134d503 Mon Sep 17 00:00:00 2001
From: Dotty Dotter <dotty@Mac-mini-von-Dotty.local>
Date: Wed, 8 Apr 2026 19:01:00 +0200
Subject: [PATCH] =?UTF-8?q?ParLDokAdapter:=20Volltext=20(#12)=20deaktivier?=
 =?UTF-8?q?en=20=E2=80=94=20einheitlich=20Title-Filter=20(#18)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Server-side facet_fulltext-Suche aus #12 war asymmetrisch zu BE/LSA
(beide nur Title-Filter über 730d-Window aus #13). User-Entscheidung
2026-04-08: einheitliches Verhalten ist wichtiger als das beste
Verhalten in 2 von 4 Adaptern.

Konkrete Änderungen:

- _build_search_body() schickt query nicht mehr server-side. Der
  query-Parameter bleibt in der Signatur als unused-mit-del, weil
  die Wieder-Aktivierung später ein Drop-in sein soll, wenn die
  PortalaAdapter-Variante reverse-engineered wurde.

- _initial_search() und _paginated_hits() ohne query-Parameter.

- search() macht clientseitigen Title+Urheber-Filter wie der
  PortalaAdapter — gleicher Codepfad, einheitliches Verhalten.

- get_document() nutzt die unveränderte Pagination.

- FACET_FULLTEXT-Konstante und _fulltext_id-Helper bleiben im Code
  als Dokumentation für die spätere Re-Aktivierung. Im Docstring
  ist die Tag-Form festgehalten.

Folgen:

- MV "Schule" ist von 20 (mit Volltext) auf 3 zurück (Title-Filter
  über die letzten 1000 Drucksachen). Gleiches Niveau wie BE/LSA
  pre-#13.

- Browse-Mode (no query) ist unverändert: ~10 hits in ~25s, MAX_PAGES=10.

- Wenn das später nicht reicht: #16 (UI-Split DB vs. Landtag) und
  ein optionaler "echter Volltext"-Toggle (#17 closed-as-deferred)
  bleiben als Folge-Optionen.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 app/parlamente.py | 92 ++++++++++++++++++++---------------------------
 1 file changed, 39 insertions(+), 53 deletions(-)
diff --git a/app/parlamente.py b/app/parlamente.py
index 82c68ae..db35487 100644
--- a/app/parlamente.py
+++ b/app/parlamente.py
@@ -961,35 +961,27 @@ class ParLDokAdapter(ParlamentAdapter):
     def _build_search_body(self, *, length: int = 100, query: str = "") -> dict:
         """Build the JSON payload for the initial ``Fulltext/Search`` call.
 
-        Filters by Wahlperiode + optional server-side full-text search.
-        Type/kind filtering still happens client-side because the
-        facet_type/facet_kind value IDs are instance-specific and would
-        require an extra ``Fulltext/Filter`` round trip to discover.
+        Filters by Wahlperiode only — type/kind/query filtering all
+        happen client-side after the hit list is paginated. The
+        ``query`` parameter is accepted for API compatibility but is
+        currently NOT forwarded to the server (#18: uniform
+        client-side title search, no server-side fulltext, because
+        behavior would otherwise be asymmetric across adapters). The
+        ``FACET_FULLTEXT`` constant and :meth:`_fulltext_id` helper
+        are kept around as documentation for the previous #12
+        server-side variant — when fulltext gets uniformly
+        re-introduced later, the dormant tag is just::
+
+            {"type": self.FACET_FULLTEXT,
+             "id": self._fulltext_id(query),
+             "fulltext": query, "label": query, "field": "Alle"}
 
         Pagination beyond the first page goes through
         ``Fulltext/Resultpage`` — the ``Search`` endpoint itself
         ignores any non-zero ``Start``.
-
-        The full-text tag schema is reverse-engineered from
-        ``pd.addInput`` in ``bundle.js`` and matches the SPA payload
-        verbatim::
-
-            {"type": 0, "id": "<sanitized>", "fulltext": "<raw>",
-             "label": "<raw>", "field": "Alle"}
-
-        ``field="Alle"`` means "search all indexed fields"
-        (``pd.currentFTSearchMode`` default). The server tokenizes
-        the term and applies AND-semantics across whitespace.
         """
+        del query  # explicitly unused — see docstring
         tags: list[dict] = [{"type": self.FACET_LP, "id": self.wahlperiode}]
-        if query:
-            tags.append({
-                "type": self.FACET_FULLTEXT,
-                "id": self._fulltext_id(query),
-                "fulltext": query,
-                "label": query,
-                "field": "Alle",
-            })
         return {
             "devicekey": "",
             "max": length,
@@ -1066,18 +1058,15 @@ class ParLDokAdapter(ParlamentAdapter):
             return None
 
     async def _initial_search(
-        self, client: httpx.AsyncClient, *, length: int, query: str = "",
+        self, client: httpx.AsyncClient, *, length: int,
     ) -> tuple[Optional[int], list[dict]]:
         """Run the initial ``Fulltext/Search`` and return ``(queryid, docs)``.
 
         The ``queryid`` is needed for subsequent ``Fulltext/Resultpage``
         calls. ParlDok ignores any non-zero ``Start`` on this endpoint —
         the first 100 hits are the only ones reachable via ``Search``.
-
-        ``query`` is sent server-side as a ``facet_fulltext`` tag — see
-        ``_build_search_body``.
         """
-        body = self._build_search_body(length=length, query=query)
+        body = self._build_search_body(length=length)
         inner = await self._post_json(client, "Fulltext/Search", body)
         if not inner:
             return None, []
@@ -1104,25 +1093,16 @@ class ParLDokAdapter(ParlamentAdapter):
             headers={"User-Agent": "Mozilla/5.0 GWOE-Antragspruefer"},
         )
 
-    async def _paginated_hits(
-        self, client: httpx.AsyncClient, *, query: str = "",
-    ):
-        """Async iterator over Drucksachen-style hits across all pages.
+    async def _paginated_hits(self, client: httpx.AsyncClient):
+        """Async iterator over Drucksachen-style hits across pages.
 
         Yields raw hit dicts in newest-first order. The first batch comes
         from ``Fulltext/Search``, subsequent batches from
         ``Fulltext/Resultpage`` using the queryid the server returned for
         the initial call. Stops when a page comes back empty, undersized,
-        or after ``MAX_PAGES`` iterations.
-
-        ``query`` is forwarded as a server-side full-text filter to
-        ``_initial_search``; the resulting ``queryid`` is bound to that
-        filter, so subsequent ``Resultpage`` calls automatically inherit
-        it without needing to repeat the tag.
+        or after :attr:`MAX_PAGES` iterations.
         """
-        queryid, hits = await self._initial_search(
-            client, length=self.PAGE_SIZE, query=query,
-        )
+        queryid, hits = await self._initial_search(client, length=self.PAGE_SIZE)
         for hit in hits:
             yield hit
         if not queryid or len(hits) < self.PAGE_SIZE:
@@ -1148,29 +1128,30 @@ class ParLDokAdapter(ParlamentAdapter):
     # Anträge are ~3% of all hits in MV, so 1000 raw → ~30 Anträge, more
     # than enough for the typical UI request (limit 5..20). Filtered
     # queries that find nothing in the last 1000 docs return empty
-    # rather than scan the entire WP.
+    # rather than scan the entire WP — same trade-off as the BE/LSA
+    # PortalaAdapter quick-win window.
     MAX_PAGES = 10
 
     async def search(self, query: str, limit: int = 20) -> list[Drucksache]:
-        """Search recent documents of the configured Wahlperiode.
+        """Search the configured Wahlperiode, sorted newest-first.
 
-        Server-side full-text search via the ``facet_fulltext`` tag (#12)
-        when ``query`` is non-empty; otherwise pure browse mode. The
-        server returns the WP sorted newest-first across all document
-        kinds, the client keeps only ``Antrag``-typed Drucksachen and
-        dedupes by lp/number (ParlDok returns the same Drucksache
-        multiple times when it appears in several Vorgänge/Beratungen).
+        #18: einheitliches Verhalten — Server filtert nur nach WP, der
+        Client paginiert (max. ``MAX_PAGES`` Seiten) und filtert lokal
+        nach Treffern in Titel oder Urheber. Volltext-Filter (#12) ist
+        zurückgebaut, weil das Verhalten zwischen Adaptern sonst
+        asymmetrisch wird. Sortierung kommt vom Server (newest-first
+        durch ``sort=2`` in :meth:`_build_search_body`).
 
-        Pagination: ParlDok caps each response at 100 rows; further
-        pages come from ``Fulltext/Resultpage`` bound to the
-        server-assigned ``queryid``.
+        Dedupe per ``lp/number`` weil ParlDok dieselbe Drucksache
+        mehrfach in verschiedenen Vorgängen/Beratungen liefert.
         """
         results: list[Drucksache] = []
         seen: set[str] = set()
+        query_terms = [t.lower() for t in query.split() if t] if query else []
 
         async with self._make_client() as client:
             await client.get(f"{self.base_url}{self.prefix}/")
-            async for hit in self._paginated_hits(client, query=query):
+            async for hit in self._paginated_hits(client):
                 if hit.get("kind") != "Drucksache":
                     continue
                 if self.document_typ and hit.get("type") != self.document_typ:
@@ -1183,6 +1164,11 @@ class ParLDokAdapter(ParlamentAdapter):
                     continue
                 seen.add(doc.drucksache)
 
+                if query_terms:
+                    hay = f"{doc.title} {hit.get('authorhtml', '')}".lower()
+                    if not all(t in hay for t in query_terms):
+                        continue
+
                 results.append(doc)
                 if len(results) >= limit:
                     return results