Activate Thüringen via ParLDokAdapter reuse + filter widening (#25, Phase 1)
Thüringen läuft auf parldok.thueringer-landtag.de mit ParlDok 8.3.5
(J3S GmbH) — exakt dieselbe Version wie MV. Aber TH packt seine
Anträge unter zusammengesetzten type-Strings ("Antrag gemäß § 79 GO",
"Antrag gemäß § 74 (2) GO") und kind="Vorlage" statt der MV-Variante
kind="Drucksache"/type="Antrag". Strict-Match auf "Antrag" hat 0
Treffer geliefert.
Lösung: ParLDokAdapter um zwei Konstruktor-Parameter erweitert:
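- document_typ_substring=True → Substring-Match auf type-Feld
("Antrag" matcht "Antrag gemäß § 79 GO", "Antrag gemäß § 74 (2) GO"
usw.; der Vergleich ist case-sensitiv, "Alternativantrag" mit kleinem
"a" wird dadurch NICHT erfasst)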
- kinds=["Drucksache", "Vorlage"] → erweiterte kind-Liste
Defaults sind backward-kompatibel (Substring-Match aus, kinds nur
Drucksache), sodass MV und HH unverändert weiterlaufen.
_hit_matches_filters() als zentraler Filter-Helper extrahiert.
search() nutzt ihn; get_document() überspringt ihn bewusst (und
filtert auch nicht mehr auf kind="Drucksache"), weil dort beliebige
Drucksachen aufrufbar sein müssen, unabhängig vom search-Time-Filter.
Hostname-Korrektur: parldok.thueringen.de redirected per 303 auf
parldok.thueringer-landtag.de. doku_base_url in bundeslaender.py
auf den neuen Host umgestellt.
Smoke-Test (lokal):
TH q="": 8 hits in 3.3s
TH q="Schule": 2 hits in 25.7s (Lernmittelbeschaffung, Modernisierung
Bund-Länder-Vereinbarung — beide Schul-bezogen)
TH q="Klima": 0 hits (keine in den letzten 1000 Drucksachen)
Damit ist Phase 1 (3/3) komplett. Nächstes Phase-2 Issue: #27 BB als
StarWebAdapter-Template.
Phase 1 (3/3) aus Roadmap-Issue #49.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
916c5d84d7
commit
dc0bb07c12
@ -379,18 +379,19 @@ BUNDESLAENDER: dict[str, Bundesland] = {
|
|||||||
regierungsfraktionen=["CDU", "BSW", "SPD"],
|
regierungsfraktionen=["CDU", "BSW", "SPD"],
|
||||||
landtagsfraktionen=["AfD", "CDU", "LINKE", "BSW", "SPD"],
|
landtagsfraktionen=["AfD", "CDU", "LINKE", "BSW", "SPD"],
|
||||||
doku_system="ParlDok",
|
doku_system="ParlDok",
|
||||||
doku_base_url="https://parldok.thueringen.de",
|
doku_base_url="https://parldok.thueringer-landtag.de",
|
||||||
drucksache_format="8/1234",
|
drucksache_format="8/1234",
|
||||||
dokukratie_scraper="th",
|
dokukratie_scraper="th",
|
||||||
|
aktiv=True,
|
||||||
anmerkung=(
|
anmerkung=(
|
||||||
"Erste Brombeer-Koalition Deutschlands (CDU+BSW+SPD) als "
|
"Erste Brombeer-Koalition Deutschlands (CDU+BSW+SPD) als "
|
||||||
"Minderheitsregierung mit 44 von 88 Sitzen. Mario Voigt (CDU) "
|
"Minderheitsregierung mit 44 von 88 Sitzen. Mario Voigt "
|
||||||
"seit Dezember 2024 MP. Doku-System ist ParlDok (siehe "
|
"(CDU) seit Dezember 2024 MP. ParlDok 8.3.5 (J3S GmbH) — "
|
||||||
"dokukratie/th.yml mit DokumententypId/LegislaturperiodenNummer-"
|
"EXAKT dieselbe Version wie MV. ParLDokAdapter direkt "
|
||||||
"Form-Feldern), nicht StarWeb wie ursprünglich klassifiziert. "
|
"wiederverwendbar als Registry-Eintrag (#25). Achtung: "
|
||||||
"Vor Implementierung mit `curl parldok.thueringen.de/parldok/` "
|
"alter Hostname parldok.thueringen.de redirected per 303 "
|
||||||
"verifizieren ob das Live-System ParlDok 8.x SPA wie MV ist — "
|
"auf parldok.thueringer-landtag.de — neuer Hostname ist "
|
||||||
"dann ist der ParLDokAdapter direkt wiederverwendbar."
|
"der korrekte."
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|||||||
@ -889,6 +889,8 @@ class ParLDokAdapter(ParlamentAdapter):
|
|||||||
wahlperiode: int,
|
wahlperiode: int,
|
||||||
prefix: str = "/parldok",
|
prefix: str = "/parldok",
|
||||||
document_typ: str = "Antrag",
|
document_typ: str = "Antrag",
|
||||||
|
document_typ_substring: bool = False,
|
||||||
|
kinds: Optional[list[str]] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Configure a ParlDok 8.x adapter for one specific parliament.
|
"""Configure a ParlDok 8.x adapter for one specific parliament.
|
||||||
|
|
||||||
@ -902,6 +904,16 @@ class ParLDokAdapter(ParlamentAdapter):
|
|||||||
document_typ: client-side filter on the ``type`` field of
|
document_typ: client-side filter on the ``type`` field of
|
||||||
each hit ("Antrag", "Gesetzentwurf", …). Set to empty
|
each hit ("Antrag", "Gesetzentwurf", …). Set to empty
|
||||||
string to disable type filtering.
|
string to disable type filtering.
|
||||||
|
document_typ_substring: if True, ``document_typ`` is matched
|
||||||
|
as a substring against the hit's ``type`` field instead
|
||||||
|
of an exact match. Needed for instances where the
|
||||||
|
Drucksachen-Anträge live under composite type strings
|
||||||
|
like ``"Antrag gemäß § 79 GO"`` (Thüringen) — strict
|
||||||
|
``"Antrag"`` would never match.
|
||||||
|
kinds: optional list of acceptable ``kind`` values. Defaults
|
||||||
|
to ``["Drucksache"]`` if None — but TH packs its Anträge
|
||||||
|
under ``kind="Vorlage"`` so the parameter has to be
|
||||||
|
widened there.
|
||||||
"""
|
"""
|
||||||
self.bundesland = bundesland
|
self.bundesland = bundesland
|
||||||
self.name = name
|
self.name = name
|
||||||
@ -909,6 +921,27 @@ class ParLDokAdapter(ParlamentAdapter):
|
|||||||
self.prefix = "/" + prefix.strip("/")
|
self.prefix = "/" + prefix.strip("/")
|
||||||
self.wahlperiode = wahlperiode
|
self.wahlperiode = wahlperiode
|
||||||
self.document_typ = document_typ
|
self.document_typ = document_typ
|
||||||
|
self.document_typ_substring = document_typ_substring
|
||||||
|
self.kinds = kinds if kinds is not None else ["Drucksache"]
|
||||||
|
|
||||||
|
def _hit_matches_filters(self, hit: dict) -> bool:
|
||||||
|
"""Apply the kind/typ filters to a raw hit dict.
|
||||||
|
|
||||||
|
Centralised so the search loop can short-circuit cleanly. ``hit``
|
||||||
|
comes from ``Fulltext/Search`` or ``Fulltext/Resultpage`` JSON
|
||||||
|
responses; both share the same record schema.
|
||||||
|
"""
|
||||||
|
if self.kinds and hit.get("kind") not in self.kinds:
|
||||||
|
return False
|
||||||
|
hit_type = (hit.get("type") or "").strip()
|
||||||
|
if self.document_typ:
|
||||||
|
if self.document_typ_substring:
|
||||||
|
if self.document_typ not in hit_type:
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
if hit_type != self.document_typ:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _datum_de_to_iso(datum_de: str) -> str:
|
def _datum_de_to_iso(datum_de: str) -> str:
|
||||||
@ -1159,9 +1192,7 @@ class ParLDokAdapter(ParlamentAdapter):
|
|||||||
async with self._make_client() as client:
|
async with self._make_client() as client:
|
||||||
await client.get(f"{self.base_url}{self.prefix}/")
|
await client.get(f"{self.base_url}{self.prefix}/")
|
||||||
async for hit in self._paginated_hits(client):
|
async for hit in self._paginated_hits(client):
|
||||||
if hit.get("kind") != "Drucksache":
|
if not self._hit_matches_filters(hit):
|
||||||
continue
|
|
||||||
if self.document_typ and hit.get("type") != self.document_typ:
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
doc = self._hit_to_drucksache(hit)
|
doc = self._hit_to_drucksache(hit)
|
||||||
@ -1199,8 +1230,9 @@ class ParLDokAdapter(ParlamentAdapter):
|
|||||||
async with self._make_client() as client:
|
async with self._make_client() as client:
|
||||||
await client.get(f"{self.base_url}{self.prefix}/")
|
await client.get(f"{self.base_url}{self.prefix}/")
|
||||||
async for hit in self._paginated_hits(client):
|
async for hit in self._paginated_hits(client):
|
||||||
if hit.get("kind") != "Drucksache":
|
# Don't apply doc-type filters here — get_document is
|
||||||
continue
|
# used to look up arbitrary Drucksachen, including ones
|
||||||
|
# whose kind/typ doesn't match the search-time filter.
|
||||||
if str(hit.get("lp")) == wanted_lp and str(hit.get("number")) == wanted_num:
|
if str(hit.get("lp")) == wanted_lp and str(hit.get("number")) == wanted_num:
|
||||||
return self._hit_to_drucksache(hit)
|
return self._hit_to_drucksache(hit)
|
||||||
return None
|
return None
|
||||||
@ -1708,6 +1740,20 @@ ADAPTERS = {
|
|||||||
prefix="/parldok",
|
prefix="/parldok",
|
||||||
document_typ="Antrag",
|
document_typ="Antrag",
|
||||||
),
|
),
|
||||||
|
"TH": ParLDokAdapter(
|
||||||
|
bundesland="TH",
|
||||||
|
name="Thüringer Landtag (ParlDok)",
|
||||||
|
base_url="https://parldok.thueringer-landtag.de",
|
||||||
|
wahlperiode=8,
|
||||||
|
prefix="/parldok",
|
||||||
|
# TH packs Anträge under composite type strings like
|
||||||
|
# "Antrag gemäß § 79 GO" with kind="Vorlage", not the
|
||||||
|
# MV-style kind="Drucksache"/type="Antrag". Substring-match
|
||||||
|
# on "Antrag" plus widened kind list catches them all.
|
||||||
|
document_typ="Antrag",
|
||||||
|
document_typ_substring=True,
|
||||||
|
kinds=["Drucksache", "Vorlage"],
|
||||||
|
),
|
||||||
"BY": BayernAdapter(),
|
"BY": BayernAdapter(),
|
||||||
"BW": PARLISAdapter(
|
"BW": PARLISAdapter(
|
||||||
bundesland="BW",
|
bundesland="BW",
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user