diff --git a/app/bundeslaender.py b/app/bundeslaender.py
index 9150eab..d1b3261 100644
--- a/app/bundeslaender.py
+++ b/app/bundeslaender.py
@@ -364,10 +364,19 @@ BUNDESLAENDER: dict[str, Bundesland] = {
regierungsfraktionen=["CDU", "GRÜNE"],
landtagsfraktionen=["CDU", "GRÜNE", "SPD", "FDP", "SSW"],
doku_system="StarWeb",
- doku_base_url="https://www.landtag.ltsh.de",
+ doku_base_url="http://lissh.lvn.parlanet.de",
drucksache_format="20/1234",
dokukratie_scraper="sh",
- anmerkung="SSW ist von der 5%-Hürde befreit.",
+ aktiv=True,
+ anmerkung=(
+ "SSW ist von der 5%-Hürde befreit. Doku-System ist die "
+ "alte Starfinder-CGI auf lissh.lvn.parlanet.de — URL-"
+ "basiert via "
+ "/cgi-bin/starfinder/0?path=lisshfl.txt&search=WP=20+AND+dtyp=antrag, "
+ "Latin-1-encoding. NICHT die moderne StarWeb-Servlet-"
+ "Variante (BB/HE/NI/RP/HB) — eigene Klasse "
+ "StarFinderCGIAdapter."
+ ),
),
"TH": Bundesland(
code="TH",
diff --git a/app/parlamente.py b/app/parlamente.py
index 51a395d..b96a3bd 100644
--- a/app/parlamente.py
+++ b/app/parlamente.py
@@ -1269,6 +1269,215 @@ class ParLDokAdapter(ParlamentAdapter):
return None
+class StarFinderCGIAdapter(ParlamentAdapter):
+ """Adapter for old-school CGI Starfinder instances.
+
+ Currently used by Schleswig-Holstein on
+ ``lissh.lvn.parlanet.de/cgi-bin/starfinder/0`` — the **oldest** of the
+ parliament backends we touch. Predates StarWeb's HTML form-submit
+ machinery: instead of submitting a stateful AdvancedSearch form
+ (which BB/HE/NI/RP/HB do), Starfinder accepts the entire query as
+ URL parameters and returns plain HTML with a flat ``
`` table of
+ records.
+
+ Reverse-engineering quelle: ``dokukratie/sh.yml`` plus a probe
+ against the live endpoint. Format details:
+
+ - URL template: ``{base}/cgi-bin/starfinder/0?path={db_path}&id=FASTLINK
+ &pass=&search={starfinder_query}&format=WEBKURZFL``
+ - Query syntax: ``WP=20+AND+dtyp=antrag`` (URL-encoded). The
+ ``dtyp`` codes are lowercase short labels (``antrag``, ``kleine``).
+ - Encoding: ``iso-8859-1`` (Latin-1) — NOT UTF-8. The HTTP response
+ doesn't always declare it via Content-Type, so we explicitly
+ decode with ``latin1`` to avoid mojibake on the German umlauts.
+ - Hit-format: each record is one ``
``
+ with the title in ````, then ``Antrag
+ Drucksache XX/YYYY``.
+ """
+
+ _RE_RECORD = re.compile(
+ r'.*?
',
+ re.DOTALL,
+ )
+ _RE_TITLE = re.compile(r"(.*?)", re.DOTALL)
+ _RE_DRUCKSACHE_LINK = re.compile(
+ r']*>(\d+/\d+)'
+ )
+ # The line between title and the -link looks like:
+ # "Antrag Christian Dirschauer (SSW) 07.04.2026 Drucksache "
+ # We pull the originator(s) and the date out of it.
+ _RE_URHEBER_DATUM = re.compile(
+ r"\s*
\s*[A-Za-zÄÖÜäöüß]+\s+(.+?)\s+(\d{1,2}\.\d{1,2}\.\d{4})\s+Drucksache",
+ re.DOTALL,
+ )
+
+ def __init__(
+ self,
+ *,
+ bundesland: str,
+ name: str,
+ base_url: str,
+ wahlperiode: int,
+ db_path: str = "lisshfl.txt",
+ document_typ_code: str = "antrag",
+ ) -> None:
+ self.bundesland = bundesland
+ self.name = name
+ self.base_url = base_url.rstrip("/")
+ self.wahlperiode = wahlperiode
+ self.db_path = db_path
+ self.document_typ_code = document_typ_code
+
+ @staticmethod
+ def _datum_de_to_iso(datum_de: str) -> str:
+ if not datum_de:
+ return ""
+ try:
+ d, m, y = datum_de.split(".")
+ return f"{y}-{m.zfill(2)}-{d.zfill(2)}"
+ except ValueError:
+ return ""
+
+ @staticmethod
+ def _normalize_fraktion(text: str) -> list[str]:
+ """SH format: 'Christian Dirschauer (SSW), Jette Waldinger-Thiering (SSW)'.
+
+ Includes SSW which is unique to SH (befreit von 5%-Hürde).
+ """
+ if not text:
+ return []
+ u = text.upper()
+ out: list[str] = []
+ if re.search(r"\bBÜNDNIS\s*90\b", u) or re.search(r"\bGR(?:Ü|UE)NE\b", u):
+ out.append("GRÜNE")
+ if re.search(r"\bCDU\b", u):
+ out.append("CDU")
+ if re.search(r"\bSPD\b", u):
+ out.append("SPD")
+ if re.search(r"\bF\.?\s*D\.?\s*P\.?\b", u):
+ out.append("FDP")
+ if re.search(r"\bAFD\b", u):
+ out.append("AfD")
+ if re.search(r"\bLINKE\b", u):
+ out.append("LINKE")
+ if re.search(r"\bSSW\b", u):
+ out.append("SSW")
+ if re.search(r"LANDESREGIERUNG|\bMINISTER|STAATSKANZLEI|MINISTERPRÄSIDENT", u):
+ out.append("Landesregierung")
+ return out
+
+ def _build_url(self) -> str:
+ """Build the Starfinder URL for the structural WP+dtyp browse.
+
+ Free-text filtering is done client-side on the parsed records
+ (consistent with #18 — alle Adapter machen einheitlich Title-
+ Filter ohne Server-Volltext, weil das Verhalten zwischen
+ Adaptern sonst asymmetrisch wird).
+ """
+ search_param = f"WP={self.wahlperiode}+AND+dtyp={self.document_typ_code}"
+ return (
+ f"{self.base_url}/cgi-bin/starfinder/0"
+ f"?path={self.db_path}&id=FASTLINK&pass=&search={search_param}"
+ f"&format=WEBKURZFL"
+ )
+
def _parse_records(self, html: str) -> list[Drucksache]:
    """Turn a Starfinder result page into ``Drucksache`` objects.

    Every chunk matched by ``_RE_RECORD`` is treated as one record.
    Chunks without a Drucksache link are skipped. A missing title falls
    back to ``"Drucksache <number>"``; a missing originator/date line
    leaves the factions empty and the date as ``""``.

    Args:
        html: Decoded HTML of the result page.

    Returns:
        The parsed records in page order. ``typ`` is always ``"Antrag"``.
    """
    parsed: list[Drucksache] = []
    for chunk in self._RE_RECORD.findall(html):
        link_match = self._RE_DRUCKSACHE_LINK.search(chunk)
        if link_match is None:
            # Without a link there is neither a number nor a PDF URL.
            continue
        pdf_url, number = link_match.group(1, 2)

        title_match = self._RE_TITLE.search(chunk)
        if title_match:
            # Collapse runs of whitespace (including line breaks).
            title = re.sub(r"\s+", " ", title_match.group(1)).strip()
        else:
            title = f"Drucksache {number}"

        meta_match = self._RE_URHEBER_DATUM.search(chunk)
        if meta_match:
            originator = meta_match.group(1).strip()
            iso_date = self._datum_de_to_iso(meta_match.group(2))
        else:
            originator, iso_date = "", ""

        parsed.append(
            Drucksache(
                drucksache=number,
                title=title,
                fraktionen=self._normalize_fraktion(originator),
                datum=iso_date,
                link=pdf_url,
                bundesland=self.bundesland,
                typ="Antrag",
            )
        )
    return parsed
+
async def search(self, query: str, limit: int = 20) -> list[Drucksache]:
    """Fetch the browse listing and filter it client-side.

    The server is always asked for the same period/type listing; ``query``
    is never sent. When ``query`` is non-empty, it is split on whitespace
    and a record is kept only if every term occurs (case-insensitively)
    in its title or its faction labels. The original note refers to
    issue #18 for this design.

    Args:
        query: Free-text filter; empty means "no filtering".
        limit: Maximum number of records returned.

    Returns:
        Up to ``limit`` records. An empty list on a non-200 response or
        on any exception during fetch/parse (both are logged).
    """
    request_url = self._build_url()
    client_options = dict(
        timeout=60,
        follow_redirects=True,
        headers={"User-Agent": "Mozilla/5.0 GWOE-Antragspruefer"},
    )
    async with httpx.AsyncClient(**client_options) as http:
        try:
            response = await http.get(request_url)
            if response.status_code != 200:
                logger.error("%s search HTTP %s", self.bundesland, response.status_code)
                return []
            # Decode as Latin-1 explicitly instead of trusting the
            # response headers; per the original note the server does
            # not always advertise its encoding correctly.
            page = response.content.decode("latin-1", errors="replace")
            hits = self._parse_records(page)
        except Exception:
            logger.exception("%s search error", self.bundesland)
            return []

    if not query:
        return hits[:limit]

    needles = [part.lower() for part in query.split() if part]

    def _matches(hit) -> bool:
        # Every term must appear in "title + faction labels".
        return all(
            needle in f"{hit.title} {' '.join(hit.fraktionen)}".lower()
            for needle in needles
        )

    return [hit for hit in hits if _matches(hit)][:limit]
+
async def get_document(self, drucksache: str) -> Optional[Drucksache]:
    """Return the record whose number equals ``drucksache`` exactly.

    There is no dedicated lookup: the method runs an unfiltered
    ``search`` capped at 200 records and scans the result. According to
    the original author's note, the listing is sorted newest-first and
    no number-only server filter is known.

    Args:
        drucksache: Document number, e.g. ``"20/1234"``.

    Returns:
        The first matching record, or ``None`` if it is not among the
        records returned by the search.
    """
    candidates = await self.search(query="", limit=200)
    return next(
        (candidate for candidate in candidates if candidate.drucksache == drucksache),
        None,
    )
+
async def download_text(self, drucksache: str) -> Optional[str]:
    """Download the PDF of a Drucksache and return its plain text.

    The record is resolved via ``get_document``; its ``link`` is then
    fetched and the text of all pages is concatenated in page order.

    Args:
        drucksache: Document number, e.g. ``"20/1234"``.

    Returns:
        The extracted text, or ``None`` when the record or its link is
        missing, the download does not return HTTP 200, or fetching or
        parsing raises (failures are logged).
    """
    import fitz  # PyMuPDF

    doc = await self.get_document(drucksache)
    if not doc or not doc.link:
        return None
    async with httpx.AsyncClient(
        timeout=60,
        follow_redirects=True,
        headers={"User-Agent": "Mozilla/5.0 GWOE-Antragspruefer"},
    ) as client:
        try:
            resp = await client.get(doc.link)
            if resp.status_code != 200:
                # Log the status like ``search`` does instead of failing
                # silently.
                logger.error(
                    "%s PDF download HTTP %s for %s",
                    self.bundesland,
                    resp.status_code,
                    drucksache,
                )
                return None
            # The context manager closes the document even when text
            # extraction raises; join avoids repeated string copies.
            with fitz.open(stream=resp.content, filetype="pdf") as pdf:
                return "".join(page.get_text() for page in pdf)
        except Exception:
            logger.exception("%s PDF download error for %s", self.bundesland, drucksache)
            return None
+
+
class BayernAdapter(ParlamentAdapter):
"""Adapter for Bayerischer Landtag."""
@@ -1754,6 +1963,14 @@ ADAPTERS = {
document_typ_substring=True,
kinds=["Drucksache", "Vorlage"],
),
+ "SH": StarFinderCGIAdapter(
+ bundesland="SH",
+ name="Schleswig-Holsteinischer Landtag (LIS-SH)",
+ base_url="http://lissh.lvn.parlanet.de",
+ wahlperiode=20,
+ db_path="lisshfl.txt",
+ document_typ_code="antrag",
+ ),
"BY": BayernAdapter(),
"BW": PARLISAdapter(
bundesland="BW",