# gwoe-antragspruefer/app/parlamente.py

"""Parliament search adapters for different German states."""
import httpx
import re
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Optional
from bs4 import BeautifulSoup
@dataclass
class Drucksache:
    """A parliamentary document."""
    drucksache: str  # document number, e.g. "18/8125" (Wahlperiode/Nummer)
    title: str  # document title as shown in the portal
    fraktionen: list[str]  # originating parliamentary groups, e.g. ["SPD", "GRÜNE"]
    datum: str  # ISO date (YYYY-MM-DD); empty string when unknown
    link: str  # PDF URL
    bundesland: str  # state code, e.g. "NRW"
    typ: str = "Antrag"  # Antrag, Anfrage, Beschlussempfehlung, etc.
class ParlamentAdapter(ABC):
    """Abstract base class for state-parliament document search adapters.

    Concrete subclasses set `bundesland` (state code) and `name`
    (human-readable parliament name) and implement the three async
    operations below.
    """

    bundesland: str
    name: str

    @abstractmethod
    async def search(self, query: str, limit: int = 20) -> list[Drucksache]:
        """Return documents matching *query*, at most *limit* of them."""
        ...

    @abstractmethod
    async def get_document(self, drucksache: str) -> Optional[Drucksache]:
        """Return the document with the given ID, or None if not found."""
        ...

    @abstractmethod
    async def download_text(self, drucksache: str) -> Optional[str]:
        """Fetch the document and return its extracted plain text, if any."""
        ...
class NRWAdapter(ParlamentAdapter):
    """Adapter for NRW Landtag (opal.landtag.nrw.de).

    Search goes through the OPAL portal's "Einfache Suche" web form and
    scrapes the result rows from the returned HTML. Document PDFs are
    served from www.landtag.nrw.de under /portal/WWW/dokumentenarchiv/.
    """

    bundesland = "NRW"
    name = "Landtag Nordrhein-Westfalen"
    base_url = "https://opal.landtag.nrw.de"
    search_url = "https://opal.landtag.nrw.de/home/dokumente/dokumentensuche/parlamentsdokumente/aktuelle-dokumente.html"

    def _parse_query(self, query: str) -> tuple[str, list[str], bool]:
        """
        Parse search query for AND logic and exact phrases.

        Returns: (search_term_for_api, filter_terms, is_exact)

        Examples:
        - 'Klimaschutz Energie' -> ('Klimaschutz', ['klimaschutz', 'energie'], False)
        - '"Grüner Stahl"' -> ('Grüner Stahl', ['grüner stahl'], True)
        - 'Klimaschutz "erneuerbare Energie"' -> ('Klimaschutz', ['klimaschutz', 'erneuerbare energie'], False)
        """
        import shlex

        query = query.strip()
        # Entire query wrapped in exactly one pair of quotes -> exact phrase.
        if query.startswith('"') and query.endswith('"') and query.count('"') == 2:
            exact = query[1:-1].strip()
            return (exact, [exact.lower()], True)
        # shlex keeps quoted phrases together ('a "b c"' -> ['a', 'b c']).
        try:
            parts = shlex.split(query)
        except ValueError:
            # Fallback for unbalanced quotes
            parts = query.split()
        if not parts:
            return (query, [query.lower()], False)
        # The portal only gets the first term; all terms are applied
        # client-side as an AND filter over the scraped results.
        filter_terms = [p.lower() for p in parts]
        return (parts[0], filter_terms, False)

    def _matches_all_terms(self, doc: 'Drucksache', terms: list[str], is_exact: bool) -> bool:
        """Check if document matches all search terms (AND logic)."""
        searchable = f"{doc.title} {doc.drucksache} {' '.join(doc.fraktionen)} {doc.typ}".lower()
        if is_exact:
            # Exact phrase must appear verbatim (terms holds the one phrase).
            return terms[0] in searchable
        # Every term must appear somewhere in the metadata (AND).
        return all(term in searchable for term in terms)

    def _resolve_form_action(self, soup: BeautifulSoup) -> str:
        """Return the absolute action URL of the OPAL search form.

        Falls back to the plain search page URL when the form cannot be
        located in the page.
        """
        form = soup.select_one('form#docSearchByItem')
        if form and form.get('action'):
            action = form.get('action')
            if action.startswith('/'):
                return f"{self.base_url}{action}"
            if action.startswith('http'):
                return action
            # Relative webflow actions are query-string style.
            return f"{self.search_url}?{action}"
        return self.search_url

    @staticmethod
    def _extract_fraktionen(item) -> list[str]:
        """Extract originating parliamentary groups from the "Urheber:" line.

        Party names are matched case-sensitively and normalized to upper
        case; duplicates (e.g. both "GRÜNE" and "Grüne" present) are
        collapsed to a single entry.
        """
        urheber_text = ""
        for p in item.select('p'):
            if 'Urheber:' in p.get_text():
                urheber_text = p.get_text()
                break
        fraktionen: list[str] = []
        if urheber_text:
            for party in ('SPD', 'CDU', 'GRÜNE', 'Grüne', 'FDP', 'AfD'):
                if party in urheber_text:
                    norm = party.upper()  # 'Grüne'.upper() == 'GRÜNE'
                    if norm not in fraktionen:
                        fraktionen.append(norm)
        return fraktionen

    def _parse_result_item(self, item) -> Optional[Drucksache]:
        """Parse one search-result <li> into a Drucksache.

        Returns None when the row has no recognizable document PDF link.
        """
        # The PDF link encodes the number: MMD18-12345.pdf -> 18/12345.
        num_link = item.select_one('a[href*="MMD"]')
        if not num_link:
            return None
        href = num_link.get('href', '')
        match = re.search(r'MMD(\d+)-(\d+)\.pdf', href)
        if not match:
            return None
        legislatur, nummer = match.groups()
        drucksache = f"{legislatur}/{nummer}"
        pdf_url = f"https://www.landtag.nrw.de{href}" if href.startswith('/') else href
        # Title from the dedicated title link; fall back to a generic label.
        title_elem = item.select_one('a.e-document-result-item__title')
        if title_elem:
            title = re.sub(r'\s+', ' ', title_elem.get_text(strip=True)).strip()
        else:
            title = f"Drucksache {drucksache}"
        # Clean up common artifacts
        title = re.sub(r'\s*\(\s*externer Link.*?\)', '', title).strip()
        # Document type (Antrag, Kleine Anfrage, ...).
        typ_elem = item.select_one('.e-document-result-item__category')
        typ = typ_elem.get_text(strip=True) if typ_elem else "Drucksache"
        # Date: the portal shows DD.MM.YYYY; store ISO YYYY-MM-DD.
        datum = ""
        time_elem = item.select_one('time')
        if time_elem:
            date_match = re.match(r'(\d{2})\.(\d{2})\.(\d{4})', time_elem.get_text(strip=True))
            if date_match:
                d, m, y = date_match.groups()
                datum = f"{y}-{m}-{d}"
        return Drucksache(
            drucksache=drucksache,
            title=title,
            fraktionen=self._extract_fraktionen(item),
            datum=datum,
            link=pdf_url,
            bundesland="NRW",
            typ=typ,
        )

    async def search(self, query: str, limit: int = 20) -> list[Drucksache]:
        """Search NRW Landtag documents via the OPAL portal.

        Supports AND logic over multiple terms and "quoted" exact phrases
        (see _parse_query). Returns at most *limit* matching documents.
        Network or HTTP failures are logged and yield an empty list.
        """
        results: list[Drucksache] = []
        api_query, filter_terms, is_exact = self._parse_query(query)
        async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client:
            try:
                # First, get the page to establish a session.
                initial = await client.get(self.search_url)
                if initial.status_code != 200:
                    print(f"NRW search initial request failed: {initial.status_code}")
                    return []
                soup = BeautifulSoup(initial.text, 'html.parser')
                form_action = self._resolve_form_action(soup)
                # Build form data for "Einfache Suche" (searchByItem form).
                form_data = {
                    '_eventId_sendform': '1',
                    'dokNum': api_query,  # free-text search field
                    'formId': 'searchByItem',
                    'dokTyp': '',  # all document types
                    'wp': '18',  # Wahlperiode 18
                }
                search_resp = await client.post(
                    form_action,
                    data=form_data,
                    cookies=initial.cookies,
                    headers={'Content-Type': 'application/x-www-form-urlencoded'}
                )
                if search_resp.status_code != 200:
                    print(f"NRW search request failed: {search_resp.status_code}")
                    return []
                soup = BeautifulSoup(search_resp.text, 'html.parser')
                # Each result row is an <li> wrapping an <article>. Apply the
                # AND filter BEFORE counting toward *limit*, so we keep
                # collecting until we have *limit* actual matches.
                for item in soup.select('li:has(article)'):
                    if len(results) >= limit:
                        break
                    try:
                        doc = self._parse_result_item(item)
                        if doc and self._matches_all_terms(doc, filter_terms, is_exact):
                            results.append(doc)
                    except Exception as e:
                        print(f"Error parsing item: {e}")
                        continue
            except Exception as e:
                print(f"NRW search error: {e}")
        return results

    async def get_document(self, drucksache: str) -> Optional[Drucksache]:
        """Get document metadata by drucksache ID (e.g. '18/8125').

        Only verifies via a HEAD request that the PDF exists; title and
        date are not recoverable this way, so placeholders are returned.
        """
        match = re.match(r"(\d+)/(\d+)", drucksache)
        if not match:
            return None
        legislatur, nummer = match.groups()
        pdf_url = f"https://www.landtag.nrw.de/portal/WWW/dokumentenarchiv/Dokument/MMD{legislatur}-{nummer}.pdf"
        async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client:
            try:
                resp = await client.head(pdf_url)
            except httpx.HTTPError as e:
                # Best effort: treat network failures as "not found".
                print(f"NRW get_document failed for {drucksache}: {e}")
                return None
        if resp.status_code == 200:
            return Drucksache(
                drucksache=drucksache,
                title=f"Drucksache {drucksache}",
                fraktionen=[],
                datum="",
                link=pdf_url,
                bundesland="NRW",
            )
        return None

    async def download_text(self, drucksache: str) -> Optional[str]:
        """Download the document PDF and return its extracted text."""
        import fitz  # PyMuPDF; imported lazily to keep module import light

        doc = await self.get_document(drucksache)
        if not doc:
            return None
        async with httpx.AsyncClient(timeout=60, follow_redirects=True) as client:
            try:
                resp = await client.get(doc.link)
                if resp.status_code != 200:
                    return None
                # Extract text page by page with PyMuPDF.
                pdf = fitz.open(stream=resp.content, filetype="pdf")
                try:
                    return "".join(page.get_text() for page in pdf)
                finally:
                    pdf.close()
            except Exception as e:
                print(f"Error downloading {drucksache}: {e}")
                return None
class BayernAdapter(ParlamentAdapter):
    """Adapter for the Bayerischer Landtag.

    Not implemented yet: every operation returns an empty result.
    """

    bundesland = "BY"
    name = "Bayerischer Landtag"
    base_url = "https://www.bayern.landtag.de"

    async def search(self, query: str, limit: int = 20) -> list[Drucksache]:
        # TODO: Implement Bayern search
        return []

    async def get_document(self, drucksache: str) -> Optional[Drucksache]:
        # TODO: Implement
        return None

    async def download_text(self, drucksache: str) -> Optional[str]:
        # No document lookup yet, so there is nothing to download.
        return None
class BWAdapter(ParlamentAdapter):
    """Adapter for the Landtag of Baden-Württemberg.

    Not implemented yet: every operation returns an empty result.
    """

    bundesland = "BW"
    name = "Landtag Baden-Württemberg"
    base_url = "https://www.landtag-bw.de"

    async def search(self, query: str, limit: int = 20) -> list[Drucksache]:
        # TODO: Implement BW search
        return []

    async def get_document(self, drucksache: str) -> Optional[Drucksache]:
        # TODO: Implement
        return None

    async def download_text(self, drucksache: str) -> Optional[str]:
        # No document lookup yet, so there is nothing to download.
        return None
# Registry of adapter singletons, keyed by each adapter's state code.
ADAPTERS = {
    adapter.bundesland: adapter
    for adapter in (NRWAdapter(), BayernAdapter(), BWAdapter())
}
def get_adapter(bundesland: str) -> Optional[ParlamentAdapter]:
    """Look up the adapter registered for *bundesland*; None if unknown."""
    try:
        return ADAPTERS[bundesland]
    except KeyError:
        return None
async def search_all(query: str, bundesland: str = "NRW", limit: int = 20) -> list[Drucksache]:
    """Search parliament documents in a specific state.

    Returns an empty list when no adapter is registered for *bundesland*.
    """
    adapter = get_adapter(bundesland)
    if adapter is None:
        return []
    return await adapter.search(query, limit)