feat(#106,#135,#128): Monitoring + abgeordnetenwatch + Wahlprogramm-Check

- monitoring.py: taeglicher Scan-Adapter aller aktiven BL, kein Auto-Fetch (#135) - monitoring_digest.html: Mail-Template mit '0-Kontext'-Hinweis - abgeordnetenwatch.py + sync_*.py: Phase 1 Roll-Call-Voting (#106) - 17 Parlamente (16 BL + BT) - 9 BL-spezifische Drucksachen-Patterns + Date-Title-Fallback - 28977 Votes fuer BUND in DB - wahlprogramm_check.py: fehlende Programme erkennen (#128) - NI-Skip-Liste, NRW Empty-Query-Fallback Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-25 20:55:16 +02:00 · 2026-04-25 20:55:16 +02:00 · 2c0e94d29d
commit 2c0e94d29d
parent ad1db2a924
5 changed files with 939 additions and 0 deletions
--- a/app/abgeordnetenwatch.py
+++ b/app/abgeordnetenwatch.py
@ -0,0 +1,285 @@
 """Adapter für abgeordnetenwatch.de API v2 (#106 Phase 1).
 Liefert strukturierte Abstimmungsdaten (namentliche Abstimmungen)
 pro Bundesland + Bundestag. Daten werden lokal in abgeordnetenwatch_polls
 und abgeordnetenwatch_votes gecacht.
 API-Docs: https://www.abgeordnetenwatch.de/api/v2
 """
 from __future__ import annotations
 import logging
 import re
 from typing import Optional
 import httpx
 logger = logging.getLogger(__name__)
 # Maps our internal parliament codes (Bundesland codes plus the Bundestag) to
 # abgeordnetenwatch parliament IDs.
 # IDs taken from GET /api/v2/parliaments (as of April 2026).
 # "BT" and "BUND" deliberately point at the same ID; callers that iterate the
 # whole mapping have to de-duplicate by ID themselves.
 PARLIAMENT_ID: dict[str, int] = {
    "BT":   5,   # Bundestag (also reachable as "BUND")
    "BUND": 5,   # alias for "BT"
    "NRW":  4,   # Nordrhein-Westfalen
    "BE":   2,   # Berlin
    "HH":   3,   # Hamburg
    "BW":   6,   # Baden-Württemberg
    "RP":   7,   # Rheinland-Pfalz
    "LSA":  8,   # Sachsen-Anhalt
    "MV":   9,   # Mecklenburg-Vorpommern
    "HB":  10,   # Bremen
    "HE":  11,   # Hessen
    "NI":  12,   # Niedersachsen
    "BY":  13,   # Bayern
    "SL":  14,   # Saarland
    "TH":  15,   # Thüringen
    "BB":  16,   # Brandenburg
    "SN":  17,   # Sachsen
    "SH":  18,   # Schleswig-Holstein
 }
 # Base URL that every API request in this module is built from.
 _BASE = "https://www.abgeordnetenwatch.de/api/v2"
 # Drucksache extraction from the field_intro HTML. Every parliament uses its
 # own URL / file-name scheme, so there is one pattern per scheme. Each pattern
 # exposes the named groups "wp" (electoral term) and "nr" (document number);
 # the extraction code therefore never needs to know in which order a given
 # scheme lists the two values.
 # Order matters: the generic "WP/NR" notation (BUND, HE) is tried first, then
 # the state-specific patterns derived from the Drucksachen PDF URLs.
 _DS_PATTERNS: list[re.Pattern] = [
    # Generic: "20/12345" — BUND, HE and similar
    re.compile(r"\b(?P<wp>\d{1,2})/(?P<nr>\d{3,5})\b"),
    # NRW: MMD18-2142.pdf
    re.compile(r"MMD(?P<wp>\d{1,2})-(?P<nr>\d{3,5})\.pdf", re.IGNORECASE),
    # BE: d19-0564.pdf
    re.compile(r"/d(?P<wp>\d{1,2})-(?P<nr>\d{4})\.pdf", re.IGNORECASE),
    # BW: 17_7713_D.pdf
    re.compile(r"/(?P<wp>\d{1,2})_(?P<nr>\d{3,5})_D\.pdf", re.IGNORECASE),
    # HB: D21L0568.pdf  (D<wp>L<nr>)
    re.compile(r"/D(?P<wp>\d{1,2})L(?P<nr>\d{3,5})\.pdf", re.IGNORECASE),
    # SH: drucksache-20-00187.pdf
    re.compile(r"drucksache-(?P<wp>\d{1,2})-(?P<nr>\d{3,5})\.pdf", re.IGNORECASE),
    # SL: Gs17_0503.pdf
    re.compile(r"/Gs(?P<wp>\d{1,2})_(?P<nr>\d{3,5})\.pdf", re.IGNORECASE),
    # LSA: wp8/drs/d0145…
    re.compile(r"/wp(?P<wp>\d{1,2})/drs/d(?P<nr>\d{3,5})", re.IGNORECASE),
    # SN: dok_nr=2150&...&leg_per=8 — the query parameters may appear in either
    # order, hence two patterns. The filler between them excludes whitespace,
    # quotes and angle brackets so a match cannot run past the end of one URL
    # and combine the dok_nr of one link with the leg_per of another.
    re.compile(
        r"dok_nr=(?P<nr>\d{3,5})[^\s\"'<>]*?leg_per=(?P<wp>\d{1,2})",
        re.IGNORECASE,
    ),
    re.compile(
        r"leg_per=(?P<wp>\d{1,2})[^\s\"'<>]*?dok_nr=(?P<nr>\d{3,5})",
        re.IGNORECASE,
    ),
    # RP: 538-18.pdf  (URL order is nr-wp)
    re.compile(r"/(?P<nr>\d{3,5})-(?P<wp>\d{1,2})\.pdf", re.IGNORECASE),
 ]
 def extract_drucksache_from_intro(html: str) -> Optional[str]:
    """Extract the first Drucksache number from a poll's field_intro HTML.

    The patterns in ``_DS_PATTERNS`` are tried in list order; the first
    pattern that matches anywhere in ``html`` wins. The result is always
    formatted as ``"<wp>/<nr>"``, independent of the order in which the
    matched URL scheme lists the two values (RP and SN list the number
    first). Digits are returned exactly as matched, i.e. leading zeros are
    kept.

    Args:
        html: The raw ``field_intro`` HTML of a poll; may be empty or None.

    Returns:
        The Drucksache number as ``"<wp>/<nr>"``, or ``None`` if ``html`` is
        empty or no pattern matches.
    """
    if not html:
        return None
    for pat in _DS_PATTERNS:
        m = pat.search(html)
        if m:
            # Named groups make the swap special cases unnecessary.
            return f"{m.group('wp')}/{m.group('nr')}"
    return None
 async def fallback_drucksache_by_date_title(
    datum: Optional[str],
    titel: Optional[str],
    bundesland: str,
 ) -> Optional[str]:
    """Look up a Drucksache number by poll date and title in the assessments table.

    Intended as a fallback for polls where ``extract_drucksache_from_intro``
    finds no pattern. Searches assessments of ``bundesland`` whose date is
    less than 14 days away from ``datum`` and whose title contains the first
    40 characters of ``titel``; the row closest in date wins.

    Args:
        datum: ISO date of the poll (``field_poll_date``, e.g. ``"2026-04-01"``).
        titel: Label/title of the poll; used as a literal substring.
        bundesland: Our parliament code (e.g. ``"MV"``); upper-cased for the query.

    Returns:
        The Drucksache number of the best match (e.g. ``"7/1234"``), or
        ``None`` if an argument is missing/blank or nothing matches.
    """
    if not datum or not titel:
        return None
    # Only the first 40 characters are compared because poll labels and
    # assessment titles can differ slightly towards the end.
    titel_substr = titel.strip()[:40]
    if not titel_substr:
        # A blank title would turn into LIKE '%%' and match every assessment.
        return None
    # SQLite's built-in LOWER() only folds ASCII letters. Folding non-ASCII
    # letters here (e.g. "Ä" -> "ä") would make titles such as "Änderung …"
    # impossible to match, so mirror SQLite and fold ASCII only.
    needle = "".join(ch.lower() if ch.isascii() else ch for ch in titel_substr)
    # Treat the title as literal text: escape LIKE wildcards and the escape
    # character itself (see the ESCAPE clause below).
    needle = needle.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    from .config import settings as _settings
    import aiosqlite as _aio
    async with _aio.connect(_settings.db_path) as db:
        cur = await db.execute(
            """
            SELECT drucksache FROM assessments
            WHERE bundesland = ?
              AND ABS(julianday(datum) - julianday(?)) < 14
              AND LOWER(title) LIKE ? ESCAPE '\\'
            ORDER BY ABS(julianday(datum) - julianday(?))
            LIMIT 1
            """,
            (bundesland.upper(), datum, f"%{needle}%", datum),
        )
        row = await cur.fetchone()
    if row:
        logger.debug(
            "fallback_drucksache_by_date_title: %s/%s → %s",
            bundesland, datum, row[0],
        )
        return row[0]
    return None
 async def fetch_polls(bundesland_code: str, limit: int = 100) -> list[dict]:
    """Fetch the polls of a parliament's newest legislature from abgeordnetenwatch.

    Two requests are made: first the parliament's legislature periods are
    loaded and the one with the latest ``start_date_period`` is selected,
    then the polls of that period are requested.

    Args:
        bundesland_code: Our parliament code (e.g. "NRW", "BT", "BUND");
            looked up case-insensitively in ``PARLIAMENT_ID``.
        limit: Maximum number of polls; passed to the API as ``range_end``.

    Returns:
        A list of poll dicts holding selected API fields plus the parsed
        ``drucksache`` key (which may be None). Empty if the API reports no
        legislature period for the parliament.

    Raises:
        ValueError: If ``bundesland_code`` is not a key of ``PARLIAMENT_ID``.
        httpx.HTTPError: On network problems or non-success responses.
    """
    parliament_id = PARLIAMENT_ID.get(bundesland_code.upper())
    if parliament_id is None:
        raise ValueError(
            f"Unbekannter BL-Code '{bundesland_code}'. "
            f"Bekannte Codes: {sorted(PARLIAMENT_ID.keys())}"
        )

    def to_poll(raw: dict) -> dict:
        # Reduce a raw API poll to the fields we keep and attach the
        # Drucksache number parsed from its intro HTML.
        intro = raw.get("field_intro") or ""
        return {
            "id":               raw.get("id"),
            "label":            raw.get("label") or raw.get("field_poll_date", ""),
            "field_poll_date":  raw.get("field_poll_date"),
            "field_accepted":   raw.get("field_accepted"),
            "field_topics":     raw.get("field_topics") or [],
            "field_intro":      intro,
            "field_legislature": raw.get("field_legislature") or {},
            "drucksache":       extract_drucksache_from_intro(intro),
        }

    async with httpx.AsyncClient(timeout=30.0) as client:
        # /polls filters by field_legislature (a period id), NOT by the
        # parliament id, so the current period has to be resolved first.
        period_resp = await client.get(
            f"{_BASE}/parliament-periods",
            params={"parliament": parliament_id, "type": "legislature", "range_end": 5},
        )
        period_resp.raise_for_status()
        periods = (period_resp.json() or {}).get("data") or []
        if not periods:
            logger.warning("Keine ParliamentPeriod für %s (parliament_id=%d)",
                           bundesland_code, parliament_id)
            return []
        # Newest period = latest start date; on ties the first one returned wins.
        newest = max(periods, key=lambda period: period.get("start_date_period") or "")
        poll_resp = await client.get(
            f"{_BASE}/polls",
            params={"field_legislature": newest["id"], "range_end": limit},
        )
        poll_resp.raise_for_status()
        payload = poll_resp.json()

    polls = [to_poll(raw) for raw in (payload.get("data") or [])]
    logger.info(
        "abgeordnetenwatch: %d polls für %s (parliament_id=%d)",
        len(polls), bundesland_code, parliament_id,
    )
    return polls
 async def fetch_votes_for_poll(poll_id: int) -> list[dict]:
    """Fetch the individual roll-call votes of one poll.

    Args:
        poll_id: ID of the poll (``polls[].id``).

    Returns:
        A list of vote dicts with the keys ``poll_id``, ``politician_id``,
        ``politician_name``, ``partei`` and ``vote``. ``vote`` is one of
        "yes", "no", "abstain", "no_show"; any other or missing value is
        stored as "no_show".

    Raises:
        httpx.HTTPError: On network problems or non-success responses.
    """
    known_votes = ("yes", "no", "abstain", "no_show")

    def party_label(person: dict, raw_vote: dict):
        # Prefer the party attached to the person; fall back to the fraction
        # recorded on the vote itself.
        party = person.get("party") or {}
        if isinstance(party, dict):
            from_party = party.get("label") or party.get("short_label") or ""
            if from_party:
                return from_party
        fraction = raw_vote.get("fraction") or {}
        if isinstance(fraction, dict):
            return fraction.get("full_name") or fraction.get("label") or ""
        return ""

    # /votes?poll=X works (found empirically). field_poll answers with a 500
    # and /polls/{id}?related_data=votes returns an empty related_data, so the
    # plain ?poll=<id> filter is used.
    async with httpx.AsyncClient(timeout=30.0) as client:
        resp = await client.get(
            f"{_BASE}/votes",
            params={"poll": poll_id, "range_end": 1000},
        )
        resp.raise_for_status()
        payload = resp.json()

    votes = []
    for raw_vote in payload.get("data") or []:
        person = raw_vote.get("mandate") or raw_vote.get("politician") or {}
        choice = (raw_vote.get("vote") or "").lower()
        votes.append({
            "poll_id":        poll_id,
            "politician_id":  person.get("id") or raw_vote.get("mandate_id"),
            "politician_name": person.get("label") or person.get("name") or "",
            "partei":         party_label(person, raw_vote),
            "vote":           choice if choice in known_votes else "no_show",
        })
    logger.info(
        "abgeordnetenwatch: %d votes für poll_id=%d", len(votes), poll_id
    )
    return votes
--- a/app/monitoring.py
+++ b/app/monitoring.py
@ -0,0 +1,332 @@
 """Täglicher Monitoring-Scan für neue Landtags-Drucksachen (#135).
 Nur Metadaten — kein PDF-Download, kein LLM-Call.
 Ablauf:
 1. Iteriert alle aktiven Bundesländer via aktive_bundeslaender().
 2. Ruft adapter.search("", limit=50) (Fallback: " " oder "*") auf.
 3. UPSERTs Treffer in monitoring_scans. seen_first_at bleibt stabil,
   last_seen_at wird immer gesetzt.
 4. Aggregiert Ergebnisse in monitoring_daily_summary.
 5. Gibt ScanResult zurück, aus dem run_monitoring_digest() den
   Mail-Digest baut.
 Kosten-Schätzung (Qwen Plus, Stand April 2026):
  Quelle: https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-qianwen-7b-14b-72b-api-pricing
  Input:  0.0004 USD / 1 K Token
  Output: 0.0012 USD / 1 K Token
  Kurs:   1 USD = 0.93 EUR (Näherung April 2026)
 """
 from __future__ import annotations
 import asyncio
 import logging
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from .bundeslaender import aktive_bundeslaender
 logger = logging.getLogger(__name__)
 # ─── Cost estimate ───────────────────────────────────────────────────────────
 # Prices from the DashScope documentation (USD, as of April 2026):
 # https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-qianwen-7b-14b-72b-api-pricing
 _QWEN_PLUS_INPUT_USD_PER_1K = 0.0004
 _QWEN_PLUS_OUTPUT_USD_PER_1K = 0.0012
 _USD_TO_EUR = 0.93  # approximate rate, April 2026 (a constant is fine for an estimate)
 # Default per-analysis assumptions (averages from production use)
 _DEFAULT_AVG_IN_TOKENS = 20_000
 _DEFAULT_AVG_OUT_TOKENS = 3_000
 def estimate_cost_qwen_plus(
    n_new: int,
    avg_in_tokens: int = _DEFAULT_AVG_IN_TOKENS,
    avg_out_tokens: int = _DEFAULT_AVG_OUT_TOKENS,
 ) -> float:
    """Estimate the analysis cost in EUR for ``n_new`` new Drucksachen (Qwen Plus).

    Input and output tokens are priced per 1 000 tokens in USD and the sum
    is converted with the fixed approximate rate ``_USD_TO_EUR``. The result
    is an estimate, not a guarantee.

    Args:
        n_new: Number of new Drucksachen.
        avg_in_tokens: Average input tokens per document (default 20 000).
        avg_out_tokens: Average output tokens per document (default 3 000).

    Returns:
        Estimated cost in EUR, rounded to four decimals; ``0.0`` when
        ``n_new`` is zero or negative.
    """
    if n_new > 0:
        usd_for_input = avg_in_tokens / 1000 * _QWEN_PLUS_INPUT_USD_PER_1K * n_new
        usd_for_output = avg_out_tokens / 1000 * _QWEN_PLUS_OUTPUT_USD_PER_1K * n_new
        return round((usd_for_input + usd_for_output) * _USD_TO_EUR, 4)
    return 0.0
 # ─── Data classes ────────────────────────────────────────────────────────────
@dataclass
 class BundeslandScanResult:
    """Scan result for a single Bundesland."""
    # Bundesland code the result belongs to.
    bundesland: str
    # Number of documents the adapter search returned for this Bundesland.
    total_seen: int = 0
    # Number of those documents whose upsert reported them as new.
    new_count: int = 0
    # "<ExceptionType>: <message>" if the adapter search failed, otherwise None.
    error: str | None = None
@dataclass
 class DailyScanResult:
    """Overall result of one daily_scan() run."""
    scan_date: str                            # YYYY-MM-DD
    # One entry per Bundesland that was actually scanned (or failed to scan).
    results: list[BundeslandScanResult] = field(default_factory=list)
    new_total: int = 0                        # sum of all new_count values
    total_seen: int = 0                       # sum of all total_seen values
    # Cost estimate for analysing all new documents, see estimate_cost_qwen_plus().
    estimated_cost_eur: float = 0.0
    # One "<code>: <error>" string per Bundesland whose adapter search failed.
    errors: list[str] = field(default_factory=list)
 # ─── Adapter search ──────────────────────────────────────────────────────────
 DEFAULT_DAILY_LIMIT = 50
 # Bundesländer that are excluded from the daily monitoring scan.
 # NI (Niedersachsen): the NILAS portal requires a login — unauthenticated
 # requests return the login page HTML, which the JSON comment parser turns
 # into ~50 junk records. The exclusion stays until a valid HAR capture is
 # available (see issue #22).
 _MONITORING_SKIP: frozenset[str] = frozenset({"NI"})
 async def _search_adapter(adapter, bundesland_code: str, limit: int = DEFAULT_DAILY_LIMIT) -> list:
    """Ask an adapter for its current Drucksachen using a "match all" query.

    Tries the queries ``""``, ``" "`` and ``"*"`` in that order and returns
    the result of the first call that does not raise — an empty result
    counts as success and ends the search. A failure of one of the first two
    queries is only logged at debug level; a failure of the last query is
    re-raised so the caller can record the adapter error.

    Args:
        adapter: Object providing ``async search(query, limit=...)``.
        bundesland_code: Code used in log messages only.
        limit: Per-adapter upper bound; raise it for initial seeding.

    Returns:
        Whatever the first successful ``adapter.search`` call returned.
    """
    probes = ("", " ", "*")
    final_position = len(probes) - 1
    for position, query in enumerate(probes):
        try:
            return await adapter.search(query, limit=limit)
        except Exception as e:
            if position == final_position:
                # Every attempt failed — hand the exception to the caller.
                raise
            logger.debug(
                "%s: search(%r) fehlgeschlagen (%s), versuche nächsten Query",
                bundesland_code, query, e,
            )
    return []
 # ─── Main scan ───────────────────────────────────────────────────────────────
 async def daily_scan(limit: int = DEFAULT_DAILY_LIMIT) -> DailyScanResult:
    """Scan all active Bundesländer for new Drucksachen.

    Metadata only — no PDF download, no LLM call. For every active
    Bundesland that is not in ``_MONITORING_SKIP`` and has an adapter, the
    adapter is searched, each hit is upserted into the monitoring scans and
    a per-Bundesland summary row is written. Adapter failures are recorded
    in the result instead of aborting the run; a failing upsert of a single
    document is logged and skipped.

    Args:
        limit: Per-adapter result limit; raise it for initial seeding
            (e.g. 500).

    Returns:
        The aggregated ``DailyScanResult`` including the cost estimate for
        all new documents.
    """
    from .parlamente import ADAPTERS
    from .database import upsert_monitoring_scan, upsert_monitoring_summary

    started = datetime.now(timezone.utc)
    scan_date = started.strftime("%Y-%m-%d")
    seen_stamp = started.strftime("%Y-%m-%dT%H:%M:%S")
    overall = DailyScanResult(scan_date=scan_date)

    for land in aktive_bundeslaender():
        if land.code in _MONITORING_SKIP:
            logger.debug("%s: Monitoring-Skip aktiv — übersprungen", land.code)
            continue
        adapter = ADAPTERS.get(land.code)
        if adapter is None:
            logger.debug("Kein Adapter für %s — übersprungen", land.code)
            continue

        entry = BundeslandScanResult(bundesland=land.code)
        try:
            docs = await _search_adapter(adapter, land.code, limit=limit)
        except Exception as exc:
            # Record the failure for this Bundesland and move on to the next.
            failure = f"{type(exc).__name__}: {str(exc)[:500]}"
            logger.exception("Adapter-Fehler bei %s", land.code)
            entry.error = failure
            overall.errors.append(f"{land.code}: {failure}")
            await upsert_monitoring_summary(
                scan_date=scan_date,
                bundesland=land.code,
                total_seen=0,
                new_count=0,
                errors=failure,
            )
            overall.results.append(entry)
            continue

        entry.total_seen = len(docs)
        fresh = 0
        for doc in docs:
            try:
                inserted = await upsert_monitoring_scan(
                    bundesland=doc.bundesland,
                    drucksache=doc.drucksache,
                    title=doc.title,
                    datum=doc.datum,
                    typ=doc.typ,
                    typ_normiert=doc.typ_normiert,
                    fraktionen=doc.fraktionen,
                    link=doc.link,
                    now=seen_stamp,
                )
            except Exception:
                logger.exception(
                    "DB-UPSERT fehlgeschlagen für %s/%s — wird übersprungen",
                    land.code, getattr(doc, "drucksache", "?"),
                )
            else:
                if inserted:
                    fresh += 1
        entry.new_count = fresh

        await upsert_monitoring_summary(
            scan_date=scan_date,
            bundesland=land.code,
            total_seen=entry.total_seen,
            new_count=entry.new_count,
            errors=None,
        )
        logger.info(
            "%s: %d gesehen, %d neu",
            land.code, entry.total_seen, entry.new_count,
        )
        overall.results.append(entry)

    overall.new_total = sum(entry.new_count for entry in overall.results)
    overall.total_seen = sum(entry.total_seen for entry in overall.results)
    overall.estimated_cost_eur = estimate_cost_qwen_plus(overall.new_total)
    return overall
 # ─── Mail digest ─────────────────────────────────────────────────────────────
 async def run_monitoring_digest(recipient: str) -> dict:
    """Run daily_scan() and mail the resulting digest.

    The HTML part is rendered from the ``monitoring_digest.html`` template,
    the plain-text part by ``_render_plain``. A failing mail delivery is
    logged and reported via ``mail_sent`` instead of being raised.

    Args:
        recipient: Address the digest is sent to (typically the admin).

    Returns:
        dict with the scan statistics plus ``{"mail_sent": bool}``.
    """
    from .mail import send_mail
    from .database import get_monitoring_new_today
    from jinja2 import Environment, FileSystemLoader
    from pathlib import Path

    scan = await daily_scan()
    # Documents first seen on the scan date.
    fresh_docs = await get_monitoring_new_today(scan.scan_date)

    # HTML part: render the Jinja2 template that lives next to this module.
    template_root = Path(__file__).resolve().parent / "templates"
    jinja_env = Environment(loader=FileSystemLoader(str(template_root)), autoescape=True)
    context = {
        "scan_date": scan.scan_date,
        "new_total": scan.new_total,
        "total_seen": scan.total_seen,
        "estimated_cost_eur": scan.estimated_cost_eur,
        "results": scan.results,
        "new_docs": fresh_docs,
        "errors": scan.errors,
    }
    html_body = jinja_env.get_template("monitoring_digest.html").render(**context)
    # Plain-text part
    text_body = _render_plain(scan, fresh_docs)

    subject = f"[GWÖ-Monitor] {scan.scan_date} — {scan.new_total} neue Drucksachen"
    if scan.errors:
        subject += f" ({len(scan.errors)} Fehler)"

    try:
        await send_mail(recipient, subject, text_body, html_body)
    except Exception:
        logger.exception("Monitoring-Digest: Mail-Versand fehlgeschlagen")
        mail_sent = False
    else:
        logger.info("Monitoring-Digest verschickt an %s", recipient)
        mail_sent = True

    return {
        "scan_date": scan.scan_date,
        "new_total": scan.new_total,
        "total_seen": scan.total_seen,
        "estimated_cost_eur": scan.estimated_cost_eur,
        "error_count": len(scan.errors),
        "mail_sent": mail_sent,
    }
 def _render_plain(scan_result: DailyScanResult, new_docs: list[dict]) -> str:
    """Build the plain-text part of the monitoring digest.

    Layout: headline and key figures, an optional error list, one status
    line per scanned Bundesland, up to 30 new documents (with a "… and N
    more" line beyond that) and finally the web-app URL from the settings.
    """
    from .config import settings

    max_listed = 30
    out = [
        f"GWÖ-Antragsprüfer — Monitoring-Digest {scan_result.scan_date}",
        "=" * 60,
        "",
        f"Neue Drucksachen:   {scan_result.new_total}",
        f"Gesamt gesehen:     {scan_result.total_seen}",
        f"Kosten-Schätzung:   {scan_result.estimated_cost_eur:.4f} EUR",
        "",
    ]

    if scan_result.errors:
        out.append(f"Fehler ({len(scan_result.errors)}):")
        out.extend(f"  • {message}" for message in scan_result.errors)
        out.append("")

    out.append("Bundesland-Übersicht:")
    for entry in scan_result.results:
        if entry.error:
            status = f"✗ Fehler: {entry.error[:80]}"
        else:
            status = f"✓ {entry.new_count} neu / {entry.total_seen} gesehen"
        out.append(f"  {entry.bundesland:6s}  {status}")
    out.append("")

    if new_docs:
        out.append(f"Neue Drucksachen ({len(new_docs)}):")
        for doc in new_docs[:max_listed]:
            # Fall back to the Drucksache number when a document has no title.
            headline = (doc.get("title") or doc.get("drucksache") or "")[:80]
            out.append(
                f"  [{doc.get('bundesland', '')}] {doc.get('drucksache', '')} — {headline}"
            )
        if len(new_docs) > max_listed:
            out.append(f"  … und {len(new_docs) - max_listed} weitere")
        out.append("")

    out.append(f"Webapp: {settings.base_url}")
    return "\n".join(out)
 if __name__ == "__main__":
    # Usage: python -m app.monitoring <recipient@example.com>
    # Without an argument the digest goes to the built-in default address.
    import sys
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
    cli_args = sys.argv[1:]
    recipient = cli_args[0] if cli_args else "mail@tobiasroedel.de"
    summary = asyncio.run(run_monitoring_digest(recipient))
    print(f"Monitoring-Scan fertig: {summary}")
--- a/app/sync_abgeordnetenwatch.py
+++ b/app/sync_abgeordnetenwatch.py
@ -0,0 +1,157 @@
 """CLI-Sync-Skript für abgeordnetenwatch.de (#106 Phase 1).
 Holt Polls + namentliche Stimmen für alle oder einen bestimmten BL-Code
 und speichert sie via UPSERT in der lokalen SQLite-DB.
 Aufruf:
    python -m app.sync_abgeordnetenwatch [--bundesland NRW] [--limit 50]
 Ohne --bundesland werden alle in PARLIAMENT_ID eingetragenen BL-Codes
 abgearbeitet (BUND-Alias wird übersprungen, BT genügt).
 Ausgabe:
    NRW: 12 polls neu, 340 votes neu
    BT:  0 polls neu, 0 votes neu
    …
 """
 from __future__ import annotations
 import argparse
 import asyncio
 import logging
 from datetime import datetime, timezone
 logger = logging.getLogger(__name__)
 async def sync_bundesland(bundesland_code: str, limit: int) -> tuple[int, int]:
    """Sync the polls and roll-call votes of one parliament code into the DB.

    Every fetched poll is upserted, then its votes are fetched and upserted.
    If loading the votes of a poll fails, the error is logged and the sync
    continues with the next poll. Votes without a politician id are skipped.

    Args:
        bundesland_code: Our parliament code; must be a key of ``PARLIAMENT_ID``.
        limit: Maximum number of polls to fetch.

    Returns:
        ``(new_polls, new_votes)`` — how many upserts reported a new row.
    """
    from .abgeordnetenwatch import (
        PARLIAMENT_ID, fetch_polls, fetch_votes_for_poll,
        fallback_drucksache_by_date_title,
    )
    from .database import init_db, upsert_aw_poll, upsert_aw_vote

    def topic_name(topic) -> str:
        # Topics may arrive as dicts (label/name) or as plain values.
        if isinstance(topic, dict):
            return topic.get("label") or topic.get("name") or str(topic)
        return str(topic)

    await init_db()
    code_upper = bundesland_code.upper()
    parliament_id = PARLIAMENT_ID[code_upper]
    synced_at = datetime.now(timezone.utc).isoformat()
    poll_count = 0
    vote_count = 0

    for poll in await fetch_polls(bundesland_code, limit=limit):
        poll_id = poll.get("id")
        if poll_id is None:
            continue

        legislature = poll.get("field_legislature") or {}
        if isinstance(legislature, dict):
            legislature_label = legislature.get("label") or legislature.get("name") or ""
        else:
            legislature_label = str(legislature)
        topics = [topic_name(topic) for topic in poll.get("field_topics") or []]

        # Primary source: Drucksache parsed from the intro HTML. Fallback via
        # date + title for parliaments without a PDF URL in the intro
        # (MV/BY/BB/TH/HH/SL — fix #142 phase 3).
        drucksache = poll.get("drucksache")
        if drucksache is None:
            drucksache = await fallback_drucksache_by_date_title(
                datum=poll.get("field_poll_date"),
                titel=poll.get("label"),
                bundesland=bundesland_code,
            )

        poll_is_new = await upsert_aw_poll(
            poll_id=poll_id,
            parliament_id=parliament_id,
            bundesland=code_upper,
            drucksache=drucksache,
            titel=poll.get("label"),
            datum=poll.get("field_poll_date"),
            accepted=poll.get("field_accepted"),
            topics=topics,
            legislature_label=legislature_label,
            synced_at=synced_at,
        )
        if poll_is_new:
            poll_count += 1

        # Load and store the individual votes of this poll.
        try:
            votes = await fetch_votes_for_poll(poll_id)
        except Exception:
            logger.exception("Fehler beim Laden von Votes für poll_id=%d", poll_id)
            continue
        for vote in votes:
            politician_id = vote.get("politician_id")
            if politician_id is None:
                continue
            vote_is_new = await upsert_aw_vote(
                poll_id=poll_id,
                politician_id=politician_id,
                politician_name=vote.get("politician_name"),
                partei=vote.get("partei"),
                vote=vote.get("vote", "no_show"),
            )
            if vote_is_new:
                vote_count += 1

    return poll_count, vote_count
 async def main(bundesland: str | None, limit: int) -> None:
    """Sync one parliament code, or all of them, and print one line per code.

    Without ``bundesland`` every parliament ID in ``PARLIAMENT_ID`` is synced
    exactly once, using the first code listed for it (so the BUND alias of
    BT is skipped). A failing sync is logged and reported as "FEHLER"; the
    remaining codes are still processed.

    Args:
        bundesland: A single parliament code, or None/empty for all codes.
        limit: Maximum number of polls per code.
    """
    from .abgeordnetenwatch import PARLIAMENT_ID

    if bundesland:
        codes = [bundesland.upper()]
    else:
        # Keep the first code per parliament ID, preserving mapping order.
        first_code_per_id: dict[int, str] = {}
        for code, pid in PARLIAMENT_ID.items():
            first_code_per_id.setdefault(pid, code)
        codes = list(first_code_per_id.values())

    for code in codes:
        try:
            polls_added, votes_added = await sync_bundesland(code, limit)
            print(f"{code:4s}: {polls_added} polls neu, {votes_added} votes neu")
        except Exception:
            logger.exception("Fehler beim Sync für %s", code)
            print(f"{code:4s}: FEHLER (siehe Log)")
 def _cli() -> None:
    """Parse the command line and run the sync.

    Options: ``--bundesland/-b`` (a single code, default: all codes) and
    ``--limit/-n`` (maximum number of polls per code, default 100).
    """
    cli = argparse.ArgumentParser(
        description="Sync abgeordnetenwatch-Abstimmungsdaten in die lokale DB."
    )
    cli.add_argument(
        "--bundesland", "-b",
        default=None,
        help="BL-Code (z.B. NRW, BT). Ohne Angabe: alle Codes.",
    )
    cli.add_argument(
        "--limit", "-n",
        type=int,
        default=100,
        help="Maximale Anzahl Polls pro BL (default: 100).",
    )
    options = cli.parse_args()
    asyncio.run(main(options.bundesland, options.limit))
 if __name__ == "__main__":
    # Script entry point (python -m app.sync_abgeordnetenwatch): enable
    # INFO-level logging, then hand over to the CLI parser.
    logging.basicConfig(level=logging.INFO)
    _cli()
--- a/app/templates/monitoring_digest.html
+++ b/app/templates/monitoring_digest.html
@ -0,0 +1,128 @@
 <!DOCTYPE html>
 <html lang="de">
 <head>
 {# Monitoring digest mail. Context variables supplied by the renderer:
    scan_date, new_total, total_seen, estimated_cost_eur, results (one entry
    per scanned Bundesland), new_docs (documents first seen on scan_date) and
    errors (one message per failed adapter). #}
 <meta charset="utf-8">
 <meta name="viewport" content="width=device-width,initial-scale=1">
 <title>GWÖ-Monitor {{ scan_date }}</title>
 </head>
 <body style="font-family:Helvetica,Arial,sans-serif;max-width:640px;margin:0 auto;padding:20px;color:#333">
 <h2 style="color:#007a80;margin-bottom:4px">GWÖ-Antragsprüfer — Monitoring {{ scan_date }}</h2>
 <p style="color:#666;margin-top:4px;font-size:0.9em">Täglicher Scan aller aktiven Bundesländer</p>
 {# The reassuring "no changes" note is only valid for a clean scan: every
    adapter answered and at least one document was seen. Otherwise "0 new"
    may simply mean that nothing could be read, so a warning is shown. #}
 {% if new_total == 0 and total_seen > 0 and not errors %}
 <div style="background:#f0fafa;border-left:3px solid #009da5;padding:10px 14px;margin:12px 0;font-size:0.95em;color:#444">
  <b style="color:#007a80">Heute keine Änderungen.</b> Alle {{ total_seen }} in den Landtags-Portalen sichtbaren Drucksachen sind bereits seit dem letzten Scan bekannt. Das heißt: die Portale haben seit gestern keine neuen Anträge publiziert — nicht: der Scan war erfolglos.
 </div>
 {% elif new_total == 0 %}
 <div style="background:#fff8e6;border-left:3px solid #d98c00;padding:10px 14px;margin:12px 0;font-size:0.95em;color:#444">
  <b style="color:#a66a00">Keine neuen Drucksachen erkannt — Scan unvollständig.</b> Mindestens ein Adapter ist fehlgeschlagen oder es wurden keine Drucksachen gesehen; neue Anträge können daher unbemerkt geblieben sein.
 </div>
 {% endif %}
 <!-- Key figures -->
 <table style="width:100%;border-collapse:collapse;margin:16px 0">
  <tr style="background:#f0fafa">
    <td style="padding:10px 14px;border:1px solid #c8e6e6;font-weight:bold">Neue Drucksachen seit letztem Scan</td>
    <td style="padding:10px 14px;border:1px solid #c8e6e6;font-size:1.4em;color:#007a80;font-weight:bold">{{ new_total }}</td>
  </tr>
  <tr>
    <td style="padding:10px 14px;border:1px solid #ddd">Im Portal aktuell sichtbar (inkl. bekannter)</td>
    <td style="padding:10px 14px;border:1px solid #ddd">{{ total_seen }}</td>
  </tr>
  <tr style="background:#fffbf0">
    <td style="padding:10px 14px;border:1px solid #ddd">Kosten-Schätzung (alle analysieren)</td>
    <td style="padding:10px 14px;border:1px solid #ddd">
      <b>{{ "%.4f"|format(estimated_cost_eur) }} EUR</b>
      <span style="font-size:0.8em;color:#888">&nbsp;(Qwen Plus, Näherung)</span>
    </td>
  </tr>
  {% if errors %}
  <tr style="background:#fff3f3">
    <td style="padding:10px 14px;border:1px solid #f5c0c0;color:#c00">Adapter-Fehler</td>
    <td style="padding:10px 14px;border:1px solid #f5c0c0;color:#c00;font-weight:bold">{{ errors|length }}</td>
  </tr>
  {% endif %}
 </table>
 <!-- Error details -->
 {% if errors %}
 <div style="background:#fff3f3;border-left:3px solid #c00;padding:10px 14px;margin:12px 0">
  <b style="color:#c00">Fehler-Details:</b>
  <ul style="margin:6px 0 0;padding-left:18px;font-size:0.9em">
    {% for e in errors %}
    <li>{{ e }}</li>
    {% endfor %}
  </ul>
 </div>
 {% endif %}
 <!-- Per-Bundesland overview -->
 <h3 style="color:#007a80;border-bottom:1px solid #c8e6e6;padding-bottom:6px">Bundesland-Übersicht</h3>
 <table style="width:100%;border-collapse:collapse;font-size:0.9em">
  <thead>
    <tr style="background:#e6f4f4">
      <th style="padding:7px 10px;border:1px solid #c8e6e6;text-align:left">BL</th>
      <th style="padding:7px 10px;border:1px solid #c8e6e6;text-align:right">Gesehen</th>
      <th style="padding:7px 10px;border:1px solid #c8e6e6;text-align:right">Neu</th>
      <th style="padding:7px 10px;border:1px solid #c8e6e6;text-align:left">Status</th>
    </tr>
  </thead>
  <tbody>
    {% for r in results %}
    <tr style="{% if r.error %}background:#fff8f8{% elif r.new_count > 0 %}background:#f8fff8{% endif %}">
      <td style="padding:6px 10px;border:1px solid #ddd;font-weight:bold">{{ r.bundesland }}</td>
      <td style="padding:6px 10px;border:1px solid #ddd;text-align:right">{{ r.total_seen }}</td>
      <td style="padding:6px 10px;border:1px solid #ddd;text-align:right;color:{% if r.new_count > 0 %}#007a80{% else %}#999{% endif %}">
        {{ r.new_count }}
      </td>
      <td style="padding:6px 10px;border:1px solid #ddd;font-size:0.85em">
        {% if r.error %}
        <span style="color:#c00">✗ {{ r.error[:100] }}</span>
        {% elif r.new_count > 0 %}
        <span style="color:#2a7a2a">✓ {{ r.new_count }} neue</span>
        {% else %}
        <span style="color:#999">keine Änderung</span>
        {% endif %}
      </td>
    </tr>
    {% endfor %}
  </tbody>
 </table>
 <!-- New Drucksachen (at most 30 are listed) -->
 {% if new_docs %}
 <h3 style="color:#007a80;border-bottom:1px solid #c8e6e6;padding-bottom:6px;margin-top:24px">
  Neue Drucksachen ({{ new_docs|length }})
 </h3>
 {% for doc in new_docs[:30] %}
 <div style="border-left:3px solid #007a80;padding:6px 12px;margin:8px 0;background:#f9f9f9;font-size:0.9em">
  <span style="color:#007a80;font-weight:bold">{{ doc.bundesland }}</span>
  <span style="color:#555;margin-left:8px">{{ doc.drucksache }}</span>
  {% if doc.datum %}
  <span style="color:#888;font-size:0.85em;margin-left:8px">{{ doc.datum }}</span>
  {% endif %}
  <br>
  <span style="color:#333">{{ (doc.title or doc.drucksache or '')[:120] }}</span>
  {% if doc.fraktionen %}
  <br><span style="color:#777;font-size:0.85em">
    {% if doc.fraktionen is iterable and doc.fraktionen is not string %}
      {{ doc.fraktionen | join(', ') }}
    {% else %}
      {{ doc.fraktionen }}
    {% endif %}
  </span>
  {% endif %}
 </div>
 {% endfor %}
 {% if new_docs|length > 30 %}
 <p style="color:#666;font-size:0.9em">… und {{ new_docs|length - 30 }} weitere neue Drucksachen.</p>
 {% endif %}
 {% else %}
 <p style="color:#888;font-style:italic">Keine neuen Drucksachen heute.</p>
 {% endif %}
 <!-- Footer -->
 <hr style="border:none;border-top:1px solid #ddd;margin:24px 0">
 <p style="font-size:0.8em;color:#aaa">
  GWÖ-Antragsprüfer Monitoring &middot; Kosten-Schätzung basiert auf Qwen-Plus-Preisen (DashScope, April 2026) &middot;
  Nur Metadaten — kein LLM-Call im Scan
 </p>
 </body>
 </html>
--- a/app/wahlprogramm_check.py
+++ b/app/wahlprogramm_check.py
@ -0,0 +1,37 @@
 """Erkennung fehlender Wahlprogramme (#128).
 Prüft für ein gegebenes Bundesland, welche der im Landtag vertretenen
 Fraktionen in der WAHLPROGRAMME-Registry nicht hinterlegt sind.
 Wird nach dem LLM-Call in analyze_antrag() aufgerufen, damit das
 Assessment-Ergebnis die Lücken explizit ausweist.
 """
 from .bundeslaender import BUNDESLAENDER
 from .wahlprogramme import WAHLPROGRAMME
 def check_missing_programmes(bundesland: str, fraktionen: list[str]) -> list[str]:
    """Return the Fraktionen that have no election programme on file.

    Args:
        bundesland: Bundesland code (e.g. "NRW", "BY"); must be a key of
            ``BUNDESLAENDER``.
        fraktionen: Names of the Fraktionen to check.

    Returns:
        The names from ``fraktionen`` — in their original order — that are
        not registered under ``bundesland`` in ``WAHLPROGRAMME``. Empty if
        every Fraktion has a programme or ``fraktionen`` is empty.

    Raises:
        ValueError: If ``bundesland`` is not a key of ``BUNDESLAENDER``.
    """
    if bundesland in BUNDESLAENDER:
        if fraktionen:
            on_file = WAHLPROGRAMME.get(bundesland, {})
            return list(filter(lambda name: name not in on_file, fraktionen))
        return []
    raise ValueError(f"Unbekanntes Bundesland: {bundesland!r}")