From 2c0e94d29d400769846f2825c26643d83b47cf06 Mon Sep 17 00:00:00 2001
From: Dotty Dotter <dotty@Mac-mini-von-Dotty.local>
Date: Sat, 25 Apr 2026 20:55:16 +0200
Subject: [PATCH] feat(#106,#135,#128): Monitoring + abgeordnetenwatch +
 Wahlprogramm-Check

- monitoring.py: taeglicher Scan-Adapter aller aktiven BL, kein Auto-Fetch (#135)
- monitoring_digest.html: Mail-Template mit '0-Kontext'-Hinweis
- abgeordnetenwatch.py + sync_*.py: Phase 1 Roll-Call-Voting (#106)
  - 17 Parlamente (16 BL + BT)
  - 9 BL-spezifische Drucksachen-Patterns + Date-Title-Fallback
  - 28977 Votes fuer BUND in DB
- wahlprogramm_check.py: fehlende Programme erkennen (#128)
- NI-Skip-Liste, NRW Empty-Query-Fallback

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 app/abgeordnetenwatch.py             | 285 +++++++++++++++++++++++
 app/monitoring.py                    | 332 +++++++++++++++++++++++++++
 app/sync_abgeordnetenwatch.py        | 157 +++++++++++++
 app/templates/monitoring_digest.html | 128 +++++++++++
 app/wahlprogramm_check.py            |  37 +++
 5 files changed, 939 insertions(+)
 create mode 100644 app/abgeordnetenwatch.py
 create mode 100644 app/monitoring.py
 create mode 100644 app/sync_abgeordnetenwatch.py
 create mode 100644 app/templates/monitoring_digest.html
 create mode 100644 app/wahlprogramm_check.py
diff --git a/app/abgeordnetenwatch.py b/app/abgeordnetenwatch.py
new file mode 100644
index 0000000..949149e
--- /dev/null
+++ b/app/abgeordnetenwatch.py
@@ -0,0 +1,285 @@
+"""Adapter für abgeordnetenwatch.de API v2 (#106 Phase 1).
+
+Liefert strukturierte Abstimmungsdaten (namentliche Abstimmungen)
+pro Bundesland + Bundestag. Daten werden lokal in abgeordnetenwatch_polls
+und abgeordnetenwatch_votes gecacht.
+
+API-Docs: https://www.abgeordnetenwatch.de/api/v2
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+from typing import Optional
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+# Maps our state (BL) codes to abgeordnetenwatch parliament IDs.
+# IDs taken from GET /api/v2/parliaments (as of April 2026).
+PARLIAMENT_ID: dict[str, int] = {
+    "BT":   5,   # Bundestag (also reachable as "BUND")
+    "BUND": 5,   # alias of "BT"
+    "NRW":  4,   # North Rhine-Westphalia
+    "BE":   2,   # Berlin
+    "HH":   3,   # Hamburg
+    "BW":   6,   # Baden-Württemberg
+    "RP":   7,   # Rhineland-Palatinate
+    "LSA":  8,   # Saxony-Anhalt
+    "MV":   9,   # Mecklenburg-Western Pomerania
+    "HB":  10,   # Bremen
+    "HE":  11,   # Hesse
+    "NI":  12,   # Lower Saxony
+    "BY":  13,   # Bavaria
+    "SL":  14,   # Saarland
+    "TH":  15,   # Thuringia
+    "BB":  16,   # Brandenburg
+    "SN":  17,   # Saxony
+    "SH":  18,   # Schleswig-Holstein
+}
+
+_BASE = "https://www.abgeordnetenwatch.de/api/v2"
+
+# Drucksache extraction from the field_intro HTML — every state parliament
+# has its own URL/file-name scheme. Order: try the generic "WP/NR" pattern
+# first (BUND, HE), then the state-specific patterns for Drucksache PDF URLs.
+_DS_PATTERNS: list[re.Pattern] = [
+    # Generic: "20/12345" — BUND, HE and similar
+    re.compile(r"\b(\d{1,2})/(\d{3,5})\b"),
+    # NRW: MMD18-2142.pdf
+    re.compile(r"MMD(\d{1,2})-(\d{3,5})\.pdf", re.IGNORECASE),
+    # BE: d19-0564.pdf
+    re.compile(r"/d(\d{1,2})-(\d{4})\.pdf", re.IGNORECASE),
+    # BW: 17_7713_D.pdf
+    re.compile(r"/(\d{1,2})_(\d{3,5})_D\.pdf", re.IGNORECASE),
+    # HB: D21L0568.pdf  (D<wp>L<nr>)
+    re.compile(r"/D(\d{1,2})L(\d{3,5})\.pdf", re.IGNORECASE),
+    # SH: drucksache-20-00187.pdf
+    re.compile(r"drucksache-(\d{1,2})-(\d{3,5})\.pdf", re.IGNORECASE),
+    # SL: Gs17_0503.pdf
+    re.compile(r"/Gs(\d{1,2})_(\d{3,5})\.pdf", re.IGNORECASE),
+    # LSA: wp8/drs/d0145…  (order: wp then nr)
+    re.compile(r"/wp(\d{1,2})/drs/d(\d{3,5})", re.IGNORECASE),
+    # SN: dok_nr=2150&...&leg_per=8 — either parameter order within one URL; groups are (nr, wp)
+    re.compile(r"(?=[^\s\"'<>]*?dok_nr=(\d{3,5}))(?=[^\s\"'<>]*?leg_per=(\d{1,2}))", re.IGNORECASE),
+    # RP: 538-18.pdf  (order: nr-wp)
+    re.compile(r"/(\d{3,5})-(\d{1,2})\.pdf", re.IGNORECASE),
+]
+
+
+def extract_drucksache_from_intro(html: str) -> Optional[str]:
+    """Return the first Drucksache number found in the field_intro HTML.
+
+    The patterns in ``_DS_PATTERNS`` are tried in list order and the first one
+    that matches wins. The result is always formatted as "<wp>/<nr>". Most
+    patterns capture (wp, nr); the RP pattern (nr-wp) and the SN pattern
+    (dok_nr ... leg_per) capture (nr, wp), so their groups are swapped here.
+    Returns None for empty input or when no pattern matches.
+    """
+    if not html:
+        return None
+    hit = next((m for m in (p.search(html) for p in _DS_PATTERNS) if m), None)
+    if hit is None:
+        return None
+    source = hit.re.pattern
+    # RP is recognised by its pattern text: a leading "/(\d{3,5})" plus "-".
+    is_rp = source.startswith("/(\\d{3,5})") and "-" in source
+    # SN is recognised by the "dok_nr" literal in its pattern text.
+    is_sn = "dok_nr" in source
+    first, second = hit.group(1), hit.group(2)
+    if is_rp or is_sn:
+        # Captured as (nr, wp) → flip so the output is wp/nr.
+        return f"{second}/{first}"
+    return f"{first}/{second}"
+
+
+async def fallback_drucksache_by_date_title(
+    datum: Optional[str],
+    titel: Optional[str],
+    bundesland: str,
+) -> Optional[str]:
+    """Look up a Drucksache by date and title in the assessments table.
+
+    Fallback for polls where ``extract_drucksache_from_intro`` finds nothing.
+    Picks the ``bundesland`` assessment closest in time (less than 14 days from
+    ``datum``) whose title contains the first 40 characters of ``titel``.
+
+    Args:
+        datum: ISO date of the poll (``field_poll_date``, e.g. ``"2026-04-01"``).
+        titel: Poll label; matched literally as a LIKE substring.
+        bundesland: Our state code (e.g. ``"MV"``); upper-cased for the query.
+
+    Returns:
+        The Drucksache number (e.g. ``"7/1234"``), or ``None`` if an input is
+        missing/blank or no assessment matches.
+    """
+    # Poll labels and assessment titles differ slightly, so only the first 40
+    # characters are compared. A blank title must bail out: an empty needle
+    # would turn into LIKE '%%' and match every assessment in the window.
+    needle = (titel or "").strip()[:40]
+    if not datum or not needle:
+        return None
+    # Escape LIKE metacharacters so "%" and "_" in a title match literally.
+    for ch in ("\\", "%", "_"):
+        needle = needle.replace(ch, "\\" + ch)
+
+    from .config import settings as _settings
+    import aiosqlite as _aio
+
+    # SQLite's LOWER()/LIKE fold ASCII only while str.lower() also folds
+    # umlauts, so the needle is tried both as-is and lower-cased.
+    async with _aio.connect(_settings.db_path) as db:
+        cur = await db.execute(
+            """
+            SELECT drucksache FROM assessments
+            WHERE bundesland = ?
+              AND ABS(julianday(datum) - julianday(?)) < 14
+              AND (LOWER(title) LIKE ? ESCAPE '\\' OR LOWER(title) LIKE ? ESCAPE '\\')
+            ORDER BY ABS(julianday(datum) - julianday(?))
+            LIMIT 1
+            """,
+            (bundesland.upper(), datum, f"%{needle}%", f"%{needle.lower()}%", datum),
+        )
+        row = await cur.fetchone()
+
+    if row:
+        logger.debug("fallback_drucksache_by_date_title: %s/%s → %s", bundesland, datum, row[0])
+        return row[0]
+    return None
+
+
+async def fetch_polls(bundesland_code: str, limit: int = 100) -> list[dict]:
+    """Fetch the current polls of one parliament from abgeordnetenwatch.
+
+    Resolves the newest legislature period of the parliament first and then
+    requests the polls of that period.
+
+    Args:
+        bundesland_code: Our state code (e.g. "NRW", "BT", "BUND").
+        limit: Maximum number of polls; sent to the API as ``range_end``.
+
+    Returns:
+        List of poll dicts (selected API fields plus parsed ``drucksache``, may be None).
+
+    Raises:
+        ValueError: If ``bundesland_code`` is not a key of PARLIAMENT_ID.
+        httpx.HTTPError: On network problems or HTTP error statuses.
+    """
+    parliament_id = PARLIAMENT_ID.get(bundesland_code.upper())
+    if parliament_id is None:
+        raise ValueError(
+            f"Unbekannter BL-Code '{bundesland_code}'. "
+            f"Bekannte Codes: {sorted(PARLIAMENT_ID.keys())}"
+        )
+
+    async with httpx.AsyncClient(timeout=30.0) as client:
+        # Fetch the current ParliamentPeriod of the parliament first —
+        # /polls filters by field_legislature (period id), NOT by parliament id.
+        pp_resp = await client.get(
+            f"{_BASE}/parliament-periods",
+            params={"parliament": parliament_id, "type": "legislature", "range_end": 5},
+        )
+        pp_resp.raise_for_status()
+        periods = (pp_resp.json() or {}).get("data") or []
+        # Current period: sort by start date descending and take the newest
+        current = sorted(
+            periods,
+            key=lambda x: x.get("start_date_period") or "",
+            reverse=True,
+        )
+        if not current:
+            logger.warning("Keine ParliamentPeriod für %s (parliament_id=%d)",
+                           bundesland_code, parliament_id)
+            return []
+        period_id = current[0]["id"]
+
+        # Polls of that period
+        resp = await client.get(
+            f"{_BASE}/polls",
+            params={"field_legislature": period_id, "range_end": limit},
+        )
+        resp.raise_for_status()
+        data = resp.json() or {}  # null body → no polls, same guard as for the periods
+
+    polls_raw: list[dict] = data.get("data") or []
+    polls = []
+    for p in polls_raw:
+        intro_html = p.get("field_intro") or ""
+        polls.append({
+            "id":               p.get("id"),
+            "label":            p.get("label") or p.get("field_poll_date", ""),
+            "field_poll_date":  p.get("field_poll_date"),
+            "field_accepted":   p.get("field_accepted"),
+            "field_topics":     p.get("field_topics") or [],
+            "field_intro":      intro_html,
+            "field_legislature": p.get("field_legislature") or {},
+            "drucksache":       extract_drucksache_from_intro(intro_html),
+        })
+
+    logger.info(
+        "abgeordnetenwatch: %d polls für %s (parliament_id=%d)",
+        len(polls), bundesland_code, parliament_id,
+    )
+    return polls
+
+
+async def fetch_votes_for_poll(poll_id: int) -> list[dict]:
+    """Fetch the individual roll-call votes of one poll.
+
+    Args:
+        poll_id: ID of the poll (from polls[].id).
+
+    Returns:
+        List of vote dicts with the keys poll_id, politician_id,
+        politician_name, partei and vote. ``vote`` is one of "yes", "no",
+        "abstain", "no_show"; any other API value is stored as "no_show".
+
+    Raises:
+        httpx.HTTPError: On network problems or HTTP error statuses.
+    """
+    # /votes?poll=X works (found empirically); field_poll returns a 500 and
+    # /polls/{id}?related_data=votes yields an empty related_data.
+    url = f"{_BASE}/votes"
+    params = {"poll": poll_id, "range_end": 1000}
+
+    async with httpx.AsyncClient(timeout=30.0) as client:
+        resp = await client.get(url, params=params)
+        resp.raise_for_status()
+        # A null JSON body is treated as "no votes" instead of crashing below.
+        data = resp.json() or {}
+
+    votes_raw: list[dict] = data.get("data") or []
+    votes = []
+    for v in votes_raw:
+        politician = v.get("mandate") or v.get("politician") or {}
+        if not isinstance(politician, dict):  # same shape guard as party/fraction
+            politician = {}
+        politician_id = politician.get("id") or v.get("mandate_id")
+        politician_name = politician.get("label") or politician.get("name") or ""
+
+        # Party from politician.party, falling back to the fraction
+        partei = ""
+        party = politician.get("party") or {}
+        if isinstance(party, dict):
+            partei = party.get("label") or party.get("short_label") or ""
+        fraction = v.get("fraction") or {}
+        if not partei and isinstance(fraction, dict):
+            partei = fraction.get("full_name") or fraction.get("label") or ""
+
+        vote_value = (v.get("vote") or "").lower()
+        # Expected values are "yes"/"no"/"abstain"/"no_show"; others become "no_show"
+        if vote_value not in ("yes", "no", "abstain", "no_show"):
+            vote_value = "no_show"
+
+        votes.append({
+            "poll_id":        poll_id,
+            "politician_id":  politician_id,
+            "politician_name": politician_name,
+            "partei":         partei,
+            "vote":           vote_value,
+        })
+
+    logger.info("abgeordnetenwatch: %d votes für poll_id=%d", len(votes), poll_id)
+    return votes
diff --git a/app/monitoring.py b/app/monitoring.py
new file mode 100644
index 0000000..dab5ed3
--- /dev/null
+++ b/app/monitoring.py
@@ -0,0 +1,332 @@
+"""Täglicher Monitoring-Scan für neue Landtags-Drucksachen (#135).
+
+Nur Metadaten — kein PDF-Download, kein LLM-Call.
+
+Ablauf:
+1. Iteriert alle aktiven Bundesländer via aktive_bundeslaender().
+2. Ruft adapter.search("", limit=50) (Fallback: " " oder "*") auf.
+3. UPSERTs Treffer in monitoring_scans. seen_first_at bleibt stabil,
+   last_seen_at wird immer gesetzt.
+4. Aggregiert Ergebnisse in monitoring_daily_summary.
+5. Gibt ScanResult zurück, aus dem run_monitoring_digest() den
+   Mail-Digest baut.
+
+Kosten-Schätzung (Qwen Plus, Stand April 2026):
+  Quelle: https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-qianwen-7b-14b-72b-api-pricing
+  Input:  0.0004 USD / 1 K Token
+  Output: 0.0012 USD / 1 K Token
+  Kurs:   1 USD = 0.93 EUR (Näherung April 2026)
+"""
+from __future__ import annotations
+
+import asyncio
+import logging
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+
+from .bundeslaender import aktive_bundeslaender
+
+logger = logging.getLogger(__name__)
+
+# ─── Cost estimate ───────────────────────────────────────────────────────────
+# Prices from the DashScope documentation (USD, as of April 2026):
+# https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-qianwen-7b-14b-72b-api-pricing
+_QWEN_PLUS_INPUT_USD_PER_1K = 0.0004
+_QWEN_PLUS_OUTPUT_USD_PER_1K = 0.0012
+_USD_TO_EUR = 0.93  # approximate rate for April 2026 (a constant is fine for an estimate)
+
+# Default assumptions per analysis (stated as averages from production use)
+_DEFAULT_AVG_IN_TOKENS = 20_000
+_DEFAULT_AVG_OUT_TOKENS = 3_000
+
+
+def estimate_cost_qwen_plus(
+    n_new: int,
+    avg_in_tokens: int = _DEFAULT_AVG_IN_TOKENS,
+    avg_out_tokens: int = _DEFAULT_AVG_OUT_TOKENS,
+) -> float:
+    """Estimate the Qwen Plus analysis cost in EUR for ``n_new`` documents.
+
+    Multiplies the per-1K-token USD prices with the assumed token counts and
+    converts USD to EUR with the fixed approximate rate. The result is an
+    estimate, not a guarantee.
+
+    Args:
+        n_new: Number of new documents; values <= 0 yield 0.0.
+        avg_in_tokens: Average input tokens per document.
+        avg_out_tokens: Average output tokens per document.
+
+    Returns:
+        Estimated cost in EUR, rounded to four decimal places.
+    """
+    if n_new > 0:
+        usd_in = (avg_in_tokens / 1000) * _QWEN_PLUS_INPUT_USD_PER_1K * n_new
+        usd_out = (avg_out_tokens / 1000) * _QWEN_PLUS_OUTPUT_USD_PER_1K * n_new
+        return round((usd_in + usd_out) * _USD_TO_EUR, 4)
+    return 0.0
+
+
+# ─── Datenklassen ────────────────────────────────────────────────────────────
+
+@dataclass
+class BundeslandScanResult:
+    """Scan result for a single federal state."""
+    bundesland: str                           # state code the result belongs to
+    total_seen: int = 0                       # number of documents the adapter returned
+    new_count: int = 0                        # how many of them were stored as new
+    error: str | None = None                  # adapter error text, None on success
+
+
+@dataclass
+class DailyScanResult:
+    """Overall result of one daily_scan() run."""
+    scan_date: str                            # YYYY-MM-DD
+    results: list[BundeslandScanResult] = field(default_factory=list)
+    new_total: int = 0                        # sum of all new_count values
+    total_seen: int = 0                       # sum of all total_seen values
+    estimated_cost_eur: float = 0.0
+    errors: list[str] = field(default_factory=list)
+
+
+# ─── Adapter search ──────────────────────────────────────────────────────────
+
+DEFAULT_DAILY_LIMIT = 50
+
+# States that are excluded from the daily monitoring scan.
+# NI (Lower Saxony): the NILAS portal requires a login — unauthenticated requests
+# return the login page HTML, which the JSON comment parser turns into ~50 junk records.
+# The exclusion stays until a valid HAR capture is available (see issue #22).
+_MONITORING_SKIP: frozenset[str] = frozenset({"NI"})
+
+
+async def _search_adapter(adapter, bundesland_code: str, limit: int = DEFAULT_DAILY_LIMIT) -> list:
+    """Query an adapter for current documents, with fallback queries.
+
+    Tries the empty string, a single space and an asterisk in that order and
+    returns the first result that does not raise. Earlier failures are only
+    logged at debug level; the failure of the last query is re-raised.
+    ``limit`` is the per-adapter cap handed on to ``adapter.search``.
+    """
+    queries = ("", " ", "*")
+    for attempt, query in enumerate(queries, start=1):
+        try:
+            return await adapter.search(query, limit=limit)
+        except Exception as e:
+            if attempt == len(queries):
+                # Every attempt failed — let the last exception propagate.
+                raise
+            logger.debug(
+                "%s: search(%r) fehlgeschlagen (%s), versuche nächsten Query",
+                bundesland_code, query, e,
+            )
+    return []
+
+
+# ─── Haupt-Scan ──────────────────────────────────────────────────────────────
+
+async def daily_scan(limit: int = DEFAULT_DAILY_LIMIT) -> DailyScanResult:
+    """Scan all active federal states for new documents and store the metadata.
+
+    No PDF download, no LLM call — metadata only. ``limit`` applies per
+    adapter; use a larger value (e.g. 500) for initial seeding.
+    """
+    from .parlamente import ADAPTERS
+    from .database import upsert_monitoring_scan, upsert_monitoring_summary
+
+    now_utc = datetime.now(timezone.utc)
+    scan_date = now_utc.strftime("%Y-%m-%d")
+    now_iso = now_utc.strftime("%Y-%m-%dT%H:%M:%S")  # UTC, formatted without an offset suffix
+
+    result = DailyScanResult(scan_date=scan_date)
+
+    active_bls = aktive_bundeslaender()
+
+    for bl in active_bls:
+        if bl.code in _MONITORING_SKIP:
+            logger.debug("%s: Monitoring-Skip aktiv — übersprungen", bl.code)
+            continue
+
+        adapter = ADAPTERS.get(bl.code)
+        if adapter is None:
+            logger.debug("Kein Adapter für %s — übersprungen", bl.code)
+            continue
+
+        bl_result = BundeslandScanResult(bundesland=bl.code)
+
+        try:
+            docs = await _search_adapter(adapter, bl.code, limit=limit)
+        except Exception as exc:
+            err_msg = f"{type(exc).__name__}: {str(exc)[:500]}"  # message capped at 500 chars
+            logger.exception("Adapter-Fehler bei %s", bl.code)
+            bl_result.error = err_msg
+            result.errors.append(f"{bl.code}: {err_msg}")
+            await upsert_monitoring_summary(
+                scan_date=scan_date,
+                bundesland=bl.code,
+                total_seen=0,
+                new_count=0,
+                errors=err_msg,
+            )
+            result.results.append(bl_result)
+            continue  # an adapter failure must not abort the other states
+
+        bl_result.total_seen = len(docs)
+        new_this_bl = 0
+
+        for doc in docs:
+            try:
+                is_new = await upsert_monitoring_scan(
+                    bundesland=doc.bundesland,
+                    drucksache=doc.drucksache,
+                    title=doc.title,
+                    datum=doc.datum,
+                    typ=doc.typ,
+                    typ_normiert=doc.typ_normiert,
+                    fraktionen=doc.fraktionen,
+                    link=doc.link,
+                    now=now_iso,
+                )
+                if is_new:
+                    new_this_bl += 1
+            except Exception:
+                # A single failing row is logged and skipped; the scan goes on.
+                logger.exception(
+                    "DB-UPSERT fehlgeschlagen für %s/%s — wird übersprungen",
+                    bl.code, getattr(doc, "drucksache", "?"),
+                )
+
+        bl_result.new_count = new_this_bl
+        # Persist the per-state totals of this run.
+        await upsert_monitoring_summary(
+            scan_date=scan_date,
+            bundesland=bl.code,
+            total_seen=bl_result.total_seen,
+            new_count=bl_result.new_count,
+            errors=None,
+        )
+
+        logger.info(
+            "%s: %d gesehen, %d neu",
+            bl.code, bl_result.total_seen, bl_result.new_count,
+        )
+        result.results.append(bl_result)
+
+    result.new_total = sum(r.new_count for r in result.results)
+    result.total_seen = sum(r.total_seen for r in result.results)
+    result.estimated_cost_eur = estimate_cost_qwen_plus(result.new_total)
+
+    return result
+
+
+# ─── Mail-Digest ─────────────────────────────────────────────────────────────
+
+async def run_monitoring_digest(recipient: str) -> dict:
+    """Run daily_scan() and send the result digest by mail.
+
+    Args:
+        recipient: Recipient address (typically the admin).
+
+    Returns:
+        dict with the scan statistics plus {"mail_sent": bool}.
+    """
+    from .mail import send_mail
+    from .database import get_monitoring_new_today
+    from jinja2 import Environment, FileSystemLoader
+    from pathlib import Path
+
+    scan_result = await daily_scan()
+
+    # Load the new documents for the scan date
+    new_docs = await get_monitoring_new_today(scan_result.scan_date)
+
+    # Render the mail body from the template
+    tmpl_dir = Path(__file__).resolve().parent / "templates"
+    env = Environment(loader=FileSystemLoader(str(tmpl_dir)), autoescape=True)
+    tmpl = env.get_template("monitoring_digest.html")
+
+    html_body = tmpl.render(
+        scan_date=scan_result.scan_date,
+        new_total=scan_result.new_total,
+        total_seen=scan_result.total_seen,
+        estimated_cost_eur=scan_result.estimated_cost_eur,
+        results=scan_result.results,
+        new_docs=new_docs,
+        errors=scan_result.errors,
+    )
+
+    # Plain-text variant
+    text_body = _render_plain(scan_result, new_docs)
+
+    subject = (
+        f"[GWÖ-Monitor] {scan_result.scan_date} — "
+        f"{scan_result.new_total} neue Drucksachen"
+        + (f" ({len(scan_result.errors)} Fehler)" if scan_result.errors else "")
+    )
+
+    mail_sent = False
+    try:
+        await send_mail(recipient, subject, text_body, html_body)
+        mail_sent = True
+        logger.info("Monitoring-Digest verschickt an %s", recipient)
+    except Exception:
+        # A failed send is logged and reported through mail_sent=False.
+        logger.exception("Monitoring-Digest: Mail-Versand fehlgeschlagen")
+    return {
+        "scan_date": scan_result.scan_date,
+        "new_total": scan_result.new_total,
+        "total_seen": scan_result.total_seen,
+        "estimated_cost_eur": scan_result.estimated_cost_eur,
+        "error_count": len(scan_result.errors),
+        "mail_sent": mail_sent,
+    }
+
+
+def _render_plain(scan_result: DailyScanResult, new_docs: list[dict]) -> str:
+    """Build the plain-text part of the monitoring digest."""
+    from .config import settings
+
+    lines = [
+        f"GWÖ-Antragsprüfer — Monitoring-Digest {scan_result.scan_date}",
+        "=" * 60,
+        "",
+        f"Neue Drucksachen:   {scan_result.new_total}",
+        f"Gesamt gesehen:     {scan_result.total_seen}",
+        f"Kosten-Schätzung:   {scan_result.estimated_cost_eur:.4f} EUR",
+        "",
+    ]
+
+    if scan_result.errors:
+        lines.append(f"Fehler ({len(scan_result.errors)}):")
+        for e in scan_result.errors:
+            lines.append(f"  • {e}")
+        lines.append("")
+
+    lines.append("Bundesland-Übersicht:")
+    for r in scan_result.results:
+        status = f"✓ {r.new_count} neu / {r.total_seen} gesehen"
+        if r.error:
+            status = f"✗ Fehler: {r.error[:80]}"  # error text capped at 80 chars
+        lines.append(f"  {r.bundesland:6s}  {status}")
+    lines.append("")
+
+    if new_docs:
+        lines.append(f"Neue Drucksachen ({len(new_docs)}):")
+        for doc in new_docs[:30]:  # list at most 30 entries in the mail
+            title = (doc.get("title") or doc.get("drucksache") or "")[:80]
+            bl = doc.get("bundesland", "")
+            drucks = doc.get("drucksache", "")
+            lines.append(f"  [{bl}] {drucks} — {title}")
+        if len(new_docs) > 30:
+            lines.append(f"  … und {len(new_docs) - 30} weitere")
+        lines.append("")
+
+    lines.append(f"Webapp: {settings.base_url}")
+    return "\n".join(lines)
+
+
+if __name__ == "__main__":
+    # Usage: python -m app.monitoring <recipient@example.com>
+    import sys
+    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
+    to = sys.argv[1] if len(sys.argv) > 1 else "mail@tobiasroedel.de"  # NOTE(review): hard-coded fallback recipient
+    stats = asyncio.run(run_monitoring_digest(to))
+    print(f"Monitoring-Scan fertig: {stats}")
diff --git a/app/sync_abgeordnetenwatch.py b/app/sync_abgeordnetenwatch.py
new file mode 100644
index 0000000..0dffb7a
--- /dev/null
+++ b/app/sync_abgeordnetenwatch.py
@@ -0,0 +1,157 @@
+"""CLI-Sync-Skript für abgeordnetenwatch.de (#106 Phase 1).
+
+Holt Polls + namentliche Stimmen für alle oder einen bestimmten BL-Code
+und speichert sie via UPSERT in der lokalen SQLite-DB.
+
+Aufruf:
+    python -m app.sync_abgeordnetenwatch [--bundesland NRW] [--limit 50]
+
+Ohne --bundesland werden alle in PARLIAMENT_ID eingetragenen BL-Codes
+abgearbeitet (BUND-Alias wird übersprungen, BT genügt).
+
+Ausgabe:
+    NRW: 12 polls neu, 340 votes neu
+    BT:  0 polls neu, 0 votes neu
+    …
+"""
+
+from __future__ import annotations
+
+import argparse
+import asyncio
+import logging
+from datetime import datetime, timezone
+
+logger = logging.getLogger(__name__)
+
+
+async def sync_bundesland(bundesland_code: str, limit: int) -> tuple[int, int]:
+    """Sync one state code and return (new_polls, new_votes)."""
+    from .abgeordnetenwatch import (
+        PARLIAMENT_ID, fetch_polls, fetch_votes_for_poll,
+        fallback_drucksache_by_date_title,
+    )
+    from .database import init_db, upsert_aw_poll, upsert_aw_vote
+
+    await init_db()
+
+    parliament_id = PARLIAMENT_ID[bundesland_code.upper()]  # KeyError for unknown codes
+    synced_at = datetime.now(timezone.utc).isoformat()
+
+    polls = await fetch_polls(bundesland_code, limit=limit)
+
+    new_polls = 0
+    new_votes = 0
+
+    for poll in polls:
+        poll_id = poll.get("id")
+        if poll_id is None:
+            continue
+
+        legislature = poll.get("field_legislature") or {}
+        legislature_label = (
+            legislature.get("label") or legislature.get("name") or ""
+            if isinstance(legislature, dict) else str(legislature)
+        )
+
+        topics_raw = poll.get("field_topics") or []
+        topics = [
+            (t.get("label") or t.get("name") or str(t))
+            if isinstance(t, dict) else str(t)
+            for t in topics_raw
+        ]
+
+        # Primary: Drucksache parsed from the intro HTML; fallback via date + title
+        # for states without a PDF URL in the intro (MV/BY/BB/TH/HH/SL — fix #142 phase 3).
+        drucksache = poll.get("drucksache")
+        if drucksache is None:
+            drucksache = await fallback_drucksache_by_date_title(
+                datum=poll.get("field_poll_date"),
+                titel=poll.get("label"),
+                bundesland=bundesland_code,
+            )
+
+        is_new_poll = await upsert_aw_poll(
+            poll_id=poll_id,
+            parliament_id=parliament_id,
+            bundesland=bundesland_code.upper(),
+            drucksache=drucksache,
+            titel=poll.get("label"),
+            datum=poll.get("field_poll_date"),
+            accepted=poll.get("field_accepted"),
+            topics=topics,
+            legislature_label=legislature_label,
+            synced_at=synced_at,
+        )
+        if is_new_poll:
+            new_polls += 1
+
+        # Load and store the votes; a failing poll is logged and skipped
+        try:
+            votes = await fetch_votes_for_poll(poll_id)
+        except Exception:
+            logger.exception("Fehler beim Laden von Votes für poll_id=%d", poll_id)
+            continue
+
+        for vote in votes:
+            politician_id = vote.get("politician_id")
+            if politician_id is None:
+                continue
+            is_new_vote = await upsert_aw_vote(
+                poll_id=poll_id,
+                politician_id=politician_id,
+                politician_name=vote.get("politician_name"),
+                partei=vote.get("partei"),
+                vote=vote.get("vote", "no_show"),
+            )
+            if is_new_vote:
+                new_votes += 1
+
+    return new_polls, new_votes
+
+
+async def main(bundesland: str | None, limit: int) -> None:
+    """Sync one state code, or each distinct parliament once, and print a summary."""
+    from .abgeordnetenwatch import PARLIAMENT_ID
+
+    # Without an explicit code only the first code per parliament ID is kept,
+    # so the BUND alias is skipped (BT and BUND point to the same ID).
+    if bundesland:
+        targets = [bundesland.upper()]
+    else:
+        first_code_by_id: dict[int, str] = {}
+        for code, pid in PARLIAMENT_ID.items():
+            first_code_by_id.setdefault(pid, code)
+        targets = list(first_code_by_id.values())
+
+    for target in targets:
+        try:
+            polls_added, votes_added = await sync_bundesland(target, limit)
+            print(f"{target:4s}: {polls_added} polls neu, {votes_added} votes neu")
+        except Exception:
+            logger.exception("Fehler beim Sync für %s", target)
+            print(f"{target:4s}: FEHLER (siehe Log)")
+
+
+def _cli() -> None:
+    """Parse the command-line options and run the sync once."""
+    cli = argparse.ArgumentParser(
+        description="Sync abgeordnetenwatch-Abstimmungsdaten in die lokale DB.",
+    )
+    cli.add_argument(
+        "--bundesland", "-b", default=None,
+        help="BL-Code (z.B. NRW, BT). Ohne Angabe: alle Codes.",
+    )
+    cli.add_argument(
+        "--limit", "-n", type=int, default=100,
+        help="Maximale Anzahl Polls pro BL (default: 100).",
+    )
+    options = cli.parse_args()
+    # Hand both options to the async entry point.
+    asyncio.run(main(options.bundesland, options.limit))
+
+
+# Script entry point: python -m app.sync_abgeordnetenwatch
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    _cli()
diff --git a/app/templates/monitoring_digest.html b/app/templates/monitoring_digest.html
new file mode 100644
index 0000000..d54f75f
--- /dev/null
+++ b/app/templates/monitoring_digest.html
@@ -0,0 +1,128 @@
+<!DOCTYPE html>
+<html lang="de">
+<head>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width,initial-scale=1">
+<title>GWÖ-Monitor {{ scan_date }}</title>
+</head>
+<body style="font-family:Helvetica,Arial,sans-serif;max-width:640px;margin:0 auto;padding:20px;color:#333">
+
+<h2 style="color:#007a80;margin-bottom:4px">GWÖ-Antragsprüfer — Monitoring {{ scan_date }}</h2>
+<p style="color:#666;margin-top:4px;font-size:0.9em">Täglicher Scan aller aktiven Bundesländer</p>
+
+{% if new_total == 0 and not errors %}{# Banner asserts a successful scan, so it is hidden when any adapter reported an error. #}
+<div style="background:#f0fafa;border-left:3px solid #009da5;padding:10px 14px;margin:12px 0;font-size:0.95em;color:#444">
+  <b style="color:#007a80">Heute keine Änderungen.</b> Alle {{ total_seen }} in den Landtags-Portalen sichtbaren Drucksachen sind bereits seit dem letzten Scan bekannt. Das heißt: die Portale haben seit gestern keine neuen Anträge publiziert — nicht: der Scan war erfolglos.
+</div>
+{% endif %}
+
+<!-- Kennzahlen-Block -->
+<table style="width:100%;border-collapse:collapse;margin:16px 0">
+  <tr style="background:#f0fafa">
+    <td style="padding:10px 14px;border:1px solid #c8e6e6;font-weight:bold">Neue Drucksachen seit letztem Scan</td>
+    <td style="padding:10px 14px;border:1px solid #c8e6e6;font-size:1.4em;color:#007a80;font-weight:bold">{{ new_total }}</td>
+  </tr>
+  <tr>
+    <td style="padding:10px 14px;border:1px solid #ddd">Im Portal aktuell sichtbar (inkl. bekannter)</td>
+    <td style="padding:10px 14px;border:1px solid #ddd">{{ total_seen }}</td>
+  </tr>
+  <tr style="background:#fffbf0">
+    <td style="padding:10px 14px;border:1px solid #ddd">Kosten-Schätzung (alle analysieren)</td>
+    <td style="padding:10px 14px;border:1px solid #ddd">
+      <b>{{ "%.4f"|format(estimated_cost_eur) }} EUR</b>
+      <span style="font-size:0.8em;color:#888">&nbsp;(Qwen Plus, Näherung)</span>
+    </td>
+  </tr>
+  {% if errors %}
+  <tr style="background:#fff3f3">
+    <td style="padding:10px 14px;border:1px solid #f5c0c0;color:#c00">Adapter-Fehler</td>
+    <td style="padding:10px 14px;border:1px solid #f5c0c0;color:#c00;font-weight:bold">{{ errors|length }}</td>
+  </tr>
+  {% endif %}
+</table>
+
+<!-- Fehler-Details -->
+{% if errors %}
+<div style="background:#fff3f3;border-left:3px solid #c00;padding:10px 14px;margin:12px 0">
+  <b style="color:#c00">Fehler-Details:</b>
+  <ul style="margin:6px 0 0;padding-left:18px;font-size:0.9em">
+    {% for e in errors %}
+    <li>{{ e }}</li>
+    {% endfor %}
+  </ul>
+</div>
+{% endif %}
+
+<!-- Bundesland-Übersicht -->
+<h3 style="color:#007a80;border-bottom:1px solid #c8e6e6;padding-bottom:6px">Bundesland-Übersicht</h3>
+<table style="width:100%;border-collapse:collapse;font-size:0.9em">
+  <thead>
+    <tr style="background:#e6f4f4">
+      <th style="padding:7px 10px;border:1px solid #c8e6e6;text-align:left">BL</th>
+      <th style="padding:7px 10px;border:1px solid #c8e6e6;text-align:right">Gesehen</th>
+      <th style="padding:7px 10px;border:1px solid #c8e6e6;text-align:right">Neu</th>
+      <th style="padding:7px 10px;border:1px solid #c8e6e6;text-align:left">Status</th>
+    </tr>
+  </thead>
+  <tbody>
+    {% for r in results %}
+    <tr style="{% if r.error %}background:#fff8f8{% elif r.new_count > 0 %}background:#f8fff8{% endif %}">
+      <td style="padding:6px 10px;border:1px solid #ddd;font-weight:bold">{{ r.bundesland }}</td>
+      <td style="padding:6px 10px;border:1px solid #ddd;text-align:right">{{ r.total_seen }}</td>
+      <td style="padding:6px 10px;border:1px solid #ddd;text-align:right;color:{% if r.new_count > 0 %}#007a80{% else %}#999{% endif %}">
+        {{ r.new_count }}
+      </td>
+      <td style="padding:6px 10px;border:1px solid #ddd;font-size:0.85em">
+        {% if r.error %}
+        <span style="color:#c00">✗ {{ r.error[:100] }}</span>
+        {% elif r.new_count > 0 %}
+        <span style="color:#2a7a2a">✓ {{ r.new_count }} neue</span>
+        {% else %}
+        <span style="color:#999">keine Änderung</span>
+        {% endif %}
+      </td>
+    </tr>
+    {% endfor %}
+  </tbody>
+</table>
+
+<!-- Neue Drucksachen -->
+{% if new_docs %}
+<h3 style="color:#007a80;border-bottom:1px solid #c8e6e6;padding-bottom:6px;margin-top:24px">
+  Neue Drucksachen ({{ new_docs|length }})
+</h3>
+{% for doc in new_docs[:30] %}
+<div style="border-left:3px solid #007a80;padding:6px 12px;margin:8px 0;background:#f9f9f9;font-size:0.9em">
+  <span style="color:#007a80;font-weight:bold">{{ doc.bundesland }}</span>
+  <span style="color:#555;margin-left:8px">{{ doc.drucksache }}</span>
+  {% if doc.datum %}
+  <span style="color:#888;font-size:0.85em;margin-left:8px">{{ doc.datum }}</span>
+  {% endif %}
+  <br>
+  <span style="color:#333">{{ (doc.title or doc.drucksache or '')[:120] }}</span>
+  {% if doc.fraktionen %}
+  <br><span style="color:#777;font-size:0.85em">
+    {% if doc.fraktionen is iterable and doc.fraktionen is not string %}
+      {{ doc.fraktionen | join(', ') }}
+    {% else %}
+      {{ doc.fraktionen }}
+    {% endif %}
+  </span>
+  {% endif %}
+</div>
+{% endfor %}
+{% if new_docs|length > 30 %}
+<p style="color:#666;font-size:0.9em">… und {{ new_docs|length - 30 }} weitere neue Drucksachen.</p>
+{% endif %}
+{% else %}
+<p style="color:#888;font-style:italic">Keine neuen Drucksachen heute.</p>
+{% endif %}
+
+<!-- Footer -->
+<hr style="border:none;border-top:1px solid #ddd;margin:24px 0">
+<p style="font-size:0.8em;color:#aaa">
+  GWÖ-Antragsprüfer Monitoring &middot; Kosten-Schätzung basiert auf Qwen-Plus-Preisen (DashScope, April 2026) &middot;
+  Nur Metadaten — kein LLM-Call im Scan
+</p>
+</body>
+</html>
diff --git a/app/wahlprogramm_check.py b/app/wahlprogramm_check.py
new file mode 100644
index 0000000..2383b5d
--- /dev/null
+++ b/app/wahlprogramm_check.py
@@ -0,0 +1,37 @@
+"""Erkennung fehlender Wahlprogramme (#128).
+
+Prüft für ein gegebenes Bundesland, welche der im Landtag vertretenen
+Fraktionen in der WAHLPROGRAMME-Registry nicht hinterlegt sind.
+Wird nach dem LLM-Call in analyze_antrag() aufgerufen, damit das
+Assessment-Ergebnis die Lücken explizit ausweist.
+"""
+
+from .bundeslaender import BUNDESLAENDER
+from .wahlprogramme import WAHLPROGRAMME
+
+
+def check_missing_programmes(bundesland: str, fraktionen: list[str]) -> list[str]:
+    """Gibt eine Liste der Fraktions-Namen zurück, für die kein Wahlprogramm
+    im gegebenen Bundesland hinterlegt ist.
+
+    Args:
+        bundesland: Bundesland-Code (z.B. "NRW", "BY").
+        fraktionen: Liste der Fraktionen, die geprüft werden sollen
+            (typischerweise aus BUNDESLAENDER[bl].landtagsfraktionen).
+
+    Returns:
+        Geordnete Liste der Fraktions-Namen ohne hinterlegtes Wahlprogramm.
+        Leere Liste, wenn für alle Fraktionen Programme vorliegen oder
+        fraktionen leer ist.
+
+    Raises:
+        ValueError: Wenn das Bundesland nicht in BUNDESLAENDER bekannt ist.
+    """
+    if bundesland not in BUNDESLAENDER:
+        raise ValueError(f"Unbekanntes Bundesland: {bundesland!r}")
+
+    if not fraktionen:
+        return []
+
+    indexed = WAHLPROGRAMME.get(bundesland, {})
+    return [f for f in fraktionen if f not in indexed]

Neue Drucksachen seit letztem Scan	{{ new_total }}
Im Portal aktuell sichtbar (inkl. bekannter)	{{ total_seen }}
Kosten-Schätzung (alle analysieren)	{{ "%.4f"|format(estimated_cost_eur) }} EUR (Qwen Plus, Näherung)
Adapter-Fehler	{{ errors|length }}