feat(#106,#135,#128): Monitoring + abgeordnetenwatch + Wahlprogramm-Check
- monitoring.py: taeglicher Scan-Adapter aller aktiven BL, kein Auto-Fetch (#135) - monitoring_digest.html: Mail-Template mit '0-Kontext'-Hinweis - abgeordnetenwatch.py + sync_*.py: Phase 1 Roll-Call-Voting (#106) - 17 Parlamente (16 BL + BT) - 9 BL-spezifische Drucksachen-Patterns + Date-Title-Fallback - 28977 Votes fuer BUND in DB - wahlprogramm_check.py: fehlende Programme erkennen (#128) - NI-Skip-Liste, NRW Empty-Query-Fallback Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
ad1db2a924
commit
2c0e94d29d
285
app/abgeordnetenwatch.py
Normal file
285
app/abgeordnetenwatch.py
Normal file
@ -0,0 +1,285 @@
|
|||||||
|
"""Adapter für abgeordnetenwatch.de API v2 (#106 Phase 1).
|
||||||
|
|
||||||
|
Liefert strukturierte Abstimmungsdaten (namentliche Abstimmungen)
|
||||||
|
pro Bundesland + Bundestag. Daten werden lokal in abgeordnetenwatch_polls
|
||||||
|
und abgeordnetenwatch_votes gecacht.
|
||||||
|
|
||||||
|
API-Docs: https://www.abgeordnetenwatch.de/api/v2
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Mapping of our federal-state (BL) codes to abgeordnetenwatch parliament IDs.
# IDs taken from GET /api/v2/parliaments (as of April 2026).
PARLIAMENT_ID: dict[str, int] = {
    "BT": 5,  # Bundestag (also reachable as "BUND")
    "BUND": 5,  # Alias for "BT" — same parliament id
    "NRW": 4,
    "BE": 2,  # Berlin
    "HH": 3,  # Hamburg
    "BW": 6,  # Baden-Württemberg
    "RP": 7,  # Rheinland-Pfalz
    "LSA": 8,  # Sachsen-Anhalt
    "MV": 9,  # Mecklenburg-Vorpommern
    "HB": 10,  # Bremen
    "HE": 11,  # Hessen
    "NI": 12,  # Niedersachsen
    "BY": 13,  # Bayern
    "SL": 14,  # Saarland
    "TH": 15,  # Thüringen
    "BB": 16,  # Brandenburg
    "SN": 17,  # Sachsen
    "SH": 18,  # Schleswig-Holstein
}

# Base URL of the abgeordnetenwatch.de REST API (v2).
_BASE = "https://www.abgeordnetenwatch.de/api/v2"
|
||||||
|
|
||||||
|
# Extraction of Drucksachen (printed-matter) numbers from field_intro HTML —
# every state parliament has its own URL / file-name scheme.  Order matters:
# the generic "WP/NR" pattern (BUND, HE) is tried first, then the
# state-specific patterns derived from the Drucksachen PDF URLs.
_DS_PATTERNS: list[re.Pattern] = [
    # Generic: "20/12345" — BUND, HE and similar
    re.compile(r"\b(\d{1,2})/(\d{3,5})\b"),
    # NRW: MMD18-2142.pdf
    re.compile(r"MMD(\d{1,2})-(\d{3,5})\.pdf", re.IGNORECASE),
    # BE: d19-0564.pdf
    re.compile(r"/d(\d{1,2})-(\d{4})\.pdf", re.IGNORECASE),
    # BW: 17_7713_D.pdf
    re.compile(r"/(\d{1,2})_(\d{3,5})_D\.pdf", re.IGNORECASE),
    # HB: D21L0568.pdf (D<wp>L<nr>)
    re.compile(r"/D(\d{1,2})L(\d{3,5})\.pdf", re.IGNORECASE),
    # SH: drucksache-20-00187.pdf
    re.compile(r"drucksache-(\d{1,2})-(\d{3,5})\.pdf", re.IGNORECASE),
    # SL: Gs17_0503.pdf
    re.compile(r"/Gs(\d{1,2})_(\d{3,5})\.pdf", re.IGNORECASE),
    # LSA: wp8/drs/d0145… (order: wp then nr)
    re.compile(r"/wp(\d{1,2})/drs/d(\d{3,5})", re.IGNORECASE),
    # SN: dok_nr=2150&...&leg_per=8 — params may appear in arbitrary order
    re.compile(r"dok_nr=(\d{3,5}).*leg_per=(\d{1,2})", re.IGNORECASE),
    # RP: 538-18.pdf (order: nr-wp)
    re.compile(r"/(\d{3,5})-(\d{1,2})\.pdf", re.IGNORECASE),
]


def extract_drucksache_from_intro(html: str) -> Optional[str]:
    """Extract the first Drucksache number from the field_intro HTML.

    Tries the state-parliament-specific URL patterns in order
    (NRW MMD<wp>-<nr>, BW <wp>_<nr>_D.pdf, etc.) and returns the first
    hit as a "<wp>/<nr>" string.  The output order is always (wp, nr);
    per-state URL quirks (RP encodes nr-wp, SN encodes
    dok_nr=...&leg_per=...) are untangled here by swapping the groups.
    """
    if not html:
        return None
    for pattern in _DS_PATTERNS:
        match = pattern.search(html)
        if match is None:
            continue
        first, second = match.group(1), match.group(2)
        raw = pattern.pattern
        # Special case RP: URL stores nr-wp → swap so the output is wp/nr.
        if "-" in raw and raw.startswith("/(\\d{3,5})"):
            return f"{second}/{first}"
        # Special case SN: dok_nr (group 1) + leg_per (group 2) → wp/nr.
        if "dok_nr" in raw:
            return f"{second}/{first}"
        # Default: the groups already come out as (wp, nr).
        return f"{first}/{second}"
    return None
|
||||||
|
|
||||||
|
|
||||||
|
async def fallback_drucksache_by_date_title(
    datum: Optional[str],
    titel: Optional[str],
    bundesland: str,
) -> Optional[str]:
    """Fallback Drucksache lookup via date + title against the assessments DB.

    Called when ``extract_drucksache_from_intro`` finds no pattern (affects
    MV/BY/BB/TH/HH/SL, whose intro HTML contains no PDF URLs).

    Searches assessments for ``bundesland`` within 14 days around ``datum``
    combined with a title-substring match and returns the Drucksache number
    of the closest hit, or ``None``.

    Args:
        datum: ISO date of the poll (``field_poll_date``, e.g. ``"2026-04-01"``).
        titel: Label/title of the poll (matched as a LIKE substring).
        bundesland: Our BL code (e.g. ``"MV"``).

    Returns:
        Drucksache number as a string (e.g. ``"7/1234"``) or ``None``.
    """
    if not datum or not titel:
        return None

    # Title substring: only the first 40 characters are used for the LIKE
    # match, since poll labels and assessment titles can differ slightly.
    # LIKE metacharacters ('%', '_', '\') occurring in real titles are
    # escaped so they match literally instead of acting as wildcards.
    titel_substr = titel.strip()[:40].lower()
    escaped = (
        titel_substr.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    )

    from .config import settings as _settings
    import aiosqlite as _aio

    async with _aio.connect(_settings.db_path) as db:
        cur = await db.execute(
            """
            SELECT drucksache FROM assessments
            WHERE bundesland = ?
              AND drucksache IS NOT NULL
              AND ABS(julianday(datum) - julianday(?)) < 14
              AND LOWER(title) LIKE ? ESCAPE '\\'
            ORDER BY ABS(julianday(datum) - julianday(?))
            LIMIT 1
            """,
            (bundesland.upper(), datum, f"%{escaped}%", datum),
        )
        row = await cur.fetchone()

    if row:
        logger.debug(
            "fallback_drucksache_by_date_title: %s/%s → %s",
            bundesland, datum, row[0],
        )
        return row[0]
    return None
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_polls(bundesland_code: str, limit: int = 100) -> list[dict]:
    """Fetch recent roll-call polls for one federal state from abgeordnetenwatch.

    Returns a list of poll dicts; each dict additionally carries the
    parsed key ``drucksache`` (may be None).

    Args:
        bundesland_code: Our BL code (e.g. "NRW", "BT", "BUND").
        limit: Maximum number of polls; passed on as range_end.

    Returns:
        List of poll dicts with the API fields plus ``drucksache``.

    Raises:
        ValueError: If bundesland_code is not in PARLIAMENT_ID.
        httpx.HTTPError: On network problems.
    """
    parliament_id = PARLIAMENT_ID.get(bundesland_code.upper())
    if parliament_id is None:
        raise ValueError(
            f"Unbekannter BL-Code '{bundesland_code}'. "
            f"Bekannte Codes: {sorted(PARLIAMENT_ID.keys())}"
        )

    async with httpx.AsyncClient(timeout=30.0) as client:
        # First resolve the current ParliamentPeriod for this parliament —
        # /polls filters by field_legislature (period id), NOT parliament id.
        pp_resp = await client.get(
            f"{_BASE}/parliament-periods",
            params={"parliament": parliament_id, "type": "legislature", "range_end": 5},
        )
        pp_resp.raise_for_status()
        periods = (pp_resp.json() or {}).get("data") or []
        # Current period: sort by start date descending, take the newest.
        # NOTE(review): assumes the period records carry "start_date_period";
        # missing keys sort as "" (i.e. last) — confirm against the API docs.
        current = sorted(
            periods,
            key=lambda x: x.get("start_date_period") or "",
            reverse=True,
        )
        if not current:
            logger.warning("Keine ParliamentPeriod für %s (parliament_id=%d)",
                           bundesland_code, parliament_id)
            return []
        period_id = current[0]["id"]

        # Polls for this period
        resp = await client.get(
            f"{_BASE}/polls",
            params={"field_legislature": period_id, "range_end": limit},
        )
        resp.raise_for_status()
        data = resp.json()

    polls_raw: list[dict] = data.get("data") or []
    polls = []
    for p in polls_raw:
        intro_html = p.get("field_intro") or ""
        polls.append({
            "id": p.get("id"),
            # Fall back to the poll date when the API delivers no label.
            "label": p.get("label") or p.get("field_poll_date", ""),
            "field_poll_date": p.get("field_poll_date"),
            "field_accepted": p.get("field_accepted"),
            "field_topics": p.get("field_topics") or [],
            "field_intro": intro_html,
            "field_legislature": p.get("field_legislature") or {},
            # Parsed locally from the intro HTML; None when no pattern hits.
            "drucksache": extract_drucksache_from_intro(intro_html),
        })

    logger.info(
        "abgeordnetenwatch: %d polls für %s (parliament_id=%d)",
        len(polls), bundesland_code, parliament_id,
    )
    return polls
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_votes_for_poll(poll_id: int) -> list[dict]:
    """Fetch the individual roll-call votes for one poll.

    Args:
        poll_id: Id of the poll (from polls[].id).

    Returns:
        List of vote dicts with the fields:
        poll_id, politician_id, politician_name, partei, vote.
        vote is one of: "yes", "no", "abstain", "no_show".

    Raises:
        httpx.HTTPError: On network problems.
    """
    # /votes?poll=X works (determined empirically);
    # NOT field_poll (returns 500) and NOT /polls/{id}?related_data=votes
    # (returns empty related_data). A plain ?poll=<id> it is.
    url = f"{_BASE}/votes"
    params = {"poll": poll_id, "range_end": 1000}

    async with httpx.AsyncClient(timeout=30.0) as client:
        resp = await client.get(url, params=params)
        resp.raise_for_status()
        data = resp.json()

    votes_raw: list[dict] = data.get("data") or []
    votes = []
    for v in votes_raw:
        # The person is nested either under "mandate" or "politician".
        politician = v.get("mandate") or v.get("politician") or {}
        politician_id = politician.get("id") or v.get("mandate_id")
        politician_name = politician.get("label") or politician.get("name") or ""

        # Party from politician.party, falling back to the fraction label.
        partei = ""
        party = politician.get("party") or {}
        if isinstance(party, dict):
            partei = party.get("label") or party.get("short_label") or ""
        fraction = v.get("fraction") or {}
        if not partei and isinstance(fraction, dict):
            partei = fraction.get("full_name") or fraction.get("label") or ""

        vote_value = (v.get("vote") or "").lower()
        # The API delivers "yes"/"no"/"abstain"/"no_show" — taken verbatim;
        # anything unexpected (including missing) is normalized to "no_show".
        if vote_value not in ("yes", "no", "abstain", "no_show"):
            vote_value = "no_show"

        votes.append({
            "poll_id": poll_id,
            "politician_id": politician_id,
            "politician_name": politician_name,
            "partei": partei,
            "vote": vote_value,
        })

    logger.info(
        "abgeordnetenwatch: %d votes für poll_id=%d", len(votes), poll_id
    )
    return votes
|
||||||
332
app/monitoring.py
Normal file
332
app/monitoring.py
Normal file
@ -0,0 +1,332 @@
|
|||||||
|
"""Täglicher Monitoring-Scan für neue Landtags-Drucksachen (#135).
|
||||||
|
|
||||||
|
Nur Metadaten — kein PDF-Download, kein LLM-Call.
|
||||||
|
|
||||||
|
Ablauf:
|
||||||
|
1. Iteriert alle aktiven Bundesländer via aktive_bundeslaender().
|
||||||
|
2. Ruft adapter.search("", limit=50) (Fallback: " " oder "*") auf.
|
||||||
|
3. UPSERTs Treffer in monitoring_scans. seen_first_at bleibt stabil,
|
||||||
|
last_seen_at wird immer gesetzt.
|
||||||
|
4. Aggregiert Ergebnisse in monitoring_daily_summary.
|
||||||
|
5. Gibt ScanResult zurück, aus dem run_monitoring_digest() den
|
||||||
|
Mail-Digest baut.
|
||||||
|
|
||||||
|
Kosten-Schätzung (Qwen Plus, Stand April 2026):
|
||||||
|
Quelle: https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-qianwen-7b-14b-72b-api-pricing
|
||||||
|
Input: 0.0004 USD / 1 K Token
|
||||||
|
Output: 0.0012 USD / 1 K Token
|
||||||
|
Kurs: 1 USD = 0.93 EUR (Näherung April 2026)
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from .bundeslaender import aktive_bundeslaender
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# ─── Cost estimation ─────────────────────────────────────────────────────────
# Prices from the DashScope documentation (USD, as of April 2026):
# https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-qianwen-7b-14b-72b-api-pricing
_QWEN_PLUS_INPUT_USD_PER_1K = 0.0004
_QWEN_PLUS_OUTPUT_USD_PER_1K = 0.0012
_USD_TO_EUR = 0.93  # approximate rate April 2026 (a constant is fine for an estimate)

# Default assumptions per analysis (averages from production usage)
_DEFAULT_AVG_IN_TOKENS = 20_000
_DEFAULT_AVG_OUT_TOKENS = 3_000


def estimate_cost_qwen_plus(
    n_new: int,
    avg_in_tokens: int = _DEFAULT_AVG_IN_TOKENS,
    avg_out_tokens: int = _DEFAULT_AVG_OUT_TOKENS,
) -> float:
    """Estimate the analysis cost in EUR for ``n_new`` new documents (Qwen Plus).

    Based on the official DashScope prices, converted USD→EUR with a fixed
    approximate rate.  The result is an estimate, not a guarantee.

    Args:
        n_new: Number of new documents.
        avg_in_tokens: Average input tokens per document (default 20,000).
        avg_out_tokens: Average output tokens per document (default 3,000).

    Returns:
        Estimated cost in EUR, rounded to four decimal places.
    """
    if n_new <= 0:
        return 0.0
    input_usd = (avg_in_tokens / 1000) * _QWEN_PLUS_INPUT_USD_PER_1K * n_new
    output_usd = (avg_out_tokens / 1000) * _QWEN_PLUS_OUTPUT_USD_PER_1K * n_new
    return round((input_usd + output_usd) * _USD_TO_EUR, 4)
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Data classes ────────────────────────────────────────────────────────────

@dataclass
class BundeslandScanResult:
    """Scan result for a single federal state."""
    bundesland: str  # BL code, e.g. "NRW"
    total_seen: int = 0  # documents returned by the adapter in this scan
    new_count: int = 0  # documents never seen in a previous scan
    error: str | None = None  # adapter error message, set when the scan failed


@dataclass
class DailyScanResult:
    """Aggregate result of one daily_scan() run."""
    scan_date: str  # YYYY-MM-DD
    results: list[BundeslandScanResult] = field(default_factory=list)  # per-state results
    new_total: int = 0  # sum of all new_count
    total_seen: int = 0  # sum of all total_seen
    estimated_cost_eur: float = 0.0  # see estimate_cost_qwen_plus()
    errors: list[str] = field(default_factory=list)  # "<BL>: <error>" entries
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Adapter search ──────────────────────────────────────────────────────────

DEFAULT_DAILY_LIMIT = 50

# Federal states excluded from the daily monitoring scan.
# NI (Niedersachsen): the NILAS portal requires a login — unauthenticated
# requests return login-page HTML, which the JSON-comment parser turns into
# ~50 junk records.  The exclusion stays until a valid HAR capture exists
# (see issue #22).
_MONITORING_SKIP: frozenset[str] = frozenset({"NI"})


async def _search_adapter(adapter, bundesland_code: str, limit: int = DEFAULT_DAILY_LIMIT) -> list:
    """Query an adapter for recent documents.

    Tries, in order: empty string, a single space, an asterisk.  Exceptions
    from all but the last attempt are swallowed (logged at debug level) so a
    single flaky query does not abort the whole scan; if the final attempt
    fails too, the exception propagates to the caller.  ``limit`` caps the
    per-adapter result count; raise it for initial seeding.
    """
    fallback_queries = ("", " ", "*")
    final_query = fallback_queries[-1]
    for query in fallback_queries:
        try:
            return await adapter.search(query, limit=limit)
        except Exception as e:
            if query == final_query:
                # Every attempt failed — let the caller handle it.
                raise
            logger.debug(
                "%s: search(%r) fehlgeschlagen (%s), versuche nächsten Query",
                bundesland_code, query, e,
            )
    return []
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Main scan ───────────────────────────────────────────────────────────────

async def daily_scan(limit: int = DEFAULT_DAILY_LIMIT) -> DailyScanResult:
    """Daily scan of all active federal states for new documents.

    No PDF download, no LLM call — metadata only.  ``limit`` applies per
    adapter; set it higher for initial seeding (e.g. 500).

    Returns:
        DailyScanResult with per-state results, totals, and a cost estimate.
    """
    # Local imports — presumably to avoid an import cycle at module load;
    # confirm before moving them to the top of the file.
    from .parlamente import ADAPTERS
    from .database import upsert_monitoring_scan, upsert_monitoring_summary

    now_utc = datetime.now(timezone.utc)
    scan_date = now_utc.strftime("%Y-%m-%d")
    now_iso = now_utc.strftime("%Y-%m-%dT%H:%M:%S")

    result = DailyScanResult(scan_date=scan_date)

    active_bls = aktive_bundeslaender()

    for bl in active_bls:
        # Hard skip list (e.g. NI: portal needs a login, see _MONITORING_SKIP).
        if bl.code in _MONITORING_SKIP:
            logger.debug("%s: Monitoring-Skip aktiv — übersprungen", bl.code)
            continue

        adapter = ADAPTERS.get(bl.code)
        if adapter is None:
            logger.debug("Kein Adapter für %s — übersprungen", bl.code)
            continue

        bl_result = BundeslandScanResult(bundesland=bl.code)

        try:
            docs = await _search_adapter(adapter, bl.code, limit=limit)
        except Exception as exc:
            # Adapter failure: record it (truncated to 500 chars) and keep
            # scanning the remaining states instead of aborting the run.
            err_msg = f"{type(exc).__name__}: {str(exc)[:500]}"
            logger.exception("Adapter-Fehler bei %s", bl.code)
            bl_result.error = err_msg
            result.errors.append(f"{bl.code}: {err_msg}")
            await upsert_monitoring_summary(
                scan_date=scan_date,
                bundesland=bl.code,
                total_seen=0,
                new_count=0,
                errors=err_msg,
            )
            result.results.append(bl_result)
            continue

        bl_result.total_seen = len(docs)
        new_this_bl = 0

        for doc in docs:
            try:
                # UPSERT: per the module contract, seen_first_at stays stable
                # while last_seen_at is always refreshed; the return value
                # flags documents never seen before.
                is_new = await upsert_monitoring_scan(
                    bundesland=doc.bundesland,
                    drucksache=doc.drucksache,
                    title=doc.title,
                    datum=doc.datum,
                    typ=doc.typ,
                    typ_normiert=doc.typ_normiert,
                    fraktionen=doc.fraktionen,
                    link=doc.link,
                    now=now_iso,
                )
                if is_new:
                    new_this_bl += 1
            except Exception:
                # A single bad record must not kill the scan of this state.
                logger.exception(
                    "DB-UPSERT fehlgeschlagen für %s/%s — wird übersprungen",
                    bl.code, getattr(doc, "drucksache", "?"),
                )

        bl_result.new_count = new_this_bl

        await upsert_monitoring_summary(
            scan_date=scan_date,
            bundesland=bl.code,
            total_seen=bl_result.total_seen,
            new_count=bl_result.new_count,
            errors=None,
        )

        logger.info(
            "%s: %d gesehen, %d neu",
            bl.code, bl_result.total_seen, bl_result.new_count,
        )
        result.results.append(bl_result)

    result.new_total = sum(r.new_count for r in result.results)
    result.total_seen = sum(r.total_seen for r in result.results)
    result.estimated_cost_eur = estimate_cost_qwen_plus(result.new_total)

    return result
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Mail digest ─────────────────────────────────────────────────────────────

async def run_monitoring_digest(recipient: str) -> dict:
    """Run daily_scan() and send the resulting digest via e-mail.

    Args:
        recipient: Recipient address (typically the admin).

    Returns:
        dict with scan statistics + {"mail_sent": bool}.
    """
    from .mail import send_mail
    from .database import get_monitoring_new_today
    from jinja2 import Environment, FileSystemLoader
    from pathlib import Path

    scan_result = await daily_scan()

    # Load the documents first seen today
    new_docs = await get_monitoring_new_today(scan_result.scan_date)

    # Render the mail body from the bundled template
    tmpl_dir = Path(__file__).resolve().parent / "templates"
    env = Environment(loader=FileSystemLoader(str(tmpl_dir)), autoescape=True)
    tmpl = env.get_template("monitoring_digest.html")

    html_body = tmpl.render(
        scan_date=scan_result.scan_date,
        new_total=scan_result.new_total,
        total_seen=scan_result.total_seen,
        estimated_cost_eur=scan_result.estimated_cost_eur,
        results=scan_result.results,
        new_docs=new_docs,
        errors=scan_result.errors,
    )

    # Plain-text alternative part
    text_body = _render_plain(scan_result, new_docs)

    subject = (
        f"[GWÖ-Monitor] {scan_result.scan_date} — "
        f"{scan_result.new_total} neue Drucksachen"
        + (f" ({len(scan_result.errors)} Fehler)" if scan_result.errors else "")
    )

    mail_sent = False
    try:
        await send_mail(recipient, subject, text_body, html_body)
        mail_sent = True
        logger.info("Monitoring-Digest verschickt an %s", recipient)
    except Exception:
        # Best effort: a failed mail must not discard the scan — the stats
        # are still returned, with mail_sent=False.
        logger.exception("Monitoring-Digest: Mail-Versand fehlgeschlagen")

    return {
        "scan_date": scan_result.scan_date,
        "new_total": scan_result.new_total,
        "total_seen": scan_result.total_seen,
        "estimated_cost_eur": scan_result.estimated_cost_eur,
        "error_count": len(scan_result.errors),
        "mail_sent": mail_sent,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _render_plain(scan_result: DailyScanResult, new_docs: list[dict]) -> str:
    """Build the plain-text alternative part of the monitoring digest."""
    from .config import settings

    out: list[str] = []
    append = out.append

    append(f"GWÖ-Antragsprüfer — Monitoring-Digest {scan_result.scan_date}")
    append("=" * 60)
    append("")
    append(f"Neue Drucksachen: {scan_result.new_total}")
    append(f"Gesamt gesehen: {scan_result.total_seen}")
    append(f"Kosten-Schätzung: {scan_result.estimated_cost_eur:.4f} EUR")
    append("")

    if scan_result.errors:
        append(f"Fehler ({len(scan_result.errors)}):")
        for err in scan_result.errors:
            append(f" • {err}")
        append("")

    append("Bundesland-Übersicht:")
    for bl in scan_result.results:
        if bl.error:
            status = f"✗ Fehler: {bl.error[:80]}"
        else:
            status = f"✓ {bl.new_count} neu / {bl.total_seen} gesehen"
        append(f" {bl.bundesland:6s} {status}")
    append("")

    if new_docs:
        append(f"Neue Drucksachen ({len(new_docs)}):")
        for entry in new_docs[:30]:
            title = (entry.get("title") or entry.get("drucksache") or "")[:80]
            bl_code = entry.get("bundesland", "")
            nummer = entry.get("drucksache", "")
            append(f" [{bl_code}] {nummer} — {title}")
        if len(new_docs) > 30:
            append(f" … und {len(new_docs) - 30} weitere")
        append("")

    append(f"Webapp: {settings.base_url}")
    return "\n".join(out)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Usage: python -m app.monitoring <recipient@example.com>
    import sys
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
    # First CLI argument is the recipient; falls back to the admin address.
    to = sys.argv[1] if len(sys.argv) > 1 else "mail@tobiasroedel.de"
    stats = asyncio.run(run_monitoring_digest(to))
    print(f"Monitoring-Scan fertig: {stats}")
|
||||||
157
app/sync_abgeordnetenwatch.py
Normal file
157
app/sync_abgeordnetenwatch.py
Normal file
@ -0,0 +1,157 @@
|
|||||||
|
"""CLI-Sync-Skript für abgeordnetenwatch.de (#106 Phase 1).
|
||||||
|
|
||||||
|
Holt Polls + namentliche Stimmen für alle oder einen bestimmten BL-Code
|
||||||
|
und speichert sie via UPSERT in der lokalen SQLite-DB.
|
||||||
|
|
||||||
|
Aufruf:
|
||||||
|
python -m app.sync_abgeordnetenwatch [--bundesland NRW] [--limit 50]
|
||||||
|
|
||||||
|
Ohne --bundesland werden alle in PARLIAMENT_ID eingetragenen BL-Codes
|
||||||
|
abgearbeitet (BUND-Alias wird übersprungen, BT genügt).
|
||||||
|
|
||||||
|
Ausgabe:
|
||||||
|
NRW: 12 polls neu, 340 votes neu
|
||||||
|
BT: 0 polls neu, 0 votes neu
|
||||||
|
…
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
async def sync_bundesland(bundesland_code: str, limit: int) -> tuple[int, int]:
    """Sync polls + roll-call votes for one BL code into the local DB.

    Args:
        bundesland_code: BL code (e.g. "NRW", "BT").
        limit: Maximum number of polls to fetch.

    Returns:
        Tuple (new_polls, new_votes) counting newly inserted rows.
    """
    from .abgeordnetenwatch import (
        PARLIAMENT_ID, fetch_polls, fetch_votes_for_poll,
        fallback_drucksache_by_date_title,
    )
    from .database import init_db, upsert_aw_poll, upsert_aw_vote

    await init_db()

    parliament_id = PARLIAMENT_ID[bundesland_code.upper()]
    synced_at = datetime.now(timezone.utc).isoformat()

    polls = await fetch_polls(bundesland_code, limit=limit)

    new_polls = 0
    new_votes = 0

    for poll in polls:
        poll_id = poll.get("id")
        if poll_id is None:
            continue

        # field_legislature may be a dict or a scalar — normalize to a label.
        legislature = poll.get("field_legislature") or {}
        legislature_label = (
            legislature.get("label") or legislature.get("name") or ""
            if isinstance(legislature, dict) else str(legislature)
        )

        topics_raw = poll.get("field_topics") or []
        topics = [
            (t.get("label") or t.get("name") or str(t))
            if isinstance(t, dict) else str(t)
            for t in topics_raw
        ]

        # Primary source: Drucksache parsed from the intro HTML; fall back
        # to a date+title lookup for states whose intro carries no PDF URL
        # (MV/BY/BB/TH/HH/SL — fix #142 phase 3).
        drucksache = poll.get("drucksache")
        if drucksache is None:
            drucksache = await fallback_drucksache_by_date_title(
                datum=poll.get("field_poll_date"),
                titel=poll.get("label"),
                bundesland=bundesland_code,
            )

        is_new_poll = await upsert_aw_poll(
            poll_id=poll_id,
            parliament_id=parliament_id,
            bundesland=bundesland_code.upper(),
            drucksache=drucksache,
            titel=poll.get("label"),
            datum=poll.get("field_poll_date"),
            accepted=poll.get("field_accepted"),
            topics=topics,
            legislature_label=legislature_label,
            synced_at=synced_at,
        )
        if is_new_poll:
            new_polls += 1

        # Load and store the individual votes; a failed fetch skips this
        # poll's votes but keeps the sync going.
        try:
            votes = await fetch_votes_for_poll(poll_id)
        except Exception:
            logger.exception("Fehler beim Laden von Votes für poll_id=%d", poll_id)
            continue

        for vote in votes:
            politician_id = vote.get("politician_id")
            if politician_id is None:
                continue
            is_new_vote = await upsert_aw_vote(
                poll_id=poll_id,
                politician_id=politician_id,
                politician_name=vote.get("politician_name"),
                partei=vote.get("partei"),
                vote=vote.get("vote", "no_show"),
            )
            if is_new_vote:
                new_votes += 1

    return new_polls, new_votes
|
||||||
|
|
||||||
|
|
||||||
|
async def main(bundesland: str | None, limit: int) -> None:
    """Sync one explicit BL code, or every distinct parliament when None."""
    from .abgeordnetenwatch import PARLIAMENT_ID

    # All codes without the BUND alias (BT and BUND share the same id) —
    # deduplicate by parliament id, keeping the first code per id.
    if bundesland:
        codes = [bundesland.upper()]
    else:
        codes = []
        known_ids: set[int] = set()
        for code, pid in PARLIAMENT_ID.items():
            if pid in known_ids:
                continue
            known_ids.add(pid)
            codes.append(code)

    for code in codes:
        try:
            polls_added, votes_added = await sync_bundesland(code, limit)
            print(f"{code:4s}: {polls_added} polls neu, {votes_added} votes neu")
        except Exception:
            logger.exception("Fehler beim Sync für %s", code)
            print(f"{code:4s}: FEHLER (siehe Log)")
|
||||||
|
|
||||||
|
|
||||||
|
def _cli() -> None:
    """Command-line entry point: parse arguments, then run the async sync."""
    arg_parser = argparse.ArgumentParser(
        description="Sync abgeordnetenwatch-Abstimmungsdaten in die lokale DB."
    )
    arg_parser.add_argument(
        "--bundesland", "-b",
        help="BL-Code (z.B. NRW, BT). Ohne Angabe: alle Codes.",
        default=None,
    )
    arg_parser.add_argument(
        "--limit", "-n",
        help="Maximale Anzahl Polls pro BL (default: 100).",
        type=int,
        default=100,
    )
    ns = arg_parser.parse_args()
    asyncio.run(main(ns.bundesland, ns.limit))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: configure logging, then parse args and run the sync.
    logging.basicConfig(level=logging.INFO)
    _cli()
|
||||||
128
app/templates/monitoring_digest.html
Normal file
128
app/templates/monitoring_digest.html
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
{#
  monitoring_digest.html — daily monitoring e-mail (#135).

  Context variables (assumed from usage here — confirm against the renderer):
    scan_date           — display date of the scan
    new_total           — count of documents new since the last scan
    total_seen          — total documents currently visible in the portals
    estimated_cost_eur  — float, cost estimate for analyzing everything
    errors              — list of adapter error strings (may be empty)
    results             — per-state rows: .bundesland, .total_seen, .new_count, .error
    new_docs            — new documents: .bundesland, .drucksache, .datum,
                          .title, .fraktionen (string OR iterable of strings)

  Inline styles only — e-mail clients generally ignore <style> blocks.
#}
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>GWÖ-Monitor {{ scan_date }}</title>
</head>
<body style="font-family:Helvetica,Arial,sans-serif;max-width:640px;margin:0 auto;padding:20px;color:#333">

<h2 style="color:#007a80;margin-bottom:4px">GWÖ-Antragsprüfer — Monitoring {{ scan_date }}</h2>
<p style="color:#666;margin-top:4px;font-size:0.9em">Täglicher Scan aller aktiven Bundesländer</p>

{# "Zero context" notice: an empty result means "nothing new", not "scan failed" #}
{% if new_total == 0 %}
<div style="background:#f0fafa;border-left:3px solid #009da5;padding:10px 14px;margin:12px 0;font-size:0.95em;color:#444">
<b style="color:#007a80">Heute keine Änderungen.</b> Alle {{ total_seen }} in den Landtags-Portalen sichtbaren Drucksachen sind bereits seit dem letzten Scan bekannt. Das heißt: die Portale haben seit gestern keine neuen Anträge publiziert — nicht: der Scan war erfolglos.
</div>
{% endif %}

<!-- Key-metrics block -->
<table style="width:100%;border-collapse:collapse;margin:16px 0">
<tr style="background:#f0fafa">
<td style="padding:10px 14px;border:1px solid #c8e6e6;font-weight:bold">Neue Drucksachen seit letztem Scan</td>
<td style="padding:10px 14px;border:1px solid #c8e6e6;font-size:1.4em;color:#007a80;font-weight:bold">{{ new_total }}</td>
</tr>
<tr>
<td style="padding:10px 14px;border:1px solid #ddd">Im Portal aktuell sichtbar (inkl. bekannter)</td>
<td style="padding:10px 14px;border:1px solid #ddd">{{ total_seen }}</td>
</tr>
<tr style="background:#fffbf0">
<td style="padding:10px 14px;border:1px solid #ddd">Kosten-Schätzung (alle analysieren)</td>
<td style="padding:10px 14px;border:1px solid #ddd">
<b>{{ "%.4f"|format(estimated_cost_eur) }} EUR</b>
<span style="font-size:0.8em;color:#888"> (Qwen Plus, Näherung)</span>
</td>
</tr>
{% if errors %}
<tr style="background:#fff3f3">
<td style="padding:10px 14px;border:1px solid #f5c0c0;color:#c00">Adapter-Fehler</td>
<td style="padding:10px 14px;border:1px solid #f5c0c0;color:#c00;font-weight:bold">{{ errors|length }}</td>
</tr>
{% endif %}
</table>

<!-- Error details -->
{% if errors %}
<div style="background:#fff3f3;border-left:3px solid #c00;padding:10px 14px;margin:12px 0">
<b style="color:#c00">Fehler-Details:</b>
<ul style="margin:6px 0 0;padding-left:18px;font-size:0.9em">
{% for e in errors %}
<li>{{ e }}</li>
{% endfor %}
</ul>
</div>
{% endif %}

<!-- Per-state overview table -->
<h3 style="color:#007a80;border-bottom:1px solid #c8e6e6;padding-bottom:6px">Bundesland-Übersicht</h3>
<table style="width:100%;border-collapse:collapse;font-size:0.9em">
<thead>
<tr style="background:#e6f4f4">
<th style="padding:7px 10px;border:1px solid #c8e6e6;text-align:left">BL</th>
<th style="padding:7px 10px;border:1px solid #c8e6e6;text-align:right">Gesehen</th>
<th style="padding:7px 10px;border:1px solid #c8e6e6;text-align:right">Neu</th>
<th style="padding:7px 10px;border:1px solid #c8e6e6;text-align:left">Status</th>
</tr>
</thead>
<tbody>
{% for r in results %}
{# Row tint: red-ish on adapter error, green-ish when new documents exist #}
<tr style="{% if r.error %}background:#fff8f8{% elif r.new_count > 0 %}background:#f8fff8{% endif %}">
<td style="padding:6px 10px;border:1px solid #ddd;font-weight:bold">{{ r.bundesland }}</td>
<td style="padding:6px 10px;border:1px solid #ddd;text-align:right">{{ r.total_seen }}</td>
<td style="padding:6px 10px;border:1px solid #ddd;text-align:right;color:{% if r.new_count > 0 %}#007a80{% else %}#999{% endif %}">
{{ r.new_count }}
</td>
<td style="padding:6px 10px;border:1px solid #ddd;font-size:0.85em">
{% if r.error %}
<span style="color:#c00">✗ {{ r.error[:100] }}</span>
{% elif r.new_count > 0 %}
<span style="color:#2a7a2a">✓ {{ r.new_count }} neue</span>
{% else %}
<span style="color:#999">keine Änderung</span>
{% endif %}
</td>
</tr>
{% endfor %}
</tbody>
</table>

<!-- New documents (capped at 30 entries) -->
{% if new_docs %}
<h3 style="color:#007a80;border-bottom:1px solid #c8e6e6;padding-bottom:6px;margin-top:24px">
Neue Drucksachen ({{ new_docs|length }})
</h3>
{% for doc in new_docs[:30] %}
<div style="border-left:3px solid #007a80;padding:6px 12px;margin:8px 0;background:#f9f9f9;font-size:0.9em">
<span style="color:#007a80;font-weight:bold">{{ doc.bundesland }}</span>
<span style="color:#555;margin-left:8px">{{ doc.drucksache }}</span>
{% if doc.datum %}
<span style="color:#888;font-size:0.85em;margin-left:8px">{{ doc.datum }}</span>
{% endif %}
<br>
<span style="color:#333">{{ (doc.title or doc.drucksache or '')[:120] }}</span>
{% if doc.fraktionen %}
<br><span style="color:#777;font-size:0.85em">
{# fraktionen may be a plain string or a list — join only real iterables #}
{% if doc.fraktionen is iterable and doc.fraktionen is not string %}
{{ doc.fraktionen | join(', ') }}
{% else %}
{{ doc.fraktionen }}
{% endif %}
</span>
{% endif %}
</div>
{% endfor %}
{% if new_docs|length > 30 %}
<p style="color:#666;font-size:0.9em">… und {{ new_docs|length - 30 }} weitere neue Drucksachen.</p>
{% endif %}
{% else %}
<p style="color:#888;font-style:italic">Keine neuen Drucksachen heute.</p>
{% endif %}

<!-- Footer -->
<hr style="border:none;border-top:1px solid #ddd;margin:24px 0">
<p style="font-size:0.8em;color:#aaa">
GWÖ-Antragsprüfer Monitoring · Kosten-Schätzung basiert auf Qwen-Plus-Preisen (DashScope, April 2026) ·
Nur Metadaten — kein LLM-Call im Scan
</p>
</body>
</html>
|
||||||
37
app/wahlprogramm_check.py
Normal file
37
app/wahlprogramm_check.py
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
"""Erkennung fehlender Wahlprogramme (#128).
|
||||||
|
|
||||||
|
Prüft für ein gegebenes Bundesland, welche der im Landtag vertretenen
|
||||||
|
Fraktionen in der WAHLPROGRAMME-Registry nicht hinterlegt sind.
|
||||||
|
Wird nach dem LLM-Call in analyze_antrag() aufgerufen, damit das
|
||||||
|
Assessment-Ergebnis die Lücken explizit ausweist.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .bundeslaender import BUNDESLAENDER
|
||||||
|
from .wahlprogramme import WAHLPROGRAMME
|
||||||
|
|
||||||
|
|
||||||
|
def check_missing_programmes(bundesland: str, fraktionen: list[str]) -> list[str]:
    """Return the faction names lacking a registered election programme.

    Args:
        bundesland: State code (e.g. "NRW", "BY").
        fraktionen: Faction names to check (typically taken from
            BUNDESLAENDER[bl].landtagsfraktionen).

    Returns:
        Faction names without a programme entry for the given state, in
        input order. Empty when all factions are covered or *fraktionen*
        is empty.

    Raises:
        ValueError: If the state code is unknown to BUNDESLAENDER.
    """
    # Validate the state code first so an unknown code always raises,
    # even when there is nothing to check.
    if bundesland not in BUNDESLAENDER:
        raise ValueError(f"Unbekanntes Bundesland: {bundesland!r}")

    if not fraktionen:
        return []

    known_programmes = WAHLPROGRAMME.get(bundesland, {})
    return [name for name in fraktionen if name not in known_programmes]
|
||||||
Loading…
Reference in New Issue
Block a user