- monitoring.py: taeglicher Scan-Adapter aller aktiven BL, kein Auto-Fetch (#135) - monitoring_digest.html: Mail-Template mit '0-Kontext'-Hinweis - abgeordnetenwatch.py + sync_*.py: Phase 1 Roll-Call-Voting (#106) - 17 Parlamente (16 BL + BT) - 9 BL-spezifische Drucksachen-Patterns + Date-Title-Fallback - 28977 Votes fuer BUND in DB - wahlprogramm_check.py: fehlende Programme erkennen (#128) - NI-Skip-Liste, NRW Empty-Query-Fallback Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
158 lines
4.8 KiB
Python
158 lines
4.8 KiB
Python
"""CLI-Sync-Skript für abgeordnetenwatch.de (#106 Phase 1).
|
|
|
|
Holt Polls + namentliche Stimmen für alle oder einen bestimmten BL-Code
|
|
und speichert sie via UPSERT in der lokalen SQLite-DB.
|
|
|
|
Aufruf:
|
|
python -m app.sync_abgeordnetenwatch [--bundesland NRW] [--limit 50]
|
|
|
|
Ohne --bundesland werden alle in PARLIAMENT_ID eingetragenen BL-Codes
|
|
abgearbeitet (BUND-Alias wird übersprungen, BT genügt).
|
|
|
|
Ausgabe:
    NRW : 12 polls neu, 340 votes neu
    BT  : 0 polls neu, 0 votes neu
    …
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import asyncio
|
|
import logging
|
|
from datetime import datetime, timezone
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _legislature_label(poll: dict) -> str:
    """Return the label of the poll's ``field_legislature``, or "" if absent."""
    legislature = poll.get("field_legislature") or {}
    if not isinstance(legislature, dict):
        # Non-dict payloads are stored as their string form.
        return str(legislature)
    return legislature.get("label") or legislature.get("name") or ""


def _topic_labels(poll: dict) -> list[str]:
    """Return one label string per entry of the poll's ``field_topics``."""
    return [
        (topic.get("label") or topic.get("name") or str(topic))
        if isinstance(topic, dict)
        else str(topic)
        for topic in poll.get("field_topics") or []
    ]


async def sync_bundesland(bundesland_code: str, limit: int) -> tuple[int, int]:
    """Sync polls and roll-call votes for a single BL code.

    Args:
        bundesland_code: Key into ``PARLIAMENT_ID``; matched case-insensitively
            (the code is upper-cased once and that form is used everywhere).
        limit: Forwarded to ``fetch_polls`` as its ``limit`` argument.

    Returns:
        ``(new_polls, new_votes)``: the number of poll / vote upsert calls
        that returned a truthy result, which this function counts as "new".

    Raises:
        KeyError: If the upper-cased code is not a key of ``PARLIAMENT_ID``.
    """
    from .abgeordnetenwatch import (
        PARLIAMENT_ID,
        fallback_drucksache_by_date_title,
        fetch_polls,
        fetch_votes_for_poll,
    )
    from .database import init_db, upsert_aw_poll, upsert_aw_vote

    await init_db()

    # Normalize once so the ID lookup, the API fetch, the Drucksache fallback
    # and the stored value all see the same spelling of the code.
    code = bundesland_code.upper()
    parliament_id = PARLIAMENT_ID[code]
    synced_at = datetime.now(timezone.utc).isoformat()

    polls = await fetch_polls(code, limit=limit)

    new_polls = 0
    new_votes = 0

    for poll in polls:
        poll_id = poll.get("id")
        if poll_id is None:
            # Without an ID there is no key to upsert under.
            continue

        titel = poll.get("label")
        datum = poll.get("field_poll_date")

        # Primary: Drucksache parsed from the intro HTML; fallback via
        # date + title for BL without a PDF URL in the intro
        # (MV/BY/BB/TH/HH/SL — fix #142 phase 3).
        drucksache = poll.get("drucksache")
        if drucksache is None:
            drucksache = await fallback_drucksache_by_date_title(
                datum=datum,
                titel=titel,
                bundesland=code,
            )

        is_new_poll = await upsert_aw_poll(
            poll_id=poll_id,
            parliament_id=parliament_id,
            bundesland=code,
            drucksache=drucksache,
            titel=titel,
            datum=datum,
            accepted=poll.get("field_accepted"),
            topics=_topic_labels(poll),
            legislature_label=_legislature_label(poll),
            synced_at=synced_at,
        )
        if is_new_poll:
            new_polls += 1

        # Load and store the votes. A failure here only skips this poll's
        # votes; the poll itself has already been upserted above.
        try:
            votes = await fetch_votes_for_poll(poll_id)
        except Exception:
            # %s rather than %d: a non-integer ID must not break the log call.
            logger.exception("Fehler beim Laden von Votes für poll_id=%s", poll_id)
            continue

        for vote in votes:
            politician_id = vote.get("politician_id")
            if politician_id is None:
                continue
            is_new_vote = await upsert_aw_vote(
                poll_id=poll_id,
                politician_id=politician_id,
                politician_name=vote.get("politician_name"),
                partei=vote.get("partei"),
                vote=vote.get("vote", "no_show"),
            )
            if is_new_vote:
                new_votes += 1

    return new_polls, new_votes
|
|
|
|
|
|
async def main(bundesland: str | None, limit: int) -> None:
    """Sync one BL code, or every distinct parliament, and print a summary.

    Args:
        bundesland: BL code to sync (upper-cased before use). A falsy value
            selects all codes in ``PARLIAMENT_ID``.
        limit: Forwarded to ``sync_bundesland`` for each code.

    One line is printed per code. A failing code is logged with its traceback
    and reported as an error line; the remaining codes are still processed.
    """
    from .abgeordnetenwatch import PARLIAMENT_ID

    if not bundesland:
        # Several codes may map to the same parliament ID (BT and BUND do);
        # keep only the first code seen per ID, in mapping order.
        first_code_by_id: dict[int, str] = {}
        for candidate, parliament_id in PARLIAMENT_ID.items():
            first_code_by_id.setdefault(parliament_id, candidate)
        codes = list(first_code_by_id.values())
    else:
        codes = [bundesland.upper()]

    for code in codes:
        try:
            new_polls, new_votes = await sync_bundesland(code, limit)
            print(f"{code:4s}: {new_polls} polls neu, {new_votes} votes neu")
        except Exception:
            logger.exception("Fehler beim Sync für %s", code)
            print(f"{code:4s}: FEHLER (siehe Log)")
|
|
|
|
|
|
def _cli() -> None:
    """Parse the command-line options and run :func:`main` to completion.

    Options:
        --bundesland / -b: optional BL code; defaults to ``None``.
        --limit / -n: integer, defaults to 100.
    """
    arg_parser = argparse.ArgumentParser(
        description="Sync abgeordnetenwatch-Abstimmungsdaten in die lokale DB.",
    )
    arg_parser.add_argument(
        "--bundesland",
        "-b",
        default=None,
        help="BL-Code (z.B. NRW, BT). Ohne Angabe: alle Codes.",
    )
    arg_parser.add_argument(
        "--limit",
        "-n",
        type=int,
        default=100,
        help="Maximale Anzahl Polls pro BL (default: 100).",
    )

    options = arg_parser.parse_args()
    # asyncio.run drives the coroutine on a fresh event loop until it finishes.
    sync_job = main(options.bundesland, options.limit)
    asyncio.run(sync_job)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: enable INFO-level logging first so that messages
    # emitted during the sync are visible, then hand over to the CLI.
    logging.basicConfig(level=logging.INFO)
    _cli()
|