- Scraper: HTML-Extraktion von ECOnGOOD-Webseite - Analyzer: LLM-Bewertung (Qwen) nach GWÖ-Matrix 2.0 - Aggregator: Partei-Auswertung + Kandidat:innen-Ranking - CLI: Reproduzierbarer Workflow (scrape → analyze → aggregate) - Output: 7 Dokumente inkl. Pressemitteilung und Methodik - 27 Kandidat:innen, 162 Einzelbewertungen
260 lines
8.2 KiB
Python
260 lines
8.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
GWÖ-Wahlprüfsteine CLI
|
|
Haupteinstiegspunkt für alle Operationen.
|
|
"""
|
|
|
|
import argparse
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
|
|
def cmd_scrape(args):
    """Scrape command: load candidate data and store it in the database.

    The HTML is read from the local file ``args.html`` when that option is
    set, otherwise it is fetched from ``args.url``. The parsed candidates
    are then written to the database at ``args.db``.

    Args:
        args: Parsed CLI namespace providing ``html``, ``url`` and ``db``.

    Returns:
        0 on success (used as the process exit code).
    """
    from scraper import fetch_and_parse, init_db, save_to_db, parse_html

    if args.html:
        print(f"Lade lokale Datei: {args.html}")
        # Decode explicitly instead of relying on the locale default, which
        # is not UTF-8 on every platform and would garble umlauts.
        # NOTE(review): assumes the saved page is UTF-8 -- confirm.
        with open(args.html, encoding="utf-8") as f:
            kandidaten = parse_html(f.read())
    else:
        print(f"Lade URL: {args.url}")
        kandidaten = fetch_and_parse(args.url)

    print(f"Gefunden: {len(kandidaten)} Kandidat:innen")

    conn = init_db(args.db)
    try:
        count = save_to_db(conn, kandidaten)
    finally:
        # Release the connection even when saving fails.
        conn.close()

    print(f"Gespeichert: {count} Kandidat:innen in {args.db}")
    return 0
|
|
|
|
|
|
def cmd_analyze(args):
    """Analyze command: run the LLM evaluation through ``analyzer.main``.

    The analyzer entry point is driven through ``sys.argv``, so the relevant
    options are translated into an argument vector that is installed for the
    duration of the call only.

    Args:
        args: Parsed CLI namespace providing ``db``, ``model``, ``limit``,
            ``dry_run`` and ``verbose``.

    Returns:
        Whatever ``analyzer.main()`` returns.
    """
    from analyzer import main as analyzer_main

    forwarded = ['analyzer.py', '--db', str(args.db), '--model', args.model]
    # Compare against None so that an explicit ``--limit 0`` is passed on
    # instead of being silently treated as "no limit".
    if args.limit is not None:
        forwarded.extend(['--limit', str(args.limit)])
    if args.dry_run:
        forwarded.append('--dry-run')
    if args.verbose:
        forwarded.append('--verbose')

    saved_argv = sys.argv
    sys.argv = forwarded
    try:
        return analyzer_main()
    finally:
        # Do not leak the rewritten argument vector to the rest of the process.
        sys.argv = saved_argv
|
|
|
|
|
|
def cmd_aggregate(args):
    """Aggregate command: generate the reports through ``aggregator.main``.

    The aggregator entry point is driven through ``sys.argv``; the database
    path and output directory are installed there for the duration of the
    call only.

    Args:
        args: Parsed CLI namespace providing ``db`` and ``output``.

    Returns:
        Whatever ``aggregator.main()`` returns.
    """
    from aggregator import main as aggregator_main

    saved_argv = sys.argv
    sys.argv = ['aggregator.py', '--db', str(args.db), '--output', str(args.output)]
    try:
        return aggregator_main()
    finally:
        # Do not leak the rewritten argument vector to the rest of the process.
        sys.argv = saved_argv
|
|
|
|
|
|
def cmd_status(args):
    """Status command: print a summary of the database contents.

    Prints the number of candidates, raw answers and evaluations, the number
    of answers that have no evaluation yet, the candidate count per party
    and, when at least one non-NULL score exists, the average GWÖ score.

    Args:
        args: Parsed CLI namespace providing ``db`` (a ``pathlib.Path``).

    Returns:
        0 on success, 1 when the database file does not exist.
    """
    import sqlite3

    if not args.db.exists():
        print(f"Datenbank nicht gefunden: {args.db}")
        return 1

    conn = sqlite3.connect(args.db)
    try:
        cursor = conn.cursor()

        def scalar(sql):
            """Run *sql* and return the first column of its first row."""
            return cursor.execute(sql).fetchone()[0]

        n_kandidaten = scalar("SELECT COUNT(*) FROM kandidaten")
        n_antworten = scalar("SELECT COUNT(*) FROM antworten_raw")
        n_bewertungen = scalar("SELECT COUNT(*) FROM bewertungen")

        # Answers without a matching evaluation row are still open.
        n_offen = scalar("""
            SELECT COUNT(*) FROM antworten_raw ar
            LEFT JOIN bewertungen b ON ar.id = b.antwort_id
            WHERE b.id IS NULL
        """)

        print(f"Datenbank: {args.db}")
        print(f" Kandidat:innen: {n_kandidaten}")
        print(f" Antworten: {n_antworten}")
        print(f" Bewertungen: {n_bewertungen} ({n_offen} offen)")

        # Party distribution, largest party first.
        print("\nPartei-Verteilung:")
        for row in cursor.execute("""
            SELECT partei_normalisiert, COUNT(*) as n
            FROM kandidaten
            GROUP BY partei_normalisiert
            ORDER BY n DESC
        """):
            print(f" {row[0]}: {row[1]}")

        # Average GWÖ score, only when evaluations exist.
        if n_bewertungen > 0:
            avg_gwoe = scalar(
                "SELECT ROUND(AVG(gwoe_score), 2) FROM bewertungen "
                "WHERE gwoe_score IS NOT NULL"
            )
            # AVG yields NULL when every score is NULL; skip the line rather
            # than printing "None".
            if avg_gwoe is not None:
                print(f"\nØ GWÖ-Score: {avg_gwoe}")
    finally:
        # Close the connection even when one of the queries fails.
        conn.close()

    return 0
|
|
|
|
|
|
def cmd_export(args):
    """Export command: dump candidates and evaluations to a JSON file.

    The JSON document has two keys: ``kandidaten`` (one object per candidate,
    with all answers concatenated into the ``antworten`` field as
    ``nummer:kurz:erlaeuterung`` entries separated by ``|||``) and
    ``bewertungen`` (one object per evaluation, enriched with the candidate
    name, party and question short text).

    Args:
        args: Parsed CLI namespace providing ``db`` (a ``pathlib.Path``) and
            ``output``; when ``output`` is falsy the file ``export.json``
            next to the database is used.

    Returns:
        0 on success, 1 when the database file does not exist.
    """
    import json
    import sqlite3

    # sqlite3.connect() would silently create an empty database file and the
    # queries below would then fail, so bail out early instead.
    if not args.db.exists():
        print(f"Datenbank nicht gefunden: {args.db}")
        return 1

    conn = sqlite3.connect(args.db)
    try:
        # Row objects convert to dicts keyed by column name.
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        kandidaten = [dict(row) for row in cursor.execute("""
            SELECT k.*,
                   GROUP_CONCAT(f.nummer || ':' || COALESCE(ar.antwort_kurz, '') || ':' || COALESCE(ar.antwort_erlaeuterung, ''), '|||') as antworten
            FROM kandidaten k
            LEFT JOIN antworten_raw ar ON k.id = ar.kandidat_id
            LEFT JOIN fragen f ON ar.frage_id = f.id
            GROUP BY k.id
        """)]

        bewertungen = [dict(row) for row in cursor.execute("""
            SELECT b.*,
                   k.vorname || ' ' || k.nachname as kandidat,
                   k.partei_normalisiert as partei,
                   f.kurztext as frage
            FROM bewertungen b
            JOIN antworten_raw ar ON b.antwort_id = ar.id
            JOIN kandidaten k ON ar.kandidat_id = k.id
            JOIN fragen f ON ar.frage_id = f.id
        """)]
    finally:
        # Close the connection even when one of the queries fails.
        conn.close()

    data = {
        'kandidaten': kandidaten,
        'bewertungen': bewertungen,
    }

    output = args.output or (args.db.parent / 'export.json')
    # ensure_ascii=False writes umlauts verbatim, so the file must be opened
    # as UTF-8 explicitly rather than with the locale-dependent default.
    with open(output, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2, default=str)

    print(f"Exportiert: {output}")
    return 0
|
|
|
|
|
|
# Single source of truth for values that several sub-commands share.
_DEFAULT_URL = 'https://germany.econgood.org/wahlpruefsteine-zu-den-bayerischen-kommunalwahlen-2026'
_QWEN_MODELS = ['qwen-plus', 'qwen-max', 'qwen-turbo']


def _build_parser():
    """Create the argument parser with all global options and sub-commands."""
    parser = argparse.ArgumentParser(
        description='GWÖ-Wahlprüfsteine Auswertung',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Beispiele:
  %(prog)s scrape # Daten von ECOnGOOD laden
  %(prog)s analyze --limit 10 # Erste 10 Antworten bewerten
  %(prog)s analyze # Alle offenen Antworten bewerten
  %(prog)s aggregate # Reports generieren
  %(prog)s status # Datenbankstatus anzeigen

Vollständiger Workflow:
  %(prog)s scrape && %(prog)s analyze && %(prog)s aggregate
"""
    )

    # Global options (must precede the sub-command on the command line).
    parser.add_argument('--db', type=Path,
                        default=Path(__file__).parent / 'wahlpruefsteine.db',
                        help='Pfad zur SQLite-Datenbank')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Ausführliche Ausgabe')

    subparsers = parser.add_subparsers(dest='command', help='Verfügbare Befehle')

    # scrape
    p_scrape = subparsers.add_parser('scrape', help='Daten von URL laden')
    p_scrape.add_argument('--url', default=_DEFAULT_URL,
                          help='URL der Wahlprüfsteine-Seite')
    p_scrape.add_argument('--html', type=Path, help='Lokale HTML-Datei statt URL')

    # analyze
    p_analyze = subparsers.add_parser('analyze', help='LLM-Bewertung durchführen')
    p_analyze.add_argument('--model', default='qwen-plus', choices=_QWEN_MODELS,
                           help='Qwen-Modell')
    p_analyze.add_argument('--limit', type=int, help='Maximal zu bewertende Antworten')
    p_analyze.add_argument('--dry-run', action='store_true', help='Nur anzeigen')

    # aggregate
    p_aggregate = subparsers.add_parser('aggregate', help='Reports generieren')
    p_aggregate.add_argument('--output', type=Path,
                             default=Path(__file__).parent / 'output',
                             help='Ausgabeverzeichnis')

    # status
    subparsers.add_parser('status', help='Datenbankstatus anzeigen')

    # export
    p_export = subparsers.add_parser('export', help='Daten als JSON exportieren')
    p_export.add_argument('--output', '-o', type=Path, help='Ausgabedatei')

    # run (full workflow)
    p_run = subparsers.add_parser('run', help='Vollständiger Workflow (scrape → analyze → aggregate)')
    p_run.add_argument('--model', default='qwen-plus', choices=_QWEN_MODELS)
    p_run.add_argument('--output', type=Path,
                       default=Path(__file__).parent / 'output')

    return parser


def _cmd_run(args):
    """Run scrape, analyze and aggregate in sequence, stopping at the first failure."""
    # The ``run`` sub-command defines none of the scrape/analyze specific
    # options, so they are filled in here before the handlers read them.
    print("=== SCRAPE ===")
    args.url = _DEFAULT_URL
    args.html = None
    if cmd_scrape(args) != 0:
        return 1

    print("\n=== ANALYZE ===")
    args.limit = None
    args.dry_run = False
    # NOTE(review): any return value other than 0 (including None) from the
    # analyzer is treated as failure -- confirm analyzer.main returns 0.
    if cmd_analyze(args) != 0:
        return 1

    print("\n=== AGGREGATE ===")
    return cmd_aggregate(args)


def main(argv=None):
    """Parse the command line and dispatch to the selected sub-command.

    Args:
        argv: Optional list of argument strings (without the program name).
            When ``None``, argparse falls back to ``sys.argv[1:]``.

    Returns:
        The exit code of the executed sub-command, or 1 when no sub-command
        was given (the help text is printed in that case).
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    if not args.command:
        parser.print_help()
        return 1

    handlers = {
        'scrape': cmd_scrape,
        'analyze': cmd_analyze,
        'aggregate': cmd_aggregate,
        'status': cmd_status,
        'export': cmd_export,
        'run': _cmd_run,
    }
    handler = handlers.get(args.command)
    if handler is None:
        return 1
    return handler(args)
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: the value returned by main() becomes the process
    # exit status.
    raise SystemExit(main())
|