92 lines
2.9 KiB
Python
92 lines
2.9 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""Retro-Migration: Zitate, deren `quelle`-Label auf das jeweils andere
|
||
|
|
Programm verweist, wandern in den passenden Block.
|
||
|
|
|
||
|
|
Hintergrund: Vor Commit 4b9c65c hat ``reconstruct_zitate`` bei einem
|
||
|
|
Cross-Kind-Fallback-Match nur die ``quelle`` korrigiert, das Zitat aber
|
||
|
|
im urspruenglichen Block belassen. Folge: Im wahlprogramm-Block standen
|
||
|
|
auch Zitate aus dem Grundsatzprogramm. Der Code-Fix korrigiert das fuer
|
||
|
|
neue Bewertungen — dieses Skript korrigiert die bestehenden Records.
|
||
|
|
|
||
|
|
Heuristik (string-basiert, ohne LLM/Re-Bewertung):
|
||
|
|
- quelle enthaelt 'Grundsatzprogramm' (case-insensitive) → parteiprogramm-Block
|
||
|
|
- quelle enthaelt 'Wahlprogramm' (ohne 'Grundsatz') → wahlprogramm-Block
|
||
|
|
- sonst: bleibt wo es ist
|
||
|
|
|
||
|
|
Idempotent: doppelter Lauf bewegt nichts mehr.
|
||
|
|
|
||
|
|
Usage (aus dem Container):
|
||
|
|
docker exec gwoe-antragspruefer python /app/scripts/migrate-zitate-blocks.py # dry-run
|
||
|
|
docker exec gwoe-antragspruefer python /app/scripts/migrate-zitate-blocks.py --apply # commit
|
||
|
|
"""
|
||
|
|
import json
|
||
|
|
import sqlite3
|
||
|
|
import sys
|
||
|
|
|
||
|
|
# Default location of the SQLite database inside the container.
DB_PATH = "/app/data/gwoe-antraege.db"


def _source_label(zitat):
    """Return the lower-cased ``quelle`` label of a citation.

    Anything that is not a dict with a string ``quelle`` yields ``""``, so
    such citations never match a heuristic and stay where they are.
    """
    if not isinstance(zitat, dict):
        return ""
    quelle = zitat.get("quelle")
    return quelle.lower() if isinstance(quelle, str) else ""


def _rebalance_entry(entry):
    """Move misfiled citations of one score entry into the matching block.

    ``entry`` is one element of the ``wahlprogramm_scores`` list. It is
    modified in place, and only if at least one citation has to move.

    Returns:
        The number of citations that were moved.
    """
    wp_blk = entry.get("wahlprogramm") or {}
    pp_blk = entry.get("parteiprogramm") or {}
    if not isinstance(wp_blk, dict) or not isinstance(pp_blk, dict):
        # Unexpected shape: leave the entry alone instead of crashing.
        return 0

    wp_zitate = wp_blk.get("zitate") or []
    pp_zitate = pp_blk.get("zitate") or []
    if not isinstance(wp_zitate, list) or not isinstance(pp_zitate, list):
        return 0

    keep_wp, to_pp = [], []
    for zitat in wp_zitate:
        if "grundsatzprogramm" in _source_label(zitat):
            to_pp.append(zitat)
        else:
            keep_wp.append(zitat)

    keep_pp, to_wp = [], []
    for zitat in pp_zitate:
        label = _source_label(zitat)
        # "Grundsatz..." labels belong to the parteiprogramm block even if
        # they also mention "Wahlprogramm".
        if "wahlprogramm" in label and "grundsatz" not in label:
            to_wp.append(zitat)
        else:
            keep_pp.append(zitat)

    moved = len(to_pp) + len(to_wp)
    if moved:
        # Same ordering as before: citations moved into wahlprogramm are
        # appended, citations moved into parteiprogramm are prepended.
        wp_blk["zitate"] = keep_wp + to_wp
        pp_blk["zitate"] = to_pp + keep_pp
        # Re-assign so that a block that was missing/None gets created.
        entry["wahlprogramm"] = wp_blk
        entry["parteiprogramm"] = pp_blk
    return moved


def _migrate(db, dry_run):
    """Rebalance the citations of every assessment reachable through ``db``.

    Rows are only written back when ``dry_run`` is false; committing is left
    to the caller.

    Returns:
        A ``(moved_citations, touched_assessments)`` tuple.
    """
    moved = 0
    touched_assessments = 0

    rows = db.execute(
        "SELECT drucksache, bundesland, wahlprogramm_scores FROM assessments "
        "WHERE wahlprogramm_scores IS NOT NULL"
    ).fetchall()

    for row in rows:
        raw = row["wahlprogramm_scores"]
        if not raw:
            continue
        try:
            scores = json.loads(raw)
        except (TypeError, ValueError):
            # json.JSONDecodeError is a ValueError. Skip the record, but say
            # so instead of swallowing the problem silently.
            print(
                "WARNUNG: wahlprogramm_scores nicht lesbar, uebersprungen: "
                f"{row['drucksache']} ({row['bundesland']})",
                file=sys.stderr,
            )
            continue
        if not isinstance(scores, list):
            continue

        moved_here = 0
        for entry in scores:
            if isinstance(entry, dict):
                moved_here += _rebalance_entry(entry)
        if not moved_here:
            continue

        moved += moved_here
        touched_assessments += 1
        if not dry_run:
            # Filter on bundesland as well: matching on drucksache alone
            # would overwrite every row sharing that number. ``IS`` instead
            # of ``=`` so that a NULL bundesland still matches.
            db.execute(
                "UPDATE assessments SET wahlprogramm_scores=? "
                "WHERE drucksache=? AND bundesland IS ?",
                (
                    json.dumps(scores, ensure_ascii=False),
                    row["drucksache"],
                    row["bundesland"],
                ),
            )

    return moved, touched_assessments


def main(argv=None, db_path=DB_PATH):
    """Run the migration and print a summary.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. Without ``--apply`` nothing is written.
        db_path: Path of the SQLite database file.

    Returns:
        A ``(moved_citations, touched_assessments)`` tuple.
    """
    if argv is None:
        argv = sys.argv[1:]
    dry_run = "--apply" not in argv

    db = sqlite3.connect(db_path)
    try:
        db.row_factory = sqlite3.Row
        moved, touched_assessments = _migrate(db, dry_run)
        if not dry_run:
            db.commit()
    finally:
        # Always release the connection; closing without a commit discards
        # pending changes, so a failed run leaves the database untouched.
        db.close()

    print(f"DRY_RUN={dry_run}")
    print(f"Zitate verschoben: {moved}")
    print(f"Assessments betroffen: {touched_assessments}")
    return moved, touched_assessments


if __name__ == "__main__":
    main()
|