From 6a78dee2d1d7a86b6dd1c14269d7470283a664bb Mon Sep 17 00:00:00 2001
From: Dotty Dotter
Date: Wed, 6 May 2026 16:12:41 +0200
Subject: [PATCH] feat(#179 Phase 4.3): pm-sample-bundle.sh fuer 5 PMs (PM +
 Thread) zur Sichtung

Skript laeuft fuer N_SAMPLES (Default 5) hochbewertete Antraege jeweils
generate_draft() mit style='pm' und style='thread' aus. Idempotent ueber
das presse_drafts.style-Schema.

Manueller Aufruf:
  ./scripts/pm-sample-bundle.sh gwoe-antragspruefer-dev

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 scripts/pm-sample-bundle.sh | 110 ++++++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)
 create mode 100755 scripts/pm-sample-bundle.sh

diff --git a/scripts/pm-sample-bundle.sh b/scripts/pm-sample-bundle.sh
new file mode 100755
index 0000000..3305a3c
--- /dev/null
+++ b/scripts/pm-sample-bundle.sh
@@ -0,0 +1,110 @@
+#!/bin/bash
+# PM sample bundle: generates, for N_SAMPLES (default 5) highly rated motions,
+# one press release each (classic PM + thread) for manual quality review.
+#
+# Manual invocation:
+#   ./scripts/pm-sample-bundle.sh gwoe-antragspruefer-dev
+#
+# On repeated invocation existing PMs are not regenerated
+# (idempotence per drucksache+news_url+style).
+
+set -euo pipefail
+
+CONTAINER="${1:-gwoe-antragspruefer-dev}"
+N_SAMPLES="${N_SAMPLES:-5}"
+
+# Capture the container list before matching instead of piping into
+# `grep -q`: under pipefail an early grep exit can SIGPIPE `docker ps` and
+# make a running container look absent. -F -x match the name literally and
+# as a whole line, so dots in a container name are not regex wildcards.
+RUNNING="$(docker ps --format '{{.Names}}' || true)"
+if ! grep -Fxq -- "$CONTAINER" <<<"$RUNNING"; then
+    echo "$(date -Iseconds) SKIP — ${CONTAINER} is not running"
+    exit 0
+fi
+
+echo "$(date -Iseconds) START pm-sample-bundle (n=${N_SAMPLES})"
+
+docker exec -i -e N_SAMPLES="$N_SAMPLES" "$CONTAINER" python <<'EOF'
+import os
+import asyncio
+import sqlite3
+from app.presse_generator import generate_draft
+
+
+N_SAMPLES = int(os.environ.get("N_SAMPLES", "5"))
+
+
+def find_candidates(n: int) -> list:
+    """Return up to n top-rated motions without a press draft of style 'pm'.
+
+    Each entry is a dict with the keys drucksache, bundesland, score and
+    news_url. Returns an empty list if n < 1 or no news article exists.
+    """
+    if n < 1:
+        # A negative LIMIT means "no limit" in SQLite, so guard explicitly.
+        return []
+    conn = sqlite3.connect("/app/data/gwoe-antraege.db")
+    try:
+        # The news lookup does not depend on the motion: run it once rather
+        # than once per candidate row.
+        # NOTE(review): every candidate is paired with the newest article;
+        # confirm whether a per-motion match was intended.
+        news_match = conn.execute("""
+            SELECT n.url FROM news_articles n
+            ORDER BY n.datum DESC LIMIT 1
+        """).fetchone()
+        if not news_match:
+            return []
+        # NOT EXISTS + LIMIT let SQLite filter and truncate, replacing one
+        # presse_drafts query per assessment row.
+        rows = conn.execute("""
+            SELECT a.drucksache, a.bundesland, a.gwoe_score
+            FROM assessments a
+            WHERE a.gwoe_score >= 7.0
+              AND NOT EXISTS (
+                SELECT 1 FROM presse_drafts p
+                WHERE p.drucksache = a.drucksache AND p.style = 'pm'
+              )
+            ORDER BY a.gwoe_score DESC, a.datum DESC
+            LIMIT ?
+        """, (n,)).fetchall()
+        return [
+            {"drucksache": ds, "bundesland": bl,
+             "score": score, "news_url": news_match[0]}
+            for ds, bl, score in rows
+        ]
+    finally:
+        conn.close()
+
+
+async def main() -> None:
+    """Generate a PM and a thread draft per candidate and print the result.
+
+    An exception for one candidate is printed and the loop continues.
+    """
+    cands = find_candidates(N_SAMPLES)
+    if not cands:
+        print(" keine Kandidaten gefunden")
+        return
+    print(f" {len(cands)} Kandidaten:")
+    for i, c in enumerate(cands, 1):
+        print(f" [{i}/{len(cands)}] {c['bundesland']}/{c['drucksache']} (GWÖ {c['score']})")
+        try:
+            pm = await generate_draft(
+                drucksache=c["drucksache"], news_url=c["news_url"], style="pm",
+            )
+            print(f" PM: id={pm.get('id')} was_existing={pm.get('_was_existing')}")
+            thread = await generate_draft(
+                drucksache=c["drucksache"], news_url=c["news_url"], style="thread",
+            )
+            print(f" Thread: id={thread.get('id')} was_existing={thread.get('_was_existing')}")
+        except Exception as e:
+            # Best effort: report the failure and move on to the next one.
+            print(f" ERROR: {str(e)[:120]}")
+
+
+asyncio.run(main())
+EOF
+
+echo "$(date -Iseconds) END pm-sample-bundle"