feat(#179 Phase 4.3): pm-sample-bundle.sh fuer 5 PMs (PM + Thread) zur Sichtung

Skript fuehrt fuer N_SAMPLES (Default 5) hochbewertete Antraege jeweils
generate_draft() mit style='pm' und style='thread' aus. Idempotent ueber
das presse_drafts.style-Schema.

Manueller Aufruf:
  ./scripts/pm-sample-bundle.sh gwoe-antragspruefer-dev

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dotty Dotter 2026-05-06 16:12:41 +02:00
parent a2b8f8c6fe
commit 6a78dee2d1

90
scripts/pm-sample-bundle.sh Executable file
View File

@ -0,0 +1,90 @@
#!/bin/bash
# PM sample bundle: for N_SAMPLES (default 5) highly rated motions, generate
# one press release each in two styles (classic "pm" + "thread") for manual
# quality review.
#
# Manual invocation:
#   ./scripts/pm-sample-bundle.sh gwoe-antragspruefer-dev
#
# Usage:
#   $1         container name (default: gwoe-antragspruefer-dev)
#   N_SAMPLES  environment variable, number of motions to process (default: 5)
#
# Original author's note: on repeated invocation existing PMs are not
# regenerated (idempotency per drucksache+news_url+style).
# NOTE(review): that guarantee is presumably enforced inside generate_draft();
# this script itself only skips motions that already have a style='pm' draft.
set -euo pipefail

CONTAINER="${1:-gwoe-antragspruefer-dev}"
N_SAMPLES="${N_SAMPLES:-5}"

# Capture the list of running container names first instead of piping
# `docker ps` straight into `grep -q`: under `pipefail`, grep exiting on the
# first match can kill the producer with SIGPIPE and make the pipeline look
# failed even though the container is running. A failing `docker ps` is
# treated like "not running" (same outcome as before).
running_names="$(docker ps --format '{{.Names}}' || true)"

# -x: whole-line match, -F: fixed string. Container names may contain '.',
# which would otherwise act as a regex wildcard.
if ! grep -qxF -- "$CONTAINER" <<<"$running_names"; then
    echo "$(date -Iseconds) SKIP — ${CONTAINER} is not running"
    exit 0
fi

echo "$(date -Iseconds) START pm-sample-bundle (n=${N_SAMPLES})"
# Run the Python program below inside the container. `-i` keeps stdin open so
# the here-document reaches `python`; `-e` forwards N_SAMPLES into the
# container environment. The quoted 'EOF' delimiter disables shell expansion
# inside the here-document, so the Python source is passed through verbatim.
docker exec -i -e N_SAMPLES="$N_SAMPLES" "$CONTAINER" python <<'EOF'
import os
import asyncio
import sqlite3
from app.presse_generator import generate_draft
# Number of candidates to process: taken from the N_SAMPLES environment
# variable (forwarded by `docker exec -e`), falling back to 5 when unset.
N_SAMPLES = int(os.environ.get("N_SAMPLES", "5"))
def find_candidates(n: int, db_path: str = "/app/data/gwoe-antraege.db",
                    min_score: float = 7.0) -> list:
    """Return the top-*n* highly rated motions that have no 'pm' draft yet.

    Args:
        n: Maximum number of candidates to return. Values <= 0 yield an
            empty list.
        db_path: Path of the SQLite database to read. Defaults to the
            location used inside the container.
        min_score: Minimum ``gwoe_score`` (inclusive) a motion needs in
            order to be considered.

    Returns:
        A list of dicts with the keys ``drucksache``, ``bundesland``,
        ``score`` and ``news_url``, ordered by score (descending) and then
        by ``datum`` (descending). The list is empty when there is no news
        article or no matching motion.
    """
    if n <= 0:
        return []

    conn = sqlite3.connect(db_path)
    try:
        # The news lookup does not depend on the motion, so run it once.
        # NOTE(review): every candidate gets the single most recent news
        # article; there is no matching against the motion - confirm that
        # this is intended.
        news_row = conn.execute("""
            SELECT n.url FROM news_articles n
            ORDER BY n.datum DESC LIMIT 1
        """).fetchone()
        if not news_row:
            return []
        news_url = news_row[0]

        # Let SQLite exclude motions that already have a 'pm' draft and stop
        # after n rows, instead of fetching every assessment and probing
        # presse_drafts once per row.
        rows = conn.execute(
            """
            SELECT a.drucksache, a.bundesland, a.gwoe_score
            FROM assessments a
            WHERE a.gwoe_score >= ?
              AND NOT EXISTS (
                  SELECT 1 FROM presse_drafts p
                  WHERE p.drucksache = a.drucksache AND p.style = 'pm'
              )
            ORDER BY a.gwoe_score DESC, a.datum DESC
            LIMIT ?
            """,
            (min_score, n),
        ).fetchall()

        return [
            {"drucksache": ds, "bundesland": bl,
             "score": score, "news_url": news_url}
            for ds, bl, score in rows
        ]
    finally:
        conn.close()
async def main() -> None:
    """Generate a 'pm' and a 'thread' draft for each selected candidate.

    Candidates come from ``find_candidates(N_SAMPLES)``. For each one the
    two drafts are requested sequentially via ``generate_draft`` and the
    returned ``id`` / ``_was_existing`` values are printed. An exception
    for one candidate is printed (truncated to 120 characters) and the loop
    continues with the next candidate; if the 'pm' call fails, the 'thread'
    call for the same candidate is skipped.
    """
    candidates = find_candidates(N_SAMPLES)
    if not candidates:
        print(" keine Kandidaten gefunden")
        return

    total = len(candidates)
    print(f" {total} Kandidaten:")

    # (style passed to generate_draft, label used in the log line)
    variants = (("pm", "PM"), ("thread", "Thread"))

    for position, cand in enumerate(candidates, start=1):
        print(f" [{position}/{total}] {cand['bundesland']}/{cand['drucksache']} (GWÖ {cand['score']})")
        try:
            for style, label in variants:
                draft = await generate_draft(
                    drucksache=cand["drucksache"],
                    news_url=cand["news_url"],
                    style=style,
                )
                print(f" {label}: id={draft.get('id')} was_existing={draft.get('_was_existing')}")
        except Exception as exc:
            # Best-effort batch: report the failure and move on.
            message = str(exc)
            print(f" ERROR: {message[:120]}")
# Drive the async entry point to completion.
asyncio.run(main())
EOF
# Because of `set -e`, this line is only reached when `docker exec` (and thus
# the Python program) exited with status 0.
echo "$(date -Iseconds) END pm-sample-bundle"