91 lines
2.9 KiB
Bash
91 lines
2.9 KiB
Bash
|
|
#!/bin/bash
# PM sample bundle: generates one press release (PM) in each of two styles
# (classic + thread) for N_SAMPLES (default 5) highly rated motions, for
# manual quality review.
#
# Manual invocation:
#   ./scripts/pm-sample-bundle.sh gwoe-antragspruefer-dev
#
# Running the script repeatedly does not regenerate existing PMs
# (idempotent per drucksache+news_url+style).

set -euo pipefail

# First positional argument selects the container; N_SAMPLES may be
# overridden through the environment.
CONTAINER="${1:-gwoe-antragspruefer-dev}"
N_SAMPLES="${N_SAMPLES:-5}"

# Capture the container list before matching. Piping `docker ps` straight
# into `grep -q` under `pipefail` can report a false "not running": grep
# exits on the first match and docker may then be killed by SIGPIPE, which
# fails the whole pipeline. `|| true` keeps the previous behaviour of
# treating a failing `docker ps` as "not running" instead of aborting
# under `set -e`.
running_containers="$(docker ps --format '{{.Names}}' || true)"

# -x -F: compare whole lines literally, so regex metacharacters in the
# container name (e.g. '.') cannot produce a loose match.
if ! grep -qxF -- "$CONTAINER" <<<"$running_containers"; then
  echo "$(date -Iseconds) SKIP — ${CONTAINER} is not running"
  exit 0
fi

echo "$(date -Iseconds) START pm-sample-bundle (n=${N_SAMPLES})"

# Feed the Python program below to the interpreter inside the container.
# The quoted 'EOF' delimiter disables shell expansion in the here-document.
docker exec -i -e N_SAMPLES="$N_SAMPLES" "$CONTAINER" python <<'EOF'
|
||
|
|
import os
|
||
|
|
import asyncio
|
||
|
|
import sqlite3
|
||
|
|
from app.presse_generator import generate_draft
|
||
|
|
|
||
|
|
|
||
|
|
# Number of candidates to process; taken from the N_SAMPLES environment
# variable and falling back to 5 when it is not set.
N_SAMPLES = int(os.getenv("N_SAMPLES", "5"))
|
||
|
|
|
||
|
|
|
||
|
|
def find_candidates(n: int, db_path: str = "/app/data/gwoe-antraege.db") -> list:
    """Return up to ``n`` highly rated motions that have no PM draft yet.

    Motions are rows of ``assessments`` with ``gwoe_score >= 7.0``, ordered by
    score and then date (both descending). A motion is skipped when
    ``presse_drafts`` already contains a row for its ``drucksache`` with
    ``style='pm'``. Every returned candidate is paired with the URL of the
    most recent row in ``news_articles``.

    Args:
        n: Maximum number of candidates to return. Values <= 0 yield ``[]``.
        db_path: Path of the SQLite database to read.

    Returns:
        A list of dicts with the keys ``drucksache``, ``bundesland``,
        ``score`` and ``news_url``. The list is empty when there is no news
        article or no eligible motion.
    """
    if n <= 0:
        return []

    conn = sqlite3.connect(db_path)
    try:
        # The news lookup does not depend on the motion, so it is done once
        # up front; without any news article no candidate can be built.
        newest_news = conn.execute(
            "SELECT n.url FROM news_articles n ORDER BY n.datum DESC LIMIT 1"
        ).fetchone()
        if not newest_news:
            return []
        news_url = newest_news[0]

        # NOT EXISTS filters motions that already have a PM draft and LIMIT
        # stops after n rows, replacing one extra query per motion.
        rows = conn.execute(
            """
            SELECT a.drucksache, a.bundesland, a.gwoe_score
            FROM assessments a
            WHERE a.gwoe_score >= 7.0
              AND NOT EXISTS (
                  SELECT 1 FROM presse_drafts p
                  WHERE p.drucksache = a.drucksache AND p.style = 'pm'
              )
            ORDER BY a.gwoe_score DESC, a.datum DESC
            LIMIT ?
            """,
            (n,),
        ).fetchall()
        return [
            {"drucksache": ds, "bundesland": bl, "score": score, "news_url": news_url}
            for ds, bl, score in rows
        ]
    finally:
        conn.close()
|
||
|
|
|
||
|
|
|
||
|
|
async def main() -> None:
    """Generate a PM draft and a thread draft for each candidate motion.

    Candidates come from ``find_candidates(N_SAMPLES)``. For every candidate
    the "pm" draft is requested first, then the "thread" draft, and the
    returned ``id`` / ``_was_existing`` values are printed. Any exception
    raised while handling a candidate is printed (truncated to 120
    characters) and processing continues with the next candidate.
    """
    candidates = find_candidates(N_SAMPLES)
    if not candidates:
        print(" keine Kandidaten gefunden")
        return

    total = len(candidates)
    print(f" {total} Kandidaten:")
    for position, cand in enumerate(candidates, start=1):
        print(f" [{position}/{total}] {cand['bundesland']}/{cand['drucksache']} (GWÖ {cand['score']})")
        try:
            # Same order as before: a failing PM request also skips the
            # thread request for this candidate.
            for label, style in (("PM", "pm"), ("Thread", "thread")):
                draft = await generate_draft(
                    drucksache=cand["drucksache"],
                    news_url=cand["news_url"],
                    style=style,
                )
                print(f" {label}: id={draft.get('id')} was_existing={draft.get('_was_existing')}")
        except Exception as exc:
            # Best effort: report the failure and move on to the next one.
            print(f" ERROR: {str(exc)[:120]}")
|
||
|
|
|
||
|
|
|
||
|
|
# Entry point of the embedded Python program. A program read from stdin runs
# as "__main__", so this guard is always satisfied here.
if __name__ == "__main__":
    asyncio.run(main())
EOF

# Closing log line, printed once the container-side run has finished.
printf '%s END pm-sample-bundle\n' "$(date -Iseconds)"
|