- _split_into_thread_posts() splittet zu lange Bodies an Satzgrenzen in mehrere Posts ≤ max_chars (Default 280). Greedy: möglichst viele Sätze pro Post. Hashtags am Ende bleiben erhalten. - generate_draft(style='thread') ruft den Splitter auf, wenn das LLM weniger als 3 Posts oder Posts > 290 chars liefert. - 7 Unit-Tests für den Splitter (test_thread_splitter.py). - scripts/pm-quality-audit.sh: prüft alle PM-Drafts gegen Verbotsliste (GWÖ-Score, Matrix-Codes, Floskeln) + Wortzahl + Absatzzahl + Post-Längen. Markdown-Report-Output. Audit von 23 Drafts: 4/23 ohne Auffälligkeit; Hauptbefund: PMs häufig zu kurz, Threads splitten ohne Auto-Splitter nicht zuverlässig — Splitter behebt das. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
104 lines
3.3 KiB
Bash
Executable File
104 lines
3.3 KiB
Bash
Executable File
#!/bin/bash
# PM quality audit: checks every PM draft against the ban list, the length
# limits and Markdown consistency. The report is written to stdout as Markdown.
#
# Manual invocation:
#   ./scripts/pm-quality-audit.sh gwoe-antragspruefer-dev > pm-audit.md

set -euo pipefail

# Container to audit: first CLI argument, falling back to the dev container.
CONTAINER="${1:-gwoe-antragspruefer-dev}"

# Capture the container list before matching. Piping `docker ps` straight into
# `grep -q` is unreliable under `pipefail`: grep exits on the first match, the
# producer can then die with SIGPIPE, and the failed pipeline would be read as
# "not running". `|| true` keeps the skip path when docker itself fails.
running_containers="$(docker ps --format '{{.Names}}' || true)"

# -F/-x: compare the name literally against whole lines, so characters such as
# "." in a container name are not interpreted as regex metacharacters.
if ! grep -Fxq -- "$CONTAINER" <<<"$running_containers"; then
  echo "$(date -Iseconds) SKIP — ${CONTAINER} is not running" >&2
  exit 0
fi
|
||
|
||
docker exec -i "$CONTAINER" python <<'EOF'
|
||
import re
import sqlite3
from datetime import date
|
||
|
||
# Ban list for press releases (PM): (regex, label) pairs. A draft is flagged
# with the label when the regex matches anywhere in its body.
VERBOTSLISTE_PM = [
    # Numeric scores.
    (r"GWÖ-Score\s*\d", "GWÖ-Score-Zahl"),
    (r"\b\d+(?:[.,]\d)?\s*/\s*10\b", "X/10-Score"),
    # Matrix cell codes A1..E5; the lookahead leaves a code alone when it is
    # directly followed by a period and whitespace.
    (r"\b[A-E][1-5]\b(?!\.\s)", "Matrix-Code (A1-E5)"),
    (r"Würde×|Solidarität×|Nachhaltigkeit×", "Berührungsgruppe×Wert"),
    # Buzzwords. The trailing \w* also catches inflected forms ("innovative",
    # "zukunftsweisendes", ...), which is how German adjectives mostly appear.
    (r"\bzukunftsweisend\w*", "Floskel zukunftsweisend"),
    (r"\binnovativ\w*", "Floskel innovativ"),
    (r"\brichtungsweisend\w*", "Floskel richtungsweisend"),
    (r"in den Bereichen Bürger\W+und Staat", "GWÖ-Berührungsgruppen-Sprache"),
]

# Ban list for threads: everything banned in a PM plus Markdown residue.
VERBOTSLISTE_THREAD = VERBOTSLISTE_PM + [
    (r"\*\*[^*\n]+\*\*", "Markdown-Bold (Thread sollte ohne)"),
    # A backslash followed by another backslash, "[" or "]".
    (r"\\[\\[\\\]]", "Eckige Klammern \\[\\]"),
]
|
||
|
||
|
||
def audit_pm(body: str, *, verbotsliste=None) -> list:
    """Audit a press-release (PM) draft and return the flaws found.

    Checks, in this order: the word count (flagged outside 280-420 words),
    every ``(regex, label)`` rule of the ban list, and the paragraph count
    (flagged below 4 paragraphs).

    Args:
        body: Draft text. A falsy value (e.g. ``None`` coming from a NULL
            column) is audited as an empty draft instead of raising.
        verbotsliste: Optional iterable of ``(regex, label)`` pairs to check
            instead of the module-level ``VERBOTSLISTE_PM``.

    Returns:
        The flaw messages (German, exactly as shown in the report); an empty
        list when nothing was flagged.
    """
    text = body or ""
    rules = VERBOTSLISTE_PM if verbotsliste is None else verbotsliste
    flaws = []

    word_count = len(text.split())
    # The limits enforced here are wider than the 320-380 range quoted in the
    # message.
    if not 280 <= word_count <= 420:
        flaws.append(f"Wortzahl {word_count} (Soll 320-380)")

    flaws.extend(
        f"Verbot: {label}"
        for pattern, label in rules
        if re.search(pattern, text)
    )

    # Paragraphs are separated by a blank line. Splitting on a literal "\n\n"
    # would miss CRLF line endings and blank lines that contain whitespace.
    paragraphs = [p for p in re.split(r"\n\s*\n", text) if p.strip()]
    if len(paragraphs) < 4:
        flaws.append(f"nur {len(paragraphs)} Absätze (Soll 6)")

    return flaws
|
||
|
||
|
||
def audit_thread(body: str, *, max_chars: int = 280, verbotsliste=None) -> list:
    """Audit a thread draft and return the flaws found.

    Checks, in this order: the number of posts (3 to 5 expected), the length
    of each post, and every ``(regex, label)`` rule of the ban list applied
    to the whole body.

    Args:
        body: Thread text with posts separated by blank lines. A falsy value
            (e.g. ``None`` coming from a NULL column) is audited as an empty
            thread instead of raising.
        max_chars: Maximum length of a single post.
        verbotsliste: Optional iterable of ``(regex, label)`` pairs to check
            instead of the module-level ``VERBOTSLISTE_THREAD``.

    Returns:
        The flaw messages (German, exactly as shown in the report); an empty
        list when nothing was flagged.
    """
    text = body or ""
    rules = VERBOTSLISTE_THREAD if verbotsliste is None else verbotsliste
    flaws = []

    # Split on blank lines (tolerating CRLF and whitespace-only separators)
    # and strip each post, so that surrounding whitespace such as the body's
    # trailing newline does not count towards the post length.
    posts = [chunk.strip() for chunk in re.split(r"\n\s*\n", text) if chunk.strip()]
    if not 3 <= len(posts) <= 5:
        flaws.append(f"{len(posts)} Posts (Soll 3-5)")

    flaws.extend(
        f"Post {number}: {len(post)} chars (>{max_chars})"
        for number, post in enumerate(posts, 1)
        if len(post) > max_chars
    )

    flaws.extend(
        f"Verbot: {label}"
        for pattern, label in rules
        if re.search(pattern, text)
    )

    return flaws
|
||
|
||
|
||
# Load every stored draft, newest first. try/finally closes the connection
# even when the query raises.
conn = sqlite3.connect("/app/data/gwoe-antraege.db")
try:
    rows = conn.execute("""
        SELECT id, drucksache, bundesland, style, titel, body
        FROM presse_drafts ORDER BY id DESC
    """).fetchall()
finally:
    conn.close()

# Stamp the report with the day it is generated rather than a fixed date.
print("# PM-Quality-Audit — Stand", date.today().isoformat())
print()
print(f"**Total Drafts:** {len(rows)}\n")

ok = 0
flagged_pm = 0
flagged_thread = 0
print("| ID | DS | BL | Style | Titel-Länge | Body-Wörter | Status |")
print("|---|---|---|---|---|---|---|")
for rid, ds, bl, style, titel, body in rows:
    style = style or "pm"  # a missing style is audited as a regular PM
    body = body or ""      # a NULL body is audited as an empty draft
    word_count = len(body.split())
    is_thread = style == "thread"
    audit = audit_thread(body) if is_thread else audit_pm(body)
    if audit:
        if is_thread:
            flagged_thread += 1
        else:
            flagged_pm += 1
        # Show at most two findings per row; the rest is summarised as "(+n)".
        status = "⚠ " + "; ".join(audit[:2])
        if len(audit) > 2:
            status += f" (+{len(audit)-2})"
    else:
        status = "✅"
        ok += 1
    print(f"| {rid} | {ds} | {bl} | {style} | {len(titel or '')} | {word_count} | {status} |")

print()
print(f"**Zusammenfassung:** {ok}/{len(rows)} ohne Auffälligkeit · "
      f"{flagged_pm} PMs flagged · {flagged_thread} Threads flagged")
|
||
EOF
|