gwoe-antragspruefer/scripts/pm-quality-audit.sh
Dotty Dotter ba1f104c8e feat(#178 Folge): Thread-Auto-Splitter + Quality-Audit-Skript
- _split_into_thread_posts() splittet zu lange Bodies an Satzgrenzen
  in mehrere Posts ≤ max_chars (Default 280). Greedy: möglichst viele
  Sätze pro Post. Hashtags am Ende bleiben erhalten.
- generate_draft(style='thread') ruft den Splitter auf, wenn das LLM
  weniger als 3 Posts oder Posts > 290 chars liefert.
- 7 Unit-Tests fuer den Splitter (test_thread_splitter.py).
- scripts/pm-quality-audit.sh: prueft alle PM-Drafts gegen Verbotsliste
  (GWÖ-Score, Matrix-Codes, Floskeln) + Wortzahl + Absatzzahl + Post-Laengen.
  Markdown-Report-Output. Audit von 23 Drafts: 4/23 ohne Auffaelligkeit;
  Hauptbefund: PMs haeufig zu kurz, Threads splitten ohne Auto-Splitter
  nicht zuverlaessig — Splitter behebt das.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-06 17:08:57 +02:00

104 lines
3.3 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# PM quality audit: checks all PM drafts against the banned-phrase list,
# length limits and Markdown consistency. Emits a Markdown report on stdout.
#
# Manual invocation:
#   ./scripts/pm-quality-audit.sh gwoe-antragspruefer-dev > pm-audit.md
set -euo pipefail

# Target container: first CLI argument, defaulting to the dev instance.
CONTAINER="${1:-gwoe-antragspruefer-dev}"

# Capture the container names first instead of piping `docker ps` straight into
# `grep -q`: under `pipefail`, grep exiting on its first match can kill
# `docker ps` with SIGPIPE and make the pipeline look like "no match".
# `|| true` keeps the skip-instead-of-fail behaviour when docker itself errors.
running_containers="$(docker ps --format '{{.Names}}' || true)"

# -F/-x: compare the name literally and against the whole line, so characters
# such as '.' in a container name are not interpreted as regex metacharacters.
if ! grep -Fxq -- "$CONTAINER" <<<"$running_containers"; then
    echo "$(date -Iseconds) SKIP — ${CONTAINER} is not running" >&2
    # Exit 0 so a stopped container is reported as a skip, not as a failure.
    exit 0
fi
docker exec -i "$CONTAINER" python <<'EOF'
import sqlite3
import re
# Banned patterns for press releases, as (regex, report label) pairs.
# Each regex is applied with re.search() against the full draft body.
VERBOTSLISTE_PM = [
    # Numeric score leaks: "GWÖ-Score 7" and "7,5/10"-style ratings.
    (r"GWÖ-Score\s*\d", "GWÖ-Score-Zahl"),
    (r"\b\d+(?:[.,]\d)?\s*/\s*10\b", "X/10-Score"),
    # Matrix cell codes A1..E5; the lookahead skips codes directly followed
    # by ". " (a period and whitespace).
    (r"\b[A-E][1-5]\b(?!\.\s)", "Matrix-Code (A1-E5)"),
    (r"Würde×|Solidarität×|Nachhaltigkeit×", "Berührungsgruppe×Wert"),
    # Filler words; matching is case-sensitive and on whole words only.
    (r"\bzukunftsweisend\b", "Floskel zukunftsweisend"),
    (r"\binnovativ\b", "Floskel innovativ"),
    (r"\brichtungsweisend\b", "Floskel richtungsweisend"),
    (r"in den Bereichen Bürger\W+und Staat", "GWÖ-Berührungsgruppen-Sprache"),
]

# Threads are checked against everything above plus two formatting rules.
VERBOTSLISTE_THREAD = [
    *VERBOTSLISTE_PM,
    # Markdown bold spans (**...**) confined to a single line.
    (r"\*\*[^*\n]+\*\*", "Markdown-Bold (Thread sollte ohne)"),
    # A backslash followed by a backslash or a square bracket.
    (r"\\[\\[\\\]]", "Eckige Klammern \\[\\]"),
]
def audit_pm(body: str) -> list:
    """Audit a press-release draft and return its findings.

    Checks, in this order: the whitespace-separated word count, every
    pattern in VERBOTSLISTE_PM, and the number of non-blank paragraphs
    (chunks separated by a blank line).

    Returns a list of message strings; an empty list means no finding.
    """
    findings = []

    # The accepted window (280-420) is wider than the target range quoted
    # in the message (320-380).
    n_words = len(body.split())
    if not 280 <= n_words <= 420:
        findings.append(f"Wortzahl {n_words} (Soll 320-380)")

    findings.extend(
        f"Verbot: {label}"
        for pattern, label in VERBOTSLISTE_PM
        if re.search(pattern, body)
    )

    # Fewer than four paragraphs is flagged; the message quotes a target of six.
    n_paragraphs = sum(1 for chunk in body.split("\n\n") if chunk.strip())
    if n_paragraphs < 4:
        findings.append(f"nur {n_paragraphs} Absätze (Soll 6)")

    return findings
def audit_thread(body: str) -> list:
    """Audit a thread draft and return its findings.

    Posts are the non-blank chunks of ``body`` separated by a blank line.
    Checks, in this order: the number of posts (3 to 5 allowed), the
    length of each post (at most 280 characters), and every pattern in
    VERBOTSLISTE_THREAD against the whole body.

    Returns a list of message strings; an empty list means no finding.
    """
    findings = []

    posts = [chunk for chunk in body.split("\n\n") if chunk.strip()]
    n_posts = len(posts)
    if n_posts < 3 or n_posts > 5:
        findings.append(f"{n_posts} Posts (Soll 3-5)")

    # Post numbers in the messages are 1-based; the length is measured on
    # the chunk exactly as split, without stripping.
    findings += [
        f"Post {number}: {len(post)} chars (>280)"
        for number, post in enumerate(posts, 1)
        if len(post) > 280
    ]

    findings += [
        f"Verbot: {label}"
        for pattern, label in VERBOTSLISTE_THREAD
        if re.search(pattern, body)
    ]
    return findings
# --- Report driver: load all drafts, audit each one, print a Markdown report.
from datetime import date  # local import: only the report header needs it

conn = sqlite3.connect("/app/data/gwoe-antraege.db")
try:
    rows = conn.execute("""
        SELECT id, drucksache, bundesland, style, titel, body, created_at
        FROM presse_drafts ORDER BY id DESC
    """).fetchall()
finally:
    # Release the database handle even if the query fails.
    conn.close()

# Stamp the report with the day it was generated, not a fixed date.
print("# PM-Quality-Audit — Stand", date.today().isoformat())
print()
print(f"**Total Drafts:** {len(rows)}\n")

ok = 0
flagged_pm = 0
flagged_thread = 0

print("| ID | DS | BL | Style | Titel-Länge | Body-Wörter | Status |")
print("|---|---|---|---|---|---|---|")
for r in rows:
    # created_at is selected but not shown in the report.
    rid, ds, bl, style, titel, body, _created = r
    style = style or "pm"  # rows without a style are audited as press releases
    # Treat a NULL body as empty so one bad row is flagged instead of
    # aborting the whole report (assumes the column may be NULL — confirm
    # against the schema).
    body = body or ""
    word_count = len(body.split())

    audit = audit_thread(body) if style == "thread" else audit_pm(body)
    if audit:
        if style == "thread":
            flagged_thread += 1
        else:
            flagged_pm += 1
        # Show the first two findings; summarise the rest as "(+N)".
        status = "⚠ " + "; ".join(audit[:2])
        if len(audit) > 2:
            status += f" (+{len(audit)-2})"
    else:
        status = "✅"
        ok += 1

    print(f"| {rid} | {ds} | {bl} | {style} | {len(titel or '')} | {word_count} | {status} |")

print()
print(f"**Zusammenfassung:** {ok}/{len(rows)} ohne Auffälligkeit · "
      f"{flagged_pm} PMs flagged · {flagged_thread} Threads flagged")
EOF