gwoe-antragspruefer/scripts/pm-quality-audit.sh

104 lines
3.3 KiB
Bash
Raw Normal View History

#!/bin/bash
# PM quality audit: checks every PM draft against the banned-phrase list,
# the length limits and Markdown consistency. Output is a Markdown report.
#
# Manual invocation:
# ./scripts/pm-quality-audit.sh gwoe-antragspruefer-dev > pm-audit.md
set -euo pipefail

# Target container: first positional argument, falling back to the dev
# container when the argument is missing or empty.
CONTAINER="${1:-gwoe-antragspruefer-dev}"

# Capture the container list before matching instead of piping straight into
# `grep -q`: with pipefail, grep exiting on the first match can make the
# producer die of SIGPIPE and turn a successful match into a failed pipeline.
# A failing `docker ps` is deliberately mapped to "nothing is running", so the
# script still ends in the SKIP branch below rather than aborting under -e.
running_containers="$(docker ps --format '{{.Names}}')" || running_containers=""

# -F: treat the name as a literal string (no regex metacharacters such as '.'),
# -x: require the whole line to match, --: protect names starting with '-'.
if ! grep -Fxq -- "$CONTAINER" <<<"$running_containers"; then
  printf '%s SKIP — %s is not running\n' "$(date -Iseconds)" "$CONTAINER" >&2
  exit 0
fi
docker exec -i "$CONTAINER" python <<'EOF'
import sqlite3
import re
# Banned patterns for press-release drafts.
# Each entry is a (regex, label) pair; the label is what the report prints
# after "Verbot: " when the regex is found anywhere in the draft body.
VERBOTSLISTE_PM = [
    # "GWÖ-Score" followed by optional whitespace and a digit.
    (r"GWÖ-Score\s*\d", "GWÖ-Score-Zahl"),
    # A number (optionally with one decimal digit) followed by "/10".
    (r"\b\d+(?:[.,]\d)?\s*/\s*10\b", "X/10-Score"),
    # A standalone letter A-E plus digit 1-5, unless followed by ". ".
    (r"\b[A-E][1-5]\b(?!\.\s)", "Matrix-Code (A1-E5)"),
    # One of three value names immediately followed by "×".
    (r"Würde×|Solidarität×|Nachhaltigkeit×", "Berührungsgruppe×Wert"),
    # Filler words, matched as whole words in exactly this (uninflected) form.
    (r"\bzukunftsweisend\b", "Floskel zukunftsweisend"),
    (r"\binnovativ\b", "Floskel innovativ"),
    (r"\brichtungsweisend\b", "Floskel richtungsweisend"),
    # Fixed phrase with any run of non-word characters after "Bürger".
    (r"in den Bereichen Bürger\W+und Staat", "GWÖ-Berührungsgruppen-Sprache"),
]

# Thread drafts are checked against everything above plus two extra rules.
VERBOTSLISTE_THREAD = [
    *VERBOTSLISTE_PM,
    # A **bold** span that stays on a single line.
    (r"\*\*[^*\n]+\*\*", "Markdown-Bold (Thread sollte ohne)"),
    # A backslash followed by another backslash or a square bracket.
    (r"\\[\\[\\\]]", "Eckige Klammern \\[\\]"),
]
def audit_pm(body: str) -> list:
    """Collect the quality findings for one press-release draft.

    Three checks run in this order:
      1. Word count: flagged when below 280 or above 420 whitespace-separated
         words (the message itself quotes the target range 320-380).
      2. Every (regex, label) pair in VERBOTSLISTE_PM that matches the body.
      3. Paragraph count: flagged when fewer than 4 non-blank chunks result
         from splitting on blank lines (the message quotes a target of 6).

    Args:
        body: Full draft text.

    Returns:
        A list of finding strings; empty when nothing was flagged.
    """
    findings = []

    n_words = len(body.split())
    if not (280 <= n_words <= 420):
        findings.append(f"Wortzahl {n_words} (Soll 320-380)")

    findings.extend(
        f"Verbot: {label}"
        for pattern, label in VERBOTSLISTE_PM
        if re.search(pattern, body)
    )

    # Chunks that are empty or whitespace-only do not count as paragraphs.
    n_paragraphs = sum(1 for chunk in body.split("\n\n") if chunk.strip())
    if n_paragraphs < 4:
        findings.append(f"nur {n_paragraphs} Absätze (Soll 6)")

    return findings
def audit_thread(body: str) -> list:
    """Collect the quality findings for one thread draft.

    The body is split on blank lines into posts. Checks, in order:
      1. Post count must be between 3 and 5 (inclusive).
      2. Each post must be at most 280 characters long.
      3. Every (regex, label) pair in VERBOTSLISTE_THREAD that matches the
         whole body is reported.

    Args:
        body: Full thread text, posts separated by blank lines.

    Returns:
        A list of finding strings; empty when nothing was flagged.
    """
    flaws = []
    # Measure posts without their surrounding whitespace: a trailing newline
    # at the end of the body, or a separator of three newlines, would
    # otherwise be counted as post content and push a post that is exactly
    # 280 characters over the limit.
    posts = [p.strip() for p in body.split("\n\n") if p.strip()]
    if not (3 <= len(posts) <= 5):
        flaws.append(f"{len(posts)} Posts (Soll 3-5)")
    for i, p in enumerate(posts, 1):
        if len(p) > 280:
            flaws.append(f"Post {i}: {len(p)} chars (>280)")
    for pattern, label in VERBOTSLISTE_THREAD:
        if re.search(pattern, body):
            flaws.append(f"Verbot: {label}")
    return flaws
# Report driver: load every draft, audit it, and print a Markdown report
# (header, one table row per draft, summary line) to stdout.
from datetime import date

conn = sqlite3.connect("/app/data/gwoe-antraege.db")
try:
    rows = conn.execute("""
        SELECT id, drucksache, bundesland, style, titel, body, created_at
        FROM presse_drafts ORDER BY id DESC
    """).fetchall()
finally:
    # Release the connection even when the query itself raises.
    conn.close()

# Stamp the report with the day it was generated rather than a fixed date.
print("# PM-Quality-Audit — Stand", date.today().isoformat())
print()
print(f"**Total Drafts:** {len(rows)}\n")

ok = 0
flagged_pm = 0
flagged_thread = 0

print("| ID | DS | BL | Style | Titel-Länge | Body-Wörter | Status |")
print("|---|---|---|---|---|---|---|")
for rid, ds, bl, style, titel, body, _created in rows:
    # A missing style is audited with the press-release rules.
    style = style or "pm"
    # A NULL body is audited as empty text (and flagged by the audit)
    # instead of aborting the whole report on `.split()`.
    body = body or ""
    word_count = len(body.split())
    audit = audit_thread(body) if style == "thread" else audit_pm(body)
    if audit:
        if style == "thread":
            flagged_thread += 1
        else:
            flagged_pm += 1
        # Show at most two findings inline; the rest is summarised as "(+n)".
        status = "⚠ " + "; ".join(audit[:2])
        if len(audit) > 2:
            status += f" (+{len(audit)-2})"
    else:
        status = "✅"
        ok += 1
    print(f"| {rid} | {ds} | {bl} | {style} | {len(titel or '')} | {word_count} | {status} |")

print()
print(f"**Zusammenfassung:** {ok}/{len(rows)} ohne Auffälligkeit · "
      f"{flagged_pm} PMs flagged · {flagged_thread} Threads flagged")
EOF