gwoe-antragspruefer/app/report.py
Dotty Dotter c268d889fa fix(#175 #176 Phase 14): PDF-Matrix-Coloring + Symbol an v2 angleichen
- get_rating_symbol nutzt jetzt -5..+5-Skala (vorher: rating>=2 → ++,
  was bei rating=2 oder 3 falsche '++' gab; jetzt: rating>=4 → ++).
- PDF-Tabelle nutzt 5 Klassen (rating-pp/-p/-0/-n/-nn) statt 3
  (positive/negative/neutral). Heller Grün/Rot-Tint für mid-strength
  ratings, kräftiges Grün/Rot für Extreme. Visuell deutlich
  unterscheidbar.
- Beibehaltung der alten Klassennamen für Backwards-Compat falls
  irgendwo zwischengespeicherte HTML-Reports liegen.

Damit ist die v2/PDF-Konsistenz fuer NRW/18/18246 (#176) bezüglich
Matrix-Symbole und -Farben hergestellt.

Refs: #175, #176

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-06 23:42:15 +02:00

678 lines
22 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Report generation for HTML and PDF output.
All LLM-generated fields are HTML-escaped before being interpolated into
the report template. WeasyPrint will happily resolve ``<img src="file://...">``
or ``<link rel=stylesheet href="file://...">`` against the container
filesystem, so unescaped LLM output is a Local-File-Read primitive — see
issue #57 (audit findings #2 and #6). The ``_e`` helper is the single
funnel through which all LLM strings must pass on their way into the HTML.
"""
import logging
import subprocess
from html import escape as _e
from pathlib import Path
from typing import Optional
logger = logging.getLogger(__name__)
from .models import Assessment, MATRIX_LABELS, EMPFEHLUNG_CONFIG
from .bundeslaender import BUNDESLAENDER
# ECOnGOOD Colors
COLORS = {
"darkgray": "#5a5a5a",
"green": "#889e33",
"blue": "#009da5",
"lightgray": "#bfbfbf",
"orange": "#F7941D",
"red": "#d00000",
}
def get_score_color(score: float) -> str:
"""Get color for a score value."""
if score >= 7:
return COLORS["blue"]
if score >= 4:
return COLORS["green"]
if score >= 2:
return "#FFC20E"
if score >= 1:
return COLORS["orange"]
return COLORS["red"]
def get_rating_symbol(rating: int) -> str:
"""Convert numeric rating to symbol — gleiche Logik wie in models.py
und v2/components/matrix_mini.html. Skala -5..+5."""
if rating >= 4:
return "++"
if rating >= 1:
return "+"
if rating == 0:
return ""
if rating <= -4:
return ""
return ""
def get_rating_class(rating: int) -> str:
"""5-Klassen-Coloring analog zu v2 matrix_mini (m-pp/m-p/m-0/m-n/m-nn)
— vorher hatte das PDF nur 3 Klassen (positive/negative/neutral),
was zu 'gleichfarbig' für + und ++ führte."""
if rating >= 4:
return "rating-pp"
if rating >= 1:
return "rating-p"
if rating == 0:
return "rating-0"
if rating <= -4:
return "rating-nn"
return "rating-n"
def format_redline_html(text: str) -> str:
"""Convert redline markup (``**ins**`` / ``~~del~~``) to HTML.
Escapes the input first so that any HTML in the LLM output (e.g.
``<img src="file:///etc/passwd">``) becomes inert text. The marker
regexes still fire because ``**`` and ``~~`` are not HTML special
chars and survive escaping unchanged. The inserted ``<span>`` tags
are the only raw HTML in the result and are produced by us.
"""
import re
text = _e(text or "")
# **text** → green bold (inserted)
text = re.sub(r'\*\*([^*]+)\*\*', r'<span class="inserted">\1</span>', text)
# ~~text~~ → red strikethrough (deleted)
text = re.sub(r'~~([^~]+)~~', r'<span class="deleted">\1</span>', text)
return text
def build_matrix_html(assessment: Assessment) -> str:
"""Build HTML matrix table."""
rating_map = {e.field: e for e in assessment.gwoe_matrix}
rows = ["A", "B", "C", "D", "E"]
row_labels = {
"A": "Lieferant:innen",
"B": "Finanzen",
"C": "Führung/Verwaltung",
"D": "Bürger:innen",
"E": "Gesellschaft/Natur",
}
html = ['<table class="matrix-table">']
html.append('<thead><tr>')
html.append('<th></th>')
for col in range(1, 6):
html.append(f'<th>{col}</th>')
html.append('</tr></thead>')
html.append('<tbody>')
for row in rows:
html.append(f'<tr><th>{row}: {row_labels[row]}</th>')
for col in range(1, 6):
field = f"{row}{col}"
entry = rating_map.get(field)
if entry:
symbol = get_rating_symbol(entry.rating)
css_class = get_rating_class(entry.rating)
# entry.aspect comes from the LLM and is interpolated into a
# title="..." attribute — escape it so a stray double-quote
# cannot break out and inject attributes/handlers.
html.append(f'<td class="{css_class}" title="{_e(entry.aspect)}">{symbol}</td>')
else:
html.append('<td class="rating-0"></td>')
html.append('</tr>')
html.append('</tbody></table>')
return '\n'.join(html)
async def generate_html_report(
assessment: Assessment,
output_path: Path,
bundesland: Optional[str] = None,
) -> None:
"""Generate HTML report.
``bundesland`` is the optional state code (e.g. ``"NRW"``, ``"LSA"``).
When set and known in ``BUNDESLAENDER``, the resulting report carries
the parlament name in its header so the source parliament is always
visible — important since assessments from multiple bundesländer share
the same Drucksachen-ID space.
"""
empf_config = EMPFEHLUNG_CONFIG.get(assessment.empfehlung.value, {})
parlament_name = ""
if bundesland and bundesland in BUNDESLAENDER:
parlament_name = BUNDESLAENDER[bundesland].parlament_name
html = f"""<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>GWÖ-Antragsprüfung: {_e(assessment.title or "")}</title>
<style>
:root {{
--color-darkgray: {COLORS['darkgray']};
--color-green: {COLORS['green']};
--color-blue: {COLORS['blue']};
--color-lightgray: {COLORS['lightgray']};
--color-orange: {COLORS['orange']};
--color-red: {COLORS['red']};
}}
body {{
font-family: 'Avenir', Arial, sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 1.5rem 2rem;
color: var(--color-darkgray);
line-height: 1.5;
font-size: 10pt;
}}
.header {{
text-align: center;
border-bottom: 2px solid var(--color-blue);
padding-bottom: 0.75rem;
margin-bottom: 1.25rem;
}}
.header img {{
max-width: 150px;
}}
.header-label {{
font-size: 8pt;
letter-spacing: 0.5px;
color: var(--color-blue);
margin-bottom: 0.5rem;
}}
.header-parlament {{
font-size: 9pt;
color: var(--color-blue);
font-weight: bold;
margin-top: 0.4rem;
letter-spacing: 0.3px;
}}
h1 {{
color: var(--color-darkgray);
font-size: 14pt;
margin: 0.75rem 0;
line-height: 1.3;
}}
h2 {{
color: var(--color-blue);
font-size: 11pt;
border-bottom: 1px solid var(--color-lightgray);
padding-bottom: 0.3rem;
margin-top: 1.25rem;
margin-bottom: 0.5rem;
}}
h3 {{
color: var(--color-green);
font-size: 10pt;
margin-top: 0.75rem;
margin-bottom: 0.3rem;
}}
.meta-box {{
background: #f5f5f5;
padding: 0.6rem 0.8rem;
border-radius: 3px;
margin-bottom: 0.75rem;
font-size: 9pt;
}}
.empfehlung-box {{
background: {empf_config.get('color', COLORS['blue'])}15;
border: 1px solid {empf_config.get('color', COLORS['blue'])};
padding: 0.5rem 0.75rem;
text-align: center;
border-radius: 3px;
margin: 0.75rem 0;
}}
.empfehlung-box .symbol {{
font-size: 12pt;
color: {empf_config.get('color', COLORS['blue'])};
font-weight: bold;
display: inline;
margin-right: 0.5rem;
}}
.empfehlung-box .text {{
font-size: 10pt;
display: inline;
}}
.score-bar {{
background: var(--color-lightgray);
height: 12px;
border-radius: 6px;
overflow: hidden;
margin: 0.3rem 0;
}}
.score-bar-fill {{
height: 100%;
}}
.matrix-table {{
width: 100%;
border-collapse: collapse;
margin: 0.5rem 0;
font-size: 8pt;
}}
.matrix-table th, .matrix-table td {{
border: 1px solid var(--color-lightgray);
padding: 0.25rem 0.4rem;
text-align: center;
}}
.matrix-table thead th {{
background: var(--color-blue);
color: white;
font-size: 8pt;
font-weight: normal;
}}
.matrix-table tbody th {{
background: #f5f5f5;
text-align: left;
font-weight: normal;
font-size: 8pt;
}}
/* 5-Klassen-Coloring analog zu v2 matrix_mini (#177): ++ und +
müssen visuell deutlich unterscheidbar sein. */
.matrix-table .rating-pp {{
background: var(--color-green);
color: white;
font-weight: bold;
}}
.matrix-table .rating-p {{
background: #cddaa1; /* heller Grün-Tint */
color: var(--color-darkgray);
}}
.matrix-table .rating-0 {{
background: #f6f6f6;
color: #888;
}}
.matrix-table .rating-n {{
background: #efc9c3; /* heller Rot-Tint */
color: var(--color-darkgray);
}}
.matrix-table .rating-nn {{
background: var(--color-red);
color: white;
font-weight: bold;
}}
/* Backwards-Compat fuer evtl. zwischengespeicherte HTML mit
alten Klassennamen — gleiche Optik wie rating-pp/-nn/-0. */
.matrix-table .positive {{ background: var(--color-green); color: white; font-weight: bold; }}
.matrix-table .negative {{ background: var(--color-red); color: white; font-weight: bold; }}
.matrix-table .neutral {{ background: #f0f0f0; }}
.verbesserung {{
margin: 0.5rem 0;
padding: 0.5rem;
border: 1px solid var(--color-lightgray);
border-radius: 3px;
font-size: 9pt;
}}
.verbesserung .original {{
background: #f9f9f9;
padding: 0.4rem;
margin-bottom: 0.3rem;
}}
.verbesserung .vorschlag {{
background: rgba(136, 158, 51, 0.1);
border-left: 2px solid var(--color-green);
padding: 0.4rem;
}}
.inserted {{
color: var(--color-green);
font-weight: bold;
}}
.deleted {{
color: var(--color-red);
text-decoration: line-through;
}}
.two-columns {{
display: grid;
grid-template-columns: 1fr 1fr;
gap: 0.75rem;
}}
.staerken {{
border-left: 2px solid var(--color-green);
padding-left: 0.5rem;
}}
.schwaechen {{
border-left: 2px solid var(--color-orange);
padding-left: 0.5rem;
}}
ul {{
margin: 0.3rem 0;
padding-left: 1.2rem;
}}
li {{
margin-bottom: 0.2rem;
}}
p {{
margin: 0.4rem 0;
}}
.footer {{
margin-top: 1.5rem;
padding-top: 0.5rem;
border-top: 1px solid var(--color-lightgray);
text-align: center;
color: var(--color-lightgray);
font-size: 7pt;
}}
@media print {{
body {{ max-width: none; }}
}}
</style>
</head>
<body>
<div class="header">
<div class="header-label">GEMEINWOHL-ÖKONOMIE | ANTRAGSBEWERTUNG</div>
<h1>{_e(assessment.title or "")}</h1>
{f'<div class="header-parlament">{_e(parlament_name)}</div>' if parlament_name else ''}
</div>
<div class="meta-box">
<strong>Drucksache:</strong> {_e(assessment.drucksache or "")} &nbsp;|&nbsp;
<strong>Datum:</strong> {_e(assessment.datum or "")} &nbsp;|&nbsp;
<strong>Fraktion(en):</strong> {_e(', '.join(assessment.fraktionen))} &nbsp;|&nbsp;
<strong>GWÖ-Score:</strong> <span style="color: {get_score_color(assessment.gwoe_score)}; font-weight: bold;">{assessment.gwoe_score}/10</span>
</div>
<div class="empfehlung-box">
<span class="symbol">{_e(empf_config.get('symbol', '[?]'))}</span>
<span class="text"><strong>Empfehlung:</strong> {_e(assessment.empfehlung.value)}</span>
</div>
<h2>Der Antrag im Überblick</h2>
<p>{_e(assessment.antrag_zusammenfassung or 'Keine Zusammenfassung verfügbar.')}</p>
{('<ul>' + ''.join(f'<li>{_e(k)}</li>' for k in assessment.antrag_kernpunkte) + '</ul>') if assessment.antrag_kernpunkte else ''}
<h2>GWÖ-Treue</h2>
<p style="font-size: 9pt;"><strong>Score:</strong> <span style="color: {get_score_color(assessment.gwoe_score)};">{assessment.gwoe_score}/10</span></p>
<div class="score-bar">
<div class="score-bar-fill" style="width: {assessment.gwoe_score * 10}%; background: {get_score_color(assessment.gwoe_score)};"></div>
</div>
<p><strong>Begründung:</strong> {_e(assessment.gwoe_begruendung or "")}</p>
<p><strong>Schwerpunkte:</strong> {_e(', '.join(assessment.gwoe_schwerpunkt))}</p>
<h2>Matrix-Zuordnung (Matrix 2.0 für Gemeinden)</h2>
{build_matrix_html(assessment)}
<p style="font-size: 7pt; color: #999;">
<strong>Legende:</strong> ++ stark fördernd, + fördernd, ○ neutral, widersprechend, stark widersprechend
</p>
<h3>Berührte Themenfelder</h3>
<ul>
{''.join(f'<li><strong>{_e(e.field)}:</strong> {_e(e.aspect)} [{get_rating_symbol(e.rating)}]</li>' for e in assessment.gwoe_matrix)}
</ul>
<h2>Programmtreue</h2>
{''.join(f'''
<h3>{_e(s.fraktion)} {' (Antragsteller)' if s.ist_antragsteller else ''}{' (Regierung)' if s.ist_regierung else ''}</h3>
<p><strong>Wahlprogramm:</strong> {s.wahlprogramm.score}/10 — {_e(s.wahlprogramm.begruendung or "")}</p>
<p><strong>Parteiprogramm:</strong> {s.parteiprogramm.score}/10 — {_e(s.parteiprogramm.begruendung or "")}</p>
''' for s in assessment.wahlprogramm_scores)}
<h2>Verbesserungsvorschläge</h2>
{''.join(f'''
<div class="verbesserung">
<div class="original"><strong>Original:</strong><br>{_e(v.original or "")}</div>
<div class="vorschlag"><strong>Vorschlag:</strong><br>{format_redline_html(v.vorschlag)}</div>
<div style="font-style: italic; margin-top: 0.5rem;">{_e(v.begruendung or "")}</div>
</div>
''' for v in assessment.verbesserungen) or '<p>Keine Verbesserungsvorschläge.</p>'}
<h2>Zusammenfassung</h2>
<div class="two-columns">
<div class="staerken">
<h3 style="color: var(--color-green);">Stärken</h3>
<ul>
{''.join(f'<li>{_e(s)}</li>' for s in assessment.staerken) or '<li>(keine)</li>'}
</ul>
</div>
<div class="schwaechen">
<h3 style="color: var(--color-orange);">Schwächen</h3>
<ul>
{''.join(f'<li>{_e(s)}</li>' for s in assessment.schwaechen) or '<li>(keine)</li>'}
</ul>
</div>
</div>
<div class="footer">
<p>Erstellt mit GWÖ-Antragsprüfer v4.1 | Matrix 2.0 für Gemeinden</p>
<p style="color: var(--color-blue);">germany.econgood.org</p>
</div>
</body>
</html>"""
output_path.write_text(html)
async def generate_pdf_report(
assessment: Assessment,
output_path: Path,
bundesland: Optional[str] = None,
) -> None:
"""Generate PDF report using WeasyPrint, then append the original Antrag.
Two-step pipeline:
1. Render the GWÖ-Report HTML and convert to PDF via WeasyPrint
(existing behaviour).
2. If ``assessment.link`` is a fetchable PDF URL, download it via
``httpx`` and append it after a separator page so the resulting
single file contains both the analysis and its source document
(issue #9).
The append step is best-effort: a missing/empty link is silently
skipped, network errors and parse errors fall back to a single
placeholder page so the report itself is always delivered.
``bundesland`` is forwarded to ``generate_html_report`` so the source
parlament name appears in the report header.
"""
# Step 1 — render the report itself
html_path = output_path.with_suffix('.tmp.html')
await generate_html_report(assessment, html_path, bundesland=bundesland)
try:
from weasyprint import HTML
HTML(filename=str(html_path)).write_pdf(str(output_path))
finally:
html_path.unlink(missing_ok=True)
# Step 2 — append the original Antrag (best-effort)
await _append_original_antrag(assessment, output_path)
async def _append_original_antrag(
assessment: Assessment,
report_path: Path,
) -> None:
"""Try to download the original Antrag PDF and append it to ``report_path``.
Failure modes (download error, non-PDF content, parse error) are
handled gracefully: a single placeholder page is appended noting the
issue, so the user always sees that an attempt was made.
"""
import fitz # PyMuPDF
import httpx
link = (assessment.link or "").strip()
if not link or not link.startswith(("http://", "https://")):
# Manual upload / pasted text — nothing to append.
return
download_error: Optional[str] = None
pdf_bytes: Optional[bytes] = None
try:
async with httpx.AsyncClient(
timeout=30,
follow_redirects=True,
headers={"User-Agent": "Mozilla/5.0 GWOE-Antragspruefer"},
) as client:
resp = await client.get(link)
if resp.status_code != 200:
download_error = f"HTTP {resp.status_code}"
elif not resp.content[:5].startswith(b"%PDF-"):
download_error = f"kein PDF (Content-Type: {resp.headers.get('content-type', 'unknown')})"
else:
pdf_bytes = resp.content
except Exception as e:
download_error = f"Download-Fehler: {e}"
try:
report_doc = fitz.open(report_path)
try:
# Always insert a divider page so the user sees what comes next
_insert_divider_page(report_doc, assessment, download_error)
if pdf_bytes is not None:
try:
src_doc = fitz.open(stream=pdf_bytes, filetype="pdf")
try:
report_doc.insert_pdf(src_doc)
finally:
src_doc.close()
except Exception as e:
logger.exception("_append_original_antrag: PDF-Parse-Fehler für %s", assessment.drucksache)
# PyMuPDF refuses to overwrite the source file in non-incremental
# mode — write to a sibling temp file and atomically replace.
tmp_path = report_path.with_suffix(report_path.suffix + ".tmp")
report_doc.save(
str(tmp_path),
deflate=True,
garbage=3,
)
finally:
report_doc.close()
tmp_path.replace(report_path)
except Exception as e:
# Hard failure — leave the original report file untouched.
logger.exception("_append_original_antrag: Konnte Report nicht erweitern für %s", assessment.drucksache)
def _insert_divider_page(
report_doc, # fitz.Document
assessment: Assessment,
download_error: Optional[str],
) -> None:
"""Append a single A4 separator page that introduces the original Antrag.
Uses PyMuPDF's text drawing API directly so we don't need a second
WeasyPrint round-trip just for one page.
"""
page = report_doc.new_page(width=595, height=842) # A4
margin_left = 60
y = 200
# Title
page.insert_text(
(margin_left, y),
"Original-Antrag",
fontsize=24,
fontname="helv",
color=(0 / 255, 157 / 255, 165 / 255), # var(--color-blue)
)
y += 38
# Drucksache
page.insert_text(
(margin_left, y),
f"Drucksache {assessment.drucksache}",
fontsize=14,
fontname="helv",
color=(0.35, 0.35, 0.35),
)
y += 22
# Title (truncated to ~75 chars to fit one line)
title = assessment.title or ""
if len(title) > 75:
title = title[:72] + ""
page.insert_text(
(margin_left, y),
title,
fontsize=11,
fontname="helv",
color=(0.35, 0.35, 0.35),
)
y += 40
if download_error:
page.insert_text(
(margin_left, y),
"⚠ Original-PDF konnte nicht angehängt werden.",
fontsize=11,
fontname="helv",
color=(0.82, 0.0, 0.0),
)
y += 18
page.insert_text(
(margin_left, y),
f"Grund: {download_error}",
fontsize=10,
fontname="helv",
color=(0.5, 0.5, 0.5),
)
y += 18
if assessment.link:
page.insert_text(
(margin_left, y),
f"Quelle: {assessment.link[:90]}",
fontsize=9,
fontname="helv",
color=(0.5, 0.5, 0.5),
)
else:
page.insert_text(
(margin_left, y),
"Die folgenden Seiten enthalten den unveränderten Originalantrag.",
fontsize=11,
fontname="helv",
color=(0.35, 0.35, 0.35),
)