gwoe-antragspruefer/app/report.py
Dotty Dotter c7eab5a695 feat(pdf): Heuchelei-/Opportunismus-Marker im Vote-Block (#175)
Web-Detail zeigt diese Marker bereits — pro NEIN-Fraktion einen ⚠ wenn
der eigene Wahlprogramm-Score ≥ 7/10 ist (Heuchelei: stimmt gegen die
eigenen Versprechen), pro JA-Fraktion einen ! wenn der Wahlprogramm-
Score < 3/10 (Opportunismus: stimmt zu obwohl Antrag inhaltlich nicht
zum eigenen Programm passt). Im PDF fehlten sie bisher.

Daten-Pfad: report.py rechnet die Marker einmal vor (heuchelei_score /
opportunismus_score aus app/marker.py, gefüttert mit umgemappten
fraktions_scores aus assessment.wahlprogramm_scores) und reicht zwei
Maps fraktion → score ans Template. Template macht nur noch Lookup:
``opportunismus_by_fraktion.get(f)`` neben jeder JA-Fraktion,
``heuchelei_by_fraktion.get(f)`` neben jeder NEIN-Fraktion. Plus
kompakte Legende unter dem Vote-Block, falls überhaupt Marker
vorkommen.

Stimmverhalten und Programm-Treue-Begründungen sind im PDF schon da
(verifiziert bei der Code-Inspektion). Damit ist die "PDF auf Augenhöhe
mit Web-Detail"-Liste aus #175 bis auf News-Match abgehakt; News-Match
explizit out-of-scope nach User-Entscheidung.
2026-05-09 02:21:12 +02:00

830 lines
28 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Report generation for HTML and PDF output.
All LLM-generated fields are HTML-escaped before being interpolated into
the report template. WeasyPrint will happily resolve ``<img src="file://...">``
or ``<link rel=stylesheet href="file://...">`` against the container
filesystem, so unescaped LLM output is a Local-File-Read primitive — see
issue #57 (audit findings #2 and #6). The ``_e`` helper is the single
funnel through which all LLM strings must pass on their way into the HTML.
"""
import logging
import subprocess
from html import escape as _e
from pathlib import Path
from typing import Optional
from jinja2 import Environment, FileSystemLoader, select_autoescape
logger = logging.getLogger(__name__)
from .models import Assessment, MATRIX_LABELS, EMPFEHLUNG_CONFIG
from .bundeslaender import BUNDESLAENDER
# Dedicated Jinja environment for the PDF templates (kept separate from the
# Starlette templates, because report.py must also be able to run from
# background jobs that have no FastAPI request).
# Templates are loaded from the ``templates`` directory next to this module.
_TEMPLATE_DIR = Path(__file__).parent / "templates"
_pdf_jinja = Environment(
    loader=FileSystemLoader(str(_TEMPLATE_DIR)),
    # Autoescaping is switched on for templates with an ``html`` extension.
    autoescape=select_autoescape(["html"]),
)
# ECOnGOOD Colors
# Named hex colours shared by the report code in this module: they are
# exported as CSS custom properties in the legacy HTML report and looked up
# by ``get_score_color``.
COLORS = {
    "darkgray": "#5a5a5a",
    "green": "#889e33",
    "blue": "#009da5",
    "lightgray": "#bfbfbf",
    "orange": "#F7941D",
    "red": "#d00000",
}
def get_score_color(score: float) -> str:
    """Return the hex colour used to display ``score``.

    The thresholds are checked from best to worst; the first lower bound
    that ``score`` reaches decides the colour. Anything below the lowest
    bound is shown in red.
    """
    thresholds = (
        (7, COLORS["blue"]),
        (4, COLORS["green"]),
        (2, "#FFC20E"),  # yellow tier; not part of the COLORS table
        (1, COLORS["orange"]),
    )
    for lower_bound, colour in thresholds:
        if score >= lower_bound:
            return colour
    return COLORS["red"]
def get_rating_symbol(rating: int) -> str:
    """Convert a numeric matrix rating (scale -5..+5) to its display symbol.

    The bands mirror ``get_rating_class``:

    * ``rating >= 4``  -> ``++`` (strongly supportive)
    * ``rating >= 1``  -> ``+``  (supportive)
    * ``rating == 0``  -> neutral circle
    * ``rating <= -4`` -> double minus (strongly contradicting)
    * otherwise        -> single minus (contradicting)

    The non-ASCII symbols are spelled as explicit escapes so they cannot be
    lost or confused with look-alike characters when the file is viewed or
    copied through tools that flag ambiguous Unicode.
    """
    minus = "\u2212"    # MINUS SIGN, typographic counterpart of "+"
    neutral = "\u25cb"  # WHITE CIRCLE, as used in the report legend

    if rating >= 4:
        return "++"
    if rating >= 1:
        return "+"
    if rating == 0:
        return neutral
    if rating <= -4:
        return minus * 2
    return minus
def get_rating_class(rating: int) -> str:
    """Return the CSS class for a matrix rating (five-step colouring).

    Five classes are used (``rating-pp`` / ``rating-p`` / ``rating-0`` /
    ``rating-n`` / ``rating-nn``) so that ``+`` and ``++`` cells no longer
    share one colour, as they did with the older three-class scheme.
    """
    if rating >= 1:
        # Positive side: strong from 4 upwards, mild below that.
        return "rating-pp" if rating >= 4 else "rating-p"
    if rating == 0:
        return "rating-0"
    # Everything else is treated as negative: strong at -4 and below.
    return "rating-nn" if rating <= -4 else "rating-n"
def format_redline_html(text: str) -> str:
    """Turn redline markup (``**ins**`` / ``~~del~~``) into HTML spans.

    The input is HTML-escaped *before* the markers are replaced, so markup
    contained in the text (e.g. ``<img src="file:///etc/passwd">``) ends up
    as inert text. ``**`` and ``~~`` are not touched by escaping, so the
    marker patterns still match afterwards. The ``<span>`` tags added here
    are the only raw HTML in the returned string.

    A falsy ``text`` (``None`` or ``""``) yields an empty string.
    """
    import re

    rendered = _e(text or "")
    marker_rules = (
        # **text** -> inserted (green, bold)
        (r'\*\*([^*]+)\*\*', r'<span class="inserted">\1</span>'),
        # ~~text~~ -> deleted (red, struck through)
        (r'~~([^~]+)~~', r'<span class="deleted">\1</span>'),
    )
    for pattern, replacement in marker_rules:
        rendered = re.sub(pattern, replacement, rendered)
    return rendered
def build_matrix_html_v2(assessment: Assessment) -> str:
    """Render the matrix through the v2 ``matrix_mini`` macro.

    This uses the macro from ``v2/components/matrix_mini.html``, which
    according to the original note is also the source for the web view
    (first step of #175 phase 23: the PDF reuses the v2 block macros so
    both outputs stay consistent by design).

    Returns the macro output converted to ``str``.
    """
    from jinja2 import Environment, FileSystemLoader

    templates_root = Path(__file__).resolve().parent / "templates"
    jinja_env = Environment(
        loader=FileSystemLoader(str(templates_root)),
        autoescape=True,
    )
    mini_template = jinja_env.get_template("v2/components/matrix_mini.html")

    # One entry per matrix field; the symbol is passed as an empty string.
    cells = {
        entry.field: {"rating": entry.rating, "symbol": ""}
        for entry in assessment.gwoe_matrix
    }

    # Macros are reachable as attributes of the template's ``module``.
    return str(mini_template.module.matrix_mini(cells))
def build_matrix_html(assessment: Assessment) -> str:
    """Legacy renderer: build the 5x5 matrix table as an HTML string.

    Rows are the groups A-E, columns are numbered 1-5. Each cell looks up
    the matrix entry for its field name (e.g. ``"B3"``); fields without an
    entry become an empty ``rating-0`` cell.

    ``build_matrix_html_v2`` is the macro-based alternative; the original
    note says it becomes usable once the v2 stylesheet is part of the PDF.
    """
    entries_by_field = {entry.field: entry for entry in assessment.gwoe_matrix}
    row_titles = {
        "A": "Lieferant:innen",
        "B": "Finanzen",
        "C": "Führung/Verwaltung",
        "D": "Bürger:innen",
        "E": "Gesellschaft/Natur",
    }
    columns = range(1, 6)

    parts = ['<table class="matrix-table">', '<thead><tr>', '<th></th>']
    parts.extend(f'<th>{col}</th>' for col in columns)
    parts.append('</tr></thead>')
    parts.append('<tbody>')

    for row_key, row_title in row_titles.items():
        parts.append(f'<tr><th>{row_key}: {row_title}</th>')
        for col in columns:
            cell = entries_by_field.get(f"{row_key}{col}")
            if not cell:
                parts.append('<td class="rating-0"></td>')
                continue
            symbol = get_rating_symbol(cell.rating)
            css_class = get_rating_class(cell.rating)
            # The aspect text goes into a title="..." attribute, so it is
            # escaped: a stray double quote must not be able to close the
            # attribute and inject further attributes or handlers.
            parts.append(
                f'<td class="{css_class}" title="{_e(cell.aspect)}">{symbol}</td>'
            )
        parts.append('</tr>')

    parts.append('</tbody></table>')
    return '\n'.join(parts)
async def generate_html_report(
    assessment: Assessment,
    output_path: Path,
    bundesland: Optional[str] = None,
) -> None:
    """Generate the legacy HTML report and write it to ``output_path``.

    Args:
        assessment: The assessment whose fields are rendered. Every text
            field is passed through ``_e`` (or ``format_redline_html``)
            before it is interpolated into the markup.
        output_path: Target file. It is written as UTF-8, matching the
            ``<meta charset="UTF-8">`` declared in the document.
        bundesland: Optional state code (e.g. ``"NRW"``, ``"LSA"``). When
            set and known in ``BUNDESLAENDER``, the report header carries
            the parlament name so the source parliament is always visible —
            important since assessments from multiple bundesländer share
            the same Drucksachen-ID space.
    """
    empf_config = EMPFEHLUNG_CONFIG.get(assessment.empfehlung.value, {})
    parlament_name = ""
    if bundesland and bundesland in BUNDESLAENDER:
        parlament_name = BUNDESLAENDER[bundesland].parlament_name
    html = f"""<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>GWÖ-Antragsprüfung: {_e(assessment.title or "")}</title>
<style>
:root {{
--color-darkgray: {COLORS['darkgray']};
--color-green: {COLORS['green']};
--color-blue: {COLORS['blue']};
--color-lightgray: {COLORS['lightgray']};
--color-orange: {COLORS['orange']};
--color-red: {COLORS['red']};
}}
body {{
font-family: 'Avenir', Arial, sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 1.5rem 2rem;
color: var(--color-darkgray);
line-height: 1.5;
font-size: 10pt;
}}
.header {{
text-align: center;
border-bottom: 2px solid var(--color-blue);
padding-bottom: 0.75rem;
margin-bottom: 1.25rem;
}}
.header img {{
max-width: 150px;
}}
.header-label {{
font-size: 8pt;
letter-spacing: 0.5px;
color: var(--color-blue);
margin-bottom: 0.5rem;
}}
.header-parlament {{
font-size: 9pt;
color: var(--color-blue);
font-weight: bold;
margin-top: 0.4rem;
letter-spacing: 0.3px;
}}
h1 {{
color: var(--color-darkgray);
font-size: 14pt;
margin: 0.75rem 0;
line-height: 1.3;
}}
h2 {{
color: var(--color-blue);
font-size: 11pt;
border-bottom: 1px solid var(--color-lightgray);
padding-bottom: 0.3rem;
margin-top: 1.25rem;
margin-bottom: 0.5rem;
}}
h3 {{
color: var(--color-green);
font-size: 10pt;
margin-top: 0.75rem;
margin-bottom: 0.3rem;
}}
.meta-box {{
background: #f5f5f5;
padding: 0.6rem 0.8rem;
border-radius: 3px;
margin-bottom: 0.75rem;
font-size: 9pt;
}}
.empfehlung-box {{
background: {empf_config.get('color', COLORS['blue'])}15;
border: 1px solid {empf_config.get('color', COLORS['blue'])};
padding: 0.5rem 0.75rem;
text-align: center;
border-radius: 3px;
margin: 0.75rem 0;
}}
.empfehlung-box .symbol {{
font-size: 12pt;
color: {empf_config.get('color', COLORS['blue'])};
font-weight: bold;
display: inline;
margin-right: 0.5rem;
}}
.empfehlung-box .text {{
font-size: 10pt;
display: inline;
}}
.score-bar {{
background: var(--color-lightgray);
height: 12px;
border-radius: 6px;
overflow: hidden;
margin: 0.3rem 0;
}}
.score-bar-fill {{
height: 100%;
}}
.matrix-table {{
width: 100%;
border-collapse: collapse;
margin: 0.5rem 0;
font-size: 8pt;
}}
.matrix-table th, .matrix-table td {{
border: 1px solid var(--color-lightgray);
padding: 0.25rem 0.4rem;
text-align: center;
}}
.matrix-table thead th {{
background: var(--color-blue);
color: white;
font-size: 8pt;
font-weight: normal;
}}
.matrix-table tbody th {{
background: #f5f5f5;
text-align: left;
font-weight: normal;
font-size: 8pt;
}}
/* 5-Klassen-Coloring analog zu v2 matrix_mini (#177): ++ und +
müssen visuell deutlich unterscheidbar sein. */
.matrix-table .rating-pp {{
background: var(--color-green);
color: white;
font-weight: bold;
}}
.matrix-table .rating-p {{
background: #cddaa1; /* heller Grün-Tint */
color: var(--color-darkgray);
}}
.matrix-table .rating-0 {{
background: #f6f6f6;
color: #888;
}}
.matrix-table .rating-n {{
background: #efc9c3; /* heller Rot-Tint */
color: var(--color-darkgray);
}}
.matrix-table .rating-nn {{
background: var(--color-red);
color: white;
font-weight: bold;
}}
/* Backwards-Compat fuer evtl. zwischengespeicherte HTML mit
alten Klassennamen — gleiche Optik wie rating-pp/-nn/-0. */
.matrix-table .positive {{ background: var(--color-green); color: white; font-weight: bold; }}
.matrix-table .negative {{ background: var(--color-red); color: white; font-weight: bold; }}
.matrix-table .neutral {{ background: #f0f0f0; }}
.verbesserung {{
margin: 0.5rem 0;
padding: 0.5rem;
border: 1px solid var(--color-lightgray);
border-radius: 3px;
font-size: 9pt;
}}
.verbesserung .original {{
background: #f9f9f9;
padding: 0.4rem;
margin-bottom: 0.3rem;
}}
.verbesserung .vorschlag {{
background: rgba(136, 158, 51, 0.1);
border-left: 2px solid var(--color-green);
padding: 0.4rem;
}}
.inserted {{
color: var(--color-green);
font-weight: bold;
}}
.deleted {{
color: var(--color-red);
text-decoration: line-through;
}}
.two-columns {{
display: grid;
grid-template-columns: 1fr 1fr;
gap: 0.75rem;
}}
.staerken {{
border-left: 2px solid var(--color-green);
padding-left: 0.5rem;
}}
.schwaechen {{
border-left: 2px solid var(--color-orange);
padding-left: 0.5rem;
}}
ul {{
margin: 0.3rem 0;
padding-left: 1.2rem;
}}
li {{
margin-bottom: 0.2rem;
}}
p {{
margin: 0.4rem 0;
}}
.footer {{
margin-top: 1.5rem;
padding-top: 0.5rem;
border-top: 1px solid var(--color-lightgray);
text-align: center;
color: var(--color-lightgray);
font-size: 7pt;
}}
@media print {{
body {{ max-width: none; }}
}}
</style>
</head>
<body>
<div class="header">
<div class="header-label">GEMEINWOHL-ÖKONOMIE | ANTRAGSBEWERTUNG</div>
<h1>{_e(assessment.title or "")}</h1>
{f'<div class="header-parlament">{_e(parlament_name)}</div>' if parlament_name else ''}
</div>
<div class="meta-box">
<strong>Drucksache:</strong> {_e(assessment.drucksache or "")} &nbsp;|&nbsp;
<strong>Datum:</strong> {_e(assessment.datum or "")} &nbsp;|&nbsp;
<strong>Fraktion(en):</strong> {_e(', '.join(assessment.fraktionen))} &nbsp;|&nbsp;
<strong>GWÖ-Score:</strong> <span style="color: {get_score_color(assessment.gwoe_score)}; font-weight: bold;">{assessment.gwoe_score}/10</span>
</div>
<div class="empfehlung-box">
<span class="symbol">{_e(empf_config.get('symbol', '[?]'))}</span>
<span class="text"><strong>Empfehlung:</strong> {_e(assessment.empfehlung.value)}</span>
</div>
<h2>Der Antrag im Überblick</h2>
<p>{_e(assessment.antrag_zusammenfassung or 'Keine Zusammenfassung verfügbar.')}</p>
{('<ul>' + ''.join(f'<li>{_e(k)}</li>' for k in assessment.antrag_kernpunkte) + '</ul>') if assessment.antrag_kernpunkte else ''}
<h2>GWÖ-Treue</h2>
<p style="font-size: 9pt;"><strong>Score:</strong> <span style="color: {get_score_color(assessment.gwoe_score)};">{assessment.gwoe_score}/10</span></p>
<div class="score-bar">
<div class="score-bar-fill" style="width: {assessment.gwoe_score * 10}%; background: {get_score_color(assessment.gwoe_score)};"></div>
</div>
<p><strong>Begründung:</strong> {_e(assessment.gwoe_begruendung or "")}</p>
<p><strong>Schwerpunkte:</strong> {_e(', '.join(assessment.gwoe_schwerpunkt))}</p>
<h2>Matrix-Zuordnung (Matrix 2.0 für Gemeinden)</h2>
{build_matrix_html(assessment)}
<p style="font-size: 7pt; color: #999;">
<strong>Legende:</strong> ++ stark fördernd, + fördernd, ○ neutral, − widersprechend, −− stark widersprechend
</p>
<h3>Berührte Themenfelder</h3>
<ul>
{''.join(f'<li><strong>{_e(e.field)}:</strong> {_e(e.aspect)} [{get_rating_symbol(e.rating)}]</li>' for e in assessment.gwoe_matrix)}
</ul>
<h2>Programmtreue</h2>
{''.join(f'''
<h3>{_e(s.fraktion)} {' (Antragsteller)' if s.ist_antragsteller else ''}{' (Regierung)' if s.ist_regierung else ''}</h3>
<p><strong>Wahlprogramm:</strong> {s.wahlprogramm.score}/10 — {_e(s.wahlprogramm.begruendung or "")}</p>
<p><strong>Parteiprogramm:</strong> {s.parteiprogramm.score}/10 — {_e(s.parteiprogramm.begruendung or "")}</p>
''' for s in assessment.wahlprogramm_scores)}
<h2>Verbesserungsvorschläge</h2>
{''.join(f'''
<div class="verbesserung">
<div class="original"><strong>Original:</strong><br>{_e(v.original or "")}</div>
<div class="vorschlag"><strong>Vorschlag:</strong><br>{format_redline_html(v.vorschlag)}</div>
<div style="font-style: italic; margin-top: 0.5rem;">{_e(v.begruendung or "")}</div>
</div>
''' for v in assessment.verbesserungen) or '<p>Keine Verbesserungsvorschläge.</p>'}
<h2>Zusammenfassung</h2>
<div class="two-columns">
<div class="staerken">
<h3 style="color: var(--color-green);">Stärken</h3>
<ul>
{''.join(f'<li>{_e(s)}</li>' for s in assessment.staerken) or '<li>(keine)</li>'}
</ul>
</div>
<div class="schwaechen">
<h3 style="color: var(--color-orange);">Schwächen</h3>
<ul>
{''.join(f'<li>{_e(s)}</li>' for s in assessment.schwaechen) or '<li>(keine)</li>'}
</ul>
</div>
</div>
<div class="footer">
<p>Erstellt mit GWÖ-Antragsprüfer v4.1 | Matrix 2.0 für Gemeinden</p>
<p style="color: var(--color-blue);">germany.econgood.org</p>
</div>
</body>
</html>"""
    # The document declares UTF-8 and contains non-ASCII text, so the file
    # must not be written in whatever the platform's locale encoding is.
    output_path.write_text(html, encoding="utf-8")
async def generate_html_report_v3(
    assessment: Assessment,
    output_path: Path,
    bundesland: Optional[str] = None,
) -> None:
    """Render the HTML for the v3 PDF layout and write it to ``output_path``.

    The context handed to the template ``v3/pdf/antrag_pdf.html`` contains:

    - ``matrix_lookup``: field -> ``{"rating": ...}`` for every matrix entry.
    - ``matrix_top`` / ``matrix_bottom``: up to four entries each with the
      highest positive / lowest negative ratings, including their
      ``label`` and ``aspect`` texts. They replace the interactive matrix
      click of the web view with a printed explanation.
    - ``score_color``: colour for the overall score.
    - ``parlament_name``: filled when ``bundesland`` is a known key of
      ``BUNDESLAENDER``, otherwise empty.
    - ``plenum_votes`` plus ``konsistenz_state`` / ``konsistenz_decisive``:
      loaded best-effort; any failure is logged and the report is rendered
      without votes.
    - ``heuchelei_by_fraktion`` / ``opportunismus_by_fraktion``: marker
      scores per fraktion, pre-computed here so the template only performs
      lookups.

    Args:
        assessment: The assessment to render.
        output_path: Target file, written as UTF-8.
        bundesland: Optional state code. When missing, ``"NRW"`` is used
            for the vote lookup.
    """
    matrix_lookup = {e.field: {"rating": e.rating} for e in assessment.gwoe_matrix}

    def _explained(entry) -> dict:
        """Reduce a matrix entry to the fields the template prints."""
        return {
            "field": entry.field,
            "label": entry.label,
            "aspect": entry.aspect,
            "rating": entry.rating,
        }

    # Highlighted fields: best positive and worst negative ratings. Both
    # sorts are stable, so entries with equal ratings keep their order.
    by_rating_desc = sorted(
        assessment.gwoe_matrix, key=lambda e: e.rating, reverse=True
    )
    by_rating_asc = sorted(assessment.gwoe_matrix, key=lambda e: e.rating)
    matrix_top = [_explained(e) for e in by_rating_desc if e.rating > 0][:4]
    matrix_bottom = [_explained(e) for e in by_rating_asc if e.rating < 0][:4]

    # Score colour (same thresholds as the scorecard, per the original note).
    s = assessment.gwoe_score
    if s >= 8:
        score_color = "#1a7f37"
    elif s >= 5:
        score_color = "#bf6c10"
    else:
        score_color = "#9a2a2a"

    parlament_name = ""
    if bundesland and bundesland in BUNDESLAENDER:
        parlament_name = BUNDESLAENDER[bundesland].parlament_name

    # Plenum votes are best-effort: a background job may run without a
    # usable database, in which case the report simply shows no votes.
    plenum_votes: list[dict] = []
    konsistenz_state: Optional[str] = None
    konsistenz_decisive: Optional[str] = None
    try:
        from .database import get_plenum_votes
        plenum_votes = await get_plenum_votes(
            bundesland or "NRW", assessment.drucksache,
        )
        if plenum_votes:
            from .marker import consistency_state, decisive_outcome
            konsistenz_state = consistency_state(
                assessment.empfehlung.value, plenum_votes,
            )
            konsistenz_decisive = decisive_outcome(plenum_votes)
    except Exception as exc:
        logger.warning(
            "Plenum-Votes fuer PDF nicht ladbar (drucksache=%s): %s",
            assessment.drucksache, exc,
        )

    # Pre-compute the Heuchelei/Opportunismus markers per fraktion. The
    # marker helpers are fed a list of plain dicts, so the score objects on
    # the assessment are remapped once into that shape.
    fraktions_scores_dict = [
        {
            "fraktion": fs.fraktion,
            "wahlprogramm": {
                "score": fs.wahlprogramm.score if fs.wahlprogramm else None,
            },
        }
        for fs in (assessment.wahlprogramm_scores or [])
    ]
    from .marker import heuchelei_score as _h, opportunismus_score as _o
    heuchelei_by_fraktion: dict[str, float] = {}
    opportunismus_by_fraktion: dict[str, float] = {}
    if plenum_votes and fraktions_scores_dict:
        # Separate bookkeeping per vote side: a fraktion may vote NEIN in
        # one vote and JA in another and must then be checked for both
        # markers, but only once per side.
        checked_nein: set[str] = set()
        checked_ja: set[str] = set()
        for vote in plenum_votes:
            for fraktion in (vote.get("fraktionen_nein") or []):
                if fraktion in checked_nein:
                    continue
                checked_nein.add(fraktion)
                marker = _h(fraktion, fraktions_scores_dict)
                if marker is not None:
                    heuchelei_by_fraktion[fraktion] = marker
            for fraktion in (vote.get("fraktionen_ja") or []):
                if fraktion in checked_ja:
                    continue
                checked_ja.add(fraktion)
                marker = _o(fraktion, fraktions_scores_dict)
                if marker is not None:
                    opportunismus_by_fraktion[fraktion] = marker

    template = _pdf_jinja.get_template("v3/pdf/antrag_pdf.html")
    html = template.render(
        assessment=assessment,
        matrix_lookup=matrix_lookup,
        matrix_top=matrix_top,
        matrix_bottom=matrix_bottom,
        score_color=score_color,
        parlament_name=parlament_name,
        bundesland=bundesland or "",
        plenum_votes=plenum_votes,
        konsistenz_state=konsistenz_state,
        konsistenz_decisive=konsistenz_decisive,
        heuchelei_by_fraktion=heuchelei_by_fraktion,
        opportunismus_by_fraktion=opportunismus_by_fraktion,
    )
    # Explicit encoding: the rendered text contains non-ASCII characters
    # and must not depend on the platform's locale encoding.
    output_path.write_text(html, encoding="utf-8")
async def generate_pdf_report(
    assessment: Assessment,
    output_path: Path,
    bundesland: Optional[str] = None,
) -> None:
    """Generate the PDF report with WeasyPrint, then append the original Antrag.

    Two-step pipeline:

    1. Render the report HTML with ``generate_html_report_v3`` into a
       temporary file next to ``output_path`` and convert it to PDF via
       WeasyPrint. The temporary HTML file is removed afterwards, also
       when rendering or conversion fails.
    2. Hand the finished PDF to ``_append_original_antrag``, which tries to
       attach the source document referenced by ``assessment.link``
       (issue #9). That step is best-effort.

    Args:
        assessment: The assessment to render.
        output_path: Target PDF file.
        bundesland: Optional state code, forwarded to
            ``generate_html_report_v3`` so the source parlament name can
            appear in the report.
    """
    # Step 1 — render the report itself (v3 layout).
    html_path = output_path.with_suffix('.tmp.html')
    try:
        await generate_html_report_v3(assessment, html_path, bundesland=bundesland)
        from weasyprint import HTML
        HTML(filename=str(html_path)).write_pdf(str(output_path))
    finally:
        # Covers every exit path, including a failure while the HTML file
        # was being written.
        html_path.unlink(missing_ok=True)

    # Step 2 — append the original Antrag (best-effort).
    await _append_original_antrag(assessment, output_path)
async def _append_original_antrag(
    assessment: Assessment,
    report_path: Path,
) -> None:
    """Try to download the original Antrag PDF and append it to ``report_path``.

    Nothing happens when ``assessment.link`` is empty or not an http(s)
    URL (manual upload / pasted text).

    Otherwise a divider page is always appended. When the download fails,
    the response is not a PDF, or the downloaded bytes cannot be opened as
    a PDF, the divider page states the reason and no further pages follow.
    Any failure while extending the report itself is logged and leaves the
    original report file untouched.
    """
    link = (assessment.link or "").strip()
    if not link.startswith(("http://", "https://")):
        # Manual upload / pasted text — nothing to append.
        return

    # Imported only once there is something to do, so a report without a
    # link does not depend on these packages being importable.
    import fitz  # PyMuPDF
    import httpx

    download_error: Optional[str] = None
    pdf_bytes: Optional[bytes] = None
    try:
        async with httpx.AsyncClient(
            timeout=30,
            follow_redirects=True,
            headers={"User-Agent": "Mozilla/5.0 GWOE-Antragspruefer"},
        ) as client:
            resp = await client.get(link)
            if resp.status_code != 200:
                download_error = f"HTTP {resp.status_code}"
            elif not resp.content[:5].startswith(b"%PDF-"):
                download_error = f"kein PDF (Content-Type: {resp.headers.get('content-type', 'unknown')})"
            else:
                pdf_bytes = resp.content
    except Exception as exc:
        download_error = f"Download-Fehler: {exc}"

    tmp_path: Optional[Path] = None
    try:
        report_doc = fitz.open(report_path)
        try:
            # Open the source before the divider page is drawn, so that a
            # parse error is shown on that page instead of a promise that
            # the original follows.
            src_doc = None
            if pdf_bytes is not None:
                try:
                    src_doc = fitz.open(stream=pdf_bytes, filetype="pdf")
                except Exception as exc:
                    logger.exception("_append_original_antrag: PDF-Parse-Fehler für %s", assessment.drucksache)
                    download_error = f"PDF-Parse-Fehler: {exc}"
            try:
                # Always insert a divider page so the user sees what comes next.
                _insert_divider_page(report_doc, assessment, download_error)
                if src_doc is not None:
                    try:
                        report_doc.insert_pdf(src_doc)
                    except Exception:
                        # Best-effort: keep the report plus divider page.
                        logger.exception("_append_original_antrag: PDF-Parse-Fehler für %s", assessment.drucksache)
            finally:
                if src_doc is not None:
                    src_doc.close()
            # PyMuPDF refuses to overwrite the source file in non-incremental
            # mode — write to a sibling temp file and replace afterwards.
            tmp_path = report_path.with_suffix(report_path.suffix + ".tmp")
            report_doc.save(
                str(tmp_path),
                deflate=True,
                garbage=3,
            )
        finally:
            report_doc.close()
        tmp_path.replace(report_path)
    except Exception:
        # Hard failure — leave the original report file untouched.
        logger.exception("_append_original_antrag: Konnte Report nicht erweitern für %s", assessment.drucksache)
        if tmp_path is not None:
            # Do not leave a half-written sibling file behind.
            try:
                tmp_path.unlink(missing_ok=True)
            except OSError:
                logger.warning("_append_original_antrag: Temp-Datei %s nicht entfernbar", tmp_path)
def _insert_divider_page(
    report_doc,  # fitz.Document
    assessment: Assessment,
    download_error: Optional[str],
) -> None:
    """Append a single A4 separator page that introduces the original Antrag.

    The page is drawn with PyMuPDF's text API directly, so no second
    WeasyPrint round-trip is needed for one page.

    Args:
        report_doc: The open report document; a new page is added to it.
        assessment: Supplies ``drucksache``, ``title`` and ``link``.
        download_error: Reason why the original could not be attached. When
            set, the page shows a warning, the reason and (if present) the
            source link; otherwise it announces the original on the
            following pages.
    """
    page = report_doc.new_page(width=595, height=842)  # A4
    margin_left = 60
    gray = (0.35, 0.35, 0.35)
    light_gray = (0.5, 0.5, 0.5)

    def put(y_pos: int, text: str, fontsize: int, color: tuple) -> None:
        """Draw one left-aligned line in the built-in Helvetica font."""
        page.insert_text(
            (margin_left, y_pos),
            text,
            fontsize=fontsize,
            fontname="helv",
            color=color,
        )

    y = 200
    # Heading
    put(y, "Original-Antrag", 24, (0 / 255, 157 / 255, 165 / 255))  # var(--color-blue)
    y += 38
    # Drucksache
    put(y, f"Drucksache {assessment.drucksache}", 14, gray)
    y += 22
    # Title, shortened to 75 characters so it fits on one line. The marker
    # is plain ASCII so a shortened title is recognisable as such.
    title = assessment.title or ""
    if len(title) > 75:
        title = title[:72] + "..."
    put(y, title, 11, gray)
    y += 40

    if download_error:
        # NOTE(review): the warning sign may not have a glyph in the
        # built-in "helv" font — confirm how it renders.
        put(y, "⚠ Original-PDF konnte nicht angehängt werden.", 11, (0.82, 0.0, 0.0))
        y += 18
        put(y, f"Grund: {download_error}", 10, light_gray)
        y += 18
        if assessment.link:
            put(y, f"Quelle: {assessment.link[:90]}", 9, light_gray)
    else:
        put(
            y,
            "Die folgenden Seiten enthalten den unveränderten Originalantrag.",
            11,
            gray,
        )