Append original Antrag-PDF to GWÖ-Report (#9)
Extends generate_pdf_report() with a best-effort second stage that
appends the original Antrag PDF to the freshly rendered GWÖ-Report so
the analysis and its source document live in the same file.
Pipeline
1. WeasyPrint renders the report PDF as before.
2. _append_original_antrag() then:
- Skips silently if assessment.link is empty or non-HTTP (manual
uploads / pasted text leave nothing to fetch).
- Downloads the original PDF via httpx (30s timeout, follow redirects,
custom user agent).
- Validates the response is actually a PDF (the Content-Type header is
not relied on; the magic bytes %PDF- are checked instead).
- Adds a single A4 separator page that says "Original-Antrag",
repeats the Drucksachen-ID and title, and either confirms the
append or shows the failure reason (HTTP code, network error,
parse error) plus the source URL.
- Appends the downloaded PDF via PyMuPDF doc.insert_pdf().
- Saves to a sibling .tmp file and atomically replaces the original
(PyMuPDF refuses non-incremental save into the same file).
Edge cases handled
- No link / pasted-text upload → no append, no divider, original report
unchanged.
- Download error / 404 / non-PDF response → divider page with explicit
error message and source URL, report still ships.
- PDF parse error → divider page without appended content, error logged.
- Hard failure during save → fall back to the original WeasyPrint PDF.
Verified live in production container against drucksache 8/6645
(Untrending Frauenhass, BÜNDNIS 90/DIE GRÜNEN LSA):
- Report 4 pages + 1 divider + 3 pages original = 8 pages total
- Divider correctly placed at index 4
- Page 5 starts with "(Ausgegeben am 24.02.2026) … Drucksache 8/6645 …
Antrag — Fraktion BÜNDNIS 90/DIE GRÜNEN — Untrending Frauenhass …"
- Negative test with a synthetic 404 link: 5 pages total, divider at
index 4 with "Original-PDF konnte nicht angehängt werden. Grund: HTTP
404".
Resolves #9.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
f1867d463c
commit
80e16df288
170
app/report.py
170
app/report.py
@ -444,12 +444,25 @@ async def generate_pdf_report(
|
||||
output_path: Path,
|
||||
bundesland: Optional[str] = None,
|
||||
) -> None:
|
||||
"""Generate PDF report using WeasyPrint.
|
||||
"""Generate PDF report using WeasyPrint, then append the original Antrag.
|
||||
|
||||
Two-step pipeline:
|
||||
|
||||
1. Render the GWÖ-Report HTML and convert to PDF via WeasyPrint
|
||||
(existing behaviour).
|
||||
2. If ``assessment.link`` is a fetchable PDF URL, download it via
|
||||
``httpx`` and append it after a separator page so the resulting
|
||||
single file contains both the analysis and its source document
|
||||
(issue #9).
|
||||
|
||||
The append step is best-effort: a missing/empty link is silently
|
||||
skipped, network errors and parse errors fall back to a single
|
||||
placeholder page so the report itself is always delivered.
|
||||
|
||||
``bundesland`` is forwarded to ``generate_html_report`` so the source
|
||||
parlament name appears in the report header.
|
||||
"""
|
||||
# First generate HTML
|
||||
# Step 1 — render the report itself
|
||||
html_path = output_path.with_suffix('.tmp.html')
|
||||
await generate_html_report(assessment, html_path, bundesland=bundesland)
|
||||
|
||||
@ -458,3 +471,156 @@ async def generate_pdf_report(
|
||||
HTML(filename=str(html_path)).write_pdf(str(output_path))
|
||||
finally:
|
||||
html_path.unlink(missing_ok=True)
|
||||
|
||||
# Step 2 — append the original Antrag (best-effort)
|
||||
await _append_original_antrag(assessment, output_path)
|
||||
|
||||
|
||||
async def _append_original_antrag(
    assessment: Assessment,
    report_path: Path,
) -> None:
    """Download the original Antrag PDF and append it to ``report_path``.

    The step is best-effort and never raises:

    * If ``assessment.link`` is empty or not an ``http(s)`` URL, the report
      is left untouched and no divider page is added.
    * If the download fails, the response is not a PDF, or the downloaded
      bytes cannot be opened as a PDF, only a divider page stating the
      reason is appended.
    * If extending or saving the report itself fails, the report file on
      disk is left as it was and the temporary file is removed.

    Args:
        assessment: Source of ``link``, ``drucksache`` and the fields the
            divider page reads.
        report_path: Path of the already rendered report PDF; it is
            replaced in place on success.
    """
    link = (assessment.link or "").strip()
    if not link.startswith(("http://", "https://")):
        # Manual upload / pasted text — nothing to fetch, nothing to append.
        return

    # Imported only after the early exit so the no-link path does not need
    # the PDF/HTTP libraries at all.
    import fitz  # PyMuPDF
    import httpx

    failure_reason: Optional[str] = None
    pdf_bytes: Optional[bytes] = None
    try:
        async with httpx.AsyncClient(
            timeout=30,
            follow_redirects=True,
            headers={"User-Agent": "Mozilla/5.0 GWOE-Antragspruefer"},
        ) as client:
            resp = await client.get(link)
            if resp.status_code != 200:
                failure_reason = f"HTTP {resp.status_code}"
            elif not resp.content.startswith(b"%PDF-"):
                # Trust the magic bytes, not the Content-Type header; the
                # header is only reported to help diagnose the failure.
                content_type = resp.headers.get("content-type", "unknown")
                failure_reason = f"kein PDF (Content-Type: {content_type})"
            else:
                pdf_bytes = resp.content
    except Exception as e:
        # Some exceptions stringify to "", so fall back to the type name
        # to keep the reason shown on the divider page meaningful.
        failure_reason = f"Download-Fehler: {str(e) or type(e).__name__}"

    # Parse the download BEFORE drawing the divider page, so the divider
    # never promises an attachment that then turns out to be unreadable.
    src_doc = None
    if pdf_bytes is not None:
        try:
            src_doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        except Exception as e:
            failure_reason = f"PDF-Parse-Fehler: {str(e) or type(e).__name__}"
            print(f"_append_original_antrag: PDF-Parse-Fehler für {assessment.drucksache}: {e}")

    tmp_path: Optional[Path] = None
    try:
        report_doc = fitz.open(report_path)
        try:
            _insert_divider_page(report_doc, assessment, failure_reason)

            if src_doc is not None:
                try:
                    report_doc.insert_pdf(src_doc)
                except Exception as e:
                    print(f"_append_original_antrag: PDF-Parse-Fehler für {assessment.drucksache}: {e}")

            # PyMuPDF refuses to overwrite the source file in non-incremental
            # mode — write to a sibling temp file and atomically replace.
            tmp_path = report_path.with_suffix(report_path.suffix + ".tmp")
            report_doc.save(
                str(tmp_path),
                deflate=True,
                garbage=3,
            )
        finally:
            report_doc.close()
        tmp_path.replace(report_path)
    except Exception as e:
        # Hard failure — leave the original report file untouched and do
        # not leave a half-written temp file next to it.
        if tmp_path is not None:
            try:
                tmp_path.unlink(missing_ok=True)
            except OSError:
                pass  # cleanup is best-effort as well
        print(f"_append_original_antrag: Konnte Report nicht erweitern für {assessment.drucksache}: {e}")
    finally:
        if src_doc is not None:
            src_doc.close()
|
||||
|
||||
|
||||
# Typographic characters that commonly occur in titles but lie outside
# Latin-1, mapped to plain equivalents the built-in "helv" font can draw.
_DIVIDER_CHAR_MAP = str.maketrans({
    "\u2026": "...",
    "\u2013": "-",
    "\u2014": "-",
    "\u201e": '"',
    "\u201c": '"',
    "\u201d": '"',
    "\u201a": "'",
    "\u2018": "'",
    "\u2019": "'",
    "\u20ac": "EUR",
})


def _latin1_line(text, limit: Optional[int] = None) -> str:
    """Return ``text`` as one Latin-1-only line, optionally truncated.

    Whitespace runs (including newlines) collapse to single spaces, known
    typographic characters are mapped to plain equivalents, and any other
    character outside Latin-1 becomes ``?``. If ``limit`` is given and the
    result is longer, it is cut to ``limit`` characters ending in ``...``.
    """
    flat = " ".join(str(text).split()).translate(_DIVIDER_CHAR_MAP)
    flat = flat.encode("latin-1", "replace").decode("latin-1")
    if limit is not None and len(flat) > limit:
        flat = flat[: limit - 3] + "..."
    return flat


def _insert_divider_page(
    report_doc,  # fitz.Document
    assessment: Assessment,
    download_error: Optional[str],
) -> None:
    """Append a single A4 separator page that introduces the original Antrag.

    Uses PyMuPDF's text drawing API directly so no second WeasyPrint
    round-trip is needed for one page. Every string is reduced to a single
    Latin-1 line first, because the text is drawn with the built-in
    ``helv`` font.

    Args:
        report_doc: Open PyMuPDF document; the page is added via
            ``new_page`` without a position argument.
        assessment: Provides ``drucksache``, ``title`` and ``link``.
        download_error: Reason the original could not be attached, or
            ``None``/empty when the original follows the divider.
    """
    page = report_doc.new_page(width=595, height=842)  # A4
    margin_left = 60
    grey = (0.35, 0.35, 0.35)
    light_grey = (0.5, 0.5, 0.5)
    y = 200

    def write(text, size, color):
        # One line of text at the current cursor position.
        page.insert_text(
            (margin_left, y),
            text,
            fontsize=size,
            fontname="helv",
            color=color,
        )

    # Heading
    write("Original-Antrag", 24, (0 / 255, 157 / 255, 165 / 255))  # var(--color-blue)
    y += 38

    # Drucksache
    write(_latin1_line(f"Drucksache {assessment.drucksache}"), 14, grey)
    y += 22

    # Title (truncated to 75 chars to fit one line)
    write(_latin1_line(assessment.title or "", 75), 11, grey)
    y += 40

    if download_error:
        # No warning glyph here: it is outside Latin-1 and "helv" cannot
        # draw it; the red colour carries the emphasis instead.
        write("Original-PDF konnte nicht angehängt werden.", 11, (0.82, 0.0, 0.0))
        y += 18
        # Bounded to one line so a long or multi-line reason cannot run off
        # the page or overlap the source line below.
        write("Grund: " + _latin1_line(download_error, 90), 10, light_grey)
        y += 18
        if assessment.link:
            write(f"Quelle: {_latin1_line(assessment.link)[:90]}", 9, light_grey)
    else:
        write(
            "Die folgenden Seiten enthalten den unveränderten Originalantrag.",
            11,
            grey,
        )
|
||||
|
||||
Loading…
Reference in New Issue
Block a user