From 80e16df2880988124a23d274bd72e31100ca1845 Mon Sep 17 00:00:00 2001
From: Dotty Dotter <dotty@Mac-mini-von-Dotty.local>
Date: Tue, 7 Apr 2026 23:15:05 +0200
Subject: [PATCH] =?UTF-8?q?Append=20original=20Antrag-PDF=20to=20GW=C3=96-?=
 =?UTF-8?q?Report=20(#9)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extends generate_pdf_report() with a best-effort second stage that
appends the original Antrag PDF to the freshly rendered GWÖ-Report so
the analysis and its source document live in the same file.

Pipeline
1. WeasyPrint renders the report PDF as before.
2. _append_original_antrag() then:
   - Skips silently if assessment.link is empty or non-HTTP (manual
     uploads / pasted text leave nothing to fetch).
   - Downloads the original PDF via httpx (30s timeout, follow redirects,
     custom user agent).
   - Validates the response is actually a PDF (Content-Type not relied
     on; the magic bytes %PDF- are checked).
   - Adds a single A4 separator page that says "Original-Antrag",
     repeats the Drucksachen-ID and title, and either confirms the
     append or shows the failure reason (HTTP code, network error,
     parse error) plus the source URL.
   - Appends the downloaded PDF via PyMuPDF doc.insert_pdf().
   - Saves to a sibling .tmp file and atomically replaces the original
     (PyMuPDF refuses non-incremental save into the same file).

Edge cases handled
- No link / pasted-text upload → no append, no divider, original report
  unchanged.
- Download error / 404 / non-PDF response → divider page with explicit
  error message and source URL, report still ships.
- PDF parse error → divider page without appended content, error logged.
- Hard failure during save → fall back to the original WeasyPrint PDF.

Verified live in production container against drucksache 8/6645
(Untrending Frauenhass, BÜNDNIS 90/DIE GRÜNEN LSA):
- Report 4 pages + 1 divider + 3 pages original = 8 pages total
- Divider correctly placed at index 4
- Page 5 starts with "(Ausgegeben am 24.02.2026) … Drucksache 8/6645 …
  Antrag — Fraktion BÜNDNIS 90/DIE GRÜNEN — Untrending Frauenhass …"
- Negative test with a synthetic 404 link: 5 pages total, divider at
  index 4 with "Original-PDF konnte nicht angehängt werden. Grund: HTTP
  404".

Resolves #9.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 app/report.py | 170 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 168 insertions(+), 2 deletions(-)

diff --git a/app/report.py b/app/report.py
index b125cdb..87c3ee4 100644
--- a/app/report.py
+++ b/app/report.py
@@ -444,12 +444,25 @@ async def generate_pdf_report(
     output_path: Path,
     bundesland: Optional[str] = None,
 ) -> None:
-    """Generate PDF report using WeasyPrint.
+    """Generate PDF report using WeasyPrint, then append the original Antrag.
+
+    Two-step pipeline:
+
+    1. Render the GWÖ-Report HTML and convert to PDF via WeasyPrint
+       (existing behaviour).
+    2. If ``assessment.link`` is a fetchable PDF URL, download it via
+       ``httpx`` and append it after a separator page so the resulting
+       single file contains both the analysis and its source document
+       (issue #9).
+
+    The append step is best-effort: a missing/empty link is silently
+    skipped, network errors and parse errors fall back to a single
+    placeholder page so the report itself is always delivered.
 
     ``bundesland`` is forwarded to ``generate_html_report`` so the source
     parlament name appears in the report header.
     """
-    # First generate HTML
+    # Step 1 — render the report itself
     html_path = output_path.with_suffix('.tmp.html')
     await generate_html_report(assessment, html_path, bundesland=bundesland)
 
@@ -458,3 +471,156 @@ async def generate_pdf_report(
         HTML(filename=str(html_path)).write_pdf(str(output_path))
     finally:
         html_path.unlink(missing_ok=True)
+
+    # Step 2 — append the original Antrag (best-effort)
+    await _append_original_antrag(assessment, output_path)
+
+
+async def _append_original_antrag(
+    assessment: Assessment,
+    report_path: Path,
+) -> None:
+    """Try to download the original Antrag PDF and append it to ``report_path``.
+
+    Returns without touching the report when ``assessment.link`` is empty
+    or not an HTTP(S) URL. Otherwise a divider page is added; it shows the
+    reason when the download or the PDF parse failed. If extending the
+    report itself fails, the error is printed and the report is left as is.
+    """
+    import fitz  # PyMuPDF
+    import httpx
+
+    link = (assessment.link or "").strip()
+    if not link.startswith(("http://", "https://")):
+        return  # Manual upload / pasted text — nothing to append.
+
+    download_error: Optional[str] = None
+    pdf_bytes: Optional[bytes] = None
+    try:
+        async with httpx.AsyncClient(
+            timeout=30,
+            follow_redirects=True,
+            headers={"User-Agent": "Mozilla/5.0 GWOE-Antragspruefer"},
+        ) as client:
+            resp = await client.get(link)
+        if resp.status_code != 200:
+            download_error = f"HTTP {resp.status_code}"
+        elif not resp.content.startswith(b"%PDF-"):
+            download_error = f"kein PDF (Content-Type: {resp.headers.get('content-type', 'unknown')})"
+        else:
+            pdf_bytes = resp.content
+    except Exception as e:  # best-effort: any fetch problem becomes a notice
+        download_error = f"Download-Fehler: {e}"
+
+    # PyMuPDF refuses to overwrite the source file in non-incremental
+    # mode — write to a sibling temp file and atomically replace.
+    tmp_path = report_path.with_suffix(report_path.suffix + ".tmp")
+    src_doc = None
+    try:
+        if pdf_bytes is not None:
+            # Parse before drawing the divider, so a broken PDF is reported
+            # on that page instead of being announced as appended.
+            try:
+                src_doc = fitz.open(stream=pdf_bytes, filetype="pdf")
+            except Exception as e:
+                download_error = f"PDF-Parse-Fehler: {e}"
+                print(f"_append_original_antrag: PDF-Parse-Fehler für {assessment.drucksache}: {e}")
+        report_doc = fitz.open(report_path)
+        try:
+            _insert_divider_page(report_doc, assessment, download_error)
+            if src_doc is not None:
+                report_doc.insert_pdf(src_doc)
+            report_doc.save(str(tmp_path), deflate=True, garbage=3)
+        finally:
+            report_doc.close()
+        tmp_path.replace(report_path)
+    except Exception as e:
+        # Hard failure (opening, inserting or saving) — the temp file is
+        # only swapped in after a successful save, so the original report
+        # file stays untouched.
+        print(f"_append_original_antrag: Konnte Report nicht erweitern für {assessment.drucksache}: {e}")
+    finally:
+        if src_doc is not None:
+            src_doc.close()
+
+
+def _insert_divider_page(
+    report_doc,  # fitz.Document
+    assessment: Assessment,
+    download_error: Optional[str],
+) -> None:
+    """Append a single A4 separator page that introduces the original Antrag.
+
+    Uses PyMuPDF's text drawing API directly so we don't need a second
+    WeasyPrint round-trip just for one page.
+
+    The page shows a heading, the Drucksachen-ID, the (truncated) title and
+    either a confirmation line or the failure reason plus the source URL.
+
+    Args:
+        report_doc: Open ``fitz.Document``; the page is added to it in place.
+        assessment: Supplies ``drucksache``, ``title`` and ``link``.
+        download_error: Reason to display instead of the confirmation
+            line, or ``None`` if nothing went wrong.
+    """
+    # "helv" is a built-in single-byte font: PyMuPDF cannot draw code points
+    # above U+00FF with it. Literals below therefore stay within Latin-1,
+    # and common typographic characters in dynamic text (title, error
+    # reason, link) are mapped to Latin-1 look-alikes before drawing.
+    latin1_fallbacks = str.maketrans({
+        "\u201e": '"', "\u201c": '"', "\u201d": '"',  # double quotes
+        "\u2018": "'", "\u2019": "'",  # single quotes
+        "\u2013": "-", "\u2014": "-",  # en/em dash
+        "\u2026": "...",  # ellipsis
+    })
+
+    # Colours as RGB fractions in the range 0..1.
+    teal = (0 / 255, 157 / 255, 165 / 255)  # var(--color-blue)
+    gray = (0.35, 0.35, 0.35)
+    light_gray = (0.5, 0.5, 0.5)
+    red = (0.82, 0.0, 0.0)
+
+    page = report_doc.new_page(width=595, height=842)  # A4
+    margin_left = 60
+
+    def draw(y: int, text: str, fontsize: int, color: tuple) -> None:
+        """Draw one line of text starting at ``(margin_left, y)``."""
+        page.insert_text(
+            (margin_left, y),
+            text.translate(latin1_fallbacks),
+            fontsize=fontsize,
+            fontname="helv",
+            color=color,
+        )
+
+    # Heading
+    y = 200
+    draw(y, "Original-Antrag", 24, teal)
+    y += 38
+
+    # Drucksache
+    draw(y, f"Drucksache {assessment.drucksache}", 14, gray)
+    y += 22
+
+    # Title (mapped first, then truncated to 75 chars to fit one line)
+    title = (assessment.title or "").translate(latin1_fallbacks)
+    if len(title) > 75:
+        title = title[:72] + "..."
+    draw(y, title, 11, gray)
+    y += 40
+
+    if download_error:
+        # No U+26A0 warning sign here: "helv" has no glyph for it.
+        draw(y, "Original-PDF konnte nicht angehängt werden.", 11, red)
+        y += 18
+        draw(y, f"Grund: {download_error}", 10, light_gray)
+        y += 18
+        if assessment.link:
+            draw(y, f"Quelle: {assessment.link[:90]}", 9, light_gray)
+    else:
+        draw(
+            y,
+            "Die folgenden Seiten enthalten den unveränderten Originalantrag.",
+            11,
+            gray,
+        )