diff --git a/app/report.py b/app/report.py index b125cdb..87c3ee4 100644 --- a/app/report.py +++ b/app/report.py @@ -444,12 +444,25 @@ async def generate_pdf_report( output_path: Path, bundesland: Optional[str] = None, ) -> None: - """Generate PDF report using WeasyPrint. + """Generate PDF report using WeasyPrint, then append the original Antrag. + + Two-step pipeline: + + 1. Render the GWÖ-Report HTML and convert to PDF via WeasyPrint + (existing behaviour). + 2. If ``assessment.link`` is a fetchable PDF URL, download it via + ``httpx`` and append it after a separator page so the resulting + single file contains both the analysis and its source document + (issue #9). + + The append step is best-effort: a missing/empty link is silently + skipped, network errors and parse errors fall back to a single + placeholder page so the report itself is always delivered. ``bundesland`` is forwarded to ``generate_html_report`` so the source parlament name appears in the report header. """ - # First generate HTML + # Step 1 — render the report itself html_path = output_path.with_suffix('.tmp.html') await generate_html_report(assessment, html_path, bundesland=bundesland) @@ -458,3 +471,156 @@ async def generate_pdf_report( HTML(filename=str(html_path)).write_pdf(str(output_path)) finally: html_path.unlink(missing_ok=True) + + # Step 2 — append the original Antrag (best-effort) + await _append_original_antrag(assessment, output_path) + + +async def _append_original_antrag( + assessment: Assessment, + report_path: Path, +) -> None: + """Try to download the original Antrag PDF and append it to ``report_path``. + + Failure modes (download error, non-PDF content, parse error) are + handled gracefully: a single placeholder page is appended noting the + issue, so the user always sees that an attempt was made. + """ + import fitz # PyMuPDF + import httpx + + link = (assessment.link or "").strip() + if not link or not link.startswith(("http://", "https://")): + # Manual upload / pasted text — nothing to append. + return + + download_error: Optional[str] = None + pdf_bytes: Optional[bytes] = None + try: + async with httpx.AsyncClient( + timeout=30, + follow_redirects=True, + headers={"User-Agent": "Mozilla/5.0 GWOE-Antragspruefer"}, + ) as client: + resp = await client.get(link) + if resp.status_code != 200: + download_error = f"HTTP {resp.status_code}" + elif not resp.content[:5].startswith(b"%PDF-"): + download_error = f"kein PDF (Content-Type: {resp.headers.get('content-type', 'unknown')})" + else: + pdf_bytes = resp.content + except Exception as e: + download_error = f"Download-Fehler: {e}" + + try: + report_doc = fitz.open(report_path) + try: + # Always insert a divider page so the user sees what comes next + _insert_divider_page(report_doc, assessment, download_error) + + if pdf_bytes is not None: + try: + src_doc = fitz.open(stream=pdf_bytes, filetype="pdf") + try: + report_doc.insert_pdf(src_doc) + finally: + src_doc.close() + except Exception as e: + print(f"_append_original_antrag: PDF-Parse-Fehler für {assessment.drucksache}: {e}") + + # PyMuPDF refuses to overwrite the source file in non-incremental + # mode — write to a sibling temp file and atomically replace. + tmp_path = report_path.with_suffix(report_path.suffix + ".tmp") + report_doc.save( + str(tmp_path), + deflate=True, + garbage=3, + ) + finally: + report_doc.close() + tmp_path.replace(report_path) + except Exception as e: + # Hard failure — leave the original report file untouched. + print(f"_append_original_antrag: Konnte Report nicht erweitern für {assessment.drucksache}: {e}") + + +def _insert_divider_page( + report_doc, # fitz.Document + assessment: Assessment, + download_error: Optional[str], +) -> None: + """Append a single A4 separator page that introduces the original Antrag. + + Uses PyMuPDF's text drawing API directly so we don't need a second + WeasyPrint round-trip just for one page. + """ + page = report_doc.new_page(width=595, height=842) # A4 + margin_left = 60 + y = 200 + + # Title + page.insert_text( + (margin_left, y), + "Original-Antrag", + fontsize=24, + fontname="helv", + color=(0 / 255, 157 / 255, 165 / 255), # var(--color-blue) + ) + y += 38 + + # Drucksache + page.insert_text( + (margin_left, y), + f"Drucksache {assessment.drucksache}", + fontsize=14, + fontname="helv", + color=(0.35, 0.35, 0.35), + ) + y += 22 + + # Title (truncated to ~75 chars to fit one line) + title = assessment.title or "" + if len(title) > 75: + title = title[:72] + "…" + page.insert_text( + (margin_left, y), + title, + fontsize=11, + fontname="helv", + color=(0.35, 0.35, 0.35), + ) + y += 40 + + if download_error: + page.insert_text( + (margin_left, y), + "⚠ Original-PDF konnte nicht angehängt werden.", + fontsize=11, + fontname="helv", + color=(0.82, 0.0, 0.0), + ) + y += 18 + page.insert_text( + (margin_left, y), + f"Grund: {download_error}", + fontsize=10, + fontname="helv", + color=(0.5, 0.5, 0.5), + ) + y += 18 + if assessment.link: + page.insert_text( + (margin_left, y), + f"Quelle: {assessment.link[:90]}", + fontsize=9, + fontname="helv", + color=(0.5, 0.5, 0.5), + ) + else: + page.insert_text( + (margin_left, y), + "Die folgenden Seiten enthalten den unveränderten Originalantrag.", + fontsize=11, + fontname="helv", + color=(0.35, 0.35, 0.35), + )