GWÖ-Antragsprüfung: {_e(assessment.title or "")}

"""Report generation for HTML and PDF output. All LLM-generated fields are HTML-escaped before being interpolated into the report template. WeasyPrint will happily resolve ``

`` or ```` against the container filesystem, so unescaped LLM output is a Local-File-Read primitive — see issue #57 (audit findings #2 and #6). The ``_e`` helper is the single funnel through which all LLM strings must pass on their way into the HTML. """ import logging import subprocess from html import escape as _e from pathlib import Path from typing import Optional from jinja2 import Environment, FileSystemLoader, select_autoescape logger = logging.getLogger(__name__) from .models import Assessment, MATRIX_LABELS, EMPFEHLUNG_CONFIG from .bundeslaender import BUNDESLAENDER # Eigene Jinja-Env fuer PDF-Templates (separat von Starlette templates, # weil report.py auch von Hintergrund-Jobs ohne FastAPI-Request laufen muss). _TEMPLATE_DIR = Path(__file__).parent / "templates" _pdf_jinja = Environment( loader=FileSystemLoader(str(_TEMPLATE_DIR)), autoescape=select_autoescape(["html"]), ) # ECOnGOOD Colors COLORS = { "darkgray": "#5a5a5a", "green": "#889e33", "blue": "#009da5", "lightgray": "#bfbfbf", "orange": "#F7941D", "red": "#d00000", } def get_score_color(score: float) -> str: """Get color for a score value.""" if score >= 7: return COLORS["blue"] if score >= 4: return COLORS["green"] if score >= 2: return "#FFC20E" if score >= 1: return COLORS["orange"] return COLORS["red"] def get_rating_symbol(rating: int) -> str: """Convert numeric rating to symbol — gleiche Logik wie in models.py und v2/components/matrix_mini.html. Skala -5..+5.""" if rating >= 4: return "++" if rating >= 1: return "+" if rating == 0: return "○" if rating <= -4: return "−−" return "−" def get_rating_class(rating: int) -> str: """5-Klassen-Coloring analog zu v2 matrix_mini (m-pp/m-p/m-0/m-n/m-nn) — vorher hatte das PDF nur 3 Klassen (positive/negative/neutral), was zu 'gleichfarbig' für + und ++ führte.""" if rating >= 4: return "rating-pp" if rating >= 1: return "rating-p" if rating == 0: return "rating-0" if rating <= -4: return "rating-nn" return "rating-n" def format_redline_html(text: str) -> str: """Convert redline markup (``**ins**`` / ``~~del~~``) to HTML. Escapes the input first so that any HTML in the LLM output (e.g. ``

``) becomes inert text. The marker regexes still fire because ``**`` and ``~~`` are not HTML special chars and survive escaping unchanged. The inserted ```` tags are the only raw HTML in the result and are produced by us. """ import re text = _e(text or "") # **text** → green bold (inserted) text = re.sub(r'\*\*([^*]+)\*\*', r'\1', text) # ~~text~~ → red strikethrough (deleted) text = re.sub(r'~~([^~]+)~~', r'\1', text) return text def build_matrix_html_v2(assessment: Assessment) -> str: """Render Matrix mit dem v2-Macro (matrix_mini) — gleiche Quelle wie die Web-View. Erste Stufe von #175 Phase 23: PDF nutzt v2-Block- Macros für Konsistenz-by-Design.""" from jinja2 import Environment, FileSystemLoader template_dir = Path(__file__).resolve().parent / "templates" env = Environment(loader=FileSystemLoader(str(template_dir)), autoescape=True) macro_template = env.get_template("v2/components/matrix_mini.html") matrix_dict = {} for e in assessment.gwoe_matrix: matrix_dict[e.field] = {"rating": e.rating, "symbol": ""} # Macro über `module` aufrufen module = macro_template.module return str(module.matrix_mini(matrix_dict)) def build_matrix_html(assessment: Assessment) -> str: """Legacy-Renderer: 5x5-Tabelle für PDF (Stand vor Phase 23). Hauptpfad rendert weiterhin diese Funktion — der v2-Macro-Pfad (build_matrix_html_v2) ist als Folge-Schritt verfügbar, sobald das v2.css-Stylesheet im PDF eingebunden ist. """ rating_map = {e.field: e for e in assessment.gwoe_matrix} rows = ["A", "B", "C", "D", "E"] row_labels = { "A": "Lieferant:innen", "B": "Finanzen", "C": "Führung/Verwaltung", "D": "Bürger:innen", "E": "Gesellschaft/Natur", } html = [''] html.append('') html.append('') for col in range(1, 6): html.append(f'') html.append('') html.append('') for row in rows: html.append(f'') for col in range(1, 6): field = f"{row}{col}" entry = rating_map.get(field) if entry: symbol = get_rating_symbol(entry.rating) css_class = get_rating_class(entry.rating) # entry.aspect comes from the LLM and is interpolated into a # title="..." attribute — escape it so a stray double-quote # cannot break out and inject attributes/handlers. html.append(f'') else: html.append('') html.append('') html.append('

	{col}
{row}: {row_labels[row]}	{symbol}

') return '\n'.join(html) async def generate_html_report( assessment: Assessment, output_path: Path, bundesland: Optional[str] = None, ) -> None: """Generate HTML report. ``bundesland`` is the optional state code (e.g. ``"NRW"``, ``"LSA"``). When set and known in ``BUNDESLAENDER``, the resulting report carries the parlament name in its header so the source parliament is always visible — important since assessments from multiple bundesländer share the same Drucksachen-ID space. """ empf_config = EMPFEHLUNG_CONFIG.get(assessment.empfehlung.value, {}) parlament_name = "" if bundesland and bundesland in BUNDESLAENDER: parlament_name = BUNDESLAENDER[bundesland].parlament_name html = f""" GWÖ-Antragsprüfung: {_e(assessment.title or "")}

Drucksache: {_e(assessment.drucksache or "")} | Datum: {_e(assessment.datum or "")} | Fraktion(en): {_e(', '.join(assessment.fraktionen))} | GWÖ-Score: {assessment.gwoe_score}/10

{_e(empf_config.get('symbol', '[?]'))} Empfehlung: {_e(assessment.empfehlung.value)}

Der Antrag im Überblick

{_e(assessment.antrag_zusammenfassung or 'Keine Zusammenfassung verfügbar.')}

{('

{_e(k)}

') if assessment.antrag_kernpunkte else ''}

GWÖ-Treue

Score: {assessment.gwoe_score}/10

Begründung: {_e(assessment.gwoe_begruendung or "")}

Schwerpunkte: {_e(', '.join(assessment.gwoe_schwerpunkt))}

Matrix-Zuordnung (Matrix 2.0 für Gemeinden)

{build_matrix_html(assessment)}

Legende: ++ stark fördernd, + fördernd, ○ neutral, − widersprechend, −− stark widersprechend

Berührte Themenfelder

{_e(e.field)}: {_e(e.aspect)} [{get_rating_symbol(e.rating)}]

Programmtreue

{''.join(f'''

{_e(s.fraktion)} {' (Antragsteller)' if s.ist_antragsteller else ''}{' (Regierung)' if s.ist_regierung else ''}

Wahlprogramm: {s.wahlprogramm.score}/10 — {_e(s.wahlprogramm.begruendung or "")}

Parteiprogramm: {s.parteiprogramm.score}/10 — {_e(s.parteiprogramm.begruendung or "")}

''' for s in assessment.wahlprogramm_scores)}

Verbesserungsvorschläge

{''.join(f'''

Original:
{_e(v.original or "")}

Vorschlag:
{format_redline_html(v.vorschlag)}

{_e(v.begruendung or "")}

''' for v in assessment.verbesserungen) or '

Keine Verbesserungsvorschläge.

Zusammenfassung

Stärken

{_e(s)}
(keine)

Schwächen

{_e(s)}
(keine)

""" output_path.write_text(html) async def generate_html_report_v3( assessment: Assessment, output_path: Path, bundesland: Optional[str] = None, ) -> None: """Render Antrags-PDF im neuen v3-Layout (single column, A4 portrait). Reuses die v3-Layout-Logik (Score-Hero, Matrix mit Achsen-Labels, Programm-Treue, Verbesserungen) und ergaenzt sie um die im PDF notwendigen Adaptionen: - Kein interaktiver Matrix-Klick → "Schwerpunkte erklaert"-Sektion listet die Top-3 positiven und Top-3 negativen Felder mit ihren LLM-generierten label/aspect-Texten unter der Matrix. - Plenum-Votes werden best-effort geladen, inkl. Konsistenz-Hinweis (Mehrheit deckt sich / gegen GWOE-Empfehlung). - Online-Elemente (Share, Vote-treffend, Kommentare, News, Modals) sind im Template gar nicht erst angelegt. Template: app/templates/v3/pdf/antrag_pdf.html """ matrix_lookup = {e.field: {"rating": e.rating} for e in assessment.gwoe_matrix} # Schwerpunkt-Felder mit Erklaerung: Top + Bottom Ratings. sorted_matrix = sorted( assessment.gwoe_matrix, key=lambda e: e.rating, reverse=True ) matrix_top = [ {"field": e.field, "label": e.label, "aspect": e.aspect, "rating": e.rating} for e in sorted_matrix if e.rating > 0 ][:4] matrix_bottom = [ {"field": e.field, "label": e.label, "aspect": e.aspect, "rating": e.rating} for e in sorted(sorted_matrix, key=lambda e: e.rating) if e.rating < 0 ][:4] # Score-Color (gleich wie Scorecard) s = assessment.gwoe_score if s >= 8: score_color = "#1a7f37" elif s >= 5: score_color = "#bf6c10" else: score_color = "#9a2a2a" parlament_name = "" if bundesland and bundesland in BUNDESLAENDER: parlament_name = BUNDESLAENDER[bundesland].parlament_name # Plenum-Votes best-effort (Hintergrund-Job kann ohne DB-Pfad laufen, # in dem Fall einfach keine Votes anzeigen). plenum_votes: list[dict] = [] konsistenz_state: Optional[str] = None konsistenz_decisive: Optional[str] = None try: from .database import get_plenum_votes plenum_votes = await get_plenum_votes( bundesland or "NRW", assessment.drucksache, ) if plenum_votes: from .marker import consistency_state, decisive_outcome konsistenz_state = consistency_state( assessment.empfehlung.value, plenum_votes, ) konsistenz_decisive = decisive_outcome(plenum_votes) except Exception as exc: logger.warning( "Plenum-Votes fuer PDF nicht ladbar (drucksache=%s): %s", assessment.drucksache, exc, ) # Pre-compute Heuchelei-/Opportunismus-Marker pro Fraktion. # Jinja-Globals sind im Web ``heuchelei_score`` und ``opportunismus_score``; # sie erwarten Dict-Listen (fraktions_scores aus _row_to_detail). Das # Pydantic-Assessment.wahlprogramm_scores hat dieselben Daten, aber als # Pydantic-Objekte. Wir mappen einmal um und rechnen die Marker für # alle abstimmenden Fraktionen, damit das Template nur noch Lookup # statt Logik macht. fraktions_scores_dict = [] for fs in (assessment.wahlprogramm_scores or []): fraktions_scores_dict.append({ "fraktion": fs.fraktion, "wahlprogramm": {"score": fs.wahlprogramm.score if fs.wahlprogramm else None}, }) from .marker import heuchelei_score as _h, opportunismus_score as _o heuchelei_by_fraktion: dict[str, float] = {} opportunismus_by_fraktion: dict[str, float] = {} if plenum_votes and fraktions_scores_dict: seen: set[str] = set() for v in plenum_votes: for f in (v.get("fraktionen_nein") or []): if f in seen: continue hv = _h(f, fraktions_scores_dict) if hv is not None: heuchelei_by_fraktion[f] = hv for f in (v.get("fraktionen_ja") or []): if f in seen: continue ov = _o(f, fraktions_scores_dict) if ov is not None: opportunismus_by_fraktion[f] = ov template = _pdf_jinja.get_template("v3/pdf/antrag_pdf.html") html = template.render( assessment=assessment, matrix_lookup=matrix_lookup, matrix_top=matrix_top, matrix_bottom=matrix_bottom, score_color=score_color, parlament_name=parlament_name, bundesland=bundesland or "", plenum_votes=plenum_votes, konsistenz_state=konsistenz_state, konsistenz_decisive=konsistenz_decisive, heuchelei_by_fraktion=heuchelei_by_fraktion, opportunismus_by_fraktion=opportunismus_by_fraktion, ) output_path.write_text(html) async def generate_pdf_report( assessment: Assessment, output_path: Path, bundesland: Optional[str] = None, ) -> None: """Generate PDF report using WeasyPrint, then append the original Antrag. Two-step pipeline: 1. Render the GWÖ-Report HTML and convert to PDF via WeasyPrint (existing behaviour). 2. If ``assessment.link`` is a fetchable PDF URL, download it via ``httpx`` and append it after a separator page so the resulting single file contains both the analysis and its source document (issue #9). The append step is best-effort: a missing/empty link is silently skipped, network errors and parse errors fall back to a single placeholder page so the report itself is always delivered. ``bundesland`` is forwarded to ``generate_html_report`` so the source parlament name appears in the report header. """ # Step 1 — render the report itself, neues v3-Layout (single column, # Score-Hero, Matrix mit Achsen-Labels, Schwerpunkte-erklaert). html_path = output_path.with_suffix('.tmp.html') await generate_html_report_v3(assessment, html_path, bundesland=bundesland) try: from weasyprint import HTML HTML(filename=str(html_path)).write_pdf(str(output_path)) finally: html_path.unlink(missing_ok=True) # Step 2 — append the original Antrag (best-effort) await _append_original_antrag(assessment, output_path) async def _append_original_antrag( assessment: Assessment, report_path: Path, ) -> None: """Try to download the original Antrag PDF and append it to ``report_path``. Failure modes (download error, non-PDF content, parse error) are handled gracefully: a single placeholder page is appended noting the issue, so the user always sees that an attempt was made. """ import fitz # PyMuPDF import httpx link = (assessment.link or "").strip() if not link or not link.startswith(("http://", "https://")): # Manual upload / pasted text — nothing to append. return download_error: Optional[str] = None pdf_bytes: Optional[bytes] = None try: async with httpx.AsyncClient( timeout=30, follow_redirects=True, headers={"User-Agent": "Mozilla/5.0 GWOE-Antragspruefer"}, ) as client: resp = await client.get(link) if resp.status_code != 200: download_error = f"HTTP {resp.status_code}" elif not resp.content[:5].startswith(b"%PDF-"): download_error = f"kein PDF (Content-Type: {resp.headers.get('content-type', 'unknown')})" else: pdf_bytes = resp.content except Exception as e: download_error = f"Download-Fehler: {e}" try: report_doc = fitz.open(report_path) try: # Always insert a divider page so the user sees what comes next _insert_divider_page(report_doc, assessment, download_error) if pdf_bytes is not None: try: src_doc = fitz.open(stream=pdf_bytes, filetype="pdf") try: report_doc.insert_pdf(src_doc) finally: src_doc.close() except Exception as e: logger.exception("_append_original_antrag: PDF-Parse-Fehler für %s", assessment.drucksache) # PyMuPDF refuses to overwrite the source file in non-incremental # mode — write to a sibling temp file and atomically replace. tmp_path = report_path.with_suffix(report_path.suffix + ".tmp") report_doc.save( str(tmp_path), deflate=True, garbage=3, ) finally: report_doc.close() tmp_path.replace(report_path) except Exception as e: # Hard failure — leave the original report file untouched. logger.exception("_append_original_antrag: Konnte Report nicht erweitern für %s", assessment.drucksache) def _insert_divider_page( report_doc, # fitz.Document assessment: Assessment, download_error: Optional[str], ) -> None: """Append a single A4 separator page that introduces the original Antrag. Uses PyMuPDF's text drawing API directly so we don't need a second WeasyPrint round-trip just for one page. """ page = report_doc.new_page(width=595, height=842) # A4 margin_left = 60 y = 200 # Title page.insert_text( (margin_left, y), "Original-Antrag", fontsize=24, fontname="helv", color=(0 / 255, 157 / 255, 165 / 255), # var(--color-blue) ) y += 38 # Drucksache page.insert_text( (margin_left, y), f"Drucksache {assessment.drucksache}", fontsize=14, fontname="helv", color=(0.35, 0.35, 0.35), ) y += 22 # Title (truncated to ~75 chars to fit one line) title = assessment.title or "" if len(title) > 75: title = title[:72] + "…" page.insert_text( (margin_left, y), title, fontsize=11, fontname="helv", color=(0.35, 0.35, 0.35), ) y += 40 if download_error: page.insert_text( (margin_left, y), "⚠ Original-PDF konnte nicht angehängt werden.", fontsize=11, fontname="helv", color=(0.82, 0.0, 0.0), ) y += 18 page.insert_text( (margin_left, y), f"Grund: {download_error}", fontsize=10, fontname="helv", color=(0.5, 0.5, 0.5), ) y += 18 if assessment.link: page.insert_text( (margin_left, y), f"Quelle: {assessment.link[:90]}", fontsize=9, fontname="helv", color=(0.5, 0.5, 0.5), ) else: page.insert_text( (margin_left, y), "Die folgenden Seiten enthalten den unveränderten Originalantrag.", fontsize=11, fontname="helv", color=(0.35, 0.35, 0.35), )