Drei akute Befunde aus dem Live-System-Audit (Issue #57): - **#1 HIGH** — Resource Exhaustion via öffentlichem POST: slowapi Limiter (in-memory, IP-key) auf /analyze (10/min), /api/analyze-drucksache (10/min) und /api/programme/index (3/min). Verhindert, dass ein unauthentifizierter Client mit einer Schleife die DashScope-Quota oder die CPU des Containers leerziehen kann. Default-Storage reicht, solange wir auf einem einzigen Worker laufen. - **#2 MEDIUM** + **#6 MEDIUM** (selber Root-Cause) — XXE/Local-File-Read via WeasyPrint und Stored XSS via Browser-Rendering: alle LLM-getragenen Felder in app/report.py laufen jetzt durch html.escape(), bevor sie in das HTML-Template interpoliert werden. format_redline_html escaped zuerst und ersetzt dann die Markdown-Marker durch von uns kontrollierte <span>-Tags. build_matrix_html escaped das aspect-Feld, sodass ein nacktes " den title="..."-Wert nicht mehr beenden und einen Event-Handler injizieren kann. Toter jinja2-Import in report.py entfernt (wurde nie verwendet, blockierte nur den lokalen Test). - **Tests** — neue tests/test_report.py mit 8 Cases, die direkt die Bug-Klasse verifizieren: <script>, file://-img, "-Attribut-Breakout im title-Attribut und ein End-to-End-Render mit XSS-Payloads in jedem LLM-Feld. Die Marker-Funktionalität (** und ~~) wird mit-getestet, damit der Escape-First-Ansatz das nicht versehentlich kaputt macht. 77 alte Unit-Tests + 8 neue → 85 grün. Rate-Limit-Verifikation per TestClient ist Integration-Scope und folgt in tests/integration/test_main_security.py als separates Folge-Item. Refs: #57 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
73a7f76472
commit
64cbff5286
18
app/main.py
18
app/main.py
@ -9,6 +9,9 @@ from fastapi.responses import HTMLResponse, FileResponse, JSONResponse, Response
|
|||||||
from starlette.middleware.base import BaseHTTPMiddleware
|
from starlette.middleware.base import BaseHTTPMiddleware
|
||||||
from fastapi.staticfiles import StaticFiles
|
from fastapi.staticfiles import StaticFiles
|
||||||
from fastapi.templating import Jinja2Templates
|
from fastapi.templating import Jinja2Templates
|
||||||
|
from slowapi import Limiter, _rate_limit_exceeded_handler
|
||||||
|
from slowapi.util import get_remote_address
|
||||||
|
from slowapi.errors import RateLimitExceeded
|
||||||
|
|
||||||
from .config import settings
|
from .config import settings
|
||||||
from .database import (
|
from .database import (
|
||||||
@ -34,6 +37,15 @@ app = FastAPI(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Rate-Limiter — fängt Resource-Exhaustion auf den teuren POST-Endpoints
|
||||||
|
# (LLM-Calls + Indexing). Issue #57 Befund #1 (HIGH). Default in-memory
|
||||||
|
# Storage; für mehrere Worker müsste man auf Redis umstellen, solange wir
|
||||||
|
# auf einem Container laufen reicht das Default-Storage.
|
||||||
|
limiter = Limiter(key_func=get_remote_address, default_limits=[])
|
||||||
|
app.state.limiter = limiter
|
||||||
|
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
|
||||||
|
|
||||||
|
|
||||||
# Security Headers Middleware
|
# Security Headers Middleware
|
||||||
class SecurityHeadersMiddleware(BaseHTTPMiddleware):
|
class SecurityHeadersMiddleware(BaseHTTPMiddleware):
|
||||||
async def dispatch(self, request: Request, call_next):
|
async def dispatch(self, request: Request, call_next):
|
||||||
@ -106,7 +118,9 @@ async def index(request: Request):
|
|||||||
|
|
||||||
|
|
||||||
@app.post("/analyze")
|
@app.post("/analyze")
|
||||||
|
@limiter.limit("10/minute")
|
||||||
async def start_analysis(
|
async def start_analysis(
|
||||||
|
request: Request,
|
||||||
background_tasks: BackgroundTasks,
|
background_tasks: BackgroundTasks,
|
||||||
text: Optional[str] = Form(None),
|
text: Optional[str] = Form(None),
|
||||||
file: Optional[UploadFile] = File(None),
|
file: Optional[UploadFile] = File(None),
|
||||||
@ -412,7 +426,9 @@ async def search_landtag(
|
|||||||
|
|
||||||
# API: Analyze a document from parliament portal
|
# API: Analyze a document from parliament portal
|
||||||
@app.post("/api/analyze-drucksache")
|
@app.post("/api/analyze-drucksache")
|
||||||
|
@limiter.limit("10/minute")
|
||||||
async def analyze_drucksache(
|
async def analyze_drucksache(
|
||||||
|
request: Request,
|
||||||
background_tasks: BackgroundTasks,
|
background_tasks: BackgroundTasks,
|
||||||
drucksache: str = Form(...),
|
drucksache: str = Form(...),
|
||||||
bundesland: str = Form("NRW"),
|
bundesland: str = Form("NRW"),
|
||||||
@ -571,7 +587,9 @@ async def programme_status():
|
|||||||
|
|
||||||
|
|
||||||
@app.post("/api/programme/index")
|
@app.post("/api/programme/index")
|
||||||
|
@limiter.limit("3/minute")
|
||||||
async def index_programme(
|
async def index_programme(
|
||||||
|
request: Request,
|
||||||
background_tasks: BackgroundTasks,
|
background_tasks: BackgroundTasks,
|
||||||
programm_id: str = Form(None),
|
programm_id: str = Form(None),
|
||||||
all_programmes: bool = Form(False),
|
all_programmes: bool = Form(False),
|
||||||
|
|||||||
100
app/report.py
100
app/report.py
@ -1,11 +1,18 @@
|
|||||||
"""Report generation for HTML and PDF output."""
|
"""Report generation for HTML and PDF output.
|
||||||
|
|
||||||
|
All LLM-generated fields are HTML-escaped before being interpolated into
|
||||||
|
the report template. WeasyPrint will happily resolve ``<img src="file://...">``
|
||||||
|
or ``<link rel=stylesheet href="file://...">`` against the container
|
||||||
|
filesystem, so unescaped LLM output is a Local-File-Read primitive — see
|
||||||
|
issue #57 (audit findings #2 and #6). The ``_e`` helper is the single
|
||||||
|
funnel through which all LLM strings must pass on their way into the HTML.
|
||||||
|
"""
|
||||||
|
|
||||||
import subprocess
|
import subprocess
|
||||||
|
from html import escape as _e
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from jinja2 import Environment, FileSystemLoader
|
|
||||||
|
|
||||||
from .models import Assessment, MATRIX_LABELS, EMPFEHLUNG_CONFIG
|
from .models import Assessment, MATRIX_LABELS, EMPFEHLUNG_CONFIG
|
||||||
from .bundeslaender import BUNDESLAENDER
|
from .bundeslaender import BUNDESLAENDER
|
||||||
|
|
||||||
@ -47,8 +54,16 @@ def get_rating_symbol(rating: int) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def format_redline_html(text: str) -> str:
|
def format_redline_html(text: str) -> str:
|
||||||
"""Convert redline markup to HTML."""
|
"""Convert redline markup (``**ins**`` / ``~~del~~``) to HTML.
|
||||||
|
|
||||||
|
Escapes the input first so that any HTML in the LLM output (e.g.
|
||||||
|
``<img src="file:///etc/passwd">``) becomes inert text. The marker
|
||||||
|
regexes still fire because ``**`` and ``~~`` are not HTML special
|
||||||
|
chars and survive escaping unchanged. The inserted ``<span>`` tags
|
||||||
|
are the only raw HTML in the result and are produced by us.
|
||||||
|
"""
|
||||||
import re
|
import re
|
||||||
|
text = _e(text or "")
|
||||||
# **text** → green bold (inserted)
|
# **text** → green bold (inserted)
|
||||||
text = re.sub(r'\*\*([^*]+)\*\*', r'<span class="inserted">\1</span>', text)
|
text = re.sub(r'\*\*([^*]+)\*\*', r'<span class="inserted">\1</span>', text)
|
||||||
# ~~text~~ → red strikethrough (deleted)
|
# ~~text~~ → red strikethrough (deleted)
|
||||||
@ -85,7 +100,10 @@ def build_matrix_html(assessment: Assessment) -> str:
|
|||||||
if entry:
|
if entry:
|
||||||
symbol = get_rating_symbol(entry.rating)
|
symbol = get_rating_symbol(entry.rating)
|
||||||
css_class = "positive" if entry.rating > 0 else ("negative" if entry.rating < 0 else "neutral")
|
css_class = "positive" if entry.rating > 0 else ("negative" if entry.rating < 0 else "neutral")
|
||||||
html.append(f'<td class="{css_class}" title="{entry.aspect}">{symbol}</td>')
|
# entry.aspect comes from the LLM and is interpolated into a
|
||||||
|
# title="..." attribute — escape it so a stray double-quote
|
||||||
|
# cannot break out and inject attributes/handlers.
|
||||||
|
html.append(f'<td class="{css_class}" title="{_e(entry.aspect)}">{symbol}</td>')
|
||||||
else:
|
else:
|
||||||
html.append('<td></td>')
|
html.append('<td></td>')
|
||||||
html.append('</tr>')
|
html.append('</tr>')
|
||||||
@ -119,7 +137,7 @@ async def generate_html_report(
|
|||||||
<head>
|
<head>
|
||||||
<meta charset="UTF-8">
|
<meta charset="UTF-8">
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
<title>GWÖ-Antragsprüfung: {assessment.title}</title>
|
<title>GWÖ-Antragsprüfung: {_e(assessment.title or "")}</title>
|
||||||
<style>
|
<style>
|
||||||
:root {{
|
:root {{
|
||||||
--color-darkgray: {COLORS['darkgray']};
|
--color-darkgray: {COLORS['darkgray']};
|
||||||
@ -350,81 +368,81 @@ async def generate_html_report(
|
|||||||
<body>
|
<body>
|
||||||
<div class="header">
|
<div class="header">
|
||||||
<div class="header-label">GEMEINWOHL-ÖKONOMIE | ANTRAGSBEWERTUNG</div>
|
<div class="header-label">GEMEINWOHL-ÖKONOMIE | ANTRAGSBEWERTUNG</div>
|
||||||
<h1>{assessment.title}</h1>
|
<h1>{_e(assessment.title or "")}</h1>
|
||||||
{f'<div class="header-parlament">{parlament_name}</div>' if parlament_name else ''}
|
{f'<div class="header-parlament">{_e(parlament_name)}</div>' if parlament_name else ''}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="meta-box">
|
<div class="meta-box">
|
||||||
<strong>Drucksache:</strong> {assessment.drucksache} |
|
<strong>Drucksache:</strong> {_e(assessment.drucksache or "")} |
|
||||||
<strong>Datum:</strong> {assessment.datum} |
|
<strong>Datum:</strong> {_e(assessment.datum or "")} |
|
||||||
<strong>Fraktion(en):</strong> {', '.join(assessment.fraktionen)} |
|
<strong>Fraktion(en):</strong> {_e(', '.join(assessment.fraktionen))} |
|
||||||
<strong>GWÖ-Score:</strong> <span style="color: {get_score_color(assessment.gwoe_score)}; font-weight: bold;">{assessment.gwoe_score}/10</span>
|
<strong>GWÖ-Score:</strong> <span style="color: {get_score_color(assessment.gwoe_score)}; font-weight: bold;">{assessment.gwoe_score}/10</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="empfehlung-box">
|
<div class="empfehlung-box">
|
||||||
<span class="symbol">{empf_config.get('symbol', '[?]')}</span>
|
<span class="symbol">{_e(empf_config.get('symbol', '[?]'))}</span>
|
||||||
<span class="text"><strong>Empfehlung:</strong> {assessment.empfehlung.value}</span>
|
<span class="text"><strong>Empfehlung:</strong> {_e(assessment.empfehlung.value)}</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<h2>Der Antrag im Überblick</h2>
|
<h2>Der Antrag im Überblick</h2>
|
||||||
<p>{assessment.antrag_zusammenfassung or 'Keine Zusammenfassung verfügbar.'}</p>
|
<p>{_e(assessment.antrag_zusammenfassung or 'Keine Zusammenfassung verfügbar.')}</p>
|
||||||
|
|
||||||
{('<ul>' + ''.join(f'<li>{k}</li>' for k in assessment.antrag_kernpunkte) + '</ul>') if assessment.antrag_kernpunkte else ''}
|
{('<ul>' + ''.join(f'<li>{_e(k)}</li>' for k in assessment.antrag_kernpunkte) + '</ul>') if assessment.antrag_kernpunkte else ''}
|
||||||
|
|
||||||
<h2>GWÖ-Treue</h2>
|
<h2>GWÖ-Treue</h2>
|
||||||
<p style="font-size: 9pt;"><strong>Score:</strong> <span style="color: {get_score_color(assessment.gwoe_score)};">{assessment.gwoe_score}/10</span></p>
|
<p style="font-size: 9pt;"><strong>Score:</strong> <span style="color: {get_score_color(assessment.gwoe_score)};">{assessment.gwoe_score}/10</span></p>
|
||||||
|
|
||||||
<div class="score-bar">
|
<div class="score-bar">
|
||||||
<div class="score-bar-fill" style="width: {assessment.gwoe_score * 10}%; background: {get_score_color(assessment.gwoe_score)};"></div>
|
<div class="score-bar-fill" style="width: {assessment.gwoe_score * 10}%; background: {get_score_color(assessment.gwoe_score)};"></div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<p><strong>Begründung:</strong> {assessment.gwoe_begruendung}</p>
|
<p><strong>Begründung:</strong> {_e(assessment.gwoe_begruendung or "")}</p>
|
||||||
<p><strong>Schwerpunkte:</strong> {', '.join(assessment.gwoe_schwerpunkt)}</p>
|
<p><strong>Schwerpunkte:</strong> {_e(', '.join(assessment.gwoe_schwerpunkt))}</p>
|
||||||
|
|
||||||
<h2>Matrix-Zuordnung (Matrix 2.0 für Gemeinden)</h2>
|
<h2>Matrix-Zuordnung (Matrix 2.0 für Gemeinden)</h2>
|
||||||
|
|
||||||
{build_matrix_html(assessment)}
|
{build_matrix_html(assessment)}
|
||||||
|
|
||||||
<p style="font-size: 7pt; color: #999;">
|
<p style="font-size: 7pt; color: #999;">
|
||||||
<strong>Legende:</strong> ++ stark fördernd, + fördernd, ○ neutral, − widersprechend, −− stark widersprechend
|
<strong>Legende:</strong> ++ stark fördernd, + fördernd, ○ neutral, − widersprechend, −− stark widersprechend
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<h3>Berührte Themenfelder</h3>
|
<h3>Berührte Themenfelder</h3>
|
||||||
<ul>
|
<ul>
|
||||||
{''.join(f'<li><strong>{e.field}:</strong> {e.aspect} [{get_rating_symbol(e.rating)}]</li>' for e in assessment.gwoe_matrix)}
|
{''.join(f'<li><strong>{_e(e.field)}:</strong> {_e(e.aspect)} [{get_rating_symbol(e.rating)}]</li>' for e in assessment.gwoe_matrix)}
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
<h2>Programmtreue</h2>
|
<h2>Programmtreue</h2>
|
||||||
|
|
||||||
{''.join(f'''
|
{''.join(f'''
|
||||||
<h3>{s.fraktion} {' (Antragsteller)' if s.ist_antragsteller else ''}{' (Regierung)' if s.ist_regierung else ''}</h3>
|
<h3>{_e(s.fraktion)} {' (Antragsteller)' if s.ist_antragsteller else ''}{' (Regierung)' if s.ist_regierung else ''}</h3>
|
||||||
<p><strong>Wahlprogramm:</strong> {s.wahlprogramm.score}/10 — {s.wahlprogramm.begruendung}</p>
|
<p><strong>Wahlprogramm:</strong> {s.wahlprogramm.score}/10 — {_e(s.wahlprogramm.begruendung or "")}</p>
|
||||||
<p><strong>Parteiprogramm:</strong> {s.parteiprogramm.score}/10 — {s.parteiprogramm.begruendung}</p>
|
<p><strong>Parteiprogramm:</strong> {s.parteiprogramm.score}/10 — {_e(s.parteiprogramm.begruendung or "")}</p>
|
||||||
''' for s in assessment.wahlprogramm_scores)}
|
''' for s in assessment.wahlprogramm_scores)}
|
||||||
|
|
||||||
<h2>Verbesserungsvorschläge</h2>
|
<h2>Verbesserungsvorschläge</h2>
|
||||||
|
|
||||||
{''.join(f'''
|
{''.join(f'''
|
||||||
<div class="verbesserung">
|
<div class="verbesserung">
|
||||||
<div class="original"><strong>Original:</strong><br>{v.original}</div>
|
<div class="original"><strong>Original:</strong><br>{_e(v.original or "")}</div>
|
||||||
<div class="vorschlag"><strong>Vorschlag:</strong><br>{format_redline_html(v.vorschlag)}</div>
|
<div class="vorschlag"><strong>Vorschlag:</strong><br>{format_redline_html(v.vorschlag)}</div>
|
||||||
<div style="font-style: italic; margin-top: 0.5rem;">{v.begruendung}</div>
|
<div style="font-style: italic; margin-top: 0.5rem;">{_e(v.begruendung or "")}</div>
|
||||||
</div>
|
</div>
|
||||||
''' for v in assessment.verbesserungen) or '<p>Keine Verbesserungsvorschläge.</p>'}
|
''' for v in assessment.verbesserungen) or '<p>Keine Verbesserungsvorschläge.</p>'}
|
||||||
|
|
||||||
<h2>Zusammenfassung</h2>
|
<h2>Zusammenfassung</h2>
|
||||||
|
|
||||||
<div class="two-columns">
|
<div class="two-columns">
|
||||||
<div class="staerken">
|
<div class="staerken">
|
||||||
<h3 style="color: var(--color-green);">Stärken</h3>
|
<h3 style="color: var(--color-green);">Stärken</h3>
|
||||||
<ul>
|
<ul>
|
||||||
{''.join(f'<li>{s}</li>' for s in assessment.staerken) or '<li>(keine)</li>'}
|
{''.join(f'<li>{_e(s)}</li>' for s in assessment.staerken) or '<li>(keine)</li>'}
|
||||||
</ul>
|
</ul>
|
||||||
</div>
|
</div>
|
||||||
<div class="schwaechen">
|
<div class="schwaechen">
|
||||||
<h3 style="color: var(--color-orange);">Schwächen</h3>
|
<h3 style="color: var(--color-orange);">Schwächen</h3>
|
||||||
<ul>
|
<ul>
|
||||||
{''.join(f'<li>{s}</li>' for s in assessment.schwaechen) or '<li>(keine)</li>'}
|
{''.join(f'<li>{_e(s)}</li>' for s in assessment.schwaechen) or '<li>(keine)</li>'}
|
||||||
</ul>
|
</ul>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@ -11,3 +11,4 @@ beautifulsoup4>=4.12.0
|
|||||||
weasyprint>=62.0
|
weasyprint>=62.0
|
||||||
pydantic>=2.9.0
|
pydantic>=2.9.0
|
||||||
pydantic-settings>=2.5.0
|
pydantic-settings>=2.5.0
|
||||||
|
slowapi>=0.1.9
|
||||||
|
|||||||
194
tests/test_report.py
Normal file
194
tests/test_report.py
Normal file
@ -0,0 +1,194 @@
|
|||||||
|
"""Tests für app.report — insbesondere XSS/XXE-Escape-Verhalten.
|
||||||
|
|
||||||
|
Abdeckung der Bug-Klasse aus Issue #57 (Security Audit, Befunde #2 und #6):
|
||||||
|
das LLM-Output landet direkt in der HTML-Vorlage, die von WeasyPrint
|
||||||
|
gerendert wird. Ohne Escape kann eine Prompt-Injection mit einem rohen
|
||||||
|
``<img src="file:///etc/passwd">`` Local File Read im Container auslösen
|
||||||
|
oder ``<script>``-Payloads im Browser des Lesers ausführen, der den
|
||||||
|
HTML-Report über ``/result/{job_id}`` öffnet.
|
||||||
|
|
||||||
|
Diese Tests bauen ein Assessment mit kontrollierten XSS-Payloads in jedem
|
||||||
|
LLM-getragenen Feld und verifizieren, dass die generierte HTML die rohen
|
||||||
|
Payloads nicht enthält und die escapeden Varianten dafür schon.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from app.models import (
|
||||||
|
Assessment,
|
||||||
|
Empfehlung,
|
||||||
|
FraktionScores,
|
||||||
|
MatrixEntry,
|
||||||
|
ProgrammScore,
|
||||||
|
Verbesserung,
|
||||||
|
Verbesserungspotenzial,
|
||||||
|
)
|
||||||
|
from app.report import build_matrix_html, format_redline_html, generate_html_report
|
||||||
|
|
||||||
|
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
# format_redline_html unit tests
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class TestFormatRedlineHtml:
|
||||||
|
"""``format_redline_html`` muss escape-first-then-replace fahren, sonst
|
||||||
|
überleben rohe HTML-Tags aus dem LLM die Konvertierung."""
|
||||||
|
|
||||||
|
def test_plain_text_passes_through(self):
|
||||||
|
assert format_redline_html("hallo welt") == "hallo welt"
|
||||||
|
|
||||||
|
def test_inserted_marker_becomes_span(self):
|
||||||
|
assert format_redline_html("**neu**") == '<span class="inserted">neu</span>'
|
||||||
|
|
||||||
|
def test_deleted_marker_becomes_span(self):
|
||||||
|
assert format_redline_html("~~weg~~") == '<span class="deleted">weg</span>'
|
||||||
|
|
||||||
|
def test_script_tag_in_input_is_escaped(self):
|
||||||
|
out = format_redline_html("vor <script>alert(1)</script> nach")
|
||||||
|
assert "<script>" not in out
|
||||||
|
assert "&lt;script&gt;" in out
|
||||||
|
|
||||||
|
def test_local_file_image_in_input_is_escaped(self):
|
||||||
|
out = format_redline_html('<img src="file:///etc/passwd">')
|
||||||
|
assert '<img src="file:///etc/passwd">' not in out
|
||||||
|
assert "&lt;img" in out
|
||||||
|
|
||||||
|
def test_marker_inside_escaped_tag_still_renders(self):
|
||||||
|
# Edge: ein Angreifer kann seine Payload in **markern** einbetten,
|
||||||
|
# die Marker müssen weiterhin als spans rendern, der HTML-Inhalt
|
||||||
|
# aber escaped sein.
|
||||||
|
out = format_redline_html("**<script>x</script>**")
|
||||||
|
assert "<span class=\"inserted\">" in out
|
||||||
|
assert "<script>" not in out
|
||||||
|
assert "&lt;script&gt;" in out
|
||||||
|
|
||||||
|
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
# build_matrix_html title-attribute escape
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def _minimal_assessment(**overrides) -> Assessment:
|
||||||
|
"""Build a smallest-possible Assessment with all required fields,
|
||||||
|
plus deep-link overrides for the field under test."""
|
||||||
|
base = dict(
|
||||||
|
drucksache="18/12345",
|
||||||
|
title="Demo-Antrag",
|
||||||
|
fraktionen=["CDU"],
|
||||||
|
datum="2026-04-09",
|
||||||
|
link=None,
|
||||||
|
gwoe_score=5.0,
|
||||||
|
gwoe_begruendung="Begründung.",
|
||||||
|
gwoe_matrix=[
|
||||||
|
MatrixEntry(field="A1", label="Lieferant:innen × 1", aspect="kein Aspekt", rating=0),
|
||||||
|
],
|
||||||
|
gwoe_schwerpunkt=["Solidarität"],
|
||||||
|
wahlprogramm_scores=[
|
||||||
|
FraktionScores(
|
||||||
|
fraktion="CDU",
|
||||||
|
wahlprogramm=ProgrammScore(score=5.0, begruendung="ok", zitate=[]),
|
||||||
|
parteiprogramm=ProgrammScore(score=5.0, begruendung="ok", zitate=[]),
|
||||||
|
),
|
||||||
|
],
|
||||||
|
verbesserungen=[],
|
||||||
|
staerken=["eine stärke"],
|
||||||
|
schwaechen=["eine schwäche"],
|
||||||
|
empfehlung=Empfehlung.UEBERARBEITEN,
|
||||||
|
verbesserungspotenzial=Verbesserungspotenzial.MITTEL,
|
||||||
|
themen=[],
|
||||||
|
antrag_zusammenfassung="zusammenfassung",
|
||||||
|
antrag_kernpunkte=["punkt eins"],
|
||||||
|
)
|
||||||
|
base.update(overrides)
|
||||||
|
return Assessment(**base)
|
||||||
|
|
||||||
|
|
||||||
|
class TestBuildMatrixHtml:
|
||||||
|
def test_aspect_with_quote_does_not_break_title_attribute(self):
|
||||||
|
"""Ein nacktes ``"`` in ``aspect`` darf das ``title="..."``-Attribut
|
||||||
|
nicht beenden — sonst kann ein folgender ``onmouseover=...``-Token
|
||||||
|
zu einem echten Event-Handler werden. Entscheidender Check: das
|
||||||
|
rohe ``"`` ist im Output durch ``&quot;`` ersetzt, sodass das
|
||||||
|
Attribut geschlossen bleibt."""
|
||||||
|
a = _minimal_assessment(
|
||||||
|
gwoe_matrix=[
|
||||||
|
MatrixEntry(
|
||||||
|
field="A1",
|
||||||
|
label="Lieferant:innen × 1",
|
||||||
|
aspect='" onmouseover="alert(1)',
|
||||||
|
rating=1,
|
||||||
|
),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
html = build_matrix_html(a)
|
||||||
|
# Das rohe " aus dem aspect darf nicht mehr im title-Wert stehen
|
||||||
|
assert '"" onmouseover' not in html # Attribut-Breakout
|
||||||
|
assert '" onmouseover="alert' not in html
|
||||||
|
# Stattdessen muss die escapete Form vorhanden sein
|
||||||
|
assert "&quot;" in html
|
||||||
|
|
||||||
|
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
# generate_html_report end-to-end XSS escape
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
_XSS = "<script>alert('xss')</script>"
|
||||||
|
_XXE = '<img src="file:///etc/passwd">'
|
||||||
|
|
||||||
|
|
||||||
|
def _build_xss_assessment() -> Assessment:
|
||||||
|
return _minimal_assessment(
|
||||||
|
title=f"Antrag {_XSS}",
|
||||||
|
gwoe_begruendung=f"Begründung mit {_XSS}",
|
||||||
|
gwoe_schwerpunkt=[f"Schwerpunkt {_XXE}"],
|
||||||
|
antrag_zusammenfassung=f"Zusammenfassung mit {_XXE}",
|
||||||
|
antrag_kernpunkte=[f"Kernpunkt mit {_XSS}"],
|
||||||
|
staerken=[f"Stärke mit {_XSS}"],
|
||||||
|
schwaechen=[f"Schwäche mit {_XXE}"],
|
||||||
|
gwoe_matrix=[
|
||||||
|
MatrixEntry(field="A1", label="Lieferant:innen × 1",
|
||||||
|
aspect=f"Aspekt {_XSS}", rating=2),
|
||||||
|
],
|
||||||
|
wahlprogramm_scores=[
|
||||||
|
FraktionScores(
|
||||||
|
fraktion=f"Fraktion {_XSS}",
|
||||||
|
wahlprogramm=ProgrammScore(score=5.0, begruendung=f"WP {_XXE}", zitate=[]),
|
||||||
|
parteiprogramm=ProgrammScore(score=5.0, begruendung=f"PP {_XSS}", zitate=[]),
|
||||||
|
),
|
||||||
|
],
|
||||||
|
verbesserungen=[
|
||||||
|
Verbesserung(
|
||||||
|
original=f"Original {_XSS}",
|
||||||
|
vorschlag=f"Vorschlag **mit {_XSS} markup**",
|
||||||
|
begruendung=f"Begründung {_XXE}",
|
||||||
|
),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_generate_html_report_escapes_all_llm_payloads(tmp_path: Path):
|
||||||
|
a = _build_xss_assessment()
|
||||||
|
out = tmp_path / "report.html"
|
||||||
|
asyncio.run(generate_html_report(a, out, bundesland="NRW"))
|
||||||
|
html = out.read_text()
|
||||||
|
|
||||||
|
# Negative: keine rohen Angriffs-Strings
|
||||||
|
assert "<script>" not in html, "rohes <script> aus LLM-Felder im Output"
|
||||||
|
assert "alert('xss')" not in html
|
||||||
|
assert 'src="file:///etc/passwd"' not in html, (
|
||||||
|
"rohes file:// in HTML würde WeasyPrint zum Local-File-Read verleiten"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Positive: escapete Form ist vorhanden (also wurde der Payload überhaupt
|
||||||
|
# mit-gerendert, nur eben sicher)
|
||||||
|
assert "&lt;script&gt;" in html
|
||||||
|
assert "&lt;img" in html
|
||||||
|
|
||||||
|
# Format-Redline-Marker müssen weiterhin funktionieren (Vorschlag mit **)
|
||||||
|
assert '<span class="inserted">' in html
|
||||||
Loading…
Reference in New Issue
Block a user