diff --git a/app/main.py b/app/main.py index 6f01b95..123f21a 100644 --- a/app/main.py +++ b/app/main.py @@ -9,6 +9,9 @@ from fastapi.responses import HTMLResponse, FileResponse, JSONResponse, Response from starlette.middleware.base import BaseHTTPMiddleware from fastapi.staticfiles import StaticFiles from fastapi.templating import Jinja2Templates +from slowapi import Limiter, _rate_limit_exceeded_handler +from slowapi.util import get_remote_address +from slowapi.errors import RateLimitExceeded from .config import settings from .database import ( @@ -34,6 +37,15 @@ app = FastAPI( ) +# Rate-Limiter — fängt Resource-Exhaustion auf den teuren POST-Endpoints +# (LLM-Calls + Indexing). Issue #57 Befund #1 (HIGH). Default in-memory +# Storage; für mehrere Worker müsste man auf Redis umstellen, solange wir +# auf einem Container laufen reicht das Default-Storage. +limiter = Limiter(key_func=get_remote_address, default_limits=[]) +app.state.limiter = limiter +app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) + + # Security Headers Middleware class SecurityHeadersMiddleware(BaseHTTPMiddleware): async def dispatch(self, request: Request, call_next): @@ -106,7 +118,9 @@ async def index(request: Request): @app.post("/analyze") +@limiter.limit("10/minute") async def start_analysis( + request: Request, background_tasks: BackgroundTasks, text: Optional[str] = Form(None), file: Optional[UploadFile] = File(None), @@ -412,7 +426,9 @@ async def search_landtag( # API: Analyze a document from parliament portal @app.post("/api/analyze-drucksache") +@limiter.limit("10/minute") async def analyze_drucksache( + request: Request, background_tasks: BackgroundTasks, drucksache: str = Form(...), bundesland: str = Form("NRW"), @@ -571,7 +587,9 @@ async def programme_status(): @app.post("/api/programme/index") +@limiter.limit("3/minute") async def index_programme( + request: Request, background_tasks: BackgroundTasks, programm_id: str = Form(None), all_programmes: bool = Form(False), diff --git a/app/report.py b/app/report.py index 87c3ee4..b95c21d 100644 --- a/app/report.py +++ b/app/report.py @@ -1,11 +1,18 @@ -"""Report generation for HTML and PDF output.""" +"""Report generation for HTML and PDF output. + +All LLM-generated fields are HTML-escaped before being interpolated into +the report template. WeasyPrint will happily resolve ```` +or ```` against the container +filesystem, so unescaped LLM output is a Local-File-Read primitive — see +issue #57 (audit findings #2 and #6). The ``_e`` helper is the single +funnel through which all LLM strings must pass on their way into the HTML. +""" import subprocess +from html import escape as _e from pathlib import Path from typing import Optional -from jinja2 import Environment, FileSystemLoader - from .models import Assessment, MATRIX_LABELS, EMPFEHLUNG_CONFIG from .bundeslaender import BUNDESLAENDER @@ -47,8 +54,16 @@ def get_rating_symbol(rating: int) -> str: def format_redline_html(text: str) -> str: - """Convert redline markup to HTML.""" + """Convert redline markup (``**ins**`` / ``~~del~~``) to HTML. + + Escapes the input first so that any HTML in the LLM output (e.g. + ````) becomes inert text. The marker + regexes still fire because ``**`` and ``~~`` are not HTML special + chars and survive escaping unchanged. The inserted ```` tags + are the only raw HTML in the result and are produced by us. + """ import re + text = _e(text or "") # **text** → green bold (inserted) text = re.sub(r'\*\*([^*]+)\*\*', r'\1', text) # ~~text~~ → red strikethrough (deleted) @@ -85,7 +100,10 @@ def build_matrix_html(assessment: Assessment) -> str: if entry: symbol = get_rating_symbol(entry.rating) css_class = "positive" if entry.rating > 0 else ("negative" if entry.rating < 0 else "neutral") - html.append(f'{symbol}') + # entry.aspect comes from the LLM and is interpolated into a + # title="..." attribute — escape it so a stray double-quote + # cannot break out and inject attributes/handlers. + html.append(f'{symbol}') else: html.append('') html.append('') @@ -119,7 +137,7 @@ async def generate_html_report( - GWÖ-Antragsprüfung: {assessment.title} + GWÖ-Antragsprüfung: {_e(assessment.title or "")}