diff --git a/app/main.py b/app/main.py
index 6f01b95..123f21a 100644
--- a/app/main.py
+++ b/app/main.py
@@ -9,6 +9,9 @@ from fastapi.responses import HTMLResponse, FileResponse, JSONResponse, Response
from starlette.middleware.base import BaseHTTPMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
+from slowapi import Limiter, _rate_limit_exceeded_handler
+from slowapi.util import get_remote_address
+from slowapi.errors import RateLimitExceeded
from .config import settings
from .database import (
@@ -34,6 +37,15 @@ app = FastAPI(
)
+# Rate-Limiter — fängt Resource-Exhaustion auf den teuren POST-Endpoints
+# (LLM-Calls + Indexing). Issue #57 Befund #1 (HIGH). Default in-memory
+# Storage; für mehrere Worker müsste man auf Redis umstellen, solange wir
+# auf einem Container laufen reicht das Default-Storage.
+limiter = Limiter(key_func=get_remote_address, default_limits=[])
+app.state.limiter = limiter
+app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
+
+
# Security Headers Middleware
class SecurityHeadersMiddleware(BaseHTTPMiddleware):
async def dispatch(self, request: Request, call_next):
@@ -106,7 +118,9 @@ async def index(request: Request):
@app.post("/analyze")
+@limiter.limit("10/minute")
async def start_analysis(
+ request: Request,
background_tasks: BackgroundTasks,
text: Optional[str] = Form(None),
file: Optional[UploadFile] = File(None),
@@ -412,7 +426,9 @@ async def search_landtag(
# API: Analyze a document from parliament portal
@app.post("/api/analyze-drucksache")
+@limiter.limit("10/minute")
async def analyze_drucksache(
+ request: Request,
background_tasks: BackgroundTasks,
drucksache: str = Form(...),
bundesland: str = Form("NRW"),
@@ -571,7 +587,9 @@ async def programme_status():
@app.post("/api/programme/index")
+@limiter.limit("3/minute")
async def index_programme(
+ request: Request,
background_tasks: BackgroundTasks,
programm_id: str = Form(None),
all_programmes: bool = Form(False),
diff --git a/app/report.py b/app/report.py
index 87c3ee4..b95c21d 100644
--- a/app/report.py
+++ b/app/report.py
@@ -1,11 +1,18 @@
-"""Report generation for HTML and PDF output."""
+"""Report generation for HTML and PDF output.
+
+All LLM-generated fields are HTML-escaped before being interpolated into
+the report template. WeasyPrint will happily resolve ``<img src="file:///...">``
+or ``<link rel="attachment" href="file:///...">`` against the container
+filesystem, so unescaped LLM output is a Local-File-Read primitive — see
+issue #57 (audit findings #2 and #6). The ``_e`` helper is the single
+funnel through which all LLM strings must pass on their way into the HTML.
+"""
import subprocess
+from html import escape as _e
from pathlib import Path
from typing import Optional
-from jinja2 import Environment, FileSystemLoader
-
from .models import Assessment, MATRIX_LABELS, EMPFEHLUNG_CONFIG
from .bundeslaender import BUNDESLAENDER
@@ -47,8 +54,16 @@ def get_rating_symbol(rating: int) -> str:
def format_redline_html(text: str) -> str:
- """Convert redline markup to HTML."""
+ """Convert redline markup (``**ins**`` / ``~~del~~``) to HTML.
+
+ Escapes the input first so that any HTML in the LLM output (e.g.
+ ``
``) becomes inert text. The marker
+ regexes still fire because ``**`` and ``~~`` are not HTML special
+ chars and survive escaping unchanged. The inserted HTML wrapper tags
+ are the only raw HTML in the result and are produced by us.
+ """
import re
+ text = _e(text or "")
# **text** → green bold (inserted)
text = re.sub(r'\*\*([^*]+)\*\*', r'\1', text)
# ~~text~~ → red strikethrough (deleted)
@@ -85,7 +100,10 @@ def build_matrix_html(assessment: Assessment) -> str:
if entry:
symbol = get_rating_symbol(entry.rating)
css_class = "positive" if entry.rating > 0 else ("negative" if entry.rating < 0 else "neutral")
- html.append(f'