Problem: Alle Assessments in der Prod-DB haben Pre-#47-URLs
(/static/referenzen/X.pdf#page=N). Die _chunk_pdf_url-Änderung wirkt
nur auf NEUE Analysen, die noch nicht stattgefunden haben.
Fix (zwei Seiten):
1. Endpoint /api/wahlprogramm-cite akzeptiert jetzt auch pdf=<filename>
als Alternative zu pid=<programm_id>. Reverse-Lookup über PROGRAMME-
Registry: pdf-Filename → programm_id. Damit können die statischen
URLs aus Pre-#47-Assessments trotzdem an den Cite-Endpoint geleitet
werden.
2. Frontend: neue JS-Funktion makeCiteUrl(z) die JEDE Zitat-URL on-the-
fly umschreibt:
- /static/referenzen/X.pdf#page=N + z.text
→ /api/wahlprogramm-cite?pdf=X.pdf&seite=N&q=<urlencoded text>
- /api/wahlprogramm-cite?... → durchreichen (schon Cite-URL)
- Fallback: URL unverändert
Funktioniert retroaktiv für ALLE ~31 Assessments in der DB, ohne
Re-Analyse. Sobald ein User auf ein Zitat klickt, wird die Seite
des Wahlprogramms mit gelber Markierung gerendert.
Tests: 194/194 grün.
Refs: #47
736 lines
26 KiB
Python
"""GWÖ-Antragsprüfer — FastAPI Webapp."""
|
||
|
||
import logging
|
||
import uuid
|
||
from pathlib import Path
|
||
from typing import Optional
|
||
|
||
from fastapi import FastAPI, File, Form, UploadFile, Request, BackgroundTasks, HTTPException
|
||
from fastapi.responses import HTMLResponse, FileResponse, JSONResponse, Response
|
||
from starlette.middleware.base import BaseHTTPMiddleware
|
||
from fastapi.staticfiles import StaticFiles
|
||
from fastapi.templating import Jinja2Templates
|
||
from slowapi import Limiter, _rate_limit_exceeded_handler
|
||
from slowapi.util import get_remote_address
|
||
from slowapi.errors import RateLimitExceeded
|
||
|
||
from .validators import (
|
||
MAX_SEARCH_QUERY_LEN,
|
||
validate_drucksache,
|
||
validate_search_query,
|
||
)
|
||
|
||
# Structured logging for the whole app. uvicorn registers its own root
# handler only at startup; we install a neutral format for our modules
# early so that logger.exception() already works during boot.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)-7s %(name)s: %(message)s",
)
# Module-level logger named after this module (standard logging convention).
logger = logging.getLogger(__name__)
|
||
|
||
from .config import settings
|
||
from .database import (
|
||
init_db, get_job, create_job, update_job,
|
||
get_all_assessments, get_assessment, upsert_assessment, import_json_assessments,
|
||
search_assessments
|
||
)
|
||
from .parlamente import get_adapter, ADAPTERS
|
||
from .bundeslaender import alle_bundeslaender
|
||
from .analyzer import analyze_antrag
|
||
from .report import generate_html_report, generate_pdf_report
|
||
from .embeddings import (
|
||
init_embeddings_db, get_programme_info, get_indexing_status,
|
||
index_programm, render_highlighted_page, PROGRAMME,
|
||
)
|
||
|
||
# Interactive API docs are disabled: this is a public-facing webapp, not a
# public API surface.
app = FastAPI(
    title=settings.app_name,
    version=settings.app_version,
    docs_url=None, # Disable /docs in production
    redoc_url=None, # Disable /redoc in production
    openapi_url=None, # Disable /openapi.json in production
)


# Rate limiter — guards against resource exhaustion on the expensive POST
# endpoints (LLM calls + indexing). Issue #57 finding #1 (HIGH). Default
# in-memory storage; with multiple workers this would need to move to
# Redis, but as long as we run in a single container the default suffices.
limiter = Limiter(key_func=get_remote_address, default_limits=[])
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
|
||
|
||
|
||
# Security Headers Middleware
class SecurityHeadersMiddleware(BaseHTTPMiddleware):
    """Attach a fixed set of defensive HTTP headers to every response."""

    async def dispatch(self, request: Request, call_next):
        response = await call_next(request)
        # Standard hardening: MIME sniffing, clickjacking, legacy XSS
        # filter, referrer leakage and powerful-feature opt-out.
        hardening = {
            "X-Content-Type-Options": "nosniff",
            "X-Frame-Options": "DENY",
            "X-XSS-Protection": "1; mode=block",
            "Referrer-Policy": "strict-origin-when-cross-origin",
            "Permissions-Policy": "geolocation=(), microphone=(), camera=()",
            # CSP: Allow self, inline styles (for templates), and PDF viewer
            "Content-Security-Policy": (
                "default-src 'self'; "
                "style-src 'self' 'unsafe-inline'; "
                "script-src 'self' 'unsafe-inline'; "
                "img-src 'self' data:; "
                "frame-ancestors 'none';"
            ),
        }
        for name, value in hardening.items():
            response.headers[name] = value
        return response


app.add_middleware(SecurityHeadersMiddleware)
|
||
|
||
|
||
# Setup directories
settings.data_dir.mkdir(exist_ok=True)
settings.reports_dir.mkdir(exist_ok=True)

# Static files and templates
static_dir = Path(__file__).parent / "static"
templates_dir = Path(__file__).parent / "templates"
# Create both on first boot so StaticFiles / Jinja2Templates below don't
# fail on a missing directory.
static_dir.mkdir(exist_ok=True)
templates_dir.mkdir(exist_ok=True)

app.mount("/static", StaticFiles(directory=static_dir), name="static")
templates = Jinja2Templates(directory=str(templates_dir))
|
||
|
||
|
||
@app.on_event("startup")
async def startup():
    """Initialize the job/assessment DB and the embeddings DB at boot."""
    await init_db()
    init_embeddings_db()
    # JSON import disabled - all assessments now live in SQLite DB only
    # Legacy import would overwrite new v5 assessments with old format
    # count = await import_json_assessments(settings.data_dir / "assessments")
    # if count > 0:
    #     print(f"Imported {count} assessments from JSON files")
|
||
|
||
|
||
@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
    """Landing page with upload form."""
    # Frontend list: synthetic "ALL" (nationwide) entry first, then the
    # real Bundesländer from the config. The "ALL" code is purely a
    # frontend/API convention, not an entry in bundeslaender.py.
    bl_list = [{"code": "ALL", "name": "🌍 Bundesweit", "active": True}] + [
        {"code": land.code, "name": land.name, "active": land.aktiv}
        for land in alle_bundeslaender()
    ]
    # Map code → parlament_name so the frontend can display the parliament
    # name per Antrag without an extra backend call.
    parlament_names = {
        land.code: land.parlament_name for land in alle_bundeslaender()
    }
    context = {
        "request": request,
        "app_name": settings.app_name,
        "bundeslaender": bl_list,
        "parlament_names": parlament_names,
    }
    return templates.TemplateResponse("index.html", context)
|
||
|
||
|
||
@app.post("/analyze")
@limiter.limit("10/minute")
async def start_analysis(
    request: Request,
    background_tasks: BackgroundTasks,
    text: Optional[str] = Form(None),
    file: Optional[UploadFile] = File(None),
    bundesland: str = Form("NRW"),
    model: str = Form("qwen-plus"),
):
    """Start an analysis job.

    Accepts either raw ``text`` or an uploaded PDF ``file`` (text is
    extracted with PyMuPDF). The LLM analysis itself runs as a background
    task; the client polls /status/{job_id}.

    Raises:
        HTTPException 400: neither text nor file supplied, or no usable
            text could be obtained (e.g. upload without filename, or a
            scanned PDF without a text layer).
    """
    if not text and not file:
        raise HTTPException(status_code=400, detail="Entweder Text oder PDF-Datei erforderlich")

    # Extract text from PDF if uploaded
    if file and file.filename:
        import fitz  # PyMuPDF
        pdf_bytes = await file.read()
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            text = "".join(page.get_text() for page in doc)
        finally:
            # Close even if get_text() raises — avoids leaking the document.
            doc.close()

    # Fix: previously an upload without filename (text still None) crashed
    # below on text[:500] with a 500, and an empty extraction queued a
    # pointless LLM run. Reject both explicitly with 400.
    if not text or not text.strip():
        raise HTTPException(status_code=400, detail="Kein analysierbarer Text gefunden")

    # Create job
    job_id = str(uuid.uuid4())
    await create_job(job_id, text[:500], bundesland, model)

    # Start background analysis
    background_tasks.add_task(run_analysis, job_id, text, bundesland, model)

    return JSONResponse({"job_id": job_id, "status": "queued"})
|
||
|
||
|
||
async def run_analysis(job_id: str, text: str, bundesland: str, model: str):
    """Background task for analysis.

    Runs the LLM analysis, renders HTML and PDF reports and records the
    outcome on the job row. Never raises: failures are logged and stored
    as status="failed" so the polling client reaches a terminal state.
    """
    try:
        await update_job(job_id, status="processing")

        # Run LLM analysis
        assessment = await analyze_antrag(text, bundesland, model)

        # Generate reports
        html_path = settings.reports_dir / f"{job_id}.html"
        pdf_path = settings.reports_dir / f"{job_id}.pdf"

        await generate_html_report(assessment, html_path, bundesland=bundesland)
        await generate_pdf_report(assessment, pdf_path, bundesland=bundesland)

        await update_job(
            job_id,
            status="completed",
            result=assessment.model_dump_json(),
            html_path=str(html_path),
            pdf_path=str(pdf_path),
        )
    except Exception as e:
        # Fix: previously the failure was swallowed silently — inconsistent
        # with run_drucksache_analysis, which logs the full stack trace via
        # logger.exception before marking the job failed. Do the same here.
        logger.exception("run_analysis failed for job_id=%s", job_id)
        await update_job(job_id, status="failed", error=str(e))
|
||
|
||
|
||
@app.get("/status/{job_id}")
async def get_status(job_id: str):
    """Return the current status of an analysis job."""
    job = await get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job nicht gefunden")
    payload = {
        "job_id": job_id,
        "status": job["status"],
        "created_at": job["created_at"],
    }
    return JSONResponse(payload)
|
||
|
||
|
||
@app.get("/result/{job_id}", response_class=HTMLResponse)
async def get_result(request: Request, job_id: str):
    """Serve the finished analysis result as HTML."""
    job = await get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job nicht gefunden")
    if job["status"] != "completed":
        raise HTTPException(status_code=400, detail=f"Job noch nicht fertig: {job['status']}")

    report_path = Path(job["html_path"])
    # Report file may have been cleaned up since completion — treat as 500.
    if not report_path.exists():
        raise HTTPException(status_code=500, detail="Report nicht gefunden")
    return HTMLResponse(report_path.read_text())
|
||
|
||
|
||
@app.get("/result/{job_id}/pdf")
async def get_pdf(job_id: str):
    """Download the PDF report for a finished job."""
    job = await get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job nicht gefunden")
    if job["status"] != "completed":
        raise HTTPException(status_code=400, detail=f"Job noch nicht fertig: {job['status']}")

    report_pdf = Path(job["pdf_path"])
    # PDF may have been cleaned up since completion — treat as 500.
    if not report_pdf.exists():
        raise HTTPException(status_code=500, detail="PDF nicht gefunden")

    return FileResponse(
        report_pdf,
        media_type="application/pdf",
        filename=f"gwoe-bericht-{job_id[:8]}.pdf"
    )
|
||
|
||
|
||
def _row_to_api(row: dict) -> dict:
    """Map a DB assessment row (snake_case keys) to the frontend JSON shape.

    Shared by /api/assessments and /api/assessment — previously the same
    21-key mapping was duplicated verbatim in both endpoints, which had
    already let them drift apart once a key is added in only one place.
    """
    return {
        "drucksache": row.get("drucksache"),
        "title": row.get("title"),
        "fraktionen": row.get("fraktionen", []),
        "datum": row.get("datum"),
        "link": row.get("link"),
        "bundesland": row.get("bundesland"),
        "gwoeScore": row.get("gwoe_score"),
        "gwoeBegründung": row.get("gwoe_begruendung"),
        "gwoeMatrix": row.get("gwoe_matrix", []),
        "gwoeSchwerpunkt": row.get("gwoe_schwerpunkt", []),
        "wahlprogrammScores": row.get("wahlprogramm_scores", []),
        "verbesserungen": row.get("verbesserungen", []),
        "stärken": row.get("staerken", []),
        "schwächen": row.get("schwaechen", []),
        "empfehlung": row.get("empfehlung"),
        "empfehlungSymbol": row.get("empfehlung_symbol"),
        "verbesserungspotenzial": row.get("verbesserungspotenzial"),
        "themen": row.get("themen", []),
        "antragZusammenfassung": row.get("antrag_zusammenfassung"),
        "antragKernpunkte": row.get("antrag_kernpunkte", []),
    }


# API: Load assessments from database
@app.get("/api/assessments")
async def list_assessments(bundesland: Optional[str] = None):
    """Return assessments from database, optionally filtered by Bundesland.

    ``bundesland="ALL"`` and missing parameter both mean "no filter".
    """
    rows = await get_all_assessments(bundesland)
    # Convert DB format to frontend format
    return [_row_to_api(row) for row in rows]


# API: Get single assessment (use query param for drucksache with /)
@app.get("/api/assessment")
async def get_single_assessment(drucksache: str):
    """Get a single assessment by drucksache ID.

    Raises:
        HTTPException 404: no assessment stored for this drucksache.
    """
    drucksache = validate_drucksache(drucksache)
    row = await get_assessment(drucksache)
    if not row:
        raise HTTPException(status_code=404, detail="Assessment nicht gefunden")

    return _row_to_api(row)
|
||
|
||
|
||
# API: Generate PDF on demand for an assessment
@app.get("/api/assessment/pdf")
async def download_assessment_pdf(drucksache: str):
    """Generate and download the PDF report for an assessment.

    The PDF is rendered lazily on first request and cached on disk under
    the slash-sanitized drucksache name in the reports directory.
    """
    from .models import Assessment

    drucksache = validate_drucksache(drucksache)
    row = await get_assessment(drucksache)
    if not row:
        raise HTTPException(status_code=404, detail="Assessment nicht gefunden")

    # Check if PDF already exists
    safe_name = drucksache.replace("/", "-")
    pdf_path = settings.reports_dir / f"{safe_name}.pdf"

    if not pdf_path.exists():
        # Convert DB row to Assessment model for report generation.
        # Scalars fall back to ""/0 so validation doesn't choke on NULLs.
        payload = {
            "drucksache": row.get("drucksache"),
            "title": row.get("title"),
            "fraktionen": row.get("fraktionen", []),
            "datum": row.get("datum"),
            "link": row.get("link"),
            "gwoe_score": row.get("gwoe_score") or 0,
            "gwoe_begruendung": row.get("gwoe_begruendung") or "",
            "gwoe_matrix": row.get("gwoe_matrix", []),
            "gwoe_schwerpunkt": row.get("gwoe_schwerpunkt", []),
            "wahlprogramm_scores": row.get("wahlprogramm_scores", []),
            "verbesserungen": row.get("verbesserungen", []),
            "staerken": row.get("staerken", []),
            "schwaechen": row.get("schwaechen", []),
            "empfehlung": row.get("empfehlung") or "",
            "empfehlung_symbol": row.get("empfehlung_symbol") or "",
            "verbesserungspotenzial": row.get("verbesserungspotenzial") or "",
            "themen": row.get("themen", []),
            "antrag_zusammenfassung": row.get("antrag_zusammenfassung") or "",
            "antrag_kernpunkte": row.get("antrag_kernpunkte", []),
        }

        try:
            await generate_pdf_report(
                Assessment(**payload),
                pdf_path,
                bundesland=row.get("bundesland"),
            )
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"PDF-Generierung fehlgeschlagen: {e}")

    return FileResponse(
        pdf_path,
        media_type="application/pdf",
        filename=f"gwoe-{safe_name}.pdf"
    )
|
||
|
||
|
||
# API: Search internal DB only
@app.get("/api/search")
async def search_internal(
    q: str,
    bundesland: str = "NRW",
    limit: int = 50
):
    """Search the internal assessments database only."""
    q = validate_search_query(q)
    rows = await search_assessments(q, bundesland, limit)

    # Already-checked documents: status is always "checked" here.
    return [
        {
            "drucksache": row.get("drucksache"),
            "title": row.get("title"),
            "fraktionen": row.get("fraktionen", []),
            "datum": row.get("datum"),
            "link": row.get("link"),
            "bundesland": bundesland,
            "gwoeScore": row.get("gwoe_score"),
            "themen": row.get("themen", []),
            "status": "checked",
        }
        for row in rows
    ]
|
||
|
||
|
||
# API: Search external parliament portal (Landtag)
@app.get("/api/search-landtag")
async def search_landtag(
    q: str,
    bundesland: str = "NRW",
    limit: int = 20
):
    """Search an external parliament portal (e.g. NRW OPAL).

    Returns results that can be analyzed with "Jetzt prüfen".

    Requires a concrete Bundesland — the special "ALL" / Bundesweit mode
    cannot pick a single Landtag adapter and is rejected with HTTP 400.
    """
    q = validate_search_query(q)
    if bundesland == "ALL" or not bundesland:
        raise HTTPException(
            status_code=400,
            detail="Landtag-Suche benötigt ein konkretes Bundesland",
        )
    adapter = get_adapter(bundesland)
    if not adapter:
        return {"error": f"Bundesland {bundesland} noch nicht unterstützt"}

    try:
        external = await adapter.search(q, limit)
        # Documents from the portal are unchecked until analyzed.
        return [
            {
                "drucksache": doc.drucksache,
                "title": doc.title,
                "fraktionen": doc.fraktionen,
                "datum": doc.datum,
                "link": doc.link,
                "bundesland": bundesland,
                "typ": doc.typ,
                "gwoeScore": None,
                "status": "unchecked",
            }
            for doc in external
        ]
    except Exception as e:
        logger.exception("Landtag search error for q=%r bundesland=%s", q, bundesland)
        return {"error": f"Suchfehler: {str(e)}"}
|
||
|
||
|
||
# API: Analyze a document from parliament portal
@app.post("/api/analyze-drucksache")
@limiter.limit("10/minute")
async def analyze_drucksache(
    request: Request,
    background_tasks: BackgroundTasks,
    drucksache: str = Form(...),
    bundesland: str = Form("NRW"),
    model: str = Form("qwen-plus")
):
    """Download a document from the parliament portal and analyze it."""
    drucksache = validate_drucksache(drucksache)

    # Skip documents that already have an assessment in the DB.
    if await get_assessment(drucksache):
        return {"status": "already_checked", "drucksache": drucksache}

    # Resolve the Bundesland-specific portal adapter.
    adapter = get_adapter(bundesland)
    if not adapter:
        raise HTTPException(status_code=400, detail=f"Bundesland {bundesland} nicht unterstützt")

    # Fetch the document's full text; a missing document yields 404.
    antrag_text = await adapter.download_text(drucksache)
    if not antrag_text:
        raise HTTPException(status_code=404, detail=f"Dokument {drucksache} nicht gefunden")

    # Metadata (title, date, link) for the DB record.
    doc = await adapter.get_document(drucksache)

    # Queue the background analysis under a fresh job id.
    job_id = str(uuid.uuid4())
    await create_job(job_id, antrag_text[:500], bundesland, model)

    background_tasks.add_task(
        run_drucksache_analysis,
        job_id, drucksache, antrag_text, bundesland, model, doc
    )

    return {"status": "queued", "job_id": job_id, "drucksache": drucksache}
|
||
|
||
|
||
async def run_drucksache_analysis(
    job_id: str,
    drucksache: str,
    text: str,
    bundesland: str,
    model: str,
    doc
) -> None:
    """Background task for drucksache analysis.

    Runs the LLM analysis, upserts the result into the assessments DB,
    renders the HTML/PDF reports and records the outcome on the job row.
    Never raises: failures are logged and stored as status="failed".

    ``doc`` is the portal metadata object from adapter.get_document()
    (may be None when the lookup failed) — only .title/.datum/.link are
    read here, as fallbacks for fields the LLM did not fill.
    """
    try:
        await update_job(job_id, status="processing")

        # Run LLM analysis
        assessment = await analyze_antrag(text, bundesland, model)

        # Prepare data for DB — camelCase keys are the stored frontend
        # format expected by upsert_assessment; do not rename them.
        assessment_data = {
            "drucksache": drucksache,
            "title": assessment.title or (doc.title if doc else f"Drucksache {drucksache}"),
            "fraktionen": assessment.fraktionen,
            "datum": assessment.datum or (doc.datum if doc else ""),
            "link": doc.link if doc else "",
            "bundesland": bundesland,
            "gwoeScore": assessment.gwoe_score,
            "gwoeBegründung": assessment.gwoe_begruendung,
            "gwoeMatrix": [m.model_dump() for m in assessment.gwoe_matrix],
            "gwoeSchwerpunkt": assessment.gwoe_schwerpunkt,
            "wahlprogrammScores": [w.model_dump() for w in assessment.wahlprogramm_scores],
            "verbesserungen": [v.model_dump() for v in assessment.verbesserungen],
            "stärken": assessment.staerken,
            "schwächen": assessment.schwaechen,
            "empfehlung": assessment.empfehlung,
            "empfehlungSymbol": assessment.empfehlung_symbol,
            "verbesserungspotenzial": assessment.verbesserungspotenzial,
            "themen": assessment.themen,
            "antragZusammenfassung": assessment.antrag_zusammenfassung,
            "antragKernpunkte": assessment.antrag_kernpunkte,
            "source": "webapp",
            "model": model,
        }

        # Save to DB
        await upsert_assessment(assessment_data)

        # Generate reports
        html_path = settings.reports_dir / f"{job_id}.html"
        pdf_path = settings.reports_dir / f"{job_id}.pdf"

        await generate_html_report(assessment, html_path, bundesland=bundesland)
        await generate_pdf_report(assessment, pdf_path, bundesland=bundesland)

        await update_job(
            job_id,
            status="completed",
            result=assessment.model_dump_json(),
            html_path=str(html_path),
            pdf_path=str(pdf_path),
        )
    except Exception as e:
        # Full stack trace via logger.exception, NOT via print — this way
        # it lands in the structured container log, formatted by the
        # logging framework.
        logger.exception("run_drucksache_analysis failed for drucksache=%s", drucksache)
        await update_job(job_id, status="failed", error=str(e))
|
||
|
||
|
||
# API: List available Bundesländer
@app.get("/api/bundeslaender")
async def list_bundeslaender():
    """List available bundesländer with their status.

    Includes the synthetic "ALL" / Bundesweit entry as the first item so
    that the frontend can render it directly. ``parlament_name`` is added
    so the detail view can show the source parliament without an extra
    backend round-trip.
    """
    entries = [{
        "code": "ALL",
        "name": "🌍 Bundesweit",
        "parlament_name": None,
        "active": True,
    }]
    for land in alle_bundeslaender():
        entries.append({
            "code": land.code,
            "name": land.name,
            "parlament_name": land.parlament_name,
            "active": land.aktiv,
        })
    return entries
|
||
|
||
|
||
# === Quellen / Programme ===

@app.get("/quellen", response_class=HTMLResponse)
async def quellen_page(request: Request):
    """Sources page listing all election and party programmes."""
    context = {
        "request": request,
        "app_name": settings.app_name,
        "programmes": get_programme_info(),
        "status": get_indexing_status(),
    }
    return templates.TemplateResponse("quellen.html", context)
|
||
|
||
|
||
@app.get("/api/wahlprogramm-cite")
async def wahlprogramm_cite(pid: str = "", pdf: str = "", seite: int = 1, q: str = ""):
    """Render one Wahlprogramm page with the cited passage highlighted in yellow.

    Issue #47: clicking a citation source in a report should jump straight
    to the spot in the programme PDF, with the quoted snippet visually
    marked. Instead of serving the whole PDF (browser scrolls to #page=N
    and the reader has to search by hand), we return a single-page PDF
    carrying ``add_highlight_annot`` annotations on the bounding boxes
    found via ``page.search_for``.

    Accepts ``pid`` (PROGRAMME key) OR ``pdf`` (file name such as
    ``spd-grundsatzprogramm.pdf``). The latter enables retroactive use of
    pre-#47 URLs in the frontend, where only the static path
    ``/static/referenzen/<pdf>#page=<N>`` is stored.

    Security: ``pid`` must be a registered PROGRAMME key — this prevents
    path traversal and arbitrary file reads from the referenzen directory.
    ``seite`` is coerced to int by Pydantic; ``q`` is capped at 200
    characters inside the renderer.
    """
    # Reverse lookup: pdf file name → programm id when only pdf was given.
    if pdf and not pid:
        pid = next(
            (key for key, meta in PROGRAMME.items() if meta.get("pdf") == pdf),
            pid,
        )
    if pid not in PROGRAMME:
        raise HTTPException(status_code=404, detail="Unbekanntes Wahlprogramm")
    if not 1 <= seite <= 2000:
        raise HTTPException(status_code=400, detail="Ungültige Seitennummer")

    pdf_bytes = render_highlighted_page(pid, seite, q)
    if pdf_bytes is None:
        raise HTTPException(
            status_code=404,
            detail="Wahlprogramm-PDF oder Seite nicht verfügbar",
        )

    meta = PROGRAMME[pid]
    download_name = meta.get("pdf", f"{pid}.pdf")
    return Response(
        content=pdf_bytes,
        media_type="application/pdf",
        headers={
            "Content-Disposition": f'inline; filename="{download_name}"',
            "Cache-Control": "public, max-age=86400",
        },
    )
|
||
|
||
|
||
@app.get("/api/programme")
async def list_programme():
    """List all available programmes."""
    programmes = get_programme_info()
    return programmes
|
||
|
||
|
||
@app.get("/api/programme/status")
async def programme_status():
    """Report the indexing status of all programmes."""
    status = get_indexing_status()
    return status
|
||
|
||
|
||
@app.post("/api/programme/index")
@limiter.limit("3/minute")
async def index_programme(
    request: Request,
    background_tasks: BackgroundTasks,
    # Fix: annotation said plain ``str`` while the default is None —
    # Optional[str] reflects the actual contract (parameter may be absent).
    programm_id: Optional[str] = Form(None),
    all_programmes: bool = Form(False),
):
    """Index programme(s) for semantic search.

    Pass either a single ``programm_id`` (must be a registered PROGRAMME
    key) or ``all_programmes=True``. Indexing runs as a background task;
    progress can be polled via /api/programme/status.

    Raises:
        HTTPException 400: neither option given, or unknown programm_id.
    """
    pdf_dir = static_dir / "referenzen"

    if all_programmes:
        # Index sequentially to avoid DB locks
        async def index_all_sequential():
            for prog_id in PROGRAMME.keys():
                try:
                    index_programm(prog_id, pdf_dir)
                except Exception:
                    logger.exception("Error indexing programme %s", prog_id)
        background_tasks.add_task(index_all_sequential)
        return {"status": "indexing", "programmes": list(PROGRAMME.keys())}

    if programm_id and programm_id in PROGRAMME:
        background_tasks.add_task(index_programm, programm_id, pdf_dir)
        return {"status": "indexing", "programm_id": programm_id}

    raise HTTPException(status_code=400, detail="Ungültiges Programm")
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
# Auswertungen #58 — Bundesland × Partei × Wahlperiode aggregation view
# ─────────────────────────────────────────────────────────────────────────────


@app.get("/auswertungen", response_class=HTMLResponse)
async def auswertungen_page(request: Request):
    """Static page that loads the matrix endpoints via fetch()."""
    from .wahlperioden import all_wahlperioden
    context = {
        "request": request,
        "app_name": settings.app_name,
        "wahlperioden": sorted(all_wahlperioden()),
    }
    return templates.TemplateResponse("auswertungen.html", context)
|
||
|
||
|
||
@app.get("/api/auswertungen/matrix")
async def auswertungen_matrix(wahlperiode: Optional[str] = None):
    """2D matrix Bundesland × Partei with count + average GWÖ score."""
    from .auswertungen import aggregate_matrix

    matrix = aggregate_matrix(filter_wp=wahlperiode)
    return matrix
|
||
|
||
|
||
@app.get("/api/auswertungen/zeitreihe")
async def auswertungen_zeitreihe(bundesland: str, partei: str):
    """Score trajectory of one (Bundesland, Partei) pair across all WPs."""
    from .auswertungen import aggregate_zeitreihe

    zeitreihe = aggregate_zeitreihe(bundesland, partei)
    return zeitreihe
|
||
|
||
|
||
@app.get("/api/auswertungen/export.csv")
async def auswertungen_export_csv():
    """Long-format CSV export of all assessments. Also covers #45."""
    from .auswertungen import export_long_format

    return Response(
        content=export_long_format(),
        media_type="text/csv",
        headers={"Content-Disposition": 'attachment; filename="gwoe-assessments.csv"'},
    )
|
||
|
||
|
||
# Health check
@app.get("/health")
async def health():
    """Liveness probe: report service status and running version."""
    probe = {"status": "ok", "version": settings.app_version}
    return probe
|