def _quote_plus_capped(value: str, limit: int = 160) -> str:
    """Percent-encode *value* for a query string, capped at *limit* characters.

    The cap is applied on whole source characters, so the result never ends
    in a dangling ``%X`` escape or in half of a multi-byte UTF-8 sequence
    (which is what slicing the already-encoded string can produce).
    """
    from urllib.parse import quote_plus

    encoded = quote_plus(value)
    if len(encoded) <= limit:
        return encoded

    pieces = []
    used = 0
    for char in value:
        piece = quote_plus(char)
        if used + len(piece) > limit:
            break
        pieces.append(piece)
        used += len(piece)
    return "".join(pieces)


def _shorten_snippet(text: Optional[str], limit: int = 320) -> str:
    """Strip *text* and cut it to at most *limit* characters, ending in an ellipsis."""
    snippet = (text or "").strip()
    if len(snippet) > limit:
        # Reserve three characters, as the original 317-character cut did.
        snippet = snippet[: limit - 3].rstrip() + "…"
    return snippet


@app.get("/api/quellen/search")
async def quellen_search(
    request: Request,
    q: str = Query(..., min_length=2, max_length=200, description="Suchbegriff"),
    # NOTE(review): newer FastAPI releases deprecate ``regex=`` in favour of
    # ``pattern=``; kept as-is because the installed version is not visible here.
    filter: str = Query("current", regex="^(current|all)$"),
    bundesland: Optional[str] = None,
    partei: Optional[str] = None,
    top_k: int = Query(20, ge=1, le=50),
):
    """Semantic full-text search across all indexed election programmes.

    Uses the DashScope embeddings (text-embedding-v4): fuzzy on word forms and
    also finds synonymous / related terms.

    Query parameters:
        q: Search term (2-200 characters).
        filter: ``current`` returns only programmes whose ``gueltig_bis`` is
            unset; ``all`` also includes historical programmes.
        bundesland: Optional restriction passed through to the chunk search.
        partei: Optional restriction; passed as a single-element party list.
        top_k: Maximum number of hits returned (1-50).

    Returns:
        A JSON response ``{query, filter, n_results, results}`` where each
        result carries ``programm_id, name, partei, bundesland, typ, wp, seite,
        text, gueltig_ab, gueltig_bis, similarity, pdf_url``. If the chunk
        search raises, a 500 response with an ``error`` message is returned
        instead.
    """
    from .embeddings import find_relevant_chunks
    from .programme import get_programm

    # NOTE(review): this call is synchronous inside an async handler; if it
    # performs network I/O it blocks the event loop — consider a threadpool.
    try:
        chunks = find_relevant_chunks(
            query=q,
            parteien=[partei] if partei else None,
            bundesland=bundesland,
            top_k=top_k * 2,  # over-fetch: historical hits are filtered below
            min_similarity=0.30,
        )
    except Exception:
        logger.exception("quellen_search: embedding failed")
        return JSONResponse({"error": "Suche fehlgeschlagen"}, status_code=500)

    # Encode the query once; it is identical for every hit's citation link.
    encoded_query = _quote_plus_capped(q, 160)

    results = []
    for c in chunks:
        pid = c.get("programm_id")
        prog = get_programm(pid) if pid else None
        if prog is None:
            continue
        if filter == "current" and prog.get("gueltig_bis") is not None:
            continue  # historical programme filtered out
        seite = c.get("seite") or 1
        # Encode the id as well so unusual characters cannot break the URL.
        encoded_pid = _quote_plus_capped(str(pid), len(str(pid)) * 12)
        results.append({
            "programm_id": pid,
            "name": prog.get("name", pid),
            "partei": prog.get("partei"),
            "bundesland": prog.get("bundesland"),
            "typ": prog.get("typ"),
            "wp": prog.get("wp"),
            "seite": seite,
            "text": _shorten_snippet(c.get("text")),
            "gueltig_ab": prog.get("gueltig_ab"),
            "gueltig_bis": prog.get("gueltig_bis"),
            # ``or 0.0`` also covers a key that is present but None.
            "similarity": round(float(c.get("similarity") or 0.0), 4),
            "pdf_url": (
                f"/api/wahlprogramm-cite?pid={encoded_pid}&seite={seite}"
                f"&q={encoded_query}#page={seite}"
            ),
        })
        if len(results) >= top_k:
            break

    return JSONResponse({
        "query": q,
        "filter": filter,
        "n_results": len(results),
        "results": results,
    })
{ + flex: 1 1 240px; + min-width: 200px; + padding: 8px 10px; + font-family: var(--font-mono); + font-size: 13px; + border: 1px solid var(--ecg-border); + border-radius: 4px; + background: var(--ecg-card-bg); + color: var(--ecg-dark); +} +.search-box button { + padding: 8px 16px; + font-family: var(--font-mono); + font-size: 12px; + background: var(--ecg-teal); + color: #fff; + border: none; + border-radius: 4px; + cursor: pointer; +} +.search-box button:disabled { opacity: 0.5; cursor: wait; } +.search-filter { + display: inline-flex; + gap: 14px; + font-size: 12px; + font-family: var(--font-mono); +} +.search-filter label { cursor: pointer; } +.search-results { margin-top: 12px; } +.search-hit { + padding: 10px 12px; + margin-bottom: 8px; + background: var(--ecg-card-bg); + border: 1px solid var(--ecg-border); + border-radius: 4px; + border-left: 3px solid var(--ecg-teal); +} +.search-hit.historic { border-left-color: var(--ecg-dark); opacity: 0.85; } +.search-hit-meta { + font-family: var(--font-mono); + font-size: 11px; + opacity: 0.7; + margin-top: 2px; + margin-bottom: 6px; +} +.search-hit-text { + font-size: 13px; + line-height: 1.5; + color: var(--ecg-dark); +} +.search-hit-actions { + margin-top: 6px; + font-size: 11px; + font-family: var(--font-mono); +} +.search-hit-actions a { color: var(--ecg-teal); margin-right: 12px; } +.search-status { font-size: 12px; opacity: 0.7; margin-top: 8px; font-family: var(--font-mono); } +.gueltig-pill { + display: inline-block; + padding: 1px 6px; + font-size: 10px; + font-family: var(--font-mono); + border-radius: 3px; + margin-left: 4px; +} +.gueltig-pill.aktuell { background: var(--ecg-green); color: #fff; } +.gueltig-pill.historisch { background: var(--ecg-dark); color: #fff; opacity: 0.7; } {% endblock %} @@ -113,6 +188,27 @@ + +
+ Semantische Suche über alle indizierten Wahl- und Grundsatzprogramme. + Wortunscharf — Endungen sind egal, verwandte Begriffe werden ebenfalls + gefunden. +
+ +