2026-03-28 22:30:24 +01:00
|
|
|
"""SQLite database for job tracking."""
|
|
|
|
|
|
|
|
|
|
import aiosqlite
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
from typing import Optional
|
|
|
|
|
|
|
|
|
|
from .config import settings
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def init_db():
|
|
|
|
|
"""Initialize database with tables."""
|
|
|
|
|
async with aiosqlite.connect(settings.db_path) as db:
|
|
|
|
|
await db.execute("""
|
|
|
|
|
CREATE TABLE IF NOT EXISTS jobs (
|
|
|
|
|
id TEXT PRIMARY KEY,
|
|
|
|
|
status TEXT NOT NULL DEFAULT 'queued',
|
|
|
|
|
input_preview TEXT,
|
|
|
|
|
bundesland TEXT DEFAULT 'NRW',
|
|
|
|
|
model TEXT DEFAULT 'qwen-plus',
|
|
|
|
|
result TEXT,
|
|
|
|
|
html_path TEXT,
|
|
|
|
|
pdf_path TEXT,
|
|
|
|
|
error TEXT,
|
|
|
|
|
user_id TEXT,
|
|
|
|
|
created_at TEXT NOT NULL,
|
|
|
|
|
updated_at TEXT NOT NULL
|
|
|
|
|
)
|
|
|
|
|
""")
|
|
|
|
|
|
|
|
|
|
# Assessments table for pre-computed + new analyses
|
|
|
|
|
await db.execute("""
|
|
|
|
|
CREATE TABLE IF NOT EXISTS assessments (
|
|
|
|
|
drucksache TEXT PRIMARY KEY,
|
|
|
|
|
title TEXT,
|
|
|
|
|
fraktionen TEXT, -- JSON array
|
|
|
|
|
datum TEXT,
|
|
|
|
|
link TEXT,
|
|
|
|
|
bundesland TEXT DEFAULT 'NRW',
|
|
|
|
|
|
|
|
|
|
gwoe_score REAL,
|
|
|
|
|
gwoe_begruendung TEXT,
|
|
|
|
|
gwoe_matrix TEXT, -- JSON array
|
|
|
|
|
gwoe_schwerpunkt TEXT, -- JSON array
|
|
|
|
|
|
|
|
|
|
wahlprogramm_scores TEXT, -- JSON array
|
|
|
|
|
|
|
|
|
|
verbesserungen TEXT, -- JSON array
|
|
|
|
|
staerken TEXT, -- JSON array
|
|
|
|
|
schwaechen TEXT, -- JSON array
|
|
|
|
|
|
|
|
|
|
empfehlung TEXT,
|
|
|
|
|
empfehlung_symbol TEXT,
|
|
|
|
|
verbesserungspotenzial TEXT,
|
|
|
|
|
|
|
|
|
|
themen TEXT, -- JSON array
|
|
|
|
|
antrag_zusammenfassung TEXT,
|
|
|
|
|
antrag_kernpunkte TEXT, -- JSON array
|
|
|
|
|
|
|
|
|
|
source TEXT DEFAULT 'batch', -- 'batch' or 'webapp'
|
|
|
|
|
model TEXT,
|
|
|
|
|
created_at TEXT NOT NULL,
|
|
|
|
|
updated_at TEXT NOT NULL
|
|
|
|
|
)
|
|
|
|
|
""")
|
|
|
|
|
|
|
|
|
|
await db.commit()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def create_job(
|
|
|
|
|
job_id: str,
|
|
|
|
|
input_preview: str,
|
|
|
|
|
bundesland: str = "NRW",
|
|
|
|
|
model: str = "qwen-plus",
|
|
|
|
|
user_id: Optional[str] = None,
|
|
|
|
|
) -> dict:
|
|
|
|
|
"""Create a new analysis job."""
|
|
|
|
|
now = datetime.utcnow().isoformat()
|
|
|
|
|
async with aiosqlite.connect(settings.db_path) as db:
|
|
|
|
|
await db.execute(
|
|
|
|
|
"""
|
|
|
|
|
INSERT INTO jobs (id, input_preview, bundesland, model, user_id, created_at, updated_at)
|
|
|
|
|
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
|
|
|
""",
|
|
|
|
|
(job_id, input_preview, bundesland, model, user_id, now, now),
|
|
|
|
|
)
|
|
|
|
|
await db.commit()
|
|
|
|
|
return {"id": job_id, "status": "queued", "created_at": now}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def get_job(job_id: str) -> Optional[dict]:
|
|
|
|
|
"""Get job by ID."""
|
|
|
|
|
async with aiosqlite.connect(settings.db_path) as db:
|
|
|
|
|
db.row_factory = aiosqlite.Row
|
|
|
|
|
cursor = await db.execute("SELECT * FROM jobs WHERE id = ?", (job_id,))
|
|
|
|
|
row = await cursor.fetchone()
|
|
|
|
|
if row:
|
|
|
|
|
return dict(row)
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def update_job(job_id: str, **kwargs) -> bool:
|
|
|
|
|
"""Update job fields."""
|
|
|
|
|
if not kwargs:
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
kwargs["updated_at"] = datetime.utcnow().isoformat()
|
|
|
|
|
|
|
|
|
|
fields = ", ".join(f"{k} = ?" for k in kwargs.keys())
|
|
|
|
|
values = list(kwargs.values()) + [job_id]
|
|
|
|
|
|
|
|
|
|
async with aiosqlite.connect(settings.db_path) as db:
|
|
|
|
|
await db.execute(f"UPDATE jobs SET {fields} WHERE id = ?", values)
|
|
|
|
|
await db.commit()
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def get_user_jobs(user_id: str, limit: int = 50) -> list[dict]:
|
|
|
|
|
"""Get jobs for a user (for history page)."""
|
|
|
|
|
async with aiosqlite.connect(settings.db_path) as db:
|
|
|
|
|
db.row_factory = aiosqlite.Row
|
|
|
|
|
cursor = await db.execute(
|
|
|
|
|
"SELECT * FROM jobs WHERE user_id = ? ORDER BY created_at DESC LIMIT ?",
|
|
|
|
|
(user_id, limit),
|
|
|
|
|
)
|
|
|
|
|
rows = await cursor.fetchall()
|
|
|
|
|
return [dict(row) for row in rows]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def upsert_assessment(data: dict) -> bool:
|
|
|
|
|
"""Insert or update an assessment."""
|
|
|
|
|
import json
|
|
|
|
|
now = datetime.utcnow().isoformat()
|
|
|
|
|
|
|
|
|
|
async with aiosqlite.connect(settings.db_path) as db:
|
|
|
|
|
await db.execute("""
|
|
|
|
|
INSERT INTO assessments (
|
|
|
|
|
drucksache, title, fraktionen, datum, link, bundesland,
|
|
|
|
|
gwoe_score, gwoe_begruendung, gwoe_matrix, gwoe_schwerpunkt,
|
|
|
|
|
wahlprogramm_scores, verbesserungen, staerken, schwaechen,
|
|
|
|
|
empfehlung, empfehlung_symbol, verbesserungspotenzial,
|
|
|
|
|
themen, antrag_zusammenfassung, antrag_kernpunkte,
|
|
|
|
|
source, model, created_at, updated_at
|
|
|
|
|
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
|
|
|
ON CONFLICT(drucksache) DO UPDATE SET
|
|
|
|
|
title = excluded.title,
|
|
|
|
|
gwoe_score = excluded.gwoe_score,
|
|
|
|
|
gwoe_begruendung = excluded.gwoe_begruendung,
|
|
|
|
|
gwoe_matrix = excluded.gwoe_matrix,
|
|
|
|
|
updated_at = excluded.updated_at
|
|
|
|
|
""", (
|
|
|
|
|
data.get("drucksache"),
|
|
|
|
|
data.get("title"),
|
|
|
|
|
json.dumps(data.get("fraktionen", [])),
|
|
|
|
|
data.get("datum"),
|
|
|
|
|
data.get("link"),
|
|
|
|
|
data.get("bundesland", "NRW"),
|
|
|
|
|
data.get("gwoeScore"),
|
|
|
|
|
data.get("gwoeBegründung"),
|
|
|
|
|
json.dumps(data.get("gwoeMatrix", [])),
|
|
|
|
|
json.dumps(data.get("gwoeSchwerpunkt", [])),
|
|
|
|
|
json.dumps(data.get("wahlprogrammScores", [])),
|
|
|
|
|
json.dumps(data.get("verbesserungen", [])),
|
|
|
|
|
json.dumps(data.get("stärken", [])),
|
|
|
|
|
json.dumps(data.get("schwächen", [])),
|
|
|
|
|
data.get("empfehlung"),
|
|
|
|
|
data.get("empfehlungSymbol"),
|
|
|
|
|
data.get("verbesserungspotenzial"),
|
|
|
|
|
json.dumps(data.get("themen", [])),
|
|
|
|
|
data.get("antragZusammenfassung"),
|
|
|
|
|
json.dumps(data.get("antragKernpunkte", [])),
|
|
|
|
|
data.get("source", "webapp"),
|
|
|
|
|
data.get("model"),
|
|
|
|
|
now, now
|
|
|
|
|
))
|
|
|
|
|
await db.commit()
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def get_assessment(drucksache: str) -> Optional[dict]:
|
|
|
|
|
"""Get assessment by drucksache ID."""
|
|
|
|
|
import json
|
|
|
|
|
async with aiosqlite.connect(settings.db_path) as db:
|
|
|
|
|
db.row_factory = aiosqlite.Row
|
|
|
|
|
cursor = await db.execute(
|
|
|
|
|
"SELECT * FROM assessments WHERE drucksache = ?", (drucksache,)
|
|
|
|
|
)
|
|
|
|
|
row = await cursor.fetchone()
|
|
|
|
|
if row:
|
|
|
|
|
d = dict(row)
|
|
|
|
|
# Parse JSON fields
|
|
|
|
|
for field in ["fraktionen", "gwoe_matrix", "gwoe_schwerpunkt",
|
|
|
|
|
"wahlprogramm_scores", "verbesserungen", "staerken",
|
|
|
|
|
"schwaechen", "themen", "antrag_kernpunkte"]:
|
|
|
|
|
if d.get(field):
|
|
|
|
|
try:
|
|
|
|
|
d[field] = json.loads(d[field])
|
|
|
|
|
except:
|
|
|
|
|
pass
|
|
|
|
|
return d
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
Bundesland filter & transparency: stringent split + visible source (#8)
Brings the Bundesland-Dropdown from a cosmetic header widget to a real
filter that propagates through every layer (Listing, internal search,
statistics, party/tag filters, upload mode), and at the same time makes
the source parliament visible in every place where assessments from
multiple bundesländer can be mixed.
Backend
- database.get_all_assessments(bundesland=None) — new optional filter,
"ALL" treated as None.
- database.search_assessments — bug fix: previous `if bundesland:`
branch incorrectly added a `WHERE bundesland='ALL'` clause; now
guarded with `bundesland and bundesland != "ALL"`.
- main.list_assessments — accepts ?bundesland= query param, includes the
bundesland field in the response so the frontend can render badges.
- main.get_single_assessment — also includes bundesland in the response
so the detail header can show the source parlament.
- main.search_landtag — early HTTP 400 when bundesland is missing or
"ALL"; the live Landtag adapter cannot serve a synthetic Bundesweit
request.
- main.index() and main.list_bundeslaender — synthetic "🌍 Bundesweit"
entry prepended to the bundesländer list (kept out of bundeslaender.py
on purpose — ALL is not a real state). Both endpoints additionally
expose a parlament_names map so the frontend can render the source
parliament without an extra round-trip.
Report (PDF + HTML)
- generate_html_report / generate_pdf_report — new optional bundesland
parameter. When set, the report header carries the parliament name
("Landtag von Sachsen-Anhalt", "Landtag Nordrhein-Westfalen", …)
beside the title. Three call sites updated: run_analysis,
run_drucksache_analysis, download_assessment_pdf.
Frontend (templates/index.html)
- Header dropdown gets the synthetic ALL entry as first option;
initial currentBundesland is now 'ALL' (was 'NRW').
- localStorage persistence: changeBundesland writes, DOMContentLoaded
reads and validates against the visible options.
- changeBundesland resets the score / party / tag filter state, syncs
the upload-mode bundesland select, disables the Landtag-Suche button
+ tooltip when ALL, and toggles a data-mode attribute on
.list-content (used by CSS to show/hide the per-item bundesland
badge).
- loadAssessments now sends ?bundesland=… so the API does the actual
filtering. updateStats renders an additional per-bundesland average
block (Ø NRW: x · Ø LSA: y) when in ALL mode and the loaded list
spans more than one bundesland.
- renderList prepends a small "bl-badge" beside the Drucksachen-Nummer.
Hidden in single-bundesland mode via CSS selector to avoid clutter.
- showDetail header now shows the parliament name as its own line
(.detail-parlament).
- searchLandtag has an early-out alert if currentBundesland === 'ALL',
saving a network round-trip.
- Upload-Mode bundesland select now starts with a "— Bundesland wählen
—" placeholder (no auto-default), and startAnalysis validates that a
concrete bundesland was chosen.
CSS
- .bl-badge plus the .list-content[data-mode="single"] hide rule.
- .detail-parlament for the detail header line.
- .header-parlament for the PDF report header line.
Resolves #8.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-07 23:00:39 +02:00
|
|
|
async def get_all_assessments(bundesland: str = None) -> list[dict]:
|
|
|
|
|
"""Get all assessments from database, optionally filtered by Bundesland.
|
|
|
|
|
|
|
|
|
|
The special value ``"ALL"`` and ``None`` mean no filter — both behave
|
|
|
|
|
identically and return every row. Any other value becomes a strict
|
|
|
|
|
``WHERE bundesland = ?`` match.
|
|
|
|
|
"""
|
2026-03-28 22:30:24 +01:00
|
|
|
import json
|
Bundesland filter & transparency: stringent split + visible source (#8)
Brings the Bundesland-Dropdown from a cosmetic header widget to a real
filter that propagates through every layer (Listing, internal search,
statistics, party/tag filters, upload mode), and at the same time makes
the source parliament visible in every place where assessments from
multiple bundesländer can be mixed.
Backend
- database.get_all_assessments(bundesland=None) — new optional filter,
"ALL" treated as None.
- database.search_assessments — bug fix: previous `if bundesland:`
branch incorrectly added a `WHERE bundesland='ALL'` clause; now
guarded with `bundesland and bundesland != "ALL"`.
- main.list_assessments — accepts ?bundesland= query param, includes the
bundesland field in the response so the frontend can render badges.
- main.get_single_assessment — also includes bundesland in the response
so the detail header can show the source parlament.
- main.search_landtag — early HTTP 400 when bundesland is missing or
"ALL"; the live Landtag adapter cannot serve a synthetic Bundesweit
request.
- main.index() and main.list_bundeslaender — synthetic "🌍 Bundesweit"
entry prepended to the bundesländer list (kept out of bundeslaender.py
on purpose — ALL is not a real state). Both endpoints additionally
expose a parlament_names map so the frontend can render the source
parliament without an extra round-trip.
Report (PDF + HTML)
- generate_html_report / generate_pdf_report — new optional bundesland
parameter. When set, the report header carries the parliament name
("Landtag von Sachsen-Anhalt", "Landtag Nordrhein-Westfalen", …)
beside the title. Three call sites updated: run_analysis,
run_drucksache_analysis, download_assessment_pdf.
Frontend (templates/index.html)
- Header dropdown gets the synthetic ALL entry as first option;
initial currentBundesland is now 'ALL' (was 'NRW').
- localStorage persistence: changeBundesland writes, DOMContentLoaded
reads and validates against the visible options.
- changeBundesland resets the score / party / tag filter state, syncs
the upload-mode bundesland select, disables the Landtag-Suche button
+ tooltip when ALL, and toggles a data-mode attribute on
.list-content (used by CSS to show/hide the per-item bundesland
badge).
- loadAssessments now sends ?bundesland=… so the API does the actual
filtering. updateStats renders an additional per-bundesland average
block (Ø NRW: x · Ø LSA: y) when in ALL mode and the loaded list
spans more than one bundesland.
- renderList prepends a small "bl-badge" beside the Drucksachen-Nummer.
Hidden in single-bundesland mode via CSS selector to avoid clutter.
- showDetail header now shows the parliament name as its own line
(.detail-parlament).
- searchLandtag has an early-out alert if currentBundesland === 'ALL',
saving a network round-trip.
- Upload-Mode bundesland select now starts with a "— Bundesland wählen
—" placeholder (no auto-default), and startAnalysis validates that a
concrete bundesland was chosen.
CSS
- .bl-badge plus the .list-content[data-mode="single"] hide rule.
- .detail-parlament for the detail header line.
- .header-parlament for the PDF report header line.
Resolves #8.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-07 23:00:39 +02:00
|
|
|
sql = "SELECT * FROM assessments"
|
|
|
|
|
params: list = []
|
|
|
|
|
if bundesland and bundesland != "ALL":
|
|
|
|
|
sql += " WHERE bundesland = ?"
|
|
|
|
|
params.append(bundesland)
|
|
|
|
|
sql += " ORDER BY gwoe_score DESC"
|
|
|
|
|
|
2026-03-28 22:30:24 +01:00
|
|
|
async with aiosqlite.connect(settings.db_path) as db:
|
|
|
|
|
db.row_factory = aiosqlite.Row
|
Bundesland filter & transparency: stringent split + visible source (#8)
Brings the Bundesland-Dropdown from a cosmetic header widget to a real
filter that propagates through every layer (Listing, internal search,
statistics, party/tag filters, upload mode), and at the same time makes
the source parliament visible in every place where assessments from
multiple bundesländer can be mixed.
Backend
- database.get_all_assessments(bundesland=None) — new optional filter,
"ALL" treated as None.
- database.search_assessments — bug fix: previous `if bundesland:`
branch incorrectly added a `WHERE bundesland='ALL'` clause; now
guarded with `bundesland and bundesland != "ALL"`.
- main.list_assessments — accepts ?bundesland= query param, includes the
bundesland field in the response so the frontend can render badges.
- main.get_single_assessment — also includes bundesland in the response
so the detail header can show the source parlament.
- main.search_landtag — early HTTP 400 when bundesland is missing or
"ALL"; the live Landtag adapter cannot serve a synthetic Bundesweit
request.
- main.index() and main.list_bundeslaender — synthetic "🌍 Bundesweit"
entry prepended to the bundesländer list (kept out of bundeslaender.py
on purpose — ALL is not a real state). Both endpoints additionally
expose a parlament_names map so the frontend can render the source
parliament without an extra round-trip.
Report (PDF + HTML)
- generate_html_report / generate_pdf_report — new optional bundesland
parameter. When set, the report header carries the parliament name
("Landtag von Sachsen-Anhalt", "Landtag Nordrhein-Westfalen", …)
beside the title. Three call sites updated: run_analysis,
run_drucksache_analysis, download_assessment_pdf.
Frontend (templates/index.html)
- Header dropdown gets the synthetic ALL entry as first option;
initial currentBundesland is now 'ALL' (was 'NRW').
- localStorage persistence: changeBundesland writes, DOMContentLoaded
reads and validates against the visible options.
- changeBundesland resets the score / party / tag filter state, syncs
the upload-mode bundesland select, disables the Landtag-Suche button
+ tooltip when ALL, and toggles a data-mode attribute on
.list-content (used by CSS to show/hide the per-item bundesland
badge).
- loadAssessments now sends ?bundesland=… so the API does the actual
filtering. updateStats renders an additional per-bundesland average
block (Ø NRW: x · Ø LSA: y) when in ALL mode and the loaded list
spans more than one bundesland.
- renderList prepends a small "bl-badge" beside the Drucksachen-Nummer.
Hidden in single-bundesland mode via CSS selector to avoid clutter.
- showDetail header now shows the parliament name as its own line
(.detail-parlament).
- searchLandtag has an early-out alert if currentBundesland === 'ALL',
saving a network round-trip.
- Upload-Mode bundesland select now starts with a "— Bundesland wählen
—" placeholder (no auto-default), and startAnalysis validates that a
concrete bundesland was chosen.
CSS
- .bl-badge plus the .list-content[data-mode="single"] hide rule.
- .detail-parlament for the detail header line.
- .header-parlament for the PDF report header line.
Resolves #8.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-07 23:00:39 +02:00
|
|
|
cursor = await db.execute(sql, params)
|
2026-03-28 22:30:24 +01:00
|
|
|
rows = await cursor.fetchall()
|
|
|
|
|
results = []
|
|
|
|
|
for row in rows:
|
|
|
|
|
d = dict(row)
|
|
|
|
|
# Parse JSON fields
|
|
|
|
|
for field in ["fraktionen", "gwoe_matrix", "gwoe_schwerpunkt",
|
|
|
|
|
"wahlprogramm_scores", "verbesserungen", "staerken",
|
|
|
|
|
"schwaechen", "themen", "antrag_kernpunkte"]:
|
|
|
|
|
if d.get(field):
|
|
|
|
|
try:
|
|
|
|
|
d[field] = json.loads(d[field])
|
|
|
|
|
except:
|
|
|
|
|
pass
|
|
|
|
|
results.append(d)
|
|
|
|
|
return results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def import_json_assessments(assessments_dir):
|
|
|
|
|
"""Import assessments from JSON files into database."""
|
|
|
|
|
import json
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
dir_path = Path(assessments_dir)
|
|
|
|
|
if not dir_path.exists():
|
|
|
|
|
return 0
|
|
|
|
|
|
|
|
|
|
count = 0
|
|
|
|
|
for f in dir_path.glob("*.json"):
|
|
|
|
|
try:
|
|
|
|
|
data = json.loads(f.read_text())
|
|
|
|
|
data["source"] = "batch"
|
|
|
|
|
await upsert_assessment(data)
|
|
|
|
|
count += 1
|
|
|
|
|
except Exception as e:
|
|
|
|
|
print(f"Error importing {f}: {e}")
|
|
|
|
|
|
|
|
|
|
return count
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_search_query(query: str) -> tuple[list[str], bool]:
|
|
|
|
|
"""
|
|
|
|
|
Parse search query for AND logic and exact phrases.
|
|
|
|
|
Returns: (terms, is_exact)
|
|
|
|
|
|
|
|
|
|
Examples:
|
|
|
|
|
- 'Klimaschutz Energie' -> (['klimaschutz', 'energie'], False)
|
|
|
|
|
- '"Grüner Stahl"' -> (['grüner stahl'], True)
|
|
|
|
|
"""
|
|
|
|
|
query = query.strip()
|
|
|
|
|
|
|
|
|
|
# Check for exact phrase (entire query in quotes)
|
|
|
|
|
if query.startswith('"') and query.endswith('"') and query.count('"') == 2:
|
|
|
|
|
exact = query[1:-1].strip()
|
|
|
|
|
return ([exact.lower()], True)
|
|
|
|
|
|
|
|
|
|
# Extract quoted phrases and regular terms
|
Phase A: Audit-Restbefunde #57.3/4/7 (Roadmap #59)
Drei verbleibende Audit-Befunde aus #57 in einem Patch:
- **#57.3 MEDIUM** Drucksache-Regex-Validation: neue
app/validators.py mit validate_drucksache() als gemeinsamer
Validation-Funnel. Pattern ^\d{1,3}/\d{1,7}([-(].{1,20})?$ deckt
alle 10 aktiven Bundesländer (8/6390, 18/12345, 8/6390(neu),
23/3700-A) ab und blockt Path-Traversal (../, /etc/passwd) plus
Standard-Injection (;, <, &). Drei Endpoints durchgeschleust:
/api/assessment, /api/assessment/pdf, /api/analyze-drucksache.
- **#57.4 MEDIUM** print() → logging.getLogger(__name__): main.py
und analyzer.py auf strukturiertes Logging umgestellt. LLM-Inhalte
werden NICHT mehr als Volltext geloggt — neue Helper
_content_fingerprint() liefert nur "len=N sha1=XXXX", reicht zur
Forensik ohne Antrag-Inhalte ins Container-Log zu leaken.
basicConfig() mit ISO-Format setzt strukturiertes Logging früh,
damit logger.exception() auch beim Boot greift.
- **#57.7 LOW-MED** Search-Query-Limit: validate_search_query() mit
MAX_SEARCH_QUERY_LEN=200 schützt /api/search und /api/search-landtag
vor 10-MB-Query-DoS. database._parse_search_query() loggt jetzt
shlex.ValueError-Fallback statt ihn zu verschlucken (deckt Memory-
Regel "stille excepts in Adaptern" ab).
Tests: neue tests/test_main_validators.py mit 22 Cases — Drucksache-
Whitelist-Roundtrip + Path-Traversal-Reject, Search-Query Längen-
Edge-Cases. 107 Unit-Tests grün (85 alt + 22 neu).
Validators in eigenem Modul (app/validators.py), damit Tests sie ohne
slowapi-Dependency direkt importieren können.
Refs: #57, #59 (Phase A)
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-09 11:15:16 +02:00
|
|
|
import logging
|
2026-03-28 22:30:24 +01:00
|
|
|
import shlex
|
|
|
|
|
try:
|
|
|
|
|
parts = shlex.split(query)
|
Phase A: Audit-Restbefunde #57.3/4/7 (Roadmap #59)
Drei verbleibende Audit-Befunde aus #57 in einem Patch:
- **#57.3 MEDIUM** Drucksache-Regex-Validation: neue
app/validators.py mit validate_drucksache() als gemeinsamer
Validation-Funnel. Pattern ^\d{1,3}/\d{1,7}([-(].{1,20})?$ deckt
alle 10 aktiven Bundesländer (8/6390, 18/12345, 8/6390(neu),
23/3700-A) ab und blockt Path-Traversal (../, /etc/passwd) plus
Standard-Injection (;, <, &). Drei Endpoints durchgeschleust:
/api/assessment, /api/assessment/pdf, /api/analyze-drucksache.
- **#57.4 MEDIUM** print() → logging.getLogger(__name__): main.py
und analyzer.py auf strukturiertes Logging umgestellt. LLM-Inhalte
werden NICHT mehr als Volltext geloggt — neue Helper
_content_fingerprint() liefert nur "len=N sha1=XXXX", reicht zur
Forensik ohne Antrag-Inhalte ins Container-Log zu leaken.
basicConfig() mit ISO-Format setzt strukturiertes Logging früh,
damit logger.exception() auch beim Boot greift.
- **#57.7 LOW-MED** Search-Query-Limit: validate_search_query() mit
MAX_SEARCH_QUERY_LEN=200 schützt /api/search und /api/search-landtag
vor 10-MB-Query-DoS. database._parse_search_query() loggt jetzt
shlex.ValueError-Fallback statt ihn zu verschlucken (deckt Memory-
Regel "stille excepts in Adaptern" ab).
Tests: neue tests/test_main_validators.py mit 22 Cases — Drucksache-
Whitelist-Roundtrip + Path-Traversal-Reject, Search-Query Längen-
Edge-Cases. 107 Unit-Tests grün (85 alt + 22 neu).
Validators in eigenem Modul (app/validators.py), damit Tests sie ohne
slowapi-Dependency direkt importieren können.
Refs: #57, #59 (Phase A)
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-09 11:15:16 +02:00
|
|
|
except ValueError as e:
|
|
|
|
|
# Unbalanced quote — fall back to whitespace split, but log so we
|
|
|
|
|
# notice patterns of malformed queries (Issue #57 Befund #7).
|
|
|
|
|
logging.getLogger(__name__).warning(
|
|
|
|
|
"shlex.split failed on search query (%s), falling back to whitespace split", e
|
|
|
|
|
)
|
2026-03-28 22:30:24 +01:00
|
|
|
parts = query.split()
|
Phase A: Audit-Restbefunde #57.3/4/7 (Roadmap #59)
Drei verbleibende Audit-Befunde aus #57 in einem Patch:
- **#57.3 MEDIUM** Drucksache-Regex-Validation: neue
app/validators.py mit validate_drucksache() als gemeinsamer
Validation-Funnel. Pattern ^\d{1,3}/\d{1,7}([-(].{1,20})?$ deckt
alle 10 aktiven Bundesländer (8/6390, 18/12345, 8/6390(neu),
23/3700-A) ab und blockt Path-Traversal (../, /etc/passwd) plus
Standard-Injection (;, <, &). Drei Endpoints durchgeschleust:
/api/assessment, /api/assessment/pdf, /api/analyze-drucksache.
- **#57.4 MEDIUM** print() → logging.getLogger(__name__): main.py
und analyzer.py auf strukturiertes Logging umgestellt. LLM-Inhalte
werden NICHT mehr als Volltext geloggt — neue Helper
_content_fingerprint() liefert nur "len=N sha1=XXXX", reicht zur
Forensik ohne Antrag-Inhalte ins Container-Log zu leaken.
basicConfig() mit ISO-Format setzt strukturiertes Logging früh,
damit logger.exception() auch beim Boot greift.
- **#57.7 LOW-MED** Search-Query-Limit: validate_search_query() mit
MAX_SEARCH_QUERY_LEN=200 schützt /api/search und /api/search-landtag
vor 10-MB-Query-DoS. database._parse_search_query() loggt jetzt
shlex.ValueError-Fallback statt ihn zu verschlucken (deckt Memory-
Regel "stille excepts in Adaptern" ab).
Tests: neue tests/test_main_validators.py mit 22 Cases — Drucksache-
Whitelist-Roundtrip + Path-Traversal-Reject, Search-Query Längen-
Edge-Cases. 107 Unit-Tests grün (85 alt + 22 neu).
Validators in eigenem Modul (app/validators.py), damit Tests sie ohne
slowapi-Dependency direkt importieren können.
Refs: #57, #59 (Phase A)
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-09 11:15:16 +02:00
|
|
|
|
2026-03-28 22:30:24 +01:00
|
|
|
return ([p.lower() for p in parts], False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def search_assessments(query: str, bundesland: str = None, limit: int = 50) -> list[dict]:
|
|
|
|
|
"""Search assessments by title, drucksache, or themen. Supports AND logic."""
|
|
|
|
|
import json
|
|
|
|
|
|
|
|
|
|
terms, is_exact = _parse_search_query(query)
|
|
|
|
|
|
|
|
|
|
# Build SQL for first term (to narrow down results)
|
|
|
|
|
first_term = terms[0] if terms else query.lower()
|
|
|
|
|
|
|
|
|
|
sql = """
|
|
|
|
|
SELECT * FROM assessments
|
|
|
|
|
WHERE (
|
|
|
|
|
LOWER(drucksache) LIKE ?
|
|
|
|
|
OR LOWER(title) LIKE ?
|
|
|
|
|
OR LOWER(themen) LIKE ?
|
|
|
|
|
OR LOWER(fraktionen) LIKE ?
|
|
|
|
|
)
|
|
|
|
|
"""
|
|
|
|
|
params = [f"%{first_term}%"] * 4
|
|
|
|
|
|
Bundesland filter & transparency: stringent split + visible source (#8)
Brings the Bundesland-Dropdown from a cosmetic header widget to a real
filter that propagates through every layer (Listing, internal search,
statistics, party/tag filters, upload mode), and at the same time makes
the source parliament visible in every place where assessments from
multiple bundesländer can be mixed.
Backend
- database.get_all_assessments(bundesland=None) — new optional filter,
"ALL" treated as None.
- database.search_assessments — bug fix: previous `if bundesland:`
branch incorrectly added a `WHERE bundesland='ALL'` clause; now
guarded with `bundesland and bundesland != "ALL"`.
- main.list_assessments — accepts ?bundesland= query param, includes the
bundesland field in the response so the frontend can render badges.
- main.get_single_assessment — also includes bundesland in the response
so the detail header can show the source parlament.
- main.search_landtag — early HTTP 400 when bundesland is missing or
"ALL"; the live Landtag adapter cannot serve a synthetic Bundesweit
request.
- main.index() and main.list_bundeslaender — synthetic "🌍 Bundesweit"
entry prepended to the bundesländer list (kept out of bundeslaender.py
on purpose — ALL is not a real state). Both endpoints additionally
expose a parlament_names map so the frontend can render the source
parliament without an extra round-trip.
Report (PDF + HTML)
- generate_html_report / generate_pdf_report — new optional bundesland
parameter. When set, the report header carries the parliament name
("Landtag von Sachsen-Anhalt", "Landtag Nordrhein-Westfalen", …)
beside the title. Three call sites updated: run_analysis,
run_drucksache_analysis, download_assessment_pdf.
Frontend (templates/index.html)
- Header dropdown gets the synthetic ALL entry as first option;
initial currentBundesland is now 'ALL' (was 'NRW').
- localStorage persistence: changeBundesland writes, DOMContentLoaded
reads and validates against the visible options.
- changeBundesland resets the score / party / tag filter state, syncs
the upload-mode bundesland select, disables the Landtag-Suche button
+ tooltip when ALL, and toggles a data-mode attribute on
.list-content (used by CSS to show/hide the per-item bundesland
badge).
- loadAssessments now sends ?bundesland=… so the API does the actual
filtering. updateStats renders an additional per-bundesland average
block (Ø NRW: x · Ø LSA: y) when in ALL mode and the loaded list
spans more than one bundesland.
- renderList prepends a small "bl-badge" beside the Drucksachen-Nummer.
Hidden in single-bundesland mode via CSS selector to avoid clutter.
- showDetail header now shows the parliament name as its own line
(.detail-parlament).
- searchLandtag has an early-out alert if currentBundesland === 'ALL',
saving a network round-trip.
- Upload-Mode bundesland select now starts with a "— Bundesland wählen
—" placeholder (no auto-default), and startAnalysis validates that a
concrete bundesland was chosen.
CSS
- .bl-badge plus the .list-content[data-mode="single"] hide rule.
- .detail-parlament for the detail header line.
- .header-parlament for the PDF report header line.
Resolves #8.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-07 23:00:39 +02:00
|
|
|
if bundesland and bundesland != "ALL":
|
2026-03-28 22:30:24 +01:00
|
|
|
sql += " AND bundesland = ?"
|
|
|
|
|
params.append(bundesland)
|
|
|
|
|
|
|
|
|
|
sql += " ORDER BY gwoe_score DESC LIMIT ?"
|
|
|
|
|
params.append(limit)
|
|
|
|
|
|
|
|
|
|
async with aiosqlite.connect(settings.db_path) as db:
|
|
|
|
|
db.row_factory = aiosqlite.Row
|
|
|
|
|
cursor = await db.execute(sql, params)
|
|
|
|
|
rows = await cursor.fetchall()
|
|
|
|
|
|
|
|
|
|
results = []
|
|
|
|
|
for row in rows:
|
|
|
|
|
d = dict(row)
|
|
|
|
|
for field in ["fraktionen", "gwoe_matrix", "gwoe_schwerpunkt",
|
|
|
|
|
"wahlprogramm_scores", "verbesserungen", "staerken",
|
|
|
|
|
"schwaechen", "themen", "antrag_kernpunkte"]:
|
|
|
|
|
if d.get(field):
|
|
|
|
|
try:
|
|
|
|
|
d[field] = json.loads(d[field])
|
|
|
|
|
except:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
# Apply AND filter for multiple terms
|
|
|
|
|
if len(terms) > 1 or is_exact:
|
|
|
|
|
searchable = f"{d.get('title', '')} {d.get('drucksache', '')} {' '.join(d.get('fraktionen', []))} {' '.join(d.get('themen', []))}".lower()
|
|
|
|
|
if is_exact:
|
|
|
|
|
if terms[0] not in searchable:
|
|
|
|
|
continue
|
|
|
|
|
else:
|
|
|
|
|
if not all(term in searchable for term in terms):
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
results.append(d)
|
|
|
|
|
|
|
|
|
|
return results
|