feat: FTS5 Volltextsuche mit Snippet-Highlighting (#11)

- SQLite FTS5 Virtual Table (vorlagen_fts) über Aktenzeichen, Betreff, Volltext, Zusammenfassung
- Tokenizer unicode61 mit Diakritik-Entfernung
- API: FTS5 MATCH statt LIKE, LIKE als Fallback wenn kein Index
- Snippet-Highlights (<mark>) in Suchergebnissen
- Migrations-Script: scripts/migrate_fts5.py
- 22.337 Einträge indexiert, ~4.200 Treffer für 'Klimaschutz'

Closes #11
This commit is contained in:
Dotty Dotter 2026-04-01 13:17:27 +02:00
parent 1801f8f7fd
commit 2ab8046b78
3 changed files with 146 additions and 15 deletions

View File

@ -54,16 +54,31 @@ def list_vorlagen(
params.append(partei) params.append(partei)
if suche: if suche:
where_clauses.append( # Try FTS5 first, fall back to LIKE
"(v.betreff LIKE ? OR v.aktenzeichen LIKE ?" has_fts = conn.execute(
" OR v.volltext_clean LIKE ?" "SELECT name FROM sqlite_master WHERE type='table' AND name='vorlagen_fts'"
" OR v.id IN (" ).fetchone()
" SELECT kb.vorlage_id FROM ki_bewertungen kb" if has_fts:
" WHERE kb.typ = 'zusammenfassung' AND kb.begruendung LIKE ?" # Escape FTS5 special characters and build query
"))" fts_query = " ".join(
) f'"{word}"' for word in suche.split() if word.strip()
like = f"%{suche}%" )
params.extend([like, like, like, like]) if fts_query:
where_clauses.append(
"v.id IN (SELECT rowid FROM vorlagen_fts WHERE vorlagen_fts MATCH ?)"
)
params.append(fts_query)
else:
where_clauses.append(
"(v.betreff LIKE ? OR v.aktenzeichen LIKE ?"
" OR v.volltext_clean LIKE ?"
" OR v.id IN ("
" SELECT kb.vorlage_id FROM ki_bewertungen kb"
" WHERE kb.typ = 'zusammenfassung' AND kb.begruendung LIKE ?"
"))"
)
like = f"%{suche}%"
params.extend([like, like, like, like])
where_sql = ("WHERE " + " AND ".join(where_clauses)) if where_clauses else "" where_sql = ("WHERE " + " AND ".join(where_clauses)) if where_clauses else ""
@ -97,6 +112,28 @@ def list_vorlagen(
{"kuerzel": a["kuerzel"], "name": a["name"], "farbe": a["farbe"]} {"kuerzel": a["kuerzel"], "name": a["name"], "farbe": a["farbe"]}
) )
# FTS5 snippets for search results
snippet_map: dict = {}
if suche and has_fts and fts_query and vorlage_ids:
placeholders = ",".join("?" * len(vorlage_ids))
try:
snippet_rows = conn.execute(
f"""SELECT rowid,
snippet(vorlagen_fts, 1, '<mark>', '</mark>', '', 12) as snip_betreff,
snippet(vorlagen_fts, 2, '<mark>', '</mark>', '', 20) as snip_volltext,
snippet(vorlagen_fts, 3, '<mark>', '</mark>', '', 16) as snip_zusammenfassung
FROM vorlagen_fts
WHERE vorlagen_fts MATCH ? AND rowid IN ({placeholders})""",
[fts_query] + vorlage_ids,
).fetchall()
for sr in snippet_rows:
# Pick best non-empty snippet
snip = sr["snip_zusammenfassung"] or sr["snip_volltext"] or sr["snip_betreff"] or ""
if snip:
snippet_map[sr["rowid"]] = snip
except Exception:
pass # Snippets are optional
items = [ items = [
{ {
"id": r["id"], "id": r["id"],
@ -106,6 +143,7 @@ def list_vorlagen(
"datum_eingang": r["datum_eingang"], "datum_eingang": r["datum_eingang"],
"ist_verwaltungsvorlage": bool(r["ist_verwaltungsvorlage"]), "ist_verwaltungsvorlage": bool(r["ist_verwaltungsvorlage"]),
"antragsteller": antragsteller_map.get(r["id"], []), "antragsteller": antragsteller_map.get(r["id"], []),
**({"snippet": snippet_map[r["id"]]} if r["id"] in snippet_map else {}),
} }
for r in rows for r in rows
] ]

View File

@ -147,11 +147,16 @@
{/if} {/if}
</a> </a>
</td> </td>
<td class="px-4 py-3 text-sm text-gray-700 max-w-lg truncate"> <td class="px-4 py-3 text-sm text-gray-700 max-w-lg">
{#if filterSuche} <div class="truncate">
{@html highlight(v.betreff, filterSuche)} {#if filterSuche}
{:else} {@html highlight(v.betreff, filterSuche)}
{v.betreff || '-'} {:else}
{v.betreff || '-'}
{/if}
</div>
{#if v.snippet}
<div class="text-xs text-gray-500 mt-1 line-clamp-2">{@html v.snippet}</div>
{/if} {/if}
</td> </td>
<td class="px-4 py-3"> <td class="px-4 py-3">

88
scripts/migrate_fts5.py Normal file
View File

@ -0,0 +1,88 @@
#!/usr/bin/env python3
"""Migrate: Create FTS5 virtual table for full-text search.
Usage:
python scripts/migrate_fts5.py [path/to/tracker.db]
"""
import sqlite3
import sys
from pathlib import Path
def migrate(db_path: str) -> None:
    """Create (or rebuild) the ``vorlagen_fts`` FTS5 index in the given database.

    The index has one row per ``vorlagen`` row, keyed by ``rowid = vorlagen.id``,
    with the columns ``aktenzeichen``, ``betreff``, ``volltext`` (taken from
    ``vorlagen.volltext_clean``) and ``zusammenfassung`` (taken from
    ``ki_bewertungen.begruendung`` where ``typ = 'zusammenfassung'``).

    The table stores its own copy of the indexed text. A contentless table
    (``content=''``) would be smaller, but FTS5 cannot return column text for
    contentless tables, so ``snippet()`` / ``highlight()`` would yield nothing.

    The whole rebuild runs in a single transaction: if anything fails, the
    previous index (if any) is left untouched instead of being replaced by an
    empty or half-filled one.

    Args:
        db_path: Filesystem path of the SQLite database to migrate.

    Raises:
        sqlite3.Error: If the database cannot be opened, FTS5 is unavailable,
            or the source tables are missing. The transaction is rolled back
            and the connection closed before the error propagates.
    """
    batch_size = 1000
    insert_sql = (
        "INSERT INTO vorlagen_fts(rowid, aktenzeichen, betreff, volltext, zusammenfassung)"
        " VALUES (?, ?, ?, ?, ?)"
    )

    conn = sqlite3.connect(db_path)
    try:
        # journal_mode cannot be changed inside a transaction, so set it first.
        conn.execute("PRAGMA journal_mode = WAL")

        # Explicit BEGIN: Python's sqlite3 does not open a transaction for DDL
        # on its own, and DROP + CREATE + INSERTs must succeed or fail together.
        conn.execute("BEGIN")
        try:
            existing = conn.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name='vorlagen_fts'"
            ).fetchone()
            if existing:
                print("⚠️ vorlagen_fts existiert bereits — wird neu aufgebaut")
                conn.execute("DROP TABLE IF EXISTS vorlagen_fts")

            # Regular (content-storing) FTS5 table; see docstring for why it is
            # not contentless. unicode61 + remove_diacritics folds e.g. "Ü" -> "u".
            conn.execute("""
                CREATE VIRTUAL TABLE vorlagen_fts USING fts5(
                    aktenzeichen,
                    betreff,
                    volltext,
                    zusammenfassung,
                    tokenize='unicode61 remove_diacritics 2'
                )
            """)

            # GROUP BY guarantees exactly one row per Vorlage even if several
            # 'zusammenfassung' rows exist for it; a plain LEFT JOIN would then
            # emit the same id twice and the second INSERT would violate the
            # rowid uniqueness of the FTS table.
            cursor = conn.execute("""
                SELECT v.id, v.aktenzeichen, v.betreff, v.volltext_clean,
                       group_concat(kb.begruendung, ' ') AS zusammenfassung
                FROM vorlagen v
                LEFT JOIN ki_bewertungen kb
                       ON kb.vorlage_id = v.id AND kb.typ = 'zusammenfassung'
                GROUP BY v.id
            """)

            count = 0
            while True:
                rows = cursor.fetchmany(batch_size)
                if not rows:
                    break
                conn.executemany(
                    insert_sql,
                    # NULL text columns are indexed as empty strings.
                    [
                        (vid, az or '', betreff or '', volltext or '', summary or '')
                        for vid, az, betreff, volltext, summary in rows
                    ],
                )
                count += len(rows)
                # Progress output only after full batches, not the final partial one.
                if len(rows) == batch_size:
                    print(f" {count} Vorlagen indexiert...")

            conn.commit()
        except Exception:
            conn.rollback()
            raise

        print(f"✅ FTS5-Index erstellt: {count} Vorlagen indexiert")

        # Smoke test: run one real MATCH query against the fresh index.
        test = conn.execute(
            "SELECT COUNT(*) FROM vorlagen_fts WHERE vorlagen_fts MATCH 'Klimaschutz'"
        ).fetchone()[0]
        print(f" Test 'Klimaschutz': {test} Treffer")
    finally:
        conn.close()
if __name__ == "__main__":
    # An explicit path on the command line wins; otherwise fall back to
    # data/tracker.db in the directory one level above this script's folder.
    if len(sys.argv) > 1:
        db = sys.argv[1]
    else:
        default_db = Path(__file__).resolve().parents[1] / "data" / "tracker.db"
        db = str(default_db)
    print(f"🔄 FTS5-Migration: {db}")
    migrate(db)