feat: FTS5 Volltextsuche mit Snippet-Highlighting (#11)
- SQLite FTS5 Virtual Table (vorlagen_fts) über Betreff, Volltext, Zusammenfassung
- Tokenizer unicode61 mit Diakritik-Entfernung
- API: FTS5 MATCH statt LIKE, LIKE als Fallback wenn kein Index
- Snippet-Highlights (<mark>) in Suchergebnissen
- Migrations-Script: scripts/migrate_fts5.py
- 22.337 Einträge indexiert, ~4.200 Treffer für 'Klimaschutz'

Closes #11
This commit is contained in:
parent
1801f8f7fd
commit
2ab8046b78
@ -54,6 +54,21 @@ def list_vorlagen(
|
||||
params.append(partei)
|
||||
|
||||
if suche:
|
||||
# Try FTS5 first, fall back to LIKE
|
||||
has_fts = conn.execute(
|
||||
"SELECT name FROM sqlite_master WHERE type='table' AND name='vorlagen_fts'"
|
||||
).fetchone()
|
||||
if has_fts:
|
||||
# Escape FTS5 special characters and build query
|
||||
fts_query = " ".join(
|
||||
f'"{word}"' for word in suche.split() if word.strip()
|
||||
)
|
||||
if fts_query:
|
||||
where_clauses.append(
|
||||
"v.id IN (SELECT rowid FROM vorlagen_fts WHERE vorlagen_fts MATCH ?)"
|
||||
)
|
||||
params.append(fts_query)
|
||||
else:
|
||||
where_clauses.append(
|
||||
"(v.betreff LIKE ? OR v.aktenzeichen LIKE ?"
|
||||
" OR v.volltext_clean LIKE ?"
|
||||
@ -97,6 +112,28 @@ def list_vorlagen(
|
||||
{"kuerzel": a["kuerzel"], "name": a["name"], "farbe": a["farbe"]}
|
||||
)
|
||||
|
||||
# FTS5 snippets for search results
|
||||
snippet_map: dict = {}
|
||||
if suche and has_fts and fts_query and vorlage_ids:
|
||||
placeholders = ",".join("?" * len(vorlage_ids))
|
||||
try:
|
||||
snippet_rows = conn.execute(
|
||||
f"""SELECT rowid,
|
||||
snippet(vorlagen_fts, 1, '<mark>', '</mark>', '…', 12) as snip_betreff,
|
||||
snippet(vorlagen_fts, 2, '<mark>', '</mark>', '…', 20) as snip_volltext,
|
||||
snippet(vorlagen_fts, 3, '<mark>', '</mark>', '…', 16) as snip_zusammenfassung
|
||||
FROM vorlagen_fts
|
||||
WHERE vorlagen_fts MATCH ? AND rowid IN ({placeholders})""",
|
||||
[fts_query] + vorlage_ids,
|
||||
).fetchall()
|
||||
for sr in snippet_rows:
|
||||
# Pick best non-empty snippet
|
||||
snip = sr["snip_zusammenfassung"] or sr["snip_volltext"] or sr["snip_betreff"] or ""
|
||||
if snip:
|
||||
snippet_map[sr["rowid"]] = snip
|
||||
except Exception:
|
||||
pass # Snippets are optional
|
||||
|
||||
items = [
|
||||
{
|
||||
"id": r["id"],
|
||||
@ -106,6 +143,7 @@ def list_vorlagen(
|
||||
"datum_eingang": r["datum_eingang"],
|
||||
"ist_verwaltungsvorlage": bool(r["ist_verwaltungsvorlage"]),
|
||||
"antragsteller": antragsteller_map.get(r["id"], []),
|
||||
**({"snippet": snippet_map[r["id"]]} if r["id"] in snippet_map else {}),
|
||||
}
|
||||
for r in rows
|
||||
]
|
||||
|
||||
@ -147,12 +147,17 @@
|
||||
{/if}
|
||||
</a>
|
||||
</td>
|
||||
<td class="px-4 py-3 text-sm text-gray-700 max-w-lg truncate">
|
||||
<td class="px-4 py-3 text-sm text-gray-700 max-w-lg">
|
||||
<div class="truncate">
|
||||
{#if filterSuche}
|
||||
{@html highlight(v.betreff, filterSuche)}
|
||||
{:else}
|
||||
{v.betreff || '-'}
|
||||
{/if}
|
||||
</div>
|
||||
{#if v.snippet}
|
||||
<div class="text-xs text-gray-500 mt-1 line-clamp-2">{@html v.snippet}</div>
|
||||
{/if}
|
||||
</td>
|
||||
<td class="px-4 py-3">
|
||||
{#if v.antragsteller?.length}
|
||||
|
||||
88
scripts/migrate_fts5.py
Normal file
88
scripts/migrate_fts5.py
Normal file
@ -0,0 +1,88 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Migrate: Create FTS5 virtual table for full-text search.
|
||||
|
||||
Usage:
|
||||
python scripts/migrate_fts5.py [path/to/tracker.db]
|
||||
"""
|
||||
import sqlite3
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def migrate(db_path: str, batch_size: int = 1000):
    """(Re)build the ``vorlagen_fts`` FTS5 full-text index.

    The index is filled from ``vorlagen`` (id, aktenzeichen, betreff,
    volltext_clean) joined with the ``ki_bewertungen`` rows whose ``typ`` is
    ``'zusammenfassung'``.  The FTS rowid is set to ``vorlagen.id``.

    Dropping, recreating and populating the table happen inside a single
    transaction: if anything fails, the database is rolled back to its
    previous state instead of being left with a missing or empty index.

    Args:
        db_path: Path to the SQLite database file.
        batch_size: Number of rows inserted per ``executemany`` call; a
            progress line is printed after every full batch.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        sqlite3.Error: If any SQL statement fails (the rebuild is rolled
            back and the connection is closed before the error propagates).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")

    insert_sql = (
        "INSERT INTO vorlagen_fts(rowid, aktenzeichen, betreff, volltext, zusammenfassung)"
        " VALUES (?, ?, ?, ?, ?)"
    )

    conn = sqlite3.connect(db_path)
    try:
        # journal_mode cannot be changed inside a transaction, so set it first.
        conn.execute("PRAGMA journal_mode = WAL")

        # Check if FTS table already exists
        existing = conn.execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name='vorlagen_fts'"
        ).fetchone()

        # Explicit transaction so that DROP + CREATE + INSERT are atomic.
        conn.execute("BEGIN")
        try:
            if existing:
                print("⚠️ vorlagen_fts existiert bereits — wird neu aufgebaut")
                conn.execute("DROP TABLE IF EXISTS vorlagen_fts")

            # Contentless FTS5 table (content=''): only the index is stored,
            # not the column text.  contentless_delete=1 requires
            # SQLite 3.43.0 or newer.
            # NOTE(review): FTS5 returns NULL for column values of contentless
            # tables, so snippet()/highlight() probably cannot produce text
            # from this table — confirm against the API's snippet query.
            conn.execute("""
                CREATE VIRTUAL TABLE vorlagen_fts USING fts5(
                    aktenzeichen,
                    betreff,
                    volltext,
                    zusammenfassung,
                    content='',
                    contentless_delete=1,
                    tokenize='unicode61 remove_diacritics 2'
                )
            """)

            # Populate from vorlagen + ki_bewertungen
            # NOTE(review): assumes at most one 'zusammenfassung' row per
            # vorlage; more than one would yield duplicate rowids — verify.
            cursor = conn.execute("""
                SELECT v.id, v.aktenzeichen, v.betreff, v.volltext_clean,
                       kb.begruendung as zusammenfassung
                FROM vorlagen v
                LEFT JOIN ki_bewertungen kb
                       ON kb.vorlage_id = v.id AND kb.typ = 'zusammenfassung'
            """)

            count = 0
            while True:
                rows = cursor.fetchmany(batch_size)
                if not rows:
                    break
                conn.executemany(
                    insert_sql,
                    # rowid = vorlage.id; NULL text columns become ''.
                    [
                        (row[0], row[1] or '', row[2] or '', row[3] or '', row[4] or '')
                        for row in rows
                    ],
                )
                count += len(rows)
                # Progress is reported for full batches only; the final
                # total is printed after the commit.
                if len(rows) == batch_size:
                    print(f" {count} Vorlagen indexiert...")

            conn.commit()
        except Exception:
            conn.rollback()
            raise

        print(f"✅ FTS5-Index erstellt: {count} Vorlagen indexiert")

        # Smoke test: run one MATCH query against the fresh index.
        test = conn.execute(
            "SELECT COUNT(*) FROM vorlagen_fts WHERE vorlagen_fts MATCH 'Klimaschutz'"
        ).fetchone()[0]
        print(f" Test 'Klimaschutz': {test} Treffer")
    finally:
        # Always release the connection, even when the migration fails.
        conn.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: use the database path given as the first CLI
    # argument, otherwise fall back to <repo root>/data/tracker.db
    # (resolved relative to this script's location).
    cli_args = sys.argv[1:]
    if cli_args:
        db = cli_args[0]
    else:
        repo_root = Path(__file__).resolve().parents[1]
        db = str(repo_root / "data" / "tracker.db")

    print(f"🔄 FTS5-Migration: {db}")
    migrate(db)
|
||||
Loading…
Reference in New Issue
Block a user