feat: FTS5 Volltextsuche mit Snippet-Highlighting (#11)
- SQLite FTS5 Virtual Table (vorlagen_fts) über Betreff, Volltext, Zusammenfassung
- Tokenizer unicode61 mit Diakritik-Entfernung
- API: FTS5 MATCH statt LIKE, LIKE als Fallback wenn kein Index
- Snippet-Highlights (<mark>) in Suchergebnissen
- Migrations-Script: scripts/migrate_fts5.py
- 22.337 Einträge indexiert, ~4.200 Treffer für 'Klimaschutz'

Closes #11
This commit is contained in:
parent
1801f8f7fd
commit
2ab8046b78
@ -54,6 +54,21 @@ def list_vorlagen(
|
|||||||
params.append(partei)
|
params.append(partei)
|
||||||
|
|
||||||
if suche:
|
if suche:
|
||||||
|
# Try FTS5 first, fall back to LIKE
|
||||||
|
has_fts = conn.execute(
|
||||||
|
"SELECT name FROM sqlite_master WHERE type='table' AND name='vorlagen_fts'"
|
||||||
|
).fetchone()
|
||||||
|
if has_fts:
|
||||||
|
# Escape FTS5 special characters and build query
|
||||||
|
fts_query = " ".join(
|
||||||
|
f'"{word}"' for word in suche.split() if word.strip()
|
||||||
|
)
|
||||||
|
if fts_query:
|
||||||
|
where_clauses.append(
|
||||||
|
"v.id IN (SELECT rowid FROM vorlagen_fts WHERE vorlagen_fts MATCH ?)"
|
||||||
|
)
|
||||||
|
params.append(fts_query)
|
||||||
|
else:
|
||||||
where_clauses.append(
|
where_clauses.append(
|
||||||
"(v.betreff LIKE ? OR v.aktenzeichen LIKE ?"
|
"(v.betreff LIKE ? OR v.aktenzeichen LIKE ?"
|
||||||
" OR v.volltext_clean LIKE ?"
|
" OR v.volltext_clean LIKE ?"
|
||||||
@ -97,6 +112,28 @@ def list_vorlagen(
|
|||||||
{"kuerzel": a["kuerzel"], "name": a["name"], "farbe": a["farbe"]}
|
{"kuerzel": a["kuerzel"], "name": a["name"], "farbe": a["farbe"]}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# FTS5 snippets for search results
|
||||||
|
snippet_map: dict = {}
|
||||||
|
if suche and has_fts and fts_query and vorlage_ids:
|
||||||
|
placeholders = ",".join("?" * len(vorlage_ids))
|
||||||
|
try:
|
||||||
|
snippet_rows = conn.execute(
|
||||||
|
f"""SELECT rowid,
|
||||||
|
snippet(vorlagen_fts, 1, '<mark>', '</mark>', '…', 12) as snip_betreff,
|
||||||
|
snippet(vorlagen_fts, 2, '<mark>', '</mark>', '…', 20) as snip_volltext,
|
||||||
|
snippet(vorlagen_fts, 3, '<mark>', '</mark>', '…', 16) as snip_zusammenfassung
|
||||||
|
FROM vorlagen_fts
|
||||||
|
WHERE vorlagen_fts MATCH ? AND rowid IN ({placeholders})""",
|
||||||
|
[fts_query] + vorlage_ids,
|
||||||
|
).fetchall()
|
||||||
|
for sr in snippet_rows:
|
||||||
|
# Pick best non-empty snippet
|
||||||
|
snip = sr["snip_zusammenfassung"] or sr["snip_volltext"] or sr["snip_betreff"] or ""
|
||||||
|
if snip:
|
||||||
|
snippet_map[sr["rowid"]] = snip
|
||||||
|
except Exception:
|
||||||
|
pass # Snippets are optional
|
||||||
|
|
||||||
items = [
|
items = [
|
||||||
{
|
{
|
||||||
"id": r["id"],
|
"id": r["id"],
|
||||||
@ -106,6 +143,7 @@ def list_vorlagen(
|
|||||||
"datum_eingang": r["datum_eingang"],
|
"datum_eingang": r["datum_eingang"],
|
||||||
"ist_verwaltungsvorlage": bool(r["ist_verwaltungsvorlage"]),
|
"ist_verwaltungsvorlage": bool(r["ist_verwaltungsvorlage"]),
|
||||||
"antragsteller": antragsteller_map.get(r["id"], []),
|
"antragsteller": antragsteller_map.get(r["id"], []),
|
||||||
|
**({"snippet": snippet_map[r["id"]]} if r["id"] in snippet_map else {}),
|
||||||
}
|
}
|
||||||
for r in rows
|
for r in rows
|
||||||
]
|
]
|
||||||
|
|||||||
@ -147,12 +147,17 @@
|
|||||||
{/if}
|
{/if}
|
||||||
</a>
|
</a>
|
||||||
</td>
|
</td>
|
||||||
<td class="px-4 py-3 text-sm text-gray-700 max-w-lg truncate">
|
<td class="px-4 py-3 text-sm text-gray-700 max-w-lg">
|
||||||
|
<div class="truncate">
|
||||||
{#if filterSuche}
|
{#if filterSuche}
|
||||||
{@html highlight(v.betreff, filterSuche)}
|
{@html highlight(v.betreff, filterSuche)}
|
||||||
{:else}
|
{:else}
|
||||||
{v.betreff || '-'}
|
{v.betreff || '-'}
|
||||||
{/if}
|
{/if}
|
||||||
|
</div>
|
||||||
|
{#if v.snippet}
|
||||||
|
<div class="text-xs text-gray-500 mt-1 line-clamp-2">{@html v.snippet}</div>
|
||||||
|
{/if}
|
||||||
</td>
|
</td>
|
||||||
<td class="px-4 py-3">
|
<td class="px-4 py-3">
|
||||||
{#if v.antragsteller?.length}
|
{#if v.antragsteller?.length}
|
||||||
|
|||||||
88
scripts/migrate_fts5.py
Normal file
88
scripts/migrate_fts5.py
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Migrate: Create FTS5 virtual table for full-text search.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python scripts/migrate_fts5.py [path/to/tracker.db]
|
||||||
|
"""
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
def migrate(db_path: str) -> None:
    """Drop and rebuild the ``vorlagen_fts`` FTS5 full-text index.

    The index is a contentless FTS5 table over the columns ``aktenzeichen``,
    ``betreff``, ``volltext`` and ``zusammenfassung``. Each indexed row uses
    the ``vorlagen.id`` it was built from as its ``rowid``. The summary text
    comes from ``ki_bewertungen.begruendung`` rows with
    ``typ = 'zusammenfassung'``; NULL values are indexed as empty strings.

    The drop, create and populate steps run inside one explicit transaction,
    so a failed run leaves any previous index untouched instead of leaving an
    empty table behind.

    Side effects: switches the database to WAL journal mode and prints
    progress messages to stdout.

    Args:
        db_path: Path to the SQLite database file that contains the
            ``vorlagen`` and ``ki_bewertungen`` tables.

    Raises:
        sqlite3.Error: If any statement fails; the rebuild is rolled back
            and the connection is closed before the error propagates.
    """
    insert_sql = (
        "INSERT INTO vorlagen_fts(rowid, aktenzeichen, betreff, volltext, zusammenfassung)"
        " VALUES (?, ?, ?, ?, ?)"
    )
    batch_size = 1000

    # NOTE(review): FTS5 contentless tables store no column text, so
    # snippet()/highlight() presumably have nothing to excerpt from — confirm
    # that the API's snippet query really returns non-empty text with this
    # schema before relying on it.
    table_args = ["aktenzeichen", "betreff", "volltext", "zusammenfassung", "content=''"]
    # contentless_delete=1 is only recognised by SQLite >= 3.43.0; older
    # libraries reject the whole CREATE statement, so leave it out there.
    if sqlite3.sqlite_version_info >= (3, 43, 0):
        table_args.append("contentless_delete=1")
    else:
        print(
            f"⚠️ SQLite {sqlite3.sqlite_version} < 3.43.0 — contentless_delete=1 "
            "nicht verfügbar, Index wird ohne Lösch-Unterstützung erstellt"
        )
    table_args.append("tokenize='unicode61 remove_diacritics 2'")
    create_sql = (
        "CREATE VIRTUAL TABLE vorlagen_fts USING fts5(" + ", ".join(table_args) + ")"
    )

    conn = sqlite3.connect(db_path)
    try:
        # The journal mode cannot be changed inside a transaction, so set it first.
        conn.execute("PRAGMA journal_mode = WAL")

        # Take over transaction control: the module's implicit handling would
        # run DROP/CREATE outside the insert transaction.
        conn.isolation_level = None
        conn.execute("BEGIN")

        # Check if FTS table already exists
        existing = conn.execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name='vorlagen_fts'"
        ).fetchone()
        if existing:
            print("⚠️ vorlagen_fts existiert bereits — wird neu aufgebaut")
            conn.execute("DROP TABLE IF EXISTS vorlagen_fts")

        conn.execute(create_sql)

        # NOTE(review): if a vorlage can have more than one 'zusammenfassung'
        # row in ki_bewertungen, this join yields the same id several times
        # and the same rowid is inserted repeatedly — verify uniqueness.
        cursor = conn.execute("""
            SELECT v.id, v.aktenzeichen, v.betreff, v.volltext_clean,
                   kb.begruendung as zusammenfassung
            FROM vorlagen v
            LEFT JOIN ki_bewertungen kb ON kb.vorlage_id = v.id AND kb.typ = 'zusammenfassung'
        """)

        count = 0
        while True:
            rows = cursor.fetchmany(batch_size)
            if not rows:
                break
            # rowid = vorlage.id; NULL text columns are indexed as ''.
            batch = [
                (row[0], row[1] or '', row[2] or '', row[3] or '', row[4] or '')
                for row in rows
            ]
            conn.executemany(insert_sql, batch)
            count += len(batch)
            if len(batch) == batch_size:
                print(f" {count} Vorlagen indexiert...")

        conn.commit()
        print(f"✅ FTS5-Index erstellt: {count} Vorlagen indexiert")

        # Smoke test: run one MATCH query against the fresh index.
        test = conn.execute(
            "SELECT COUNT(*) FROM vorlagen_fts WHERE vorlagen_fts MATCH 'Klimaschutz'"
        ).fetchone()[0]
        print(f" Test 'Klimaschutz': {test} Treffer")
    finally:
        # Closing with an open transaction discards it, so a failure anywhere
        # above rolls the whole rebuild back.
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # An explicit path on the command line wins; otherwise fall back to
    # data/tracker.db one level above this script's directory.
    cli_args = sys.argv[1:]
    if cli_args:
        db = cli_args[0]
    else:
        base_dir = Path(__file__).resolve().parents[1]
        db = str(base_dir / "data" / "tracker.db")
    print(f"🔄 FTS5-Migration: {db}")
    migrate(db)
|
||||||
Loading…
Reference in New Issue
Block a user