Tuning: min_similarity 0.45→0.35 + Anker 5→4 Wörter — mehr Chunks + weniger Drops

This commit is contained in:
Dotty Dotter 2026-04-10 20:06:35 +02:00
parent 14140571d8
commit 3b6ecacc1e

View File

@@ -543,7 +543,7 @@ def get_relevant_quotes_for_antrag(
typ="wahlprogramm", typ="wahlprogramm",
bundesland=bundesland, bundesland=bundesland,
top_k=top_k_per_partei, top_k=top_k_per_partei,
min_similarity=0.45, min_similarity=0.35,
) )
# Parteiprogramm (Grundsatz, federal — bundesland=NULL matched implizit) # Parteiprogramm (Grundsatz, federal — bundesland=NULL matched implizit)
@@ -553,7 +553,7 @@ def get_relevant_quotes_for_antrag(
typ="parteiprogramm", typ="parteiprogramm",
bundesland=bundesland, bundesland=bundesland,
top_k=top_k_per_partei, top_k=top_k_per_partei,
min_similarity=0.45, min_similarity=0.35,
) )
if wahl_chunks or partei_chunks: if wahl_chunks or partei_chunks:
@@ -767,10 +767,10 @@ def find_chunk_for_text(text: str, chunks: list[dict]) -> Optional[dict]:
if needle in norm: if needle in norm:
return c return c
words = needle.split() words = needle.split()
if len(words) < 5: if len(words) < 4:
return None return None
for i in range(len(words) - 4): for i in range(len(words) - 3):
anchor = " ".join(words[i:i + 5]) anchor = " ".join(words[i:i + 4])
for c, norm in chunks_norm: for c, norm in chunks_norm:
if anchor in norm: if anchor in norm:
return c return c