From 3b6ecacc1e8b0d336937997d93782bb7eb74ccdb Mon Sep 17 00:00:00 2001 From: Dotty Dotter Date: Fri, 10 Apr 2026 20:06:35 +0200 Subject: [PATCH] =?UTF-8?q?Tuning:=20min=5Fsimilarity=200.45=E2=86=920.35?= =?UTF-8?q?=20+=20Anker=205=E2=86=924=20W=C3=B6rter=20=E2=80=94=20mehr=20C?= =?UTF-8?q?hunks=20+=20weniger=20Drops?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/embeddings.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/app/embeddings.py b/app/embeddings.py index 117ea2b..1611635 100644 --- a/app/embeddings.py +++ b/app/embeddings.py @@ -543,7 +543,7 @@ def get_relevant_quotes_for_antrag( typ="wahlprogramm", bundesland=bundesland, top_k=top_k_per_partei, - min_similarity=0.45, + min_similarity=0.35, ) # Parteiprogramm (Grundsatz, federal — bundesland=NULL matched implizit) @@ -553,7 +553,7 @@ def get_relevant_quotes_for_antrag( typ="parteiprogramm", bundesland=bundesland, top_k=top_k_per_partei, - min_similarity=0.45, + min_similarity=0.35, ) if wahl_chunks or partei_chunks: @@ -767,10 +767,10 @@ def find_chunk_for_text(text: str, chunks: list[dict]) -> Optional[dict]: if needle in norm: return c words = needle.split() - if len(words) < 5: + if len(words) < 4: return None - for i in range(len(words) - 4): - anchor = " ".join(words[i:i + 5]) + for i in range(len(words) - 3): + anchor = " ".join(words[i:i + 4]) for c, norm in chunks_norm: if anchor in norm: return c