Tuning: min_similarity 0.45→0.35 + Anker 5→4 Wörter — mehr Chunks + weniger Drops
This commit is contained in:
parent
14140571d8
commit
3b6ecacc1e
@ -543,7 +543,7 @@ def get_relevant_quotes_for_antrag(
|
|||||||
typ="wahlprogramm",
|
typ="wahlprogramm",
|
||||||
bundesland=bundesland,
|
bundesland=bundesland,
|
||||||
top_k=top_k_per_partei,
|
top_k=top_k_per_partei,
|
||||||
min_similarity=0.45,
|
min_similarity=0.35,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Parteiprogramm (Grundsatz, federal — bundesland=NULL matched implizit)
|
# Parteiprogramm (Grundsatz, federal — bundesland=NULL matched implizit)
|
||||||
@ -553,7 +553,7 @@ def get_relevant_quotes_for_antrag(
|
|||||||
typ="parteiprogramm",
|
typ="parteiprogramm",
|
||||||
bundesland=bundesland,
|
bundesland=bundesland,
|
||||||
top_k=top_k_per_partei,
|
top_k=top_k_per_partei,
|
||||||
min_similarity=0.45,
|
min_similarity=0.35,
|
||||||
)
|
)
|
||||||
|
|
||||||
if wahl_chunks or partei_chunks:
|
if wahl_chunks or partei_chunks:
|
||||||
@ -767,10 +767,10 @@ def find_chunk_for_text(text: str, chunks: list[dict]) -> Optional[dict]:
|
|||||||
if needle in norm:
|
if needle in norm:
|
||||||
return c
|
return c
|
||||||
words = needle.split()
|
words = needle.split()
|
||||||
if len(words) < 5:
|
if len(words) < 4:
|
||||||
return None
|
return None
|
||||||
for i in range(len(words) - 4):
|
for i in range(len(words) - 3):
|
||||||
anchor = " ".join(words[i:i + 5])
|
anchor = " ".join(words[i:i + 4])
|
||||||
for c, norm in chunks_norm:
|
for c, norm in chunks_norm:
|
||||||
if anchor in norm:
|
if anchor in norm:
|
||||||
return c
|
return c
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user