Fix #60: NameError in get_relevant_quotes_for_antrag (Phase B refactor leftover)
Root cause: der #55-Refactor (eb045d0) hat in get_relevant_quotes_for_antrag ``partei_upper`` zu ``partei_lookup`` umbenannt — aber die Dict-Write-Zeile ``results[partei_upper] = ...`` wurde übersehen. Bei jedem Aufruf knallt seither ein NameError, der in analyzer.py vom breiten ``except Exception`` verschluckt und still auf die Keyword-Fallback-Suche umgeleitet wird.

Konsequenz: 100% der Assessments seit eb045d0 (inkl. autonomer Roadmap-Run #59) liefen ohne Embedding-Retrieval — daher die LLM-Halluzinationen aus #60.

Fix:
- embeddings.py:528: partei_upper → partei_lookup
- analyzer.py:249: NameError/AttributeError/TypeError/KeyError nicht mehr schlucken. Programmierfehler im Embedding-Pfad sollen hart fehlschlagen, damit die nächste Refactor-Regression nicht wieder 24h still degradiert läuft. Echte Network-/API-Exceptions fallen weiterhin auf den Keyword-Pfad zurück.
- tests/test_embeddings.py: Regression-Test, der get_relevant_quotes_for_antrag mit gemockten Chunks aufruft und sicherstellt, dass die Funktion nicht crasht und ein populiertes Result liefert. Hätte den Bug bei eb045d0 sofort gefangen.

Refs: #60, #55, #59
This commit is contained in:
parent
19e5fe4691
commit
ed64399dbb
@ -246,6 +246,11 @@ async def analyze_antrag(text: str, bundesland: str = "NRW", model: str = "qwen-
|
||||
text, fraktionen, bundesland=bundesland, top_k_per_partei=2,
|
||||
)
|
||||
quotes_context = format_quotes_for_prompt(semantic_quotes)
|
||||
except (NameError, AttributeError, TypeError, KeyError):
|
||||
# Programmierfehler (z.B. der partei_upper-Refactor-Rest aus
|
||||
# #55/eb045d0, der zu Issue #60 führte) sollen hart fehlschlagen
|
||||
# statt still auf den schwächeren Keyword-Pfad zurückzufallen.
|
||||
raise
|
||||
except Exception:
|
||||
logger.exception("Semantic search failed, falling back to keyword search")
|
||||
quotes = find_relevant_quotes(text, fraktionen, bundesland=bundesland)
|
||||
|
||||
@ -525,7 +525,7 @@ def get_relevant_quotes_for_antrag(
|
||||
)
|
||||
|
||||
if wahl_chunks or partei_chunks:
|
||||
results[partei_upper] = {
|
||||
results[partei_lookup] = {
|
||||
"wahlprogramm": wahl_chunks,
|
||||
"parteiprogramm": partei_chunks,
|
||||
}
|
||||
|
||||
@ -21,7 +21,12 @@ if "openai" not in sys.modules:
|
||||
openai_stub.OpenAI = lambda **kw: None
|
||||
sys.modules["openai"] = openai_stub
|
||||
|
||||
from app.embeddings import _chunk_source_label, format_quotes_for_prompt
|
||||
from app import embeddings as embeddings_mod
|
||||
from app.embeddings import (
|
||||
_chunk_source_label,
|
||||
format_quotes_for_prompt,
|
||||
get_relevant_quotes_for_antrag,
|
||||
)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
@ -131,6 +136,51 @@ class TestFormatQuotesForPrompt:
|
||||
assert "**Wahlprogramm:**" in out
|
||||
assert "**Grundsatzprogramm:**" in out
|
||||
|
||||
def test_get_relevant_quotes_for_antrag_populates_results(self, monkeypatch):
|
||||
"""Regression for the partei_upper NameError (Phase B / #55 / eb045d0):
|
||||
|
||||
The dict-write line still referenced ``partei_upper`` after the
|
||||
rest of the function had been renamed to ``partei_lookup``. The
|
||||
result was that ``get_relevant_quotes_for_antrag`` raised
|
||||
``NameError`` on every call, was silently swallowed by the
|
||||
``except Exception`` in ``analyzer.analyze_antrag``, and silently
|
||||
downgraded *every* assessment to keyword search — which then
|
||||
caused the LLM hallucinations tracked in #60.
|
||||
|
||||
Test strategy: monkeypatch ``find_relevant_chunks`` so we don't
|
||||
need real embeddings, then call the wrapper and assert it
|
||||
actually returns a populated dict instead of crashing.
|
||||
"""
|
||||
def fake_find_relevant_chunks(query, parteien=None, typ=None,
|
||||
bundesland=None, top_k=3,
|
||||
min_similarity=0.5):
|
||||
return [{
|
||||
"programm_id": "gruene-nrw-2022",
|
||||
"partei": parteien[0] if parteien else "GRÜNE",
|
||||
"typ": typ or "wahlprogramm",
|
||||
"seite": 58,
|
||||
"text": "Wahlalter ab 16",
|
||||
"similarity": 0.7,
|
||||
}]
|
||||
|
||||
monkeypatch.setattr(embeddings_mod, "find_relevant_chunks",
|
||||
fake_find_relevant_chunks)
|
||||
|
||||
result = get_relevant_quotes_for_antrag(
|
||||
antrag_text="Wahlalter ab 16",
|
||||
fraktionen=["GRÜNE"],
|
||||
bundesland="NRW",
|
||||
top_k_per_partei=2,
|
||||
)
|
||||
assert result, "Expected a non-empty result dict, got empty"
|
||||
# The keys are canonical party names; either GRÜNE itself or
|
||||
# whatever the canonical mapper returns for it.
|
||||
assert any("GR" in k.upper() for k in result.keys())
|
||||
# And the structure must be the {wahlprogramm, parteiprogramm} dict
|
||||
first = next(iter(result.values()))
|
||||
assert "wahlprogramm" in first
|
||||
assert "parteiprogramm" in first
|
||||
|
||||
def test_text_truncated_at_500_chars(self):
|
||||
long_chunk = {
|
||||
"FDP": {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user