Hybrid-Zitate: verified/unverified statt drop + UI-Labels

reconstruct_zitate droppt Zitate nicht mehr bei No-Match, sondern markiert sie als verified=false. Das ist ehrlicher: paraphrasierte Zitate sind wertvoller Kontext, sie brauchen nur ein visuelles Unterscheidungsmerkmal.

UI:
- Verifizierte Zitate: grüner solid Border, "✓ verifiziert"
- Paraphrasierte Zitate: gelber dashed Border, "~ paraphrasiert (nicht wörtlich im Programm)"
- Warning-Text: "Zu diesem Themenkomplex konnten keine konkreten Formulierungen im Wahlprogramm gefunden werden"
- Antragsteller:in / Landesregierung als farbige Badges

Zitat-Model: neues Optional[bool]-Feld "verified".

Tests: 206 passed (test_drops angepasst auf neues Verhalten).
2026-04-10 21:45:36 +02:00 · 2026-04-10 21:45:36 +02:00 · f1a7da8544
commit f1a7da8544
parent 9c162d14ac
4 changed files with 44 additions and 26 deletions
--- a/app/embeddings.py
+++ b/app/embeddings.py
@ -797,20 +797,18 @@ def find_chunk_for_text(text: str, chunks: list[dict]) -> Optional[dict]:


 def reconstruct_zitate(data: dict, semantic_quotes: dict) -> dict:
-    """Replace LLM-emitted quelle/url with canonical chunk values; drop unbacked.
+    """Verify and reconstruct LLM-emitted zitate against retrieved chunks.

-    Walks over ``data['wahlprogrammScores'][i][kind]['zitate']`` (the raw
-    LLM-output dict, not the Pydantic model). For each Zitat:
+    For each Zitat:
+    * **verified** (substring/4-word-anchor match): overwrite quelle/url
+      with canonical chunk values, set ``verified: true``.
+    * **unverified** (no match found): keep the Zitat but set
+      ``verified: false``. The UI shows it with a different style so the
+      user knows it's an LLM-Paraphrase, not a wörtliches Zitat.

-    * Locate the chunk whose text contains the snippet (or a 5-word anchor
-      from it). Search across **all** retrieved chunks regardless of party,
-      so cross-mixes between Q-IDs become invisible to the persisted output.
-    * If found: overwrite ``quelle`` and ``url`` with values derived from
-      the matching chunk's ``programm_id`` + ``seite``. The LLM is no longer
-      trusted for these fields.
-    * If not found: drop the Zitat entirely.
-
-    Returns the same ``data`` dict (mutated in place) for chaining.
+    This replaces the old drop-on-no-match behavior (ADR 0001 Option B)
+    with a more honest approach: paraphrased citations are still valuable
+    context, they just need to be marked as such.
    """
    if not semantic_quotes:
        return data
@ -830,12 +828,16 @@ def reconstruct_zitate(data: dict, semantic_quotes: dict) -> dict:
            for z in zitate:
                text = z.get("text", "")
                matched = find_chunk_for_text(text, all_chunks)
-                if matched is None:
-                    continue
-                z["quelle"] = _chunk_source_label(matched)
-                url = _chunk_pdf_url(matched)
-                if url:
-                    z["url"] = url
+                if matched is not None:
+                    z["quelle"] = _chunk_source_label(matched)
+                    url = _chunk_pdf_url(matched)
+                    if url:
+                        z["url"] = url
+                    z["verified"] = True
+                else:
+                    # Kein Match — Zitat behalten aber als unverified markieren.
+                    # Die LLM-emittierte quelle/url bleibt (best effort).
+                    z["verified"] = False
                cleaned.append(z)
            blk["zitate"] = cleaned
    return data
--- a/app/models.py
+++ b/app/models.py
@ -45,6 +45,7 @@ class Zitat(BaseModel):
    text: str
    quelle: str
    url: Optional[str] = None
+    verified: Optional[bool] = None  # True=wörtlich im Chunk, False=paraphrasiert, None=pre-#97


 class ProgrammScore(BaseModel):
--- a/app/templates/index.html
+++ b/app/templates/index.html
@ -1619,14 +1619,22 @@

            const wahlprogrammHtml = (item.wahlprogrammScores || []).map(wp => {
                // Zitate formatieren mit klickbaren Links + Highlighting
-                const zitateHtml = (wp.wahlprogramm?.zitate || []).map(z => `
-                    <div style="margin: 0.5rem 0; padding: 0.5rem; background: #f8f9fa; border-left: 3px solid #889e33; font-size: 0.85rem;">
+                const zitateHtml = (wp.wahlprogramm?.zitate || []).map(z => {
+                    const isVerified = z.verified !== false;
+                    const borderColor = isVerified ? '#889e33' : '#ffc107';
+                    const bgColor = isVerified ? '#f8f9fa' : '#fffbf0';
+                    const badge = isVerified
+                        ? '<span style="font-size:0.7rem;color:#889e33;">✓ verifiziert</span>'
+                        : '<span style="font-size:0.7rem;color:#b8860b;">~ paraphrasiert (nicht wörtlich im Programm)</span>';
+                    return `
+                    <div style="margin: 0.5rem 0; padding: 0.5rem; background: ${bgColor}; border-left: 3px ${isVerified ? 'solid' : 'dashed'} ${borderColor}; font-size: 0.85rem;">
                        <em>"${z.text}"</em><br>
-                        <a href="${makeCiteUrl(z, item.drucksache, item.bundesland)}" target="_blank" style="color: #009da5; font-size: 0.8rem;">
+                        ${z.quelle ? `<a href="${makeCiteUrl(z, item.drucksache, item.bundesland)}" target="_blank" style="color: #009da5; font-size: 0.8rem;">
                            📄 ${z.quelle}
-                        </a>
-                    </div>
-                `).join('');
+                        </a>` : ''}
+                        ${badge}
+                    </div>`;
+                }).join('');

                // Issue #63: Transparenz-Warnung bei Score > 0 ohne Zitate.
                // Differenziert zwischen "Score 0 = keine Quellen" (LLM hat
--- a/tests/test_embeddings.py
+++ b/tests/test_embeddings.py
@ -316,8 +316,15 @@ class TestReconstructZitate:
        }
        out = reconstruct_zitate(data, semantic_quotes)
        zitate = out["wahlprogrammScores"][0]["wahlprogramm"]["zitate"]
-        assert len(zitate) == 1
-        assert "geschlechtersensiblen" in zitate[0]["text"]
+        # Beide Zitate bleiben erhalten — das nicht-matchende wird als
+        # unverified markiert statt gedroppt (Hybrid-Ansatz).
+        assert len(zitate) == 2
+        # Das halluzinierte Zitat ist unverified
+        halluziniert = [z for z in zitate if "Rechtsextremismus" in z["text"]]
+        assert halluziniert[0]["verified"] is False
+        # Das echte Zitat ist verified
+        echt = [z for z in zitate if "geschlechtersensiblen" in z["text"]]
+        assert echt[0]["verified"] is True

    def test_empty_semantic_quotes_is_noop(self):
        data = {"wahlprogrammScores": [{