gwoe-antragspruefer/app/presse_generator.py

"""Pressemitteilungs-Generator fuer #170 Phase 4.

Produces an LLM-generated press-release proposal that places a
GWÖ-assessed motion (Antrag) in the context of a current news article.

Triggered manually via a UI button — nothing is sent automatically. Drafts
are persisted in ``presse_drafts`` and shown as a list in the UI.

Tone:
- GWÖ perspective (oriented towards the common good, not party-political)
- Fact-based, no lobbying language
- 220-280 words (the range ``SYSTEM_PROMPT`` asks for), press-style
  structure (lead paragraph + rationale)
"""
from __future__ import annotations

import json
import logging
import sqlite3
from pathlib import Path
from typing import Optional

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


# System prompt for the press-release LLM call made in generate_draft().
# The text is German and is passed to the model unchanged. It asks for a JSON
# object with the keys "titel" and "body", which are exactly the two keys
# generate_draft() reads from the response. The prompt requests a title of at
# most 100 characters; generate_draft() itself only truncates at 200.
SYSTEM_PROMPT = """Du bist Pressereferent:in einer Gemeinwohl-Ökonomie-
Initiative. Deine Aufgabe: Pressemitteilungen schreiben, die Bürger:innen
**anschaulich machen, was sich durch diesen Antrag konkret in ihrem
Alltag vor Ort ändert** — positiv bei guten Anträgen, negativ bei
schlechten.

## Pflicht-Elemente

1. **Konkrete Alltagswirkung** — keine Abstraktion. Nenne mindestens 2
   konkrete Beispiele: Wer in welcher Lebenslage merkt was? (z.B.
   "Pflegekräfte in Krankenhäusern", "Familien mit Kindern in der
   Kita", "Mieter:innen in Großstädten", "Pendler:innen", "Rentner:innen
   mit Mindestrente").

2. **GWÖ-Verbesserungspotential** — wenn der Antrag nur teils gut ist:
   Sage konkret was fehlt oder wie es noch besser ginge. Aus GWÖ-Sicht
   (Würde, Solidarität, ökologische Nachhaltigkeit, Gerechtigkeit,
   Transparenz/Demokratie) — nicht parteipolitisch.

3. **Drucksache + Quelle nennen** — der Antrag muss klar identifiziert
   sein (z.B. "Drucksache 21/4757 des Bundestages"). Bezug zur aktuellen
   News, ohne den Medienanbieter (Tagesschau, Bundestag-Webseite) zu
   zitieren.

## Stil

- 220–280 Worte
- Aktive Verben, kurze Sätze (max 25 Worte)
- Keine Floskeln ("zukunftsweisend", "innovative Lösung"). Stattdessen
  konkret: "Familien mit zwei Kindern und 2.800 € Netto-Einkommen
  bekommen ..."
- Bei NEGATIV-Anträgen: klar benennen, was der Antrag verschlechtert
  ("Erhöht die Belastung der Mieter:innen um geschätzt X €/Monat" —
  konkret, nicht "ist sozial unausgewogen")
- Schluss: konkrete Forderung ("Wir fordern den Bundestag auf, …")
  ODER konstruktiver Verbesserungsvorschlag

## Struktur

- **Titel**: thesenstark, max 100 Zeichen, inkl. der Alltagswirkung
  (nicht nur Antragsname)
- **Lead-Paragraph** (1-2 Sätze): Wer? Was? Welche Auswirkung im
  Alltag?
- **Begründung** (3-4 Sätze): konkrete Beispiele aus dem Leben +
  GWÖ-Bewertung
- **Verbesserungspotential** (1-2 Sätze, falls Antrag nicht voll überzeugt)
- **Forderung/Schluss** (1 Satz)

## Output-Format

Antworte NUR mit gültigem JSON:
{
  "titel": "<thesenstark, max 100 Zeichen, inkl. konkreter Wirkung>",
  "body": "<220–280 Worte mit den Pflicht-Elementen>"
}"""


def _build_user_prompt(
    drucksache: str,
    bundesland: str,
    antrag_titel: str,
    antrag_zusammenfassung: str,
    gwoe_score: float,
    gwoe_begruendung: str,
    empfehlung: str,
    news_titel: str,
    news_summary: str,
    news_url: str,
) -> str:
    """Assemble the (German) user prompt from motion and news data.

    The prompt consists of three sections separated by a blank line: the
    motion under review, the news context, and the task description. Falsy
    values for the summary, the GWÖ rationale, the recommendation and the
    news summary are replaced by German placeholder texts; all other
    arguments are interpolated as given.

    Returns:
        The complete prompt text, ending with a newline.
    """
    # Placeholders for optional fields that are empty or missing.
    summary_text = antrag_zusammenfassung or "(keine vorhanden)"
    rationale_text = gwoe_begruendung or "(keine vorhanden)"
    recommendation_text = empfehlung or "(keine)"
    news_text = news_summary or "(keine Zusammenfassung verfügbar)"

    antrag_section = (
        "## Aktueller Antrag\n"
        "\n"
        f"Drucksache: {drucksache} ({bundesland})\n"
        f"Titel: {antrag_titel}\n"
        "\n"
        f"Zusammenfassung: {summary_text}\n"
        "\n"
        f"GWÖ-Score: {gwoe_score}/10\n"
        f"GWÖ-Begründung: {rationale_text}\n"
        f"Empfehlung: {recommendation_text}\n"
    )
    news_section = (
        "## Aktueller Nachrichten-Kontext\n"
        "\n"
        f"Schlagzeile: {news_titel}\n"
        "\n"
        f"Inhalt: {news_text}\n"
        "\n"
        f"Quelle: {news_url}\n"
    )
    task_section = (
        "## Deine Aufgabe\n"
        "\n"
        "Schreibe eine Pressemitteilung, die diesen Antrag in den Kontext der\n"
        "aktuellen Nachrichtenlage stellt. Begründe aus GWÖ-Sicht, warum der\n"
        "Antrag gerade jetzt relevant ist (oder warum er die aktuelle Debatte\n"
        "ergänzt/korrigiert). Wenn der GWÖ-Score niedrig ist (< 5), sei dabei\n"
        "kritisch — die PM kann auch eine Ablehnung des Antrags begründen.\n"
    )
    # Every section already ends with a newline, so joining with "\n" yields
    # exactly one blank line between consecutive sections.
    return "\n".join((antrag_section, news_section, task_section))


def _find_existing_draft(
    drucksache: str, news_url: str, db_path: Path,
) -> Optional[dict]:
    """Look up the newest stored draft for a (drucksache, news_url) pair.

    Used as an idempotency guard so that the same pair does not trigger a
    second LLM generation (#170 follow-up).

    Args:
        drucksache: Motion identifier to match.
        news_url: News article URL to match.
        db_path: Location of the SQLite database file.

    Returns:
        The matching draft with the highest id as a dict with the keys
        ``id``, ``drucksache``, ``bundesland``, ``news_url``, ``news_titel``,
        ``titel``, ``body``, ``model`` and ``created_at``; ``None`` when the
        database file does not exist or no row matches.
    """
    # Bail out before connecting: sqlite3.connect() would otherwise create
    # an empty database file at a path that does not exist yet.
    if not Path(db_path).exists():
        return None

    keys = (
        "id", "drucksache", "bundesland", "news_url", "news_titel",
        "titel", "body", "model", "created_at",
    )
    query = """SELECT id, drucksache, bundesland, news_url, news_titel,
                      titel, body, model, created_at
            FROM presse_drafts
            WHERE drucksache=? AND news_url=?
            ORDER BY id DESC LIMIT 1"""

    connection = sqlite3.connect(str(db_path))
    try:
        hit = connection.execute(query, (drucksache, news_url)).fetchone()
    finally:
        connection.close()

    # The key tuple mirrors the SELECT column order.
    return dict(zip(keys, hit)) if hit else None


async def generate_draft(
    drucksache: str,
    news_url: str,
    db_path: Optional[Path] = None,
    bewerter=None,
    force: bool = False,
) -> dict:
    """Generate a press-release draft via the LLM and persist it.

    Args:
        drucksache: Identifier of the motion; looked up in ``assessments``.
        news_url: URL of the news article; looked up in ``news_articles``.
        db_path: Optional database path override (for tests). Falls back to
            ``settings.db_path``.
        bewerter: Optional injected LLM adapter (for tests) exposing an
            awaitable ``bewerte(request)``. When ``None``, a default
            ``QwenBewerter`` is instantiated.
        force: When ``True``, a new LLM call is made even if a draft for the
            same (drucksache, news_url) pair already exists. Defaults to
            ``False`` as an idempotency guard against repeated LLM cost.

    Returns:
        A dict with the keys ``id``, ``drucksache``, ``bundesland``,
        ``news_url``, ``news_titel``, ``titel``, ``body``, ``model``,
        ``created_at`` and ``_was_existing``. ``_was_existing`` is ``True``
        when a stored draft was returned and no new LLM call was made.

    Raises:
        ValueError: If ``drucksache`` is not in ``assessments``, if
            ``news_url`` is not in ``news_articles``, or if the LLM response
            does not provide a non-empty string for both ``titel`` and
            ``body``.
    """
    from .config import settings
    from .adapters.qwen_bewerter import LlmRequest

    path = db_path or settings.db_path

    # Idempotency check: reuse the newest stored draft for this pair.
    if not force:
        existing = _find_existing_draft(drucksache, news_url, path)
        if existing:
            existing["_was_existing"] = True
            return existing

    conn = sqlite3.connect(str(path))
    try:
        antrag = conn.execute(
            """SELECT bundesland, title, antrag_zusammenfassung, gwoe_score,
                      gwoe_begruendung, empfehlung
            FROM assessments WHERE drucksache=?""",
            (drucksache,),
        ).fetchone()
        news = conn.execute(
            "SELECT titel, summary FROM news_articles WHERE url=?",
            (news_url,),
        ).fetchone()
    finally:
        conn.close()

    if not antrag:
        raise ValueError(f"Drucksache {drucksache} nicht in assessments")
    if not news:
        raise ValueError(f"News-URL {news_url} nicht in news_articles")

    # NOTE(review): a NULL gwoe_score is sent to the model as 0.0/10, which
    # the user prompt's "< 5 -> be critical" rule treats as a poor score.
    # Confirm that an unscored motion should be handled this way.
    user_prompt = _build_user_prompt(
        drucksache=drucksache,
        bundesland=antrag[0],
        antrag_titel=antrag[1] or "",
        antrag_zusammenfassung=antrag[2] or "",
        gwoe_score=antrag[3] or 0.0,
        gwoe_begruendung=antrag[4] or "",
        empfehlung=antrag[5] or "",
        news_titel=news[0],
        news_summary=news[1] or "",
        news_url=news_url,
    )

    if bewerter is None:
        from .adapters.qwen_bewerter import QwenBewerter
        bewerter = QwenBewerter()

    # Premium model instead of the cheaper one used for assessments, because
    # press-release writing needs higher language quality. Per the original
    # author's note the trade-off is roughly 3x the cost and 2x the latency.
    model = settings.llm_model_premium

    req = LlmRequest(
        system_prompt=SYSTEM_PROMPT,
        user_prompt=user_prompt,
        model=model,
        base_temperature=0.3,
        max_tokens=1500,
        max_retries=2,
    )
    result = await bewerter.bewerte(req)

    # LLM output is untrusted: a result without ``get`` or a field that is
    # not a string is treated like a missing field, so callers see the
    # documented ValueError instead of an AttributeError.
    getter = getattr(result, "get", None)
    if callable(getter):
        raw_titel = getter("titel")
        raw_body = getter("body")
    else:
        raw_titel = raw_body = None
    titel = raw_titel.strip()[:200] if isinstance(raw_titel, str) else ""
    body = raw_body.strip() if isinstance(raw_body, str) else ""
    if not titel or not body:
        raise ValueError("LLM-Response unvollständig (titel oder body leer)")

    # Persist the draft and read the stored row back so that
    # database-generated columns (id, created_at) are included in the result.
    conn = sqlite3.connect(str(path))
    try:
        cur = conn.execute(
            """INSERT INTO presse_drafts
            (drucksache, bundesland, news_url, news_titel, titel, body, model)
            VALUES (?, ?, ?, ?, ?, ?, ?)""",
            (drucksache, antrag[0], news_url, news[0], titel, body, model),
        )
        draft_id = cur.lastrowid
        row = conn.execute(
            """SELECT id, drucksache, bundesland, news_url, news_titel,
                      titel, body, model, created_at
            FROM presse_drafts WHERE id=?""",
            (draft_id,),
        ).fetchone()
        conn.commit()
    finally:
        conn.close()

    return {
        "_was_existing": False,
        "id": row[0], "drucksache": row[1], "bundesland": row[2],
        "news_url": row[3], "news_titel": row[4],
        "titel": row[5], "body": row[6], "model": row[7],
        "created_at": row[8],
    }


def list_drafts(
    limit: int = 20,
    db_path: Optional[Path] = None,
) -> list[dict]:
    """Return the most recently generated drafts, newest (highest id) first.

    Args:
        limit: Maximum number of drafts to return (default 20). The value is
            passed unvalidated to SQLite's ``LIMIT``; note that SQLite
            treats a negative limit as "no upper bound".
        db_path: Optional database file. When omitted (or falsy), the path
            is taken from ``settings.db_path``.

    Returns:
        A list of dicts with the keys ``id``, ``drucksache``, ``bundesland``,
        ``news_url``, ``news_titel``, ``titel``, ``body``, ``model`` and
        ``created_at``. Empty when the database file does not exist.
    """
    if db_path:
        path = db_path
    else:
        # Imported only when needed, so callers that pass db_path (e.g.
        # tests) do not depend on the application configuration.
        from .config import settings
        path = settings.db_path

    # Bail out before connecting: sqlite3.connect() would otherwise create
    # an empty database file at a path that does not exist yet.
    if not Path(path).exists():
        return []

    # Mirrors the SELECT column order below.
    columns = (
        "id", "drucksache", "bundesland", "news_url", "news_titel",
        "titel", "body", "model", "created_at",
    )
    conn = sqlite3.connect(str(path))
    try:
        rows = conn.execute(
            """SELECT id, drucksache, bundesland, news_url, news_titel,
                      titel, body, model, created_at
            FROM presse_drafts
            ORDER BY id DESC LIMIT ?""",
            (limit,),
        ).fetchall()
    finally:
        conn.close()
    return [dict(zip(columns, r)) for r in rows]


def list_drafts_for(
    drucksache: str,
    news_url: str,
    db_path: Optional[Path] = None,
) -> list[dict]:
    """Return every stored draft version for a (drucksache, news_url) pair.

    Args:
        drucksache: Motion identifier to match.
        news_url: News article URL to match.
        db_path: Optional database file. When omitted (or falsy), the path
            is taken from ``settings.db_path``.

    Returns:
        A list of dicts with the keys ``id``, ``drucksache``, ``bundesland``,
        ``news_url``, ``news_titel``, ``titel``, ``body``, ``model`` and
        ``created_at``, ordered by descending id (newest first). Empty when
        the database file does not exist or nothing matches.
    """
    if db_path:
        path = db_path
    else:
        # Imported only when needed, so callers that pass db_path (e.g.
        # tests) do not depend on the application configuration.
        from .config import settings
        path = settings.db_path

    # Bail out before connecting: sqlite3.connect() would otherwise create
    # an empty database file at a path that does not exist yet.
    if not Path(path).exists():
        return []

    # Mirrors the SELECT column order below.
    columns = (
        "id", "drucksache", "bundesland", "news_url", "news_titel",
        "titel", "body", "model", "created_at",
    )
    conn = sqlite3.connect(str(path))
    try:
        rows = conn.execute(
            """SELECT id, drucksache, bundesland, news_url, news_titel,
                      titel, body, model, created_at
            FROM presse_drafts
            WHERE drucksache=? AND news_url=?
            ORDER BY id DESC""",
            (drucksache, news_url),
        ).fetchall()
    finally:
        conn.close()
    return [dict(zip(columns, r)) for r in rows]


def get_draft(
    draft_id: int,
    db_path: Optional[Path] = None,
) -> Optional[dict]:
    """Fetch a single draft by its id.

    Args:
        draft_id: Value of the ``id`` column in ``presse_drafts``.
        db_path: Optional database file. When omitted (or falsy), the path
            is taken from ``settings.db_path``.

    Returns:
        A dict with the keys ``id``, ``drucksache``, ``bundesland``,
        ``news_url``, ``news_titel``, ``titel``, ``body``, ``model`` and
        ``created_at``; ``None`` when the database file does not exist or no
        row has that id.
    """
    if db_path:
        path = db_path
    else:
        # Imported only when needed, so callers that pass db_path (e.g.
        # tests) do not depend on the application configuration.
        from .config import settings
        path = settings.db_path

    # Bail out before connecting: sqlite3.connect() would otherwise create
    # an empty database file at a path that does not exist yet.
    if not Path(path).exists():
        return None

    # Mirrors the SELECT column order below.
    columns = (
        "id", "drucksache", "bundesland", "news_url", "news_titel",
        "titel", "body", "model", "created_at",
    )
    conn = sqlite3.connect(str(path))
    try:
        row = conn.execute(
            """SELECT id, drucksache, bundesland, news_url, news_titel,
                      titel, body, model, created_at
            FROM presse_drafts WHERE id=?""",
            (draft_id,),
        ).fetchone()
    finally:
        conn.close()
    return dict(zip(columns, row)) if row else None