Compare commits
No commits in common. "83669c528bc7b6048b98740aafffad35d83b98ab" and "aa60f22820f7ab3087ab5b7c02159ee98c19c6ea" have entirely different histories.
83669c528b
...
aa60f22820
129
backend/app.py
129
backend/app.py
@ -114,135 +114,6 @@ def get_words(podcast_id: str, episode_id: str):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _table_exists(db, name: str) -> bool:
    """Return True when ``sqlite_master`` lists a table called *name*.

    Only entries of type ``'table'`` are considered, so a view with the
    same name does not count.
    """
    hit = db.execute(
        "SELECT 1 FROM sqlite_master WHERE type='table' AND name=?", (name,)
    ).fetchone()
    return hit is not None
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/podcasts/{podcast_id}/episodes/{episode_id}/claims")
def get_episode_claims(podcast_id: str, episode_id: str, claim_type: Optional[str] = None):
    """Return the claims stored for one episode.

    Args:
        podcast_id: Podcast the episode belongs to.
        episode_id: Episode whose claims are requested.
        claim_type: Optional exact-match filter on the ``claim_type`` column.

    Returns:
        ``{"available": False, "claims": []}`` when the ``claims`` table does
        not exist, otherwise ``{"available": True, "claims": [...]}`` with the
        matching rows ordered by ``paragraph_idx, id``.
    """
    db = get_db()
    # try/finally: the connection is released even when the existence check
    # or the query raises, instead of leaking on the error path.
    try:
        if not _table_exists(db, "claims"):
            return {"available": False, "claims": []}
        sql = ("SELECT id, paragraph_idx, claim_text, claim_type, verifiable, start_time "
               "FROM claims WHERE podcast_id = ? AND episode_id = ?")
        params = [podcast_id, episode_id]
        if claim_type:
            sql += " AND claim_type = ?"
            params.append(claim_type)
        sql += " ORDER BY paragraph_idx, id"
        rows = db.execute(sql, params).fetchall()
        # NOTE(review): dict(r) assumes get_db() yields mapping-like rows
        # (e.g. sqlite3.Row) — confirm against get_db().
        return {"available": True, "claims": [dict(r) for r in rows]}
    finally:
        db.close()
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/podcasts/{podcast_id}/episodes/{episode_id}/questions")
def get_episode_questions(podcast_id: str, episode_id: str, question_type: Optional[str] = None,
                          answered: Optional[str] = None):
    """Return the questions stored for one episode.

    Args:
        podcast_id: Podcast the episode belongs to.
        episode_id: Episode whose questions are requested.
        question_type: Optional exact-match filter on ``question_type``.
        answered: Optional exact-match filter on the ``answered`` column.

    Returns:
        ``{"available": False, "questions": []}`` when the ``questions`` table
        does not exist, otherwise ``{"available": True, "questions": [...]}``
        with the matching rows ordered by ``paragraph_idx, id``.
    """
    db = get_db()
    # try/finally: the connection is released even when the existence check
    # or the query raises, instead of leaking on the error path.
    try:
        if not _table_exists(db, "questions"):
            return {"available": False, "questions": []}
        sql = ("SELECT id, paragraph_idx, question_text, question_type, answered, "
               "answered_by_podcast, answered_by_episode, answered_by_idx, start_time "
               "FROM questions WHERE podcast_id = ? AND episode_id = ?")
        params = [podcast_id, episode_id]
        if question_type:
            sql += " AND question_type = ?"
            params.append(question_type)
        if answered:
            sql += " AND answered = ?"
            params.append(answered)
        sql += " ORDER BY paragraph_idx, id"
        rows = db.execute(sql, params).fetchall()
        # NOTE(review): dict(r) assumes get_db() yields mapping-like rows
        # (e.g. sqlite3.Row) — confirm against get_db().
        return {"available": True, "questions": [dict(r) for r in rows]}
    finally:
        db.close()
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/podcasts/{podcast_id}/episodes/{episode_id}/analyses-summary")
def get_episode_analyses_summary(podcast_id: str, episode_id: str):
    """Return per-episode counters for the analysis data sets (for UI buttons).

    Returns:
        A dict that contains ``claims`` only when the ``claims`` table exists,
        and ``questions`` plus ``questions_unanswered`` (rows with
        ``answered = 'no'``) only when the ``questions`` table exists.
    """
    db = get_db()
    out = {}
    # try/finally: the connection is released even when one of the COUNT
    # queries raises, instead of leaking on the error path.
    try:
        if _table_exists(db, "claims"):
            out["claims"] = db.execute(
                "SELECT COUNT(*) FROM claims WHERE podcast_id = ? AND episode_id = ?",
                (podcast_id, episode_id)
            ).fetchone()[0]
        if _table_exists(db, "questions"):
            out["questions"] = db.execute(
                "SELECT COUNT(*) FROM questions WHERE podcast_id = ? AND episode_id = ?",
                (podcast_id, episode_id)
            ).fetchone()[0]
            out["questions_unanswered"] = db.execute(
                "SELECT COUNT(*) FROM questions WHERE podcast_id = ? AND episode_id = ? AND answered = 'no'",
                (podcast_id, episode_id)
            ).fetchone()[0]
    finally:
        db.close()
    return out
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/analyses/gaps")
def get_gaps_analysis(min_size: int = 0, missing_in: Optional[str] = None, limit: int = 200):
    """Gap analysis (#14): clusters that are missing in at least one podcast.

    Reads ``gaps_analysis.json`` from ``DATA_DIR`` and filters its ``gaps``
    list.

    Args:
        min_size: When > 0, keep only gaps whose ``cluster_size`` is at least
            this value.
        missing_in: When given, keep only gaps whose ``missing_in`` equals it.
        limit: Maximum number of gaps returned; negative values are treated
            as 0.

    Returns:
        ``{"available": False}`` when the file is missing, unreadable, not
        valid JSON, or not a JSON object; otherwise a dict with the file's
        summary fields and the filtered ``gaps``.
    """
    path = Path(DATA_DIR) / "gaps_analysis.json"
    if not path.exists():
        return {"available": False}
    try:
        # Explicit UTF-8: do not depend on the platform's default encoding.
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return {"available": False}
    if not isinstance(data, dict):
        # A JSON root that is not an object has none of the expected keys.
        return {"available": False}
    gaps = data.get("gaps", [])
    if min_size > 0:
        gaps = [g for g in gaps if g.get("cluster_size", 0) >= min_size]
    if missing_in:
        gaps = [g for g in gaps if g.get("missing_in") == missing_in]
    # Clamp: a negative slice bound would silently drop items from the end.
    gaps = gaps[:max(limit, 0)]
    return {
        "available": True,
        "total_paragraphs": data.get("total_paragraphs"),
        "podcasts": data.get("podcasts", []),
        "n_clusters": data.get("n_clusters"),
        "clusters": data.get("clusters", []),
        "gaps": gaps,
    }
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/analyses/shifts")
def get_shifts_analysis(podcast: Optional[str] = None, theme: Optional[str] = None,
                        min_drift: float = 0.0, limit: int = 200):
    """Narrative shift analysis (#15): drift between consecutive episodes per theme.

    Reads ``narrative_shifts.json`` from ``DATA_DIR`` and filters its
    ``shifts`` list.

    Args:
        podcast: When given, keep only shifts whose ``podcast`` equals it.
        theme: When given, keep only shifts whose ``theme`` equals it.
        min_drift: When > 0, keep only shifts whose ``max_drift`` is at least
            this value.
        limit: Maximum number of shifts returned; negative values are treated
            as 0.

    Returns:
        ``{"available": False}`` when the file is missing, unreadable, not
        valid JSON, or not a JSON object; otherwise a dict with the file's
        summary fields, the sorted podcast names found in the *unfiltered*
        shifts, and the filtered ``shifts``.
    """
    path = Path(DATA_DIR) / "narrative_shifts.json"
    if not path.exists():
        return {"available": False}
    try:
        # Explicit UTF-8: do not depend on the platform's default encoding.
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return {"available": False}
    if not isinstance(data, dict):
        # A JSON root that is not an object has none of the expected keys.
        return {"available": False}
    shifts = data.get("shifts", [])
    if podcast:
        shifts = [s for s in shifts if s.get("podcast") == podcast]
    if theme:
        shifts = [s for s in shifts if s.get("theme") == theme]
    if min_drift > 0:
        shifts = [s for s in shifts if s.get("max_drift", 0) >= min_drift]
    # Clamp: a negative slice bound would silently drop items from the end.
    shifts = shifts[:max(limit, 0)]
    # The podcast list is built from all shifts so the UI filter stays complete.
    podcasts = sorted({s.get("podcast") for s in data.get("shifts", []) if s.get("podcast")})
    return {
        "available": True,
        "total_themes_tracked": data.get("total_themes_tracked"),
        "themes": data.get("themes", []),
        "podcasts": podcasts,
        "shifts": shifts,
    }
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/search")
|
@app.get("/api/search")
|
||||||
def search(q: str = Query(..., min_length=2), podcast_id: Optional[str] = None, limit: int = 50):
|
def search(q: str = Query(..., min_length=2), podcast_id: Optional[str] = None, limit: int = 50):
|
||||||
"""Full-text search across all transcripts."""
|
"""Full-text search across all transcripts."""
|
||||||
|
|||||||
@ -16,13 +16,8 @@ import sqlite3
|
|||||||
|
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
|
|
||||||
# Lokaler Helper
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
||||||
from json_utils import parse_llm_json
|
|
||||||
|
|
||||||
DB_PATH = sys.argv[1] if len(sys.argv) > 1 else "data/db.sqlite"
|
DB_PATH = sys.argv[1] if len(sys.argv) > 1 else "data/db.sqlite"
|
||||||
LIMIT = int(sys.argv[2]) if len(sys.argv) > 2 and not sys.argv[2].startswith("--") else 500
|
LIMIT = int(sys.argv[2]) if len(sys.argv) > 2 else 500
|
||||||
RERUN_ERRORS = "--rerun-errors" in sys.argv
|
|
||||||
|
|
||||||
API_KEY = os.environ.get("DASHSCOPE_API_KEY", "")
|
API_KEY = os.environ.get("DASHSCOPE_API_KEY", "")
|
||||||
BASE_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
|
BASE_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
|
||||||
@ -51,37 +46,25 @@ Absatz B ({meta_b}):
|
|||||||
|
|
||||||
Welche logische Relation besteht von A zu B?"""
|
Welche logische Relation besteht von A zu B?"""
|
||||||
|
|
||||||
last_err = None
|
try:
|
||||||
for attempt in range(3):
|
resp = client.chat.completions.create(
|
||||||
try:
|
model=MODEL,
|
||||||
resp = client.chat.completions.create(
|
messages=[
|
||||||
model=MODEL,
|
{"role": "system", "content": SYSTEM_PROMPT},
|
||||||
messages=[
|
{"role": "user", "content": user_msg},
|
||||||
{"role": "system", "content": SYSTEM_PROMPT},
|
],
|
||||||
{"role": "user", "content": user_msg},
|
temperature=0.1,
|
||||||
],
|
max_tokens=150,
|
||||||
temperature=0.1,
|
)
|
||||||
max_tokens=200,
|
content = resp.choices[0].message.content.strip()
|
||||||
)
|
# Parse JSON from response
|
||||||
content = resp.choices[0].message.content
|
if content.startswith("```"):
|
||||||
usage = getattr(resp, "usage", None)
|
content = content.split("```")[1].strip()
|
||||||
tokens = (usage.prompt_tokens, usage.completion_tokens) if usage else (0, 0)
|
if content.startswith("json"):
|
||||||
try:
|
content = content[4:].strip()
|
||||||
parsed = parse_llm_json(content, expect="object")
|
return json.loads(content)
|
||||||
parsed["_tokens"] = tokens
|
except Exception as e:
|
||||||
return parsed
|
return {"relation": "error", "confidence": 0, "explanation": str(e)}
|
||||||
except ValueError as pe:
|
|
||||||
last_err = f"parse: {pe}"
|
|
||||||
# Bei Parse-Fehler kein Retry: das Modell wuerde wieder dasselbe liefern.
|
|
||||||
break
|
|
||||||
except Exception as e:
|
|
||||||
last_err = str(e)
|
|
||||||
# Retry bei Netzwerk/Rate-Limit
|
|
||||||
if attempt < 2:
|
|
||||||
time.sleep(2 ** attempt)
|
|
||||||
continue
|
|
||||||
break
|
|
||||||
return {"relation": "error", "confidence": 0, "explanation": str(last_err), "_tokens": (0, 0)}
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@ -90,8 +73,7 @@ def main():
|
|||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
client = OpenAI(api_key=API_KEY, base_url=BASE_URL)
|
client = OpenAI(api_key=API_KEY, base_url=BASE_URL)
|
||||||
db = sqlite3.connect(DB_PATH, timeout=60.0)
|
db = sqlite3.connect(DB_PATH)
|
||||||
db.execute("PRAGMA busy_timeout=60000")
|
|
||||||
db.row_factory = sqlite3.Row
|
db.row_factory = sqlite3.Row
|
||||||
|
|
||||||
# Create output table
|
# Create output table
|
||||||
@ -105,58 +87,35 @@ def main():
|
|||||||
CREATE INDEX IF NOT EXISTS idx_arglinks ON argument_links(relation);
|
CREATE INDEX IF NOT EXISTS idx_arglinks ON argument_links(relation);
|
||||||
""")
|
""")
|
||||||
|
|
||||||
if RERUN_ERRORS:
|
# Get top semantic links (cross-episode, prefer cross-podcast)
|
||||||
# Hole error-Records, loesche sie, baue Eingabe-Liste daraus auf.
|
rows = db.execute("""
|
||||||
err_rows = db.execute("""
|
SELECT sl.podcast_id, sl.source_episode, sl.source_idx,
|
||||||
SELECT al.source_podcast as podcast_id, al.source_episode, al.source_idx,
|
sl.target_podcast, sl.target_episode, sl.target_idx, sl.score,
|
||||||
al.target_podcast, al.target_episode, al.target_idx, al.score,
|
p1.text as source_text, p2.text as target_text,
|
||||||
p1.text as source_text, p2.text as target_text,
|
e1.title as source_title, e1.guest as source_guest,
|
||||||
e1.title as source_title, e1.guest as source_guest,
|
e2.title as target_title, e2.guest as target_guest
|
||||||
e2.title as target_title, e2.guest as target_guest
|
FROM semantic_links sl
|
||||||
FROM argument_links al
|
JOIN paragraphs p1 ON sl.podcast_id = p1.podcast_id AND sl.source_episode = p1.episode_id AND sl.source_idx = p1.idx
|
||||||
JOIN paragraphs p1 ON al.source_podcast = p1.podcast_id AND al.source_episode = p1.episode_id AND al.source_idx = p1.idx
|
JOIN paragraphs p2 ON sl.target_podcast = p2.podcast_id AND sl.target_episode = p2.episode_id AND sl.target_idx = p2.idx
|
||||||
JOIN paragraphs p2 ON al.target_podcast = p2.podcast_id AND al.target_episode = p2.episode_id AND al.target_idx = p2.idx
|
JOIN episodes e1 ON sl.podcast_id = e1.podcast_id AND sl.source_episode = e1.id
|
||||||
JOIN episodes e1 ON al.source_podcast = e1.podcast_id AND al.source_episode = e1.id
|
JOIN episodes e2 ON sl.target_podcast = e2.podcast_id AND sl.target_episode = e2.id
|
||||||
JOIN episodes e2 ON al.target_podcast = e2.podcast_id AND al.target_episode = e2.id
|
WHERE sl.source_episode != sl.target_episode
|
||||||
WHERE al.relation = 'error'
|
ORDER BY sl.score DESC
|
||||||
""").fetchall()
|
LIMIT ?
|
||||||
rows = err_rows
|
""", (LIMIT,)).fetchall()
|
||||||
del_count = db.execute("DELETE FROM argument_links WHERE relation='error'").rowcount
|
|
||||||
db.commit()
|
|
||||||
print(f"RE-RUN: {del_count} error-Records geloescht, {len(rows)} werden neu klassifiziert.")
|
|
||||||
existing = set()
|
|
||||||
else:
|
|
||||||
# Get top semantic links (cross-episode, prefer cross-podcast)
|
|
||||||
rows = db.execute("""
|
|
||||||
SELECT sl.podcast_id, sl.source_episode, sl.source_idx,
|
|
||||||
sl.target_podcast, sl.target_episode, sl.target_idx, sl.score,
|
|
||||||
p1.text as source_text, p2.text as target_text,
|
|
||||||
e1.title as source_title, e1.guest as source_guest,
|
|
||||||
e2.title as target_title, e2.guest as target_guest
|
|
||||||
FROM semantic_links sl
|
|
||||||
JOIN paragraphs p1 ON sl.podcast_id = p1.podcast_id AND sl.source_episode = p1.episode_id AND sl.source_idx = p1.idx
|
|
||||||
JOIN paragraphs p2 ON sl.target_podcast = p2.podcast_id AND sl.target_episode = p2.episode_id AND sl.target_idx = p2.idx
|
|
||||||
JOIN episodes e1 ON sl.podcast_id = e1.podcast_id AND sl.source_episode = e1.id
|
|
||||||
JOIN episodes e2 ON sl.target_podcast = e2.podcast_id AND sl.target_episode = e2.id
|
|
||||||
WHERE sl.source_episode != sl.target_episode
|
|
||||||
ORDER BY sl.score DESC
|
|
||||||
LIMIT ?
|
|
||||||
""", (LIMIT,)).fetchall()
|
|
||||||
|
|
||||||
print(f"Klassifiziere {len(rows)} Paare mit {MODEL}…")
|
print(f"Klassifiziere {len(rows)} Paare mit {MODEL}…")
|
||||||
|
|
||||||
# Check already processed
|
# Check already processed
|
||||||
existing = set()
|
existing = set()
|
||||||
try:
|
try:
|
||||||
for r in db.execute("SELECT source_podcast||source_episode||source_idx||target_podcast||target_episode||target_idx as k FROM argument_links").fetchall():
|
for r in db.execute("SELECT source_podcast||source_episode||source_idx||target_podcast||target_episode||target_idx as k FROM argument_links").fetchall():
|
||||||
existing.add(r["k"])
|
existing.add(r["k"])
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
processed = 0
|
processed = 0
|
||||||
skipped = 0
|
skipped = 0
|
||||||
total_in_tokens = 0
|
|
||||||
total_out_tokens = 0
|
|
||||||
|
|
||||||
for i, row in enumerate(rows):
|
for i, row in enumerate(rows):
|
||||||
key = f"{row['podcast_id']}{row['source_episode']}{row['source_idx']}{row['target_podcast']}{row['target_episode']}{row['target_idx']}"
|
key = f"{row['podcast_id']}{row['source_episode']}{row['source_idx']}{row['target_podcast']}{row['target_episode']}{row['target_idx']}"
|
||||||
@ -173,10 +132,6 @@ def main():
|
|||||||
row["target_text"][:800], meta_b
|
row["target_text"][:800], meta_b
|
||||||
)
|
)
|
||||||
|
|
||||||
in_t, out_t = result.pop("_tokens", (0, 0))
|
|
||||||
total_in_tokens += in_t
|
|
||||||
total_out_tokens += out_t
|
|
||||||
|
|
||||||
db.execute(
|
db.execute(
|
||||||
"INSERT INTO argument_links (source_podcast, source_episode, source_idx, "
|
"INSERT INTO argument_links (source_podcast, source_episode, source_idx, "
|
||||||
"target_podcast, target_episode, target_idx, relation, confidence, explanation, score) "
|
"target_podcast, target_episode, target_idx, relation, confidence, explanation, score) "
|
||||||
@ -203,9 +158,6 @@ def main():
|
|||||||
print("Verteilung:")
|
print("Verteilung:")
|
||||||
for s in stats:
|
for s in stats:
|
||||||
print(f" {s['relation']}: {s['c']}")
|
print(f" {s['relation']}: {s['c']}")
|
||||||
# qwen-plus: ~$0.40/1M input, ~$1.20/1M output (DashScope intl, grobe Schaetzung)
|
|
||||||
cost = total_in_tokens / 1e6 * 0.40 + total_out_tokens / 1e6 * 1.20
|
|
||||||
print(f"Tokens: in={total_in_tokens} out={total_out_tokens} ~${cost:.4f}")
|
|
||||||
|
|
||||||
db.close()
|
db.close()
|
||||||
|
|
||||||
|
|||||||
@ -15,12 +15,8 @@ import sqlite3
|
|||||||
|
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
||||||
from json_utils import parse_llm_json
|
|
||||||
|
|
||||||
DB_PATH = sys.argv[1] if len(sys.argv) > 1 else "data/db.sqlite"
|
DB_PATH = sys.argv[1] if len(sys.argv) > 1 else "data/db.sqlite"
|
||||||
LIMIT = int(sys.argv[2]) if len(sys.argv) > 2 and not sys.argv[2].startswith("--") else 100
|
LIMIT = int(sys.argv[2]) if len(sys.argv) > 2 else 100
|
||||||
RERUN_ERRORS = "--rerun-errors" in sys.argv
|
|
||||||
API_KEY = os.environ.get("DASHSCOPE_API_KEY", "")
|
API_KEY = os.environ.get("DASHSCOPE_API_KEY", "")
|
||||||
BASE_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
|
BASE_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
|
||||||
MODEL = "qwen-plus"
|
MODEL = "qwen-plus"
|
||||||
@ -46,35 +42,24 @@ Podcast B — {meta_b}:
|
|||||||
|
|
||||||
Erstelle die Gegenüberstellung."""
|
Erstelle die Gegenüberstellung."""
|
||||||
|
|
||||||
last_err = None
|
try:
|
||||||
for attempt in range(3):
|
resp = client.chat.completions.create(
|
||||||
try:
|
model=MODEL,
|
||||||
resp = client.chat.completions.create(
|
messages=[
|
||||||
model=MODEL,
|
{"role": "system", "content": SYSTEM_PROMPT},
|
||||||
messages=[
|
{"role": "user", "content": user_msg},
|
||||||
{"role": "system", "content": SYSTEM_PROMPT},
|
],
|
||||||
{"role": "user", "content": user_msg},
|
temperature=0.2,
|
||||||
],
|
max_tokens=300,
|
||||||
temperature=0.2,
|
)
|
||||||
max_tokens=400,
|
content = resp.choices[0].message.content.strip()
|
||||||
)
|
if content.startswith("```"):
|
||||||
content = resp.choices[0].message.content
|
content = content.split("```")[1].strip()
|
||||||
usage = getattr(resp, "usage", None)
|
if content.startswith("json"):
|
||||||
tokens = (usage.prompt_tokens, usage.completion_tokens) if usage else (0, 0)
|
content = content[4:].strip()
|
||||||
try:
|
return json.loads(content)
|
||||||
parsed = parse_llm_json(content, expect="object")
|
except Exception as e:
|
||||||
parsed["_tokens"] = tokens
|
return {"topic": "error", "agreement": "", "divergence": "", "insight": str(e)}
|
||||||
return parsed
|
|
||||||
except ValueError as pe:
|
|
||||||
last_err = f"parse: {pe}"
|
|
||||||
break
|
|
||||||
except Exception as e:
|
|
||||||
last_err = str(e)
|
|
||||||
if attempt < 2:
|
|
||||||
time.sleep(2 ** attempt)
|
|
||||||
continue
|
|
||||||
break
|
|
||||||
return {"topic": "error", "agreement": "", "divergence": "", "insight": str(last_err), "_tokens": (0, 0)}
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@ -83,8 +68,7 @@ def main():
|
|||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
client = OpenAI(api_key=API_KEY, base_url=BASE_URL)
|
client = OpenAI(api_key=API_KEY, base_url=BASE_URL)
|
||||||
db = sqlite3.connect(DB_PATH, timeout=60.0)
|
db = sqlite3.connect(DB_PATH)
|
||||||
db.execute("PRAGMA busy_timeout=60000")
|
|
||||||
db.row_factory = sqlite3.Row
|
db.row_factory = sqlite3.Row
|
||||||
|
|
||||||
db.executescript("""
|
db.executescript("""
|
||||||
@ -98,60 +82,36 @@ def main():
|
|||||||
CREATE INDEX IF NOT EXISTS idx_debates_topic ON debates(topic);
|
CREATE INDEX IF NOT EXISTS idx_debates_topic ON debates(topic);
|
||||||
""")
|
""")
|
||||||
|
|
||||||
if RERUN_ERRORS:
|
# Get strongest cross-podcast links
|
||||||
rows = db.execute("""
|
rows = db.execute("""
|
||||||
SELECT d.source_podcast as podcast_id, d.source_episode, d.source_idx,
|
SELECT sl.podcast_id, sl.source_episode, sl.source_idx,
|
||||||
d.target_podcast, d.target_episode, d.target_idx, d.score,
|
sl.target_podcast, sl.target_episode, sl.target_idx, sl.score,
|
||||||
p1.text as source_text, p2.text as target_text,
|
p1.text as source_text, p2.text as target_text,
|
||||||
pc1.name as source_podcast_name, pc2.name as target_podcast_name,
|
pc1.name as source_podcast_name, pc2.name as target_podcast_name,
|
||||||
e1.title as source_title, e1.guest as source_guest,
|
e1.title as source_title, e1.guest as source_guest,
|
||||||
e2.title as target_title, e2.guest as target_guest
|
e2.title as target_title, e2.guest as target_guest
|
||||||
FROM debates d
|
FROM semantic_links sl
|
||||||
JOIN paragraphs p1 ON d.source_podcast = p1.podcast_id AND d.source_episode = p1.episode_id AND d.source_idx = p1.idx
|
JOIN paragraphs p1 ON sl.podcast_id = p1.podcast_id AND sl.source_episode = p1.episode_id AND sl.source_idx = p1.idx
|
||||||
JOIN paragraphs p2 ON d.target_podcast = p2.podcast_id AND d.target_episode = p2.episode_id AND d.target_idx = p2.idx
|
JOIN paragraphs p2 ON sl.target_podcast = p2.podcast_id AND sl.target_episode = p2.episode_id AND sl.target_idx = p2.idx
|
||||||
JOIN episodes e1 ON d.source_podcast = e1.podcast_id AND d.source_episode = e1.id
|
JOIN episodes e1 ON sl.podcast_id = e1.podcast_id AND sl.source_episode = e1.id
|
||||||
JOIN episodes e2 ON d.target_podcast = e2.podcast_id AND d.target_episode = e2.id
|
JOIN episodes e2 ON sl.target_podcast = e2.podcast_id AND sl.target_episode = e2.id
|
||||||
JOIN podcasts pc1 ON d.source_podcast = pc1.id
|
JOIN podcasts pc1 ON sl.podcast_id = pc1.id
|
||||||
JOIN podcasts pc2 ON d.target_podcast = pc2.id
|
JOIN podcasts pc2 ON sl.target_podcast = pc2.id
|
||||||
WHERE d.topic = 'error'
|
WHERE sl.podcast_id != sl.target_podcast
|
||||||
""").fetchall()
|
ORDER BY sl.score DESC
|
||||||
del_count = db.execute("DELETE FROM debates WHERE topic='error'").rowcount
|
LIMIT ?
|
||||||
db.commit()
|
""", (LIMIT,)).fetchall()
|
||||||
print(f"RE-RUN: {del_count} error-Records geloescht, {len(rows)} werden neu kuratiert.")
|
|
||||||
existing = set()
|
|
||||||
else:
|
|
||||||
# Get strongest cross-podcast links
|
|
||||||
rows = db.execute("""
|
|
||||||
SELECT sl.podcast_id, sl.source_episode, sl.source_idx,
|
|
||||||
sl.target_podcast, sl.target_episode, sl.target_idx, sl.score,
|
|
||||||
p1.text as source_text, p2.text as target_text,
|
|
||||||
pc1.name as source_podcast_name, pc2.name as target_podcast_name,
|
|
||||||
e1.title as source_title, e1.guest as source_guest,
|
|
||||||
e2.title as target_title, e2.guest as target_guest
|
|
||||||
FROM semantic_links sl
|
|
||||||
JOIN paragraphs p1 ON sl.podcast_id = p1.podcast_id AND sl.source_episode = p1.episode_id AND sl.source_idx = p1.idx
|
|
||||||
JOIN paragraphs p2 ON sl.target_podcast = p2.podcast_id AND sl.target_episode = p2.episode_id AND sl.target_idx = p2.idx
|
|
||||||
JOIN episodes e1 ON sl.podcast_id = e1.podcast_id AND sl.source_episode = e1.id
|
|
||||||
JOIN episodes e2 ON sl.target_podcast = e2.podcast_id AND sl.target_episode = e2.id
|
|
||||||
JOIN podcasts pc1 ON sl.podcast_id = pc1.id
|
|
||||||
JOIN podcasts pc2 ON sl.target_podcast = pc2.id
|
|
||||||
WHERE sl.podcast_id != sl.target_podcast
|
|
||||||
ORDER BY sl.score DESC
|
|
||||||
LIMIT ?
|
|
||||||
""", (LIMIT,)).fetchall()
|
|
||||||
|
|
||||||
print(f"Kuratiere {len(rows)} Cross-Podcast-Debatten mit {MODEL}…")
|
print(f"Kuratiere {len(rows)} Cross-Podcast-Debatten mit {MODEL}…")
|
||||||
|
|
||||||
existing = set()
|
existing = set()
|
||||||
try:
|
try:
|
||||||
for r in db.execute("SELECT source_podcast||source_episode||source_idx||target_podcast||target_episode||target_idx as k FROM debates").fetchall():
|
for r in db.execute("SELECT source_podcast||source_episode||source_idx||target_podcast||target_episode||target_idx as k FROM debates").fetchall():
|
||||||
existing.add(r["k"])
|
existing.add(r["k"])
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
processed = 0
|
processed = 0
|
||||||
total_in_tokens = 0
|
|
||||||
total_out_tokens = 0
|
|
||||||
for i, row in enumerate(rows):
|
for i, row in enumerate(rows):
|
||||||
key = f"{row['podcast_id']}{row['source_episode']}{row['source_idx']}{row['target_podcast']}{row['target_episode']}{row['target_idx']}"
|
key = f"{row['podcast_id']}{row['source_episode']}{row['source_idx']}{row['target_podcast']}{row['target_episode']}{row['target_idx']}"
|
||||||
if key in existing:
|
if key in existing:
|
||||||
@ -161,9 +121,6 @@ def main():
|
|||||||
meta_b = f"{row['target_podcast_name']} / {row['target_episode']}: {row['target_title']} ({row['target_guest']})"
|
meta_b = f"{row['target_podcast_name']} / {row['target_episode']}: {row['target_title']} ({row['target_guest']})"
|
||||||
|
|
||||||
result = curate_pair(client, row["source_text"][:800], meta_a, row["target_text"][:800], meta_b)
|
result = curate_pair(client, row["source_text"][:800], meta_a, row["target_text"][:800], meta_b)
|
||||||
in_t, out_t = result.pop("_tokens", (0, 0))
|
|
||||||
total_in_tokens += in_t
|
|
||||||
total_out_tokens += out_t
|
|
||||||
|
|
||||||
db.execute(
|
db.execute(
|
||||||
"INSERT INTO debates (topic, source_podcast, source_episode, source_idx, "
|
"INSERT INTO debates (topic, source_podcast, source_episode, source_idx, "
|
||||||
@ -189,8 +146,6 @@ def main():
|
|||||||
print("Top-Themen:")
|
print("Top-Themen:")
|
||||||
for t in topics:
|
for t in topics:
|
||||||
print(f" {t['topic']}: {t['c']}")
|
print(f" {t['topic']}: {t['c']}")
|
||||||
cost = total_in_tokens / 1e6 * 0.40 + total_out_tokens / 1e6 * 1.20
|
|
||||||
print(f"Tokens: in={total_in_tokens} out={total_out_tokens} ~${cost:.4f}")
|
|
||||||
|
|
||||||
db.close()
|
db.close()
|
||||||
|
|
||||||
|
|||||||
@ -1,294 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""Auto-Quote-Extraktion fuer einen Podcast (z.B. LdN).
|
|
||||||
|
|
||||||
Pro Episode: Qwen erhaelt das (gekuerzte) Transkript als Paragraphen-Array.
|
|
||||||
Output: 3-5 markante Zitate als JSON, mit para_idx, text, verbatim, speaker, is_top, themes.
|
|
||||||
|
|
||||||
Audio-Timestamps werden aus der `paragraphs`-Tabelle ueber `para_idx` zugeordnet.
|
|
||||||
|
|
||||||
Nutzung:
|
|
||||||
DASHSCOPE_API_KEY=... python3 extract_quotes.py [db-pfad] [podcast_id]
|
|
||||||
|
|
||||||
Bei wiederholtem Aufruf: Episoden mit bestehenden Quotes werden uebersprungen.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
import sqlite3
|
|
||||||
|
|
||||||
from openai import OpenAI
|
|
||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
||||||
from json_utils import parse_llm_json
|
|
||||||
|
|
||||||
DB_PATH = sys.argv[1] if len(sys.argv) > 1 else "data/db.sqlite"
|
|
||||||
PODCAST_ID = sys.argv[2] if len(sys.argv) > 2 else "ldn"
|
|
||||||
API_KEY = os.environ.get("DASHSCOPE_API_KEY", "")
|
|
||||||
BASE_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
|
|
||||||
MODEL = "qwen-plus"
|
|
||||||
|
|
||||||
# Konservatives Pricing fuer Budget-Tracking (qwen-plus intl)
|
|
||||||
COST_IN = 0.0008 / 1000
|
|
||||||
COST_OUT = 0.002 / 1000
|
|
||||||
|
|
||||||
PARA_CHAR_LIMIT = 600 # pro Paragraph
|
|
||||||
HARD_BUDGET_USD = 1.50
|
|
||||||
|
|
||||||
SYSTEM_PROMPT = """Du bist Diskursanalyst. Du erhaeltst ein Podcast-Transkript als Paragraphen-Liste mit Index-Markern [P0], [P1], ...
|
|
||||||
|
|
||||||
Waehle 3 bis 5 markante Zitate, die fuer diese Episode/diesen Diskurs charakteristisch sind. Praeferenz fuer:
|
|
||||||
- Pointierte Aussagen, Thesen, Schluesselformulierungen
|
|
||||||
- Konkrete Beispiele, die ein groesseres Argument verdichten
|
|
||||||
- Aussagen mit klarer Sprecher-Position
|
|
||||||
|
|
||||||
KEINE Floskeln, KEINE Begruessungen, KEINE Werbeblock-Zitate.
|
|
||||||
|
|
||||||
Antworte NUR mit einem JSON-Array. Jedes Element:
|
|
||||||
{
|
|
||||||
"para_idx": <int>, // Index des Paragraphen (P-Marker)
|
|
||||||
"text": "<geglaettete Form, ohne Fuellwoerter, ein Satz>",
|
|
||||||
"verbatim": "<woertlicher Snippet aus dem Transkript, max. 2 Saetze>",
|
|
||||||
"speaker": "<Name oder leerer String>",
|
|
||||||
"is_top": <true wenn dies das prominenteste Zitat der Episode ist, sonst false; max. 1x true pro Antwort>,
|
|
||||||
"themes": ["<theme-id>", ...] // ZWINGEND nur aus den erlaubten IDs (siehe unten); leeres Array wenn unklar
|
|
||||||
}
|
|
||||||
|
|
||||||
ERLAUBTE THEME-IDs (NUR diese verwenden, sonst leeres Array):
|
|
||||||
gaza-nahost, haushalt-investitionen, klima-verkehr, krieg-ukraine, migration-asyl, parteienlandschaft, trump-usa
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
class Budget:
    """Running token tally checked against a hard spending cap in USD."""

    def __init__(self, hard_limit_usd):
        self.hard_limit = hard_limit_usd
        self.tokens_in = self.tokens_out = 0

    def add(self, usage):
        """Add one response's usage record to the tally; falsy records are ignored."""
        if not usage:
            return
        # A missing attribute or a None value counts as zero tokens.
        self.tokens_in += getattr(usage, "prompt_tokens", 0) or 0
        self.tokens_out += getattr(usage, "completion_tokens", 0) or 0

    def cost(self):
        """Return the spend so far, priced with COST_IN / COST_OUT per token."""
        input_cost = self.tokens_in * COST_IN
        output_cost = self.tokens_out * COST_OUT
        return input_cost + output_cost

    def over(self):
        """Return True once the spend strictly exceeds the hard limit."""
        spent = self.cost()
        return spent > self.hard_limit
|
|
||||||
|
|
||||||
|
|
||||||
def load_themes(db, podcast_id):
    """Return the ``id`` of every row in ``themes`` that belongs to *podcast_id*."""
    cursor = db.execute("SELECT id FROM themes WHERE podcast_id=?", (podcast_id,))
    theme_ids = []
    for record in cursor.fetchall():
        theme_ids.append(record["id"])
    return theme_ids
|
|
||||||
|
|
||||||
|
|
||||||
def build_user_msg(episode, paragraphs):
    """Render the user message sent to the LLM for one episode.

    Args:
        episode: Dict with ``id``, and optionally ``title`` and ``guest``.
        paragraphs: Iterable of dicts with ``idx`` and ``text``.

    Returns:
        A header line (episode id, title cut to 200 characters, guest if
        present) followed by one ``[P<idx>] <text>`` block per paragraph,
        each text cut to ``PARA_CHAR_LIMIT`` characters. Blocks are separated
        by blank lines.
    """
    # A missing or NULL title must not crash the slice below.
    title = episode.get("title") or ""
    head = f"Episode {episode['id']}: {title[:200]}"
    if episode.get("guest"):
        head += f" (Gast: {episode['guest']})"
    blocks = []
    for p in paragraphs:
        # Same guard for paragraph text that is None.
        snippet = (p["text"] or "")[:PARA_CHAR_LIMIT]
        blocks.append(f"[P{p['idx']}] {snippet}")
    return head + "\n\n" + "\n\n".join(blocks)
|
|
||||||
|
|
||||||
|
|
||||||
def call_llm(client, user_msg, budget):
    """Send *user_msg* to the chat model and parse the reply as a JSON array.

    Makes at most two attempts. Any exception from the request path triggers
    one retry after a 2-second pause; a ``ValueError`` from
    ``parse_llm_json`` ends the loop immediately without a retry.

    Returns:
        ``(parsed, None)`` on success, ``(None, error_text)`` otherwise.
    """
    failure = None
    for attempt in range(2):
        try:
            resp = client.chat.completions.create(
                model=MODEL,
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": user_msg},
                ],
                temperature=0.2,
                max_tokens=1500,
            )
            # Usage is recorded before parsing so failed parses still count.
            budget.add(getattr(resp, "usage", None))
            content = resp.choices[0].message.content
            try:
                parsed = parse_llm_json(content, expect="array")
            except ValueError as pe:
                failure = f"parse: {pe} :: head={content[:200]}"
                break
            return parsed, None
        except Exception as e:
            failure = str(e)
            # Pause only before the second attempt; after it the loop just ends.
            if attempt == 0:
                time.sleep(2)
    return None, failure
|
|
||||||
|
|
||||||
|
|
||||||
def next_quote_id(db, podcast_id):
    """Return one more than the highest numeric suffix among ``q<digits>`` quote ids.

    Only ids of *podcast_id* that consist of ``q`` followed by digits are
    considered; when none exist the result is 1.
    """
    candidates = db.execute(
        "SELECT id FROM quotes WHERE podcast_id=? AND id LIKE 'q%'", (podcast_id,)
    ).fetchall()
    # LIKE only pre-filters; the regex decides which ids are really numeric.
    matches = (re.match(r"q(\d+)$", rec["id"]) for rec in candidates)
    highest = max((int(m.group(1)) for m in matches if m), default=0)
    return highest + 1
|
|
||||||
|
|
||||||
|
|
||||||
def _as_text(value):
    """Return *value* stripped when it is a string, otherwise an empty string."""
    # LLM output is untrusted: calling .strip() on a number or list would
    # raise AttributeError and abort the whole run.
    return value.strip() if isinstance(value, str) else ""


def process_episode(db, client, episode, allowed_themes, budget, next_id):
    """Extract quotes for one episode through the LLM and insert them into ``quotes``.

    Args:
        db: Database connection; rows must support access by column name.
        client: Chat client handed through to ``call_llm``.
        episode: Row/mapping with at least ``podcast_id`` and ``id``.
        allowed_themes: Theme ids that may be stored; all others are dropped.
        budget: Token budget tracker handed through to ``call_llm``.
        next_id: Numeric suffix to use for the next ``q<n>`` quote id.

    Returns:
        ``(inserted, next_id, error)``: the number of rows inserted, the
        next unused id suffix, and ``None`` or a short error string
        (``"no-paragraphs"``, ``"llm-fail: ..."``, ``"llm: no array"``).
        No commit is issued here.
    """
    paras = db.execute(
        "SELECT idx, start_time, end_time, text FROM paragraphs "
        "WHERE podcast_id=? AND episode_id=? ORDER BY idx",
        (episode["podcast_id"], episode["id"]),
    ).fetchall()
    if not paras:
        return 0, next_id, "no-paragraphs"

    paragraph_dicts = [dict(p) for p in paras]
    para_lookup = {p["idx"]: p for p in paragraph_dicts}

    user_msg = build_user_msg(dict(episode), paragraph_dicts)
    result, err = call_llm(client, user_msg, budget)
    if result is None:
        return 0, next_id, f"llm-fail: {err}"
    if not isinstance(result, list):
        return 0, next_id, "llm: no array"

    inserted = 0
    top_count = 0
    for item in result:
        if not isinstance(item, dict):
            continue
        try:
            idx = int(item.get("para_idx", -1))
        except (TypeError, ValueError, OverflowError):
            # OverflowError: int() of an infinite float.
            continue
        # Timestamps come from the referenced paragraph; unknown indices are dropped.
        para = para_lookup.get(idx)
        if not para:
            continue
        text = _as_text(item.get("text"))
        verbatim = _as_text(item.get("verbatim"))
        speaker = _as_text(item.get("speaker"))
        if not text and not verbatim:
            continue

        themes_raw = item.get("themes") or []
        if not isinstance(themes_raw, list):
            themes_raw = []
        themes = [t for t in themes_raw if t in allowed_themes]

        # Only the first item flagged by the model is stored as the top quote.
        is_top = bool(item.get("is_top")) and top_count == 0
        if is_top:
            top_count += 1

        # The id is consumed even when the insert below hits a duplicate.
        qid = f"q{next_id}"
        next_id += 1
        try:
            db.execute(
                "INSERT INTO quotes (id, podcast_id, episode_id, text, verbatim, speaker, "
                "start_time, end_time, is_top_quote, themes_json) "
                "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                (qid, episode["podcast_id"], episode["id"],
                 text[:1000], verbatim[:2000], speaker[:200],
                 para["start_time"], para["end_time"], 1 if is_top else 0,
                 json.dumps(themes, ensure_ascii=False)),
            )
            inserted += 1
        except sqlite3.IntegrityError:
            # Duplicate key: skip this quote and carry on with the rest.
            pass
    return inserted, next_id, None
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Extract quotes for every episode of PODCAST_ID that has none yet.

    Flow: ensure the ``quotes`` table exists, load the allowed themes, skip
    episodes that already own quotes, then call ``process_episode`` per
    remaining episode until the budget is exhausted. Work is committed after
    every episode; a summary and a quotes-per-episode sanity check are printed
    at the end.

    Exits with status 1 when DASHSCOPE_API_KEY is not set.
    """
    if not API_KEY:
        print("DASHSCOPE_API_KEY nicht gesetzt.")
        sys.exit(1)
    client = OpenAI(api_key=API_KEY, base_url=BASE_URL, timeout=60.0, max_retries=1)
    db = sqlite3.connect(DB_PATH, timeout=30.0)
    try:
        db.execute("PRAGMA busy_timeout=30000")
        db.row_factory = sqlite3.Row

        # Make sure the quotes table exists (it normally does already).
        db.executescript("""
            CREATE TABLE IF NOT EXISTS quotes (
                id TEXT, podcast_id TEXT, episode_id TEXT,
                text TEXT, verbatim TEXT, speaker TEXT,
                start_time REAL, end_time REAL,
                is_top_quote BOOLEAN, themes_json TEXT,
                PRIMARY KEY (podcast_id, id)
            );
            CREATE INDEX IF NOT EXISTS idx_quotes_episode ON quotes(podcast_id, episode_id);
        """)

        allowed_themes = load_themes(db, PODCAST_ID)
        print(f"Erlaubte Themes ({PODCAST_ID}): {allowed_themes}")

        episodes = db.execute(
            "SELECT id, podcast_id, title, guest FROM episodes WHERE podcast_id=? ORDER BY id",
            (PODCAST_ID,),
        ).fetchall()
        print(f"Verarbeite {len(episodes)} Episoden fuer {PODCAST_ID}…")

        # Episodes that already have quotes are skipped.
        done = {
            r["episode_id"]
            for r in db.execute(
                "SELECT DISTINCT episode_id FROM quotes WHERE podcast_id=?", (PODCAST_ID,)
            ).fetchall()
        }
        print(f" {len(done)} Episoden haben bereits Quotes — werden uebersprungen")

        next_id = next_quote_id(db, PODCAST_ID)
        print(f" Naechste Quote-ID: q{next_id}")

        budget = Budget(hard_limit_usd=HARD_BUDGET_USD)

        total_inserted = 0
        failures = []
        for i, ep in enumerate(episodes):
            if ep["id"] in done:
                continue
            if budget.over():
                print(f"!! Budget ueberschritten ({budget.cost():.4f} USD) — Abbruch")
                break
            try:
                n, next_id, err = process_episode(db, client, ep, allowed_themes, budget, next_id)
            except Exception as e:
                # Discard partial inserts of the failed episode: otherwise the
                # commit below would persist them, the episode would count as
                # "done" on the next run, and the unchanged next_id would
                # collide with the ids those partial rows already used.
                db.rollback()
                n, err = 0, str(e)
            total_inserted += n
            if err:
                failures.append((ep["id"], err))
            # Commit after every episode so a crash loses at most one episode.
            db.commit()
            print(f" [{i+1}/{len(episodes)}] {ep['id']}: +{n} quotes "
                  f"(total={total_inserted}, cost=${budget.cost():.4f}, err={'-' if not err else err[:80]})", flush=True)
            time.sleep(0.4)

        db.commit()

        print()
        print("=== Zusammenfassung Aufgabe B ===")
        print(f" Quotes inserted: {total_inserted}")
        print(f" Tokens in={budget.tokens_in} out={budget.tokens_out}")
        print(f" Kosten ~${budget.cost():.4f}")
        if failures:
            print(f" Fehler in {len(failures)} Episoden, erste 5:")
            for ep_id, err in failures[:5]:
                print(f" {ep_id}: {err[:120]}")

        # Sanity check: quotes per episode.
        counts = db.execute(
            "SELECT episode_id, COUNT(*) c FROM quotes WHERE podcast_id=? GROUP BY episode_id ORDER BY c",
            (PODCAST_ID,),
        ).fetchall()
        print(f" Episoden mit Quotes: {len(counts)}")
        if counts:
            cs = [c["c"] for c in counts]
            print(f" Quotes/Episode: min={min(cs)} max={max(cs)} mean={sum(cs)/len(cs):.1f}")
    finally:
        # Release the connection even when a query or the summary fails.
        db.close()
|
|
||||||
|
|
||||||
|
|
||||||
# Run the extraction only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
||||||
@ -1,322 +0,0 @@
|
|||||||
"""Robuster JSON-Parser fuer LLM-Antworten.
|
|
||||||
|
|
||||||
Behebt typische Probleme:
|
|
||||||
- Markdown-Codefences (```json ... ```)
|
|
||||||
- Vorspann/Nachspann ausserhalb des JSON-Blocks
|
|
||||||
- Trailing commas
|
|
||||||
- Unescaped quotes innerhalb von Strings (heuristisch)
|
|
||||||
- Smart-Quotes
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
from typing import Any, Optional
|
|
||||||
|
|
||||||
|
|
||||||
def _strip_codefence(s: str) -> str:
|
|
||||||
s = s.strip()
|
|
||||||
if s.startswith("```"):
|
|
||||||
# entferne erste Zeile ```... und schliessende ```
|
|
||||||
s = re.sub(r"^```[a-zA-Z0-9_-]*\s*\n?", "", s)
|
|
||||||
s = re.sub(r"\n?```\s*$", "", s)
|
|
||||||
return s.strip()
|
|
||||||
|
|
||||||
|
|
||||||
def _find_balanced(s: str, open_char: str, close_char: str) -> Optional[str]:
|
|
||||||
"""Extrahiere ersten balancierten {...} oder [...]-Block, respektiert String-Literals.
|
|
||||||
|
|
||||||
Wenn keine vollstaendige Balance erreicht wird (truncated JSON), wird der bis zum
|
|
||||||
Ende verfuegbare Block zurueckgegeben — das Repair-Pipeline-Stadium kann den dann
|
|
||||||
ggf. ergaenzen.
|
|
||||||
"""
|
|
||||||
start = s.find(open_char)
|
|
||||||
if start == -1:
|
|
||||||
return None
|
|
||||||
depth = 0
|
|
||||||
in_str = False
|
|
||||||
esc = False
|
|
||||||
for i in range(start, len(s)):
|
|
||||||
c = s[i]
|
|
||||||
if in_str:
|
|
||||||
if esc:
|
|
||||||
esc = False
|
|
||||||
elif c == "\\":
|
|
||||||
esc = True
|
|
||||||
elif c == '"':
|
|
||||||
in_str = False
|
|
||||||
continue
|
|
||||||
if c == '"':
|
|
||||||
in_str = True
|
|
||||||
continue
|
|
||||||
if c == open_char:
|
|
||||||
depth += 1
|
|
||||||
elif c == close_char:
|
|
||||||
depth -= 1
|
|
||||||
if depth == 0:
|
|
||||||
return s[start:i + 1]
|
|
||||||
# Truncated: gib trotzdem den bisher gesehenen Block zurueck
|
|
||||||
return s[start:]
|
|
||||||
|
|
||||||
|
|
||||||
def _close_truncated(block: str, open_char: str, close_char: str) -> str:
|
|
||||||
"""Schliesst einen abgeschnittenen JSON-Block heuristisch.
|
|
||||||
|
|
||||||
Ansatz:
|
|
||||||
1. Scanne Zeichen, tracke (in_string, esc, depth).
|
|
||||||
2. Wenn am Ende ein String offen ist: schliesse mit ".
|
|
||||||
3. Schneide einen evtl. unvollstaendigen Wert-Tail nach dem letzten
|
|
||||||
sicheren Komma/Open-Brace/Close-Brace.
|
|
||||||
4. Ergaenze fehlende } / ] entsprechend depth.
|
|
||||||
"""
|
|
||||||
s = block
|
|
||||||
in_str = False
|
|
||||||
esc = False
|
|
||||||
depth = 0
|
|
||||||
# last_safe = Position direkt nach einem komplett-abgeschlossenen Element
|
|
||||||
# (komma, open, close), das heisst: wir koennen dort ohne Datenverlust schneiden.
|
|
||||||
last_safe = 0
|
|
||||||
for i, c in enumerate(s):
|
|
||||||
if in_str:
|
|
||||||
if esc:
|
|
||||||
esc = False
|
|
||||||
elif c == "\\":
|
|
||||||
esc = True
|
|
||||||
elif c == '"':
|
|
||||||
in_str = False
|
|
||||||
last_safe = i + 1
|
|
||||||
continue
|
|
||||||
if c == '"':
|
|
||||||
in_str = True
|
|
||||||
continue
|
|
||||||
if c in "{[":
|
|
||||||
depth += 1
|
|
||||||
last_safe = i + 1
|
|
||||||
elif c in "}]":
|
|
||||||
depth -= 1
|
|
||||||
last_safe = i + 1
|
|
||||||
elif c == ",":
|
|
||||||
last_safe = i # vor dem Komma ist sicher
|
|
||||||
elif c == ":":
|
|
||||||
# Doppelpunkt: kein safe-cut hier
|
|
||||||
pass
|
|
||||||
elif not c.isspace():
|
|
||||||
# Wert-Token (Zahl, true/false/null)
|
|
||||||
last_safe = i + 1
|
|
||||||
|
|
||||||
# Falls String am Ende offen: alle Zeichen behalten, am Ende " ergaenzen.
|
|
||||||
# Sonst: schneiden auf last_safe (entfernt unvollstaendige Werte/Keys).
|
|
||||||
if in_str:
|
|
||||||
# String einfach schliessen, lass Inhalt drin
|
|
||||||
s = s + '"'
|
|
||||||
else:
|
|
||||||
s = s[:last_safe] if last_safe > 0 else s
|
|
||||||
|
|
||||||
# Trailing whitespace + comma entfernen
|
|
||||||
s = re.sub(r"[\s,]+$", "", s)
|
|
||||||
|
|
||||||
# Pruefe ob letzter Token ein Key ohne Wert ist: "..." am Ende vor depth-close
|
|
||||||
# Pattern: ... "key" oder ... "key": (ohne Wert) -> entferne diesen unfertigen Eintrag
|
|
||||||
# Naive Heuristik: wenn der Inhalt mit "key" oder "key": endet ohne folgenden Wert,
|
|
||||||
# schneide bis zum letzten , oder { vor dieser Stelle.
|
|
||||||
# Recompute depth nach den Aenderungen
|
|
||||||
depth = 0
|
|
||||||
in_str = False
|
|
||||||
esc = False
|
|
||||||
for c in s:
|
|
||||||
if in_str:
|
|
||||||
if esc:
|
|
||||||
esc = False
|
|
||||||
elif c == "\\":
|
|
||||||
esc = True
|
|
||||||
elif c == '"':
|
|
||||||
in_str = False
|
|
||||||
continue
|
|
||||||
if c == '"':
|
|
||||||
in_str = True
|
|
||||||
elif c in "{[":
|
|
||||||
depth += 1
|
|
||||||
elif c in "}]":
|
|
||||||
depth -= 1
|
|
||||||
|
|
||||||
# Wenn wir mit "key" oder "key": (ohne Wert!) enden, schneide bis vorheriger ,/{.
|
|
||||||
# Wichtig: nur wenn vor diesem `"..."` ein `,` oder `{` (also Key-Position) liegt,
|
|
||||||
# nicht wenn ein `:` (Wert-Position) liegt.
|
|
||||||
tail_match = re.search(r'("[^"]*")(\s*:?)\s*$', s)
|
|
||||||
if tail_match and not s.rstrip().endswith(("}", "]")):
|
|
||||||
before = s[:tail_match.start()].rstrip()
|
|
||||||
prev_char = before[-1] if before else ""
|
|
||||||
# Nur trimmen, wenn dies ein Key ohne Wert ist (vor sich , oder {)
|
|
||||||
if prev_char in ",{":
|
|
||||||
cut = max(s.rfind(",", 0, tail_match.start()), s.rfind("{", 0, tail_match.start()))
|
|
||||||
if cut > 0:
|
|
||||||
s = s[:cut].rstrip().rstrip(",")
|
|
||||||
# depth neu berechnen
|
|
||||||
depth = 0
|
|
||||||
in_str = False
|
|
||||||
esc = False
|
|
||||||
for c in s:
|
|
||||||
if in_str:
|
|
||||||
if esc:
|
|
||||||
esc = False
|
|
||||||
elif c == "\\":
|
|
||||||
esc = True
|
|
||||||
elif c == '"':
|
|
||||||
in_str = False
|
|
||||||
continue
|
|
||||||
if c == '"':
|
|
||||||
in_str = True
|
|
||||||
elif c in "{[":
|
|
||||||
depth += 1
|
|
||||||
elif c in "}]":
|
|
||||||
depth -= 1
|
|
||||||
|
|
||||||
# Fehlende Klammern ergaenzen — kann gemischt sein, einfach von rechts pruefen
|
|
||||||
# was offen ist.
|
|
||||||
# Wir wissen: am Anfang ist open_char, depth zaehlt {[ +1 und }] -1.
|
|
||||||
# Fuer korrektes Schliessen muessen wir die Reihenfolge der offenen
|
|
||||||
# Klammern kennen. Vereinfachung: zaehle separat.
|
|
||||||
open_curly = s.count("{") - s.count("}")
|
|
||||||
open_brack = s.count("[") - s.count("]")
|
|
||||||
# Annahme: schliessende Klammern in umgekehrter Reihenfolge der oeffnenden
|
|
||||||
# Naive: suche letzte offene Klammer und schliesse damit.
|
|
||||||
while open_curly > 0 or open_brack > 0:
|
|
||||||
# finde letzte offene Klammer im String (ausserhalb von strings)
|
|
||||||
last_open = None
|
|
||||||
in_str = False
|
|
||||||
esc = False
|
|
||||||
for i, c in enumerate(s):
|
|
||||||
if in_str:
|
|
||||||
if esc:
|
|
||||||
esc = False
|
|
||||||
elif c == "\\":
|
|
||||||
esc = True
|
|
||||||
elif c == '"':
|
|
||||||
in_str = False
|
|
||||||
continue
|
|
||||||
if c == '"':
|
|
||||||
in_str = True
|
|
||||||
elif c in "{[":
|
|
||||||
last_open = (i, c)
|
|
||||||
if last_open is None:
|
|
||||||
break
|
|
||||||
# schliesse die zuletzt geoeffnete (innerste am rechten Rand)
|
|
||||||
# Aber: koennten dazwischen schon geschlossene sein. Vereinfacht:
|
|
||||||
# schliesse ab Ende.
|
|
||||||
if open_curly > 0 and (open_brack == 0 or last_open[1] == "{"):
|
|
||||||
s += "}"
|
|
||||||
open_curly -= 1
|
|
||||||
elif open_brack > 0:
|
|
||||||
s += "]"
|
|
||||||
open_brack -= 1
|
|
||||||
else:
|
|
||||||
break
|
|
||||||
return s
|
|
||||||
|
|
||||||
|
|
||||||
def _normalize_quotes(s: str) -> str:
|
|
||||||
# Ersetze typografische Anfuehrungszeichen durch ASCII (nur ausserhalb von String-Werten heikel,
|
|
||||||
# aber pragmatisch: Modelle setzen sie fast nur als Begrenzer falsch).
|
|
||||||
return (s.replace("“", '"').replace("”", '"')
|
|
||||||
.replace("„", '"').replace("‟", '"')
|
|
||||||
.replace("‘", "'").replace("’", "'"))
|
|
||||||
|
|
||||||
|
|
||||||
def _strip_trailing_commas(s: str) -> str:
|
|
||||||
return re.sub(r",(\s*[}\]])", r"\1", s)
|
|
||||||
|
|
||||||
|
|
||||||
def _escape_inner_quotes(block: str) -> str:
|
|
||||||
"""Heuristik: in JSON-Strings unescaped " in escaped \" umwandeln.
|
|
||||||
|
|
||||||
Idee: Wir scannen Token fuer Token. Wenn wir in einem String sind und ein " auftritt,
|
|
||||||
pruefen wir, ob danach ein Strukturzeichen (`,`, `}`, `]`, `:` mit moeglichem Whitespace)
|
|
||||||
folgt. Wenn nicht, ist es ein eingebettetes Anfuehrungszeichen und wird escaped.
|
|
||||||
"""
|
|
||||||
out = []
|
|
||||||
in_str = False
|
|
||||||
esc = False
|
|
||||||
i = 0
|
|
||||||
while i < len(block):
|
|
||||||
c = block[i]
|
|
||||||
if not in_str:
|
|
||||||
out.append(c)
|
|
||||||
if c == '"':
|
|
||||||
in_str = True
|
|
||||||
i += 1
|
|
||||||
continue
|
|
||||||
# in_str = True
|
|
||||||
if esc:
|
|
||||||
out.append(c)
|
|
||||||
esc = False
|
|
||||||
i += 1
|
|
||||||
continue
|
|
||||||
if c == "\\":
|
|
||||||
out.append(c)
|
|
||||||
esc = True
|
|
||||||
i += 1
|
|
||||||
continue
|
|
||||||
if c == '"':
|
|
||||||
# Schau voraus: erlaubt nur whitespace + [,}\]:]
|
|
||||||
j = i + 1
|
|
||||||
while j < len(block) and block[j] in " \t\r\n":
|
|
||||||
j += 1
|
|
||||||
if j >= len(block) or block[j] in ",}]:":
|
|
||||||
# echtes Stringende
|
|
||||||
out.append(c)
|
|
||||||
in_str = False
|
|
||||||
else:
|
|
||||||
# eingebettetes Quote -> escapen
|
|
||||||
out.append("\\\"")
|
|
||||||
i += 1
|
|
||||||
continue
|
|
||||||
out.append(c)
|
|
||||||
i += 1
|
|
||||||
return "".join(out)
|
|
||||||
|
|
||||||
|
|
||||||
def parse_llm_json(content: str, expect: str = "object") -> Any:
    """Robustly parse an LLM response as JSON.

    The untouched content is tried first so that valid JSON is never damaged
    by the repair heuristics (e.g. typographic quotes inside string values).
    Only if that fails are code fences stripped, quotes normalised, the first
    bracketed block extracted and a series of repairs attempted.

    Args:
        content: Raw model response.
        expect: ``'object'`` or ``'array'`` (anything else is treated as
            array). If no block of the expected kind is found, the other
            kind is accepted as a fallback.

    Returns:
        The parsed Python object.

    Raises:
        ValueError: If the content is ``None``, contains no JSON block, or no
            repair attempt yields parseable JSON.
    """
    if content is None:
        raise ValueError("leere Antwort")

    want_object = expect == "object"

    # Fast path: the response already is valid JSON of the expected kind.
    try:
        direct = json.loads(content)
    except (TypeError, ValueError):
        pass
    else:
        if isinstance(direct, dict if want_object else list):
            return direct

    raw = _strip_codefence(content)
    s = _normalize_quotes(raw)

    open_c, close_c = ("{", "}") if want_object else ("[", "]")
    alt_open, alt_close = ("[", "]") if want_object else ("{", "}")

    def extract(text):
        found = _find_balanced(text, open_c, close_c)
        if found is None:
            # Fallback: maybe the other format is present after all.
            found = _find_balanced(text, alt_open, alt_close)
        return found

    block = extract(s)
    if block is None:
        raise ValueError(f"kein {expect} gefunden in: {content[:200]}")

    def candidates():
        # Un-normalised block first: normalising quotes can break valid JSON.
        raw_block = extract(raw)
        if raw_block is not None and raw_block != block:
            yield raw_block
            yield _strip_trailing_commas(raw_block)
        yield block
        yield _strip_trailing_commas(block)
        escaped = _escape_inner_quotes(block)
        yield escaped
        yield _strip_trailing_commas(escaped)
        closed = _close_truncated(block, open_c, close_c)
        yield closed
        yield _strip_trailing_commas(closed)
        closed_escaped = _escape_inner_quotes(closed)
        yield closed_escaped
        yield _strip_trailing_commas(closed_escaped)

    last_err = None
    for attempt in candidates():
        try:
            return json.loads(attempt)
        except json.JSONDecodeError as e:
            last_err = e
    raise ValueError(f"JSON-Parse fehlgeschlagen nach Repair-Versuchen: {last_err}; raw={content[:300]}")
|
|
||||||
@ -1,239 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""Re-Run der fehlerhaften Records in `debates` und `argument_links` mit robustem JSON-Parser.
|
|
||||||
|
|
||||||
Vorgehen:
|
|
||||||
- Lade alle Records mit topic='error' (debates) bzw. relation='error' (argument_links)
|
|
||||||
- Hole Source-/Target-Paragraph aus DB
|
|
||||||
- Sende erneut an qwen-plus, parse mit json_utils.parse_llm_json
|
|
||||||
- UPDATE bestehenden Eintrag
|
|
||||||
|
|
||||||
Nutzung:
|
|
||||||
DASHSCOPE_API_KEY=... python3 rerun_errors.py [db-pfad]
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
import sqlite3
|
|
||||||
|
|
||||||
from openai import OpenAI
|
|
||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
||||||
from json_utils import parse_llm_json
|
|
||||||
|
|
||||||
DB_PATH = sys.argv[1] if len(sys.argv) > 1 else "data/db.sqlite"
|
|
||||||
API_KEY = os.environ.get("DASHSCOPE_API_KEY", "")
|
|
||||||
BASE_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
|
|
||||||
MODEL = "qwen-plus"
|
|
||||||
|
|
||||||
# Kosten qwen-plus (DashScope intl, Stand 2025): $0.0008 / 1k input, $0.002 / 1k output
|
|
||||||
COST_IN = 0.0008 / 1000
|
|
||||||
COST_OUT = 0.002 / 1000
|
|
||||||
|
|
||||||
DEBATES_SYSTEM = """Du bist ein Diskursanalyst. Du erhältst zwei Textabschnitte aus VERSCHIEDENEN Podcasts, die dasselbe Thema behandeln.
|
|
||||||
|
|
||||||
Erstelle eine kurze Gegenüberstellung. Antworte NUR mit JSON:
|
|
||||||
|
|
||||||
{
|
|
||||||
"topic": "Das gemeinsame Thema in 3-5 Wörtern",
|
|
||||||
"agreement": "Worin stimmen beide überein? (1-2 Sätze)",
|
|
||||||
"divergence": "Worin unterscheiden sie sich? (1-2 Sätze, oder 'keine wesentliche Divergenz')",
|
|
||||||
"insight": "Was lernt man durch die Gegenüberstellung, das man aus keinem der beiden allein lernen würde? (1 Satz)"
|
|
||||||
}"""
|
|
||||||
|
|
||||||
ARGS_SYSTEM = """Du bist ein Diskursanalyst. Du erhältst zwei Textabschnitte aus Podcast-Transkripten.
|
|
||||||
Klassifiziere die logische Relation zwischen ihnen. Antworte NUR mit einem JSON-Objekt:
|
|
||||||
|
|
||||||
{"relation": "...", "confidence": 0.0-1.0, "explanation": "Ein Satz Begruendung"}
|
|
||||||
|
|
||||||
Moegliche Relationen:
|
|
||||||
- "erweitert": B baut auf A auf, ergaenzt, vertieft
|
|
||||||
- "widerspricht": B widerspricht A, nennt Gegenargument
|
|
||||||
- "belegt": B liefert Evidenz/Daten fuer A's These
|
|
||||||
- "relativiert": B schraenkt A ein, nennt Ausnahmen/Bedingungen
|
|
||||||
- "gleicher_punkt": A und B sagen im Kern dasselbe
|
|
||||||
- "kein_bezug": Trotz thematischer Naehe kein logischer Bezug"""
|
|
||||||
|
|
||||||
|
|
||||||
class Budget:
    """Accumulates token usage and compares the resulting cost to a limit."""

    def __init__(self, hard_limit_usd):
        # Upper bound in USD checked by over().
        self.hard_limit = hard_limit_usd
        self.tokens_in = 0
        self.tokens_out = 0

    def add(self, usage):
        """Add the token counts of one response; a falsy *usage* is ignored.

        Missing or ``None`` ``prompt_tokens`` / ``completion_tokens``
        attributes count as 0.
        """
        if usage:
            self.tokens_in += getattr(usage, "prompt_tokens", 0) or 0
            self.tokens_out += getattr(usage, "completion_tokens", 0) or 0

    def cost(self):
        """Return the accumulated cost based on COST_IN / COST_OUT per token."""
        return self.tokens_in * COST_IN + self.tokens_out * COST_OUT

    def over(self):
        """Return True once the accumulated cost exceeds the hard limit."""
        return self.cost() > self.hard_limit
|
|
||||||
|
|
||||||
|
|
||||||
def call_llm(client, system, user, max_tokens, budget):
    """Send one chat completion request and parse the answer as a JSON object.

    The request is attempted at most twice; a second attempt happens only
    after a request/transport exception (with a 2 s pause). A response that
    cannot be parsed is not retried.

    Args:
        client: Object exposing ``chat.completions.create`` (OpenAI client).
        system: System prompt; prompts containing "Klassifiziere" use
            temperature 0.1, all others 0.2.
        user: User message.
        max_tokens: Completion token limit passed to the API.
        budget: Receives the response's ``usage`` via ``budget.add``.

    Returns:
        ``(result, None)`` with the parsed dict on success, otherwise
        ``(None, error_message)``.
    """
    last_err = None
    for attempt in range(2):
        try:
            resp = client.chat.completions.create(
                model=MODEL,
                messages=[
                    {"role": "system", "content": system},
                    {"role": "user", "content": user},
                ],
                temperature=0.1 if "Klassifiziere" in system else 0.2,
                max_tokens=max_tokens,
            )
            budget.add(getattr(resp, "usage", None))
            content = resp.choices[0].message.content
            try:
                parsed = parse_llm_json(content, expect="object")
            except ValueError as pe:
                # A retry with another temperature would be possible; a parse
                # failure is accepted as final instead.
                last_err = f"parse: {pe}"
                break
            if isinstance(parsed, dict):
                return parsed, None
            # parse_llm_json may fall back to an array; callers rely on
            # dict methods, so report that as a parse failure.
            last_err = f"parse: kein object, sondern {type(parsed).__name__}"
            break
        except Exception as e:
            last_err = str(e)
            if attempt < 1:
                time.sleep(2)
    return None, last_err
|
|
||||||
|
|
||||||
|
|
||||||
def rerun_debates(db, client, budget):
    """Re-run the LLM for all ``debates`` rows whose topic is ``'error'``.

    Each row is joined with its source/target paragraph, episode and podcast,
    sent to the model again and updated in place. Rows that fail again only
    get a ``rerun-failed: ...`` note written to ``insight``. Commits every 10
    rows and once at the end; stops early when the budget is exceeded.

    Returns:
        ``(fixed, still_err)`` counts.
    """
    rows = db.execute("""
        SELECT d.id as did, d.source_podcast, d.source_episode, d.source_idx,
               d.target_podcast, d.target_episode, d.target_idx,
               p1.text as source_text, p2.text as target_text,
               pc1.name as source_pname, pc2.name as target_pname,
               e1.title as source_title, e1.guest as source_guest,
               e2.title as target_title, e2.guest as target_guest
        FROM debates d
        JOIN paragraphs p1 ON d.source_podcast = p1.podcast_id AND d.source_episode = p1.episode_id AND d.source_idx = p1.idx
        JOIN paragraphs p2 ON d.target_podcast = p2.podcast_id AND d.target_episode = p2.episode_id AND d.target_idx = p2.idx
        JOIN episodes e1 ON d.source_podcast = e1.podcast_id AND d.source_episode = e1.id
        JOIN episodes e2 ON d.target_podcast = e2.podcast_id AND d.target_episode = e2.id
        JOIN podcasts pc1 ON d.source_podcast = pc1.id
        JOIN podcasts pc2 ON d.target_podcast = pc2.id
        WHERE d.topic = 'error'
    """).fetchall()
    print(f"[debates] {len(rows)} Error-Records zu reparieren")
    fixed = 0
    still_err = 0
    for i, r in enumerate(rows):
        if budget.over():
            print(f"[debates] Kosten-Limit erreicht bei {i}/{len(rows)} (cost=${budget.cost():.4f})")
            break
        meta_a = f"{r['source_pname']} / {r['source_episode']}: {r['source_title']} ({r['source_guest']})"
        meta_b = f"{r['target_pname']} / {r['target_episode']}: {r['target_title']} ({r['target_guest']})"
        # NULL paragraph text must not crash the slice.
        source_text = (r["source_text"] or "")[:800]
        target_text = (r["target_text"] or "")[:800]
        user = (f"Podcast A — {meta_a}:\n\"{source_text}\"\n\n"
                f"Podcast B — {meta_b}:\n\"{target_text}\"\n\n"
                "Erstelle die Gegenueberstellung.")
        result, err = call_llm(client, DEBATES_SYSTEM, user, 600, budget)
        if isinstance(result, dict) and result.get("topic"):
            # str(): the model may return non-string values, which can be
            # neither sliced reliably nor bound as SQL parameters.
            db.execute(
                "UPDATE debates SET topic=?, agreement=?, divergence=?, insight=? WHERE id=?",
                (str(result.get("topic") or "")[:120],
                 str(result.get("agreement") or "")[:1000],
                 str(result.get("divergence") or "")[:1000],
                 str(result.get("insight") or "")[:1000],
                 r["did"]),
            )
            fixed += 1
        else:
            still_err += 1
            db.execute(
                "UPDATE debates SET insight=? WHERE id=?",
                (f"rerun-failed: {err}"[:500], r["did"]),
            )
        if (i + 1) % 10 == 0:
            db.commit()
            print(f" [debates] {i+1}/{len(rows)} gefixt={fixed} still_err={still_err} cost=${budget.cost():.4f}")
        time.sleep(0.3)
    db.commit()
    print(f"[debates] fertig: gefixt={fixed} still_err={still_err}")
    return fixed, still_err
|
|
||||||
|
|
||||||
|
|
||||||
def rerun_args(db, client, budget):
    """Re-run the LLM for all ``argument_links`` rows with relation ``'error'``.

    Each row is joined with its source/target paragraph and episode, sent to
    the model again and updated in place. Rows that fail again only get a
    ``rerun-failed: ...`` note written to ``explanation``. Commits every 20
    rows and once at the end; stops early when the budget is exceeded.

    Returns:
        ``(fixed, still_err)`` counts.
    """
    rows = db.execute("""
        SELECT a.id as aid, a.source_podcast, a.source_episode, a.source_idx,
               a.target_podcast, a.target_episode, a.target_idx,
               p1.text as source_text, p2.text as target_text,
               e1.title as source_title, e1.guest as source_guest,
               e2.title as target_title, e2.guest as target_guest
        FROM argument_links a
        JOIN paragraphs p1 ON a.source_podcast = p1.podcast_id AND a.source_episode = p1.episode_id AND a.source_idx = p1.idx
        JOIN paragraphs p2 ON a.target_podcast = p2.podcast_id AND a.target_episode = p2.episode_id AND a.target_idx = p2.idx
        JOIN episodes e1 ON a.source_podcast = e1.podcast_id AND a.source_episode = e1.id
        JOIN episodes e2 ON a.target_podcast = e2.podcast_id AND a.target_episode = e2.id
        WHERE a.relation = 'error'
    """).fetchall()
    print(f"[argument_links] {len(rows)} Error-Records zu reparieren")
    fixed = 0
    still_err = 0
    for i, r in enumerate(rows):
        if budget.over():
            print(f"[args] Kosten-Limit erreicht bei {i}/{len(rows)} (cost=${budget.cost():.4f})")
            break
        meta_a = f"{r['source_episode']}: {r['source_title']} — {r['source_guest']}"
        meta_b = f"{r['target_episode']}: {r['target_title']} — {r['target_guest']}"
        # NULL paragraph text must not crash the slice.
        source_text = (r["source_text"] or "")[:800]
        target_text = (r["target_text"] or "")[:800]
        user = (f"Absatz A ({meta_a}):\n\"{source_text}\"\n\n"
                f"Absatz B ({meta_b}):\n\"{target_text}\"\n\n"
                "Welche logische Relation besteht von A zu B?")
        result, err = call_llm(client, ARGS_SYSTEM, user, 350, budget)
        if isinstance(result, dict) and result.get("relation") and result.get("relation") != "error":
            # The model may answer with a non-numeric confidence ("high");
            # that must not abort the whole run.
            try:
                confidence = float(result.get("confidence", 0) or 0)
            except (TypeError, ValueError):
                confidence = 0.0
            db.execute(
                "UPDATE argument_links SET relation=?, confidence=?, explanation=? WHERE id=?",
                (str(result.get("relation", ""))[:60],
                 confidence,
                 str(result.get("explanation", ""))[:1000],
                 r["aid"]),
            )
            fixed += 1
        else:
            still_err += 1
            db.execute(
                "UPDATE argument_links SET explanation=? WHERE id=?",
                (f"rerun-failed: {err}"[:500], r["aid"]),
            )
        if (i + 1) % 20 == 0:
            db.commit()
            print(f" [args] {i+1}/{len(rows)} gefixt={fixed} still_err={still_err} cost=${budget.cost():.4f}")
        time.sleep(0.25)
    db.commit()
    print(f"[argument_links] fertig: gefixt={fixed} still_err={still_err}")
    return fixed, still_err
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Repair failed ``debates`` and ``argument_links`` records.

    Opens the database at DB_PATH, re-runs both tables against MODEL under a
    shared 1 USD budget and prints a summary. Exits with status 1 when
    DASHSCOPE_API_KEY is not set.
    """
    if not API_KEY:
        print("DASHSCOPE_API_KEY nicht gesetzt.")
        sys.exit(1)
    client = OpenAI(api_key=API_KEY, base_url=BASE_URL, timeout=30.0, max_retries=1)
    db = sqlite3.connect(DB_PATH, timeout=30)
    try:
        db.row_factory = sqlite3.Row
        db.execute("PRAGMA busy_timeout=30000")

        # Budget for task A: 1 USD.
        budget = Budget(hard_limit_usd=1.0)

        print(f"DB: {DB_PATH}, Modell: {MODEL}")
        d_fixed, d_err = rerun_debates(db, client, budget)
        a_fixed, a_err = rerun_args(db, client, budget)

        print()
        print("=== Zusammenfassung Aufgabe A ===")
        print(f" debates gefixt={d_fixed} still_err={d_err}")
        print(f" argument_links gefixt={a_fixed} still_err={a_err}")
        print(f" Tokens in={budget.tokens_in} out={budget.tokens_out}")
        print(f" Kosten ~${budget.cost():.4f}")
    finally:
        # Release the connection even when a re-run raises.
        db.close()
|
|
||||||
|
|
||||||
|
|
||||||
# Run the repair only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
||||||
@ -591,16 +591,6 @@ const TranscriptView = {
|
|||||||
|
|
||||||
panel.innerHTML = html;
|
panel.innerHTML = html;
|
||||||
|
|
||||||
// Cache word elements + start times for fast binary-search sync
|
|
||||||
if (this.words) {
|
|
||||||
this._wordEls = Array.from(panel.querySelectorAll('.word[data-ws]'));
|
|
||||||
this._wordTimes = this._wordEls.map(el => parseFloat(el.dataset.ws));
|
|
||||||
this.activeWordIdx = -1;
|
|
||||||
} else {
|
|
||||||
this._wordEls = null;
|
|
||||||
this._wordTimes = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Detect user scroll
|
// Detect user scroll
|
||||||
panel.onscroll = () => { this.userScrolled = true; };
|
panel.onscroll = () => { this.userScrolled = true; };
|
||||||
|
|
||||||
@ -630,51 +620,27 @@ const TranscriptView = {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Word-level sync (#12) — binary search + delta updates
|
// Word-level sync (#12)
|
||||||
if (!this._wordEls || this._wordEls.length === 0) return;
|
if (this.words) {
|
||||||
const times = this._wordTimes;
|
const prev = document.querySelector('.word.word-active');
|
||||||
let lo = 0, hi = times.length - 1, newIdx = -1;
|
if (prev) prev.classList.replace('word-active', 'word-spoken');
|
||||||
while (lo <= hi) {
|
|
||||||
const mid = (lo + hi) >> 1;
|
|
||||||
if (times[mid] <= time) { newIdx = mid; lo = mid + 1; }
|
|
||||||
else hi = mid - 1;
|
|
||||||
}
|
|
||||||
// Past the end of the last word? Treat it as spoken, no active word.
|
|
||||||
if (newIdx >= 0) {
|
|
||||||
const we = parseFloat(this._wordEls[newIdx].dataset.we);
|
|
||||||
if (time > we + 0.05) newIdx = -2; // sentinel: all up to length-1 spoken
|
|
||||||
}
|
|
||||||
const targetIdx = newIdx === -2 ? this._wordEls.length - 1 : newIdx;
|
|
||||||
const prevIdx = this.activeWordIdx;
|
|
||||||
if (targetIdx === prevIdx && newIdx !== -2) return;
|
|
||||||
|
|
||||||
if (targetIdx > prevIdx) {
|
// Find current word by time
|
||||||
// Forward: mark old active + words in between as spoken
|
const wordEl = document.querySelector(`.word[data-ws]`);
|
||||||
if (prevIdx >= 0) {
|
if (wordEl) {
|
||||||
const prevEl = this._wordEls[prevIdx];
|
const allWords = document.querySelectorAll('.word[data-ws]');
|
||||||
prevEl.classList.remove('word-active');
|
for (const w of allWords) {
|
||||||
prevEl.classList.add('word-spoken');
|
const ws = parseFloat(w.dataset.ws);
|
||||||
}
|
const we = parseFloat(w.dataset.we);
|
||||||
for (let i = Math.max(0, prevIdx + 1); i < targetIdx; i++) {
|
if (time >= ws && time < we) {
|
||||||
this._wordEls[i].classList.add('word-spoken');
|
w.classList.add('word-active');
|
||||||
}
|
break;
|
||||||
if (newIdx >= 0) {
|
} else if (time >= we) {
|
||||||
this._wordEls[targetIdx].classList.add('word-active');
|
w.classList.add('word-spoken');
|
||||||
} else {
|
}
|
||||||
// Past last word
|
}
|
||||||
this._wordEls[targetIdx].classList.add('word-spoken');
|
|
||||||
}
|
|
||||||
} else if (targetIdx < prevIdx) {
|
|
||||||
// Backward seek: clear classes from targetIdx+1 .. prevIdx
|
|
||||||
for (let i = targetIdx + 1; i <= prevIdx; i++) {
|
|
||||||
this._wordEls[i].classList.remove('word-active', 'word-spoken');
|
|
||||||
}
|
|
||||||
if (targetIdx >= 0 && newIdx !== -2) {
|
|
||||||
this._wordEls[targetIdx].classList.remove('word-spoken');
|
|
||||||
this._wordEls[targetIdx].classList.add('word-active');
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
this.activeWordIdx = newIdx === -2 ? targetIdx : newIdx;
|
|
||||||
},
|
},
|
||||||
|
|
||||||
seekTo(time) {
|
seekTo(time) {
|
||||||
@ -735,331 +701,6 @@ const TranscriptView = {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// ── Analysis View (#16 claims / #17 questions) ──
|
|
||||||
const AnalysisView = {
|
|
||||||
visible: false,
|
|
||||||
mode: null,
|
|
||||||
episodeId: null,
|
|
||||||
items: null,
|
|
||||||
filter: null,
|
|
||||||
answeredFilter: null,
|
|
||||||
|
|
||||||
async show(episodeId, mode) {
|
|
||||||
if (!CURRENT_PODCAST) return;
|
|
||||||
TranscriptView.hide(); AnalysisView.hide(); GapsView.hide(); ShiftsView.hide();
|
|
||||||
this.episodeId = episodeId;
|
|
||||||
this.mode = mode;
|
|
||||||
this.visible = true;
|
|
||||||
this.filter = null;
|
|
||||||
this.answeredFilter = null;
|
|
||||||
|
|
||||||
const panel = document.getElementById('panel');
|
|
||||||
const ep = DATA.episodes.find(e => e.id === episodeId);
|
|
||||||
const staffel = DATA.staffeln.find(s => s.id === ep.staffel);
|
|
||||||
panel.innerHTML = `<h2 style="color:${staffel.color}">${ep.id}: ${ep.title} — ${mode === 'claims' ? 'Behauptungen' : 'Fragen'}</h2><p class="subtitle">Lädt …</p>`;
|
|
||||||
|
|
||||||
try {
|
|
||||||
const r = await fetch(`${API_BASE}/api/podcasts/${CURRENT_PODCAST}/episodes/${episodeId}/${mode}`);
|
|
||||||
const data = await r.json();
|
|
||||||
this.items = data[mode] || [];
|
|
||||||
} catch (e) {
|
|
||||||
panel.innerHTML += `<p style="color:var(--accent-warm)">Fehler: ${escHtml(e.message)}</p>`;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
this.render();
|
|
||||||
},
|
|
||||||
|
|
||||||
render() {
|
|
||||||
if (!this.visible || !this.items) return;
|
|
||||||
const panel = document.getElementById('panel');
|
|
||||||
const ep = DATA.episodes.find(e => e.id === this.episodeId);
|
|
||||||
const staffel = DATA.staffeln.find(s => s.id === ep.staffel);
|
|
||||||
const typeKey = this.mode === 'claims' ? 'claim_type' : 'question_type';
|
|
||||||
|
|
||||||
let html = `<h2 style="color:${staffel.color}">${ep.id}: ${ep.title} — ${this.mode === 'claims' ? 'Behauptungen' : 'Fragen'}</h2>`;
|
|
||||||
html += `<p class="subtitle">${ep.guest} · ${this.items.length} Einträge</p>`;
|
|
||||||
html += `<button class="transcript-toggle" onclick="showEpisodeById('${ep.id}')">← Zurück zur Episode</button>`;
|
|
||||||
|
|
||||||
// Type-Filter
|
|
||||||
const types = [...new Set(this.items.map(i => i[typeKey]))].sort();
|
|
||||||
html += `<div style="margin-top:12px;display:flex;flex-wrap:wrap;gap:6px">`;
|
|
||||||
const chip = (label, count, active, onclick) =>
|
|
||||||
`<span class="theme-tag" style="cursor:pointer;${active ? 'background:var(--accent)33;border-color:var(--accent);color:var(--text)' : ''}" onclick="${onclick}">${label} (${count})</span>`;
|
|
||||||
html += chip('alle', this.items.length, !this.filter, `AnalysisView.setFilter(null)`);
|
|
||||||
types.forEach(t => {
|
|
||||||
const n = this.items.filter(i => i[typeKey] === t).length;
|
|
||||||
html += chip(t, n, this.filter === t, `AnalysisView.setFilter('${t}')`);
|
|
||||||
});
|
|
||||||
html += '</div>';
|
|
||||||
|
|
||||||
// Answered-Filter (nur Fragen)
|
|
||||||
if (this.mode === 'questions') {
|
|
||||||
html += `<div style="margin-top:6px;display:flex;flex-wrap:wrap;gap:6px">`;
|
|
||||||
const states = ['no', 'partial', 'yes', 'self_answered'];
|
|
||||||
const labels = {no:'unbeantwortet', partial:'teilweise', yes:'beantwortet', self_answered:'selbst beantwortet'};
|
|
||||||
html += chip('beliebig', this.items.length, !this.answeredFilter, `AnalysisView.setAnsweredFilter(null)`);
|
|
||||||
states.forEach(s => {
|
|
||||||
const n = this.items.filter(i => i.answered === s).length;
|
|
||||||
if (n > 0) html += chip(labels[s], n, this.answeredFilter === s, `AnalysisView.setAnsweredFilter('${s}')`);
|
|
||||||
});
|
|
||||||
html += '</div>';
|
|
||||||
}
|
|
||||||
|
|
||||||
// Items
|
|
||||||
let filtered = this.items;
|
|
||||||
if (this.filter) filtered = filtered.filter(i => i[typeKey] === this.filter);
|
|
||||||
if (this.answeredFilter) filtered = filtered.filter(i => i.answered === this.answeredFilter);
|
|
||||||
|
|
||||||
if (filtered.length === 0) {
|
|
||||||
html += `<p class="subtitle" style="margin-top:16px">Keine Einträge mit aktuellem Filter.</p>`;
|
|
||||||
}
|
|
||||||
|
|
||||||
filtered.forEach(it => {
|
|
||||||
const ts = (it.start_time !== null && it.start_time !== undefined) ? fmtTime(it.start_time) : '–';
|
|
||||||
const text = this.mode === 'claims' ? it.claim_text : it.question_text;
|
|
||||||
const type = it[typeKey];
|
|
||||||
let badges = `<span class="theme-tag" style="font-size:10px;margin-right:4px">${type}</span>`;
|
|
||||||
if (this.mode === 'claims' && it.verifiable) {
|
|
||||||
badges += `<span class="theme-tag" style="font-size:10px;margin-right:4px;opacity:0.7">verifizierbar</span>`;
|
|
||||||
}
|
|
||||||
if (this.mode === 'questions') {
|
|
||||||
const a = it.answered;
|
|
||||||
const lbl = {no:'offen', partial:'teilweise', yes:'beantwortet', self_answered:'selbst beantwortet'}[a] || a;
|
|
||||||
const col = a === 'no' ? 'var(--accent-warm)' : (a === 'yes' ? 'var(--accent-green)' : 'var(--text-muted)');
|
|
||||||
badges += `<span class="theme-tag" style="font-size:10px;margin-right:4px;color:${col};border-color:${col}44">${lbl}</span>`;
|
|
||||||
}
|
|
||||||
html += `<div class="transcript-para" onclick="AnalysisView.jumpTo(${it.start_time || 0})">`;
|
|
||||||
html += `<span class="ts">${ts}</span>`;
|
|
||||||
html += badges;
|
|
||||||
html += escHtml(text);
|
|
||||||
html += '</div>';
|
|
||||||
});
|
|
||||||
panel.innerHTML = html;
|
|
||||||
},
|
|
||||||
|
|
||||||
setFilter(t) { this.filter = t; this.render(); },
|
|
||||||
setAnsweredFilter(s) { this.answeredFilter = s; this.render(); },
|
|
||||||
|
|
||||||
jumpTo(time) {
|
|
||||||
TranscriptView.show(this.episodeId, time);
|
|
||||||
},
|
|
||||||
|
|
||||||
hide() { this.visible = false; this.episodeId = null; this.items = null; }
|
|
||||||
};
|
|
||||||
|
|
||||||
// ── Gaps View (#14 Leerstellen-Detektor) ──
|
|
||||||
const GapsView = {
|
|
||||||
visible: false,
|
|
||||||
data: null,
|
|
||||||
missingFilter: null,
|
|
||||||
minSize: 0,
|
|
||||||
|
|
||||||
async show() {
|
|
||||||
TranscriptView.hide(); AnalysisView.hide(); GapsView.hide(); ShiftsView.hide();
|
|
||||||
this.visible = true;
|
|
||||||
const panel = document.getElementById('panel');
|
|
||||||
panel.innerHTML = `<h2>Leerstellen</h2><p class="subtitle">Lädt …</p>`;
|
|
||||||
try {
|
|
||||||
const r = await fetch(`${API_BASE}/api/analyses/gaps`);
|
|
||||||
const data = await r.json();
|
|
||||||
if (!data.available) {
|
|
||||||
panel.innerHTML = `<h2>Leerstellen</h2><p class="subtitle">Keine Leerstellen-Analyse vorhanden.</p>`;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
this.data = data;
|
|
||||||
} catch (e) {
|
|
||||||
panel.innerHTML = `<h2>Leerstellen</h2><p style="color:var(--accent-warm)">Fehler: ${escHtml(e.message)}</p>`;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
this.render();
|
|
||||||
},
|
|
||||||
|
|
||||||
render() {
|
|
||||||
if (!this.visible || !this.data) return;
|
|
||||||
const panel = document.getElementById('panel');
|
|
||||||
const d = this.data;
|
|
||||||
let html = `<h2>Leerstellen</h2>`;
|
|
||||||
html += `<p class="subtitle">${d.gaps.length} Themen-Cluster fehlen in mindestens einem Podcast · ${d.n_clusters} Cluster aus ${d.total_paragraphs} Absätzen über ${(d.podcasts || []).join(', ')}</p>`;
|
|
||||||
|
|
||||||
const podcasts = d.podcasts || [];
|
|
||||||
const chip = (label, count, active, onclick) =>
|
|
||||||
`<span class="theme-tag" style="cursor:pointer;${active ? 'background:var(--accent)33;border-color:var(--accent);color:var(--text)' : ''}" onclick="${onclick}">${label}${count !== null ? ` (${count})` : ''}</span>`;
|
|
||||||
|
|
||||||
html += `<div style="margin-top:12px;display:flex;flex-wrap:wrap;gap:6px">`;
|
|
||||||
html += chip('alle Podcasts', d.gaps.length, !this.missingFilter, `GapsView.setMissing(null)`);
|
|
||||||
podcasts.forEach(p => {
|
|
||||||
const n = d.gaps.filter(g => g.missing_in === p).length;
|
|
||||||
if (n > 0) html += chip(`fehlt in ${p}`, n, this.missingFilter === p, `GapsView.setMissing('${p}')`);
|
|
||||||
});
|
|
||||||
html += '</div>';
|
|
||||||
|
|
||||||
let filtered = d.gaps;
|
|
||||||
if (this.missingFilter) filtered = filtered.filter(g => g.missing_in === this.missingFilter);
|
|
||||||
if (this.minSize > 0) filtered = filtered.filter(g => g.cluster_size >= this.minSize);
|
|
||||||
|
|
||||||
if (filtered.length === 0) {
|
|
||||||
html += `<p class="subtitle" style="margin-top:16px">Keine Leerstellen mit aktuellem Filter.</p>`;
|
|
||||||
}
|
|
||||||
|
|
||||||
filtered.forEach(g => {
|
|
||||||
html += `<div class="transcript-para" style="cursor:default">`;
|
|
||||||
html += `<div style="display:flex;justify-content:space-between;gap:8px;margin-bottom:6px">`;
|
|
||||||
html += `<strong>${escHtml(g.cluster_label)}</strong>`;
|
|
||||||
html += `<span class="theme-tag" style="font-size:10px;color:var(--accent-warm);border-color:var(--accent-warm)44">fehlt in ${escHtml(g.missing_in)}</span>`;
|
|
||||||
html += `</div>`;
|
|
||||||
html += `<div class="subtitle" style="margin-bottom:6px">${g.cluster_size} Absätze · ${g.present_in_count} im anderen Podcast</div>`;
|
|
||||||
(g.representative || []).slice(0, 3).forEach(r => {
|
|
||||||
const epClickable = r.podcast === CURRENT_PODCAST;
|
|
||||||
const click = epClickable ? `onclick="GapsView.jumpTo('${r.episode}')"` : '';
|
|
||||||
const cur = epClickable ? 'cursor:pointer;' : '';
|
|
||||||
html += `<div style="${cur}padding:6px 0;border-top:1px solid var(--border)" ${click}>`;
|
|
||||||
html += `<span class="ts">${escHtml(r.podcast)}/${escHtml(r.episode)}</span> `;
|
|
||||||
html += escHtml(r.text);
|
|
||||||
html += `</div>`;
|
|
||||||
});
|
|
||||||
html += `</div>`;
|
|
||||||
});
|
|
||||||
panel.innerHTML = html;
|
|
||||||
},
|
|
||||||
|
|
||||||
setMissing(p) { this.missingFilter = p; this.render(); },
|
|
||||||
|
|
||||||
jumpTo(episodeId) {
|
|
||||||
if (!CURRENT_PODCAST) return;
|
|
||||||
const ep = DATA && DATA.episodes && DATA.episodes.find(e => e.id === episodeId);
|
|
||||||
if (ep) showEpisode(ep);
|
|
||||||
},
|
|
||||||
|
|
||||||
hide() { this.visible = false; }
|
|
||||||
};
|
|
||||||
|
|
||||||
// ── Shifts View (#15 Narrative Shift Detection) ──
|
|
||||||
const ShiftsView = {
|
|
||||||
visible: false,
|
|
||||||
data: null,
|
|
||||||
podcastFilter: null,
|
|
||||||
themeFilter: null,
|
|
||||||
expanded: {},
|
|
||||||
|
|
||||||
async show() {
|
|
||||||
TranscriptView.hide(); AnalysisView.hide(); GapsView.hide(); ShiftsView.hide();
|
|
||||||
this.visible = true;
|
|
||||||
const panel = document.getElementById('panel');
|
|
||||||
panel.innerHTML = `<h2>Narrative Shifts</h2><p class="subtitle">Lädt …</p>`;
|
|
||||||
try {
|
|
||||||
const r = await fetch(`${API_BASE}/api/analyses/shifts`);
|
|
||||||
const data = await r.json();
|
|
||||||
if (!data.available) {
|
|
||||||
panel.innerHTML = `<h2>Narrative Shifts</h2><p class="subtitle">Keine Shift-Analyse vorhanden.</p>`;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
this.data = data;
|
|
||||||
} catch (e) {
|
|
||||||
panel.innerHTML = `<h2>Narrative Shifts</h2><p style="color:var(--accent-warm)">Fehler: ${escHtml(e.message)}</p>`;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
this.render();
|
|
||||||
},
|
|
||||||
|
|
||||||
render() {
|
|
||||||
if (!this.visible || !this.data) return;
|
|
||||||
const panel = document.getElementById('panel');
|
|
||||||
const d = this.data;
|
|
||||||
let html = `<h2>Narrative Shifts</h2>`;
|
|
||||||
html += `<p class="subtitle">${d.shifts.length} Theme-Verläufe in ${(d.podcasts || []).join(', ')} · ${d.total_themes_tracked} Themen getrackt · semantische Drift zwischen aufeinanderfolgenden Episoden</p>`;
|
|
||||||
|
|
||||||
const chip = (label, count, active, onclick) =>
|
|
||||||
`<span class="theme-tag" style="cursor:pointer;${active ? 'background:var(--accent)33;border-color:var(--accent);color:var(--text)' : ''}" onclick="${onclick}">${label}${count !== null ? ` (${count})` : ''}</span>`;
|
|
||||||
|
|
||||||
html += `<div style="margin-top:12px;display:flex;flex-wrap:wrap;gap:6px">`;
|
|
||||||
html += chip('alle Podcasts', d.shifts.length, !this.podcastFilter, `ShiftsView.setPodcast(null)`);
|
|
||||||
(d.podcasts || []).forEach(p => {
|
|
||||||
const n = d.shifts.filter(s => s.podcast === p).length;
|
|
||||||
if (n > 0) html += chip(p, n, this.podcastFilter === p, `ShiftsView.setPodcast('${p}')`);
|
|
||||||
});
|
|
||||||
html += '</div>';
|
|
||||||
|
|
||||||
const themesPresent = [...new Set(d.shifts.map(s => s.theme))].sort();
|
|
||||||
html += `<div style="margin-top:6px;display:flex;flex-wrap:wrap;gap:6px">`;
|
|
||||||
html += chip('alle Themen', null, !this.themeFilter, `ShiftsView.setTheme(null)`);
|
|
||||||
themesPresent.forEach(t => {
|
|
||||||
html += chip(t, null, this.themeFilter === t, `ShiftsView.setTheme('${t}')`);
|
|
||||||
});
|
|
||||||
html += '</div>';
|
|
||||||
|
|
||||||
let filtered = d.shifts;
|
|
||||||
if (this.podcastFilter) filtered = filtered.filter(s => s.podcast === this.podcastFilter);
|
|
||||||
if (this.themeFilter) filtered = filtered.filter(s => s.theme === this.themeFilter);
|
|
||||||
filtered = filtered.slice().sort((a, b) => (b.max_drift || 0) - (a.max_drift || 0));
|
|
||||||
|
|
||||||
if (filtered.length === 0) {
|
|
||||||
html += `<p class="subtitle" style="margin-top:16px">Keine Shifts mit aktuellem Filter.</p>`;
|
|
||||||
}
|
|
||||||
|
|
||||||
filtered.forEach(s => {
|
|
||||||
const key = `${s.podcast}__${s.theme}`;
|
|
||||||
const isOpen = !!this.expanded[key];
|
|
||||||
const meanPct = ((s.mean_drift || 0) * 100).toFixed(0);
|
|
||||||
const maxPct = ((s.max_drift || 0) * 100).toFixed(0);
|
|
||||||
const spikes = s.spikes || [];
|
|
||||||
|
|
||||||
html += `<div class="transcript-para" style="cursor:default">`;
|
|
||||||
html += `<div style="display:flex;justify-content:space-between;gap:8px;margin-bottom:6px;align-items:baseline">`;
|
|
||||||
html += `<strong>${escHtml(s.theme)}</strong>`;
|
|
||||||
html += `<span class="theme-tag" style="font-size:10px">${escHtml(s.podcast)}</span>`;
|
|
||||||
html += `</div>`;
|
|
||||||
html += `<div class="subtitle" style="margin-bottom:6px">`;
|
|
||||||
html += `${s.n_episodes} Episoden · Mittel-Drift ${meanPct}% · Max-Drift ${maxPct}%`;
|
|
||||||
if (spikes.length) html += ` · <span style="color:var(--accent-warm)">${spikes.length} Spike${spikes.length > 1 ? 's' : ''}</span>`;
|
|
||||||
html += `</div>`;
|
|
||||||
|
|
||||||
// Top-Drifts (Spikes oder Top 3)
|
|
||||||
const top = spikes.length ? spikes : (s.drifts || []).slice().sort((a,b) => (b.drift||0)-(a.drift||0)).slice(0, 3);
|
|
||||||
top.forEach(dr => {
|
|
||||||
const pct = ((dr.drift || 0) * 100).toFixed(0);
|
|
||||||
const fromClick = `ShiftsView.jumpTo('${s.podcast}','${dr.from}')`;
|
|
||||||
const toClick = `ShiftsView.jumpTo('${s.podcast}','${dr.to}')`;
|
|
||||||
html += `<div style="display:flex;align-items:center;gap:8px;padding:4px 0;border-top:1px solid var(--border);font-size:13px">`;
|
|
||||||
html += `<span class="ts" style="cursor:pointer" onclick="${fromClick}">${escHtml(dr.from)}</span>`;
|
|
||||||
html += `<span style="color:var(--text-muted)">→</span>`;
|
|
||||||
html += `<span class="ts" style="cursor:pointer" onclick="${toClick}">${escHtml(dr.to)}</span>`;
|
|
||||||
html += `<span style="margin-left:auto;color:${(dr.drift||0) > 0.5 ? 'var(--accent-warm)' : 'var(--text-muted)'}">${pct}%</span>`;
|
|
||||||
html += `</div>`;
|
|
||||||
});
|
|
||||||
|
|
||||||
// Toggle für vollständige Drift-Sequenz
|
|
||||||
const allDrifts = s.drifts || [];
|
|
||||||
if (allDrifts.length > top.length) {
|
|
||||||
html += `<div style="margin-top:6px"><span class="theme-tag" style="cursor:pointer;font-size:11px" onclick="ShiftsView.toggle('${key}')">${isOpen ? 'verkürzen' : `alle ${allDrifts.length} Übergänge zeigen`}</span></div>`;
|
|
||||||
if (isOpen) {
|
|
||||||
html += `<div style="margin-top:6px;display:flex;flex-wrap:wrap;gap:4px">`;
|
|
||||||
allDrifts.forEach(dr => {
|
|
||||||
const pct = ((dr.drift || 0) * 100).toFixed(0);
|
|
||||||
const intensity = Math.min(1, (dr.drift || 0) / 0.6);
|
|
||||||
const bg = `rgba(220,120,80,${(0.1 + intensity * 0.5).toFixed(2)})`;
|
|
||||||
html += `<span title="${escHtml(dr.from)} → ${escHtml(dr.to)}: ${pct}%" style="font-size:10px;padding:2px 6px;border-radius:3px;background:${bg};cursor:pointer" onclick="ShiftsView.jumpTo('${s.podcast}','${dr.to}')">${escHtml(dr.to)} ${pct}%</span>`;
|
|
||||||
});
|
|
||||||
html += `</div>`;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
html += `</div>`;
|
|
||||||
});
|
|
||||||
panel.innerHTML = html;
|
|
||||||
},
|
|
||||||
|
|
||||||
setPodcast(p) { this.podcastFilter = p; this.render(); },
|
|
||||||
setTheme(t) { this.themeFilter = t; this.render(); },
|
|
||||||
toggle(key) { this.expanded[key] = !this.expanded[key]; this.render(); },
|
|
||||||
|
|
||||||
jumpTo(podcastId, episodeId) {
|
|
||||||
if (CURRENT_PODCAST !== podcastId) return;
|
|
||||||
const ep = DATA && DATA.episodes && DATA.episodes.find(e => e.id === episodeId);
|
|
||||||
if (ep) showEpisode(ep);
|
|
||||||
},
|
|
||||||
|
|
||||||
hide() { this.visible = false; }
|
|
||||||
};
|
|
||||||
|
|
||||||
// ── Search ──
|
// ── Search ──
|
||||||
const Search = {
|
const Search = {
|
||||||
init() {
|
init() {
|
||||||
@ -1156,7 +797,7 @@ const Search = {
|
|||||||
|
|
||||||
showResults(results, query) {
|
showResults(results, query) {
|
||||||
const panel = document.getElementById('panel');
|
const panel = document.getElementById('panel');
|
||||||
TranscriptView.hide(); AnalysisView.hide(); GapsView.hide(); ShiftsView.hide();
|
TranscriptView.hide();
|
||||||
|
|
||||||
if (results.length === 0) {
|
if (results.length === 0) {
|
||||||
panel.innerHTML = `<p class="subtitle">Keine Treffer für "${escHtml(query)}"</p>`;
|
panel.innerHTML = `<p class="subtitle">Keine Treffer für "${escHtml(query)}"</p>`;
|
||||||
@ -1184,7 +825,7 @@ const Search = {
|
|||||||
},
|
},
|
||||||
|
|
||||||
showSemanticResults(results, query) {
|
showSemanticResults(results, query) {
|
||||||
TranscriptView.hide(); AnalysisView.hide(); GapsView.hide(); ShiftsView.hide();
|
TranscriptView.hide();
|
||||||
const panel = document.getElementById('panel');
|
const panel = document.getElementById('panel');
|
||||||
let html = `<h2>${results.length} semantische Treffer für "${escHtml(query)}" <span class="semantic-badge">KI</span></h2>`;
|
let html = `<h2>${results.length} semantische Treffer für "${escHtml(query)}" <span class="semantic-badge">KI</span></h2>`;
|
||||||
results.forEach(r => {
|
results.forEach(r => {
|
||||||
@ -1197,7 +838,7 @@ const Search = {
|
|||||||
},
|
},
|
||||||
|
|
||||||
showApiResults(results, query) {
|
showApiResults(results, query) {
|
||||||
TranscriptView.hide(); AnalysisView.hide(); GapsView.hide(); ShiftsView.hide();
|
TranscriptView.hide();
|
||||||
const panel = document.getElementById('panel');
|
const panel = document.getElementById('panel');
|
||||||
let html = `<h2>${results.length} Treffer für "${escHtml(query)}"</h2>`;
|
let html = `<h2>${results.length} Treffer für "${escHtml(query)}"</h2>`;
|
||||||
results.forEach(r => {
|
results.forEach(r => {
|
||||||
@ -1337,8 +978,6 @@ function showPodcastSelector(podcasts) {
|
|||||||
if (podcasts.length > 1) {
|
if (podcasts.length > 1) {
|
||||||
selectorHtml += '<div class="compare-actions">';
|
selectorHtml += '<div class="compare-actions">';
|
||||||
selectorHtml += '<button class="compare-btn" onclick="startCompare()">Podcasts vergleichen</button>';
|
selectorHtml += '<button class="compare-btn" onclick="startCompare()">Podcasts vergleichen</button>';
|
||||||
selectorHtml += '<button class="compare-btn" onclick="GapsView.show()">Leerstellen anzeigen</button>';
|
|
||||||
selectorHtml += '<button class="compare-btn" onclick="ShiftsView.show()">Narrative Shifts</button>';
|
|
||||||
selectorHtml += '</div>';
|
selectorHtml += '</div>';
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1485,15 +1124,11 @@ function init() {
|
|||||||
document.getElementById('app-title').innerHTML = ALL_PODCASTS.length > 1
|
document.getElementById('app-title').innerHTML = ALL_PODCASTS.length > 1
|
||||||
? `<span style="cursor:pointer" onclick="showPodcastList()" title="Zurück zur Übersicht">←</span> <span>${escHtml(name)}</span>`
|
? `<span style="cursor:pointer" onclick="showPodcastList()" title="Zurück zur Übersicht">←</span> <span>${escHtml(name)}</span>`
|
||||||
: `<span>${escHtml(name)}</span>`;
|
: `<span>${escHtml(name)}</span>`;
|
||||||
const gapsBtn = ALL_PODCASTS.length > 1
|
|
||||||
? `<p style="margin-top:12px"><button class="transcript-toggle" onclick="GapsView.show()">Leerstellen anzeigen</button> <button class="transcript-toggle" onclick="ShiftsView.show()">Narrative Shifts</button></p>`
|
|
||||||
: '';
|
|
||||||
document.getElementById('welcome-panel').innerHTML = `
|
document.getElementById('welcome-panel').innerHTML = `
|
||||||
<h2>${escHtml(name)}</h2>
|
<h2>${escHtml(name)}</h2>
|
||||||
<p>${escHtml(DATA.description || '')}<br>
|
<p>${escHtml(DATA.description || '')}<br>
|
||||||
${DATA.episodes.length} Folgen, ${DATA.staffeln.length} Staffeln, ${DATA.quotes.length} Zitate</p>
|
${DATA.episodes.length} Folgen, ${DATA.staffeln.length} Staffeln, ${DATA.quotes.length} Zitate</p>
|
||||||
<p style="margin-top:16px">Klicke auf einen Themenknoten oder eine Episode.</p>
|
<p style="margin-top:16px">Klicke auf einen Themenknoten oder eine Episode.</p>`;
|
||||||
${gapsBtn}`;
|
|
||||||
|
|
||||||
buildFilters();
|
buildFilters();
|
||||||
// Wait for DOM to render the SVG element before building the graph
|
// Wait for DOM to render the SVG element before building the graph
|
||||||
@ -1548,27 +1183,16 @@ function buildGraph() {
|
|||||||
svg.attr('viewBox', `0 0 ${W} ${H}`).attr('preserveAspectRatio', 'xMidYMid meet');
|
svg.attr('viewBox', `0 0 ${W} ${H}`).attr('preserveAspectRatio', 'xMidYMid meet');
|
||||||
|
|
||||||
const nodes = [], links = [], episodeMap = {};
|
const nodes = [], links = [], episodeMap = {};
|
||||||
const hasThemes = (DATA.themes || []).length > 0;
|
|
||||||
const hasQuotes = (DATA.quotes || []).length > 0;
|
|
||||||
|
|
||||||
nodes.push({ id: 'center', type: 'center', label: (DATA.name || 'PODCAST').replace(/\s+/g, '\n'),
|
nodes.push({ id: 'center', type: 'center', label: (DATA.name || 'PODCAST').replace(/\s+/g, '\n'),
|
||||||
r: 40 * sc, fx: W / 2, fy: H / 2, color: '#60a5fa' });
|
r: 40 * sc, fx: W / 2, fy: H / 2, color: '#60a5fa' });
|
||||||
|
|
||||||
if (hasThemes) {
|
DATA.themes.forEach(t => {
|
||||||
DATA.themes.forEach(t => {
|
const ml = isMobile ? 18 : 25;
|
||||||
const ml = isMobile ? 18 : 25;
|
nodes.push({ id: t.id, type: 'theme', label: t.label.length > ml ? t.label.substring(0, ml - 3) + '…' : t.label,
|
||||||
nodes.push({ id: t.id, type: 'theme', label: t.label.length > ml ? t.label.substring(0, ml - 3) + '…' : t.label,
|
fullLabel: t.label, description: t.description, r: 28 * sc, color: t.color, episodes: t.episodes });
|
||||||
fullLabel: t.label, description: t.description, r: 28 * sc, color: t.color, episodes: t.episodes });
|
links.push({ source: 'center', target: t.id, type: 'center-theme' });
|
||||||
links.push({ source: 'center', target: t.id, type: 'center-theme' });
|
});
|
||||||
});
|
|
||||||
} else {
|
|
||||||
// Fallback: staffeln as hubs
|
|
||||||
DATA.staffeln.forEach(s => {
|
|
||||||
nodes.push({ id: `staffel-${s.id}`, type: 'staffel', label: `S${s.id}: ${s.name}`,
|
|
||||||
fullLabel: s.name, staffel: s.id, r: 28 * sc, color: s.color });
|
|
||||||
links.push({ source: 'center', target: `staffel-${s.id}`, type: 'center-theme' });
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
DATA.episodes.forEach(ep => {
|
DATA.episodes.forEach(ep => {
|
||||||
const st = DATA.staffeln.find(s => s.id === ep.staffel);
|
const st = DATA.staffeln.find(s => s.id === ep.staffel);
|
||||||
@ -1578,28 +1202,18 @@ function buildGraph() {
|
|||||||
episodeMap[ep.id] = n;
|
episodeMap[ep.id] = n;
|
||||||
});
|
});
|
||||||
|
|
||||||
if (hasThemes) {
|
DATA.themes.forEach(t => t.episodes.forEach(epId => {
|
||||||
DATA.themes.forEach(t => t.episodes.forEach(epId => {
|
if (episodeMap[epId]) links.push({ source: t.id, target: epId, type: 'theme-episode' });
|
||||||
if (episodeMap[epId]) links.push({ source: t.id, target: epId, type: 'theme-episode' });
|
}));
|
||||||
}));
|
|
||||||
} else {
|
|
||||||
DATA.episodes.forEach(ep => {
|
|
||||||
if (DATA.staffeln.find(s => s.id === ep.staffel)) {
|
|
||||||
links.push({ source: `staffel-${ep.staffel}`, target: ep.id, type: 'theme-episode' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
if (hasQuotes) {
|
DATA.quotes.filter(q => q.isTopQuote || q.startTime !== null).forEach(q => {
|
||||||
DATA.quotes.filter(q => q.isTopQuote || q.startTime !== null).forEach(q => {
|
const ep = episodeMap[q.episode];
|
||||||
const ep = episodeMap[q.episode];
|
nodes.push({ id: q.id, type: 'quote', text: q.text, speaker: q.speaker, episode: q.episode,
|
||||||
nodes.push({ id: q.id, type: 'quote', text: q.text, speaker: q.speaker, episode: q.episode,
|
themes: q.themes, startTime: q.startTime, endTime: q.endTime, audioFile: q.audioFile,
|
||||||
themes: q.themes, startTime: q.startTime, endTime: q.endTime, audioFile: q.audioFile,
|
isTopQuote: q.isTopQuote, verbatim: q.verbatim,
|
||||||
isTopQuote: q.isTopQuote, verbatim: q.verbatim,
|
r: (q.isTopQuote ? 6 : 4) * sc, color: ep ? ep.color : '#666', staffel: ep ? ep.staffel : 0 });
|
||||||
r: (q.isTopQuote ? 6 : 4) * sc, color: ep ? ep.color : '#666', staffel: ep ? ep.staffel : 0 });
|
links.push({ source: q.episode, target: q.id, type: 'episode-quote' });
|
||||||
links.push({ source: q.episode, target: q.id, type: 'episode-quote' });
|
});
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
simulation = d3.forceSimulation(nodes)
|
simulation = d3.forceSimulation(nodes)
|
||||||
.force('link', d3.forceLink(links).id(d => d.id).distance(d => {
|
.force('link', d3.forceLink(links).id(d => d.id).distance(d => {
|
||||||
@ -1644,7 +1258,7 @@ function buildGraph() {
|
|||||||
epNodes.append('circle').attr('r', d => d.r).attr('fill', 'transparent').attr('stroke', d => d.color).attr('stroke-width', 1.5);
|
epNodes.append('circle').attr('r', d => d.r).attr('fill', 'transparent').attr('stroke', d => d.color).attr('stroke-width', 1.5);
|
||||||
epNodes.append('text').attr('dy', 4).text(d => d.label);
|
epNodes.append('text').attr('dy', 4).text(d => d.label);
|
||||||
|
|
||||||
const themeNodes = nodeG.selectAll('.node-theme').data(nodes.filter(n => n.type === 'theme' || n.type === 'staffel')).join('g')
|
const themeNodes = nodeG.selectAll('.node-theme').data(nodes.filter(n => n.type === 'theme')).join('g')
|
||||||
.attr('class', 'node-theme').call(drag(simulation));
|
.attr('class', 'node-theme').call(drag(simulation));
|
||||||
themeNodes.append('circle').attr('r', d => d.r).attr('fill', d => d.color + '33').attr('stroke', d => d.color);
|
themeNodes.append('circle').attr('r', d => d.r).attr('fill', d => d.color + '33').attr('stroke', d => d.color);
|
||||||
themeNodes.append('text').attr('dy', d => -d.r - 8).text(d => d.label);
|
themeNodes.append('text').attr('dy', d => -d.r - 8).text(d => d.label);
|
||||||
@ -1671,18 +1285,10 @@ function updateVisibility() {
|
|||||||
const s = activeStaffel;
|
const s = activeStaffel;
|
||||||
window._quoteNodes.style('display', d => s === 0 || d.staffel === s ? null : 'none');
|
window._quoteNodes.style('display', d => s === 0 || d.staffel === s ? null : 'none');
|
||||||
window._epNodes.style('display', d => s === 0 || d.staffel === s ? null : 'none');
|
window._epNodes.style('display', d => s === 0 || d.staffel === s ? null : 'none');
|
||||||
if (window._themeNodes) {
|
|
||||||
window._themeNodes.style('display', d => {
|
|
||||||
if (d.type !== 'staffel') return null;
|
|
||||||
return s === 0 || d.staffel === s ? null : 'none';
|
|
||||||
});
|
|
||||||
}
|
|
||||||
window._linkEls.style('display', d => {
|
window._linkEls.style('display', d => {
|
||||||
if (s === 0) return null;
|
if (s === 0) return null;
|
||||||
const tgt = typeof d.target === 'object' ? d.target : window._nodes.find(n => n.id === d.target);
|
const tgt = typeof d.target === 'object' ? d.target : window._nodes.find(n => n.id === d.target);
|
||||||
const src = typeof d.source === 'object' ? d.source : window._nodes.find(n => n.id === d.source);
|
|
||||||
if (tgt && tgt.staffel && tgt.staffel !== s) return 'none';
|
if (tgt && tgt.staffel && tgt.staffel !== s) return 'none';
|
||||||
if (src && src.type === 'staffel' && src.staffel !== s) return 'none';
|
|
||||||
return null;
|
return null;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -1704,7 +1310,6 @@ function drag(sim) {
|
|||||||
if (d.type !== 'center') { d.fx = null; d.fy = null; }
|
if (d.type !== 'center') { d.fx = null; d.fy = null; }
|
||||||
if (!moved) {
|
if (!moved) {
|
||||||
if (d.type === 'theme') showTheme(d);
|
if (d.type === 'theme') showTheme(d);
|
||||||
else if (d.type === 'staffel') filterStaffel(d.staffel);
|
|
||||||
else if (d.type === 'episode') showEpisode(d);
|
else if (d.type === 'episode') showEpisode(d);
|
||||||
else if (d.type === 'quote') showQuoteDetail(d);
|
else if (d.type === 'quote') showQuoteDetail(d);
|
||||||
}
|
}
|
||||||
@ -1713,7 +1318,7 @@ function drag(sim) {
|
|||||||
|
|
||||||
// ── Panel: Theme ──
|
// ── Panel: Theme ──
|
||||||
function showTheme(theme) {
|
function showTheme(theme) {
|
||||||
TranscriptView.hide(); AnalysisView.hide(); GapsView.hide(); ShiftsView.hide();
|
TranscriptView.hide();
|
||||||
const panel = document.getElementById('panel');
|
const panel = document.getElementById('panel');
|
||||||
const td = DATA.themes.find(t => t.id === theme.id);
|
const td = DATA.themes.find(t => t.id === theme.id);
|
||||||
const quotes = DATA.quotes.filter(q => q.themes.includes(theme.id));
|
const quotes = DATA.quotes.filter(q => q.themes.includes(theme.id));
|
||||||
@ -1739,7 +1344,6 @@ function showTheme(theme) {
|
|||||||
// ── Panel: Episode ──
|
// ── Panel: Episode ──
|
||||||
function showEpisode(ep) {
|
function showEpisode(ep) {
|
||||||
TranscriptView.hide();
|
TranscriptView.hide();
|
||||||
AnalysisView.hide();
|
|
||||||
const panel = document.getElementById('panel');
|
const panel = document.getElementById('panel');
|
||||||
const epData = DATA.episodes.find(e => e.id === (ep.id || ep));
|
const epData = DATA.episodes.find(e => e.id === (ep.id || ep));
|
||||||
const staffel = DATA.staffeln.find(s => s.id === epData.staffel);
|
const staffel = DATA.staffeln.find(s => s.id === epData.staffel);
|
||||||
@ -1749,24 +1353,9 @@ function showEpisode(ep) {
|
|||||||
html += `<p class="subtitle">Gast: ${epData.guest} · Staffel ${epData.staffel}: ${staffel.name}</p>`;
|
html += `<p class="subtitle">Gast: ${epData.guest} · Staffel ${epData.staffel}: ${staffel.name}</p>`;
|
||||||
html += `<p class="subtitle">${quotes.length} Zitate</p>`;
|
html += `<p class="subtitle">${quotes.length} Zitate</p>`;
|
||||||
|
|
||||||
// Action buttons
|
// Transcript button
|
||||||
if (epData.audioFile) {
|
if (epData.audioFile) {
|
||||||
html += `<button class="transcript-toggle" onclick="TranscriptView.show('${epData.id}')">Transkript lesen</button> `;
|
html += `<button class="transcript-toggle" onclick="TranscriptView.show('${epData.id}')">Transkript lesen</button>`;
|
||||||
}
|
|
||||||
if (CURRENT_PODCAST) {
|
|
||||||
html += `<button class="transcript-toggle" id="btn-claims-${epData.id}" onclick="AnalysisView.show('${epData.id}','claims')">Behauptungen</button> `;
|
|
||||||
html += `<button class="transcript-toggle" id="btn-questions-${epData.id}" onclick="AnalysisView.show('${epData.id}','questions')">Fragen</button>`;
|
|
||||||
fetch(`${API_BASE}/api/podcasts/${CURRENT_PODCAST}/episodes/${epData.id}/analyses-summary`)
|
|
||||||
.then(r => r.json())
|
|
||||||
.then(s => {
|
|
||||||
const cb = document.getElementById('btn-claims-' + epData.id);
|
|
||||||
if (cb && typeof s.claims === 'number') cb.textContent = `Behauptungen (${s.claims})`;
|
|
||||||
const qb = document.getElementById('btn-questions-' + epData.id);
|
|
||||||
if (qb && typeof s.questions === 'number') {
|
|
||||||
const open = s.questions_unanswered ? `, ${s.questions_unanswered} offen` : '';
|
|
||||||
qb.textContent = `Fragen (${s.questions}${open})`;
|
|
||||||
}
|
|
||||||
}).catch(() => {});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const epThemes = DATA.themes.filter(t => t.episodes.includes(epData.id));
|
const epThemes = DATA.themes.filter(t => t.episodes.includes(epData.id));
|
||||||
@ -2119,28 +1708,14 @@ function buildTimeline() {
|
|||||||
}
|
}
|
||||||
container.style.display = '';
|
container.style.display = '';
|
||||||
|
|
||||||
const hasQuotes = (DATA.quotes || []).length > 0;
|
|
||||||
let html = '<div style="max-width:900px;margin:0 auto">';
|
let html = '<div style="max-width:900px;margin:0 auto">';
|
||||||
|
|
||||||
DATA.staffeln.forEach(staffel => {
|
DATA.staffeln.forEach(staffel => {
|
||||||
const eps = DATA.episodes.filter(e => e.staffel === staffel.id);
|
const eps = DATA.episodes.filter(e => e.staffel === staffel.id);
|
||||||
html += `<div style="margin-bottom:24px">`;
|
html += `<div style="margin-bottom:24px">`;
|
||||||
html += `<h3 style="color:${staffel.color};font-size:14px;margin-bottom:12px">Staffel ${staffel.id}: ${staffel.name} <span style="color:var(--text-muted);font-weight:400">· ${eps.length} Folgen</span></h3>`;
|
html += `<h3 style="color:${staffel.color};font-size:14px;margin-bottom:12px">Staffel ${staffel.id}: ${staffel.name}</h3>`;
|
||||||
|
|
||||||
eps.forEach(ep => {
|
eps.forEach(ep => {
|
||||||
if (!hasQuotes) {
|
|
||||||
html += `<div style="display:flex;gap:12px;margin-bottom:8px;align-items:flex-start">`;
|
|
||||||
html += `<div style="min-width:60px;text-align:right">`;
|
|
||||||
html += `<div style="font-size:12px;font-weight:600;color:${staffel.color}">${ep.id}</div>`;
|
|
||||||
if (ep.guest) html += `<div style="font-size:10px;color:var(--text-muted)">${escHtml(ep.guest)}</div>`;
|
|
||||||
html += `</div>`;
|
|
||||||
html += `<div style="flex:1">`;
|
|
||||||
html += `<div style="font-size:13px;font-weight:500;cursor:pointer" onclick="showEpisodeById('${ep.id}')">${escHtml(ep.title)}</div>`;
|
|
||||||
html += `</div>`;
|
|
||||||
html += `</div>`;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const quotes = DATA.quotes.filter(q => q.episode === ep.id);
|
const quotes = DATA.quotes.filter(q => q.episode === ep.id);
|
||||||
const topQuotes = quotes.filter(q => q.isTopQuote);
|
const topQuotes = quotes.filter(q => q.isTopQuote);
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user