Generic tool for building interactive mindmap visualizations from podcast transcripts. Includes: audio download, SRT conversion, quote-timestamp matching, D3.js mindmap webapp. Configurable via project.yaml — no podcast-specific content. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
43 lines
1.4 KiB
Python
43 lines
1.4 KiB
Python
"""Shared configuration loader for podcast-mindmap pipeline."""
|
|
|
|
import os
|
|
import yaml
|
|
|
|
|
|
def load_project(project_dir):
    """Load ``project.yaml`` from a project directory and add derived lookups.

    Args:
        project_dir: Directory that contains ``project.yaml``.

    Returns:
        The parsed configuration mapping, extended in place with these
        underscore-prefixed keys:

        * ``_project_dir``: absolute path of ``project_dir``.
        * ``_audio_dir``, ``_transcripts_dir``, ``_data_dir``,
          ``_webapp_dir``: ``project_dir`` (as passed in, not made absolute)
          joined with ``audio``, ``transcripts``, ``data`` and ``webapp``.
        * ``_episodes_by_id``: episode ID -> episode entry.
        * ``_audio_files``: episode ID -> ``<slug>.m4a``.
        * ``_srt_keys``: episode ID -> ``<slug>``.

    Raises:
        FileNotFoundError: If ``project_dir`` contains no ``project.yaml``.
        ValueError: If ``project.yaml`` is empty or its top level is not a
            mapping.
        KeyError: If ``episodes``, or an episode's ``id`` or ``title``, is
            missing.
    """
    config_path = os.path.join(project_dir, "project.yaml")
    if not os.path.exists(config_path):
        raise FileNotFoundError(f"No project.yaml found in {project_dir}")

    with open(config_path, "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)

    # safe_load() gives None for an empty document and a list/scalar for a
    # non-mapping one; fail here with a clear message instead of an opaque
    # TypeError on the first item assignment below.
    if not isinstance(config, dict):
        raise ValueError(
            f"{config_path} must contain a YAML mapping at the top level, "
            f"got {type(config).__name__}"
        )

    # Derived locations
    config["_project_dir"] = os.path.abspath(project_dir)
    for key, subdir in (
        ("_audio_dir", "audio"),
        ("_transcripts_dir", "transcripts"),
        ("_data_dir", "data"),
        ("_webapp_dir", "webapp"),
    ):
        config[key] = os.path.join(project_dir, subdir)

    # Episode lookup
    episodes = config["episodes"]
    config["_episodes_by_id"] = {ep["id"]: ep for ep in episodes}

    # Slug = "<id>-<title>" with spaces turned into hyphens and lowercase
    # umlauts transliterated, e.g. S1E1 -> S1E1-Thema (audio: S1E1-Thema.m4a).
    # Only " ", "ö", "ü" and "ä" are rewritten; every other character
    # (including uppercase umlauts and "ß") is kept as is.
    slug_table = str.maketrans({" ": "-", "ö": "oe", "ü": "ue", "ä": "ae"})
    slugs = {
        ep["id"]: ep["id"] + "-" + ep["title"].translate(slug_table)
        for ep in episodes
    }
    config["_audio_files"] = {
        ep_id: slug + ".m4a" for ep_id, slug in slugs.items()
    }
    config["_srt_keys"] = slugs

    return config
|
|
|
|
|
|
def get_staffel(config, staffel_id):
    """Look up one entry of ``config["staffeln"]`` by its ``id``.

    Args:
        config: Mapping with a ``staffeln`` list whose entries each carry an
            ``id`` key.
        staffel_id: ID to search for.

    Returns:
        The first entry whose ``id`` equals ``staffel_id``, or ``None`` when
        no entry matches.
    """
    matches = (
        staffel for staffel in config["staffeln"]
        if staffel["id"] == staffel_id
    )
    return next(matches, None)
|