Generic tool for building interactive mindmap visualizations from podcast transcripts. Includes: audio download, SRT conversion, quote-timestamp matching, D3.js mindmap webapp. Configurable via project.yaml — no podcast-specific content. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
115 lines
3.7 KiB
Python
115 lines
3.7 KiB
Python
#!/usr/bin/env python3
|
|
"""Master pipeline: runs all processing steps for a podcast-mindmap project.
|
|
|
|
Usage:
|
|
python pipeline.py /path/to/project
|
|
python pipeline.py /path/to/project --step download
|
|
python pipeline.py /path/to/project --step convert
|
|
python pipeline.py /path/to/project --step match
|
|
python pipeline.py /path/to/project --step serve
|
|
"""
|
|
|
|
import argparse
|
|
import os
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
|
|
# Absolute path of the directory containing this file; run_step() resolves
# the individual step scripts relative to it.
SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
# "webapp" directory located next to the scripts directory (its sibling);
# setup_webapp() copies the files found there into each project.
WEBAPP_DIR = os.path.join(os.path.dirname(SCRIPTS_DIR), "webapp")
|
|
|
|
|
|
def run_step(script_name, project_dir):
    """Execute one pipeline script from SCRIPTS_DIR against a project.

    The script is started with the current Python interpreter and receives
    ``project_dir`` as its only command-line argument. A banner naming the
    script is printed before it starts.

    Args:
        script_name: File name of the step script, relative to SCRIPTS_DIR.
        project_dir: Project directory handed to the script.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status (the call uses ``check=True``).
    """
    command = [sys.executable, os.path.join(SCRIPTS_DIR, script_name), project_dir]

    rule = "=" * 60
    print(f"\n{rule}")
    print(f"Running: {script_name}")
    print(rule)

    subprocess.run(command, check=True)
|
|
|
|
|
|
def setup_webapp(project_dir):
    """Populate ``<project_dir>/webapp`` with the webapp files, data and audio.

    Steps performed:
      1. Create ``<project_dir>/webapp`` if it does not exist.
      2. Copy every regular file found directly in WEBAPP_DIR into it
         (subdirectories are skipped).
      3. Copy ``<project_dir>/data/mindmap_data.json`` into it, if present.
      4. Symlink ``<project_dir>/webapp/audio`` to ``<project_dir>/audio``,
         if the audio directory exists and nothing occupies the link path.

    Args:
        project_dir: Path to the project directory.

    Raises:
        OSError: If WEBAPP_DIR cannot be listed, a copy fails, or the
            symlink cannot be created.
    """
    webapp_dest = os.path.join(project_dir, "webapp")
    os.makedirs(webapp_dest, exist_ok=True)

    # Copy webapp files (top-level regular files only).
    for name in os.listdir(WEBAPP_DIR):
        src = os.path.join(WEBAPP_DIR, name)
        if os.path.isfile(src):
            shutil.copy2(src, os.path.join(webapp_dest, name))

    # Copy data; it may be absent if the matching step has not produced it.
    data_src = os.path.join(project_dir, "data", "mindmap_data.json")
    if os.path.exists(data_src):
        shutil.copy2(data_src, os.path.join(webapp_dest, "mindmap_data.json"))

    # Symlink audio.
    audio_src = os.path.join(project_dir, "audio")
    audio_link = os.path.join(webapp_dest, "audio")
    if os.path.exists(audio_src):
        # os.path.exists() follows symlinks and reports False for a dangling
        # link, so a stale link would make os.symlink() raise FileExistsError.
        # Drop a broken link and re-create it instead.
        if os.path.islink(audio_link) and not os.path.exists(audio_link):
            os.remove(audio_link)
        # lexists() is True for anything occupying the path, links included.
        if not os.path.lexists(audio_link):
            os.symlink(audio_src, audio_link)

    print(f"\nWebapp ready in {webapp_dest}")
|
|
|
|
|
|
def serve(project_dir, port=9123):
    """Launch the project's webapp server and print the URLs to reach it.

    If ``<project_dir>/webapp/server.py`` is missing, the webapp directory is
    built with setup_webapp() first. The server script is then run with the
    current Python interpreter from inside the webapp directory; the call
    blocks until that process exits. Its exit status is not checked.

    Args:
        project_dir: Project directory containing (or receiving) ``webapp/``.
        port: Port number shown in the printed URLs.

    NOTE(review): ``port`` is only used in the printed URLs; it is not passed
    to server.py here. Confirm that server.py listens on the same port.
    """
    webapp_dir = os.path.join(project_dir, "webapp")
    server_script = os.path.join(webapp_dir, "server.py")
    if not os.path.exists(server_script):
        setup_webapp(project_dir)

    # Determine the LAN address by asking which local address a UDP socket
    # would use to reach a public host; fall back to "localhost" on failure.
    import socket

    probe = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    with probe:  # closes the socket on exit, whatever happens inside
        try:
            probe.connect(("8.8.8.8", 80))
            lan_ip = probe.getsockname()[0]
        except Exception:
            lan_ip = "localhost"

    print(f"\nServer: http://localhost:{port}")
    print(f"LAN: http://{lan_ip}:{port}")

    server_command = [sys.executable, server_script]
    subprocess.run(server_command, cwd=webapp_dir)
|
|
|
|
|
|
def main():
    """Parse the command line and run the requested pipeline step(s).

    Positional argument ``project_dir`` is made absolute before use.
    ``--step`` selects a single step (``download``, ``convert``, ``match``,
    ``webapp``, ``serve``) or ``all`` (the default), which runs every step in
    order and pauses for manual transcription after the download.
    ``--port`` (default 9123) is forwarded to serve().
    """
    parser = argparse.ArgumentParser(description="Podcast Mindmap Pipeline")
    parser.add_argument("project_dir", help="Path to the project directory")
    parser.add_argument(
        "--step",
        choices=["download", "convert", "match", "webapp", "serve", "all"],
        default="all",
        help="Which step to run",
    )
    parser.add_argument("--port", type=int, default=9123, help="Server port")
    args = parser.parse_args()

    project_dir = os.path.abspath(args.project_dir)
    step = args.step

    # Steps that consist of running exactly one script.
    script_for_step = {
        "download": "download_audio.py",
        "convert": "convert_srt.py",
        "match": "match_quotes.py",
    }
    if step in script_for_step:
        run_step(script_for_step[step], project_dir)
        return

    if step == "webapp":
        setup_webapp(project_dir)
        return

    if step == "serve":
        setup_webapp(project_dir)
        serve(project_dir, args.port)
        return

    if step != "all":
        return

    # Full run: download, wait for manual transcription, then process and serve.
    run_step("download_audio.py", project_dir)
    # The prompts below (German) ask the user to transcribe the audio files
    # with MacWhisper (SRT export) and to press Enter once the SRT files are
    # in the audio/ folder.
    print("\n*** Transkribiere die Audio-Dateien jetzt mit MacWhisper (SRT-Export). ***")
    print("*** Drücke Enter wenn die SRT-Dateien im audio/ Ordner liegen. ***")
    input()
    run_step("convert_srt.py", project_dir)
    run_step("match_quotes.py", project_dir)
    setup_webapp(project_dir)
    serve(project_dir, args.port)
|
|
|
|
|
|
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|