Generic tool for building interactive mindmap visualizations from podcast transcripts. Includes: audio download, SRT conversion, quote-timestamp matching, D3.js mindmap webapp. Configurable via project.yaml — no podcast-specific content. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
42 lines
1.2 KiB
Python
42 lines
1.2 KiB
Python
#!/usr/bin/env python3
|
|
"""Download audio from YouTube for all episodes in a project."""
|
|
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
from config import load_project
|
|
|
|
|
|
def main():
    """Download the audio track of every project episode that has a YouTube ID.

    The project directory is read from the first command-line argument and
    defaults to the current directory. Episodes without a usable ``youtube``
    value, and episodes whose expected audio file already exists in the
    project's audio directory, are skipped with a message.

    Raises:
        subprocess.CalledProcessError: if ``yt-dlp`` exits with a non-zero
            status (``check=True``); later episodes are then not attempted.
    """
    project_dir = sys.argv[1] if len(sys.argv) > 1 else "."
    config = load_project(project_dir)
    audio_dir = config["_audio_dir"]
    os.makedirs(audio_dir, exist_ok=True)

    for ep in config["episodes"]:
        # Truthiness (not just key presence) so that a `youtube:` entry left
        # empty or null in the project file is skipped instead of being
        # turned into a bogus "watch?v=None" URL that aborts the whole run.
        youtube_id = ep.get("youtube")
        if not youtube_id:
            print(f"SKIP: {ep['id']} — no YouTube ID")
            continue

        audio_file = config["_audio_files"][ep["id"]]
        output_path = os.path.join(audio_dir, audio_file)

        if os.path.exists(output_path):
            print(f"EXISTS: {audio_file}")
            continue

        print(f"DOWNLOAD: {ep['id']} ({youtube_id}) → {audio_file}")
        slug = config["_srt_keys"][ep["id"]]
        # NOTE(review): yt-dlp writes to "<slug>.<ext>", while the existence
        # check above looks for config["_audio_files"][id]. The skip only
        # works if load_project derives that name as slug + ".m4a" — confirm.
        subprocess.run([
            # Replace any leftover file from an earlier, interrupted run.
            "yt-dlp", "--force-overwrites",
            # Extract audio only, converted to m4a at the best quality (0).
            "-x", "--audio-format", "m4a", "--audio-quality", "0",
            "-o", os.path.join(audio_dir, slug + ".%(ext)s"),
            f"https://www.youtube.com/watch?v={youtube_id}",
        ], check=True)

    print(f"\nDone. Audio files in {audio_dir}")
|
|
|
|
|
|
# Run the downloader only when this file is executed as a script, so that
# importing the module does not start any downloads.
if __name__ == "__main__":
    main()
|