podcast-mindmap/scripts/pipeline.py

115 lines
3.7 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
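"""Master pipeline: runs all processing steps for a podcast-mindmap project.

Usage:
    python pipeline.py /path/to/project
    python pipeline.py /path/to/project --step download
    python pipeline.py /path/to/project --step convert
    python pipeline.py /path/to/project --step match
    python pipeline.py /path/to/project --step webapp
    python pipeline.py /path/to/project --step serve
    python pipeline.py /path/to/project --step serve --port 9123

Without --step, all steps run in order (equivalent to --step all).
"""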
import argparse
import os
import shutil
import subprocess
import sys
# Absolute path of the directory that contains this file; run_step() looks up
# the pipeline scripts here.
SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
# "webapp" directory next to the scripts directory; setup_webapp() copies its
# files into the project.
WEBAPP_DIR = os.path.join(os.path.dirname(SCRIPTS_DIR), "webapp")
def run_step(script_name, project_dir):
    """Run one pipeline script as a child Python process.

    Args:
        script_name: File name of a script located in SCRIPTS_DIR.
        project_dir: Project directory, passed to the script as its only
            command-line argument.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status.
    """
    script = os.path.join(SCRIPTS_DIR, script_name)
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"Running: {script_name}")
    # Flush before starting the child: when stdout is piped or redirected it
    # is block-buffered, and the banner would otherwise show up after the
    # child's own output.
    print(rule, flush=True)
    subprocess.run([sys.executable, script, project_dir], check=True)
def setup_webapp(project_dir):
    """Assemble the web app inside ``<project_dir>/webapp``.

    Copies every regular file found directly in WEBAPP_DIR (sub-directories
    are skipped), copies ``data/mindmap_data.json`` if it exists, and links
    ``<project_dir>/audio`` as ``webapp/audio`` if the audio directory exists.

    Args:
        project_dir: Path to the project directory.
    """
    data_dir = os.path.join(project_dir, "data")
    webapp_dest = os.path.join(project_dir, "webapp")
    os.makedirs(webapp_dest, exist_ok=True)

    # Copy webapp files
    for name in os.listdir(WEBAPP_DIR):
        src = os.path.join(WEBAPP_DIR, name)
        if os.path.isfile(src):
            shutil.copy2(src, os.path.join(webapp_dest, name))

    # Copy data
    data_src = os.path.join(data_dir, "mindmap_data.json")
    if os.path.exists(data_src):
        shutil.copy2(data_src, os.path.join(webapp_dest, "mindmap_data.json"))

    # Symlink audio
    audio_link = os.path.join(webapp_dest, "audio")
    # Use an absolute target: a relative one would be resolved relative to
    # the directory containing the link, not the current working directory.
    audio_src = os.path.abspath(os.path.join(project_dir, "audio"))
    if os.path.exists(audio_src):
        # A dangling link (e.g. left over after the project was moved) makes
        # os.path.exists() return False although the name is taken, so
        # os.symlink() would fail with FileExistsError. Replace it instead.
        if os.path.islink(audio_link) and not os.path.exists(audio_link):
            os.remove(audio_link)
        if not os.path.lexists(audio_link):
            os.symlink(audio_src, audio_link)

    print(f"\nWebapp ready in {webapp_dest}")
def _lan_ip():
    """Return the local IPv4 address used for outbound traffic, or "localhost"."""
    import socket

    try:
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as probe:
            # Connecting a datagram socket only selects the outgoing
            # interface; getsockname() then reports that interface's address.
            probe.connect(('8.8.8.8', 80))
            return probe.getsockname()[0]
    except OSError:
        # No network or no route: fall back to a name that always works.
        return "localhost"


def serve(project_dir, port=9123):
    """Start the web server for the project's web app.

    Runs ``<project_dir>/webapp/server.py`` with the webapp directory as the
    working directory and blocks until that process exits. If server.py is
    missing, the web app is set up first via setup_webapp().

    Args:
        project_dir: Path to the project directory.
        port: Port shown in the printed URLs.
    """
    webapp_dir = os.path.join(project_dir, "webapp")
    server_script = os.path.join(webapp_dir, "server.py")
    if not os.path.exists(server_script):
        setup_webapp(project_dir)

    ip = _lan_ip()
    print(f"\nServer: http://localhost:{port}")
    # Flush so the URLs are visible before the blocking server process starts
    # writing its own output.
    print(f"LAN: http://{ip}:{port}", flush=True)
    # NOTE(review): port is only used in the messages above; it is not passed
    # to server.py. Confirm server.py listens on the same port.
    subprocess.run([sys.executable, server_script], cwd=webapp_dir)
def main():
    """Parse the command line and run the requested pipeline step(s)."""
    # Steps that consist of running exactly one script.
    single_script_steps = {
        "download": "download_audio.py",
        "convert": "convert_srt.py",
        "match": "match_quotes.py",
    }

    cli = argparse.ArgumentParser(description="Podcast Mindmap Pipeline")
    cli.add_argument("project_dir", help="Path to the project directory")
    cli.add_argument(
        "--step",
        choices=["download", "convert", "match", "webapp", "serve", "all"],
        default="all",
        help="Which step to run",
    )
    cli.add_argument("--port", type=int, default=9123, help="Server port")
    options = cli.parse_args()

    project_dir = os.path.abspath(options.project_dir)
    step = options.step

    if step in single_script_steps:
        run_step(single_script_steps[step], project_dir)
        return

    if step == "webapp":
        setup_webapp(project_dir)
        return

    if step == "serve":
        setup_webapp(project_dir)
        serve(project_dir, options.port)
        return

    if step == "all":
        run_step("download_audio.py", project_dir)
        # Manual step: the user transcribes the audio externally (the prompt
        # asks for an SRT export into the audio/ folder) and presses Enter.
        print("\n*** Transkribiere die Audio-Dateien jetzt mit MacWhisper (SRT-Export). ***")
        print("*** Drücke Enter wenn die SRT-Dateien im audio/ Ordner liegen. ***")
        input()
        for script_name in ("convert_srt.py", "match_quotes.py"):
            run_step(script_name, project_dir)
        setup_webapp(project_dir)
        serve(project_dir, options.port)
# Run the command-line interface only when executed as a script, not when
# this module is imported.
if __name__ == "__main__":
    main()