- scripts/sync_oparl.py: 5-Phasen-Sync (Import → Scrape → Ketten → Status → FTS5) - Inkrementell: Nur neue Papers, stoppt nach 3 leeren Seiten - Dry-Run-Modus (--dry-run) - API: GET /api/sync/status + POST /api/sync/trigger - Cron-fähig (Exit 0/1, stdout-Logging) - Sync-State in data/sync_state.json - 11 neue Vorlagen beim Dry-Run erkannt Closes #3
105 lines
2.7 KiB
Python
105 lines
2.7 KiB
Python
from __future__ import annotations
|
|
"""API routes for OParl sync management."""
|
|
|
|
import json
|
|
import threading
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
from fastapi import APIRouter, HTTPException
|
|
|
|
# Router for the sync endpoints defined in this module; every route
# registered on it gets the "/sync" path prefix and the "sync" tag.
router = APIRouter(prefix="/sync", tags=["sync"])
|
|
|
|
# Sync state file (written by sync_oparl.py)
|
|
# parents[5] of this module's resolved path. Presumably the project root: it
# is the base for the "data", "scripts" and "backend/src" paths used in this
# module. NOTE(review): the index must be re-checked if this file is moved.
PROJECT_ROOT = Path(__file__).resolve().parents[5]
|
|
# Location of the JSON state file that _load_sync_state() reads.
SYNC_STATE_PATH = PROJECT_ROOT / "data" / "sync_state.json"
|
|
|
|
# Background job tracking
|
|
# None until the first trigger. Afterwards a dict with "status" set to
# "running", "done" or "error" and a "started_at" ISO timestamp; finished
# jobs also carry "finished_at" plus either "result" or "error".
_sync_job: dict | None = None
|
|
|
|
|
|
def _load_sync_state() -> dict:
    """Load the state of the last sync from ``SYNC_STATE_PATH``.

    This is deliberately best-effort: a missing, unreadable or malformed
    state file is treated as "no sync recorded" instead of raising, so the
    status endpoint keeps working.

    Returns:
        The parsed JSON object, or an empty dict if the file is absent,
        cannot be read or decoded, or does not hold a JSON object.
    """
    try:
        # Read directly instead of calling exists() first: the file is
        # written by another program and may appear or vanish in between.
        # JSON text is UTF-8 by specification, so do not rely on the locale.
        raw = SYNC_STATE_PATH.read_text(encoding="utf-8")
        state = json.loads(raw)
    except (OSError, ValueError):
        # OSError: file missing or unreadable. ValueError: invalid JSON
        # (json.JSONDecodeError) or invalid UTF-8 (UnicodeDecodeError).
        return {}

    # Honour the declared return type even if the file holds e.g. a list.
    return state if isinstance(state, dict) else {}
|
|
|
|
|
|
@router.get("/status")
def sync_status():
    """Report the time and statistics of the last sync.

    Returns:
        A dict with ``running`` and ``last_sync``. While a job is running
        it also contains ``started_at``; when no sync state is available,
        ``last_sync`` is ``None`` and a ``message`` is included.
    """
    last_sync = _load_sync_state()
    job = _sync_job

    # A job that is currently in progress takes precedence.
    if job and job.get("status") == "running":
        return {
            "running": True,
            "started_at": job.get("started_at"),
            "last_sync": last_sync,
        }

    response = {"running": False, "last_sync": last_sync or None}
    if not last_sync:
        response["message"] = "Noch kein Sync durchgeführt"
    return response
|
|
|
|
|
|
def _run_sync():
    """Run the OParl sync; used as the target of the background thread.

    Takes no arguments and returns nothing. The outcome is published by
    rebinding the module-level ``_sync_job`` to a dict whose ``status`` is
    ``"done"`` (with ``result``) or ``"error"`` (with ``error``), together
    with ``started_at`` and ``finished_at``.
    """
    global _sync_job

    # Carry over the start time recorded when the job was triggered.
    started_at = _sync_job.get("started_at") if _sync_job else None

    try:
        # Imported here so that a missing or broken sync script fails this
        # job instead of preventing the server from starting.
        import sys

        # Insert each directory only once; unconditional inserts would
        # grow sys.path by two duplicate entries on every triggered sync.
        # Order is kept so that "backend/src" ends up ahead of "scripts".
        for directory in (
            PROJECT_ROOT / "scripts",
            PROJECT_ROOT / "backend" / "src",
        ):
            entry = str(directory)
            if entry not in sys.path:
                sys.path.insert(0, entry)

        from sync_oparl import sync

        result = sync(dry_run=False)
    except Exception as exc:
        # NOTE(review): SystemExit is not an Exception subclass; if sync()
        # can call sys.exit(), the job would stay "running" - confirm.
        _sync_job = {
            "status": "error",
            "error": str(exc),
            "started_at": started_at,
            "finished_at": datetime.now().isoformat(),
        }
    else:
        _sync_job = {
            "status": "done",
            "started_at": started_at,
            "finished_at": datetime.now().isoformat(),
            "result": result,
        }
|
|
|
|
|
|
# Serialises the "already running?" check with the write that marks a job
# as running. The handler below is a plain ``def``, which FastAPI executes
# in a threadpool, so two concurrent POSTs could otherwise both pass the
# check and start two syncs.
_sync_start_lock = threading.Lock()


@router.post("/trigger")
def trigger_sync():
    """Start an OParl sync as a background job.

    Returns:
        A dict with ``status`` ``"started"``, the ``started_at`` ISO
        timestamp and a ``message`` pointing to the status endpoint.

    Raises:
        HTTPException: With status code 409 if a sync is already running.
    """
    global _sync_job

    with _sync_start_lock:
        if _sync_job and _sync_job.get("status") == "running":
            raise HTTPException(status_code=409, detail="Sync läuft bereits")

        started_at = datetime.now().isoformat()
        _sync_job = {
            "status": "running",
            "started_at": started_at,
        }

    # Daemon thread: a running sync does not keep the process alive.
    worker = threading.Thread(target=_run_sync, daemon=True)
    worker.start()

    return {
        "status": "started",
        "started_at": started_at,
        "message": "Sync gestartet. Status unter GET /api/sync/status abrufbar.",
    }
|