Graceful Shutdown v2: Queue sperren + nur laufende Jobs abwarten

- _shutting_down-Flag: sperrt enqueue() beim Shutdown → User bekommt "Server wird neu gestartet" statt stillem Einreihen in eine tote Queue
- graceful_shutdown wartet NUR auf processing-Jobs (nicht auf die ganze Queue)
- Queued-Jobs bleiben in der DB als stale → User kann nach Restart re-triggern
- Timeout 15 min (900s) — ein LLM-Call dauert max ~120s
- stop_grace_period: 15m in docker-compose
- get_queue_status() meldet shutting_down für UI-Feedback
2026-04-10 23:20:23 +02:00 · 2026-04-10 23:20:23 +02:00 · f4b7b000a1
commit f4b7b000a1
parent 2dc504ffea
3 changed files with 37 additions and 17 deletions
--- a/app/main.py
+++ b/app/main.py
@ -134,7 +134,7 @@ async def startup():
 async def shutdown():
    """Graceful Shutdown: warte auf laufende Queue-Jobs bevor der Container stirbt."""
    from .queue import graceful_shutdown
-    await graceful_shutdown(timeout=300)
+    await graceful_shutdown(timeout=900)  # 15 min — passend zu stop_grace_period
    # JSON import disabled - all assessments now live in SQLite DB only
--- a/app/queue.py
+++ b/app/queue.py
@ -15,7 +15,8 @@ logger = logging.getLogger(__name__)
 # Konfiguration
 MAX_QUEUE_SIZE = 50
 CONCURRENCY = int(os.environ.get("QUEUE_CONCURRENCY", "3"))
-MIN_PAUSE_SECONDS = 3  # Pause pro Worker zwischen Jobs
+MIN_PAUSE_SECONDS = 3
 _shutting_down = False  # Sperrt neue Jobs bei Graceful Shutdown
 BACKOFF_BASE = 15
 BACKOFF_MAX = 300
@ -45,6 +46,8 @@ async def enqueue(
    **kwargs: Any,
 ) -> int:
    """Add a job to the queue. Returns queue position."""
    if _shutting_down:
        raise QueueFullError("Server wird neu gestartet. Bitte in Kürze erneut versuchen.")
    try:
        _queue.put_nowait((job_id, callback, args, kwargs))
    except asyncio.QueueFull:
@ -81,6 +84,7 @@ def get_queue_status() -> dict:
        "pending": pending,
        "max_size": MAX_QUEUE_SIZE,
        "concurrency": CONCURRENCY,
        "shutting_down": _shutting_down,
        "processed_total": _stats["processed"],
        "failed_total": _stats["failed"],
        "estimated_wait_seconds": round(estimated_wait),
@ -158,25 +162,41 @@ def start_worker() -> list[asyncio.Task]:
    return _worker_tasks
-async def graceful_shutdown(timeout: int = 300):
+async def graceful_shutdown(timeout: int = 900):
-    """Wait for running jobs to finish before shutdown.
+    """Graceful Shutdown: aktuell laufende Jobs beenden, Queue sperren.
-    Called from FastAPI shutdown event. Waits up to `timeout` seconds
+    1. Sperrt neue Jobs (_shutting_down = True)
-    for the queue to drain and workers to finish their current job.
+    2. Wartet bis alle gerade PROCESSING-Jobs fertig sind (max timeout)
    3. Queued-Jobs bleiben in der DB als 'stale' → User kann nach
       Restart erneut triggern
    Timeout 15 min (900s) — ein einzelner LLM-Call dauert max ~120s,
    bei 3 parallelen Workern also max ~120s reale Wartezeit.
    """
-    pending = _queue.qsize()
+    global _shutting_down
    _shutting_down = True
    processing = sum(1 for j in _jobs.values() if j.get("status") == "processing")
-    if pending == 0 and processing == 0:
+    pending = _queue.qsize()
-        logger.info("Queue empty, shutdown immediately")
+
    if processing == 0:
        logger.info("Graceful shutdown: keine laufenden Jobs, sofort beenden (%d queued verworfen)", pending)
        return
-    logger.warning("Graceful shutdown: waiting for %d pending + %d processing jobs (max %ds)",
+    logger.warning("Graceful shutdown: warte auf %d laufende Jobs (max %ds). %d queued werden beim Restart stale.",
-                    pending, processing, timeout)
+                    processing, timeout, pending)
-    try:
+
-        await asyncio.wait_for(_queue.join(), timeout=timeout)
+    # Warte nur auf die laufenden Jobs, nicht auf die ganze Queue
-        logger.info("Queue drained, shutdown clean")
+    start = time.time()
-    except asyncio.TimeoutError:
+    while time.time() - start < timeout:
-        logger.error("Graceful shutdown timeout after %ds, %d jobs still pending", timeout, _queue.qsize())
+        still_processing = sum(1 for j in _jobs.values() if j.get("status") == "processing")
        if still_processing == 0:
            logger.info("Graceful shutdown: alle laufenden Jobs beendet nach %.0fs", time.time() - start)
            return
        await asyncio.sleep(2)
    logger.error("Graceful shutdown: Timeout nach %ds, %d Jobs noch aktiv",
                  timeout, sum(1 for j in _jobs.values() if j.get("status") == "processing"))
 async def re_enqueue_pending():
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -3,7 +3,7 @@ services:
    build: .
    container_name: gwoe-antragspruefer
    restart: unless-stopped
-    stop_grace_period: 5m  # Queue-Jobs zu Ende laufen lassen vor Kill
+    stop_grace_period: 15m  # Laufende LLM-Jobs zu Ende laufen lassen
    environment:
      - DASHSCOPE_API_KEY=${DASHSCOPE_API_KEY}
      - KEYCLOAK_URL=https://sso.toppyr.de