Graceful shutdown v2: lock the queue + wait only for running jobs
- _shutting_down flag: blocks enqueue() during shutdown, so the user gets "Server is restarting" instead of being silently queued into a dead queue
- graceful_shutdown waits ONLY for processing jobs, not the whole queue (see the sketch below)
- Queued jobs stay in the DB as stale, so the user can re-trigger them after the restart
- Timeout 15 min (900s); a single LLM call takes at most ~120s
- stop_grace_period: 15m in docker-compose
- get_queue_status() reports shutting_down for UI feedback
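The heart of v2, as a standalone sketch with simplified names (not the diff itself): rather than awaiting _queue.join(), which only returns after every queued job has drained, shutdown now watches the job table and returns as soon as nothing is in "processing" anymore.

# Minimal standalone sketch of the v2 waiting strategy: queued jobs are
# ignored on purpose; only jobs already marked "processing" are awaited.
import asyncio
import time

jobs = {1: {"status": "processing"}, 2: {"status": "queued"}}

async def wait_for_running_jobs(timeout: float = 900.0) -> bool:
    start = time.time()
    while time.time() - start < timeout:
        if not any(j["status"] == "processing" for j in jobs.values()):
            return True           # every running job finished in time
        await asyncio.sleep(2)    # same 2-second poll as the real loop
    return False                  # timeout: the container dies with the job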
parent 2dc504ffea
commit f4b7b000a1
@@ -134,7 +134,7 @@ async def startup():
 async def shutdown():
     """Graceful shutdown: wait for running queue jobs before the container dies."""
     from .queue import graceful_shutdown
-    await graceful_shutdown(timeout=300)
+    await graceful_shutdown(timeout=900)  # 15 min: matches stop_grace_period
 
 
 # JSON import disabled - all assessments now live in SQLite DB only
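For context, and as an assumption since the decorator sits outside the hunk: a handler like the one above is typically wired to FastAPI's shutdown event, e.g. in the classic on_event style (newer FastAPI versions prefer a lifespan handler).

# Sketch of the registration; @app.on_event("shutdown") exists but is
# deprecated in current FastAPI in favor of lifespan handlers.
from fastapi import FastAPI

app = FastAPI()

@app.on_event("shutdown")
async def shutdown():
    from .queue import graceful_shutdown  # relative import as in the hunk
    await graceful_shutdown(timeout=900)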
app/queue.py
@@ -15,7 +15,8 @@ logger = logging.getLogger(__name__)
 # Configuration
 MAX_QUEUE_SIZE = 50
 CONCURRENCY = int(os.environ.get("QUEUE_CONCURRENCY", "3"))
-MIN_PAUSE_SECONDS = 3  # pause per worker between jobs
+MIN_PAUSE_SECONDS = 3
+_shutting_down = False  # blocks new jobs during graceful shutdown
 BACKOFF_BASE = 15
 BACKOFF_MAX = 300
 
@@ -45,6 +46,8 @@ async def enqueue(
     **kwargs: Any,
 ) -> int:
     """Add a job to the queue. Returns queue position."""
+    if _shutting_down:
+        raise QueueFullError("Server is restarting. Please try again shortly.")
     try:
         _queue.put_nowait((job_id, callback, args, kwargs))
     except asyncio.QueueFull:
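How this guard reaches the client is not part of the diff. A plausible route-level mapping (submit_job and run_assessment are made-up names) turns QueueFullError into an HTTP 503 so the frontend can show the restart notice.

# Hypothetical route sketch: submit_job/run_assessment are assumptions;
# only enqueue and QueueFullError come from the diff.
from fastapi import HTTPException

from .queue import QueueFullError, enqueue

async def run_assessment(job_id: int) -> None:
    ...  # stand-in for the real LLM job callback

async def submit_job(job_id: int) -> dict:
    try:
        position = await enqueue(job_id, run_assessment)
    except QueueFullError as exc:
        # Covers both "queue full" and the new shutting-down case.
        raise HTTPException(status_code=503, detail=str(exc))
    return {"position": position}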
@@ -81,6 +84,7 @@ def get_queue_status() -> dict:
         "pending": pending,
         "max_size": MAX_QUEUE_SIZE,
         "concurrency": CONCURRENCY,
+        "shutting_down": _shutting_down,
         "processed_total": _stats["processed"],
         "failed_total": _stats["failed"],
         "estimated_wait_seconds": round(estimated_wait),
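The new field is what the UI can key on. A minimal consumer sketch; the dict keys are exactly those above, the banner logic is illustrative.

# Sketch: deriving user-facing feedback from get_queue_status().
from .queue import get_queue_status

def queue_banner() -> str | None:
    status = get_queue_status()
    if status["shutting_down"]:
        return "Server is restarting. Please try again shortly."
    if status["pending"] >= status["max_size"]:
        return "Queue is full, please try again later."
    return None  # no banner, business as usual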
@@ -158,25 +162,41 @@ def start_worker() -> list[asyncio.Task]:
     return _worker_tasks
 
 
-async def graceful_shutdown(timeout: int = 300):
-    """Wait for running jobs to finish before shutdown.
+async def graceful_shutdown(timeout: int = 900):
+    """Graceful shutdown: finish the currently running jobs, lock the queue.
 
-    Called from FastAPI shutdown event. Waits up to `timeout` seconds
-    for the queue to drain and workers to finish their current job.
+    1. Blocks new jobs (_shutting_down = True)
+    2. Waits until all jobs currently PROCESSING are finished (max timeout)
+    3. Queued jobs stay in the DB as 'stale' -> the user can trigger
+       them again after the restart
+
+    Timeout is 15 min (900s): a single LLM call takes at most ~120s,
+    so with 3 parallel workers the real wait is at most ~120s as well.
     """
-    pending = _queue.qsize()
+    global _shutting_down
+    _shutting_down = True
+
     processing = sum(1 for j in _jobs.values() if j.get("status") == "processing")
-    if pending == 0 and processing == 0:
-        logger.info("Queue empty, shutdown immediately")
+    pending = _queue.qsize()
+
+    if processing == 0:
+        logger.info("Graceful shutdown: no running jobs, exiting immediately (%d queued discarded)", pending)
         return
 
-    logger.warning("Graceful shutdown: waiting for %d pending + %d processing jobs (max %ds)",
-                   pending, processing, timeout)
-    try:
-        await asyncio.wait_for(_queue.join(), timeout=timeout)
-        logger.info("Queue drained, shutdown clean")
-    except asyncio.TimeoutError:
-        logger.error("Graceful shutdown timeout after %ds, %d jobs still pending", timeout, _queue.qsize())
+    logger.warning("Graceful shutdown: waiting for %d running jobs (max %ds). %d queued will go stale on restart.",
+                   processing, timeout, pending)
+
+    # Wait only for the running jobs, not for the whole queue
+    start = time.time()
+    while time.time() - start < timeout:
+        still_processing = sum(1 for j in _jobs.values() if j.get("status") == "processing")
+        if still_processing == 0:
+            logger.info("Graceful shutdown: all running jobs finished after %.0fs", time.time() - start)
+            return
+        await asyncio.sleep(2)
+
+    logger.error("Graceful shutdown: timeout after %ds, %d jobs still active",
+                 timeout, sum(1 for j in _jobs.values() if j.get("status") == "processing"))
 
 
 async def re_enqueue_pending():
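The timeout branch is straightforward to exercise on its own. A test sketch, assuming pytest-asyncio is installed and that the module-level _jobs dict is importable as app.queue (both assumptions):

# Test sketch: a job stuck in "processing" forces graceful_shutdown
# into its timeout branch instead of returning early.
import time

import pytest

from app import queue

@pytest.mark.asyncio
async def test_graceful_shutdown_times_out_on_stuck_job():
    queue._jobs[99] = {"status": "processing"}  # never finishes
    start = time.time()
    await queue.graceful_shutdown(timeout=4)    # short window for the test
    assert time.time() - start >= 4             # waited the full window
    assert queue._shutting_down is True         # queue stays locked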
@@ -3,7 +3,7 @@ services:
     build: .
     container_name: gwoe-antragspruefer
     restart: unless-stopped
-    stop_grace_period: 5m   # let queue jobs run to completion before the kill
+    stop_grace_period: 15m  # let running LLM jobs run to completion
     environment:
       - DASHSCOPE_API_KEY=${DASHSCOPE_API_KEY}
       - KEYCLOAK_URL=https://sso.toppyr.de
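One note on the interplay, which is general Docker behavior rather than anything specific to this repo: stop_grace_period is the window between the SIGTERM that docker compose stop or restart sends and the follow-up SIGKILL. Keeping it at 15m, matching the in-app timeout=900, means Docker never kills the container while graceful_shutdown is still legitimately waiting; an in-app timeout longer than the grace period would get cut short by SIGKILL instead.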