Graceful Shutdown v2: Queue sperren + nur laufende Jobs abwarten
- _shutting_down Flag: sperrt enqueue() bei Shutdown → User bekommt "Server wird neu gestartet" statt stillen Einreihens in eine tote Queue
- graceful_shutdown wartet NUR auf processing-Jobs (nicht ganze Queue)
- Queued-Jobs bleiben in DB als stale → User kann nach Restart re-triggern
- Timeout 15 min (900s) — ein LLM-Call dauert max ~120s
- stop_grace_period: 15m in docker-compose
- get_queue_status() meldet shutting_down für UI-Feedback
This commit is contained in:
parent
2dc504ffea
commit
f4b7b000a1
@ -134,7 +134,7 @@ async def startup():
|
|||||||
async def shutdown():
|
async def shutdown():
|
||||||
"""Graceful Shutdown: warte auf laufende Queue-Jobs bevor der Container stirbt."""
|
"""Graceful Shutdown: warte auf laufende Queue-Jobs bevor der Container stirbt."""
|
||||||
from .queue import graceful_shutdown
|
from .queue import graceful_shutdown
|
||||||
await graceful_shutdown(timeout=300)
|
await graceful_shutdown(timeout=900) # 15 min — passend zu stop_grace_period
|
||||||
|
|
||||||
|
|
||||||
# JSON import disabled - all assessments now live in SQLite DB only
|
# JSON import disabled - all assessments now live in SQLite DB only
|
||||||
|
|||||||
50
app/queue.py
50
app/queue.py
@ -15,7 +15,8 @@ logger = logging.getLogger(__name__)
|
|||||||
# Konfiguration
|
# Konfiguration
|
||||||
MAX_QUEUE_SIZE = 50
|
MAX_QUEUE_SIZE = 50
|
||||||
CONCURRENCY = int(os.environ.get("QUEUE_CONCURRENCY", "3"))
|
CONCURRENCY = int(os.environ.get("QUEUE_CONCURRENCY", "3"))
|
||||||
MIN_PAUSE_SECONDS = 3 # Pause pro Worker zwischen Jobs
|
MIN_PAUSE_SECONDS = 3
|
||||||
|
_shutting_down = False # Sperrt neue Jobs bei Graceful Shutdown
|
||||||
BACKOFF_BASE = 15
|
BACKOFF_BASE = 15
|
||||||
BACKOFF_MAX = 300
|
BACKOFF_MAX = 300
|
||||||
|
|
||||||
@ -45,6 +46,8 @@ async def enqueue(
|
|||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> int:
|
) -> int:
|
||||||
"""Add a job to the queue. Returns queue position."""
|
"""Add a job to the queue. Returns queue position."""
|
||||||
|
if _shutting_down:
|
||||||
|
raise QueueFullError("Server wird neu gestartet. Bitte in Kürze erneut versuchen.")
|
||||||
try:
|
try:
|
||||||
_queue.put_nowait((job_id, callback, args, kwargs))
|
_queue.put_nowait((job_id, callback, args, kwargs))
|
||||||
except asyncio.QueueFull:
|
except asyncio.QueueFull:
|
||||||
@ -81,6 +84,7 @@ def get_queue_status() -> dict:
|
|||||||
"pending": pending,
|
"pending": pending,
|
||||||
"max_size": MAX_QUEUE_SIZE,
|
"max_size": MAX_QUEUE_SIZE,
|
||||||
"concurrency": CONCURRENCY,
|
"concurrency": CONCURRENCY,
|
||||||
|
"shutting_down": _shutting_down,
|
||||||
"processed_total": _stats["processed"],
|
"processed_total": _stats["processed"],
|
||||||
"failed_total": _stats["failed"],
|
"failed_total": _stats["failed"],
|
||||||
"estimated_wait_seconds": round(estimated_wait),
|
"estimated_wait_seconds": round(estimated_wait),
|
||||||
@ -158,25 +162,41 @@ def start_worker() -> list[asyncio.Task]:
|
|||||||
return _worker_tasks
|
return _worker_tasks
|
||||||
|
|
||||||
|
|
||||||
async def graceful_shutdown(timeout: int = 300):
|
async def graceful_shutdown(timeout: int = 900):
|
||||||
"""Wait for running jobs to finish before shutdown.
|
"""Graceful Shutdown: aktuell laufende Jobs beenden, Queue sperren.
|
||||||
|
|
||||||
Called from FastAPI shutdown event. Waits up to `timeout` seconds
|
1. Sperrt neue Jobs (_shutting_down = True)
|
||||||
for the queue to drain and workers to finish their current job.
|
2. Wartet bis alle gerade PROCESSING-Jobs fertig sind (max timeout)
|
||||||
|
3. Queued-Jobs bleiben in der DB als 'stale' → User kann nach
|
||||||
|
Restart erneut triggern
|
||||||
|
|
||||||
|
Timeout 15 min (900s) — ein einzelner LLM-Call dauert max ~120s,
|
||||||
|
bei 3 parallelen Workern also max ~120s reale Wartezeit.
|
||||||
"""
|
"""
|
||||||
pending = _queue.qsize()
|
global _shutting_down
|
||||||
|
_shutting_down = True
|
||||||
|
|
||||||
processing = sum(1 for j in _jobs.values() if j.get("status") == "processing")
|
processing = sum(1 for j in _jobs.values() if j.get("status") == "processing")
|
||||||
if pending == 0 and processing == 0:
|
pending = _queue.qsize()
|
||||||
logger.info("Queue empty, shutdown immediately")
|
|
||||||
|
if processing == 0:
|
||||||
|
logger.info("Graceful shutdown: keine laufenden Jobs, sofort beenden (%d queued verworfen)", pending)
|
||||||
return
|
return
|
||||||
|
|
||||||
logger.warning("Graceful shutdown: waiting for %d pending + %d processing jobs (max %ds)",
|
logger.warning("Graceful shutdown: warte auf %d laufende Jobs (max %ds). %d queued werden beim Restart stale.",
|
||||||
pending, processing, timeout)
|
processing, timeout, pending)
|
||||||
try:
|
|
||||||
await asyncio.wait_for(_queue.join(), timeout=timeout)
|
# Warte nur auf die laufenden Jobs, nicht auf die ganze Queue
|
||||||
logger.info("Queue drained, shutdown clean")
|
start = time.time()
|
||||||
except asyncio.TimeoutError:
|
while time.time() - start < timeout:
|
||||||
logger.error("Graceful shutdown timeout after %ds, %d jobs still pending", timeout, _queue.qsize())
|
still_processing = sum(1 for j in _jobs.values() if j.get("status") == "processing")
|
||||||
|
if still_processing == 0:
|
||||||
|
logger.info("Graceful shutdown: alle laufenden Jobs beendet nach %.0fs", time.time() - start)
|
||||||
|
return
|
||||||
|
await asyncio.sleep(2)
|
||||||
|
|
||||||
|
logger.error("Graceful shutdown: Timeout nach %ds, %d Jobs noch aktiv",
|
||||||
|
timeout, sum(1 for j in _jobs.values() if j.get("status") == "processing"))
|
||||||
|
|
||||||
|
|
||||||
async def re_enqueue_pending():
|
async def re_enqueue_pending():
|
||||||
|
|||||||
@ -3,7 +3,7 @@ services:
|
|||||||
build: .
|
build: .
|
||||||
container_name: gwoe-antragspruefer
|
container_name: gwoe-antragspruefer
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
stop_grace_period: 5m # Queue-Jobs zu Ende laufen lassen vor Kill
|
stop_grace_period: 15m # Laufende LLM-Jobs zu Ende laufen lassen
|
||||||
environment:
|
environment:
|
||||||
- DASHSCOPE_API_KEY=${DASHSCOPE_API_KEY}
|
- DASHSCOPE_API_KEY=${DASHSCOPE_API_KEY}
|
||||||
- KEYCLOAK_URL=https://sso.toppyr.de
|
- KEYCLOAK_URL=https://sso.toppyr.de
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user