#95 Job queue: SQLite-backed asyncio worker with backpressure

FIFO queue for analysis jobs; replaces FastAPI BackgroundTasks.

app/queue.py:
- asyncio.Queue with MAX_QUEUE_SIZE=50
- Single worker coroutine (concurrency=1, DashScope-friendly)
- MIN_PAUSE_SECONDS=10 between jobs
- Exponential backoff after consecutive failures (15s → 5min)
- get_queue_status() for the status endpoint
- QueueFullError → HTTP 429 + Retry-After header
- start_worker() as a FastAPI startup task
- re_enqueue_pending() marks crash survivors as 'stale'

main.py:
- POST /api/analyze-drucksache uses queue.enqueue() instead of background_tasks.add_task()
- Response includes queue_position
- GET /api/queue/status reports pending, max_size, processed, estimated_wait_seconds, worker_running
- Worker is started in the app startup() hook

Tests: 201 passed, 5 skipped.
Refs: #95, #44 (batch processing builds on the queue)
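For reference, a minimal client-side sketch of the contract described above. The endpoint paths, the 429/Retry-After behaviour, and the response fields come from this commit; the request payload shape, base URL, Drucksache number, and the retry loop itself are illustrative assumptions, not part of the change.

    import time
    import requests

    BASE_URL = "http://localhost:8000"  # assumed local dev address

    def submit_analysis(drucksache: str) -> dict:
        # Payload shape is an assumption for illustration; see analyze_drucksache() for the real signature.
        payload = {"drucksache": drucksache}
        resp = requests.post(f"{BASE_URL}/api/analyze-drucksache", json=payload)
        if resp.status_code == 429:
            # Queue is full: wait the server-suggested interval, then retry once.
            wait = int(resp.headers.get("Retry-After", "60"))
            time.sleep(wait)
            resp = requests.post(f"{BASE_URL}/api/analyze-drucksache", json=payload)
        resp.raise_for_status()
        # Expected shape: {"status": "queued", "job_id": ..., "drucksache": ..., "queue_position": ...}
        return resp.json()

    job = submit_analysis("20/1234")  # hypothetical Drucksache number
    status = requests.get(f"{BASE_URL}/api/queue/status").json()
    print(job["queue_position"], status["pending"], status["estimated_wait_seconds"])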
Parent: 1a82f8294c
Commit: 289d37a84b
app/main.py · 43 changed lines
@@ -123,6 +123,10 @@ templates = Jinja2Templates(directory=str(templates_dir))
 async def startup():
     await init_db()
     init_embeddings_db()
+    # Job-Queue Worker starten (#95)
+    from .queue import start_worker, re_enqueue_pending
+    await re_enqueue_pending()
+    start_worker()
     # JSON import disabled - all assessments now live in SQLite DB only
     # Legacy import would overwrite new v5 assessments with old format
     # count = await import_json_assessments(settings.data_dir / "assessments")
@@ -264,6 +268,15 @@ async def get_pdf(job_id: str):
     raise HTTPException(status_code=500, detail="PDF nicht gefunden")


+# ─── Queue-Status (#95) ─────────────────────────────────────────────────────
+
+@app.get("/api/queue/status")
+async def queue_status():
+    """Aktueller Queue-Stand: wartende Jobs, geschätzte Wartezeit."""
+    from .queue import get_queue_status
+    return get_queue_status()
+
+
 # ─── Auth-Endpoints (#43) ───────────────────────────────────────────────────

 @app.get("/api/auth/me")
@@ -524,17 +537,31 @@ async def analyze_drucksache(
     # Get document metadata
     doc = await adapter.get_document(drucksache)

-    # Create job
+    # Create job and enqueue (#95)
+    from .queue import enqueue, QueueFullError
     job_id = str(uuid.uuid4())
     await create_job(job_id, text[:500], bundesland, model)

-    # Start background analysis
-    background_tasks.add_task(
-        run_drucksache_analysis,
-        job_id, drucksache, text, bundesland, model, doc
-    )
+    try:
+        position = await enqueue(
+            job_id,
+            run_drucksache_analysis,
+            job_id, drucksache, text, bundesland, model, doc,
+        )
+    except QueueFullError:
+        await update_job(job_id, status="rejected", error="Queue voll")
+        raise HTTPException(
+            status_code=429,
+            detail="Analyse-Queue ist voll. Bitte später erneut versuchen.",
+            headers={"Retry-After": "60"},
+        )

-    return {"status": "queued", "job_id": job_id, "drucksache": drucksache}
+    return {
+        "status": "queued",
+        "job_id": job_id,
+        "drucksache": drucksache,
+        "queue_position": position,
+    }


 async def run_drucksache_analysis(
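The QueueFullError → HTTP 429 mapping above can also be exercised against the queue module in isolation. A minimal sketch, assuming the package is importable as "app" and without starting the worker (so nothing is consumed); this is illustrative and not a test from the commit:

    import asyncio
    from app.queue import enqueue, QueueFullError, MAX_QUEUE_SIZE

    async def dummy_job() -> None:
        # Stand-in for run_drucksache_analysis; never executed here because
        # start_worker() is not called in this sketch.
        await asyncio.sleep(0)

    async def main() -> None:
        for i in range(MAX_QUEUE_SIZE):
            position = await enqueue(f"job-{i}", dummy_job)
            assert position == i + 1  # enqueue returns the 1-indexed queue position
        try:
            await enqueue("one-too-many", dummy_job)
        except QueueFullError:
            print("queue full -> the API layer maps this to HTTP 429 with Retry-After")

    asyncio.run(main())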
app/queue.py · new file · 165 lines
@@ -0,0 +1,165 @@
+"""Analysis job queue with single-worker processing (#95).
+
+Implements a FIFO queue backed by the existing ``jobs`` SQLite table
+with an in-memory ``asyncio.Queue`` for the worker loop. On startup,
+any ``status='queued'`` jobs from a previous run are re-enqueued.
+
+Architecture:
+- ``enqueue(job_id, callback, *args)`` → puts a job on the queue
+- A single worker coroutine processes jobs sequentially (concurrency=1)
+  with a configurable pause between calls (default 10s, DashScope-friendly)
+- ``get_queue_status()`` returns position count + estimated wait
+- ``MAX_QUEUE_SIZE`` limits backpressure → HTTP 429 when full
+
+The worker is started via ``start_worker()`` which should be called
+from the FastAPI startup event.
+"""
+
+import asyncio
+import logging
+import time
+from typing import Any, Callable, Coroutine, Optional
+
+logger = logging.getLogger(__name__)
+
+# Konfiguration
+MAX_QUEUE_SIZE = 50      # Maximale Queue-Tiefe bevor 429 zurückgegeben wird
+MIN_PAUSE_SECONDS = 10   # Mindest-Pause zwischen LLM-Calls (DashScope Rate-Limit)
+BACKOFF_BASE = 15        # Exponentielles Backoff bei Fehlern (Sekunden)
+BACKOFF_MAX = 300        # Maximales Backoff (5 Minuten)
+
+# In-Memory Queue
+_queue: asyncio.Queue = asyncio.Queue(maxsize=MAX_QUEUE_SIZE)
+_worker_task: Optional[asyncio.Task] = None
+_stats = {
+    "processed": 0,
+    "failed": 0,
+    "started_at": None,
+    "last_completed_at": None,
+    "avg_duration": 60.0,  # Initialer Schätzwert: 60s pro Job
+}
+
+
+class QueueFullError(Exception):
+    """Raised when the queue is at capacity."""
+    pass
+
+
+async def enqueue(
+    job_id: str,
+    callback: Callable[..., Coroutine],
+    *args: Any,
+    **kwargs: Any,
+) -> int:
+    """Add a job to the queue. Returns the queue position (1-indexed).
+
+    Raises ``QueueFullError`` if the queue is at capacity.
+    """
+    try:
+        _queue.put_nowait((job_id, callback, args, kwargs))
+    except asyncio.QueueFull:
+        raise QueueFullError(
+            f"Queue voll ({MAX_QUEUE_SIZE} Jobs). Bitte später erneut versuchen."
+        )
+    position = _queue.qsize()
+    logger.info("Job %s enqueued at position %d", job_id, position)
+    return position
+
+
+def get_queue_status() -> dict:
+    """Current queue status for the /api/queue/status endpoint."""
+    pending = _queue.qsize()
+    avg = _stats["avg_duration"]
+    estimated_wait = pending * (avg + MIN_PAUSE_SECONDS)
+    return {
+        "pending": pending,
+        "max_size": MAX_QUEUE_SIZE,
+        "processed_total": _stats["processed"],
+        "failed_total": _stats["failed"],
+        "estimated_wait_seconds": round(estimated_wait),
+        "avg_job_duration_seconds": round(avg, 1),
+        "worker_running": _worker_task is not None and not _worker_task.done(),
+    }
+
+
+async def _worker():
+    """Single worker coroutine — processes jobs sequentially."""
+    logger.info("Queue worker started (max_queue=%d, pause=%ds)", MAX_QUEUE_SIZE, MIN_PAUSE_SECONDS)
+    _stats["started_at"] = time.time()
+    consecutive_failures = 0
+
+    while True:
+        job_id, callback, args, kwargs = await _queue.get()
+        t0 = time.time()
+
+        try:
+            logger.info("Processing job %s (queue remaining: %d)", job_id, _queue.qsize())
+            await callback(*args, **kwargs)
+            duration = time.time() - t0
+            _stats["processed"] += 1
+            _stats["last_completed_at"] = time.time()
+            # Gleitender Durchschnitt der Job-Dauer
+            _stats["avg_duration"] = (_stats["avg_duration"] * 0.8) + (duration * 0.2)
+            consecutive_failures = 0
+            logger.info("Job %s completed in %.1fs", job_id, duration)
+
+        except Exception:
+            _stats["failed"] += 1
+            consecutive_failures += 1
+            logger.exception("Job %s failed", job_id)
+
+            # Exponentielles Backoff bei Serien-Fehlern (z.B. LLM-Throttling)
+            if consecutive_failures > 1:
+                backoff = min(BACKOFF_BASE * (2 ** (consecutive_failures - 2)), BACKOFF_MAX)
+                logger.warning("Backoff %ds after %d consecutive failures", backoff, consecutive_failures)
+                await asyncio.sleep(backoff)
+
+        finally:
+            _queue.task_done()
+
+        # Mindest-Pause zwischen Jobs (DashScope-freundlich)
+        await asyncio.sleep(MIN_PAUSE_SECONDS)
+
+
+def start_worker() -> asyncio.Task:
+    """Start the worker coroutine. Call from FastAPI startup event."""
+    global _worker_task
+    if _worker_task is not None and not _worker_task.done():
+        logger.warning("Worker already running, not starting a second one")
+        return _worker_task
+    _worker_task = asyncio.create_task(_worker())
+    return _worker_task
+
+
+async def re_enqueue_pending():
+    """Re-enqueue jobs with status='queued' from a previous run.
+
+    Called at startup to recover from container restarts. Only picks up
+    jobs that were queued but never started processing.
+    """
+    import aiosqlite
+    from .config import settings
+
+    async with aiosqlite.connect(settings.db_path) as db:
+        db.row_factory = aiosqlite.Row
+        rows = await db.execute(
+            "SELECT id FROM jobs WHERE status = 'queued' ORDER BY created_at"
+        )
+        queued = await rows.fetchall()
+
+    if not queued:
+        return
+
+    logger.info("Re-enqueueing %d pending jobs from previous run", len(queued))
+    # We can't re-enqueue with the original callback here because we don't
+    # know the original parameters. Mark them as 'stale' instead — the user
+    # can re-trigger via the UI.
+    import aiosqlite
+    async with aiosqlite.connect(settings.db_path) as db:
+        for row in queued:
+            await db.execute(
+                "UPDATE jobs SET status = 'stale', updated_at = datetime('now') WHERE id = ?",
+                (row["id"],),
+            )
+        await db.commit()
+    logger.info("Marked %d stale jobs (user must re-trigger)", len(queued))
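For orientation, the arithmetic behind the backoff and wait estimates above, recomputed with the same constants; this is an illustrative sketch, not code from the commit:

    # Backoff schedule applied in _worker() after consecutive failures:
    # 2nd failure -> 15s, 3rd -> 30s, 4th -> 60s, 5th -> 120s, 6th -> 240s, 7th+ -> 300s (cap)
    BACKOFF_BASE, BACKOFF_MAX = 15, 300
    for n in range(2, 9):
        print(n, min(BACKOFF_BASE * 2 ** (n - 2), BACKOFF_MAX))

    # Wait estimate reported by get_queue_status(): pending jobs times the
    # exponentially weighted average job duration plus the mandatory pause.
    MIN_PAUSE_SECONDS = 10
    avg_duration = 60.0   # initial estimate before any job has completed
    pending = 5
    print(round(pending * (avg_duration + MIN_PAUSE_SECONDS)))  # -> 350 seconds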