#44 Batch-Analyse: POST /api/batch-analyze
Neuer Endpoint der die neuesten ungeprüften Drucksachen eines BL automatisch sucht, herunterlädt und in die Queue (#95) einreiht: POST /api/batch-analyze bundesland=NRW (Pflicht) limit=10 (1-100, default 10) Flow: 1. adapter.search("", limit=limit*3) holt neueste Drucksachen 2. Pro Drucksache: check ob schon bewertet → skip 3. download_text → enqueue(run_drucksache_analysis) 4. Queue verarbeitet seriell mit 10s Pause (DashScope-freundlich) Response: { "status": "batch_enqueued", "enqueued": 7, "skipped_existing": 3, "jobs": [{"drucksache": "18/...", "title": "...", "queue_position": 1}, ...] } Rate-limited auf 3/min. Erfordert Auth (#43). Bei voller Queue: enqueued nur soweit Platz, kein Error. Tests: 201 passed. Refs: #44, #95 (Queue-Basis)
This commit is contained in:
parent
289d37a84b
commit
6a433e9217
67
app/main.py
67
app/main.py
@ -504,6 +504,73 @@ async def search_landtag(
|
|||||||
return {"error": f"Suchfehler: {str(e)}"}
|
return {"error": f"Suchfehler: {str(e)}"}
|
||||||
|
|
||||||
|
|
||||||
|
# API: Batch-Analyse (#44) — enqueued ungeprüfte Drucksachen eines BL
|
||||||
|
@app.post("/api/batch-analyze")
|
||||||
|
@limiter.limit("3/minute")
|
||||||
|
async def batch_analyze(
|
||||||
|
request: Request,
|
||||||
|
bundesland: str = Form(...),
|
||||||
|
limit: int = Form(10),
|
||||||
|
user: dict = Depends(require_auth),
|
||||||
|
):
|
||||||
|
"""Sucht die neuesten Drucksachen im Landtag-Portal und enqueued
|
||||||
|
alle, die noch nicht in der DB bewertet sind.
|
||||||
|
|
||||||
|
Returns: Liste der enqueued Drucksachen + Queue-Position.
|
||||||
|
"""
|
||||||
|
from .queue import enqueue, QueueFullError
|
||||||
|
|
||||||
|
if limit < 1 or limit > 100:
|
||||||
|
raise HTTPException(status_code=400, detail="limit muss 1-100 sein")
|
||||||
|
|
||||||
|
adapter = get_adapter(bundesland)
|
||||||
|
if not adapter:
|
||||||
|
raise HTTPException(status_code=400, detail=f"Bundesland {bundesland} nicht unterstützt")
|
||||||
|
|
||||||
|
# Neueste Drucksachen vom Landtag holen (leer = neueste Anträge)
|
||||||
|
drucksachen = await adapter.search("", limit=limit * 3) # 3× holen wegen Typ-Filter
|
||||||
|
|
||||||
|
enqueued = []
|
||||||
|
skipped = 0
|
||||||
|
for doc in drucksachen:
|
||||||
|
if len(enqueued) >= limit:
|
||||||
|
break
|
||||||
|
# Schon bewertet?
|
||||||
|
existing = await get_assessment(doc.drucksache)
|
||||||
|
if existing:
|
||||||
|
skipped += 1
|
||||||
|
continue
|
||||||
|
# Text herunterladen
|
||||||
|
text = await adapter.download_text(doc.drucksache)
|
||||||
|
if not text:
|
||||||
|
continue
|
||||||
|
# Enqueue
|
||||||
|
job_id = str(uuid.uuid4())
|
||||||
|
await create_job(job_id, text[:500], bundesland, "qwen-plus")
|
||||||
|
try:
|
||||||
|
position = await enqueue(
|
||||||
|
job_id,
|
||||||
|
run_drucksache_analysis,
|
||||||
|
job_id, doc.drucksache, text, bundesland, "qwen-plus", doc,
|
||||||
|
)
|
||||||
|
enqueued.append({
|
||||||
|
"drucksache": doc.drucksache,
|
||||||
|
"title": doc.title,
|
||||||
|
"job_id": job_id,
|
||||||
|
"queue_position": position,
|
||||||
|
})
|
||||||
|
except QueueFullError:
|
||||||
|
break
|
||||||
|
|
||||||
|
return {
|
||||||
|
"status": "batch_enqueued",
|
||||||
|
"bundesland": bundesland,
|
||||||
|
"enqueued": len(enqueued),
|
||||||
|
"skipped_existing": skipped,
|
||||||
|
"jobs": enqueued,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# API: Analyze a document from parliament portal
|
# API: Analyze a document from parliament portal
|
||||||
@app.post("/api/analyze-drucksache")
|
@app.post("/api/analyze-drucksache")
|
||||||
@limiter.limit("10/minute")
|
@limiter.limit("10/minute")
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user