feat: Batch-Analyse mit "Alle Bundeslaender"-Modus
User-Wunsch: Die Batch-Analyse soll Anträge aus mehreren Bundesländern (BL) gleichzeitig heranziehen können, nicht nur aus einem einzelnen.

- Neue Dropdown-Option "— Alle aktiven Bundesländer (Limit verteilt) —" als Default
- Backend: Bei `bundesland=ALL` wird über `aktive_bundeslaender()` iteriert und das Limit gleichmäßig verteilt (`max(1, limit // N)` pro BL).
- Helper `_enqueue_for_bl()` extrahiert die BL-spezifische Logik.
- Adapter-Fehler pro BL werden geloggt und übersprungen; sie blockieren nicht die anderen BL.
- Response-Erweiterung: `per_bundesland`-Liste mit Per-BL-Stats (enqueued / skipped_existing / error).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
f008570cff
commit
48a272a87d
88
app/main.py
88
app/main.py
@ -1416,46 +1416,35 @@ async def search_landtag(
|
||||
|
||||
|
||||
# API: Batch-Analyse (#44) — enqueued ungeprüfte Drucksachen eines BL
|
||||
@app.post("/api/batch-analyze")
|
||||
@limiter.limit("3/minute")
|
||||
async def batch_analyze(
|
||||
request: Request,
|
||||
bundesland: str = Form(...),
|
||||
limit: int = Form(10),
|
||||
user: dict = Depends(require_admin),
|
||||
):
|
||||
"""Sucht die neuesten Drucksachen im Landtag-Portal und enqueued
|
||||
alle, die noch nicht in der DB bewertet sind.
|
||||
async def _enqueue_for_bl(
|
||||
bundesland: str, limit: int,
|
||||
) -> tuple[list[dict], int]:
|
||||
"""Enqueued bis zu ``limit`` neue Drucksachen aus einem BL.
|
||||
|
||||
Returns: Liste der enqueued Drucksachen + Queue-Position.
|
||||
Returns ``(enqueued_jobs, skipped_existing)``.
|
||||
"""
|
||||
from .queue import enqueue, QueueFullError
|
||||
|
||||
if limit < 1 or limit > 100:
|
||||
raise HTTPException(status_code=400, detail="limit muss 1-100 sein")
|
||||
|
||||
adapter = get_adapter(bundesland)
|
||||
if not adapter:
|
||||
raise HTTPException(status_code=400, detail=f"Bundesland {bundesland} nicht unterstützt")
|
||||
return [], 0
|
||||
|
||||
# Neueste Drucksachen vom Landtag holen, gefiltert auf abstimmbare Typen (#127).
|
||||
drucksachen = adapter._filter_abstimmbar(await adapter.search("", limit=limit * 10))
|
||||
drucksachen = adapter._filter_abstimmbar(
|
||||
await adapter.search("", limit=limit * 10)
|
||||
)
|
||||
|
||||
enqueued = []
|
||||
enqueued: list[dict] = []
|
||||
skipped = 0
|
||||
for doc in drucksachen:
|
||||
if len(enqueued) >= limit:
|
||||
break
|
||||
# Schon bewertet?
|
||||
existing = await get_assessment(doc.drucksache)
|
||||
if existing:
|
||||
skipped += 1
|
||||
continue
|
||||
# Text herunterladen
|
||||
text = await adapter.download_text(doc.drucksache)
|
||||
if not text:
|
||||
continue
|
||||
# Enqueue
|
||||
job_id = str(uuid.uuid4())
|
||||
await create_job(job_id, text[:500], bundesland, "qwen-plus", drucksache=doc.drucksache)
|
||||
try:
|
||||
@ -1468,12 +1457,69 @@ async def batch_analyze(
|
||||
enqueued.append({
|
||||
"drucksache": doc.drucksache,
|
||||
"title": doc.title,
|
||||
"bundesland": bundesland,
|
||||
"job_id": job_id,
|
||||
"queue_position": position,
|
||||
})
|
||||
except QueueFullError:
|
||||
break
|
||||
return enqueued, skipped
|
||||
|
||||
|
||||
@app.post("/api/batch-analyze")
|
||||
@limiter.limit("3/minute")
|
||||
async def batch_analyze(
|
||||
request: Request,
|
||||
bundesland: str = Form(...),
|
||||
limit: int = Form(10),
|
||||
user: dict = Depends(require_admin),
|
||||
):
|
||||
"""Sucht die neuesten Drucksachen im Landtag-Portal und enqueued
|
||||
alle, die noch nicht in der DB bewertet sind.
|
||||
|
||||
`bundesland="ALL"` iteriert ueber alle aktiven Bundeslaender und
|
||||
verteilt das Limit proportional. Adapter-Fehler pro BL werden
|
||||
geloggt und uebersprungen.
|
||||
|
||||
Returns: Liste der enqueued Drucksachen + Queue-Position.
|
||||
"""
|
||||
if limit < 1 or limit > 100:
|
||||
raise HTTPException(status_code=400, detail="limit muss 1-100 sein")
|
||||
|
||||
if bundesland.upper() == "ALL":
|
||||
from .bundeslaender import aktive_bundeslaender
|
||||
active_bls = [bl.code for bl in aktive_bundeslaender()]
|
||||
per_bl = max(1, limit // len(active_bls))
|
||||
enqueued: list[dict] = []
|
||||
skipped_total = 0
|
||||
per_bl_stats: list[dict] = []
|
||||
for bl in active_bls:
|
||||
if len(enqueued) >= limit:
|
||||
break
|
||||
try:
|
||||
bl_enq, bl_skip = await _enqueue_for_bl(bl, per_bl)
|
||||
except Exception as e:
|
||||
logger.warning("batch_analyze BL=%s failed: %s", bl, e)
|
||||
per_bl_stats.append({"bundesland": bl, "error": str(e)[:200]})
|
||||
continue
|
||||
enqueued.extend(bl_enq[: max(0, limit - len(enqueued))])
|
||||
skipped_total += bl_skip
|
||||
per_bl_stats.append({
|
||||
"bundesland": bl, "enqueued": len(bl_enq), "skipped_existing": bl_skip,
|
||||
})
|
||||
return {
|
||||
"status": "batch_enqueued",
|
||||
"bundesland": "ALL",
|
||||
"enqueued": len(enqueued),
|
||||
"skipped_existing": skipped_total,
|
||||
"jobs": enqueued,
|
||||
"per_bundesland": per_bl_stats,
|
||||
}
|
||||
|
||||
# Single BL
|
||||
if not get_adapter(bundesland):
|
||||
raise HTTPException(status_code=400, detail=f"Bundesland {bundesland} nicht unterstützt")
|
||||
enqueued, skipped = await _enqueue_for_bl(bundesland, limit)
|
||||
return {
|
||||
"status": "batch_enqueued",
|
||||
"bundesland": bundesland,
|
||||
|
||||
@ -79,6 +79,7 @@
|
||||
|
||||
<label for="batch-bl">Bundesland</label>
|
||||
<select id="batch-bl" name="bundesland">
|
||||
<option value="ALL">— Alle aktiven Bundesländer (Limit verteilt) —</option>
|
||||
{% for bl in bundeslaender %}
|
||||
<option value="{{ bl.code }}"{% if bl.code == 'NRW' %} selected{% endif %}>{{ bl.name }} ({{ bl.code }})</option>
|
||||
{% endfor %}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user