feat: Batch-Analyse mit "Alle Bundesländer"-Modus
User-Wunsch: Die Batch-Analyse soll Anträge aus mehreren Bundesländern (BL) gleichzeitig heranziehen können, nicht nur aus einem einzelnen.
- Neue Dropdown-Option "— Alle aktiven Bundesländer (Limit verteilt) —" als Default (Hinweis: Im Template trägt NRW weiterhin das Attribut `selected`; ist NRW in der Liste enthalten, bleibt es vorausgewählt).
- Backend: Bei `bundesland=ALL` iteriert der Endpoint über `aktive_bundeslaender()` und verteilt das Limit gleichmäßig (`limit // N` pro BL, mindestens 1).
- Helper `_enqueue_for_bl()` extrahiert die BL-spezifische Logik.
- Adapter-Fehler pro BL werden geloggt und übersprungen; sie blockieren nicht die anderen BL.
- Response-Erweiterung: `per_bundesland`-Liste mit Per-BL-Stats (enqueued / skipped_existing / error).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
f008570cff
commit
48a272a87d
88
app/main.py
88
app/main.py
@ -1416,46 +1416,35 @@ async def search_landtag(
|
|||||||
|
|
||||||
|
|
||||||
# API: Batch-Analyse (#44) — enqueued ungeprüfte Drucksachen eines BL
|
# API: Batch-Analyse (#44) — enqueued ungeprüfte Drucksachen eines BL
|
||||||
@app.post("/api/batch-analyze")
|
async def _enqueue_for_bl(
|
||||||
@limiter.limit("3/minute")
|
bundesland: str, limit: int,
|
||||||
async def batch_analyze(
|
) -> tuple[list[dict], int]:
|
||||||
request: Request,
|
"""Enqueued bis zu ``limit`` neue Drucksachen aus einem BL.
|
||||||
bundesland: str = Form(...),
|
|
||||||
limit: int = Form(10),
|
|
||||||
user: dict = Depends(require_admin),
|
|
||||||
):
|
|
||||||
"""Sucht die neuesten Drucksachen im Landtag-Portal und enqueued
|
|
||||||
alle, die noch nicht in der DB bewertet sind.
|
|
||||||
|
|
||||||
Returns: Liste der enqueued Drucksachen + Queue-Position.
|
Returns ``(enqueued_jobs, skipped_existing)``.
|
||||||
"""
|
"""
|
||||||
from .queue import enqueue, QueueFullError
|
from .queue import enqueue, QueueFullError
|
||||||
|
|
||||||
if limit < 1 or limit > 100:
|
|
||||||
raise HTTPException(status_code=400, detail="limit muss 1-100 sein")
|
|
||||||
|
|
||||||
adapter = get_adapter(bundesland)
|
adapter = get_adapter(bundesland)
|
||||||
if not adapter:
|
if not adapter:
|
||||||
raise HTTPException(status_code=400, detail=f"Bundesland {bundesland} nicht unterstützt")
|
return [], 0
|
||||||
|
|
||||||
# Neueste Drucksachen vom Landtag holen, gefiltert auf abstimmbare Typen (#127).
|
drucksachen = adapter._filter_abstimmbar(
|
||||||
drucksachen = adapter._filter_abstimmbar(await adapter.search("", limit=limit * 10))
|
await adapter.search("", limit=limit * 10)
|
||||||
|
)
|
||||||
|
|
||||||
enqueued = []
|
enqueued: list[dict] = []
|
||||||
skipped = 0
|
skipped = 0
|
||||||
for doc in drucksachen:
|
for doc in drucksachen:
|
||||||
if len(enqueued) >= limit:
|
if len(enqueued) >= limit:
|
||||||
break
|
break
|
||||||
# Schon bewertet?
|
|
||||||
existing = await get_assessment(doc.drucksache)
|
existing = await get_assessment(doc.drucksache)
|
||||||
if existing:
|
if existing:
|
||||||
skipped += 1
|
skipped += 1
|
||||||
continue
|
continue
|
||||||
# Text herunterladen
|
|
||||||
text = await adapter.download_text(doc.drucksache)
|
text = await adapter.download_text(doc.drucksache)
|
||||||
if not text:
|
if not text:
|
||||||
continue
|
continue
|
||||||
# Enqueue
|
|
||||||
job_id = str(uuid.uuid4())
|
job_id = str(uuid.uuid4())
|
||||||
await create_job(job_id, text[:500], bundesland, "qwen-plus", drucksache=doc.drucksache)
|
await create_job(job_id, text[:500], bundesland, "qwen-plus", drucksache=doc.drucksache)
|
||||||
try:
|
try:
|
||||||
@ -1468,12 +1457,69 @@ async def batch_analyze(
|
|||||||
enqueued.append({
|
enqueued.append({
|
||||||
"drucksache": doc.drucksache,
|
"drucksache": doc.drucksache,
|
||||||
"title": doc.title,
|
"title": doc.title,
|
||||||
|
"bundesland": bundesland,
|
||||||
"job_id": job_id,
|
"job_id": job_id,
|
||||||
"queue_position": position,
|
"queue_position": position,
|
||||||
})
|
})
|
||||||
except QueueFullError:
|
except QueueFullError:
|
||||||
break
|
break
|
||||||
|
return enqueued, skipped
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/batch-analyze")
|
||||||
|
@limiter.limit("3/minute")
|
||||||
|
async def batch_analyze(
|
||||||
|
request: Request,
|
||||||
|
bundesland: str = Form(...),
|
||||||
|
limit: int = Form(10),
|
||||||
|
user: dict = Depends(require_admin),
|
||||||
|
):
|
||||||
|
"""Sucht die neuesten Drucksachen im Landtag-Portal und enqueued
|
||||||
|
alle, die noch nicht in der DB bewertet sind.
|
||||||
|
|
||||||
|
`bundesland="ALL"` iteriert ueber alle aktiven Bundeslaender und
|
||||||
|
verteilt das Limit proportional. Adapter-Fehler pro BL werden
|
||||||
|
geloggt und uebersprungen.
|
||||||
|
|
||||||
|
Returns: Liste der enqueued Drucksachen + Queue-Position.
|
||||||
|
"""
|
||||||
|
if limit < 1 or limit > 100:
|
||||||
|
raise HTTPException(status_code=400, detail="limit muss 1-100 sein")
|
||||||
|
|
||||||
|
if bundesland.upper() == "ALL":
|
||||||
|
from .bundeslaender import aktive_bundeslaender
|
||||||
|
active_bls = [bl.code for bl in aktive_bundeslaender()]
|
||||||
|
per_bl = max(1, limit // len(active_bls))
|
||||||
|
enqueued: list[dict] = []
|
||||||
|
skipped_total = 0
|
||||||
|
per_bl_stats: list[dict] = []
|
||||||
|
for bl in active_bls:
|
||||||
|
if len(enqueued) >= limit:
|
||||||
|
break
|
||||||
|
try:
|
||||||
|
bl_enq, bl_skip = await _enqueue_for_bl(bl, per_bl)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("batch_analyze BL=%s failed: %s", bl, e)
|
||||||
|
per_bl_stats.append({"bundesland": bl, "error": str(e)[:200]})
|
||||||
|
continue
|
||||||
|
enqueued.extend(bl_enq[: max(0, limit - len(enqueued))])
|
||||||
|
skipped_total += bl_skip
|
||||||
|
per_bl_stats.append({
|
||||||
|
"bundesland": bl, "enqueued": len(bl_enq), "skipped_existing": bl_skip,
|
||||||
|
})
|
||||||
|
return {
|
||||||
|
"status": "batch_enqueued",
|
||||||
|
"bundesland": "ALL",
|
||||||
|
"enqueued": len(enqueued),
|
||||||
|
"skipped_existing": skipped_total,
|
||||||
|
"jobs": enqueued,
|
||||||
|
"per_bundesland": per_bl_stats,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Single BL
|
||||||
|
if not get_adapter(bundesland):
|
||||||
|
raise HTTPException(status_code=400, detail=f"Bundesland {bundesland} nicht unterstützt")
|
||||||
|
enqueued, skipped = await _enqueue_for_bl(bundesland, limit)
|
||||||
return {
|
return {
|
||||||
"status": "batch_enqueued",
|
"status": "batch_enqueued",
|
||||||
"bundesland": bundesland,
|
"bundesland": bundesland,
|
||||||
|
|||||||
@ -79,6 +79,7 @@
|
|||||||
|
|
||||||
<label for="batch-bl">Bundesland</label>
|
<label for="batch-bl">Bundesland</label>
|
||||||
<select id="batch-bl" name="bundesland">
|
<select id="batch-bl" name="bundesland">
|
||||||
|
<option value="ALL">— Alle aktiven Bundesländer (Limit verteilt) —</option>
|
||||||
{% for bl in bundeslaender %}
|
{% for bl in bundeslaender %}
|
||||||
<option value="{{ bl.code }}"{% if bl.code == 'NRW' %} selected{% endif %}>{{ bl.name }} ({{ bl.code }})</option>
|
<option value="{{ bl.code }}"{% if bl.code == 'NRW' %} selected{% endif %}>{{ bl.name }} ({{ bl.code }})</option>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user