Compare commits
No commits in common. "main" and "gh-pages" have entirely different histories.
40
.coveragerc
40
.coveragerc
@ -1,40 +0,0 @@
|
|||||||
[run]
|
|
||||||
source = app
|
|
||||||
omit =
|
|
||||||
# Hilfs-Skripte und Migrations-Tools — nicht produktiver Code
|
|
||||||
app/reindex_embeddings.py
|
|
||||||
app/sync_abgeordnetenwatch.py
|
|
||||||
# Generated / Auto-Discovery
|
|
||||||
app/__init__.py
|
|
||||||
|
|
||||||
[report]
|
|
||||||
# Faustregel ADR 0007: keine 100%-Jagd, aber kritische Pfade abdecken.
|
|
||||||
# show_missing-Flag macht Luecken im CI-Output sofort sichtbar.
|
|
||||||
#
|
|
||||||
# fail_under=50 ist die aktuelle Baseline (Stand 2026-04-28). Verbleibende
|
|
||||||
# unabgedeckte Bereiche brauchen integration-Setup statt Unit-Tests:
|
|
||||||
# - app/main.py (FastAPI-Endpoints, ~900 LOC) — TestClient-Smoke-Tests
|
|
||||||
# sind lokal geskippt mangels voller Deps; laufen in der Docker-Suite.
|
|
||||||
# - app/parlamente.py (16 Adapter, ~3400 LOC) — Live-HTTP gegen Landtage,
|
|
||||||
# tests/integration/ deckt das ab.
|
|
||||||
# - app/queue.py _worker (async-Loop, while True, hart zu testen).
|
|
||||||
# - app/report.py WeasyPrint-PDF-Render-Pfade.
|
|
||||||
# - app/embeddings.py OpenAI/DashScope-Calls.
|
|
||||||
# Schwelle hochsetzen, wenn integration-Suite lokal lauffaehig wird.
|
|
||||||
show_missing = true
|
|
||||||
skip_covered = false
|
|
||||||
precision = 1
|
|
||||||
fail_under = 50
|
|
||||||
|
|
||||||
# Zeilen, die nicht gezaehlt werden sollen — typische Boilerplate ohne
|
|
||||||
# eigentliche Testbarkeit.
|
|
||||||
exclude_lines =
|
|
||||||
pragma: no cover
|
|
||||||
def __repr__
|
|
||||||
raise NotImplementedError
|
|
||||||
if __name__ == .__main__.:
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
\.\.\.
|
|
||||||
|
|
||||||
[html]
|
|
||||||
directory = htmlcov
|
|
||||||
@ -1,7 +0,0 @@
|
|||||||
data/
|
|
||||||
reports/
|
|
||||||
__pycache__/
|
|
||||||
*.pyc
|
|
||||||
.env
|
|
||||||
venv/
|
|
||||||
.git/
|
|
||||||
@ -1,7 +0,0 @@
|
|||||||
# DashScope API (Alibaba Qwen)
|
|
||||||
DASHSCOPE_API_KEY=your-api-key-here
|
|
||||||
|
|
||||||
# Optional: Keycloak SSO
|
|
||||||
KEYCLOAK_URL=https://sso.example.com
|
|
||||||
KEYCLOAK_REALM=collaboration
|
|
||||||
KEYCLOAK_CLIENT_ID=gwoe-antragspruefer
|
|
||||||
25
.gitignore
vendored
25
.gitignore
vendored
@ -1,25 +0,0 @@
|
|||||||
# Python
|
|
||||||
__pycache__/
|
|
||||||
*.py[cod]
|
|
||||||
*$py.class
|
|
||||||
venv/
|
|
||||||
.env
|
|
||||||
|
|
||||||
# Data (persistent on server, not in repo)
|
|
||||||
data/
|
|
||||||
reports/
|
|
||||||
|
|
||||||
# IDE
|
|
||||||
.idea/
|
|
||||||
.vscode/
|
|
||||||
*.swp
|
|
||||||
|
|
||||||
# OS
|
|
||||||
.DS_Store
|
|
||||||
Thumbs.db
|
|
||||||
site/
|
|
||||||
|
|
||||||
# Coverage reports (Phase 3 von #134, ADR 0007)
|
|
||||||
.coverage
|
|
||||||
.coverage.*
|
|
||||||
htmlcov/
|
|
||||||
@ -1,7 +0,0 @@
|
|||||||
data/
|
|
||||||
reports/
|
|
||||||
__pycache__/
|
|
||||||
*.pyc
|
|
||||||
.env
|
|
||||||
venv/
|
|
||||||
.git/
|
|
||||||
606
404.html
Normal file
606
404.html
Normal file
@ -0,0 +1,606 @@
|
|||||||
|
|
||||||
|
<!doctype html>
|
||||||
|
<html lang="de" class="no-js">
|
||||||
|
<head>
|
||||||
|
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<meta name="viewport" content="width=device-width,initial-scale=1">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<link rel="icon" href="/assets/images/favicon.png">
|
||||||
|
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.6">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<title>GWÖ-Antragsprüfer Docs</title>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<link rel="stylesheet" href="/assets/stylesheets/main.484c7ddc.min.css">
|
||||||
|
|
||||||
|
|
||||||
|
<link rel="stylesheet" href="/assets/stylesheets/palette.ab4e12ef.min.css">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
||||||
|
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
|
||||||
|
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<script>__md_scope=new URL("/",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</head>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<body dir="ltr" data-md-color-scheme="default" data-md-color-primary="teal" data-md-color-accent="light-green">
|
||||||
|
|
||||||
|
|
||||||
|
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
|
||||||
|
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
|
||||||
|
<label class="md-overlay" for="__drawer"></label>
|
||||||
|
<div data-md-component="skip">
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<div data-md-component="announce">
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<header class="md-header md-header--shadow" data-md-component="header">
|
||||||
|
<nav class="md-header__inner md-grid" aria-label="Kopfzeile">
|
||||||
|
<a href="/." title="GWÖ-Antragsprüfer Docs" class="md-header__button md-logo" aria-label="GWÖ-Antragsprüfer Docs" data-md-component="logo">
|
||||||
|
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
|
||||||
|
|
||||||
|
</a>
|
||||||
|
<label class="md-header__button md-icon" for="__drawer">
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
|
||||||
|
</label>
|
||||||
|
<div class="md-header__title" data-md-component="header-title">
|
||||||
|
<div class="md-header__ellipsis">
|
||||||
|
<div class="md-header__topic">
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
GWÖ-Antragsprüfer Docs
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
<div class="md-header__topic" data-md-component="header-topic">
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<label class="md-header__button md-icon" for="__search">
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
|
||||||
|
</label>
|
||||||
|
<div class="md-search" data-md-component="search" role="dialog">
|
||||||
|
<label class="md-search__overlay" for="__search"></label>
|
||||||
|
<div class="md-search__inner" role="search">
|
||||||
|
<form class="md-search__form" name="search">
|
||||||
|
<input type="text" class="md-search__input" name="query" aria-label="Suche" placeholder="Suche" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
|
||||||
|
<label class="md-search__icon md-icon" for="__search">
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
|
||||||
|
</label>
|
||||||
|
<nav class="md-search__options" aria-label="Suche">
|
||||||
|
|
||||||
|
<button type="reset" class="md-search__icon md-icon" title="Zurücksetzen" aria-label="Zurücksetzen" tabindex="-1">
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
|
||||||
|
</button>
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
</form>
|
||||||
|
<div class="md-search__output">
|
||||||
|
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
|
||||||
|
<div class="md-search-result" data-md-component="search-result">
|
||||||
|
<div class="md-search-result__meta">
|
||||||
|
Suche wird initialisiert
|
||||||
|
</div>
|
||||||
|
<ol class="md-search-result__list" role="presentation"></ol>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<div class="md-header__source">
|
||||||
|
<a href="https://repo.toppyr.de/tobias/gwoe-antragspruefer" title="Zum Repository" class="md-source" data-md-component="source">
|
||||||
|
<div class="md-source__icon md-icon">
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
|
||||||
|
</div>
|
||||||
|
<div class="md-source__repository">
|
||||||
|
tobias/gwoe-antragspruefer
|
||||||
|
</div>
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
</header>
|
||||||
|
|
||||||
|
<div class="md-container" data-md-component="container">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<main class="md-main" data-md-component="main">
|
||||||
|
<div class="md-main__inner md-grid">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
|
||||||
|
<div class="md-sidebar__scrollwrap">
|
||||||
|
<div class="md-sidebar__inner">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
||||||
|
<label class="md-nav__title" for="__drawer">
|
||||||
|
<a href="/." title="GWÖ-Antragsprüfer Docs" class="md-nav__button md-logo" aria-label="GWÖ-Antragsprüfer Docs" data-md-component="logo">
|
||||||
|
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
|
||||||
|
|
||||||
|
</a>
|
||||||
|
GWÖ-Antragsprüfer Docs
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<div class="md-nav__source">
|
||||||
|
<a href="https://repo.toppyr.de/tobias/gwoe-antragspruefer" title="Zum Repository" class="md-source" data-md-component="source">
|
||||||
|
<div class="md-source__icon md-icon">
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
|
||||||
|
</div>
|
||||||
|
<div class="md-source__repository">
|
||||||
|
tobias/gwoe-antragspruefer
|
||||||
|
</div>
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<ul class="md-nav__list" data-md-scrollfix>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="/." class="md-nav__link">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
Start
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_2" >
|
||||||
|
|
||||||
|
|
||||||
|
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
Architecture Decision Records
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-nav__icon md-icon"></span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
|
||||||
|
<label class="md-nav__title" for="__nav_2">
|
||||||
|
<span class="md-nav__icon md-icon"></span>
|
||||||
|
|
||||||
|
|
||||||
|
Architecture Decision Records
|
||||||
|
|
||||||
|
|
||||||
|
</label>
|
||||||
|
<ul class="md-nav__list" data-md-scrollfix>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="/adr/" class="md-nav__link">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
Übersicht
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="/adr/0001-llm-citation-binding/" class="md-nav__link">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
0001 LLM-Citation-Binding
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="/adr/0002-adapter-architecture/" class="md-nav__link">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
0002 Adapter-Architektur
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="/adr/0003-citation-property-tests/" class="md-nav__link">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
0003 Citation-Property-Tests
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="/adr/0004-deployment-workflow/" class="md-nav__link">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
0004 Deployment-Workflow
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</ul>
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_3" >
|
||||||
|
|
||||||
|
|
||||||
|
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
Archiv
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-nav__icon md-icon"></span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
|
||||||
|
<label class="md-nav__title" for="__nav_3">
|
||||||
|
<span class="md-nav__icon md-icon"></span>
|
||||||
|
|
||||||
|
|
||||||
|
Archiv
|
||||||
|
|
||||||
|
|
||||||
|
</label>
|
||||||
|
<ul class="md-nav__list" data-md-scrollfix>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="/archive/" class="md-nav__link">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
Übersicht
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</ul>
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</ul>
|
||||||
|
</nav>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
|
||||||
|
<div class="md-sidebar__scrollwrap">
|
||||||
|
<div class="md-sidebar__inner">
|
||||||
|
|
||||||
|
|
||||||
|
<nav class="md-nav md-nav--secondary" aria-label="Inhaltsverzeichnis">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</nav>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<div class="md-content" data-md-component="content">
|
||||||
|
|
||||||
|
<article class="md-content__inner md-typeset">
|
||||||
|
|
||||||
|
<h1>404 - Not found</h1>
|
||||||
|
|
||||||
|
</article>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</main>
|
||||||
|
|
||||||
|
<footer class="md-footer">
|
||||||
|
|
||||||
|
<div class="md-footer-meta md-typeset">
|
||||||
|
<div class="md-footer-meta__inner md-grid">
|
||||||
|
<div class="md-copyright">
|
||||||
|
|
||||||
|
|
||||||
|
Made with
|
||||||
|
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
|
||||||
|
Material for MkDocs
|
||||||
|
</a>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</footer>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<div class="md-dialog" data-md-component="dialog">
|
||||||
|
<div class="md-dialog__inner md-typeset"></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<script id="__config" type="application/json">{"annotate": null, "base": "/", "features": ["navigation.sections", "navigation.expand", "search.highlight"], "search": "/assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "In Zwischenablage kopiert", "clipboard.copy": "In Zwischenablage kopieren", "search.result.more.one": "1 weiteres Suchergebnis auf dieser Seite", "search.result.more.other": "# weitere Suchergebnisse auf dieser Seite", "search.result.none": "Keine Suchergebnisse", "search.result.one": "1 Suchergebnis", "search.result.other": "# Suchergebnisse", "search.result.placeholder": "Suchbegriff eingeben", "search.result.term.missing": "Es fehlt", "select.version": "Version ausw\u00e4hlen"}, "version": null}</script>
|
||||||
|
|
||||||
|
|
||||||
|
<script src="/assets/javascripts/bundle.79ae519e.min.js"></script>
|
||||||
|
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
100
CHANGELOG.md
100
CHANGELOG.md
@ -1,100 +0,0 @@
|
|||||||
# Changelog
|
|
||||||
|
|
||||||
Alle markanten Änderungen pro Release. Format an [Keep a Changelog](https://keepachangelog.com/de/1.1.0/) angelehnt, semantisches Versioning.
|
|
||||||
|
|
||||||
## [1.0.0] — 2026-04-21
|
|
||||||
|
|
||||||
Erstes konsolidiertes Release nach längerer 0.x-Entwicklungsphase. Live unter
|
|
||||||
<https://gwoe.toppyr.de/>.
|
|
||||||
|
|
||||||
### Hinzugefügt — Frontend (v2)
|
|
||||||
|
|
||||||
- **Komplettes Redesign** auf das ECOnGOOD Corporate Design (Manual Juni 2024) — Tokens-Datei, Avenir/Nunito-Sans-Stack, Phosphor-Icon-Set, Dark-Mode mit `data-theme`-Attribut (#114, #139)
|
|
||||||
- **AppShell** mit zwei-Spalten-Layout (Sidebar 230 px, Main), Drawer auf Mobile, Navigation in vier Gruppen LESEN/PRÜFEN/DATEN/ADMINISTRATION
|
|
||||||
- **Server-Side-Routing** für Antragsdetail (`/antrag/{drucksache}`), keine reine Client-Seite mehr
|
|
||||||
- **Login-Modal** in der Topbar mit Tabs Anmelden/Registrieren via Direct-Access-Grant — kein Keycloak-Redirect mehr (#129)
|
|
||||||
- **Keyboard-Shortcuts** j/k/Enter/Esc/?/⏎ im Listenmodus mit Help-Modal
|
|
||||||
- **Sort-Dropdown** mit acht Optionen (Score/Datum/Drs.-Nr./Titel je asc/desc), localStorage-persistiert
|
|
||||||
- **Antragsdetail vollständig** mit ScoreHero, Matrix-Mini 5×5 (klickbar mit Erklärungs-Modal), Programm-Treue-Tabelle pro Fraktion (auch ohne Zitate), §INS§/§DEL§-Redline-Parser, Versionshistorie, namentlichem Abstimmungsverhalten als Balken pro Fraktion (#106 Phase 1)
|
|
||||||
- **Bookmarks/Voting/Kommentare/Share/Re-Analyze** alle in v2-Detail integriert mit Auth-Modal-Fallback
|
|
||||||
- **Live-Landtag-Suche** als eigener Screen `/v2/landtag-suche`
|
|
||||||
- **Admin-Panel** mit drei Screens (Freischaltungen, Queue mit 5 s Auto-Refresh, Abos für alle User)
|
|
||||||
- **Open-Graph-Bilder** pro Antrag (1200×630 PNG, Playwright-gerendert, SHA-Cache) (#141)
|
|
||||||
|
|
||||||
### Hinzugefügt — Backend
|
|
||||||
|
|
||||||
- **16 Landesparlamente + Bundestag** als Adapter (BUND, NRW, BE, HH, BW, RP, LSA, MV, HB, HE, BY, SL, TH, BB, SN, SH; NI deferred wegen Login)
|
|
||||||
- **abgeordnetenwatch.de-Integration** Phase 1 für strukturierte Roll-Call-Votes — 28 977 BT-Votes in DB, Drucksachen-Match via 9 BL-spezifische URL-Patterns + Datum/Titel-Fallback (#106)
|
|
||||||
- **Drucksachen-Typen-Normalisierung** filtert Anträge/Gesetzentwürfe von Kleinen Anfragen etc. (#127)
|
|
||||||
- **Embeddings v3 → v4** Modell-Migration mit WRITE/READ-Pattern (ADR 0006)
|
|
||||||
- **DDD-Lightweight-Migration** Tag 1-4: `LlmBewerter`-Port, `QwenBewerter`-Adapter, drei Repositories (Antrag/Bewertung/Abonnement), Domain-Verhalten auf Pydantic-Modellen (ADR 0008, #136)
|
|
||||||
- **Mail-Digest** mit täglichem Cron 07:00, BL/Partei-Filter pro User-Abo (#124)
|
|
||||||
- **Monitoring-Scan** aller Adapter mit Kosten-Schätzung — Beobachtung ohne Auto-Fetch, Mail-Report mit „0-Kontext"-Hinweis (#135)
|
|
||||||
- **Merkliste server-seitig** mit Migration aus localStorage (#140)
|
|
||||||
- **Wahlprogramm-Auto-Download** halbautomatisch mit SHA-Gate, kuratierte URL-Liste, Admin-UI (#138)
|
|
||||||
- **Fehlende Wahlprogramme** automatisch im Assessment markiert + UI-Hinweis (#128)
|
|
||||||
- **Clustering** via Embedding-Nähe-Graph mit Bubble-Chart (#105)
|
|
||||||
- **Background-Queue** mit drei parallelen Workern, Graceful Shutdown 15 min, Job-Persistenz (#99)
|
|
||||||
- **Voting + Kommentare** mit Visibility-Modi (öffentlich/angemeldet/nur ich) (#94)
|
|
||||||
- **RSS/Atom-Feed** für neue Bewertungen (#125)
|
|
||||||
|
|
||||||
### Hinzugefügt — Tests & Doku
|
|
||||||
|
|
||||||
- **574 Tests, 13 skipped** — Unit-Suite < 2 s, plus Integration/E2E unter Markern
|
|
||||||
- **Bug-Regression-Tests** für fünf historische Fixes (PRAGMA-Cursor, JWT-azp, CDU-PDF, PFLICHT-FRAKTIONEN, NRW-Titel)
|
|
||||||
- **Live-Adapter-Tests** + Frontend-Cross-Validation + Citation-Substring-Tests (`pytest -m integration`)
|
|
||||||
- **Playwright-E2E-Tests** (`pytest -m e2e`)
|
|
||||||
- **Smoke-Test-Script** `scripts/smoke-test.sh` für Gesamt-Funktionsprüfung gegen Live-System
|
|
||||||
- **8 ADRs** dokumentiert, plus DDD-Bewertung (1 237 LOC) und Protokoll-Parser-v6-Machbarkeit (418 LOC)
|
|
||||||
- **Zugriffsrechte-Doc** mit 63 Routes × User-Status-Matrix
|
|
||||||
- **Doppel-Lizenz** Code MIT + Daten/Bewertungen CC-BY-4.0
|
|
||||||
|
|
||||||
### Geändert
|
|
||||||
|
|
||||||
- `/` zeigt jetzt v2-Frontend, classic unter `/classic` weiterhin erreichbar
|
|
||||||
- Auswertungen mit BL-Filter (#137 fix)
|
|
||||||
- Direkt-Verlinkbarkeit (`/antrag/{drs}`) als Permalinks ersetzen Query-Parameter (#132)
|
|
||||||
- Social-Media-Texte werden vom LLM erzeugt und in DB gespeichert (#133)
|
|
||||||
- v5-Prompt mit PFLICHT-FRAKTIONEN aller LT-Fraktionen, nicht nur Antragsteller+Regierung
|
|
||||||
- Citation-Binding server-seitig: Quellen-Label der Zitate werden gegen die tatsächlich abgerufenen Chunks rekonstruiert (ADR 0001)
|
|
||||||
- Mail-Digest-Template mit „0-Kontext"-Hinweis falls keine neuen Drucksachen seit letztem Scan
|
|
||||||
- Login als HttpOnly-Cookie + separate `rt`-Cookie für Refresh-Token (`/api/auth/logout`-Route für sauberen Cookie-Reset)
|
|
||||||
|
|
||||||
### Bekannte Einschränkungen
|
|
||||||
|
|
||||||
- **NI (Niedersachsen)** im Monitoring-Scan geskippt — NILAS-Portal ist Login-protected, HAR-Capture nötig (#22)
|
|
||||||
- **Saarland-Adapter** swallowt manche httpx-Exceptions tiefer im Code als der erste Fix-Layer (#142)
|
|
||||||
- **Drucksachen-Match in MV/BY/BB/TH/HH/SL** für abgeordnetenwatch-Polls noch lückenhaft — deren `field_intro`-HTML enthält keine PDF-Links, der Datum+Titel-Fallback hängt von vorheriger Indexierung ab
|
|
||||||
- **Plenarprotokoll-Parser v6** für nicht-namentliche Abstimmungen ist Phase 2, nicht in 1.0 (#106 follow-up)
|
|
||||||
- **DDD-Callsite-Migration** in `main.py` (~21 direkte Database-Aufrufe → Repository-Dependency-Injection) als Folge-PR offen (#136 follow-up)
|
|
||||||
|
|
||||||
### Sicherheit
|
|
||||||
|
|
||||||
- **Security-Headers** (CSP, X-Frame-Options, X-Content-Type-Options, Referrer-Policy, Permissions-Policy)
|
|
||||||
- **Rate-Limiting** auf teuren POST-Endpoints (10/min auf `/api/analyze-drucksache`)
|
|
||||||
- **Eingabe-Validatoren** (Drucksachen-Format-Regex, Such-Query-Längen-Cap)
|
|
||||||
- **JWT-Validation** über Keycloak JWKS, `azp`-Check statt `aud` für Public Clients (49c1b92)
|
|
||||||
|
|
||||||
### Statistik
|
|
||||||
|
|
||||||
- 11 789 LOC Python in `app/`
|
|
||||||
- 23 Module, 8 Templates-Verzeichnisse
|
|
||||||
- 71 produktive Bewertungen in der Live-DB
|
|
||||||
- 85 Wahlprogramme indexiert (Embeddings v4, ~50 000 Chunks)
|
|
||||||
- 28 977 abgeordnetenwatch-Votes
|
|
||||||
- 574 Tests, 0 Regressions
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## [0.x] — Pre-Release-Phase
|
|
||||||
|
|
||||||
Frühere Iterationen. Siehe `git log` für Detail-Historie. Wesentliche Meilensteine:
|
|
||||||
|
|
||||||
- v3 → v4 Embedding-Migration (#123)
|
|
||||||
- Clustering + Force-Graph (#105, #108)
|
|
||||||
- Bookmarks + Kommentare (#94)
|
|
||||||
- Methodik-/Transparenz-Seite (#96)
|
|
||||||
- Keycloak SSO (#43)
|
|
||||||
- Multi-BL-Adapter (#22 Reihe, #72-#87)
|
|
||||||
|
|
||||||
[1.0.0]: https://repo.toppyr.de/tobias/gwoe-antragspruefer/releases/tag/v1.0.0
|
|
||||||
57
DATA-LICENSE
57
DATA-LICENSE
@ -1,57 +0,0 @@
|
|||||||
Datenrechte für GWÖ-Antragsprüfer
|
|
||||||
================================================================================
|
|
||||||
|
|
||||||
Copyright (c) 2026 Tobias Rödel und Mitwirkende
|
|
||||||
|
|
||||||
Dieses Werk umfasst alle vom GWÖ-Antragsprüfer **erzeugten** Inhalte:
|
|
||||||
- Bewertungen (Assessments) im JSON-Format
|
|
||||||
- GWÖ-Score-Werte und Matrix-Zuordnungen
|
|
||||||
- Begründungstexte und Empfehlungen
|
|
||||||
- Verbesserungsvorschläge im Redline-Format
|
|
||||||
- Themen-Tags, Stärken/Schwächen-Listen
|
|
||||||
- Aggregations-Tabellen und Auswertungs-Daten
|
|
||||||
- Generierte PDF-Berichte
|
|
||||||
|
|
||||||
Diese Inhalte sind lizenziert unter der
|
|
||||||
|
|
||||||
Creative Commons Attribution 4.0 International License (CC BY 4.0)
|
|
||||||
|
|
||||||
https://creativecommons.org/licenses/by/4.0/deed.de
|
|
||||||
|
|
||||||
Du darfst:
|
|
||||||
- Teilen — das Material in jedwedem Format oder Medium vervielfältigen und
|
|
||||||
weiterverbreiten
|
|
||||||
- Bearbeiten — das Material remixen, verändern und darauf aufbauen
|
|
||||||
und zwar für beliebige Zwecke, auch kommerziell.
|
|
||||||
|
|
||||||
Unter folgenden Bedingungen:
|
|
||||||
- Namensnennung — Du musst angemessene Urheber- und Rechteangaben machen,
|
|
||||||
einen Link zur Lizenz beifügen und angeben, ob Änderungen vorgenommen
|
|
||||||
wurden. Empfohlene Quellangabe:
|
|
||||||
|
|
||||||
"GWÖ-Antragsprüfer · gwoe.toppyr.de · CC BY 4.0"
|
|
||||||
|
|
||||||
- Keine weiteren Einschränkungen — Du darfst keine zusätzlichen Klauseln
|
|
||||||
oder technische Verfahren einsetzen, die anderen rechtlich irgendetwas
|
|
||||||
untersagen, was die Lizenz erlaubt.
|
|
||||||
|
|
||||||
================================================================================
|
|
||||||
NICHT von dieser Lizenz gedeckt:
|
|
||||||
|
|
||||||
- Quellcode des GWÖ-Antragsprüfers selbst — siehe LICENSE (MIT).
|
|
||||||
|
|
||||||
- Original-Antrags-PDFs und Plenarprotokolle der Landesparlamente und des
|
|
||||||
Bundestags — diese unterliegen den jeweiligen Veröffentlichungs-
|
|
||||||
Bedingungen ihrer Quellen. Sie werden vom Antragsprüfer ausschließlich
|
|
||||||
zur Bewertung referenziert, nicht weiterverbreitet.
|
|
||||||
|
|
||||||
- Wahlprogramme und Grundsatzprogramme der politischen Parteien — diese
|
|
||||||
sind urheberrechtlich geschützt und gehören den jeweiligen Parteien.
|
|
||||||
Indexierte Snippets werden im Rahmen des Zitatrechts (§ 51 UrhG)
|
|
||||||
zur Verifikation der Bewertungen genutzt.
|
|
||||||
|
|
||||||
- Logos und CD-Elemente der Gemeinwohl-Ökonomie / ECOnGOOD — diese
|
|
||||||
unterliegen den Markenrichtlinien der ECOnGOOD-Föderation.
|
|
||||||
|
|
||||||
================================================================================
|
|
||||||
Kontakt für Lizenzfragen: mail@tobiasroedel.de
|
|
||||||
34
Dockerfile
34
Dockerfile
@ -1,34 +0,0 @@
|
|||||||
FROM python:3.12-slim
|
|
||||||
|
|
||||||
# Install system dependencies for WeasyPrint
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
||||||
libpango-1.0-0 \
|
|
||||||
libpangocairo-1.0-0 \
|
|
||||||
libgdk-pixbuf-2.0-0 \
|
|
||||||
libffi-dev \
|
|
||||||
shared-mime-info \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
# Install Python dependencies
|
|
||||||
COPY requirements.txt .
|
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
|
||||||
|
|
||||||
# Copy application code only (data/reports are mounted as volumes)
|
|
||||||
COPY app/ ./app/
|
|
||||||
|
|
||||||
# Create non-root user and directories (#119 Security)
|
|
||||||
RUN adduser --disabled-password --gecos '' --uid 1000 appuser \
|
|
||||||
&& mkdir -p /app/data /app/reports \
|
|
||||||
&& chown -R appuser:appuser /app
|
|
||||||
|
|
||||||
USER appuser
|
|
||||||
|
|
||||||
# Environment
|
|
||||||
ENV PYTHONUNBUFFERED=1
|
|
||||||
ENV PYTHONDONTWRITEBYTECODE=1
|
|
||||||
|
|
||||||
EXPOSE 8000
|
|
||||||
|
|
||||||
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
|
||||||
21
LICENSE
21
LICENSE
@ -1,21 +0,0 @@
|
|||||||
MIT License
|
|
||||||
|
|
||||||
Copyright (c) 2026 Tobias Rödel
|
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
|
||||||
in the Software without restriction, including without limitation the rights
|
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
|
||||||
furnished to do so, subject to the following conditions:
|
|
||||||
|
|
||||||
The above copyright notice and this permission notice shall be included in all
|
|
||||||
copies or substantial portions of the Software.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
SOFTWARE.
|
|
||||||
147
README.md
147
README.md
@ -1,147 +0,0 @@
|
|||||||
# GWÖ-Antragsprüfer
|
|
||||||
|
|
||||||
**Automatische Gemeinwohl-Bilanzierung von Parlamentsanträgen nach der GWÖ-Matrix 2.0 für Gemeinden**
|
|
||||||
|
|
||||||

|
|
||||||

|
|
||||||

|
|
||||||

|
|
||||||

|
|
||||||
|
|
||||||
Live unter <https://gwoe.toppyr.de/>.
|
|
||||||
|
|
||||||
## Was macht das Tool?
|
|
||||||
|
|
||||||
Der GWÖ-Antragsprüfer analysiert Anträge aus deutschen Landesparlamenten und dem Bundestag und bewertet sie nach den Kriterien der **Gemeinwohl-Ökonomie (GWÖ)**:
|
|
||||||
|
|
||||||
- **GWÖ-Score (0–10)** — Wie gut entspricht der Antrag den GWÖ-Werten?
|
|
||||||
- **Matrix-Zuordnung** — Welche der 25 Felder der GWÖ-Matrix für Gemeinden werden adressiert?
|
|
||||||
- **Programm-Treue** — Passt der Antrag zum Wahl- und Grundsatzprogramm jeder Fraktion?
|
|
||||||
- **Verbesserungsvorschläge** — Konkrete Textänderungen mit GWÖ-Begründung im Redline-Format
|
|
||||||
- **Zitate mit Verifikation** — Belege aus den Wahl-/Grundsatzprogrammen, server-seitig gegen Original-Chunks geprüft (siehe ADR 0001)
|
|
||||||
|
|
||||||
## Aktive Datenquellen (Stand Release 1.0)
|
|
||||||
|
|
||||||
**16 Bundesländer + Bundestag** — alle aktiven Adapter:
|
|
||||||
|
|
||||||
| BL | Wahlperiode | Quelle |
|
|
||||||
|---|---|---|
|
|
||||||
| BUND | 21 (2025–2029) | bundestag.de DIP |
|
|
||||||
| BW | 17 (2021–2026) | PARLIS |
|
|
||||||
| BY | 19 (2023–2028) | Bayern Landtag |
|
|
||||||
| BE | 19 (2023–2026) | Berlin AGH |
|
|
||||||
| BB | 8 (2024–2029) | StarWeb |
|
|
||||||
| HB | 21 (2023–2027) | ParlDok |
|
|
||||||
| HH | 23 (2025–2029) | ParlDok |
|
|
||||||
| HE | 21 (2024–2029) | Hessen Landtag |
|
|
||||||
| MV | 8 (2021–2026) | ParlDok |
|
|
||||||
| NI | — | NILAS (login-protected, deferred) |
|
|
||||||
| NRW | 18 (2022–2027) | OPAL |
|
|
||||||
| RP | 18 (2021–2026) | StarWeb |
|
|
||||||
| LSA | 8 (2021–2026) | StarWeb |
|
|
||||||
| SL | 17 (2022–2027) | Umbraco |
|
|
||||||
| SN | 8 (2024–2029) | XML-Export |
|
|
||||||
| SH | 20 (2022–2027) | Schleswig-Holstein |
|
|
||||||
| TH | 8 (2024–2029) | StarWeb |
|
|
||||||
|
|
||||||
Plus **abgeordnetenwatch.de**-Integration für strukturierte namentliche Abstimmungen (alle 16 BL + BT).
|
|
||||||
|
|
||||||
## Features
|
|
||||||
|
|
||||||
### Frontend (v2, ECOnGOOD-CD)
|
|
||||||
|
|
||||||
- **Listenansicht** mit Score-Band-Filter, BL-Chip-Filter, Sort-Dropdown (8 Optionen), Live-Suche
|
|
||||||
- **Antragsdetail** mit ScoreHero, Matrix 5×5, Zitaten, Redline-Diff, Programm-Treue pro Fraktion, Versionshistorie, namentlichem Abstimmungsverhalten (wenn vorhanden)
|
|
||||||
- **Bookmark-Liste** (server-seitig pro User), **Kommentare**, **Voting**, **Share-Buttons** (Threads/X/Mastodon mit LLM-Texten), **Re-Analyze**
|
|
||||||
- **Auswertungen** mit BL×Partei-Matrix, Themen×Fraktion-Heatmap, Cluster-Bubble-Chart
|
|
||||||
- **Tag-Cloud**, **Cluster-Liste**, **Landtag-Live-Suche**, **Methodik**, **Quellen**
|
|
||||||
- **Admin-Panel** Freischaltungen / Queue / Abos / Wahlprogramme
|
|
||||||
- **Dark-Mode**, **Phosphor-Icons**, Avenir/Nunito-Sans, **Keyboard-Shortcuts** (j/k/Enter/Esc/?/⏎)
|
|
||||||
|
|
||||||
### Backend
|
|
||||||
|
|
||||||
- **FastAPI** + Jinja2 + Vanilla JS (kein Build-Tool)
|
|
||||||
- **SQLite** mit aiosqlite (Source of Truth)
|
|
||||||
- **Qwen-Plus** (DashScope) für die LLM-Bewertung — austauschbar via `LlmBewerter`-Port (ADR 0008)
|
|
||||||
- **Embeddings v4** für die Zitat-Verifikation (ADR 0006)
|
|
||||||
- **Keycloak SSO** mit Direct-Access-Grant (Login-Modal in der App, kein Redirect)
|
|
||||||
- **Background-Queue** mit 3 parallelen Workern + Graceful Shutdown
|
|
||||||
- **Daily-Digest-Mail** für Abonnent:innen
|
|
||||||
- **Monitoring-Scan** aller Adapter mit Kosten-Schätzung — Beobachtung ohne Auto-Fetch
|
|
||||||
- **OG-Cards** (Open-Graph-Bilder pro Antrag, Playwright-gerendert)
|
|
||||||
- **WeasyPrint** für PDF-Reports
|
|
||||||
|
|
||||||
### Tests
|
|
||||||
|
|
||||||
- **574 Tests, 13 skipped** — Unit + Integration + Property + Bug-Regression + DDD
|
|
||||||
- Live-Adapter-Tests gegen alle 17 Quellen (`pytest -m integration`)
|
|
||||||
- Citation-Substring-Verification gegen Original-PDFs
|
|
||||||
- E2E-Browser-Tests via Playwright (`pytest -m e2e`)
|
|
||||||
|
|
||||||
## Architektur
|
|
||||||
|
|
||||||
Detailliert in [`docs/`](docs/):
|
|
||||||
|
|
||||||
- [`docs/adr/`](docs/adr/) — Architecture Decision Records (8 ADRs)
|
|
||||||
- [`docs/analysen/ddd-bewertung.md`](docs/analysen/ddd-bewertung.md) — DDD-Analyse + Migrations-Roadmap
|
|
||||||
- [`docs/reference/zugriffsrechte.md`](docs/reference/zugriffsrechte.md) — 63 Routes × User-Status-Matrix
|
|
||||||
- [`docs/reference/api.md`](docs/reference/api.md) — API-Reference
|
|
||||||
|
|
||||||
DDD-Lightweight-Migration ist **Tag 1-4 abgeschlossen** (Ports, Adapter, Repositories, Domain-Verhalten — siehe ADR 0008). Callsite-Migration in `main.py` ist Folge-PR.
|
|
||||||
|
|
||||||
## Schnellstart
|
|
||||||
|
|
||||||
### Voraussetzungen
|
|
||||||
|
|
||||||
- Docker + Docker Compose
|
|
||||||
- Python 3.12 (für lokale Tests)
|
|
||||||
- DashScope API-Key (Qwen)
|
|
||||||
- Keycloak (optional, für Login)
|
|
||||||
|
|
||||||
### Installation
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git clone https://repo.toppyr.de/tobias/gwoe-antragspruefer
|
|
||||||
cd gwoe-antragspruefer/webapp
|
|
||||||
cp .env.example .env # API-Keys eintragen
|
|
||||||
docker compose up -d --build
|
|
||||||
```
|
|
||||||
|
|
||||||
App auf <http://localhost:8000>.
|
|
||||||
|
|
||||||
### Tests
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python3 -m pytest tests/ -q # Unit-Suite (574 Tests, < 2 s)
|
|
||||||
python3 -m pytest tests/ -m integration # Live-Adapter-Tests (langsam)
|
|
||||||
./scripts/smoke-test.sh # Gesamt-Funktionsprüfung gegen Live
|
|
||||||
```
|
|
||||||
|
|
||||||
### Deploy (Server)
|
|
||||||
|
|
||||||
```bash
|
|
||||||
./scripts/deploy.sh # mit Uptime-Kuma-Wartungsmodus
|
|
||||||
./scripts/run-digest.sh # Daily-Mail-Digest (Cron 07:00)
|
|
||||||
./scripts/run-monitoring-scan.sh # Monitoring-Scan (manuell oder Cron)
|
|
||||||
```
|
|
||||||
|
|
||||||
## Lizenz
|
|
||||||
|
|
||||||
Zwei getrennte Lizenzen:
|
|
||||||
|
|
||||||
- **Quellcode** — [MIT](LICENSE)
|
|
||||||
- **Bewertungs-Daten und -Berichte** (Assessments, Matrix-Zuordnungen, Verbesserungsvorschläge, Themen-Tags etc.) — [CC BY 4.0](DATA-LICENSE)
|
|
||||||
|
|
||||||
Wahlprogramme und Antrags-PDFs der Parlamente unterliegen der jeweiligen Urheber-Lizenz der Quelle und werden hier nur zur Verifikation referenziert.
|
|
||||||
|
|
||||||
## Mitwirken
|
|
||||||
|
|
||||||
Issues unter <https://repo.toppyr.de/tobias/gwoe-antragspruefer>. Pull Requests willkommen — beachte ADR 0004 (Deployment-Workflow) und die Test-Konventionen in `pytest.ini`.
|
|
||||||
|
|
||||||
## Statistiken (Stand Release 1.0)
|
|
||||||
|
|
||||||
- 16 BL + Bundestag aktiv
|
|
||||||
- 85 Wahlprogramme indexiert (Embeddings v4)
|
|
||||||
- 71 produktive Bewertungen in der Live-DB
|
|
||||||
- 28 977 abgeordnetenwatch-Votes (BUND)
|
|
||||||
- 11 789 LOC Python in `app/`
|
|
||||||
1059
adr/0001-llm-citation-binding/index.html
Normal file
1059
adr/0001-llm-citation-binding/index.html
Normal file
File diff suppressed because it is too large
Load Diff
1088
adr/0002-adapter-architecture/index.html
Normal file
1088
adr/0002-adapter-architecture/index.html
Normal file
File diff suppressed because it is too large
Load Diff
1036
adr/0003-citation-property-tests/index.html
Normal file
1036
adr/0003-citation-property-tests/index.html
Normal file
File diff suppressed because it is too large
Load Diff
1155
adr/0004-deployment-workflow/index.html
Normal file
1155
adr/0004-deployment-workflow/index.html
Normal file
File diff suppressed because it is too large
Load Diff
838
adr/index.html
Normal file
838
adr/index.html
Normal file
@ -0,0 +1,838 @@
|
|||||||
|
|
||||||
|
<!doctype html>
|
||||||
|
<html lang="de" class="no-js">
|
||||||
|
<head>
|
||||||
|
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<meta name="viewport" content="width=device-width,initial-scale=1">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<link rel="canonical" href="https://docs.gwoe.toppyr.de/adr/">
|
||||||
|
|
||||||
|
|
||||||
|
<link rel="prev" href="..">
|
||||||
|
|
||||||
|
|
||||||
|
<link rel="next" href="0001-llm-citation-binding/">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<link rel="icon" href="../assets/images/favicon.png">
|
||||||
|
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.6">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<title>Übersicht - GWÖ-Antragsprüfer Docs</title>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<link rel="stylesheet" href="../assets/stylesheets/main.484c7ddc.min.css">
|
||||||
|
|
||||||
|
|
||||||
|
<link rel="stylesheet" href="../assets/stylesheets/palette.ab4e12ef.min.css">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
||||||
|
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
|
||||||
|
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<script>__md_scope=new URL("..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</head>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<body dir="ltr" data-md-color-scheme="default" data-md-color-primary="teal" data-md-color-accent="light-green">
|
||||||
|
|
||||||
|
|
||||||
|
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
|
||||||
|
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
|
||||||
|
<label class="md-overlay" for="__drawer"></label>
|
||||||
|
<div data-md-component="skip">
|
||||||
|
|
||||||
|
|
||||||
|
<a href="#architecture-decision-records-adrs" class="md-skip">
|
||||||
|
Zum Inhalt
|
||||||
|
</a>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<div data-md-component="announce">
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<header class="md-header md-header--shadow" data-md-component="header">
|
||||||
|
<nav class="md-header__inner md-grid" aria-label="Kopfzeile">
|
||||||
|
<a href=".." title="GWÖ-Antragsprüfer Docs" class="md-header__button md-logo" aria-label="GWÖ-Antragsprüfer Docs" data-md-component="logo">
|
||||||
|
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
|
||||||
|
|
||||||
|
</a>
|
||||||
|
<label class="md-header__button md-icon" for="__drawer">
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
|
||||||
|
</label>
|
||||||
|
<div class="md-header__title" data-md-component="header-title">
|
||||||
|
<div class="md-header__ellipsis">
|
||||||
|
<div class="md-header__topic">
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
GWÖ-Antragsprüfer Docs
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
<div class="md-header__topic" data-md-component="header-topic">
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
Übersicht
|
||||||
|
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<label class="md-header__button md-icon" for="__search">
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
|
||||||
|
</label>
|
||||||
|
<div class="md-search" data-md-component="search" role="dialog">
|
||||||
|
<label class="md-search__overlay" for="__search"></label>
|
||||||
|
<div class="md-search__inner" role="search">
|
||||||
|
<form class="md-search__form" name="search">
|
||||||
|
<input type="text" class="md-search__input" name="query" aria-label="Suche" placeholder="Suche" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
|
||||||
|
<label class="md-search__icon md-icon" for="__search">
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
|
||||||
|
</label>
|
||||||
|
<nav class="md-search__options" aria-label="Suche">
|
||||||
|
|
||||||
|
<button type="reset" class="md-search__icon md-icon" title="Zurücksetzen" aria-label="Zurücksetzen" tabindex="-1">
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
|
||||||
|
</button>
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
</form>
|
||||||
|
<div class="md-search__output">
|
||||||
|
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
|
||||||
|
<div class="md-search-result" data-md-component="search-result">
|
||||||
|
<div class="md-search-result__meta">
|
||||||
|
Suche wird initialisiert
|
||||||
|
</div>
|
||||||
|
<ol class="md-search-result__list" role="presentation"></ol>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<div class="md-header__source">
|
||||||
|
<a href="https://repo.toppyr.de/tobias/gwoe-antragspruefer" title="Zum Repository" class="md-source" data-md-component="source">
|
||||||
|
<div class="md-source__icon md-icon">
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
|
||||||
|
</div>
|
||||||
|
<div class="md-source__repository">
|
||||||
|
tobias/gwoe-antragspruefer
|
||||||
|
</div>
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
</header>
|
||||||
|
|
||||||
|
<div class="md-container" data-md-component="container">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<main class="md-main" data-md-component="main">
|
||||||
|
<div class="md-main__inner md-grid">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
|
||||||
|
<div class="md-sidebar__scrollwrap">
|
||||||
|
<div class="md-sidebar__inner">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
||||||
|
<label class="md-nav__title" for="__drawer">
|
||||||
|
<a href=".." title="GWÖ-Antragsprüfer Docs" class="md-nav__button md-logo" aria-label="GWÖ-Antragsprüfer Docs" data-md-component="logo">
|
||||||
|
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
|
||||||
|
|
||||||
|
</a>
|
||||||
|
GWÖ-Antragsprüfer Docs
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<div class="md-nav__source">
|
||||||
|
<a href="https://repo.toppyr.de/tobias/gwoe-antragspruefer" title="Zum Repository" class="md-source" data-md-component="source">
|
||||||
|
<div class="md-source__icon md-icon">
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
|
||||||
|
</div>
|
||||||
|
<div class="md-source__repository">
|
||||||
|
tobias/gwoe-antragspruefer
|
||||||
|
</div>
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<ul class="md-nav__list" data-md-scrollfix>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href=".." class="md-nav__link">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
Start
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" checked>
|
||||||
|
|
||||||
|
|
||||||
|
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
Architecture Decision Records
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-nav__icon md-icon"></span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="true">
|
||||||
|
<label class="md-nav__title" for="__nav_2">
|
||||||
|
<span class="md-nav__icon md-icon"></span>
|
||||||
|
|
||||||
|
|
||||||
|
Architecture Decision Records
|
||||||
|
|
||||||
|
|
||||||
|
</label>
|
||||||
|
<ul class="md-nav__list" data-md-scrollfix>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item md-nav__item--active">
|
||||||
|
|
||||||
|
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<label class="md-nav__link md-nav__link--active" for="__toc">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
Übersicht
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-nav__icon md-icon"></span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<a href="./" class="md-nav__link md-nav__link--active">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
Übersicht
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</a>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<nav class="md-nav md-nav--secondary" aria-label="Inhaltsverzeichnis">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<label class="md-nav__title" for="__toc">
|
||||||
|
<span class="md-nav__icon md-icon"></span>
|
||||||
|
Inhaltsverzeichnis
|
||||||
|
</label>
|
||||||
|
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="#workflow" class="md-nav__link">
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
Workflow
|
||||||
|
|
||||||
|
</span>
|
||||||
|
</a>
|
||||||
|
|
||||||
|
</li>
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="#index" class="md-nav__link">
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
Index
|
||||||
|
|
||||||
|
</span>
|
||||||
|
</a>
|
||||||
|
|
||||||
|
</li>
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="#wann-adr-wann-nicht" class="md-nav__link">
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
Wann ADR, wann nicht
|
||||||
|
|
||||||
|
</span>
|
||||||
|
</a>
|
||||||
|
|
||||||
|
</li>
|
||||||
|
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="0001-llm-citation-binding/" class="md-nav__link">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
0001 LLM-Citation-Binding
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="0002-adapter-architecture/" class="md-nav__link">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
0002 Adapter-Architektur
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="0003-citation-property-tests/" class="md-nav__link">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
0003 Citation-Property-Tests
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="0004-deployment-workflow/" class="md-nav__link">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
0004 Deployment-Workflow
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</ul>
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_3" >
|
||||||
|
|
||||||
|
|
||||||
|
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
Archiv
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-nav__icon md-icon"></span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
|
||||||
|
<label class="md-nav__title" for="__nav_3">
|
||||||
|
<span class="md-nav__icon md-icon"></span>
|
||||||
|
|
||||||
|
|
||||||
|
Archiv
|
||||||
|
|
||||||
|
|
||||||
|
</label>
|
||||||
|
<ul class="md-nav__list" data-md-scrollfix>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="../archive/" class="md-nav__link">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
Übersicht
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</ul>
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</ul>
|
||||||
|
</nav>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
|
||||||
|
<div class="md-sidebar__scrollwrap">
|
||||||
|
<div class="md-sidebar__inner">
|
||||||
|
|
||||||
|
|
||||||
|
<nav class="md-nav md-nav--secondary" aria-label="Inhaltsverzeichnis">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<label class="md-nav__title" for="__toc">
|
||||||
|
<span class="md-nav__icon md-icon"></span>
|
||||||
|
Inhaltsverzeichnis
|
||||||
|
</label>
|
||||||
|
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="#workflow" class="md-nav__link">
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
Workflow
|
||||||
|
|
||||||
|
</span>
|
||||||
|
</a>
|
||||||
|
|
||||||
|
</li>
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="#index" class="md-nav__link">
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
Index
|
||||||
|
|
||||||
|
</span>
|
||||||
|
</a>
|
||||||
|
|
||||||
|
</li>
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="#wann-adr-wann-nicht" class="md-nav__link">
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
Wann ADR, wann nicht
|
||||||
|
|
||||||
|
</span>
|
||||||
|
</a>
|
||||||
|
|
||||||
|
</li>
|
||||||
|
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
</nav>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<div class="md-content" data-md-component="content">
|
||||||
|
|
||||||
|
<article class="md-content__inner md-typeset">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<h1 id="architecture-decision-records-adrs">Architecture Decision Records (ADRs)<a class="headerlink" href="#architecture-decision-records-adrs" title="Permanent link">¶</a></h1>
|
||||||
|
<p>ADRs dokumentieren signifikante Architektur-Entscheidungen mit Kontext, Optionen
|
||||||
|
und Konsequenzen. Format inspiriert von <a href="https://cognitect.com/blog/2011/11/15/documenting-architecture-decisions">Michael Nygard</a>.</p>
|
||||||
|
<h2 id="workflow">Workflow<a class="headerlink" href="#workflow" title="Permanent link">¶</a></h2>
|
||||||
|
<ol>
|
||||||
|
<li>Neue Entscheidung steht an → Kopie von <code>template.md</code> mit nächster freier
|
||||||
|
Nummer (<code>NNNN-kebap-titel.md</code>).</li>
|
||||||
|
<li>Status <code>proposed</code> → diskutiert in Issue/PR → bei Akzeptanz auf <code>accepted</code>.</li>
|
||||||
|
<li><strong>Niemals editieren nach <code>accepted</code>.</strong> Wenn eine Entscheidung sich ändert,
|
||||||
|
neuer ADR mit <code>Supersedes: NNNN-…</code> im Header und der alte ADR bekommt
|
||||||
|
<code>Superseded by: MMMM-…</code>.</li>
|
||||||
|
<li>Status <code>deprecated</code> für Entscheidungen, die ohne Nachfolger auslaufen.</li>
|
||||||
|
</ol>
|
||||||
|
<h2 id="index">Index<a class="headerlink" href="#index" title="Permanent link">¶</a></h2>
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>ID</th>
|
||||||
|
<th>Titel</th>
|
||||||
|
<th>Status</th>
|
||||||
|
<th>Datum</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td><a href="0001-llm-citation-binding/">0001</a></td>
|
||||||
|
<td>LLM-Citations server-seitig binden statt prompt-seitig</td>
|
||||||
|
<td>accepted</td>
|
||||||
|
<td>2026-04-10</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><a href="0002-adapter-architecture/">0002</a></td>
|
||||||
|
<td>Adapter-Pattern mit ParlamentAdapter-Basisklasse + Registry</td>
|
||||||
|
<td>accepted</td>
|
||||||
|
<td>2026-04-10</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><a href="0003-citation-property-tests/">0003</a></td>
|
||||||
|
<td>Sub-D Property-Verification: Zitate als Substring der zitierten PDF-Seite</td>
|
||||||
|
<td>accepted</td>
|
||||||
|
<td>2026-04-10</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><a href="0004-deployment-workflow/">0004</a></td>
|
||||||
|
<td>Docker Compose Deploy mit DB-/Reports-Volume und SN-XML-Sonderpfad</td>
|
||||||
|
<td>accepted</td>
|
||||||
|
<td>2026-04-10</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
<h2 id="wann-adr-wann-nicht">Wann ADR, wann nicht<a class="headerlink" href="#wann-adr-wann-nicht" title="Permanent link">¶</a></h2>
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>ADR-würdig</th>
|
||||||
|
<th>nicht ADR-würdig</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>Wahl zwischen mehreren plausiblen Architekturen mit Trade-offs</td>
|
||||||
|
<td>Bug-Fix</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Strukturelle Konsequenzen für mehrere Module</td>
|
||||||
|
<td>Refactoring innerhalb eines Moduls</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Reverse-Engineering-Findings die andere Adapter beeinflussen</td>
|
||||||
|
<td>Stiländerungen, Linting-Konventionen</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Neue externe Abhängigkeiten oder APIs</td>
|
||||||
|
<td>Dependency-Bumps ohne API-Änderung</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Workflow-Konventionen die mehrere Sessions überdauern müssen</td>
|
||||||
|
<td>Tagesgeschäft, Issue-Tracking</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
<p>Faustregel: Wenn ein neuer Kollege (oder eine neue Session) die Entscheidung
|
||||||
|
sonst rückgängig machen würde, gehört sie in einen ADR.</p>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</article>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</main>
|
||||||
|
|
||||||
|
<footer class="md-footer">
|
||||||
|
|
||||||
|
<div class="md-footer-meta md-typeset">
|
||||||
|
<div class="md-footer-meta__inner md-grid">
|
||||||
|
<div class="md-copyright">
|
||||||
|
|
||||||
|
|
||||||
|
Made with
|
||||||
|
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
|
||||||
|
Material for MkDocs
|
||||||
|
</a>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</footer>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<div class="md-dialog" data-md-component="dialog">
|
||||||
|
<div class="md-dialog__inner md-typeset"></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<script id="__config" type="application/json">{"annotate": null, "base": "..", "features": ["navigation.sections", "navigation.expand", "search.highlight"], "search": "../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "In Zwischenablage kopiert", "clipboard.copy": "In Zwischenablage kopieren", "search.result.more.one": "1 weiteres Suchergebnis auf dieser Seite", "search.result.more.other": "# weitere Suchergebnisse auf dieser Seite", "search.result.none": "Keine Suchergebnisse", "search.result.one": "1 Suchergebnis", "search.result.other": "# Suchergebnisse", "search.result.placeholder": "Suchbegriff eingeben", "search.result.term.missing": "Es fehlt", "select.version": "Version ausw\u00e4hlen"}, "version": null}</script>
|
||||||
|
|
||||||
|
|
||||||
|
<script src="../assets/javascripts/bundle.79ae519e.min.js"></script>
|
||||||
|
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
807
adr/template/index.html
Normal file
807
adr/template/index.html
Normal file
@ -0,0 +1,807 @@
|
|||||||
|
|
||||||
|
<!doctype html>
|
||||||
|
<html lang="de" class="no-js">
|
||||||
|
<head>
|
||||||
|
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<meta name="viewport" content="width=device-width,initial-scale=1">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<link rel="canonical" href="https://docs.gwoe.toppyr.de/adr/template/">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<link rel="icon" href="../../assets/images/favicon.png">
|
||||||
|
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.6">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<title>NNNN — Titel der Entscheidung - GWÖ-Antragsprüfer Docs</title>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<link rel="stylesheet" href="../../assets/stylesheets/main.484c7ddc.min.css">
|
||||||
|
|
||||||
|
|
||||||
|
<link rel="stylesheet" href="../../assets/stylesheets/palette.ab4e12ef.min.css">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
||||||
|
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
|
||||||
|
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</head>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<body dir="ltr" data-md-color-scheme="default" data-md-color-primary="teal" data-md-color-accent="light-green">
|
||||||
|
|
||||||
|
|
||||||
|
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
|
||||||
|
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
|
||||||
|
<label class="md-overlay" for="__drawer"></label>
|
||||||
|
<div data-md-component="skip">
|
||||||
|
|
||||||
|
|
||||||
|
<a href="#nnnn-titel-der-entscheidung" class="md-skip">
|
||||||
|
Zum Inhalt
|
||||||
|
</a>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<div data-md-component="announce">
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<header class="md-header md-header--shadow" data-md-component="header">
|
||||||
|
<nav class="md-header__inner md-grid" aria-label="Kopfzeile">
|
||||||
|
<a href="../.." title="GWÖ-Antragsprüfer Docs" class="md-header__button md-logo" aria-label="GWÖ-Antragsprüfer Docs" data-md-component="logo">
|
||||||
|
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
|
||||||
|
|
||||||
|
</a>
|
||||||
|
<label class="md-header__button md-icon" for="__drawer">
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
|
||||||
|
</label>
|
||||||
|
<div class="md-header__title" data-md-component="header-title">
|
||||||
|
<div class="md-header__ellipsis">
|
||||||
|
<div class="md-header__topic">
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
GWÖ-Antragsprüfer Docs
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
<div class="md-header__topic" data-md-component="header-topic">
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
NNNN — Titel der Entscheidung
|
||||||
|
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<label class="md-header__button md-icon" for="__search">
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
|
||||||
|
</label>
|
||||||
|
<div class="md-search" data-md-component="search" role="dialog">
|
||||||
|
<label class="md-search__overlay" for="__search"></label>
|
||||||
|
<div class="md-search__inner" role="search">
|
||||||
|
<form class="md-search__form" name="search">
|
||||||
|
<input type="text" class="md-search__input" name="query" aria-label="Suche" placeholder="Suche" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
|
||||||
|
<label class="md-search__icon md-icon" for="__search">
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
|
||||||
|
</label>
|
||||||
|
<nav class="md-search__options" aria-label="Suche">
|
||||||
|
|
||||||
|
<button type="reset" class="md-search__icon md-icon" title="Zurücksetzen" aria-label="Zurücksetzen" tabindex="-1">
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
|
||||||
|
</button>
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
</form>
|
||||||
|
<div class="md-search__output">
|
||||||
|
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
|
||||||
|
<div class="md-search-result" data-md-component="search-result">
|
||||||
|
<div class="md-search-result__meta">
|
||||||
|
Suche wird initialisiert
|
||||||
|
</div>
|
||||||
|
<ol class="md-search-result__list" role="presentation"></ol>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<div class="md-header__source">
|
||||||
|
<a href="https://repo.toppyr.de/tobias/gwoe-antragspruefer" title="Zum Repository" class="md-source" data-md-component="source">
|
||||||
|
<div class="md-source__icon md-icon">
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
|
||||||
|
</div>
|
||||||
|
<div class="md-source__repository">
|
||||||
|
tobias/gwoe-antragspruefer
|
||||||
|
</div>
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
</header>
|
||||||
|
|
||||||
|
<div class="md-container" data-md-component="container">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<main class="md-main" data-md-component="main">
|
||||||
|
<div class="md-main__inner md-grid">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
|
||||||
|
<div class="md-sidebar__scrollwrap">
|
||||||
|
<div class="md-sidebar__inner">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
||||||
|
<label class="md-nav__title" for="__drawer">
|
||||||
|
<a href="../.." title="GWÖ-Antragsprüfer Docs" class="md-nav__button md-logo" aria-label="GWÖ-Antragsprüfer Docs" data-md-component="logo">
|
||||||
|
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
|
||||||
|
|
||||||
|
</a>
|
||||||
|
GWÖ-Antragsprüfer Docs
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<div class="md-nav__source">
|
||||||
|
<a href="https://repo.toppyr.de/tobias/gwoe-antragspruefer" title="Zum Repository" class="md-source" data-md-component="source">
|
||||||
|
<div class="md-source__icon md-icon">
|
||||||
|
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
|
||||||
|
</div>
|
||||||
|
<div class="md-source__repository">
|
||||||
|
tobias/gwoe-antragspruefer
|
||||||
|
</div>
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<ul class="md-nav__list" data-md-scrollfix>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="../.." class="md-nav__link">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
Start
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_2" >
|
||||||
|
|
||||||
|
|
||||||
|
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
Architecture Decision Records
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-nav__icon md-icon"></span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
|
||||||
|
<label class="md-nav__title" for="__nav_2">
|
||||||
|
<span class="md-nav__icon md-icon"></span>
|
||||||
|
|
||||||
|
|
||||||
|
Architecture Decision Records
|
||||||
|
|
||||||
|
|
||||||
|
</label>
|
||||||
|
<ul class="md-nav__list" data-md-scrollfix>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="../" class="md-nav__link">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
Übersicht
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="../0001-llm-citation-binding/" class="md-nav__link">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
0001 LLM-Citation-Binding
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="../0002-adapter-architecture/" class="md-nav__link">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
0002 Adapter-Architektur
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="../0003-citation-property-tests/" class="md-nav__link">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
0003 Citation-Property-Tests
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="../0004-deployment-workflow/" class="md-nav__link">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
0004 Deployment-Workflow
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</ul>
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_3" >
|
||||||
|
|
||||||
|
|
||||||
|
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
Archiv
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-nav__icon md-icon"></span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
|
||||||
|
<label class="md-nav__title" for="__nav_3">
|
||||||
|
<span class="md-nav__icon md-icon"></span>
|
||||||
|
|
||||||
|
|
||||||
|
Archiv
|
||||||
|
|
||||||
|
|
||||||
|
</label>
|
||||||
|
<ul class="md-nav__list" data-md-scrollfix>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="../../archive/" class="md-nav__link">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
|
||||||
|
Übersicht
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</span>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</ul>
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</ul>
|
||||||
|
</nav>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
|
||||||
|
<div class="md-sidebar__scrollwrap">
|
||||||
|
<div class="md-sidebar__inner">
|
||||||
|
|
||||||
|
|
||||||
|
<nav class="md-nav md-nav--secondary" aria-label="Inhaltsverzeichnis">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<label class="md-nav__title" for="__toc">
|
||||||
|
<span class="md-nav__icon md-icon"></span>
|
||||||
|
Inhaltsverzeichnis
|
||||||
|
</label>
|
||||||
|
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="#kontext" class="md-nav__link">
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
Kontext
|
||||||
|
|
||||||
|
</span>
|
||||||
|
</a>
|
||||||
|
|
||||||
|
</li>
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="#optionen" class="md-nav__link">
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
Optionen
|
||||||
|
|
||||||
|
</span>
|
||||||
|
</a>
|
||||||
|
|
||||||
|
<nav class="md-nav" aria-label="Optionen">
|
||||||
|
<ul class="md-nav__list">
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="#option-a" class="md-nav__link">
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
Option A — …
|
||||||
|
|
||||||
|
</span>
|
||||||
|
</a>
|
||||||
|
|
||||||
|
</li>
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="#option-b" class="md-nav__link">
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
Option B — …
|
||||||
|
|
||||||
|
</span>
|
||||||
|
</a>
|
||||||
|
|
||||||
|
</li>
|
||||||
|
|
||||||
|
</ul>
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
</li>
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="#entscheidung" class="md-nav__link">
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
Entscheidung
|
||||||
|
|
||||||
|
</span>
|
||||||
|
</a>
|
||||||
|
|
||||||
|
</li>
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="#konsequenzen" class="md-nav__link">
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
Konsequenzen
|
||||||
|
|
||||||
|
</span>
|
||||||
|
</a>
|
||||||
|
|
||||||
|
<nav class="md-nav" aria-label="Konsequenzen">
|
||||||
|
<ul class="md-nav__list">
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="#positiv" class="md-nav__link">
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
Positiv
|
||||||
|
|
||||||
|
</span>
|
||||||
|
</a>
|
||||||
|
|
||||||
|
</li>
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="#negativ" class="md-nav__link">
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
Negativ
|
||||||
|
|
||||||
|
</span>
|
||||||
|
</a>
|
||||||
|
|
||||||
|
</li>
|
||||||
|
|
||||||
|
<li class="md-nav__item">
|
||||||
|
<a href="#folgen-fur-andere-adrs" class="md-nav__link">
|
||||||
|
<span class="md-ellipsis">
|
||||||
|
|
||||||
|
Folgen für andere ADRs
|
||||||
|
|
||||||
|
</span>
|
||||||
|
</a>
|
||||||
|
|
||||||
|
</li>
|
||||||
|
|
||||||
|
</ul>
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
</li>
|
||||||
|
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
</nav>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<div class="md-content" data-md-component="content">
|
||||||
|
|
||||||
|
<article class="md-content__inner md-typeset">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<h1 id="nnnn-titel-der-entscheidung">NNNN — Titel der Entscheidung<a class="headerlink" href="#nnnn-titel-der-entscheidung" title="Permanent link">¶</a></h1>
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th></th>
|
||||||
|
<th></th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td><strong>Status</strong></td>
|
||||||
|
<td>proposed / accepted / deprecated / superseded</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><strong>Datum</strong></td>
|
||||||
|
<td>YYYY-MM-DD</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><strong>Supersedes</strong></td>
|
||||||
|
<td>(optional) link auf vorherigen ADR</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><strong>Superseded by</strong></td>
|
||||||
|
<td>(optional) link auf neueren ADR</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><strong>Refs</strong></td>
|
||||||
|
<td>Issues, PRs, Commits</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
<h2 id="kontext">Kontext<a class="headerlink" href="#kontext" title="Permanent link">¶</a></h2>
|
||||||
|
<p>Welches Problem hat zu der Entscheidung geführt? Was ist der Stand vorher,
|
||||||
|
welche Constraints sind im Spiel, welche Stakeholder sind betroffen?</p>
|
||||||
|
<h2 id="optionen">Optionen<a class="headerlink" href="#optionen" title="Permanent link">¶</a></h2>
|
||||||
|
<p>Welche Alternativen wurden ernsthaft erwogen? Mindestens 2, gerne mehr.</p>
|
||||||
|
<h3 id="option-a">Option A — …<a class="headerlink" href="#option-a" title="Permanent link">¶</a></h3>
|
||||||
|
<p>Beschreibung. Vor- und Nachteile.</p>
|
||||||
|
<h3 id="option-b">Option B — …<a class="headerlink" href="#option-b" title="Permanent link">¶</a></h3>
|
||||||
|
<p>Beschreibung. Vor- und Nachteile.</p>
|
||||||
|
<h2 id="entscheidung">Entscheidung<a class="headerlink" href="#entscheidung" title="Permanent link">¶</a></h2>
|
||||||
|
<p>Welche Option wurde gewählt und warum? Konkret und unmissverständlich, sodass
|
||||||
|
ein neuer Kollege ohne Diskussion weitermachen kann.</p>
|
||||||
|
<h2 id="konsequenzen">Konsequenzen<a class="headerlink" href="#konsequenzen" title="Permanent link">¶</a></h2>
|
||||||
|
<p>Was wird leichter / schwerer durch diese Entscheidung? Welche Folge-Arbeiten
|
||||||
|
fallen an? Welche Teile des Systems werden berührt?</p>
|
||||||
|
<h3 id="positiv">Positiv<a class="headerlink" href="#positiv" title="Permanent link">¶</a></h3>
|
||||||
|
<ul>
|
||||||
|
<li>…</li>
|
||||||
|
</ul>
|
||||||
|
<h3 id="negativ">Negativ<a class="headerlink" href="#negativ" title="Permanent link">¶</a></h3>
|
||||||
|
<ul>
|
||||||
|
<li>…</li>
|
||||||
|
</ul>
|
||||||
|
<h3 id="folgen-fur-andere-adrs">Folgen für andere ADRs<a class="headerlink" href="#folgen-fur-andere-adrs" title="Permanent link">¶</a></h3>
|
||||||
|
<ul>
|
||||||
|
<li>…</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</article>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</main>
|
||||||
|
|
||||||
|
<footer class="md-footer">
|
||||||
|
|
||||||
|
<div class="md-footer-meta md-typeset">
|
||||||
|
<div class="md-footer-meta__inner md-grid">
|
||||||
|
<div class="md-copyright">
|
||||||
|
|
||||||
|
|
||||||
|
Made with
|
||||||
|
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
|
||||||
|
Material for MkDocs
|
||||||
|
</a>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</footer>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<div class="md-dialog" data-md-component="dialog">
|
||||||
|
<div class="md-dialog__inner md-typeset"></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<script id="__config" type="application/json">{"annotate": null, "base": "../..", "features": ["navigation.sections", "navigation.expand", "search.highlight"], "search": "../../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "In Zwischenablage kopiert", "clipboard.copy": "In Zwischenablage kopieren", "search.result.more.one": "1 weiteres Suchergebnis auf dieser Seite", "search.result.more.other": "# weitere Suchergebnisse auf dieser Seite", "search.result.none": "Keine Suchergebnisse", "search.result.one": "1 Suchergebnis", "search.result.other": "# Suchergebnisse", "search.result.placeholder": "Suchbegriff eingeben", "search.result.term.missing": "Es fehlt", "select.version": "Version ausw\u00e4hlen"}, "version": null}</script>
|
||||||
|
|
||||||
|
|
||||||
|
<script src="../../assets/javascripts/bundle.79ae519e.min.js"></script>
|
||||||
|
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
BIN
app/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
app/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
app/__pycache__/__init__.cpython-314.pyc
Normal file
BIN
app/__pycache__/__init__.cpython-314.pyc
Normal file
Binary file not shown.
BIN
app/__pycache__/analyzer.cpython-313.pyc
Normal file
BIN
app/__pycache__/analyzer.cpython-313.pyc
Normal file
Binary file not shown.
BIN
app/__pycache__/analyzer.cpython-314.pyc
Normal file
BIN
app/__pycache__/analyzer.cpython-314.pyc
Normal file
Binary file not shown.
BIN
app/__pycache__/config.cpython-313.pyc
Normal file
BIN
app/__pycache__/config.cpython-313.pyc
Normal file
Binary file not shown.
BIN
app/__pycache__/config.cpython-314.pyc
Normal file
BIN
app/__pycache__/config.cpython-314.pyc
Normal file
Binary file not shown.
BIN
app/__pycache__/database.cpython-313.pyc
Normal file
BIN
app/__pycache__/database.cpython-313.pyc
Normal file
Binary file not shown.
BIN
app/__pycache__/database.cpython-314.pyc
Normal file
BIN
app/__pycache__/database.cpython-314.pyc
Normal file
Binary file not shown.
BIN
app/__pycache__/embeddings.cpython-313.pyc
Normal file
BIN
app/__pycache__/embeddings.cpython-313.pyc
Normal file
Binary file not shown.
BIN
app/__pycache__/main.cpython-313.pyc
Normal file
BIN
app/__pycache__/main.cpython-313.pyc
Normal file
Binary file not shown.
BIN
app/__pycache__/main.cpython-314.pyc
Normal file
BIN
app/__pycache__/main.cpython-314.pyc
Normal file
Binary file not shown.
BIN
app/__pycache__/models.cpython-313.pyc
Normal file
BIN
app/__pycache__/models.cpython-313.pyc
Normal file
Binary file not shown.
BIN
app/__pycache__/models.cpython-314.pyc
Normal file
BIN
app/__pycache__/models.cpython-314.pyc
Normal file
Binary file not shown.
BIN
app/__pycache__/parlamente.cpython-313.pyc
Normal file
BIN
app/__pycache__/parlamente.cpython-313.pyc
Normal file
Binary file not shown.
BIN
app/__pycache__/parlamente.cpython-314.pyc
Normal file
BIN
app/__pycache__/parlamente.cpython-314.pyc
Normal file
Binary file not shown.
BIN
app/__pycache__/report.cpython-313.pyc
Normal file
BIN
app/__pycache__/report.cpython-313.pyc
Normal file
Binary file not shown.
BIN
app/__pycache__/report.cpython-314.pyc
Normal file
BIN
app/__pycache__/report.cpython-314.pyc
Normal file
Binary file not shown.
BIN
app/__pycache__/wahlprogramme.cpython-313.pyc
Normal file
BIN
app/__pycache__/wahlprogramme.cpython-313.pyc
Normal file
Binary file not shown.
@ -1,285 +0,0 @@
|
|||||||
"""Adapter für abgeordnetenwatch.de API v2 (#106 Phase 1).
|
|
||||||
|
|
||||||
Liefert strukturierte Abstimmungsdaten (namentliche Abstimmungen)
|
|
||||||
pro Bundesland + Bundestag. Daten werden lokal in abgeordnetenwatch_polls
|
|
||||||
und abgeordnetenwatch_votes gecacht.
|
|
||||||
|
|
||||||
API-Docs: https://www.abgeordnetenwatch.de/api/v2
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import re
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import httpx
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Mapping unserer BL-Codes auf abgeordnetenwatch parliament-IDs.
|
|
||||||
# IDs aus GET /api/v2/parliaments (Stand April 2026).
|
|
||||||
PARLIAMENT_ID: dict[str, int] = {
|
|
||||||
"BT": 5, # Bundestag (auch "BUND")
|
|
||||||
"BUND": 5, # Alias
|
|
||||||
"NRW": 4,
|
|
||||||
"BE": 2, # Berlin
|
|
||||||
"HH": 3, # Hamburg
|
|
||||||
"BW": 6, # Baden-Württemberg
|
|
||||||
"RP": 7, # Rheinland-Pfalz
|
|
||||||
"LSA": 8, # Sachsen-Anhalt
|
|
||||||
"MV": 9, # Mecklenburg-Vorpommern
|
|
||||||
"HB": 10, # Bremen
|
|
||||||
"HE": 11, # Hessen
|
|
||||||
"NI": 12, # Niedersachsen
|
|
||||||
"BY": 13, # Bayern
|
|
||||||
"SL": 14, # Saarland
|
|
||||||
"TH": 15, # Thüringen
|
|
||||||
"BB": 16, # Brandenburg
|
|
||||||
"SN": 17, # Sachsen
|
|
||||||
"SH": 18, # Schleswig-Holstein
|
|
||||||
}
|
|
||||||
|
|
||||||
_BASE = "https://www.abgeordnetenwatch.de/api/v2"
|
|
||||||
|
|
||||||
# Drucksache extraction from the field_intro HTML — every Landtag uses its
# own URL / filename scheme. Order matters: the generic "WP/NR" pattern is
# tried first (BUND, HE), then the state-specific patterns derived from the
# Drucksachen PDF URLs.
_DS_PATTERNS: list[re.Pattern] = [
    # Generic: "20/12345" — BUND, HE and similar
    re.compile(r"\b(\d{1,2})/(\d{3,5})\b"),
    # NRW: MMD18-2142.pdf
    re.compile(r"MMD(\d{1,2})-(\d{3,5})\.pdf", re.IGNORECASE),
    # BE: d19-0564.pdf
    re.compile(r"/d(\d{1,2})-(\d{4})\.pdf", re.IGNORECASE),
    # BW: 17_7713_D.pdf
    re.compile(r"/(\d{1,2})_(\d{3,5})_D\.pdf", re.IGNORECASE),
    # HB: D21L0568.pdf (D<wp>L<nr>)
    re.compile(r"/D(\d{1,2})L(\d{3,5})\.pdf", re.IGNORECASE),
    # SH: drucksache-20-00187.pdf
    re.compile(r"drucksache-(\d{1,2})-(\d{3,5})\.pdf", re.IGNORECASE),
    # SL: Gs17_0503.pdf
    re.compile(r"/Gs(\d{1,2})_(\d{3,5})\.pdf", re.IGNORECASE),
    # LSA: wp8/drs/d0145… (order: wp then nr)
    re.compile(r"/wp(\d{1,2})/drs/d(\d{3,5})", re.IGNORECASE),
    # SN: dok_nr=2150&...&leg_per=8 — params may appear in any order
    re.compile(r"dok_nr=(\d{3,5}).*leg_per=(\d{1,2})", re.IGNORECASE),
    # RP: 538-18.pdf (order: nr-wp)
    re.compile(r"/(\d{3,5})-(\d{1,2})\.pdf", re.IGNORECASE),
]


def extract_drucksache_from_intro(html: str) -> Optional[str]:
    """Return the first Drucksache number found in the field_intro HTML.

    Tries the Landtag-specific URL patterns in order (NRW MMD<wp>-<nr>,
    BW <wp>_<nr>_D.pdf, etc.) and formats the first hit as a
    "<wp>/<nr>" string. The output order is always (wp, nr) — for
    patterns that capture the other way round (RP's nr-wp filenames,
    SN's dok_nr=...&leg_per=... query params) the captured groups are
    swapped before formatting.
    """
    if not html:
        return None
    for pattern in _DS_PATTERNS:
        match = pattern.search(html)
        if match is None:
            continue
        raw = pattern.pattern
        # RP (nr-wp filename) and SN (dok_nr before leg_per) capture the
        # number first — flip the groups so the output is always wp/nr.
        flipped = "dok_nr" in raw or (
            "-" in raw and raw.startswith("/(\\d{3,5})")
        )
        if flipped:
            wp, nr = match.group(2), match.group(1)
        else:
            wp, nr = match.group(1), match.group(2)
        return f"{wp}/{nr}"
    return None
|
|
||||||
|
|
||||||
|
|
||||||
async def fallback_drucksache_by_date_title(
    datum: Optional[str],
    titel: Optional[str],
    bundesland: str,
) -> Optional[str]:
    """Fallback Drucksache lookup via date + title against the assessments DB.

    Called when ``extract_drucksache_from_intro`` finds no pattern
    (affects MV/BY/BB/TH/HH/SL, whose intro HTML contains no PDF URLs).

    Searches assessments for ``bundesland`` within ±14 days around
    ``datum`` with a title substring match and returns the Drucksache
    number of the hit closest in time, or ``None``.

    Args:
        datum: ISO date of the poll (``field_poll_date``, e.g. ``"2026-04-01"``).
        titel: Label/title of the poll (checked as a LIKE substring).
        bundesland: Our state code (e.g. ``"MV"``).

    Returns:
        Drucksache number as a string (e.g. ``"7/1234"``) or ``None``.
    """
    if not datum or not titel:
        return None

    # Title substring: use only the first 40 characters for the LIKE match,
    # since poll labels and assessment titles may differ slightly.
    titel_substr = titel.strip()[:40]

    # Local imports: keep module import side-effect free and cheap.
    from .config import settings as _settings
    import aiosqlite as _aio

    async with _aio.connect(_settings.db_path) as db:
        # Closest-in-time match within the ±14 day window, case-insensitive
        # substring match on the title.
        cur = await db.execute(
            """
            SELECT drucksache FROM assessments
            WHERE bundesland = ?
              AND ABS(julianday(datum) - julianday(?)) < 14
              AND LOWER(title) LIKE ?
            ORDER BY ABS(julianday(datum) - julianday(?))
            LIMIT 1
            """,
            (bundesland.upper(), datum, f"%{titel_substr.lower()}%", datum),
        )
        row = await cur.fetchone()

    if row:
        logger.debug(
            "fallback_drucksache_by_date_title: %s/%s → %s",
            bundesland, datum, row[0],
        )
        return row[0]
    return None
|
|
||||||
|
|
||||||
|
|
||||||
async def fetch_polls(bundesland_code: str, limit: int = 100) -> list[dict]:
    """Fetch recent roll-call polls for one state from abgeordnetenwatch.

    Returns a list of poll dicts; each dict additionally carries the
    parsed key ``drucksache`` (may be None).

    Args:
        bundesland_code: Our state code (e.g. "NRW", "BT", "BUND").
        limit: Maximum number of polls; passed through as range_end.

    Returns:
        List of poll dicts with the API's fields plus ``drucksache``.

    Raises:
        ValueError: If bundesland_code is not in PARLIAMENT_ID.
        httpx.HTTPError: On network problems.
    """
    parliament_id = PARLIAMENT_ID.get(bundesland_code.upper())
    if parliament_id is None:
        raise ValueError(
            f"Unbekannter BL-Code '{bundesland_code}'. "
            f"Bekannte Codes: {sorted(PARLIAMENT_ID.keys())}"
        )

    async with httpx.AsyncClient(timeout=30.0) as client:
        # First fetch the current ParliamentPeriod for the parliament —
        # /polls filters by field_legislature (period id), NOT parliament id.
        pp_resp = await client.get(
            f"{_BASE}/parliament-periods",
            params={"parliament": parliament_id, "type": "legislature", "range_end": 5},
        )
        pp_resp.raise_for_status()
        periods = (pp_resp.json() or {}).get("data") or []
        # Current period: sort by start date descending, take the newest.
        current = sorted(
            periods,
            key=lambda x: x.get("start_date_period") or "",
            reverse=True,
        )
        if not current:
            logger.warning("Keine ParliamentPeriod für %s (parliament_id=%d)",
                           bundesland_code, parliament_id)
            return []
        period_id = current[0]["id"]

        # Polls for this period
        resp = await client.get(
            f"{_BASE}/polls",
            params={"field_legislature": period_id, "range_end": limit},
        )
        resp.raise_for_status()
        data = resp.json()

    polls_raw: list[dict] = data.get("data") or []
    polls = []
    for p in polls_raw:
        intro_html = p.get("field_intro") or ""
        # Normalize to the subset of fields we persist, plus the locally
        # parsed Drucksache number extracted from the intro HTML.
        polls.append({
            "id": p.get("id"),
            "label": p.get("label") or p.get("field_poll_date", ""),
            "field_poll_date": p.get("field_poll_date"),
            "field_accepted": p.get("field_accepted"),
            "field_topics": p.get("field_topics") or [],
            "field_intro": intro_html,
            "field_legislature": p.get("field_legislature") or {},
            "drucksache": extract_drucksache_from_intro(intro_html),
        })

    logger.info(
        "abgeordnetenwatch: %d polls für %s (parliament_id=%d)",
        len(polls), bundesland_code, parliament_id,
    )
    return polls
|
|
||||||
|
|
||||||
|
|
||||||
async def fetch_votes_for_poll(poll_id: int) -> list[dict]:
    """Fetch the individual roll-call votes for one poll.

    Args:
        poll_id: ID of the poll (from polls[].id).

    Returns:
        List of vote dicts with the fields:
        poll_id, politician_id, politician_name, partei, vote.
        vote is one of: "yes", "no", "abstain", "no_show".

    Raises:
        httpx.HTTPError: On network problems.
    """
    # /votes?poll=X works (determined empirically);
    # NOT field_poll (returns 500) and NOT /polls/{id}?related_data=votes
    # (returns empty related_data). Plain ?poll=<id>.
    url = f"{_BASE}/votes"
    params = {"poll": poll_id, "range_end": 1000}

    async with httpx.AsyncClient(timeout=30.0) as client:
        resp = await client.get(url, params=params)
        resp.raise_for_status()
        data = resp.json()

    votes_raw: list[dict] = data.get("data") or []
    votes = []
    for v in votes_raw:
        # The politician can be nested under "mandate" or "politician"
        # depending on the endpoint variant.
        politician = v.get("mandate") or v.get("politician") or {}
        politician_id = politician.get("id") or v.get("mandate_id")
        politician_name = politician.get("label") or politician.get("name") or ""

        # Party from politician.party, falling back to the fraction.
        partei = ""
        party = politician.get("party") or {}
        if isinstance(party, dict):
            partei = party.get("label") or party.get("short_label") or ""
        fraction = v.get("fraction") or {}
        if not partei and isinstance(fraction, dict):
            partei = fraction.get("full_name") or fraction.get("label") or ""

        vote_value = (v.get("vote") or "").lower()
        # API returns "yes"/"no"/"abstain"/"no_show" — adopt directly;
        # anything unexpected is coerced to "no_show".
        if vote_value not in ("yes", "no", "abstain", "no_show"):
            vote_value = "no_show"

        votes.append({
            "poll_id": poll_id,
            "politician_id": politician_id,
            "politician_name": politician_name,
            "partei": partei,
            "vote": vote_value,
        })

    logger.info(
        "abgeordnetenwatch: %d votes für poll_id=%d", len(votes), poll_id
    )
    return votes
|
|
||||||
@ -1,11 +0,0 @@
|
|||||||
"""Adapter: konkrete Implementierungen der Ports.
|
|
||||||
|
|
||||||
Vorläufig enthält dieses Modul nur den Qwen-LLM-Adapter. Perspektivisch
|
|
||||||
wandern die 17 Parlaments-Adapter aus ``parlamente.py`` hierher (eigener
|
|
||||||
Folge-PR, weil das eine umfangreichere Umstellung ist und die
|
|
||||||
Adapter-ABC dort bereits existiert — siehe ADR 0002).
|
|
||||||
"""
|
|
||||||
|
|
||||||
from .qwen_bewerter import QwenBewerter
|
|
||||||
|
|
||||||
__all__ = ["QwenBewerter"]
|
|
||||||
@ -1,111 +0,0 @@
|
|||||||
"""QwenBewerter — Produktions-Adapter für den LlmBewerter-Port.
|
|
||||||
|
|
||||||
Kapselt den ``AsyncOpenAI``-Client gegen die DashScope-API, den Retry-
|
|
||||||
Loop mit Temperatur-Escalation und das Markdown-Fence-Stripping. Die
|
|
||||||
Retry-Semantik bleibt identisch zu ``analyzer.py`` vor der Migration:
|
|
||||||
bis zu ``max_retries`` Versuche, Temperatur steigt um 0.1 pro Versuch.
|
|
||||||
|
|
||||||
Der Adapter gibt den geparsten ``dict`` zurück — Pydantic-Validierung,
|
|
||||||
Citation-Binding und Missing-Programme-Check bleiben Sache des Callers
|
|
||||||
in ``analyzer.py``.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import hashlib
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
from ..config import settings
|
|
||||||
from ..ports.llm_bewerter import LlmRequest
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def _content_fingerprint(content: str) -> str:
|
|
||||||
"""Log-sicherer Identifier ohne PII-Leak (Issue #57 Befund #4)."""
|
|
||||||
if not content:
|
|
||||||
return "len=0"
|
|
||||||
h = hashlib.sha1(content.encode("utf-8", errors="replace")).hexdigest()[:8]
|
|
||||||
return f"len={len(content)} sha1={h}"
|
|
||||||
|
|
||||||
|
|
||||||
def _strip_markdown_fences(content: str) -> str:
|
|
||||||
"""Entfernt Markdown-Code-Fences, die Qwen trotz Prompt manchmal ergänzt.
|
|
||||||
|
|
||||||
In Sync mit ``analyzer.py`` vor der Migration; Einheitstests in
|
|
||||||
``tests/test_analyzer.py`` spiegeln exakt diese Logik.
|
|
||||||
"""
|
|
||||||
content = content.strip()
|
|
||||||
if content.startswith("```"):
|
|
||||||
content = content.split("\n", 1)[1]
|
|
||||||
if content.endswith("```"):
|
|
||||||
content = content.rsplit("```", 1)[0]
|
|
||||||
if content.startswith("```json"):
|
|
||||||
content = content[7:]
|
|
||||||
return content.strip()
|
|
||||||
|
|
||||||
|
|
||||||
class QwenBewerter:
    """LlmBewerter adapter for Qwen Plus (via DashScope).

    Wraps the ``AsyncOpenAI`` client against the DashScope API, the retry
    loop with temperature escalation, and the Markdown fence stripping.
    Returns the parsed ``dict``; Pydantic validation, citation binding and
    the missing-programmes check remain the caller's job in ``analyzer.py``.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        client=None,
    ) -> None:
        """Constructor injection lets tests pass a mock client without any
        network access. Production uses the defaults: settings from
        ``config.py`` plus ``AsyncOpenAI``."""
        self._api_key = api_key or settings.dashscope_api_key
        self._base_url = base_url or settings.dashscope_base_url
        self._client = client  # lazy-created in .bewerte() when not set

    def _get_client(self):
        # Returns the injected client, or lazily builds the real one.
        if self._client is not None:
            return self._client
        # Lazy import so the test suite can run without the ``openai`` package.
        from openai import AsyncOpenAI

        self._client = AsyncOpenAI(api_key=self._api_key, base_url=self._base_url)
        return self._client

    async def bewerte(self, request: LlmRequest) -> dict:
        """Run the LLM call until JSON parsing succeeds or retries are exhausted.

        Up to ``request.max_retries`` attempts; the temperature rises by
        0.1 per attempt. Raises ``json.JSONDecodeError`` when the final
        attempt still yields unparseable output.
        """
        client = self._get_client()

        last_error: Optional[Exception] = None
        for attempt in range(request.max_retries):
            response = await client.chat.completions.create(
                model=request.model,
                messages=[
                    {"role": "system", "content": request.system_prompt},
                    {"role": "user", "content": request.user_prompt},
                ],
                # Temperature escalation: nudge the model out of a
                # deterministic bad output on each retry.
                temperature=request.base_temperature + (attempt * 0.1),
                max_tokens=request.max_tokens,
            )
            content = response.choices[0].message.content.strip()
            content = _strip_markdown_fences(content)

            try:
                return json.loads(content)
            except json.JSONDecodeError as e:
                last_error = e
                # Log only a fingerprint, never the raw LLM output
                # (may contain sensitive Antrag content).
                logger.warning(
                    "LLM JSON parse error attempt %d/%d (%s) — content %s",
                    attempt + 1, request.max_retries, e,
                    _content_fingerprint(content),
                )
                if attempt >= request.max_retries - 1:
                    logger.error(
                        "LLM JSON parsing exhausted retries, content %s",
                        _content_fingerprint(content),
                    )
                    raise

        # Unreachable — the last attempt raised. Kept for the type checker.
        assert last_error is not None
        raise last_error
|
|
||||||
414
app/analyzer.py
414
app/analyzer.py
@ -1,414 +0,0 @@
|
|||||||
"""LLM-based analysis of parliamentary motions against GWÖ matrix.
|
|
||||||
|
|
||||||
Seit ADR 0008: Die reinen LLM-Calls laufen über den ``LlmBewerter``-Port
|
|
||||||
(``app/ports/llm_bewerter.py``); der Default-Adapter ist ``QwenBewerter``
|
|
||||||
(``app/adapters/qwen_bewerter.py``). Citation-Binding, Missing-Programme-
|
|
||||||
Check und Pydantic-Validierung bleiben hier in der Application-Schicht.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import hashlib
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import re
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
from .config import settings
|
|
||||||
from .models import Assessment
|
|
||||||
from .bundeslaender import BUNDESLAENDER
|
|
||||||
from .wahlprogramm_check import check_missing_programmes
|
|
||||||
from .ports.llm_bewerter import LlmBewerter, LlmRequest
|
|
||||||
from .wahlprogramme import (
|
|
||||||
find_relevant_quotes,
|
|
||||||
format_quote_for_prompt,
|
|
||||||
WAHLPROGRAMM_KONTEXT_FILES,
|
|
||||||
)
|
|
||||||
from .embeddings import (
|
|
||||||
get_relevant_quotes_for_antrag,
|
|
||||||
format_quotes_for_prompt,
|
|
||||||
reconstruct_zitate,
|
|
||||||
EMBEDDINGS_DB,
|
|
||||||
)
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def _content_fingerprint(content: str) -> str:
|
|
||||||
"""Cheap, log-safe identifier for an LLM response: length + first 8 chars
|
|
||||||
of SHA-1. Lets us correlate retries without ever leaking the LLM's
|
|
||||||
actual output (which may contain sensitive Antrags-Inhalte). Issue
|
|
||||||
#57 Befund #4.
|
|
||||||
|
|
||||||
Wird nach ADR 0008 nur noch für post-LLM-Diagnostik (Pydantic-Validation)
|
|
||||||
gebraucht; der LLM-Retry-Loop selbst loggt in ``QwenBewerter``.
|
|
||||||
"""
|
|
||||||
if not content:
|
|
||||||
return "len=0"
|
|
||||||
h = hashlib.sha1(content.encode("utf-8", errors="replace")).hexdigest()[:8]
|
|
||||||
return f"len={len(content)} sha1={h}"
|
|
||||||
|
|
||||||
|
|
||||||
def get_default_bewerter() -> LlmBewerter:
    """Lazily instantiate the default adapter.

    The adapter import is deferred so that tests can run without the
    ``openai`` package and without DashScope credentials (the stubbing in
    ``conftest.py`` suffices as long as nothing imports it at top level).
    """
    from .adapters.qwen_bewerter import QwenBewerter

    return QwenBewerter()
|
|
||||||
|
|
||||||
# Directory holding the Markdown context files shipped with the app.
KONTEXT_DIR = Path(__file__).parent / "kontext"


def load_context_file(name: str) -> str:
    """Load a context file from the kontext directory.

    Args:
        name: File name relative to ``KONTEXT_DIR``.

    Returns:
        The file's text content, or "" when the file does not exist.
    """
    path = KONTEXT_DIR / name
    if path.exists():
        # Read explicitly as UTF-8: the kontext files contain German
        # umlauts and would be mis-decoded under a non-UTF-8 locale
        # default (e.g. cp1252 on Windows).
        return path.read_text(encoding="utf-8")
    return ""
|
|
||||||
|
|
||||||
|
|
||||||
USER_PROMPT_TEMPLATE = """Analysiere den folgenden Antrag:
|
|
||||||
|
|
||||||
<kontext>
|
|
||||||
{bundesland_context}
|
|
||||||
</kontext>
|
|
||||||
|
|
||||||
<wahlprogramm_zitate>
|
|
||||||
{quotes_context}
|
|
||||||
</wahlprogramm_zitate>
|
|
||||||
|
|
||||||
<antrag>
|
|
||||||
{text}
|
|
||||||
</antrag>
|
|
||||||
|
|
||||||
**PFLICHT-FRAKTIONEN:** Du MUSST ALLE folgenden Fraktionen der aktuellen Wahlperiode in `wahlprogrammScores` bewerten — keine auslassen:
|
|
||||||
{pflicht_fraktionen}
|
|
||||||
|
|
||||||
Bewerte nach GWÖ-Matrix 2.0 für Gemeinden:
|
|
||||||
1. GWÖ-Treue (0-10) mit Matrix-Zuordnung und Symbolen (++/+/○/−/−−)
|
|
||||||
2. Wahlprogrammtreue JEDER der oben genannten Pflicht-Fraktionen (0-10)
|
|
||||||
3. Parteiprogrammtreue JEDER der oben genannten Pflicht-Fraktionen (0-10)
|
|
||||||
4. Bis zu 3 Verbesserungsvorschläge in Redline-Syntax
|
|
||||||
5. Themen-Tags für Kategorisierung
|
|
||||||
|
|
||||||
**ZITATEREGEL — STRIKT:** In jedem ``wahlprogrammScores[].wahlprogramm.zitate[].quelle``
|
|
||||||
und ``parteiprogrammScores[].parteiprogramm.zitate[].quelle`` musst du **wortgleich**
|
|
||||||
einen der oben in ``<wahlprogramm_zitate>`` aufgelisteten Quellen-Labels (Programm-Name +
|
|
||||||
Seite) übernehmen — z.B. ``"CDU Mecklenburg-Vorpommern Wahlprogramm 2021, S. 33"``.
|
|
||||||
Erfinde keine Quellen aus deinem Trainingswissen. Nimm keine Quelle aus einem anderen
|
|
||||||
Bundesland (z.B. NRW 2022) als die hier aufgelisteten — selbst wenn dir die dortigen
|
|
||||||
Programme bekannter sind. Findest du oben für eine Partei keinen passenden Chunk, lass
|
|
||||||
``zitate`` leer (``[]``) und vermerke das in der ``begruendung``.
|
|
||||||
|
|
||||||
Ausgabe als reines JSON ohne Markdown-Codeblöcke."""
|
|
||||||
|
|
||||||
|
|
||||||
def get_user_prompt_template() -> str:
    """Public template string for the transparency page (#145).

    Contains the placeholders ``{bundesland_context}``, ``{quotes_context}``,
    ``{text}`` and ``{pflicht_fraktionen}`` — rendered directly via
    ``.format(...)`` during analysis (the original docstring said
    ``analyze_text``; the rendering appears to happen in ``analyze_antrag``
    — verify against the caller).
    """
    return USER_PROMPT_TEMPLATE
|
|
||||||
|
|
||||||
|
|
||||||
def get_system_prompt() -> str:
    """Build the system prompt with GWÖ matrix context.

    Returns a static German-language prompt embedding the GWÖ-Matrix 2.0
    reference tables, the required JSON output schema and the scoring
    rules. German runtime text — do not translate.
    """
    return """Du bist ein Experte für Gemeinwohl-Ökonomie (GWÖ) und parlamentarische Analyse. Du bewertest Anträge aus Landesparlamenten systematisch nach drei Dimensionen:

1. **GWÖ-Treue** (0-10): Übereinstimmung mit der GWÖ-Matrix 2.0 für Gemeinden
2. **Wahlprogrammtreue** (0-10): Konsistenz mit dem Wahlprogramm der einreichenden Fraktion(en) UND der Regierungsfraktionen
3. **Parteiprogrammtreue** (0-10): Konsistenz mit dem Grundsatzprogramm der einreichenden Fraktion(en) UND der Regierungsfraktionen

## GWÖ-Matrix 2.0 für Gemeinden

Die Matrix besteht aus 5 Berührungsgruppen × 5 Werte = 25 Themenfelder.

### Die fünf Werte (Spalten) mit Staatsprinzipien

| Nr | Wert | Staatsprinzip | Kernfragen |
|----|------|---------------|------------|
| 1 | **Menschenwürde** | Rechtsstaatsprinzip | Werden Grundrechte geschützt? Rechtliche Gleichstellung? |
| 2 | **Solidarität** | Gemeinnutz | Wird das Gemeinwohl gefördert? Mehrwert für die Gemeinschaft? |
| 3 | **Ökologische Nachhaltigkeit** | Umwelt-Verantwortung | Klimaschutz? Ressourcenschonung? Biodiversität? |
| 4 | **Soziale Gerechtigkeit** | Sozialstaatsprinzip | Gerechte Verteilung? Daseinsvorsorge? Soziale Absicherung? |
| 5 | **Transparenz & Mitbestimmung** | Demokratie | Bürgerbeteiligung? Offenlegung? Demokratische Prozesse? |

### Die fünf Berührungsgruppen (Zeilen)

| Code | Gruppe | Beschreibung |
|------|--------|-------------|
| **A** | Ausgelagerte Betriebe, Lieferant:innen, Dienstleister:innen | Externe Beschaffung, Lieferketten |
| **B** | Finanzpartner:innen, Geldgeber:innen, Steuerzahler:innen | Umgang mit öffentlichen Mitteln, Haushalt |
| **C** | Politische Führung, Verwaltung, Ehrenamtliche | Mandatsträger:innen, Mitarbeitende |
| **D** | Bürger:innen und Wirtschaft | Wirkung innerhalb der Grenzen, Daseinsvorsorge |
| **E** | Staat, Gesellschaft und Natur | Wirkung über die Grenzen hinaus, Zukunft |

### Matrix-Feldwertung (Skala -5 bis +5)

| Symbol | Rating | Bedeutung |
|--------|--------|-----------|
| `++` | +4 bis +5 | Stark fördernd, vorbildlich |
| `+` | +1 bis +3 | Fördernd |
| `○` | 0 | Neutral/nicht berührt |
| `−` | -1 bis -3 | Widersprechend |
| `−−` | -4 bis -5 | Stark widersprechend, fundamentaler Widerspruch |

**Skala-Logik:**
- **0** = Antrag berührt dieses Feld nicht
- **+1 bis +5** = Stärke der Übereinstimmung mit GWÖ-Werten
- **-1 bis -5** = Stärke des Widerspruchs zu GWÖ-Werten

### Empfehlungs-Kategorien

| Empfehlung | Kriterium |
|------------|-----------|
| **Uneingeschränkt unterstützen** | GWÖ 8-10, keine gravierenden Schwächen |
| **Unterstützen mit Änderungen** | GWÖ 5-7, Verbesserungspotenzial vorhanden |
| **Überarbeiten** | GWÖ 3-4, grundlegende Probleme |
| **Ablehnen** | GWÖ 0-2, fundamentaler Widerspruch zu GWÖ-Werten |

## Ausgabeformat

Antworte NUR mit einem JSON-Objekt im folgenden Format (keine Markdown-Codeblöcke):

{
  "drucksache": "Drucksachennummer falls bekannt, sonst 'unbekannt'",
  "title": "Titel des Antrags",
  "fraktionen": ["Fraktion1"],
  "datum": "YYYY-MM-DD oder unbekannt",
  "link": null,
  "gwoeScore": 0-10,
  "gwoeBegründung": "3-4 Sätze mit Bezug zu konkreten Themenfeldern",
  "gwoeMatrix": [
    { "field": "D4", "label": "Soziale öffentliche Leistung", "aspect": "Konkreter Bezug", "rating": 2, "symbol": "+" }
  ],
  "gwoeSchwerpunkt": ["D4", "D1"],
  "wahlprogrammScores": [
    {
      "fraktion": "SPD",
      "istAntragsteller": true,
      "wahlprogramm": {
        "score": 9,
        "begründung": "...",
        "zitate": [
          {
            "text": "Exaktes Zitat aus Wahlprogramm",
            "quelle": "SPD NRW Wahlprogramm 2022, S. 47",
            "url": "/static/referenzen/spd-nrw-2022.pdf#page=47"
          }
        ]
      },
      "parteiprogramm": { "score": 8, "begründung": "..." }
    }
  ],
  "verbesserungen": [
    {
      "original": "Originaltext aus dem Antrag",
      "vorschlag": "Verbesserter Text mit **Ergänzungen** und ~~Streichungen~~",
      "begruendung": "Bezug zu GWÖ-Themenfeld"
    }
  ],
  "stärken": ["Punkt 1", "Punkt 2"],
  "schwächen": ["Punkt 1"],
  "empfehlung": "Ablehnen | Überarbeiten | Unterstützen mit Änderungen | Uneingeschränkt unterstützen",
  "empfehlungSymbol": "[X] | [!] | [+] | [++]",
  "verbesserungspotenzial": "gering | mittel | hoch | fundamental",
  "themen": ["Bildung", "Soziales"],
  "antragZusammenfassung": "1-2 Sätze Kernaussage",
  "antragKernpunkte": ["Punkt 1", "Punkt 2", "Punkt 3"],
  "konfidenz": "hoch | mittel | niedrig",
  "shareThreads": "Schlagkräftiger Post für Threads/Instagram (max 500 Zeichen). Emoji, Engagement, CTA, konkret auf den Antrag bezogen. Hashtags: #Gemeinwohl #GWÖ + 2-3 thematische.",
  "shareTwitter": "Prägnanter Tweet für X/Twitter (max 280 Zeichen). Knackig, pointiert, mit Emoji und 2 Hashtags.",
  "shareMastodon": "Sachlicher aber ansprechender Post für Mastodon (max 500 Zeichen). Informativ, quellenbasiert, mit Kontext."
}

## Wichtige Regeln

- **Verbesserungsvorschläge**: Maximal 3! Fokussiere auf die wirkungsvollsten Änderungen, die den GWÖ-Score am meisten verbessern würden.
- **Zitate**: Jedes Zitat MUSS auf einen `[Qn]`-Chunk aus dem mitgelieferten Kontext verweisen und den `text`-String **wörtlich** (mind. 5 zusammenhängende Wörter) aus genau diesem Chunk übernehmen. Kein Paraphrasieren, kein Cross-Referencing aus dem Trainingswissen. Wenn kein Chunk passt: lass `zitate` leer — lieber 0 Zitate als ein erfundenes. Die ausführliche ZITATEREGEL steht im wahlprogramm_zitate-Block.
- **Matrix-Bewertung**: Bewerte nur Felder, die der Antrag tatsächlich berührt. Nicht jeder Antrag betrifft alle 25 Felder.
- **Gesamtscore-Berechnung**: Der gwoeScore (0-10) berücksichtigt die Matrix-Bewertungen:
  - Wenn EIN Feld -4 oder -5 hat → Gesamtscore maximal 3/10
  - Wenn EIN Feld -3 hat → Gesamtscore maximal 4/10
  - Bei "Ablehnen" → Score 0-2/10
  - Bei "Uneingeschränkt unterstützen" → Score 8-10/10
- **Matrix-Felder**: Bewertung -5 bis +5 (Symbole: −− / − / ○ / + / ++)
- **Konfidenz**: Selbsteinschätzung der Bewertungssicherheit:
  - "hoch": Antrag ist eindeutig, GWÖ-Bezug klar, genügend Kontext
  - "mittel": Antrag ist mehrdeutig oder berührt Nischenthemen
  - "niedrig": Antrag ist sehr kurz, unklar oder fachfremd — Bewertung unsicher"""
|
|
||||||
|
|
||||||
|
|
||||||
def get_bundesland_context(bundesland: str) -> str:
    """Build the LLM context block for a specific state.

    Reads the governing fractions and parliament name from
    ``BUNDESLAENDER`` and the optional Wahlprogramm overview file from
    ``WAHLPROGRAMM_KONTEXT_FILES``. Federal-level Grundsatzprogramme
    (parteiprogramme.md) apply across all states.

    Args:
        bundesland: State code from ``bundeslaender.py``.

    Returns:
        German-language Markdown context block for the user prompt.

    Raises:
        ValueError: on an unknown or inactive Bundesland. Pre-#5 there was
            a silent fallback to NRW here — deliberately removed so that
            configuration gaps surface early.
    """
    bl = BUNDESLAENDER.get(bundesland)
    if bl is None:
        raise ValueError(f"Unbekanntes Bundesland: {bundesland}")
    if not bl.aktiv:
        raise ValueError(
            f"Bundesland {bundesland} ist nicht aktiv (siehe bundeslaender.py)"
        )

    # Overview file is optional per state; missing file yields "".
    wahlprogramm_kontext_file = WAHLPROGRAMM_KONTEXT_FILES.get(bundesland)
    wahlprogramme_text = (
        load_context_file(wahlprogramm_kontext_file) if wahlprogramm_kontext_file else ""
    )
    parteiprogramme_text = load_context_file("parteiprogramme.md")

    return f"""
## Parlament

{bl.parlament_name} (Wahlperiode {bl.wahlperiode}, seit {bl.wahlperiode_start})

## Wahlprogramme {bl.name}

{wahlprogramme_text or '(keine Übersichtsdatei hinterlegt)'}

## Grundsatzprogramme der Parteien

{parteiprogramme_text}

## Regierungsfraktionen in {bl.name}

{', '.join(bl.regierungsfraktionen)}

## Im Landtag vertretene Fraktionen

{', '.join(bl.landtagsfraktionen)}

Bei Oppositionsanträgen: Bewerte zusätzlich, ob die Regierungsfraktionen zustimmen würden.
"""
|
|
||||||
|
|
||||||
|
|
||||||
async def analyze_antrag(
    text: str,
    bundesland: str = "NRW",
    model: str = "qwen-plus",
    bewerter: Optional[LlmBewerter] = None,
) -> Assessment:
    """Analyze a parliamentary motion using the LLM.

    Args:
        text: Full plain text of the motion (Antrag).
        bundesland: State code from ``bundeslaender.py``.
        model: LLM model name (accepted by the default ``QwenBewerter``
            adapter; other adapters may use their own model names).
        bewerter: ``LlmBewerter`` implementation. Default:
            ``QwenBewerter`` (DashScope/Qwen). Tests pass a
            ``FakeLlmBewerter`` here.

    Returns:
        A schema-validated ``Assessment``.

    Per ADR 0008 the HTTP call including its retry loop lives in the
    adapter; only the application logic remains here (prompt composition,
    semantic search, citation binding, missing-programme check, Pydantic
    validation and domain-invariant warnings).
    """

    if bewerter is None:
        bewerter = get_default_bewerter()

    system_prompt = get_system_prompt()
    bundesland_context = get_bundesland_context(bundesland)

    # Extract fractions from the text (simple heuristic): which of the
    # parties represented in the state parliament are mentioned in the
    # motion? Source is BUNDESLAENDER.landtagsfraktionen — not
    # WAHLPROGRAMME, because we also want to recognise fractions for which
    # no election programme is on file (yet).
    landtagsfraktionen = BUNDESLAENDER[bundesland].landtagsfraktionen
    text_lower = text.lower()
    fraktionen = [
        partei for partei in landtagsfraktionen
        if partei in text or partei.lower() in text_lower
    ]

    # Find relevant quotes via semantic search (embeddings), if an
    # embeddings DB has been built; otherwise use keyword search.
    quotes_context = ""
    semantic_quotes: dict = {}
    if EMBEDDINGS_DB.exists():
        try:
            semantic_quotes = get_relevant_quotes_for_antrag(
                text, fraktionen, bundesland=bundesland, top_k_per_partei=5,
            )
            quotes_context = format_quotes_for_prompt(
                semantic_quotes, searched_parties=fraktionen,
            )
        except (NameError, AttributeError, TypeError, KeyError):
            # Programming errors (e.g. the partei_upper refactor leftover
            # from #55/eb045d0 that led to issue #60) must fail hard
            # instead of silently falling back to the weaker keyword path.
            raise
        except Exception:
            logger.exception("Semantic search failed, falling back to keyword search")
            quotes = find_relevant_quotes(text, fraktionen, bundesland=bundesland)
            quotes_context = format_quote_for_prompt(quotes)
    else:
        # Fallback to keyword search
        quotes = find_relevant_quotes(text, fraktionen, bundesland=bundesland)
        quotes_context = format_quote_for_prompt(quotes)

    user_prompt = USER_PROMPT_TEMPLATE.format(
        bundesland_context=bundesland_context,
        quotes_context=quotes_context if quotes_context else "Keine relevanten Zitate gefunden.",
        text=text,
        pflicht_fraktionen=", ".join(BUNDESLAENDER[bundesland].landtagsfraktionen),
    )

    # LLM call through the port. Retry loop + markdown stripping live in
    # the adapter (``QwenBewerter``). On exhausted retries it raises
    # json.JSONDecodeError — we let that propagate, same as before the
    # migration.
    request = LlmRequest(
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        model=model,
    )
    data = await bewerter.bewerte(request)

    # Issue #60 option B — server-side reconstruction of the citation
    # quelle/url fields from the actually retrieved chunks, *before*
    # Pydantic validation. The LLM is no longer the source of the source
    # labels; they are replaced with the canonical _chunk_source_label and
    # citations without a chunk match are dropped.
    if semantic_quotes:
        data = reconstruct_zitate(data, semantic_quotes)

    # #128: detect missing election programmes server-side and record
    # them. The LLM does not receive this information — it is based on the
    # local registry, not on LLM knowledge.
    missing = check_missing_programmes(bundesland, landtagsfraktionen)
    if missing:
        logger.warning(
            "Fehlende Wahlprogramme für %s in %s: %s",
            landtagsfraktionen, bundesland, missing,
        )
        data["fehlendeProgramme"] = missing

    # Pydantic validation: hard check against schema drift.
    assessment = Assessment.model_validate(data)

    # Day-4 invariant warnings (ADR 0008): score-cap violations are logged
    # but not raised — the LLM should learn; production must not break.
    if assessment.verletzt_score_cap():
        logger.warning(
            "Assessment %s verletzt Score-Cap: gwoe_score=%.1f bei "
            "fundamental-kritischem Matrix-Feld (rating≤-4)",
            assessment.drucksache, assessment.gwoe_score,
        )

    return assessment
|
|
||||||
@ -1,244 +0,0 @@
|
|||||||
"""Aggregations-Funktionen für die Auswertungen-Seite (#58).
|
|
||||||
|
|
||||||
Liest direkt aus ``data/gwoe-antraege.db`` (assessments-Tabelle) und baut
|
|
||||||
drei Sichten:
|
|
||||||
|
|
||||||
1. ``aggregate_matrix(filter_wp=None)`` — 2D-Matrix Bundesland × Partei
|
|
||||||
mit (n, Ø-GWÖ-Score). Filterbar nach Wahlperiode.
|
|
||||||
2. ``aggregate_zeitreihe(bundesland, partei)`` — Score-Verlauf einer
|
|
||||||
(BL, Partei)-Kombination über alle bekannten WPs.
|
|
||||||
3. ``export_long_format()`` — Long-Format-Tabelle für CSV-Export
|
|
||||||
(deckt zusätzlich Issue #45 ab).
|
|
||||||
|
|
||||||
Partei-Auflösung läuft strikt über ``app.parteien.normalize_partei`` —
|
|
||||||
ohne den Mapper aus #55 würde z.B. BB-FW mit RP-FW in einen Topf
|
|
||||||
gerührt.
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import csv
|
|
||||||
import io
|
|
||||||
import json
|
|
||||||
import sqlite3
|
|
||||||
from collections import defaultdict
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
from .config import settings
|
|
||||||
from .parteien import normalize_partei
|
|
||||||
from .wahlperioden import wahlperiode_for
|
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
# Datenstrukturen
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def _load_assessments(db_path: Optional[Path] = None) -> list[dict]:
|
|
||||||
"""Lese alle Assessments aus der SQLite-DB. Kein Filter — die
|
|
||||||
Aggregations-Funktionen filtern selbst. Kein async, weil die
|
|
||||||
Sicht synchron berechnet werden kann."""
|
|
||||||
path = db_path or settings.db_path
|
|
||||||
if not Path(path).exists():
|
|
||||||
return []
|
|
||||||
conn = sqlite3.connect(str(path))
|
|
||||||
try:
|
|
||||||
conn.row_factory = sqlite3.Row
|
|
||||||
rows = conn.execute(
|
|
||||||
"""
|
|
||||||
SELECT drucksache, bundesland, datum, fraktionen, gwoe_score
|
|
||||||
FROM assessments
|
|
||||||
WHERE gwoe_score IS NOT NULL
|
|
||||||
"""
|
|
||||||
).fetchall()
|
|
||||||
finally:
|
|
||||||
conn.close()
|
|
||||||
|
|
||||||
out: list[dict] = []
|
|
||||||
for r in rows:
|
|
||||||
try:
|
|
||||||
fraktionen = json.loads(r["fraktionen"]) if r["fraktionen"] else []
|
|
||||||
except (json.JSONDecodeError, TypeError):
|
|
||||||
fraktionen = []
|
|
||||||
out.append({
|
|
||||||
"drucksache": r["drucksache"],
|
|
||||||
"bundesland": r["bundesland"],
|
|
||||||
"datum": r["datum"] or "",
|
|
||||||
"fraktionen": fraktionen,
|
|
||||||
"gwoe_score": r["gwoe_score"],
|
|
||||||
})
|
|
||||||
return out
|
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
# 1. Matrix Bundesland × Partei
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def aggregate_matrix(
    filter_wp: Optional[str] = None,
    filter_bl: Optional[str] = None,
    db_path: Optional[Path] = None,
) -> dict:
    """Aggregate assessments into a 2D state × party matrix.

    Returns:
        ``{
            "bundeslaender": [...],
            "parteien": [...],
            "cells": {"<bl>": {"<partei>": {"n": int, "avg": float}}},
            "filter_wp": <filter_wp> | None,
            "filter_bl": <filter_bl> | None,
            "total": int,
        }``

    ``filter_wp`` is a ``"<BL>-WP<n>"`` key such as ``"NRW-WP18"``; only
    assessments of that Wahlperiode are counted. ``None`` means no WP
    restriction. ``filter_bl`` restricts to one state (e.g. ``"NRW"``);
    ``None`` means all states.
    """
    # Per (state, party) accumulator: [score_sum, count].
    stats: dict[tuple[str, str], list] = {}
    seen_bl: set[str] = set()
    seen_parteien: set[str] = set()
    total = 0

    for assessment in _load_assessments(db_path):
        bl = assessment["bundesland"]
        if not bl:
            continue
        if filter_bl is not None and bl != filter_bl:
            continue
        if filter_wp is not None and wahlperiode_for(assessment["datum"], bl) != filter_wp:
            continue

        seen_bl.add(bl)
        score = assessment["gwoe_score"]
        for raw in assessment["fraktionen"]:
            # Strict canonicalisation via the #55 party mapper; fall back
            # to the raw label when no canonical name is known.
            canonical = normalize_partei(raw, bundesland=bl) or raw
            seen_parteien.add(canonical)
            bucket = stats.setdefault((bl, canonical), [0.0, 0])
            bucket[0] += score
            bucket[1] += 1
        total += 1

    cells: dict[str, dict[str, dict]] = {}
    for (bl, partei), (score_sum, n) in stats.items():
        cells.setdefault(bl, {})[partei] = {
            "n": n,
            "avg": round(score_sum / n, 2) if n else None,
        }

    return {
        "bundeslaender": sorted(seen_bl),
        "parteien": sorted(seen_parteien),
        "cells": cells,
        "filter_wp": filter_wp,
        "filter_bl": filter_bl,
        "total": total,
    }
|
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
# 1b. Hilfsfunktion: Liste aller bekannten Wahlperioden
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def get_wahlperioden(db_path: Optional[Path] = None) -> list[str]:
    """Return all Wahlperioden occurring in the stored assessments,
    sorted ascending.

    Fix: derive the period from the assessment *date* (``datum``), which
    is how every other caller in this module uses ``wahlperiode_for``
    (``aggregate_matrix``, ``aggregate_zeitreihe``, ``export_long_format``).
    Previously the Drucksachen-ID was passed as the date argument — if
    ``wahlperiode_for`` expects a date string, that always resolved to
    ``None`` and left this list empty. Rows without a state are skipped,
    mirroring the sibling functions.
    """
    wps: set[str] = set()
    for row in _load_assessments(db_path):
        bl = row["bundesland"]
        if not bl:
            continue
        wp = wahlperiode_for(row["datum"], bl)
        if wp:
            wps.add(wp)
    return sorted(wps)
|
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
# 2. Zeitreihe pro (BL, Partei) über alle Wahlperioden
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def aggregate_zeitreihe(
    bundesland: str,
    partei: str,
    db_path: Optional[Path] = None,
) -> dict:
    """Score trajectory of one (state, party) pair across all known WPs.

    Returns:
        ``{
            "bundesland": str,
            "partei": str,
            "wahlperioden": [
                {"wp": "<BL>-WP<n>", "n": int, "avg": float},
                ...
            ]
        }``
        with the entries sorted ascending by Wahlperiode key.
    """
    # Per-WP accumulator: [score_sum, count].
    per_wp: dict[str, list] = {}

    for row in _load_assessments(db_path):
        if row["bundesland"] != bundesland:
            continue
        # Match against canonical party names so regional label variants
        # still count towards the requested party.
        involved = any(
            (normalize_partei(raw, bundesland=bundesland) or raw) == partei
            for raw in row["fraktionen"]
        )
        if not involved:
            continue
        wp = wahlperiode_for(row["datum"], bundesland)
        if wp is None:
            continue
        bucket = per_wp.setdefault(wp, [0.0, 0])
        bucket[0] += row["gwoe_score"]
        bucket[1] += 1

    return {
        "bundesland": bundesland,
        "partei": partei,
        "wahlperioden": [
            {
                "wp": wp,
                "n": per_wp[wp][1],
                "avg": round(per_wp[wp][0] / per_wp[wp][1], 2),
            }
            for wp in sorted(per_wp)
        ],
    }
|
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
# 3. Long-Format-Export für CSV (deckt #45 mit ab)
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def export_long_format(db_path: Optional[Path] = None) -> str:
    """Long-format CSV export of all assessments for external analysis.

    Columns: ``drucksache,bundesland,wahlperiode,datum,partei,gwoe_score``.
    One row per (drucksache, partei) pair — a motion carried by several
    fractions (coalition motions) therefore produces several rows sharing
    the same Drucksache. Also covers issue #45.
    """
    buf = io.StringIO()
    writer = csv.writer(buf, dialect="excel")
    writer.writerow(
        ["drucksache", "bundesland", "wahlperiode", "datum", "partei", "gwoe_score"]
    )

    for assessment in _load_assessments(db_path):
        bl = assessment["bundesland"] or ""
        wp = wahlperiode_for(assessment["datum"], bl) if bl else ""
        score_text = f"{assessment['gwoe_score']:.2f}"
        for raw in assessment["fraktionen"]:
            canonical = normalize_partei(raw, bundesland=bl) or raw
            writer.writerow([
                assessment["drucksache"],
                bl,
                wp or "",
                assessment["datum"],
                canonical,
                score_text,
            ])

    return buf.getvalue()
|
|
||||||
300
app/auth.py
300
app/auth.py
@ -1,300 +0,0 @@
|
|||||||
"""Keycloak JWT Authentication for FastAPI (#43).
|
|
||||||
|
|
||||||
Read-Only-Endpoints (GET) bleiben offen. Write-Endpoints (POST) erfordern
|
|
||||||
ein gültiges Keycloak-JWT. Das Modul cached den JWKS (public keys) für
|
|
||||||
1 Stunde und validiert Token-Signatur + Expiry + Audience + Issuer.
|
|
||||||
|
|
||||||
Wenn Keycloak nicht konfiguriert ist (KEYCLOAK_URL leer), ist Auth
|
|
||||||
**deaktiviert** — alle Endpoints sind offen. Das erlaubt lokale
|
|
||||||
Entwicklung ohne Keycloak-Server.
|
|
||||||
|
|
||||||
Usage in main.py:
|
|
||||||
|
|
||||||
from .auth import get_current_user, require_auth
|
|
||||||
|
|
||||||
@app.post("/api/analyze-drucksache")
|
|
||||||
async def analyze(request: Request, user = Depends(require_auth)):
|
|
||||||
... # user ist ein dict mit sub, email, name, roles
|
|
||||||
|
|
||||||
@app.get("/api/auth/me")
|
|
||||||
async def auth_me(user = Depends(get_current_user)):
|
|
||||||
... # user ist None wenn nicht eingeloggt, dict wenn eingeloggt
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import time
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import httpx
|
|
||||||
from fastapi import Depends, HTTPException, Request
|
|
||||||
|
|
||||||
from .config import settings
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)


# ─────────────────────────────────────────────────────────────────────────────
# JWKS cache — loads the public keys from the Keycloak server, cached for 1h.
# ─────────────────────────────────────────────────────────────────────────────

# In-process cache of the Keycloak key set; populated lazily by _get_jwks().
_jwks_cache: dict = {}
# Unix timestamp of the last successful refresh (0 = never fetched).
_jwks_cache_time: float = 0
_JWKS_CACHE_TTL = 3600  # seconds (1h) — signing keys rotate rarely
|
|
||||||
|
|
||||||
|
|
||||||
def _keycloak_issuer() -> str:
    """Return the issuer URL of the configured Keycloak realm."""
    base = settings.keycloak_url
    realm = settings.keycloak_realm
    return f"{base}/realms/{realm}"
|
|
||||||
|
|
||||||
|
|
||||||
def _keycloak_jwks_url() -> str:
    """Return the JWKS (public key set) endpoint of the realm."""
    return _keycloak_issuer() + "/protocol/openid-connect/certs"
|
|
||||||
|
|
||||||
|
|
||||||
async def _get_jwks() -> dict:
    """Fetch the JWKS from Keycloak, or return the cached copy.

    Serves the in-process cache while it is younger than
    ``_JWKS_CACHE_TTL``. On any fetch failure (non-200 response or
    transport error) the previous — possibly stale, possibly empty —
    cache is returned instead of raising, so token validation degrades
    gracefully rather than taking requests down with it.
    """
    global _jwks_cache, _jwks_cache_time

    # Fast path: cache still within its TTL window.
    if _jwks_cache and (time.time() - _jwks_cache_time) < _JWKS_CACHE_TTL:
        return _jwks_cache

    url = _keycloak_jwks_url()
    try:
        async with httpx.AsyncClient(timeout=10) as client:
            resp = await client.get(url)
            if resp.status_code == 200:
                _jwks_cache = resp.json()
                _jwks_cache_time = time.time()
                logger.info("JWKS refreshed from %s (%d keys)", url, len(_jwks_cache.get("keys", [])))
                return _jwks_cache
            else:
                logger.error("JWKS fetch failed: HTTP %s from %s", resp.status_code, url)
    except Exception:
        # Broad catch is deliberate here: any network/JSON problem must
        # fall through to the stale-cache return below.
        logger.exception("JWKS fetch error from %s", url)

    return _jwks_cache  # Return stale cache if refresh fails
|
|
||||||
|
|
||||||
|
|
||||||
def _is_auth_enabled() -> bool:
    """Auth is active only when all three Keycloak settings are set."""
    required = (
        settings.keycloak_url,
        settings.keycloak_realm,
        settings.keycloak_client_id,
    )
    return all(required)
|
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
# Token-Extraktion und Validierung
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_token(request: Request) -> Optional[str]:
    """Extract the bearer token from the Authorization header or a cookie."""
    header_value = request.headers.get("authorization", "")
    prefix = "Bearer "
    if header_value.startswith(prefix):
        return header_value[len(prefix):]
    # Fallback: cookie (set for browser redirects after Keycloak login).
    return request.cookies.get("access_token")
|
|
||||||
|
|
||||||
|
|
||||||
async def _validate_token(token: str) -> Optional[dict]:
    """Validate a JWT against the Keycloak JWKS.

    Returns a reduced user dict (``sub``, ``email``, ``name``, ``roles``)
    on success, or ``None`` for any invalid, expired, or unverifiable
    token. Never raises — callers translate ``None`` into their own 401.

    Fix: the ``from jose import ...`` used to live inside the main ``try``
    whose first handlers referenced the jose exception classes. When the
    import itself raised ``ImportError``, evaluating
    ``except ExpiredSignatureError:`` hit a ``NameError`` (the names were
    never bound), masking the intended "python-jose not installed" branch.
    The import now has its own try/except.
    """
    try:
        from jose import jwt, JWTError, ExpiredSignatureError
    except ImportError:
        logger.error("python-jose not installed — JWT validation disabled")
        return None

    try:
        jwks = await _get_jwks()
        if not jwks or "keys" not in jwks:
            logger.warning("No JWKS available for token validation")
            return None

        # Decode the (unverified) header only to find the key id (kid).
        unverified_header = jwt.get_unverified_header(token)
        kid = unverified_header.get("kid")

        # Find the matching public key in the JWKS.
        rsa_key = None
        for key in jwks.get("keys", []):
            if key.get("kid") == kid:
                rsa_key = key
                break

        if not rsa_key:
            logger.warning("JWT kid %s not found in JWKS", kid)
            return None

        # Keycloak sets aud="account" for public clients, not the
        # client_id. So we check azp (authorized party) instead of aud and
        # disable the strict aud check.
        payload = jwt.decode(
            token,
            rsa_key,
            algorithms=["RS256"],
            issuer=_keycloak_issuer(),
            options={"verify_exp": True, "verify_aud": False},
        )

        # azp must match our client.
        if payload.get("azp") != settings.keycloak_client_id:
            logger.warning("JWT azp %s != expected %s", payload.get("azp"), settings.keycloak_client_id)
            return None

        return {
            "sub": payload.get("sub"),
            "email": payload.get("email", ""),
            "name": payload.get("preferred_username", payload.get("name", "")),
            "roles": payload.get("realm_access", {}).get("roles", []),
        }

    except ExpiredSignatureError:
        logger.debug("JWT expired")
        return None
    except JWTError as e:
        logger.debug("JWT validation failed: %s", e)
        return None
|
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
# FastAPI Dependencies
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
async def get_current_user(request: Request) -> Optional[dict]:
    """Optional auth dependency — returns a user dict or ``None``.

    For endpoints that work both with and without a login (e.g. UI
    personalisation, bookmark display).
    """
    if not _is_auth_enabled():
        return None

    token = _extract_token(request)
    return await _validate_token(token) if token else None
|
|
||||||
|
|
||||||
|
|
||||||
async def require_auth(request: Request) -> dict:
    """Mandatory auth dependency — returns a user dict or raises HTTP 401.

    Used by write endpoints (POST analyze, index). When auth is not
    configured at all, every request passes (dev mode).
    """
    if not _is_auth_enabled():
        return {"sub": "anonymous", "email": "", "name": "Dev-Modus", "roles": []}

    token = _extract_token(request)
    if not token:
        raise HTTPException(
            status_code=401,
            detail="Anmeldung erforderlich",
            headers={"WWW-Authenticate": "Bearer"},
        )

    user = await _validate_token(token)
    if user:
        return user

    raise HTTPException(
        status_code=401,
        detail="Token ungültig oder abgelaufen",
        headers={"WWW-Authenticate": "Bearer"},
    )
|
|
||||||
|
|
||||||
|
|
||||||
async def require_admin(request: Request) -> dict:
    """Admin auth dependency — returns a user dict or raises HTTP 403.

    Checks whether the user carries the role 'admin' or 'gwoe-admin'.
    In dev mode (auth disabled) every request passes with an admin role.
    Used by: batch analysis, programme indexing, assessment deletion.

    Fix: removed the unreachable ``return user`` that followed the
    unconditional ``raise HTTPException(403)`` (dead code).
    """
    if not _is_auth_enabled():
        return {"sub": "anonymous", "email": "", "name": "Dev-Modus", "roles": ["admin"]}

    user = await require_auth(request)
    roles = user.get("roles", [])
    if "admin" in roles or "gwoe-admin" in roles:
        return user

    raise HTTPException(
        status_code=403,
        detail="Admin-Berechtigung erforderlich",
    )
|
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
# Auth-Info-Endpoint
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
async def keycloak_admin_token() -> str:
    """Obtain an admin token from the Keycloak master realm.

    Uses the credentials from the environment variables
    KEYCLOAK_ADMIN_USER and KEYCLOAK_ADMIN_PASSWORD (via ``settings``).

    Raises:
        HTTPException: 500 when the admin credentials are not configured
            or the Keycloak token endpoint does not answer with HTTP 200.

    Fix: dropped the redundant function-local ``import httpx`` — the
    module already imports httpx at the top and uses it elsewhere.
    """
    if not settings.keycloak_admin_user or not settings.keycloak_admin_password:
        raise HTTPException(status_code=500, detail="Keycloak-Admin-Credentials nicht konfiguriert")

    async with httpx.AsyncClient(timeout=10) as client:
        resp = await client.post(
            f"{settings.keycloak_url}/realms/master/protocol/openid-connect/token",
            data={
                "grant_type": "password",
                "client_id": "admin-cli",
                "username": settings.keycloak_admin_user,
                "password": settings.keycloak_admin_password,
            },
        )
        if resp.status_code != 200:
            raise HTTPException(status_code=500, detail="Keycloak-Verbindung fehlgeschlagen")
        return resp.json()["access_token"]
|
|
||||||
|
|
||||||
|
|
||||||
def keycloak_login_url(redirect_uri: str) -> str:
    """Build the Keycloak login URL for the browser redirect.

    Returns an empty string when auth is not configured.
    """
    if not _is_auth_enabled():
        return ""
    from urllib.parse import quote

    query = (
        f"client_id={settings.keycloak_client_id}"
        f"&redirect_uri={quote(redirect_uri)}"
        "&response_type=code"
        "&scope=openid profile email"
    )
    return f"{_keycloak_issuer()}/protocol/openid-connect/auth?{query}"
|
|
||||||
|
|
||||||
|
|
||||||
async def direct_login(username: str, password: str) -> dict:
    """Login via the Keycloak Direct Access Grant flow (#129).

    On success returns the token response
    ({access_token, refresh_token, expires_in, ...}).

    Raises:
        HTTPException: 400 when auth is disabled; 401 for wrong
            credentials; otherwise the upstream status with Keycloak's
            error description.
    """
    if not _is_auth_enabled():
        raise HTTPException(status_code=400, detail="Auth nicht aktiviert")

    form = {
        "grant_type": "password",
        "client_id": settings.keycloak_client_id,
        "username": username,
        "password": password,
        "scope": "openid profile email",
    }
    token_url = f"{_keycloak_issuer()}/protocol/openid-connect/token"
    async with httpx.AsyncClient(timeout=10) as client:
        resp = await client.post(token_url, data=form)

    if resp.status_code == 200:
        return resp.json()
    if resp.status_code == 401:
        error = resp.json().get("error_description", "Ungültige Anmeldedaten")
        raise HTTPException(status_code=401, detail=error)
    error = resp.json().get("error_description", f"Keycloak-Fehler ({resp.status_code})")
    raise HTTPException(status_code=resp.status_code, detail=error)
|
|
||||||
@ -1,480 +0,0 @@
|
|||||||
"""Zentrale Konfiguration aller 16 deutschen Bundesländer.
|
|
||||||
|
|
||||||
Dieses Modul ist die Single Source of Truth für alle bundeslandspezifischen
|
|
||||||
Daten: Parlamente, Regierungen, Wahlperioden, Doku-Systeme, etc. Andere
|
|
||||||
Module (main.py, parlamente.py, wahlprogramme.py, analyzer.py) lesen
|
|
||||||
ausschließlich von hier.
|
|
||||||
|
|
||||||
Stand: April 2026. Nach jeder Landtagswahl bzw. Regierungsbildung müssen
|
|
||||||
die betroffenen Einträge aktualisiert werden.
|
|
||||||
|
|
||||||
Datenquellen: Wikipedia, offizielle Landtagsseiten, parlamentsspiegel.de,
|
|
||||||
https://github.com/okfde/dokukratie (für Doku-System-Zuordnung).
|
|
||||||
"""
|
|
||||||
|
|
||||||
from dataclasses import dataclass, field
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class Bundesland:
    """Static configuration record for one German federal state.

    Attributes:
        code: Common short code used in political parlance (NRW, BY, LSA, ...).
            Where ambiguous, the ISO-3166-2-DE suffix is used; Saxony-Anhalt
            deliberately uses the politically dominant "LSA" instead of
            ISO "ST".
        name: Full state name.
        parlament_name: Official name of the state parliament.
        wahlperiode: Current legislative period as a number.
        wahlperiode_start: Start of the current legislative period (ISO date).
        naechste_wahl: Next regular state election (ISO date), or None if not
            yet scheduled.
        regierungsfraktionen: Parties of the current state government, ordered
            by size.
        landtagsfraktionen: All parliamentary groups currently represented in
            the state parliament.
        doku_system: Parliamentary documentation system in use.
            Values: "OPAL", "StarWeb", "ParlDok", "PARDOK", "PARLIS",
            "PARiS", "Eigensystem".
        doku_base_url: Base URL of the parliamentary documentation system.
        drucksache_format: Example format of a printed-matter ID, e.g.
            "18/12345" for NRW legislative period 18.
        dokukratie_scraper: Code name of the Dokukratie scraper (if one
            exists); useful for future adapter implementations.
        aktiv: Whether the state is selectable in the frontend and supported
            by the analyzer. Inactive states are shown as "(bald)" in the UI
            and disabled.
        anmerkung: Optional notes on special situations (e.g. ongoing
            coalition talks, a recent election, estimated dates).
    """

    code: str
    name: str
    parlament_name: str
    wahlperiode: int
    wahlperiode_start: str
    naechste_wahl: Optional[str]
    regierungsfraktionen: list[str]
    landtagsfraktionen: list[str]
    doku_system: str
    doku_base_url: str
    drucksache_format: str
    dokukratie_scraper: Optional[str]
    aktiv: bool = False
    anmerkung: str = ""
|
|
||||||
|
|
||||||
|
|
||||||
# Main registry: code -> Bundesland instance.
# Ordered alphabetically by official name for stable UI sorting.
# Special entry "BUND" for the German Bundestag (technically not a federal
# state, but it shares the whole adapter/analyzer pipeline with the 16 states).
BUNDESLAENDER: dict[str, Bundesland] = {
    "BUND": Bundesland(
        code="BUND",
        name="Deutscher Bundestag",
        parlament_name="Deutscher Bundestag",
        wahlperiode=21,
        wahlperiode_start="2025-03-25",  # constituent session of the 21st term after the 2025 federal election
        naechste_wahl="2029-09-30",  # estimated
        regierungsfraktionen=["CDU", "CSU", "SPD"],  # Merz cabinet, CDU/CSU-SPD coalition
        landtagsfraktionen=["CDU", "CSU", "AfD", "SPD", "GRÜNE", "LINKE", "BSW", "FDP"],
        doku_system="DIP",
        doku_base_url="https://search.dip.bundestag.de",
        drucksache_format="21/12345",
        dokukratie_scraper=None,
        aktiv=True,
        anmerkung=(
            "DIP-API auf search.dip.bundestag.de mit öffentlichem "
            "API-Key aus dip-config.js und Origin-Header-Locking auf "
            "https://dip.bundestag.de. ~600 Anträge pro Wahlperiode. "
            "Kabinett Merz seit Mai 2025 (CDU/CSU+SPD nach BSW-Aus). "
            "BundestagAdapter implementiert in #56."
        ),
    ),
    "BW": Bundesland(
        code="BW",
        name="Baden-Württemberg",
        parlament_name="Landtag von Baden-Württemberg",
        wahlperiode=17,
        wahlperiode_start="2021-05-01",
        naechste_wahl="2031-03-08",
        regierungsfraktionen=["GRÜNE", "CDU"],
        landtagsfraktionen=["GRÜNE", "CDU", "AfD", "SPD", "FDP"],
        doku_system="PARLIS",
        doku_base_url="https://parlis.landtag-bw.de",
        drucksache_format="17/12345",
        dokukratie_scraper="bw",
        aktiv=True,
        anmerkung=(
            "PARLIS auf parlis.landtag-bw.de läuft auf demselben "
            "eUI-Backend wie LSA-PADOKA und BE-PARDOK, aber mit drei "
            "Unterschieden: minimales lines-Schema (l1/l2/l3/l4), "
            "asynchrones Polling (initial → search_id → poll → "
            "report_id) und Hit-Records als JSON-in-HTML-Comments. "
            "Eigene Adapter-Klasse PARLISAdapter (#29). Wahl zum 18. "
            "Landtag fand am 08.03.2026 statt; Koalitionsverhandlungen "
            "GRÜNE+CDU laufen, Kabinett Kretschmann III geschäftsführend. "
            "Nach Konstituierung des 18. LT ca. Mai 2026 müssen WP und "
            "Wahltermin aktualisiert werden."
        ),
    ),
    "BY": Bundesland(
        code="BY",
        name="Bayern",
        parlament_name="Bayerischer Landtag",
        wahlperiode=19,
        wahlperiode_start="2023-10-30",
        naechste_wahl="2028-10-08",
        regierungsfraktionen=["CSU", "FW"],
        landtagsfraktionen=["CSU", "GRÜNE", "FW", "AfD", "SPD"],
        doku_system="Eigensystem",
        doku_base_url="https://www.bayern.landtag.de",
        drucksache_format="19/1234",
        dokukratie_scraper="by",
        anmerkung="Wahltermin 2028 noch nicht offiziell festgesetzt; Schätzung Herbst 2028.",
        aktiv=True,
    ),
    "BE": Bundesland(
        code="BE",
        name="Berlin",
        parlament_name="Abgeordnetenhaus von Berlin",
        wahlperiode=19,
        wahlperiode_start="2023-04-27",
        naechste_wahl="2026-09-20",
        regierungsfraktionen=["CDU", "SPD"],
        landtagsfraktionen=["CDU", "SPD", "GRÜNE", "LINKE", "AfD"],
        doku_system="PARDOK",
        doku_base_url="https://pardok.parlament-berlin.de",
        drucksache_format="19/1234",
        dokukratie_scraper="be",
        aktiv=True,
        anmerkung=(
            "PARDOK = portala/eUI-Framework (gleiche Engine wie LSA-PADOKA, "
            "unter /portala/ statt /portal/). Hit list arrives as production "
            "HTML cards instead of LSA-style Perl Data::Dumper blocks — "
            "PortalaAdapter auto-detects both formats. document_type=None "
            "for BE because Berlin's ETYPF index uses different value strings "
            "than LSA. Wahlprogramme zur LTW 2023 sind noch nicht indexiert "
            "(Folge-Issue) — Analyse läuft daher mit Grundsatzprogramm-"
            "Zitaten als Fallback. Open-Data-XML unter "
            "parlament-berlin.de/dokumente/open-data ist eine alternative "
            "Datenquelle, derzeit nicht verwendet."
        ),
    ),
    "BB": Bundesland(
        code="BB",
        name="Brandenburg",
        parlament_name="Landtag Brandenburg",
        wahlperiode=8,
        # Election day (rather than the constituent session on 2024-10-23)
        # so that the rules-of-procedure printed matters of the constituent
        # session fall inside the plausibility check (see #61 bug 4).
        wahlperiode_start="2024-09-22",
        naechste_wahl="2029-09-23",
        regierungsfraktionen=["SPD", "BSW"],
        landtagsfraktionen=["SPD", "AfD", "CDU", "BSW"],
        doku_system="portala",
        doku_base_url="https://www.parlamentsdokumentation.brandenburg.de",
        drucksache_format="8/1234",
        dokukratie_scraper="bb",
        aktiv=True,
        anmerkung=(
            "Kabinett Woidke IV (SPD-BSW) seit Dezember 2024. Knappe "
            "Mehrheit (zwei Sitze). Doku-System ist NICHT StarWeb wie "
            "ursprünglich klassifiziert (das alte /starweb/LBB/ELVIS/-"
            "Frontend ist nur Legacy), sondern das moderne portala/eUI-"
            "Backend auf /portal/browse.tt.json mit db_id=lbb.lissh. "
            "Wiederverwendet PortalaAdapter aus #2/#3 (#27)."
        ),
    ),
    "HB": Bundesland(
        code="HB",
        name="Bremen",
        parlament_name="Bremische Bürgerschaft",
        wahlperiode=21,
        wahlperiode_start="2023-07-05",
        naechste_wahl="2027-05-09",
        regierungsfraktionen=["SPD", "GRÜNE", "LINKE"],
        landtagsfraktionen=["SPD", "CDU", "GRÜNE", "LINKE", "AfD", "BiW"],
        doku_system="PARiS",
        doku_base_url="https://paris.bremische-buergerschaft.de/starweb/paris",
        drucksache_format="21/1234S",
        dokukratie_scraper="hb",
        aktiv=True,
        anmerkung=(
            "PARiS ist eine alte Java-Servlet-Variante von StarWeb. "
            "Single-POST-Search gegen /starweb/paris/servlet.starweb mit "
            "form-urlencoded Body, Hits in <tbody name='RecordRepeater'>. "
            "Drucksachen tragen einen S/L-Suffix für Stadtbürgerschaft "
            "vs. Landtag (z.B. 21/730S). Eigener PARiSHBAdapter (#21/#33). "
            "AfD durch Listenstreichung 2023 nicht im Landtag, stattdessen "
            "BiW. Wahltag 2027 noch nicht festgesetzt."
        ),
    ),
    "HH": Bundesland(
        code="HH",
        name="Hamburg",
        parlament_name="Hamburgische Bürgerschaft",
        wahlperiode=23,
        wahlperiode_start="2025-03-26",
        naechste_wahl="2030-03-03",
        regierungsfraktionen=["SPD", "GRÜNE"],
        landtagsfraktionen=["SPD", "CDU", "GRÜNE", "LINKE", "AfD"],
        doku_system="ParlDok",
        doku_base_url="https://www.buergerschaft-hh.de/parldok",
        drucksache_format="23/1234",
        dokukratie_scraper="hh",
        aktiv=True,
        anmerkung=(
            "Wahl am 02.03.2025; Senat Tschentscher III seit 07.05.2025 "
            "vereidigt. ParlDok 8.3.1 (J3S GmbH) — kompatibel mit der MV-"
            "Variante (8.3.5), gleiches /parldok/Fulltext/Search-Schema. "
            "Aktiv via ParLDokAdapter-Registry-Eintrag in #28."
        ),
    ),
    "HE": Bundesland(
        code="HE",
        name="Hessen",
        parlament_name="Hessischer Landtag",
        wahlperiode=21,
        wahlperiode_start="2024-01-18",
        naechste_wahl="2028-10-22",
        regierungsfraktionen=["CDU", "SPD"],
        landtagsfraktionen=["CDU", "AfD", "SPD", "GRÜNE", "FDP"],
        doku_system="portala",
        doku_base_url="https://starweb.hessen.de/portal",
        drucksache_format="21/1234",
        dokukratie_scraper="he",
        aktiv=True,
        anmerkung=(
            "starweb.hessen.de läuft auf demselben portala/eUI-Backend "
            "wie LSA/BE/BB/RP, aber mit HE-spezifischem Hit-Format: "
            "Cards (efxRecordRepeater) mit Daten in HTML-Kommentar-"
            "Perl-Dumps (WEV01-WEV12). PortalaAdapter mit eigenem "
            "Parser-Modus _parse_hit_list_he_comment_dump (#24/#30). "
            "Wahltermin 2028 ist Schätzung."
        ),
    ),
    "MV": Bundesland(
        code="MV",
        name="Mecklenburg-Vorpommern",
        parlament_name="Landtag Mecklenburg-Vorpommern",
        wahlperiode=8,
        wahlperiode_start="2021-10-26",
        naechste_wahl="2026-09-20",
        regierungsfraktionen=["SPD", "LINKE"],
        landtagsfraktionen=["SPD", "AfD", "CDU", "LINKE", "GRÜNE", "FDP"],
        doku_system="ParlDok",
        doku_base_url="https://www.dokumentation.landtag-mv.de",
        drucksache_format="8/1234",
        dokukratie_scraper="mv",
        aktiv=True,
        anmerkung=(
            "ParlDok 8.3.5 (J3S GmbH) — moderne SPA, JSON-API unter "
            "/parldok/Fulltext/Search. ParLDokAdapter (eigene Implementierung, "
            "nicht portala-kompatibel). Die in dokukratie/mv.yml beschriebene "
            "Legacy-HTML-Form (parldok/formalkriterien) ist mit dem 8.x-Upgrade "
            "deprecated. Suche filtert via facet_lp=10/id=8 server-seitig auf "
            "WP8, type=Antrag wird client-seitig gefiltert. Wahlprogramme zur "
            "LTW 26.09.2021 sind noch nicht indexiert (Folge-Issue) — Analyse "
            "läuft daher mit Grundsatzprogramm-Zitaten als Fallback. Wahltag "
            "offiziell auf 20.09.2026 festgelegt."
        ),
    ),
    "NI": Bundesland(
        code="NI",
        name="Niedersachsen",
        parlament_name="Niedersächsischer Landtag",
        wahlperiode=19,
        wahlperiode_start="2022-11-08",
        naechste_wahl="2027-10-10",
        regierungsfraktionen=["SPD", "GRÜNE"],
        landtagsfraktionen=["SPD", "CDU", "GRÜNE", "AfD"],
        doku_system="StarWeb",
        doku_base_url="https://www.landtag-niedersachsen.de",
        drucksache_format="19/12345",
        dokukratie_scraper="ni",
        anmerkung=(
            "Wahltermin Herbst 2027 (zwischen 11.07. und 03.10.2027) noch nicht festgesetzt; "
            "geschätzt. Olaf Lies (SPD) seit 20.05.2025 Ministerpräsident."
        ),
        aktiv=True,
    ),
    "NRW": Bundesland(
        code="NRW",
        name="Nordrhein-Westfalen",
        parlament_name="Landtag Nordrhein-Westfalen",
        wahlperiode=18,
        wahlperiode_start="2022-06-01",
        naechste_wahl="2027-05-15",
        regierungsfraktionen=["CDU", "GRÜNE"],
        landtagsfraktionen=["CDU", "SPD", "GRÜNE", "FDP", "AfD"],
        doku_system="OPAL",
        doku_base_url="https://opal.landtag.nrw.de",
        drucksache_format="18/12345",
        dokukratie_scraper="nw",
        aktiv=True,
        anmerkung=(
            "OPAL in NRW ist eine eigene Implementierung, nicht identisch mit dem "
            "StarWeb-basierten OPAL in RLP. Wahltermin 2027 ist Schätzung."
        ),
    ),
    "RP": Bundesland(
        code="RP",
        name="Rheinland-Pfalz",
        parlament_name="Landtag Rheinland-Pfalz",
        wahlperiode=18,
        wahlperiode_start="2021-05-18",
        naechste_wahl="2031-03-22",
        regierungsfraktionen=["SPD", "GRÜNE", "FDP"],
        landtagsfraktionen=["SPD", "CDU", "AfD", "GRÜNE", "FREIE WÄHLER", "FDP"],
        doku_system="portala",
        doku_base_url="https://opal.rlp.de",
        drucksache_format="18/12345",
        dokukratie_scraper="rp",
        aktiv=True,
        anmerkung=(
            "OPAL in RLP läuft tatsächlich auf dem portala/eUI-Backend "
            "(NICHT StarWeb wie ursprünglich klassifiziert), erreichbar "
            "unter /portal/browse.tt.json mit db_id=rlp.lissh. "
            "Wiederverwendet PortalaAdapter aus #2/#3 (#30). NICHT "
            "verwechseln mit dem NRW OPAL — anderer Markenname, "
            "andere Engine. Wahl zum 19. Landtag fand am 22.03.2026 "
            "statt; Koalitionsverhandlungen CDU+SPD laufen, Kabinett "
            "Schweitzer I geschäftsführend. Nach Konstituierung müssen "
            "WP und Wahltermin aktualisiert werden."
        ),
    ),
    "SL": Bundesland(
        code="SL",
        name="Saarland",
        parlament_name="Landtag des Saarlandes",
        wahlperiode=17,
        wahlperiode_start="2022-04-25",
        naechste_wahl="2027-04-18",
        regierungsfraktionen=["SPD"],
        landtagsfraktionen=["SPD", "CDU", "AfD"],
        doku_system="Eigensystem",
        doku_base_url="https://www.landtag-saar.de",
        drucksache_format="17/1234",
        dokukratie_scraper="sl",
        anmerkung=(
            "Einzige SPD-Alleinregierung in Deutschland. AfD-Status im 17. LT vor "
            "produktiver Nutzung verifizieren."
        ),
        aktiv=True,
    ),
    "SN": Bundesland(
        code="SN",
        name="Sachsen",
        parlament_name="Sächsischer Landtag",
        wahlperiode=8,
        wahlperiode_start="2024-10-01",
        naechste_wahl="2029-09-02",
        regierungsfraktionen=["CDU", "SPD"],
        landtagsfraktionen=["CDU", "AfD", "BSW", "SPD", "LINKE", "GRÜNE"],
        doku_system="EDAS-XML-Export",
        doku_base_url="https://edas.landtag.sachsen.de",
        drucksache_format="8/1234",
        dokukratie_scraper="sn",
        aktiv=True,
        anmerkung=(
            "Minderheitsregierung CDU+SPD (Kabinett Kretschmer III seit "
            "18.12.2024). EDAS ist ASP.NET-Webforms mit DevExpress-"
            "Postbacks UND robots.txt: Disallow: / — direktes Scraping "
            "blockiert. Stattdessen liest SNEdasXmlAdapter die wöchentlich "
            "manuell aus der EDAS-Suchmaske exportierte XML-Datei aus "
            "data/sn-edas-export.xml. PDF-URLs werden lazy beim "
            "download_text() aus dem viewer_navigation.aspx-Frame "
            "extrahiert (single GET, kein Postback). Schließt #26."
        ),
    ),
    "LSA": Bundesland(
        code="LSA",
        name="Sachsen-Anhalt",
        parlament_name="Landtag von Sachsen-Anhalt",
        wahlperiode=8,
        wahlperiode_start="2021-07-06",
        naechste_wahl="2026-09-06",
        regierungsfraktionen=["CDU", "SPD", "FDP"],
        landtagsfraktionen=["CDU", "AfD", "LINKE", "SPD", "GRÜNE", "FDP"],
        doku_system="PARDOK",
        doku_base_url="https://padoka.landtag.sachsen-anhalt.de",
        drucksache_format="8/1234",
        dokukratie_scraper="st",
        aktiv=True,
        anmerkung=(
            "ISO-Code wäre ST; LSA ist im politischen Sprachgebrauch dominant. "
            "Sven Schulze (CDU) seit 28.01.2026 MP nach Rücktritt Haseloff. "
            "PADOKA wurde von StarWeb auf das portala/eUI-Framework migriert "
            "(gleiche Engine wie Berlin/PARDOK). dokukratie's st.yml ist veraltet. "
            "Suche läuft via POST /portal/browse.tt.json + report.tt.html."
        ),
    ),
    "SH": Bundesland(
        code="SH",
        name="Schleswig-Holstein",
        parlament_name="Schleswig-Holsteinischer Landtag",
        wahlperiode=20,
        wahlperiode_start="2022-06-07",
        naechste_wahl="2027-04-18",
        regierungsfraktionen=["CDU", "GRÜNE"],
        landtagsfraktionen=["CDU", "GRÜNE", "SPD", "FDP", "SSW"],
        doku_system="StarWeb",
        doku_base_url="http://lissh.lvn.parlanet.de",
        drucksache_format="20/1234",
        dokukratie_scraper="sh",
        aktiv=True,
        anmerkung=(
            "SSW ist von der 5%-Hürde befreit. Doku-System ist die "
            "alte Starfinder-CGI auf lissh.lvn.parlanet.de — URL-"
            "basiert via "
            "/cgi-bin/starfinder/0?path=lisshfl.txt&search=WP=20+AND+dtyp=antrag, "
            "Latin-1-encoding. NICHT die moderne StarWeb-Servlet-"
            "Variante (BB/HE/NI/RP/HB) — eigene Klasse "
            "StarFinderCGIAdapter."
        ),
    ),
    "TH": Bundesland(
        code="TH",
        name="Thüringen",
        parlament_name="Thüringer Landtag",
        wahlperiode=8,
        wahlperiode_start="2024-10-01",
        naechste_wahl="2029-09-01",
        regierungsfraktionen=["CDU", "BSW", "SPD"],
        landtagsfraktionen=["AfD", "CDU", "LINKE", "BSW", "SPD"],
        doku_system="ParlDok",
        doku_base_url="https://parldok.thueringer-landtag.de",
        drucksache_format="8/1234",
        dokukratie_scraper="th",
        aktiv=True,
        anmerkung=(
            "Erste Brombeer-Koalition Deutschlands (CDU+BSW+SPD) als "
            "Minderheitsregierung mit 44 von 88 Sitzen. Mario Voigt "
            "(CDU) seit Dezember 2024 MP. ParlDok 8.3.5 (J3S GmbH) — "
            "EXAKT dieselbe Version wie MV. ParLDokAdapter direkt "
            "wiederverwendbar als Registry-Eintrag (#25). Achtung: "
            "alter Hostname parldok.thueringen.de redirected per 303 "
            "auf parldok.thueringer-landtag.de — neuer Hostname ist "
            "der korrekte."
        ),
    ),
}
|
|
||||||
|
|
||||||
|
|
||||||
def get(code: str) -> Optional[Bundesland]:
    """Return the Bundesland config registered under *code*, or None."""
    try:
        return BUNDESLAENDER[code]
    except KeyError:
        return None
|
|
||||||
|
|
||||||
|
|
||||||
def aktive_bundeslaender() -> list[Bundesland]:
    """All currently active states (i.e. supported by the analyzer)."""
    return list(filter(lambda entry: entry.aktiv, BUNDESLAENDER.values()))
|
|
||||||
|
|
||||||
|
|
||||||
def alle_bundeslaender() -> list[Bundesland]:
    """Every registry entry (the 16 states plus the BUND pseudo-entry),
    active ones first, each group sorted alphabetically by name.

    Note: the registry holds 17 entries, not 16 — "BUND" is included.
    """
    # Single pass with a composite key: False sorts before True, so
    # `not aktiv` puts active entries first; ties break by name.
    return sorted(BUNDESLAENDER.values(), key=lambda b: (not b.aktiv, b.name))
|
|
||||||
@ -1,312 +0,0 @@
|
|||||||
"""Antrag-Clustering via Cosine-Similarity + Union-Find (#105).
|
|
||||||
|
|
||||||
Nutzt die v4-Embeddings aus assessments.summary_embedding (gefüllt durch #123)
|
|
||||||
und baut eine hierarchische Cluster-Struktur ohne externe Dependencies
|
|
||||||
(kein sklearn, kein numpy — für <500 Assessments ist pure Python ausreichend).
|
|
||||||
|
|
||||||
Algorithmus: Connected-Components via Union-Find über Kanten mit
|
|
||||||
Cosine-Similarity ≥ threshold. Level 0 = alle Anträge, Level 1 tighter Cluster.
|
|
||||||
Bei Clustern > 30 wird rekursiv mit höherem Threshold nachgeteilt.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import math
|
|
||||||
from collections import Counter
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import aiosqlite
|
|
||||||
|
|
||||||
from .config import settings
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Cosine-similarity thresholds.
# Empirically calibrated against the prod DB (57 assessments, 2026-04-11):
#   0.50 → 6 meaningful clusters + 26 singletons (best default)
#   0.55 → 5 tighter clusters
#   0.60 → 4 small clusters, too strict (most thematically similar
#          proposals drop out)
#   0.70+ → almost all singletons
# v4 embeddings of German parliamentary proposals cluster at ~0.50.
# NOTE(review): the calibration notes call 0.50 the best default, yet
# DEFAULT_THRESHOLD is 0.55 — confirm which value is intended.
DEFAULT_THRESHOLD = 0.55
SUBCLUSTER_THRESHOLD = 0.70
MAX_CLUSTER_SIZE = 30  # above this size: sub-cluster recursively
|
|
||||||
|
|
||||||
|
|
||||||
# ─── Math-Helpers ───────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def _cosine(a: list[float], b: list[float]) -> float:
|
|
||||||
dot = sum(x * y for x, y in zip(a, b))
|
|
||||||
na = math.sqrt(sum(x * x for x in a))
|
|
||||||
nb = math.sqrt(sum(x * x for x in b))
|
|
||||||
if na == 0 or nb == 0:
|
|
||||||
return 0.0
|
|
||||||
return dot / (na * nb)
|
|
||||||
|
|
||||||
|
|
||||||
class UnionFind:
    """Disjoint-set forest with path compression and union by rank."""

    def __init__(self, n: int):
        self.parent = list(range(n))
        self.rank = [0] * n

    def find(self, x: int) -> int:
        """Return the representative of x's set, compressing the path."""
        root = x
        while self.parent[root] != root:
            root = self.parent[root]
        # Second pass: point every node on the walked path at the root.
        while self.parent[x] != root:
            self.parent[x], x = root, self.parent[x]
        return root

    def union(self, a: int, b: int) -> None:
        """Merge the sets containing a and b (no-op if already joined)."""
        root_a = self.find(a)
        root_b = self.find(b)
        if root_a == root_b:
            return
        # Attach the shallower tree beneath the deeper one; on a tie the
        # first root wins and its rank grows by one.
        if self.rank[root_a] < self.rank[root_b]:
            root_a, root_b = root_b, root_a
        self.parent[root_b] = root_a
        if self.rank[root_a] == self.rank[root_b]:
            self.rank[root_a] += 1
|
|
||||||
|
|
||||||
|
|
||||||
# ─── DB-Lader ───────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
async def load_assessment_items(
    bundesland: Optional[str] = None,
) -> list[dict]:
    """Load every assessment that has a populated summary_embedding.

    Args:
        bundesland: Optional state code to filter on; None loads all states.

    Returns:
        One dict per row with the metadata columns plus the decoded
        embedding vector under "embedding". Rows whose stored embedding
        cannot be decoded are skipped with a warning.
    """
    query = """
        SELECT drucksache, title, fraktionen, datum, link, bundesland,
               gwoe_score, empfehlung, empfehlung_symbol, themen,
               summary_embedding
        FROM assessments
        WHERE summary_embedding IS NOT NULL
    """
    args: list = []
    if bundesland:
        query += " AND bundesland = ?"
        args.append(bundesland)

    result: list[dict] = []
    async with aiosqlite.connect(settings.db_path) as db:
        db.row_factory = aiosqlite.Row
        async with db.execute(query, args) as cursor:
            async for row in cursor:
                # Embedding is stored as a JSON array; skip undecodable blobs.
                try:
                    embedding = json.loads(bytes(row["summary_embedding"]).decode())
                except Exception:
                    logger.warning("bad embedding for %s", row["drucksache"])
                    continue
                result.append({
                    "drucksache": row["drucksache"],
                    "title": row["title"],
                    "fraktionen": json.loads(row["fraktionen"] or "[]"),
                    "datum": row["datum"],
                    "link": row["link"],
                    "bundesland": row["bundesland"],
                    "gwoe_score": row["gwoe_score"],
                    "empfehlung": row["empfehlung"],
                    "empfehlung_symbol": row["empfehlung_symbol"],
                    "themen": json.loads(row["themen"] or "[]"),
                    "embedding": embedding,
                })
    return result
|
|
||||||
|
|
||||||
|
|
||||||
# ─── Clustering ─────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def _cluster_indices(items: list[dict], threshold: float) -> list[list[int]]:
    """Union-find clustering: nodes = items, edge = cosine ≥ threshold."""
    count = len(items)
    forest = UnionFind(count)
    # All-pairs comparison; acceptable at the <500-item scale this targets.
    for a in range(count):
        vec_a = items[a]["embedding"]
        for b in range(a + 1, count):
            if _cosine(vec_a, items[b]["embedding"]) >= threshold:
                forest.union(a, b)

    components: dict[int, list[int]] = {}
    for idx in range(count):
        components.setdefault(forest.find(idx), []).append(idx)
    # Largest clusters first.
    return sorted(components.values(), key=len, reverse=True)
|
|
||||||
|
|
||||||
|
|
||||||
def _dominant_fraktion(items: list[dict]) -> Optional[str]:
|
|
||||||
counts: Counter = Counter()
|
|
||||||
for item in items:
|
|
||||||
for f in item.get("fraktionen") or []:
|
|
||||||
counts[f] += 1
|
|
||||||
if not counts:
|
|
||||||
return None
|
|
||||||
return counts.most_common(1)[0][0]
|
|
||||||
|
|
||||||
|
|
||||||
def _cluster_label(items: list[dict]) -> str:
|
|
||||||
"""Generiert ein Cluster-Label aus den häufigsten Themen der Mitglieder.
|
|
||||||
|
|
||||||
Nimmt die Top-2-3 Themen die in der Mehrheit der Cluster-Mitglieder
|
|
||||||
vorkommen und kombiniert sie zu einem prägnanten Label.
|
|
||||||
Fallback: kürzester Titel.
|
|
||||||
"""
|
|
||||||
# Themen-Häufigkeit über alle Cluster-Mitglieder
|
|
||||||
themen_counts: Counter = Counter()
|
|
||||||
for item in items:
|
|
||||||
for thema in item.get("themen") or []:
|
|
||||||
themen_counts[thema] += 1
|
|
||||||
|
|
||||||
if themen_counts:
|
|
||||||
# Top-Themen die in ≥50% der Mitglieder vorkommen, max 3
|
|
||||||
threshold = max(1, len(items) // 2)
|
|
||||||
top = [t for t, c in themen_counts.most_common(5) if c >= threshold][:3]
|
|
||||||
if top:
|
|
||||||
return " · ".join(top)
|
|
||||||
|
|
||||||
# Fallback: kürzester Titel
|
|
||||||
titles = [i["title"] for i in items if i.get("title")]
|
|
||||||
if titles:
|
|
||||||
return min(titles, key=len)
|
|
||||||
return "Cluster"
|
|
||||||
|
|
||||||
|
|
||||||
def _cluster_summary(cluster_items: list[dict], include_edges: bool = False) -> dict:
    """Serialize one cluster for the API response.

    Args:
        cluster_items: Member assessments (dicts from load_assessment_items).
        include_edges: When True, additionally emit per-member nodes and
            pairwise cosine-similarity edges for force-graph rendering.
    """
    score_values = [
        item["gwoe_score"]
        for item in cluster_items
        if item.get("gwoe_score") is not None
    ]
    summary = {
        "size": len(cluster_items),
        "label": _cluster_label(cluster_items),
        "dominant_fraktion": _dominant_fraktion(cluster_items),
        "avg_gwoe_score": (
            round(sum(score_values) / len(score_values), 1) if score_values else None
        ),
        "drucksachen": [item["drucksache"] for item in cluster_items],
    }
    if not include_edges:
        return summary

    # Per-member detail nodes (for force-graph rendering).
    summary["nodes"] = [
        {
            "drucksache": item["drucksache"],
            "title": item["title"],
            "bundesland": item["bundesland"],
            "fraktionen": item["fraktionen"],
            "gwoe_score": item["gwoe_score"],
            "empfehlung": item["empfehlung"],
        }
        for item in cluster_items
    ]
    # Pairwise cosine similarities as graph edges.
    summary["edges"] = [
        {
            "a": a,
            "b": b,
            "sim": round(
                _cosine(cluster_items[a]["embedding"], cluster_items[b]["embedding"]), 3
            ),
        }
        for a in range(len(cluster_items))
        for b in range(a + 1, len(cluster_items))
    ]
    return summary
|
|
||||||
|
|
||||||
|
|
||||||
async def build_hierarchy(
    bundesland: Optional[str] = None,
    threshold: float = DEFAULT_THRESHOLD,
    subcluster_threshold: float = SUBCLUSTER_THRESHOLD,
    max_cluster_size: int = MAX_CLUSTER_SIZE,
) -> dict:
    """Load assessments, cluster them hierarchically, and return a
    serializable structure:

    {
        "meta": {"total": N, "threshold": 0.70, ...},
        "clusters": [
            {"size": 12, "label": ..., "dominant_fraktion": ...,
             "drucksachen": [...], "subclusters": [ ... ] | None},
            ...
        ],
        "singletons": [drucksache, drucksache, ...]
    }

    Clusters larger than max_cluster_size get a second, stricter pass
    with subcluster_threshold.
    """
    items = await load_assessment_items(bundesland=bundesland)
    if not items:
        return {
            "meta": {"total": 0, "threshold": threshold, "bundesland": bundesland},
            "clusters": [],
            "singletons": [],
        }

    clusters: list[dict] = []
    singletons: list[str] = []

    for member_indices in _cluster_indices(items, threshold):
        # Size-1 components are reported separately, not as clusters.
        if len(member_indices) == 1:
            singletons.append(items[member_indices[0]]["drucksache"])
            continue

        members = [items[i] for i in member_indices]
        cluster = _cluster_summary(members, include_edges=True)

        # Oversized clusters get a stricter second pass; sub-singletons
        # are dropped from the sub-cluster list.
        if len(members) > max_cluster_size:
            cluster["subclusters"] = [
                _cluster_summary([members[i] for i in sub])
                for sub in _cluster_indices(members, subcluster_threshold)
                if len(sub) > 1
            ]
        else:
            cluster["subclusters"] = None

        clusters.append(cluster)

    return {
        "meta": {
            "total": len(items),
            "threshold": threshold,
            "subcluster_threshold": subcluster_threshold,
            "max_cluster_size": max_cluster_size,
            "bundesland": bundesland,
            "num_clusters": len(clusters),
            "num_singletons": len(singletons),
        },
        "clusters": clusters,
        "singletons": singletons,
    }
|
|
||||||
|
|
||||||
|
|
||||||
# ─── Ähnlichkeits-Suche für #108 Teil B ─────────────────────────────────────
|
|
||||||
|
|
||||||
async def find_similar_assessments(drucksache: str, top_k: int = 5) -> list[dict]:
    """Return the ``top_k`` assessments most similar to *drucksache*.

    Similarity is the cosine similarity between the summary embeddings.
    Returns an empty list when the given Drucksache is not found.
    """
    items = await load_assessment_items()
    target = next((it for it in items if it["drucksache"] == drucksache), None)
    if target is None:
        return []

    # Score every other assessment against the target embedding.
    candidates = [
        (_cosine(target["embedding"], it["embedding"]), it)
        for it in items
        if it["drucksache"] != drucksache
    ]
    candidates.sort(key=lambda pair: pair[0], reverse=True)

    results: list[dict] = []
    for score, it in candidates[:top_k]:
        results.append(
            {
                "drucksache": it["drucksache"],
                "title": it["title"],
                "bundesland": it["bundesland"],
                "fraktionen": it["fraktionen"],
                "gwoe_score": it["gwoe_score"],
                "empfehlung": it["empfehlung"],
                "similarity": round(score, 3),
            }
        )
    return results
|
|
||||||
@ -1,71 +0,0 @@
|
|||||||
from pydantic_settings import BaseSettings
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
|
|
||||||
class Settings(BaseSettings):
    """Application configuration, loaded from defaults and ``.env``."""

    # Application identity and the active LLM prompt revision.
    app_name: str = "GWÖ-Antragsprüfer"
    app_version: str = "1.0.2"
    prompt_version: str = "v4.1"

    # Paths — resolved relative to this file's location.
    base_dir: Path = Path(__file__).resolve().parent.parent
    data_dir: Path = base_dir / "data"
    reports_dir: Path = base_dir / "reports"
    kontext_dir: Path = Path(__file__).resolve().parent / "kontext"
    db_path: Path = data_dir / "gwoe-antraege.db"

    # LLM (DashScope OpenAI-compatible endpoint)
    dashscope_api_key: str = ""
    dashscope_base_url: str = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
    llm_model_default: str = "qwen-plus-latest"
    llm_model_premium: str = "qwen-max"

    # Embedding model: new rows are always written with embedding_model_write,
    # read queries filter on embedding_model_read. Two separate settings allow
    # a zero-downtime switch from v3 to v4 (see issue #123):
    # Phase 1: write=v4, read=v3 → prod keeps running, reindex fills v4 rows
    # Phase 2: write=v4, read=v4 → switch active, old v3 rows can be deleted
    embedding_model_write: str = "text-embedding-v4"
    embedding_model_read: str = "text-embedding-v3"
    embedding_dimensions: int = 1024

    # Keycloak (auth) — empty defaults; expected to be set via environment.
    keycloak_url: str = ""
    keycloak_realm: str = ""
    keycloak_client_id: str = ""
    keycloak_admin_user: str = ""
    keycloak_admin_password: str = ""

    # Server bind address/port.
    host: str = "0.0.0.0"
    port: int = 8000

    # SMTP (issue #124, e-mail notifications)
    # 1blu provider: smtp.1blu.de:465 SSL, username = mailbox name (NOT the
    # e-mail address!), e.g. "q294440_0-gwoe-toppyr". Password via ENV SMTP_PASSWORD.
    smtp_host: str = ""
    smtp_port: int = 465
    smtp_user: str = ""
    smtp_password: str = ""
    smtp_from_email: str = "noreply@toppyr.de"
    smtp_from_name: str = "GWÖ-Antragsprüfer"
    # URL base for links embedded in mails (unsubscribe, detail view).
    base_url: str = "https://gwoe.toppyr.de"
    # HMAC secret for signing unsubscribe-link tokens.
    unsubscribe_secret: str = "change-me-in-prod"

    # Gitea API token for creating feedback issues (issue #feedback-widget).
    # Set in .env as: GITEA_TOKEN=<token>
    # Token source: cat ~/.claude/.gitea-token
    gitea_token: str = ""
    gitea_api_url: str = "https://repo.toppyr.de/api/v1"
    gitea_repo_owner: str = "tobias"
    gitea_repo_name: str = "gwoe-antragspruefer"
    # Comma-separated list of extra labels applied to feedback issues.
    # On dev: "feedback,dev" — so issues from gwoe-dev.toppyr.de are distinguishable.
    gitea_feedback_labels: str = "feedback"

    # pydantic-settings: values may be overridden from the .env file.
    model_config = {"env_file": ".env", "env_file_encoding": "utf-8"}


# Module-level singleton used throughout the app.
settings = Settings()
|
|
||||||
1294
app/database.py
1294
app/database.py
File diff suppressed because it is too large
Load Diff
@ -1,130 +0,0 @@
|
|||||||
"""Drucksache-Typ-Normalisierung (#127).
|
|
||||||
|
|
||||||
Jeder Landtag hat eigene Bezeichnungen für Dokumenttypen. Dieses Modul
|
|
||||||
normalisiert sie auf einheitliche Kategorien und bestimmt ob eine
|
|
||||||
Drucksache abstimmbar ist (= GWÖ-Bewertung sinnvoll).
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Normierte Kategorien
|
|
||||||
ANTRAG = "antrag"
|
|
||||||
GESETZENTWURF = "gesetzentwurf"
|
|
||||||
AENDERUNGSANTRAG = "aenderungsantrag"
|
|
||||||
DRINGLICHKEITSANTRAG = "dringlichkeitsantrag"
|
|
||||||
ENTSCHLIESSUNGSANTRAG = "entschliessungsantrag"
|
|
||||||
BESCHLUSSEMPFEHLUNG = "beschlussempfehlung"
|
|
||||||
KLEINE_ANFRAGE = "kleine_anfrage"
|
|
||||||
GROSSE_ANFRAGE = "grosse_anfrage"
|
|
||||||
UNTERRICHTUNG = "unterrichtung"
|
|
||||||
PETITION = "petition"
|
|
||||||
WAHLVORSCHLAG = "wahlvorschlag"
|
|
||||||
BERICHT = "bericht"
|
|
||||||
SONSTIGE = "sonstige"
|
|
||||||
|
|
||||||
ABSTIMMBARE_TYPEN = {
|
|
||||||
ANTRAG,
|
|
||||||
GESETZENTWURF,
|
|
||||||
AENDERUNGSANTRAG,
|
|
||||||
DRINGLICHKEITSANTRAG,
|
|
||||||
ENTSCHLIESSUNGSANTRAG,
|
|
||||||
}
|
|
||||||
|
|
||||||
# Übersetzungstabelle: Original-Typ (lowercase) → normierter Typ.
|
|
||||||
# Keys werden case-insensitive + substring-matched.
|
|
||||||
# Reihenfolge: spezifischere zuerst (z.B. "kleine anfrage" vor "anfrage").
|
|
||||||
_TYP_MAP = [
|
|
||||||
# Abstimmbar
|
|
||||||
("gesetzentwurf", GESETZENTWURF),
|
|
||||||
("änderungsantrag", AENDERUNGSANTRAG),
|
|
||||||
("aenderungsantrag", AENDERUNGSANTRAG),
|
|
||||||
("dringlichkeitsantrag", DRINGLICHKEITSANTRAG),
|
|
||||||
("entschließungsantrag", ENTSCHLIESSUNGSANTRAG),
|
|
||||||
("entschliessungsantrag", ENTSCHLIESSUNGSANTRAG),
|
|
||||||
("antrag gemäß", ANTRAG),
|
|
||||||
("antrag", ANTRAG),
|
|
||||||
# Nicht abstimmbar
|
|
||||||
("kleine anfrage", KLEINE_ANFRAGE),
|
|
||||||
("große anfrage", GROSSE_ANFRAGE),
|
|
||||||
("grosse anfrage", GROSSE_ANFRAGE),
|
|
||||||
("anfrage", KLEINE_ANFRAGE),
|
|
||||||
("beschlussempfehlung", BESCHLUSSEMPFEHLUNG),
|
|
||||||
("unterrichtung", UNTERRICHTUNG),
|
|
||||||
("bericht", BERICHT),
|
|
||||||
("mitteilung", UNTERRICHTUNG),
|
|
||||||
("vorlage", UNTERRICHTUNG),
|
|
||||||
("petition", PETITION),
|
|
||||||
("wahlvorschlag", WAHLVORSCHLAG),
|
|
||||||
("stellungnahme", SONSTIGE),
|
|
||||||
("drucksache", SONSTIGE),
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def normalize_typ(original: str) -> str:
|
|
||||||
"""Normalisiert einen BL-spezifischen Typ-String auf eine Kategorie.
|
|
||||||
|
|
||||||
Case-insensitiv, Substring-Match, spezifischere Patterns zuerst.
|
|
||||||
"""
|
|
||||||
if not original:
|
|
||||||
return SONSTIGE
|
|
||||||
low = original.lower().strip()
|
|
||||||
for pattern, norm in _TYP_MAP:
|
|
||||||
if pattern in low:
|
|
||||||
return norm
|
|
||||||
return SONSTIGE
|
|
||||||
|
|
||||||
|
|
||||||
def ist_abstimmbar(typ_normiert: str) -> bool:
|
|
||||||
"""Prüft ob ein normierter Typ zur Abstimmung steht.
|
|
||||||
|
|
||||||
``sonstige`` wird durchgelassen (benefit of the doubt) — wenn der
|
|
||||||
Adapter den Typ nicht bestimmen kann (z.B. NRW liefert nur
|
|
||||||
"Drucksache"), wird der echte Check erst beim Analysieren gemacht
|
|
||||||
(aus dem Dokument-Text).
|
|
||||||
"""
|
|
||||||
return typ_normiert in ABSTIMMBARE_TYPEN or typ_normiert == SONSTIGE
|
|
||||||
|
|
||||||
|
|
||||||
def ist_abstimmbar_original(original: str) -> bool:
|
|
||||||
"""Convenience: prüft direkt am Original-Typ-String."""
|
|
||||||
return ist_abstimmbar(normalize_typ(original))
|
|
||||||
|
|
||||||
|
|
||||||
# Frage-Präfixe die typisch für Kleine Anfragen sind. Wird genutzt wenn der
|
|
||||||
# Adapter (z.B. NRW) den Typ nur als "Drucksache" liefert — wir versuchen
|
|
||||||
# anhand des Titels eine bessere Klassifikation, damit Search-Ergebnisse
|
|
||||||
# nicht voll mit nicht-abstimmbaren Anfragen sind.
|
|
||||||
_FRAGE_PRAEFIXE = (
|
|
||||||
"welche ", "wie viele ", "wieviel", "wie viel ", "wie hoch ", "wie ",
|
|
||||||
"wann ", "warum ", "weshalb ", "wo ", "wer ", "wie steht ", "wie weit ",
|
|
||||||
"ist es ", "ist der ", "ist die ", "ist das ", "sind ",
|
|
||||||
"trifft es ", "kann ", "wird ", "wieso ", "was ",
|
|
||||||
"hat ", "hat der ", "hat die ", "hat das ",
|
|
||||||
"haben ", "war ", "waren ",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def likely_kleine_anfrage_titel(title: str) -> bool:
|
|
||||||
"""Heuristik: erkennt Kleine Anfragen am Titel-Format.
|
|
||||||
|
|
||||||
Wenn der Titel mit einem typischen Frage-Präfix beginnt oder mit "?" endet,
|
|
||||||
behandeln wir die Drucksache als Kleine Anfrage. NRW-OPAL klassifiziert
|
|
||||||
alle Drucksachen als "Drucksache" → ohne diese Heuristik landen Anfragen
|
|
||||||
in den Search-Ergebnissen, was den User verwirrt (#149 Folge).
|
|
||||||
|
|
||||||
Args:
|
|
||||||
title: Drucksachen-Titel inkl. evtl. Nummer-Präfix wie "1Welche...".
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
True wenn der Titel wie eine Kleine Anfrage aussieht.
|
|
||||||
"""
|
|
||||||
if not title:
|
|
||||||
return False
|
|
||||||
t = title.strip()
|
|
||||||
# Manche Adapter prefixen mit Nummerierung wie "1Welche..." — strippen
|
|
||||||
while t and (t[0].isdigit() or t[0] in " .-"):
|
|
||||||
t = t[1:]
|
|
||||||
t_low = t.lower()
|
|
||||||
if t_low.startswith(_FRAGE_PRAEFIXE):
|
|
||||||
return True
|
|
||||||
if t.rstrip().endswith("?"):
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
1082
app/embeddings.py
1082
app/embeddings.py
File diff suppressed because it is too large
Load Diff
@ -1,175 +0,0 @@
|
|||||||
"""BL-uebergreifende Ingest-CLI fuer Plenarprotokolle (#106 / #126).
|
|
||||||
|
|
||||||
Pipeline:
|
|
||||||
1. PDF laden (Pfad oder URL)
|
|
||||||
2. ``protokoll_parsers.parse_protocol(bundesland, pdf_path)`` waehlt den
|
|
||||||
BL-spezifischen Parser aus der Registry
|
|
||||||
3. ``upsert_plenum_vote()`` schreibt jede Abstimmung in die DB
|
|
||||||
|
|
||||||
CLI:
|
|
||||||
python -m app.ingest_votes --pdf MMP18-119.pdf
|
|
||||||
python -m app.ingest_votes --url https://landtag.nrw.de/.../MMP18-119.pdf
|
|
||||||
python -m app.ingest_votes --pdf x.pdf --bundesland NRW --protokoll-id MMP18-119
|
|
||||||
python -m app.ingest_votes --supported # Liste der BL mit Parser
|
|
||||||
|
|
||||||
Aktuell registriert: NRW. Folge-BL via app/protokoll_parsers/<bl>.py + Eintrag
|
|
||||||
in PROTOKOLL_PARSERS — siehe ADR 0009.
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import asyncio
|
|
||||||
import logging
|
|
||||||
import sys
|
|
||||||
import tempfile
|
|
||||||
import urllib.request
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
from .protokoll_parsers import parse_protocol, supported_bundeslaender
|
|
||||||
from .database import upsert_plenum_vote
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def _derive_protokoll_id(pdf_path: Path) -> str:
|
|
||||||
"""Ermittle Protokoll-ID aus dem Datei-Stem (z.B. 'MMP18-119.pdf' → 'MMP18-119')."""
|
|
||||||
return pdf_path.stem
|
|
||||||
|
|
||||||
|
|
||||||
def _download_pdf(url: str, dest: Path) -> Path:
    """Download a PDF from *url* into *dest*; returns *dest*.  Raises on HTTP errors."""
    request = urllib.request.Request(
        url,
        headers={"User-Agent": "GWOeAntragspruefer/1.0 (+https://gwoe.toppyr.de)"},
    )
    # urlopen raises HTTPError for non-2xx responses; nothing is written then.
    with urllib.request.urlopen(request, timeout=60) as response:
        payload = response.read()
    dest.write_bytes(payload)
    return dest
|
|
||||||
|
|
||||||
|
|
||||||
async def ingest_pdf(
    pdf_path: Path,
    *,
    bundesland: str = "NRW",
    protokoll_id: Optional[str] = None,
    quelle_url: Optional[str] = None,
) -> dict:
    """Parse the PDF with the state-specific parser and write all votes to the DB.

    Returns:
        Statistics dict ``{parsed, written, skipped_no_drucksache, errors,
        protokoll_id, bundesland}``.

    Raises:
        NotImplementedError: if no parser is registered for ``bundesland``.
    """
    pid = protokoll_id or _derive_protokoll_id(pdf_path)
    vote_entries = parse_protocol(bundesland, str(pdf_path))

    written = 0
    skipped_no_ds = 0
    errors: list[str] = []

    for vote in vote_entries:
        drucksache = vote.get("drucksache")
        if not drucksache:
            # Without a Drucksache number the vote cannot be linked — skip.
            skipped_no_ds += 1
            continue
        fraktionen = vote.get("votes", {})
        try:
            await upsert_plenum_vote(
                bundesland=bundesland,
                drucksache=drucksache,
                ergebnis=vote["ergebnis"],
                einstimmig=bool(vote.get("einstimmig", False)),
                fraktionen_ja=fraktionen.get("ja", []),
                fraktionen_nein=fraktionen.get("nein", []),
                fraktionen_enthaltung=fraktionen.get("enthaltung", []),
                quelle_protokoll=pid,
                quelle_url=quelle_url,
            )
        except Exception as exc:
            # One failed upsert must not abort the whole protocol import.
            logger.exception("Upsert fehlgeschlagen fuer %s", drucksache)
            errors.append(f"{drucksache}: {exc}")
        else:
            written += 1

    return {
        "parsed": len(vote_entries),
        "written": written,
        "skipped_no_drucksache": skipped_no_ds,
        "errors": errors,
        "protokoll_id": pid,
        "bundesland": bundesland,
    }
|
|
||||||
|
|
||||||
|
|
||||||
def _cli() -> None:
    """Command-line entry point: parse arguments, ingest one protocol, print stats.

    Exit codes: 0 on success (or after --supported), 1 when a local PDF is
    missing, 2 when parsing wrote nothing and produced no errors (likely a
    parser/format mismatch).
    """
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")

    parser = argparse.ArgumentParser(
        description="Plenarprotokoll → plenum_vote_results (#106 / #126)",
    )
    # --pdf / --url are mutually exclusive sources; required=False so that
    # --supported can be used without either (checked manually below).
    src = parser.add_mutually_exclusive_group(required=False)
    src.add_argument("--pdf", help="Pfad zu lokalem PDF")
    src.add_argument("--url", help="HTTP(S)-URL zum PDF")
    parser.add_argument("--bundesland", default="NRW",
                        help="Bundesland-Code (default: NRW)")
    parser.add_argument("--protokoll-id",
                        help="Protokoll-ID (default: aus Datei-Stem)")
    parser.add_argument("--supported", action="store_true",
                        help="Liste alle BL-Codes mit registriertem Parser")
    args = parser.parse_args()

    if args.supported:
        # List registered parser state codes and exit.
        for bl in supported_bundeslaender():
            print(bl)
        sys.exit(0)

    if not args.pdf and not args.url:
        parser.error("--pdf oder --url ist erforderlich")

    if args.url:
        # Derive the tmpfile suffix from the URL (PDF, XML, ...) — the BUND
        # parser consumes XML, the NRW parser PDF.  The suffix only affects
        # the file name; parsers read the content by format.
        url_suffix = "." + args.url.rsplit(".", 1)[-1].split("?")[0]
        if url_suffix not in (".pdf", ".xml", ".html"):
            url_suffix = ".pdf"
        with tempfile.NamedTemporaryFile(suffix=url_suffix, delete=False) as tmp:
            tmp_path = Path(tmp.name)
        try:
            print(f"Lade {args.url} → {tmp_path} …")
            _download_pdf(args.url, tmp_path)
            # Protocol ID defaults to the URL's file name without extension.
            pid = args.protokoll_id or args.url.rsplit("/", 1)[-1].rsplit(".", 1)[0]
            stats = asyncio.run(ingest_pdf(
                tmp_path, bundesland=args.bundesland,
                protokoll_id=pid, quelle_url=args.url,
            ))
        finally:
            # Always remove the temp download, even when parsing fails.
            tmp_path.unlink(missing_ok=True)
    else:
        pdf_path = Path(args.pdf)
        if not pdf_path.exists():
            print(f"FEHLER: PDF nicht gefunden: {pdf_path}", file=sys.stderr)
            sys.exit(1)
        stats = asyncio.run(ingest_pdf(
            pdf_path, bundesland=args.bundesland,
            protokoll_id=args.protokoll_id,
        ))

    # Human-readable summary of the ingest run.
    print()
    print(f"Protokoll {stats['protokoll_id']} ({stats['bundesland']})")
    print(f" parsed: {stats['parsed']}")
    print(f" written: {stats['written']}")
    if stats["skipped_no_drucksache"]:
        print(f" ohne DS: {stats['skipped_no_drucksache']}")
    if stats["errors"]:
        print(f" errors: {len(stats['errors'])}")
        for e in stats["errors"][:5]:
            print(f" {e}")
    # Nothing written and no errors → parser probably matched nothing; signal it.
    if stats["written"] == 0 and not stats["errors"]:
        sys.exit(2)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
_cli()
|
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,139 +0,0 @@
|
|||||||
# GWÖ-Matrix V2.0 — Gemeinden
|
|
||||||
|
|
||||||
*Quelle: [Matrix-Gemeinwohl-Bilanzierung-Gemeinden-V2.0.pdf](https://germany.econgood.org/wp-content/uploads/sites/8/2024/04/Matrix-Gemeinwohl-Bilanzierung-Gemeinden-V2.0.pdf)*
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Überblick
|
|
||||||
|
|
||||||
Die Matrix 2.0 für Gemeinden bietet einen etwas weiteren Blick als die später erschienene Matrix 2.1.A für die öffentliche Hand. Sie wurde speziell für kommunale Gebietskörperschaften entwickelt und eignet sich gut für die Bewertung parlamentarischer Anträge auf Landes- und Kommunalebene.
|
|
||||||
|
|
||||||
**Struktur:** 5 Berührungsgruppen × 5 Werte = 25 Themenfelder
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Die 5 Werte (Spalten)
|
|
||||||
|
|
||||||
| Nr | Wert | Staatsprinzip |
|
|
||||||
|----|------|---------------|
|
|
||||||
| 1 | **Menschenwürde** | Rechtsstaatsprinzip |
|
|
||||||
| 2 | **Solidarität** | Gemeinnutz |
|
|
||||||
| 3 | **Ökologische Nachhaltigkeit** | Umwelt-Verantwortung |
|
|
||||||
| 4 | **Soziale Gerechtigkeit** | Sozialstaatsprinzip |
|
|
||||||
| 5 | **Transparenz & Demokratische Mitbestimmung** | Demokratie |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Die 5 Berührungsgruppen (Zeilen)
|
|
||||||
|
|
||||||
| Code | Gruppe | Beschreibung |
|
|
||||||
|------|--------|--------------|
|
|
||||||
| **A** | Ausgelagerte/selbständige Betriebe, Lieferant:innen, Dienstleister:innen | Externe Beschaffung, Lieferketten, ausgelagerte Aufgaben |
|
|
||||||
| **B** | Finanzpartner:innen, Geldgeber:innen, Steuerzahler:innen | Umgang mit öffentlichen Mitteln, Haushalt, Finanzpolitik |
|
|
||||||
| **C** | Politische Führung, Verwaltung und koordinierte Ehrenamtliche | Mandatsträger:innen, Mitarbeitende, ehrenamtlich Engagierte |
|
|
||||||
| **D** | Bürger:innen und Wirtschaft | Wirkung auf Bevölkerung und lokale Wirtschaft, Daseinsvorsorge |
|
|
||||||
| **E** | Staat, Gesellschaft und Natur | Überregionale/langfristige Wirkung, zukünftige Generationen |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Die 25 Themenfelder
|
|
||||||
|
|
||||||
### Zeile A: Lieferant:innen & Dienstleister:innen
|
|
||||||
|
|
||||||
| Feld | Titel | Kernfragen |
|
|
||||||
|------|-------|------------|
|
|
||||||
| **A1** | Grundrechtsschutz und Menschenwürde in der Lieferkette | Werden bei Beschaffung Menschenrechte beachtet? Sorgfaltspflichten? |
|
|
||||||
| **A2** | Nutzen für die Gemeinde | Bringt die Beschaffung Mehrwert für die Gemeinde? Regionale Wertschöpfung? |
|
|
||||||
| **A3** | Ökologische Verantwortung für die Lieferkette | Umweltkriterien bei Vergabe? Nachhaltige Lieferketten? |
|
|
||||||
| **A4** | Soziale Verantwortung für die Lieferkette | Faire Arbeitsbedingungen bei Lieferanten? Tariftreue? |
|
|
||||||
| **A5** | Öffentliche Rechenschaft und Mitsprache | Transparenz bei Auftragsvergabe? Einbindung von Stakeholdern? |
|
|
||||||
|
|
||||||
### Zeile B: Finanzpartner:innen & Steuerzahler:innen
|
|
||||||
|
|
||||||
| Feld | Titel | Kernfragen |
|
|
||||||
|------|-------|------------|
|
|
||||||
| **B1** | Ethisches Finanzgebaren / Geld und Mensch | Ethische Geldanlage? Keine spekulativen Investments? |
|
|
||||||
| **B2** | Gemeinnutz im Finanzgebaren | Dienen Finanzen dem Gemeinwohl? Gerechte Mittelverteilung? |
|
|
||||||
| **B3** | Ökologische Verantwortung der Finanzpolitik | Klimaschutz im Haushalt? Grüne Investitionen? |
|
|
||||||
| **B4** | Soziale Verantwortung der Finanzpolitik | Sozial gerechte Haushaltsplanung? Unterstützung Bedürftiger? |
|
|
||||||
| **B5** | Rechenschaft und Partizipation in der Finanzpolitik | Transparenter Haushalt? Bürgerbeteiligung bei Finanzen? |
|
|
||||||
|
|
||||||
### Zeile C: Politische Führung & Verwaltung
|
|
||||||
|
|
||||||
| Feld | Titel | Kernfragen |
|
|
||||||
|------|-------|------------|
|
|
||||||
| **C1** | Individuelle Rechts- und Gleichstellung | Gleichstellung der Mitarbeitenden? Anti-Diskriminierung? |
|
|
||||||
| **C2** | Gemeinsame Zielvereinbarung für das Gemeinwohl | Gemeinsame Vision? Gemeinwohlorientierte Verwaltungskultur? |
|
|
||||||
| **C3** | Förderung ökologischen Verhaltens | Umweltbildung? Nachhaltige Verwaltung? |
|
|
||||||
| **C4** | Gerechte Verteilung von Arbeit | Work-Life-Balance? Faire Arbeitsbedingungen? |
|
|
||||||
| **C5** | Transparente Kommunikation und demokratische Prozesse | Offene Verwaltung? Beteiligung der Mitarbeitenden? |
|
|
||||||
|
|
||||||
### Zeile D: Bürger:innen und Wirtschaft
|
|
||||||
|
|
||||||
| Feld | Titel | Kernfragen |
|
|
||||||
|------|-------|------------|
|
|
||||||
| **D1** | Schutz des Individuums, Rechtsgleichheit | Bürgerrechte geschützt? Gleicher Zugang zu Leistungen? |
|
|
||||||
| **D2** | Gesamtwohl in der Gemeinde | Fördert die Maßnahme das lokale Gemeinwohl? |
|
|
||||||
| **D3** | Ökologische Gestaltung der öffentlichen Leistung | Umweltfreundliche öffentliche Dienste? Klimaschutz? |
|
|
||||||
| **D4** | Soziale Gestaltung der öffentlichen Leistung | Sozial gerechte Daseinsvorsorge? Inklusion? |
|
|
||||||
| **D5** | Transparente Kommunikation und demokratische Einbindung | Bürgerbeteiligung? Transparenz der Entscheidungen? |
|
|
||||||
|
|
||||||
### Zeile E: Staat, Gesellschaft und Natur
|
|
||||||
|
|
||||||
| Feld | Titel | Kernfragen |
|
|
||||||
|------|-------|------------|
|
|
||||||
| **E1** | Gestaltung der Bedingungen für ein menschenwürdiges Leben – zukünftige Generationen | Generationengerechtigkeit? Langfristige Lebensqualität? |
|
|
||||||
| **E2** | Beitrag zum Gesamtwohl | Überregionaler Nutzen? Solidarität mit anderen? |
|
|
||||||
| **E3** | Verantwortung für ökologische Auswirkungen | Klimawirkung über die Region hinaus? Biodiversität? |
|
|
||||||
| **E4** | Beitrag zum sozialen Ausgleich | Strukturpolitik? Ausgleich zwischen Regionen? |
|
|
||||||
| **E5** | Transparente und demokratische Mitbestimmung | Partizipation auf höherer Ebene? Demokratieförderung? |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Relevanz für Landesanträge
|
|
||||||
|
|
||||||
Die meisten parlamentarischen Anträge betreffen:
|
|
||||||
|
|
||||||
- **D-Zeile (primär):** Wirkung auf Bürger:innen und Wirtschaft im Land
|
|
||||||
- **E-Zeile (sekundär):** Überregionale oder langfristige Auswirkungen
|
|
||||||
- **C-Zeile:** Wenn es um Verwaltungsreformen geht
|
|
||||||
- **B-Zeile:** Bei Haushalts- und Finanzthemen
|
|
||||||
|
|
||||||
**Prinzip:** D (intern/lokal) hat Vorrang vor E (extern/überregional). Themen mit hauptsächlich interner Wirkung gehören zu D.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Bewertungsskala
|
|
||||||
|
|
||||||
| Punkte | Stufe | Beschreibung |
|
|
||||||
|--------|-------|--------------|
|
|
||||||
| 7-10 | **Vorbildlich** | Innovative Maßnahmen, weitreichende Verbesserungen |
|
|
||||||
| 4-6 | **Erfahren** | Erkennbare Verbesserungen, gute Ergebnisse |
|
|
||||||
| 2-3 | **Fortgeschritten** | Erste Maßnahmen, erste Erfolge |
|
|
||||||
| 1 | **Erste Schritte** | Erstes Engagement |
|
|
||||||
| 0 | **Basislinie** | Nur gesetzliche Anforderungen |
|
|
||||||
| negativ | **Widerspruch** | Aktiver Widerspruch zu GWÖ-Werten |
|
|
||||||
|
|
||||||
### Feldwertung
|
|
||||||
|
|
||||||
- `++` (+2/+3): Stark fördernd
|
|
||||||
- `+` (+1): Fördernd
|
|
||||||
- `○` (0): Neutral
|
|
||||||
- `−` (-1): Widersprechend
|
|
||||||
- `−−` (-2/-3): Stark widersprechend
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Unterschied zu Matrix 2.1.A
|
|
||||||
|
|
||||||
| Aspekt | Matrix 2.0 (Gemeinden) | Matrix 2.1.A (Öffentliche Hand) |
|
|
||||||
|--------|------------------------|--------------------------------|
|
|
||||||
| **Fokus** | Kommunale Ebene | Alle öffentlichen Gebietskörperschaften |
|
|
||||||
| **Zeile A** | "Ausgelagerte Betriebe" | "Lieferant:innen" |
|
|
||||||
| **Zeile D** | "Bürger:innen und Wirtschaft" | "Bevölkerung und Wirtschaft" |
|
|
||||||
| **Detailgrad** | Kompakter | Ausführlicher |
|
|
||||||
| **Ideal für** | Kommunalpolitik, konkrete Projekte | Landespolitik, übergeordnete Themen |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
*Stand: März 2026*
|
|
||||||
File diff suppressed because it is too large
Load Diff
@ -1,148 +0,0 @@
|
|||||||
# Parteiprogramme — Kurzreferenz
|
|
||||||
|
|
||||||
*Für die Bewertung von Wahlprogrammtreue UND Grundsatzprogrammtreue*
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Übersicht der Programme
|
|
||||||
|
|
||||||
| Partei | Wahlprogramm NRW 2022 | Grundsatzprogramm |
|
|
||||||
|--------|----------------------|-------------------|
|
|
||||||
| **CDU** | Landtagswahl 2022 | "In Freiheit leben" (2024) |
|
|
||||||
| **SPD** | Landtagswahl 2022 | Hamburger Programm (2007) |
|
|
||||||
| **GRÜNE** | Landtagswahl 2022 | "...zu achten und zu schützen..." (2020) |
|
|
||||||
| **FDP** | Landtagswahl 2022 | "Verantwortung für die Freiheit" (2012) |
|
|
||||||
| **AfD** | Landtagswahl 2022 | "Programm für Deutschland" (2016) |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## CDU
|
|
||||||
|
|
||||||
### Wahlprogramm NRW 2022 — Kernpositionen
|
|
||||||
|
|
||||||
- **Sicherheit:** Mehr Polizei, härtere Strafen, Null-Toleranz
|
|
||||||
- **Bildung:** 10.000 neue Lehrkräfte, Digitalisierung, Talentschulen
|
|
||||||
- **Klimaschutz:** Klimaneutralität 2045, Technologieoffenheit
|
|
||||||
- **Wirtschaft:** Bürokratieabbau, Mittelstandsförderung
|
|
||||||
- **Infrastruktur:** Straßenbau UND ÖPNV-Ausbau
|
|
||||||
|
|
||||||
### Grundsatzprogramm 2024 — Leitideen
|
|
||||||
|
|
||||||
- **Menschenbild:** Christlich-demokratisches Menschenbild, Würde, Freiheit, Verantwortung
|
|
||||||
- **Staat:** Subsidiäre Ordnung, Föderalismus, starker aber begrenzter Staat
|
|
||||||
- **Wirtschaft:** Soziale Marktwirtschaft, Eigentum, Leistungsprinzip
|
|
||||||
- **Umwelt:** Schöpfungsverantwortung, Technologieoffenheit, Marktwirtschaftlicher Umweltschutz
|
|
||||||
- **Europa:** Europäische Einigung, transatlantische Partnerschaft
|
|
||||||
- **Familie:** Ehe und Familie als Fundament, Wahlfreiheit
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## SPD
|
|
||||||
|
|
||||||
### Wahlprogramm NRW 2022 — Kernpositionen
|
|
||||||
|
|
||||||
- **Bildung:** Gebührenfreie Kitas, Ganztagsschule, Abschaffung Schulform-Segregation
|
|
||||||
- **Wohnen:** 100.000 neue Wohnungen, Mietendeckel-Prüfung
|
|
||||||
- **Arbeit:** Tariftreue bei Vergaben, 13€ Landesmindestlohn
|
|
||||||
- **Klimaschutz:** Klimaneutralität 2040, Kohleausstieg beschleunigen
|
|
||||||
- **Soziales:** Soziale Gerechtigkeit, Chancengleichheit
|
|
||||||
|
|
||||||
### Grundsatzprogramm (Hamburger Programm) 2007 — Leitideen
|
|
||||||
|
|
||||||
- **Grundwerte:** Freiheit, Gerechtigkeit, Solidarität
|
|
||||||
- **Demokratischer Sozialismus:** Nicht Endzustand, sondern andauernde Aufgabe
|
|
||||||
- **Arbeit:** Recht auf Arbeit, gerechte Verteilung, starke Gewerkschaften
|
|
||||||
- **Sozialstaat:** Vorsorgender Sozialstaat, Bildung als Schlüssel
|
|
||||||
- **Nachhaltigkeit:** Ökologische Verantwortung als Teil der Grundwerte
|
|
||||||
- **Globalisierung:** Internationale Solidarität, gerechte Weltwirtschaft
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## BÜNDNIS 90/DIE GRÜNEN
|
|
||||||
|
|
||||||
### Wahlprogramm NRW 2022 — Kernpositionen
|
|
||||||
|
|
||||||
- **Klimaschutz:** Klimaneutralität deutlich vor 2040, Kohleausstieg 2030
|
|
||||||
- **Energie:** 100% Erneuerbare, Solarpflicht, Windkraftausbau
|
|
||||||
- **Mobilität:** Verkehrswende, 365€-Ticket, Fahrradland NRW
|
|
||||||
- **Demokratie:** Bürger:innenräte, Absenkung Wahlalter
|
|
||||||
- **Wirtschaft:** Gemeinwohlorientierung, regionale Wertschöpfung
|
|
||||||
- **Naturschutz:** 30% Naturschutzfläche
|
|
||||||
|
|
||||||
### Grundsatzprogramm 2020 — Leitideen
|
|
||||||
|
|
||||||
- **Ökologie:** Klimaschutz als Menschheitsaufgabe, planetare Grenzen
|
|
||||||
- **Demokratie:** Lebendige Demokratie, Partizipation, Bürger:innenbeteiligung
|
|
||||||
- **Gerechtigkeit:** Sozial-ökologische Transformation, Teilhabe für alle
|
|
||||||
- **Selbstbestimmung:** Individuelle Freiheit, Vielfalt, Emanzipation
|
|
||||||
- **Frieden:** Gewaltfreiheit, internationale Verantwortung
|
|
||||||
- **Europäische Einigung:** Föderales Europa
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## FDP
|
|
||||||
|
|
||||||
### Wahlprogramm NRW 2022 — Kernpositionen
|
|
||||||
|
|
||||||
- **Digitalisierung:** Digitales Musterland, E-Government
|
|
||||||
- **Bildung:** Weltbeste Bildung, MINT-Förderung, digitale Schulen
|
|
||||||
- **Wirtschaft:** Bürokratieabbau, Startup-Förderung, Entlastung
|
|
||||||
- **Klimaschutz:** Technologieoffenheit, Emissionshandel, keine Verbote
|
|
||||||
- **Mobilität:** Technologieoffenheit, Infrastrukturausbau
|
|
||||||
|
|
||||||
### Grundsatzprogramm 2012 — Leitideen
|
|
||||||
|
|
||||||
- **Freiheit:** Individuelle Freiheit als höchster Wert
|
|
||||||
- **Verantwortung:** Eigenverantwortung vor Staatsverantwortung
|
|
||||||
- **Chancen:** Chancengerechtigkeit, Aufstieg durch Leistung
|
|
||||||
- **Marktwirtschaft:** Freie Marktwirtschaft, Wettbewerb, Eigentum
|
|
||||||
- **Rechtsstaat:** Bürgerrechte, Datenschutz, schlanker Staat
|
|
||||||
- **Bildung:** Bildung als Bürgerrecht, Vielfalt der Bildungswege
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## AfD
|
|
||||||
|
|
||||||
### Wahlprogramm NRW 2022 — Kernpositionen
|
|
||||||
|
|
||||||
- **Migration:** Strikte Begrenzung, Abschiebungen, "Remigration"
|
|
||||||
- **Energie:** Kernkraft, Kohle behalten, gegen Windkraft
|
|
||||||
- **Sicherheit:** Mehr Polizei, härtere Strafen
|
|
||||||
- **Corona:** Gegen Maßnahmen, keine Impfpflicht
|
|
||||||
- **Bildung:** Leistungsprinzip, gegen "Gendersprache"
|
|
||||||
|
|
||||||
### Grundsatzprogramm 2016 — Leitideen
|
|
||||||
|
|
||||||
- **Demokratie:** Direkte Demokratie, Volksabstimmungen
|
|
||||||
- **Nation:** Nationale Souveränität, EU-Kritik, Euro-Ausstieg
|
|
||||||
- **Familie:** Traditionelles Familienbild, gegen "Gender-Ideologie"
|
|
||||||
- **Einwanderung:** Strikte Kontrolle, kulturelle Integration
|
|
||||||
- **Wirtschaft:** Soziale Marktwirtschaft, gegen Subventionen
|
|
||||||
- **Energie:** Gegen Energiewende, für Kernkraft und Kohle
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Bewertungsskala für Programmtreue
|
|
||||||
|
|
||||||
| Score | Bedeutung |
|
|
||||||
|-------|-----------|
|
|
||||||
| **9-10** | Vollständige Übereinstimmung, könnte aus dem Programm stammen |
|
|
||||||
| **7-8** | Hohe Übereinstimmung, unterstützt Kernziele |
|
|
||||||
| **5-6** | Partielle Übereinstimmung, keine Widersprüche |
|
|
||||||
| **3-4** | Geringe Übereinstimmung, marginaler Bezug |
|
|
||||||
| **1-2** | Widerspricht Teilaspekten des Programms |
|
|
||||||
| **0** | Vollständiger Widerspruch zu Kernpositionen |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Dateien im Kontext-Ordner
|
|
||||||
|
|
||||||
- `cdu-grundsatzprogramm-2024.pdf` — CDU "In Freiheit leben"
|
|
||||||
- `spd-hamburger-programm-2007.pdf` — SPD Hamburger Programm
|
|
||||||
- `gruene-grundsatzprogramm-2020.pdf` — Grüne "...zu achten und zu schützen..."
|
|
||||||
- `fdp-grundsatzprogramm-2012.pdf` — FDP "Verantwortung für die Freiheit"
|
|
||||||
- `afd-grundsatzprogramm-2016.pdf` — AfD "Programm für Deutschland"
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
*Stand: März 2026*
|
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,266 +0,0 @@
|
|||||||
# Wahlprogramme NRW 2022 — Detailreferenz
|
|
||||||
|
|
||||||
*Für präzise Bewertung der Wahlprogrammtreue bei NRW-Landtagsanträgen*
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## CDU NRW 2022: "NRW. Gemeinsam. Gestalten."
|
|
||||||
|
|
||||||
### Wirtschaft & Arbeit
|
|
||||||
- **Bürokratieabbau:** "Bürokratiebremse" – für jede neue Regelung eine streichen
|
|
||||||
- **Mittelstand:** Förderprogramme, Fachkräftesicherung, Ausbildungsoffensive
|
|
||||||
- **Digitalisierung:** Gigabit-Ausbau, 5G flächendeckend, digitale Verwaltung
|
|
||||||
- **Gründer:** Startup-Land NRW, Risikokapital, Gründerstipendien
|
|
||||||
- **Tariftreue:** Keine gesetzliche Tariftreuepflicht (Vertragsfreiheit)
|
|
||||||
|
|
||||||
### Bildung & Wissenschaft
|
|
||||||
- **Lehrkräfte:** 10.000 neue Stellen, Quereinsteiger erleichtern
|
|
||||||
- **Digitale Schule:** Tablets für alle, IT-Ausstattung, Informatik Pflichtfach
|
|
||||||
- **Talentschulen:** Mehr Förderung in Brennpunktvierteln
|
|
||||||
- **Kitas:** Qualitätsoffensive, flexible Betreuungszeiten
|
|
||||||
- **Hochschulen:** Exzellenzstrategie, Forschungsförderung
|
|
||||||
|
|
||||||
### Innere Sicherheit
|
|
||||||
- **Polizei:** 3.000 neue Stellen, bessere Ausstattung, Bodycams
|
|
||||||
- **Null-Toleranz:** Konsequente Strafverfolgung, schnelle Verfahren
|
|
||||||
- **Clankriminalität:** Spezielle Ermittlungsgruppen, Vermögensabschöpfung
|
|
||||||
- **Justiz:** Mehr Richter, digitale Gerichte
|
|
||||||
|
|
||||||
### Umwelt & Klima
|
|
||||||
- **Klimaneutralität:** 2045 (nicht früher), Technologieoffenheit
|
|
||||||
- **Wasserstoff:** NRW als Wasserstoffland, Infrastrukturausbau
|
|
||||||
- **Wald:** 10.000 ha Aufforstung, klimaresistenter Wald
|
|
||||||
- **Energieeffizienz:** Gebäudesanierung fördern, nicht erzwingen
|
|
||||||
- **Windkraft:** Ja, aber mit Abstandsregelungen (1000m)
|
|
||||||
|
|
||||||
### Mobilität & Infrastruktur
|
|
||||||
- **Straßen:** Sanierung Landesstraßen, Engpassbeseitigung Autobahnen
|
|
||||||
- **ÖPNV:** Ausbau RRX, mehr Taktung, barrierefreie Bahnhöfe
|
|
||||||
- **Radwege:** Radschnellwege fördern, aber kein Vorrang vor Straße
|
|
||||||
- **E-Mobilität:** Ladeinfrastruktur, Förderung E-Autos
|
|
||||||
|
|
||||||
### Soziales & Gesundheit
|
|
||||||
- **Pflege:** Landespflegekammer, Fachkräfteoffensive
|
|
||||||
- **Krankenhäuser:** Standortsicherung, Investitionsprogramm
|
|
||||||
- **Familie:** Familienzentren, Betreuungsgeld
|
|
||||||
- **Ehrenamt:** Ehrenamtskarte, Bürokratieabbau für Vereine
|
|
||||||
|
|
||||||
### Besondere Positionen
|
|
||||||
- **Eigentumsschutz:** Gegen Enteignungen, für Investitionsanreize
|
|
||||||
- **Leistungsprinzip:** Bildungsaufstieg durch Leistung, gegen Gleichmacherei
|
|
||||||
- **Sicherheit vor Freiheitseinschränkung:** Balance, aber Sicherheit Priorität
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## SPD NRW 2022: "NRW. Für euch. Für Dich."
|
|
||||||
|
|
||||||
### Wirtschaft & Arbeit
|
|
||||||
- **Tariftreue:** Gesetzliche Tariftreuepflicht bei öffentlicher Vergabe
|
|
||||||
- **Landesmindestlohn:** 13€ für Landesbeschäftigte und Auftragnehmer
|
|
||||||
- **Transformation:** Aktive Industriepolitik, Kohleausstieg sozial gestalten
|
|
||||||
- **Gute Arbeit:** Befristungen eindämmen, Werkverträge regulieren
|
|
||||||
- **Mitbestimmung:** Stärkung Betriebsräte, Tarifbindung erhöhen
|
|
||||||
|
|
||||||
### Bildung & Wissenschaft
|
|
||||||
- **Gebührenfrei:** Komplette Bildungskette gebührenfrei (Kita bis Master)
|
|
||||||
- **Ganztagsschule:** Rechtsanspruch, rhythmisierter Ganztag
|
|
||||||
- **Chancengleichheit:** Längeres gemeinsames Lernen, keine Schulformempfehlung
|
|
||||||
- **Schulsozialarbeit:** Deutlicher Ausbau, feste Stellen
|
|
||||||
- **Inklusion:** Konsequent umsetzen, Ressourcen bereitstellen
|
|
||||||
|
|
||||||
### Innere Sicherheit
|
|
||||||
- **Polizei:** Mehr Stellen, bessere Ausbildung, interkulturelle Kompetenz
|
|
||||||
- **Prävention:** Mehr Jugendarbeit, Ausstiegsprogramme
|
|
||||||
- **Rechtsextremismus:** Schwerpunkt Bekämpfung, Verfassungsschutz reformieren
|
|
||||||
- **Kennzeichnungspflicht:** Individuelle Kennung für Polizeibeamte
|
|
||||||
|
|
||||||
### Umwelt & Klima
|
|
||||||
- **Klimaneutralität:** 2040 (schneller als CDU)
|
|
||||||
- **Kohleausstieg:** Beschleunigen, aber sozial absichern
|
|
||||||
- **Erneuerbare:** Massive Beschleunigung Windkraft, Photovoltaikpflicht
|
|
||||||
- **Naturschutz:** Mehr Schutzgebiete, Biotopvernetzung
|
|
||||||
- **ÖPNV:** 365€-Ticket, kostenloser ÖPNV für Schüler/Azubis
|
|
||||||
|
|
||||||
### Wohnen
|
|
||||||
- **Neubau:** 100.000 neue Wohnungen, öffentlicher Wohnungsbau
|
|
||||||
- **Mieten:** Mietpreisbremse verschärfen, Mietendeckel prüfen
|
|
||||||
- **Sozialwohnungen:** Bindungsfristen verlängern, mehr Förderung
|
|
||||||
- **Bodenrecht:** Kommunales Vorkaufsrecht stärken
|
|
||||||
|
|
||||||
### Soziales & Gesundheit
|
|
||||||
- **Pflege:** Flächentarifvertrag, mehr Personal, bessere Bezahlung
|
|
||||||
- **Krankenhäuser:** Keine Privatisierung, Daseinsvorsorge
|
|
||||||
- **Gesundheitszentren:** Medizinische Versorgungszentren im ländlichen Raum
|
|
||||||
- **Kinderarmut:** Bekämpfung als Schwerpunkt, Kindergrundsicherung
|
|
||||||
|
|
||||||
### Besondere Positionen
|
|
||||||
- **Vermögenssteuer:** Auf Bundesebene einsetzen
|
|
||||||
- **Umverteilung:** Reichere stärker belasten, Entlastung für Normalverdiener
|
|
||||||
- **Öffentlicher Dienst:** Stärken, nicht auslagern
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## BÜNDNIS 90/DIE GRÜNEN NRW 2022: "Wirtschaft und Klima zusammen denken"
|
|
||||||
|
|
||||||
### Wirtschaft & Arbeit
|
|
||||||
- **Transformation:** Grüne Industriepolitik, klimaneutrale Wirtschaft
|
|
||||||
- **Gemeinwohl:** Gemeinwohlökonomie fördern, alternative Wirtschaftsmodelle
|
|
||||||
- **Gute Arbeit:** Tariftreue, faire Löhne, Arbeitszeitverkürzung ermöglichen
|
|
||||||
- **Startups:** Grüne Gründungen, Social Entrepreneurship
|
|
||||||
- **Regionale Wirtschaft:** Lokale Wertschöpfungsketten stärken
|
|
||||||
|
|
||||||
### Bildung & Wissenschaft
|
|
||||||
- **Gebührenfrei:** Bildung darf nichts kosten
|
|
||||||
- **Inklusion:** Inklusive Schulen, multiprofessionelle Teams
|
|
||||||
- **Digitalisierung:** Datenschutzkonform, medienkompetent
|
|
||||||
- **Lehrkräfte:** Bessere Arbeitsbedingungen, kleinere Klassen
|
|
||||||
- **Demokratiebildung:** Schule als demokratischer Lernort
|
|
||||||
|
|
||||||
### Umwelt & Klima (KERNTHEMA)
|
|
||||||
- **Klimaneutralität:** Deutlich vor 2040, Sektorziele
|
|
||||||
- **Kohleausstieg:** 2030, nicht später
|
|
||||||
- **Erneuerbare:** 100%, Solarpflicht auf Dächern, 2% Landesfläche Wind
|
|
||||||
- **Naturschutz:** 30% Landesfläche unter Schutz
|
|
||||||
- **Biodiversität:** Artenvielfalt sichern, Pestizidreduktion
|
|
||||||
- **Kreislaufwirtschaft:** Ressourcenschonung, Mehrweg, Reparatur
|
|
||||||
|
|
||||||
### Mobilität (KERNTHEMA)
|
|
||||||
- **Verkehrswende:** Vorrang für Fuß, Rad, ÖPNV
|
|
||||||
- **365€-Ticket:** Bezahlbarer ÖPNV für alle
|
|
||||||
- **Fahrradland NRW:** Radschnellwege, sichere Radinfrastruktur
|
|
||||||
- **Autoverkehr:** Reduzieren, Tempo 30 innerorts als Regel
|
|
||||||
- **E-Mobilität:** Fördern, aber Fokus auf Verkehrsvermeidung
|
|
||||||
|
|
||||||
### Demokratie & Partizipation
|
|
||||||
- **Bürger:innenräte:** Institutionalisieren, losbasiert
|
|
||||||
- **Wahlalter:** Absenken auf 16 (Landtag) und 14 (Kommune)
|
|
||||||
- **Transparenz:** Open Government, Lobbyregister
|
|
||||||
- **Vielfalt:** Antidiskriminierung, Diversity in Verwaltung
|
|
||||||
|
|
||||||
### Soziales
|
|
||||||
- **Kinderrechte:** In Landesverfassung verankern
|
|
||||||
- **Pflege:** Gute Arbeitsbedingungen, Fachkräfteoffensive
|
|
||||||
- **Inklusion:** Barrierefreiheit konsequent umsetzen
|
|
||||||
- **Geschlechtergerechtigkeit:** Parität, Equal Pay
|
|
||||||
|
|
||||||
### Besondere Positionen
|
|
||||||
- **Postwachstum:** Wirtschaftswachstum nicht als Selbstzweck
|
|
||||||
- **Suffizienz:** Weniger Verbrauch, bewusster Konsum
|
|
||||||
- **Bürger:innenenergie:** Dezentrale, demokratische Energieversorgung
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## FDP NRW 2022: "Nie gab es mehr zu tun"
|
|
||||||
|
|
||||||
### Wirtschaft & Arbeit
|
|
||||||
- **Entlastung:** Steuern senken, Abgaben reduzieren
|
|
||||||
- **Bürokratieabbau:** Radikal entrümpeln, Genehmigungen beschleunigen
|
|
||||||
- **Digitalisierung:** Digitales Musterland, volldigitale Verwaltung
|
|
||||||
- **Gründer:** Beste Bedingungen für Startups, Risikokapital
|
|
||||||
- **Technologieoffenheit:** Keine Verbote, Markt entscheiden lassen
|
|
||||||
|
|
||||||
### Bildung (KERNTHEMA)
|
|
||||||
- **Weltbeste Bildung:** Anspruch auf internationale Spitze
|
|
||||||
- **MINT:** Massive Förderung, Informatik als Pflichtfach
|
|
||||||
- **Individuelle Förderung:** Begabtenförderung, kein Einheitsbrei
|
|
||||||
- **Schulvielfalt:** Differenziertes Schulsystem erhalten
|
|
||||||
- **Digitale Bildung:** Tablets, Cloud, modernste Ausstattung
|
|
||||||
|
|
||||||
### Umwelt & Klima
|
|
||||||
- **Emissionshandel:** Marktbasierter Klimaschutz, CO2-Preis
|
|
||||||
- **Technologieoffenheit:** Alle Technologien, auch Kernkraft prüfen
|
|
||||||
- **Innovation:** Klimaschutz durch Fortschritt, nicht Verzicht
|
|
||||||
- **Gegen Verbote:** Keine Fahrverbote, keine Heizungsvorschriften
|
|
||||||
- **Wasserstoff:** Schlüsseltechnologie, Infrastruktur aufbauen
|
|
||||||
|
|
||||||
### Mobilität
|
|
||||||
- **Technologieoffenheit:** E-Fuels, Wasserstoff, keine Verbrenner-Verbote
|
|
||||||
- **Infrastruktur:** Straßen und Schiene ausbauen
|
|
||||||
- **ÖPNV:** Attraktiver machen, aber nicht zu Lasten Individualverkehr
|
|
||||||
- **Flugtaxis:** Urban Air Mobility fördern
|
|
||||||
|
|
||||||
### Demokratie & Bürgerrechte
|
|
||||||
- **Bürgerrechte:** Datenschutz, Privatsphäre, gegen Überwachung
|
|
||||||
- **Eigenverantwortung:** Weniger Staat, mehr individuelle Freiheit
|
|
||||||
- **Gegen Bevormundung:** Keine Gendersprache-Vorschriften, keine Verbote
|
|
||||||
|
|
||||||
### Besondere Positionen
|
|
||||||
- **Eigentumsschutz:** Gegen Enteignungen, für Investitionsanreize
|
|
||||||
- **Leistungsprinzip:** Aufstieg durch Leistung, gegen Gleichmacherei
|
|
||||||
- **Privat vor Staat:** Privatwirtschaft effizienter als öffentliche Hand
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## AfD NRW 2022
|
|
||||||
|
|
||||||
### Wirtschaft
|
|
||||||
- **Subventionen:** Gegen staatliche Eingriffe in Wirtschaft
|
|
||||||
- **Mittelstand:** Fördern durch Entlastung, nicht Subventionen
|
|
||||||
- **Arbeitsmarkt:** Regulierung reduzieren
|
|
||||||
- **Globalisierung:** Kritisch, nationale Wirtschaft stärken
|
|
||||||
|
|
||||||
### Bildung
|
|
||||||
- **Leistungsprinzip:** Differenziertes Schulsystem, keine Einheitsschule
|
|
||||||
- **Gegen "Ideologie":** Keine "Gender-Sprache", keine "Klimaideologie"
|
|
||||||
- **Disziplin:** Ordnung in Schulen, Respekt vor Lehrern
|
|
||||||
- **Deutsche Kultur:** Kulturelle Bildung, deutsche Geschichte
|
|
||||||
|
|
||||||
### Energie (KERNTHEMA)
|
|
||||||
- **Kernkraft:** Laufzeitverlängerung, neue Kraftwerke
|
|
||||||
- **Kohle:** Braunkohle erhalten, Versorgungssicherheit
|
|
||||||
- **Gegen Energiewende:** "Planwirtschaft", unwirtschaftlich
|
|
||||||
- **Gegen Windkraft:** Landschaftszerstörung, Wertverlust
|
|
||||||
|
|
||||||
### Migration (KERNTHEMA)
|
|
||||||
- **Begrenzung:** Massive Reduzierung Zuwanderung
|
|
||||||
- **Abschiebungen:** Konsequent durchsetzen
|
|
||||||
- **Integration:** Assimilation fordern, deutsche Leitkultur
|
|
||||||
- **"Remigration":** Rückführung abgelehnter Asylbewerber
|
|
||||||
|
|
||||||
### Sicherheit
|
|
||||||
- **Mehr Polizei:** Deutlich aufstocken
|
|
||||||
- **Härtere Strafen:** Strafverschärfungen, schnelle Verfahren
|
|
||||||
- **Grenzkontrollen:** An allen Grenzen
|
|
||||||
|
|
||||||
### Besondere Positionen
|
|
||||||
- **EU-Kritik:** "Brüsseler Bürokratie", nationale Souveränität
|
|
||||||
- **Direkte Demokratie:** Volksabstimmungen auf Landesebene
|
|
||||||
- **Gegen Corona-Maßnahmen:** Keine Impfpflicht, Maßnahmen beenden
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Schnell-Referenz: Kernkonflikte
|
|
||||||
|
|
||||||
| Thema | CDU/FDP | SPD/GRÜNE | AfD |
|
|
||||||
|-------|---------|-----------|-----|
|
|
||||||
| **Klima-Tempo** | 2045, technologieoffen | vor 2040, Erneuerbare | gegen Energiewende |
|
|
||||||
| **Kohleausstieg** | schrittweise | 2030 | gar nicht |
|
|
||||||
| **Tariftreue** | freiwillig | gesetzlich | - |
|
|
||||||
| **Vermögensteuer** | nein | ja | nein |
|
|
||||||
| **ÖPNV** | Ausbau | Vorrang vor Auto | kein Vorrang |
|
|
||||||
| **Migration** | gesteuert | humanitär | stark begrenzt |
|
|
||||||
| **Bildung** | differenziert | inklusiv | "leistungsorientiert" |
|
|
||||||
| **Windkraft** | mit Abstand | Beschleunigung | dagegen |
|
|
||||||
| **Bürgerräte** | skeptisch | dafür | Volksabstimmungen |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Regierungskoalition 2022-2027: CDU + GRÜNE
|
|
||||||
|
|
||||||
**Koalitionsvertrag "Zukunftsvertrag"** — wichtige Kompromisse:
|
|
||||||
|
|
||||||
- Klimaneutralität: 2045 (CDU) mit ambitioniertem Pfad (Grüne)
|
|
||||||
- Kohle: Kein festes Datum, aber "so früh wie möglich"
|
|
||||||
- Windkraft: Ausbau beschleunigen, aber Akzeptanz sichern
|
|
||||||
- Mobilität: ÖPNV stärken UND Straßen erhalten
|
|
||||||
- Bildung: Qualitätsoffensive, keine Strukturreform
|
|
||||||
- Sicherheit: Mehr Polizei, aber auch Prävention
|
|
||||||
|
|
||||||
**Bei Oppositionsanträgen prüfen:**
|
|
||||||
- Würde CDU zustimmen? (Wirtschaft, Sicherheit, Pragmatismus)
|
|
||||||
- Würden Grüne zustimmen? (Klima, Soziales, Demokratie)
|
|
||||||
- Koalitions-Kompromisslinie beachten
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
*Stand: März 2026*
|
|
||||||
220
app/mail.py
220
app/mail.py
@ -1,220 +0,0 @@
|
|||||||
"""Mail-Sending + Daily-Digest für E-Mail-Benachrichtigungen (#124).
|
|
||||||
|
|
||||||
Nutzt die Standard-Library `smtplib` (blockierend) in einem Thread-Executor,
|
|
||||||
damit kein zusätzlicher Dependency-Eintrag nötig ist. 1blu SMTP:
|
|
||||||
smtp.1blu.de:465 SSL, username = Postfachname (NICHT E-Mail!)
|
|
||||||
Credentials kommen aus settings.smtp_user / smtp_password via ENV.
|
|
||||||
|
|
||||||
Unsubscribe-Token: HMAC-SHA256 von sub_id + secret, URL-sicher base64-encoded.
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import base64
|
|
||||||
import hashlib
|
|
||||||
import hmac
|
|
||||||
import html
|
|
||||||
import logging
|
|
||||||
import smtplib
|
|
||||||
import ssl
|
|
||||||
from datetime import datetime
|
|
||||||
from email.message import EmailMessage
|
|
||||||
|
|
||||||
from .config import settings
|
|
||||||
|
|
||||||
# Module-level logger, named after this module so log records can be
# filtered per-module (standard logging convention).
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
# ─── Unsubscribe-Token ──────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def _unsubscribe_token(sub_id: int) -> str:
    """Derive the HMAC-SHA256 unsubscribe token for a subscription id.

    The digest is keyed with ``settings.unsubscribe_secret``, URL-safe
    base64-encoded, stripped of ``=`` padding and truncated to 22 chars
    so it fits neatly into the unsubscribe URL.
    """
    digest = hmac.new(
        settings.unsubscribe_secret.encode(),
        str(sub_id).encode(),
        hashlib.sha256,
    ).digest()
    encoded = base64.urlsafe_b64encode(digest).decode()
    return encoded.rstrip("=")[:22]
|
|
||||||
|
|
||||||
|
|
||||||
def verify_unsubscribe_token(sub_id: int, token: str) -> bool:
    """Return True if *token* is the valid unsubscribe token for *sub_id*.

    Uses a constant-time comparison to avoid timing side channels.
    """
    return hmac.compare_digest(_unsubscribe_token(sub_id), token)
|
|
||||||
|
|
||||||
|
|
||||||
def unsubscribe_url(sub_id: int) -> str:
    """Build the absolute one-click unsubscribe URL for a subscription."""
    return f"{settings.base_url}/unsubscribe/{sub_id}/{_unsubscribe_token(sub_id)}"
|
|
||||||
|
|
||||||
|
|
||||||
# ─── SMTP-Send ──────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def _send_sync(to_email: str, subject: str, text_body: str, html_body: str) -> None:
    """Blocking SMTP send via ``smtplib`` — run inside a thread executor.

    Sends a multipart/alternative message (plain text first, HTML second)
    over an implicit-TLS connection (SMTP_SSL, typically port 465).

    Raises:
        RuntimeError: if SMTP host or user are not configured in settings.
    """
    if not settings.smtp_host or not settings.smtp_user:
        raise RuntimeError("SMTP nicht konfiguriert (settings.smtp_host/user leer)")

    message = EmailMessage()
    message["From"] = f"{settings.smtp_from_name} <{settings.smtp_from_email}>"
    message["To"] = to_email
    message["Subject"] = subject
    # Plain-text body is the base content; HTML is attached as an
    # alternative so clients pick the richest part they support.
    message.set_content(text_body)
    message.add_alternative(html_body, subtype="html")

    tls_context = ssl.create_default_context()
    with smtplib.SMTP_SSL(settings.smtp_host, settings.smtp_port, context=tls_context) as conn:
        conn.login(settings.smtp_user, settings.smtp_password)
        conn.send_message(message)
|
|
||||||
|
|
||||||
|
|
||||||
async def send_mail(to_email: str, subject: str, text_body: str, html_body: str) -> None:
    """Async wrapper around :func:`_send_sync`.

    The blocking smtplib call runs in the default thread executor so the
    event loop is never blocked by SMTP I/O.
    """
    await asyncio.get_running_loop().run_in_executor(
        None, _send_sync, to_email, subject, text_body, html_body
    )
|
|
||||||
|
|
||||||
|
|
||||||
# ─── Digest-Komposition ─────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def _filter_assessments(rows: list[dict], bundesland: str | None, partei: str | None, since: str | None) -> list[dict]:
|
|
||||||
"""Filtert Assessment-Rows nach Abo-Kriterien."""
|
|
||||||
result = []
|
|
||||||
for r in rows:
|
|
||||||
if bundesland and (r.get("bundesland") or "") != bundesland:
|
|
||||||
continue
|
|
||||||
if partei:
|
|
||||||
fraktionen = r.get("fraktionen") or []
|
|
||||||
if not any(partei.upper() in (f or "").upper() for f in fraktionen):
|
|
||||||
continue
|
|
||||||
if since and (r.get("updated_at") or "") <= since:
|
|
||||||
continue
|
|
||||||
result.append(r)
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
def compose_digest(sub: dict, assessments: list[dict]) -> tuple[str, str, str]:
    """Build subject, plain-text body and HTML body for one digest mail.

    Args:
        sub: subscription row — reads keys ``id``, ``bundesland``, ``partei``.
        assessments: pre-filtered assessment rows to include; only the
            first 20 are rendered in full, the rest become a "… and N more"
            link back to the site.

    Returns:
        (subject, text_body, html_body)
    """
    n = len(assessments)
    filter_label_parts = []
    if sub.get("bundesland"):
        filter_label_parts.append(sub["bundesland"])
    if sub.get("partei"):
        filter_label_parts.append(sub["partei"])
    # Human-readable filter description for subject and body; falls back
    # to "all states & parties" when the subscription has no filters.
    filter_label = " · ".join(filter_label_parts) if filter_label_parts else "alle Bundesländer & Parteien"

    subject = f"[GWÖ-Antragsprüfer] {n} neue Bewertung{'en' if n != 1 else ''} — {filter_label}"

    unsub = unsubscribe_url(sub["id"])

    # Plain-text part
    text_lines = [
        f"Neue Antragsbewertungen — Filter: {filter_label}",
        "=" * 60,
        "",
    ]
    for a in assessments[:20]:
        score = a.get("gwoe_score")
        title = a.get("title") or a.get("drucksache")
        emp = a.get("empfehlung") or ""
        fraktionen = ", ".join(a.get("fraktionen") or [])
        url = f"{settings.base_url}/?drucksache={a.get('drucksache')}"
        text_lines.append(f"• {title}")
        text_lines.append(f"  Score: {score}/10 — {emp}")
        text_lines.append(f"  Fraktionen: {fraktionen}")
        text_lines.append(f"  {url}")
        text_lines.append("")
    if n > 20:
        text_lines.append(f"… und {n - 20} weitere. Alle anzeigen: {settings.base_url}")
        text_lines.append("")
    text_lines.append("—")
    text_lines.append(f"Abo verwalten: {settings.base_url}")
    text_lines.append(f"Abbestellen: {unsub}")
    text_body = "\n".join(text_lines)

    # HTML part — user-supplied values are escaped via html.escape before
    # interpolation into the inline-styled template.
    html_items = []
    for a in assessments[:20]:
        score = a.get("gwoe_score")
        title = html.escape(a.get("title") or a.get("drucksache") or "")
        emp = html.escape(a.get("empfehlung") or "")
        fraktionen = html.escape(", ".join(a.get("fraktionen") or []))
        # Summary is truncated to 200 chars before escaping.
        zus = html.escape((a.get("antrag_zusammenfassung") or "")[:200])
        url = html.escape(f"{settings.base_url}/?drucksache={a.get('drucksache')}")
        html_items.append(f"""
<div style="border-left:3px solid #007a80;padding:8px 12px;margin:12px 0;background:#f9f9f9">
  <a href="{url}" style="color:#007a80;text-decoration:none;font-weight:bold">{title}</a><br>
  <span style="color:#666;font-size:0.9em">Score: <b>{score}/10</b> — {emp} — {fraktionen}</span><br>
  <span style="color:#444;font-size:0.9em">{zus}</span>
</div>""")

    more_link = ""
    if n > 20:
        more_link = f'<p><a href="{settings.base_url}">… und {n - 20} weitere ansehen</a></p>'

    html_body = f"""<!DOCTYPE html>
<html><body style="font-family:Helvetica,Arial,sans-serif;max-width:600px;margin:0 auto;padding:20px;color:#333">
<h2 style="color:#007a80">{n} neue Antragsbewertung{'en' if n != 1 else ''}</h2>
<p style="color:#666">Filter: <b>{html.escape(filter_label)}</b></p>
{''.join(html_items)}
{more_link}
<hr style="border:none;border-top:1px solid #ddd;margin:20px 0">
<p style="font-size:0.85em;color:#888">
  <a href="{html.escape(settings.base_url)}" style="color:#888">Abo verwalten</a> ·
  <a href="{html.escape(unsub)}" style="color:#888">Abbestellen</a>
</p>
</body></html>"""

    return subject, text_body, html_body
|
|
||||||
|
|
||||||
|
|
||||||
async def run_daily_digest() -> dict:
    """Daily digest runner: iterate all due subscriptions and send mails.

    For each due subscription the assessments are filtered by the
    subscription's criteria and everything newer than ``last_sent`` is
    mailed as one digest.

    Returns:
        Statistics dict: ``{"sent": int, "failed": int, "skipped_empty": int}``.
    """
    # Local import to avoid a module-level import cycle with .database.
    from .database import (
        get_all_assessments,
        get_all_subscriptions_due,
        mark_subscription_sent,
    )

    stats = {"sent": 0, "failed": 0, "skipped_empty": 0}

    subs = await get_all_subscriptions_due("daily")
    if not subs:
        logger.info("run_daily_digest: keine due subscriptions")
        return stats

    # Fetch the full assessment list once and filter per subscription.
    all_assessments = await get_all_assessments(None)

    for sub in subs:
        matches = _filter_assessments(
            all_assessments,
            bundesland=sub.get("bundesland"),
            partei=sub.get("partei"),
            since=sub.get("last_sent"),
        )
        if not matches:
            stats["skipped_empty"] += 1
            # Still advance last_sent so the empty subscription is not
            # re-checked on every single run.
            await mark_subscription_sent(sub["id"])
            continue

        try:
            subject, text_body, html_body = compose_digest(sub, matches)
            await send_mail(sub["email"], subject, text_body, html_body)
            # Mark as sent only AFTER a successful send, so a failure
            # retries the same window on the next run.
            await mark_subscription_sent(sub["id"])
            stats["sent"] += 1
            logger.info("digest sent to %s (%d items)", sub["email"], len(matches))
        except Exception:
            # One broken subscription must not abort the whole run.
            logger.exception("digest failed for sub_id=%s", sub["id"])
            stats["failed"] += 1

    return stats
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # `python -m app.mail` → runs one daily-digest pass immediately
    # (manual trigger / cron entry point).
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
    result = asyncio.run(run_daily_digest())
    print(f"Digest-Lauf fertig: {result}")
|
|
||||||
2806
app/main.py
2806
app/main.py
File diff suppressed because it is too large
Load Diff
241
app/models.py
241
app/models.py
@ -1,241 +0,0 @@
|
|||||||
"""Python types ported from TypeScript types.ts — GWÖ-Matrix 2.0 für Gemeinden."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from enum import Enum
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
|
||||||
|
|
||||||
|
|
||||||
# --- Enums ---
|
|
||||||
|
|
||||||
class Empfehlung(str, Enum):
    """Overall recommendation for a motion.

    The German member values are serialized as-is and are part of the
    public API — do not change them.
    """

    ABLEHNEN = "Ablehnen"
    UEBERARBEITEN = "Überarbeiten"
    UNTERSTUETZEN_MIT = "Unterstützen mit Änderungen"
    UNEINGESCHRAENKT = "Uneingeschränkt unterstützen"
|
|
||||||
|
|
||||||
|
|
||||||
class EmpfehlungSymbol(str, Enum):
    """Compact bracket symbols for recommendations.

    NOTE(review): presumably one symbol per ``Empfehlung`` level
    ([X]=reject … [++]=full support) — confirm against the UI/prompt.
    """

    X = "[X]"
    BANG = "[!]"
    PLUS = "[+]"
    DPLUS = "[++]"
|
|
||||||
|
|
||||||
|
|
||||||
class Verbesserungspotenzial(str, Enum):
    """Improvement potential of a motion, from minor to fundamental.

    German values are serialized as-is (API contract).
    """

    GERING = "gering"
    MITTEL = "mittel"
    HOCH = "hoch"
    FUNDAMENTAL = "fundamental"
|
|
||||||
|
|
||||||
|
|
||||||
# --- Sub-models ---
|
|
||||||
|
|
||||||
class MatrixEntry(BaseModel):
    """One cell of the GWÖ matrix (fields A1–E5) with its rating.

    ``rating`` uses the -5..+5 scale; ``symbol`` is the display symbol
    the LLM emitted, which :meth:`to_symbol` can recompute server-side.
    """

    field: str = Field(..., pattern=r"^[A-E][1-5]$")
    label: str
    aspect: str
    rating: int = Field(..., ge=-5, le=5)  # scale: -5 .. +5
    symbol: Optional[str] = None

    # ─── Domain behavior (ADR 0008) ───────────────────────────────────────

    def ist_fundamental_kritisch(self) -> bool:
        """True if this field marks a fundamental conflict with GWÖ
        values (rating <= -4).

        This rule triggers the score cap: a single fundamentally
        critical field limits the overall score to 3/10 (see
        ``Assessment.verletzt_score_cap``).
        """
        return self.rating <= -4

    def to_symbol(self) -> str:
        """Derive the matrix symbol from the rating.

        Source: analyzer.py system prompt "Matrix-Feldwertung (Skala -5 bis +5)".
        The LLM currently supplies the symbol itself; this method enables
        a server-side consistency check and is the basis for eventually
        removing the symbol field from the LLM output.
        """
        # Lowest inclusive rating bound → symbol, checked top-down.
        bands = ((4, "++"), (1, "+"), (0, "○"), (-3, "−"))
        for lower_bound, symbol in bands:
            if self.rating >= lower_bound:
                return symbol
        return "−−"
|
|
||||||
|
|
||||||
|
|
||||||
class Zitat(BaseModel):
    """A quotation backing a programme score (see ``ProgrammScore.zitate``)."""

    text: str
    # Source document the quote is attributed to.
    quelle: str
    url: Optional[str] = None
    # True = verbatim match in the retrieved chunk, False = paraphrased,
    # None = assessment predates the verification feature (#97).
    verified: Optional[bool] = None
|
|
||||||
|
|
||||||
|
|
||||||
class ProgrammScore(BaseModel):
    """Fit score (0–10) against one programme, with justification and quotes."""

    score: float = Field(..., ge=0, le=10)
    # Alias carries the umlaut spelling used in the serialized JSON key.
    begruendung: str = Field(..., alias="begründung")
    zitate: list[Zitat] = Field(default_factory=list)

    # Accept both the Python field name and its alias on input.
    model_config = {"populate_by_name": True}
|
|
||||||
|
|
||||||
|
|
||||||
class FraktionScores(BaseModel):
    """Programme-fit scores for one parliamentary faction."""

    fraktion: str
    # Whether this faction filed the motion (camelCase alias from types.ts).
    ist_antragsteller: Optional[bool] = Field(None, alias="istAntragsteller")
    # Whether this faction is part of the governing coalition.
    ist_regierung: Optional[bool] = Field(None, alias="istRegierung")
    # Fit against the election programme and the basic party programme.
    wahlprogramm: ProgrammScore
    parteiprogramm: ProgrammScore

    # Accept both the Python field names and their aliases on input.
    model_config = {"populate_by_name": True}
|
|
||||||
|
|
||||||
|
|
||||||
class Verbesserung(BaseModel):
    """Suggested improvement: original passage, proposed text, rationale."""

    original: str
    vorschlag: str
    begruendung: str
|
|
||||||
|
|
||||||
|
|
||||||
# --- Main Assessment ---
|
|
||||||
|
|
||||||
class Assessment(BaseModel):
|
|
||||||
drucksache: str
|
|
||||||
title: str
|
|
||||||
fraktionen: list[str]
|
|
||||||
datum: str
|
|
||||||
link: Optional[str] = None
|
|
||||||
|
|
||||||
gwoe_score: float = Field(..., ge=0, le=10, alias="gwoeScore")
|
|
||||||
gwoe_begruendung: str = Field(..., alias="gwoeBegründung")
|
|
||||||
gwoe_matrix: list[MatrixEntry] = Field(..., alias="gwoeMatrix")
|
|
||||||
gwoe_schwerpunkt: list[str] = Field(..., alias="gwoeSchwerpunkt")
|
|
||||||
|
|
||||||
wahlprogramm_scores: list[FraktionScores] = Field(..., alias="wahlprogrammScores")
|
|
||||||
|
|
||||||
verbesserungen: list[Verbesserung] = []
|
|
||||||
|
|
||||||
staerken: list[str] = Field(default_factory=list, alias="stärken")
|
|
||||||
schwaechen: list[str] = Field(default_factory=list, alias="schwächen")
|
|
||||||
empfehlung: Empfehlung
|
|
||||||
empfehlung_symbol: Optional[str] = Field(None, alias="empfehlungSymbol")
|
|
||||||
verbesserungspotenzial: Verbesserungspotenzial
|
|
||||||
|
|
||||||
themen: list[str] = []
|
|
||||||
antrag_zusammenfassung: Optional[str] = Field(None, alias="antragZusammenfassung")
|
|
||||||
antrag_kernpunkte: Optional[list[str]] = Field(None, alias="antragKernpunkte")
|
|
||||||
konfidenz: Optional[str] = Field(None, description="LLM-Selbsteinschätzung: hoch/mittel/niedrig")
|
|
||||||
share_threads: Optional[str] = Field(None, alias="shareThreads", description="Social-Post für Threads (max 500 Zeichen)")
|
|
||||||
share_twitter: Optional[str] = Field(None, alias="shareTwitter", description="Social-Post für X/Twitter (max 280 Zeichen)")
|
|
||||||
share_mastodon: Optional[str] = Field(None, alias="shareMastodon", description="Social-Post für Mastodon (max 500 Zeichen)")
|
|
||||||
|
|
||||||
# #128: Fraktionen ohne hinterlegtes Wahlprogramm — wird server-seitig
|
|
||||||
# nach dem LLM-Call befüllt, nicht vom LLM selbst.
|
|
||||||
fehlende_programme: Optional[list[str]] = Field(
|
|
||||||
default_factory=list,
|
|
||||||
alias="fehlendeProgramme",
|
|
||||||
description="Fraktionen ohne hinterlegtes Wahlprogramm für dieses Bundesland",
|
|
||||||
)
|
|
||||||
|
|
||||||
model_config = {"populate_by_name": True}
|
|
||||||
|
|
||||||
# ─── Domain-Verhalten (ADR 0008) ──────────────────────────────────────
|
|
||||||
|
|
||||||
def ist_ablehnung(self) -> bool:
|
|
||||||
"""True, wenn die Empfehlung „Ablehnen" lautet."""
|
|
||||||
return self.empfehlung == Empfehlung.ABLEHNEN
|
|
||||||
|
|
||||||
def ist_uneingeschraenkt_unterstuetzend(self) -> bool:
|
|
||||||
"""True, wenn die Empfehlung „Uneingeschränkt unterstützen" lautet."""
|
|
||||||
return self.empfehlung == Empfehlung.UNEINGESCHRAENKT
|
|
||||||
|
|
||||||
def hat_fundamental_kritisches_feld(self) -> bool:
|
|
||||||
"""True, wenn mindestens ein Matrix-Feld rating ≤ -4 hat.
|
|
||||||
|
|
||||||
Basis für ``verletzt_score_cap``. Nutzt die VO-Methode
|
|
||||||
``MatrixEntry.ist_fundamental_kritisch``.
|
|
||||||
"""
|
|
||||||
return any(m.ist_fundamental_kritisch() for m in self.gwoe_matrix)
|
|
||||||
|
|
||||||
def verletzt_score_cap(self) -> bool:
    """Check the rule stated in the system prompt:

    If any matrix field has rating ≤ -4, the total score may be at
    most 3/10.

    The LLM prompt phrases this rule as a soft instruction, so it can
    still be violated. This method makes the rule checkable on the
    server side and anchors the warning logic in ``analyzer.py``
    (day-4 step of the DDD-lightweight migration).
    """
    if not self.hat_fundamental_kritisches_feld():
        return False
    return self.gwoe_score > 3.0
|
|
||||||
|
|
||||||
|
|
||||||
# --- Matrix constants ---

# Cell key ("A1".."E5") → human-readable label. Rows A-E are stakeholder
# groups (see ROW_LABELS), columns 1-5 are the GWÖ values (see COL_LABELS).
MATRIX_LABELS: dict[str, str] = {
    "A1": "Grundrechtsschutz und Menschenwürde in der Lieferkette",
    "A2": "Nutzen für die Gemeinde",
    "A3": "Ökologische Verantwortung für die Lieferkette",
    "A4": "Soziale Verantwortung für die Lieferkette",
    "A5": "Öffentliche Rechenschaft und Mitsprache",
    "B1": "Ethisches Finanzgebaren / Geld und Mensch",
    "B2": "Gemeinnutz im Finanzgebaren",
    "B3": "Ökologische Verantwortung der Finanzpolitik",
    "B4": "Soziale Verantwortung der Finanzpolitik",
    "B5": "Rechenschaft und Partizipation in der Finanzpolitik",
    "C1": "Individuelle Rechts- und Gleichstellung",
    "C2": "Gemeinsame Zielvereinbarung für das Gemeinwohl",
    "C3": "Förderung ökologischen Verhaltens",
    "C4": "Gerechte Verteilung von Arbeit",
    "C5": "Transparente Kommunikation und demokratische Prozesse",
    "D1": "Schutz des Individuums, Rechtsgleichheit",
    "D2": "Gesamtwohl in der Gemeinde",
    "D3": "Ökologische Gestaltung der öffentlichen Leistung",
    "D4": "Soziale Gestaltung der öffentlichen Leistung",
    "D5": "Transparente Kommunikation und demokratische Einbindung",
    "E1": "Gestaltung der Bedingungen für ein menschenwürdiges Leben – zukünftige Generationen",
    "E2": "Beitrag zum Gesamtwohl",
    "E3": "Verantwortung für ökologische Auswirkungen",
    "E4": "Beitrag zum sozialen Ausgleich",
    "E5": "Transparente und demokratische Mitbestimmung",
}

# Row key → stakeholder group of the matrix row.
ROW_LABELS: dict[str, str] = {
    "A": "Ausgelagerte Betriebe, Lieferant:innen, Dienstleister:innen",
    "B": "Finanzpartner:innen, Geldgeber:innen, Steuerzahler:innen",
    "C": "Politische Führung, Verwaltung, Ehrenamtliche",
    "D": "Bürger:innen und Wirtschaft",
    "E": "Staat, Gesellschaft und Natur",
}

# Column labels (GWÖ values), index 0 = column 1.
COL_LABELS: list[str] = [
    "Menschenwürde",
    "Solidarität",
    "Ökologische Nachhaltigkeit",
    "Soziale Gerechtigkeit",
    "Transparenz & Demokratie",
]

# Parallel column labels expressed as constitutional state principles,
# same index order as COL_LABELS.
COL_STAATSPRINZIPIEN: list[str] = [
    "Rechtsstaatsprinzip",
    "Gemeinnutz",
    "Umwelt-Verantwortung",
    "Sozialstaatsprinzip",
    "Demokratie",
]

# Version/title of the matrix these labels belong to.
MATRIX_VERSION: str = "2.0"
MATRIX_TITLE: str = "Matrix 2.0 für Gemeinden"

# Recommendation string → presentation config (symbol, hex color, CSS class)
# used by UI and PDF rendering.
EMPFEHLUNG_CONFIG: dict[str, dict] = {
    "Ablehnen": {"symbol": "[X]", "color": "#d00000", "css_class": "empf-ablehnen"},
    "Überarbeiten": {"symbol": "[!]", "color": "#F7941D", "css_class": "empf-ueberarbeiten"},
    "Unterstützen mit Änderungen": {"symbol": "[+]", "color": "#009da5", "css_class": "empf-unterstuetzen"},
    "Uneingeschränkt unterstützen": {"symbol": "[++]", "color": "#889e33", "css_class": "empf-voll"},
}
|
|
||||||
@ -1,332 +0,0 @@
|
|||||||
"""Täglicher Monitoring-Scan für neue Landtags-Drucksachen (#135).
|
|
||||||
|
|
||||||
Nur Metadaten — kein PDF-Download, kein LLM-Call.
|
|
||||||
|
|
||||||
Ablauf:
|
|
||||||
1. Iteriert alle aktiven Bundesländer via aktive_bundeslaender().
|
|
||||||
2. Ruft adapter.search("", limit=50) (Fallback: " " oder "*") auf.
|
|
||||||
3. UPSERTs Treffer in monitoring_scans. seen_first_at bleibt stabil,
|
|
||||||
last_seen_at wird immer gesetzt.
|
|
||||||
4. Aggregiert Ergebnisse in monitoring_daily_summary.
|
|
||||||
5. Gibt ScanResult zurück, aus dem run_monitoring_digest() den
|
|
||||||
Mail-Digest baut.
|
|
||||||
|
|
||||||
Kosten-Schätzung (Qwen Plus, Stand April 2026):
|
|
||||||
Quelle: https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-qianwen-7b-14b-72b-api-pricing
|
|
||||||
Input: 0.0004 USD / 1 K Token
|
|
||||||
Output: 0.0012 USD / 1 K Token
|
|
||||||
Kurs: 1 USD = 0.93 EUR (Näherung April 2026)
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import logging
|
|
||||||
from dataclasses import dataclass, field
|
|
||||||
from datetime import datetime, timezone
|
|
||||||
|
|
||||||
from .bundeslaender import aktive_bundeslaender
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# ─── Cost estimation ─────────────────────────────────────────────────────────
# Prices from the DashScope documentation (USD, as of April 2026):
# https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-qianwen-7b-14b-72b-api-pricing
_QWEN_PLUS_INPUT_USD_PER_1K = 0.0004
_QWEN_PLUS_OUTPUT_USD_PER_1K = 0.0012
_USD_TO_EUR = 0.93  # approximate rate April 2026 (a constant is fine for an estimate)

# Default assumptions per analysis (averages observed in production)
_DEFAULT_AVG_IN_TOKENS = 20_000
_DEFAULT_AVG_OUT_TOKENS = 3_000
|
|
||||||
|
|
||||||
|
|
||||||
def estimate_cost_qwen_plus(
    n_new: int,
    avg_in_tokens: int = _DEFAULT_AVG_IN_TOKENS,
    avg_out_tokens: int = _DEFAULT_AVG_OUT_TOKENS,
) -> float:
    """Estimate the analysis cost in EUR for ``n_new`` new documents (Qwen Plus).

    Uses the official DashScope prices with a fixed approximate USD→EUR
    rate. The result is an estimate, not a guarantee.

    Args:
        n_new: Number of new documents.
        avg_in_tokens: Average input tokens per motion (default 20 000).
        avg_out_tokens: Average output tokens per motion (default 3 000).

    Returns:
        Estimated cost in EUR, rounded to 4 decimals.
    """
    if n_new <= 0:
        return 0.0
    # Keep the original order of operations so float rounding is unchanged.
    cost_in_usd = (avg_in_tokens / 1000) * _QWEN_PLUS_INPUT_USD_PER_1K * n_new
    cost_out_usd = (avg_out_tokens / 1000) * _QWEN_PLUS_OUTPUT_USD_PER_1K * n_new
    return round((cost_in_usd + cost_out_usd) * _USD_TO_EUR, 4)
|
|
||||||
|
|
||||||
|
|
||||||
# ─── Datenklassen ────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
@dataclass
class BundeslandScanResult:
    """Scan result for a single federal state."""
    bundesland: str        # state code, e.g. "BY"
    total_seen: int = 0    # documents returned by the adapter in this run
    new_count: int = 0     # documents not recorded by any earlier scan
    error: str | None = None  # set when the adapter raised; the overall scan continues
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class DailyScanResult:
    """Aggregate result of one daily_scan() run."""
    scan_date: str  # YYYY-MM-DD
    results: list[BundeslandScanResult] = field(default_factory=list)
    new_total: int = 0    # sum of all new_count
    total_seen: int = 0   # sum of all total_seen
    estimated_cost_eur: float = 0.0  # via estimate_cost_qwen_plus(new_total)
    errors: list[str] = field(default_factory=list)  # "<code>: <message>" per failed state
|
|
||||||
|
|
||||||
|
|
||||||
# ─── Adapter search ──────────────────────────────────────────────────────────

# Per-adapter result cap for the daily scan.
DEFAULT_DAILY_LIMIT = 50

# Federal states excluded from the daily monitoring scan.
# NI (Lower Saxony): the NILAS portal requires a login — unauthenticated requests
# return login-page HTML, which the JSON-comment parser turns into ~50 junk records.
# The exclusion stays until a valid HAR capture is available (see issue #22).
_MONITORING_SKIP: frozenset[str] = frozenset({"NI"})
|
|
||||||
|
|
||||||
|
|
||||||
async def _search_adapter(adapter, bundesland_code: str, limit: int = DEFAULT_DAILY_LIMIT) -> list:
    """Query an adapter for current documents.

    Tries the queries "", " " and "*" in order. An exception on a
    non-final query is logged at debug level and the next query is
    tried, so one adapter failure does not abort the whole scan; an
    exception on the final query is re-raised. ``limit`` caps the
    per-adapter result count — raise it for initial seeding.
    """
    fallback_queries = ("", " ", "*")
    final_query = fallback_queries[-1]
    for query in fallback_queries:
        try:
            return await adapter.search(query, limit=limit)
        except Exception as e:
            if query == final_query:
                # Every attempt failed — propagate to the caller.
                raise
            logger.debug(
                "%s: search(%r) fehlgeschlagen (%s), versuche nächsten Query",
                bundesland_code, query, e,
            )
    return []
|
|
||||||
|
|
||||||
|
|
||||||
# ─── Haupt-Scan ──────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
async def daily_scan(limit: int = DEFAULT_DAILY_LIMIT) -> DailyScanResult:
    """Daily scan of all active federal states for new parliamentary documents.

    No PDF download, no LLM call — metadata only. ``limit`` applies per
    adapter; set it higher (e.g. 500) for initial seeding.

    Args:
        limit: Per-adapter maximum number of documents to fetch.

    Returns:
        DailyScanResult with per-state results, totals and a cost estimate.
    """
    # Local imports avoid import cycles at module load time.
    from .parlamente import ADAPTERS
    from .database import upsert_monitoring_scan, upsert_monitoring_summary

    now_utc = datetime.now(timezone.utc)
    scan_date = now_utc.strftime("%Y-%m-%d")
    now_iso = now_utc.strftime("%Y-%m-%dT%H:%M:%S")

    result = DailyScanResult(scan_date=scan_date)

    active_bls = aktive_bundeslaender()

    for bl in active_bls:
        # Skip states on the explicit blocklist (see _MONITORING_SKIP).
        if bl.code in _MONITORING_SKIP:
            logger.debug("%s: Monitoring-Skip aktiv — übersprungen", bl.code)
            continue

        adapter = ADAPTERS.get(bl.code)
        if adapter is None:
            logger.debug("Kein Adapter für %s — übersprungen", bl.code)
            continue

        bl_result = BundeslandScanResult(bundesland=bl.code)

        try:
            docs = await _search_adapter(adapter, bl.code, limit=limit)
        except Exception as exc:
            # One failing adapter must not abort the whole scan: record the
            # error, write an error summary row, and move on.
            err_msg = f"{type(exc).__name__}: {str(exc)[:500]}"
            logger.exception("Adapter-Fehler bei %s", bl.code)
            bl_result.error = err_msg
            result.errors.append(f"{bl.code}: {err_msg}")
            await upsert_monitoring_summary(
                scan_date=scan_date,
                bundesland=bl.code,
                total_seen=0,
                new_count=0,
                errors=err_msg,
            )
            result.results.append(bl_result)
            continue

        bl_result.total_seen = len(docs)
        new_this_bl = 0

        for doc in docs:
            try:
                # UPSERT: returns True only when the document was not seen
                # before (seen_first_at stays stable, last_seen_at is updated).
                is_new = await upsert_monitoring_scan(
                    bundesland=doc.bundesland,
                    drucksache=doc.drucksache,
                    title=doc.title,
                    datum=doc.datum,
                    typ=doc.typ,
                    typ_normiert=doc.typ_normiert,
                    fraktionen=doc.fraktionen,
                    link=doc.link,
                    now=now_iso,
                )
                if is_new:
                    new_this_bl += 1
            except Exception:
                # A single bad record is logged and skipped; the scan continues.
                logger.exception(
                    "DB-UPSERT fehlgeschlagen für %s/%s — wird übersprungen",
                    bl.code, getattr(doc, "drucksache", "?"),
                )

        bl_result.new_count = new_this_bl

        await upsert_monitoring_summary(
            scan_date=scan_date,
            bundesland=bl.code,
            total_seen=bl_result.total_seen,
            new_count=bl_result.new_count,
            errors=None,
        )

        logger.info(
            "%s: %d gesehen, %d neu",
            bl.code, bl_result.total_seen, bl_result.new_count,
        )
        result.results.append(bl_result)

    # Aggregate totals and estimate LLM cost for the newly found documents.
    result.new_total = sum(r.new_count for r in result.results)
    result.total_seen = sum(r.total_seen for r in result.results)
    result.estimated_cost_eur = estimate_cost_qwen_plus(result.new_total)

    return result
|
|
||||||
|
|
||||||
|
|
||||||
# ─── Mail-Digest ─────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
async def run_monitoring_digest(recipient: str) -> dict:
    """Run daily_scan() and send the result digest by e-mail.

    Args:
        recipient: Recipient address (typically the admin).

    Returns:
        dict with scan statistics plus {"mail_sent": bool}.
    """
    # Local imports keep heavy/optional dependencies out of module import time.
    from .mail import send_mail
    from .database import get_monitoring_new_today
    from jinja2 import Environment, FileSystemLoader
    from pathlib import Path

    scan_result = await daily_scan()

    # Load the documents first seen today.
    new_docs = await get_monitoring_new_today(scan_result.scan_date)

    # Render the HTML mail body from the template directory next to this module.
    tmpl_dir = Path(__file__).resolve().parent / "templates"
    env = Environment(loader=FileSystemLoader(str(tmpl_dir)), autoescape=True)
    tmpl = env.get_template("monitoring_digest.html")

    html_body = tmpl.render(
        scan_date=scan_result.scan_date,
        new_total=scan_result.new_total,
        total_seen=scan_result.total_seen,
        estimated_cost_eur=scan_result.estimated_cost_eur,
        results=scan_result.results,
        new_docs=new_docs,
        errors=scan_result.errors,
    )

    # Plaintext alternative for the multipart mail.
    text_body = _render_plain(scan_result, new_docs)

    subject = (
        f"[GWÖ-Monitor] {scan_result.scan_date} — "
        f"{scan_result.new_total} neue Drucksachen"
        + (f" ({len(scan_result.errors)} Fehler)" if scan_result.errors else "")
    )

    # Mail failure is reported in the return value, not raised — the scan
    # itself already succeeded at this point.
    mail_sent = False
    try:
        await send_mail(recipient, subject, text_body, html_body)
        mail_sent = True
        logger.info("Monitoring-Digest verschickt an %s", recipient)
    except Exception:
        logger.exception("Monitoring-Digest: Mail-Versand fehlgeschlagen")

    return {
        "scan_date": scan_result.scan_date,
        "new_total": scan_result.new_total,
        "total_seen": scan_result.total_seen,
        "estimated_cost_eur": scan_result.estimated_cost_eur,
        "error_count": len(scan_result.errors),
        "mail_sent": mail_sent,
    }
|
|
||||||
|
|
||||||
|
|
||||||
def _render_plain(scan_result: DailyScanResult, new_docs: list[dict]) -> str:
    """Build the plaintext part of the monitoring digest mail."""
    from .config import settings

    out: list[str] = [
        f"GWÖ-Antragsprüfer — Monitoring-Digest {scan_result.scan_date}",
        "=" * 60,
        "",
        f"Neue Drucksachen: {scan_result.new_total}",
        f"Gesamt gesehen: {scan_result.total_seen}",
        f"Kosten-Schätzung: {scan_result.estimated_cost_eur:.4f} EUR",
        "",
    ]

    if scan_result.errors:
        out.append(f"Fehler ({len(scan_result.errors)}):")
        out.extend(f"  • {e}" for e in scan_result.errors)
        out.append("")

    out.append("Bundesland-Übersicht:")
    for entry in scan_result.results:
        if entry.error:
            status = f"✗ Fehler: {entry.error[:80]}"
        else:
            status = f"✓ {entry.new_count} neu / {entry.total_seen} gesehen"
        out.append(f"  {entry.bundesland:6s} {status}")
    out.append("")

    if new_docs:
        out.append(f"Neue Drucksachen ({len(new_docs)}):")
        # Show at most 30 entries; summarise the rest in one line.
        for doc in new_docs[:30]:
            title = (doc.get("title") or doc.get("drucksache") or "")[:80]
            bl = doc.get("bundesland", "")
            drucks = doc.get("drucksache", "")
            out.append(f"  [{bl}] {drucks} — {title}")
        if len(new_docs) > 30:
            out.append(f"  … und {len(new_docs) - 30} weitere")
        out.append("")

    out.append(f"Webapp: {settings.base_url}")
    return "\n".join(out)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Usage: python -m app.monitoring <recipient@example.com>
    import sys
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
    # Fall back to the maintainer address when no recipient is given.
    to = sys.argv[1] if len(sys.argv) > 1 else "mail@tobiasroedel.de"
    stats = asyncio.run(run_monitoring_digest(to))
    print(f"Monitoring-Scan fertig: {stats}")
|
|
||||||
121
app/og_card.py
121
app/og_card.py
@ -1,121 +0,0 @@
|
|||||||
"""Open-Graph-Bild-Rendering via Playwright (#141).
|
|
||||||
|
|
||||||
Rendert /v2/og-template?drucksache=X als PNG 1200×630.
|
|
||||||
Cache in data/og-cache/ mit Key SHA256(drucksache + updated_at).
|
|
||||||
|
|
||||||
Öffentliche API:
|
|
||||||
``render_og_card(drucksache, updated_at, base_url)``
|
|
||||||
→ PNG-Bytes oder None bei Fehler
|
|
||||||
|
|
||||||
``cache_key(drucksache, updated_at)``
|
|
||||||
→ Hex-String (SHA-256 Kurzform, 16 Zeichen)
|
|
||||||
|
|
||||||
``get_cached(drucksache, updated_at, cache_dir)``
|
|
||||||
→ Path der gecacheten Datei oder None
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import hashlib
|
|
||||||
import logging
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)

# On-disk PNG cache, located next to the app package under data/og-cache/.
_DEFAULT_CACHE_DIR = Path(__file__).resolve().parent.parent / "data" / "og-cache"
|
|
||||||
|
|
||||||
|
|
||||||
def cache_key(drucksache: str, updated_at: str) -> str:
    """Compute the cache key as a 16-character SHA-256 prefix.

    Args:
        drucksache: Document ID (e.g. "NRW-18/1234").
        updated_at: ISO timestamp of the last database update.

    Returns:
        16 hex characters (64-bit prefix of the SHA-256 digest).
    """
    digest = hashlib.sha256(f"{drucksache}|{updated_at}".encode()).hexdigest()
    return digest[:16]
|
|
||||||
|
|
||||||
|
|
||||||
def _cache_path(drucksache: str, updated_at: str, cache_dir: Path) -> Path:
    """Deterministic PNG path inside *cache_dir* for one document version."""
    # Slashes and spaces are not filesystem-safe — replace them.
    sanitized = drucksache.replace("/", "_").replace(" ", "_")
    return cache_dir / "{}_{}.png".format(sanitized, cache_key(drucksache, updated_at))
|
|
||||||
|
|
||||||
|
|
||||||
def get_cached(
    drucksache: str,
    updated_at: str,
    cache_dir: Optional[Path] = None,
) -> Optional[Path]:
    """Return the path of the cached PNG file if it exists.

    Args:
        drucksache: Document ID.
        updated_at: ISO timestamp — when it changes, the cache entry is stale.
        cache_dir: Cache directory; defaults to data/og-cache/.

    Returns:
        Path object on a hit, otherwise None.
    """
    target = _cache_path(drucksache, updated_at, cache_dir or _DEFAULT_CACHE_DIR)
    if target.exists():
        return target
    return None
|
|
||||||
|
|
||||||
|
|
||||||
def render_og_card(
    drucksache: str,
    updated_at: str,
    base_url: str = "http://127.0.0.1:8000",
    cache_dir: Optional[Path] = None,
) -> Optional[bytes]:
    """Render the OG card as PNG via Playwright and store it in the cache.

    On a cache hit, rendering is skipped entirely.

    Args:
        drucksache: Document ID (URL-encodable).
        updated_at: ISO timestamp used for the cache key.
        base_url: Internal base URL of the app (Playwright fetches from it).
        cache_dir: Cache directory. Default: data/og-cache/.

    Returns:
        PNG bytes on success, None on any failure.
    """
    cache_dir = cache_dir or _DEFAULT_CACHE_DIR
    cache_dir.mkdir(parents=True, exist_ok=True)

    cached = get_cached(drucksache, updated_at, cache_dir)
    if cached:
        logger.debug("OG-Cache-Hit für %s", drucksache)
        return cached.read_bytes()

    dest = _cache_path(drucksache, updated_at, cache_dir)

    try:
        # Imported lazily: Playwright is heavy and only needed on a cache miss.
        from playwright.sync_api import sync_playwright
        import urllib.parse

        encoded = urllib.parse.quote(drucksache, safe="")
        url = f"{base_url}/v2/og-template?drucksache={encoded}"

        with sync_playwright() as pw:
            browser = pw.chromium.launch(args=["--no-sandbox"])
            # Viewport matches the OG image format 1200×630.
            page = browser.new_page(viewport={"width": 1200, "height": 630})
            page.goto(url, wait_until="networkidle", timeout=15000)
            png_bytes = page.screenshot(
                clip={"x": 0, "y": 0, "width": 1200, "height": 630},
                type="png",
            )
            browser.close()

        dest.write_bytes(png_bytes)
        logger.info("OG-Karte gerendert: %s → %s", drucksache, dest.name)
        return png_bytes

    except Exception:
        # Any render failure (browser missing, timeout, app down) degrades
        # gracefully to "no OG image" instead of breaking the caller.
        logger.exception("Playwright-Render fehlgeschlagen für %s", drucksache)
        return None
|
|
||||||
3421
app/parlamente.py
3421
app/parlamente.py
File diff suppressed because it is too large
Load Diff
334
app/parteien.py
334
app/parteien.py
@ -1,334 +0,0 @@
|
|||||||
"""Zentrale Parteinamen-Auflösung für den GWÖ-Antragsprüfer.
|
|
||||||
|
|
||||||
Single Source of Truth für die Mappings, die heute (vor #55) an mindestens
|
|
||||||
6 Stellen redundant codiert sind:
|
|
||||||
|
|
||||||
- Vier nahezu identische ``_normalize_fraktion()``-Methoden in
|
|
||||||
``app.parlamente`` (PortalaAdapter, ParLDokAdapter, StarFinderCGIAdapter,
|
|
||||||
PARLISAdapter)
|
|
||||||
- Der ``partei != "GRÜNE"``-Hack in ``app.embeddings`` Z. ~496
|
|
||||||
- Implizite Annahmen in ``WAHLPROGRAMME``-Keys und ``PROGRAMME``-Metadaten
|
|
||||||
|
|
||||||
Konzept:
|
|
||||||
|
|
||||||
- ``PARTEIEN`` ist eine kuratierte Tabelle (kanonisch + Aliase + optionaler
|
|
||||||
``bundesland_scope`` + langer Anzeigename + Sonderrolle für Regierungs-
|
|
||||||
Strukturen)
|
|
||||||
- ``normalize_partei(raw, *, bundesland=None)`` löst einen einzelnen
|
|
||||||
Roh-String auf den kanonischen Key auf
|
|
||||||
- ``extract_fraktionen(text, *, bundesland=None)`` zerlegt einen freien
|
|
||||||
Urheber-Text (komma-separierte Listen, MdL-mit-Klammerpartei,
|
|
||||||
HTML-Reste) in eine Liste kanonischer Keys — der Funnel für die vier
|
|
||||||
alten Adapter-Helper
|
|
||||||
- ``display_name(canonical, *, long=False)`` liefert die Anzeigeform für
|
|
||||||
UI/PDF/Reports
|
|
||||||
|
|
||||||
Backwards-Kompatibilität: die kanonischen Keys sind exakt die Strings,
|
|
||||||
die heute in der DB stehen ("CDU", "SPD", "GRÜNE", "FDP", "AfD", "LINKE",
|
|
||||||
"BSW", "SSW", "Landesregierung", "FREIE WÄHLER", "BiW", "FW", "CSU"). Das
|
|
||||||
heißt, kein Migrations-Schritt ist nötig — bestehende Assessments und
|
|
||||||
Embeddings bleiben lesbar.
|
|
||||||
|
|
||||||
Die "Freie Wähler"-Disambiguierung (BVB-FW-BB ≠ FW-Bayern ≠ FW-RLP) ist
|
|
||||||
hier dokumentiert und der Mapper trägt sie als Daten — die programmatische
|
|
||||||
Auflösung greift, sobald die jeweiligen Wahlprogramme als separate
|
|
||||||
``PROGRAMME``-Einträge existieren.
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import re
|
|
||||||
from dataclasses import dataclass, field
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
# Tabelle
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class Partei:
    canonical: str              # lookup key, identical to the DB spelling
    display_long: str           # for UI/PDF
    aliases: tuple[str, ...]    # all known spellings from HTML/JSON/LLM
    bundesland_scope: Optional[str] = None  # None = nationwide; otherwise state code
    is_government: bool = False  # marker for government structures ("Landesregierung")
|
|
||||||
|
|
||||||
|
|
||||||
# Order is precedence — with ambiguous aliases the first match wins.
# Specific entries (with ``bundesland_scope``) must come before generic
# ones, so that ``FREIE WÄHLER`` in BB is recognised as BVB-FW before it
# falls through to the generic FREIE WÄHLER entry.
PARTEIEN: tuple[Partei, ...] = (
    # ── Established nationwide parties ───────────────────────────────────
    Partei("CDU", "CDU", ("CDU", "Christlich Demokratische Union")),
    Partei("CSU", "CSU",
           ("CSU", "Christlich-Soziale Union", "Christlich Soziale Union"),
           bundesland_scope="BY"),
    Partei("SPD", "SPD", ("SPD", "Sozialdemokratische Partei")),
    Partei("GRÜNE", "BÜNDNIS 90/DIE GRÜNEN",
           ("GRÜNE", "Grüne", "GRUENE", "Gruene",
            "Bündnis 90/Die Grünen", "BÜNDNIS 90", "B90/Grüne", "Bündnis90",
            # Saxony-specific proper name of the parliamentary group
            "BÜNDNISGRÜNE", "Bündnisgrüne")),
    Partei("FDP", "FDP", ("FDP", "F.D.P.", "F. D. P.", "F.D.P", "FDP-DVP")),
    Partei("LINKE", "DIE LINKE",
           ("LINKE", "Die Linke", "DIE LINKE", "LL/PDS", "Linkspartei")),
    Partei("AfD", "AfD",
           ("AfD", "AFD", "Alternative für Deutschland")),
    Partei("BSW", "BSW",
           ("BSW", "Bündnis Sahra Wagenknecht",
            "Bündnis Sahra Wagenknecht – Vernunft und Gerechtigkeit")),
    # ── State-specific parties ───────────────────────────────────────────
    Partei("SSW", "SSW",
           ("SSW", "Südschleswigscher Wählerverband"),
           bundesland_scope="SH"),
    Partei("BiW", "BÜRGER IN WUT",
           ("BiW", "Bürger in Wut", "BIW"),
           bundesland_scope="HB"),
    # ── Freie-Wähler family (context-sensitive) ──────────────────────────
    # Order: specific scopes first. ``BVB-FW`` is an independent party in
    # the BB state parliament (Brandenburger Vereinigte Bürgerbewegung) and
    # is programmatically not identical to the FW state associations in BY
    # or RP. As long as no separate programme is indexed for it, ``BVB-FW``
    # remains a nominal mapping.
    Partei("BVB-FW", "BVB / FREIE WÄHLER",
           ("BVB", "BVB/FW", "BVB / FREIE WÄHLER", "FREIE WÄHLER", "FW", "Freie Wähler"),
           bundesland_scope="BB"),
    Partei("FW-BAYERN", "FREIE WÄHLER Bayern",
           ("FW", "FREIE WÄHLER", "Freie Wähler"),
           bundesland_scope="BY"),
    Partei("FW-SL", "Freie Wähler Saarland",
           ("FW", "FREIE WÄHLER", "Freie Wähler"),
           bundesland_scope="SL"),
    # Nationwide default for FW (e.g. RP — the state chapter of the federal
    # association). Last position, so the specific scopes above match first.
    Partei("FREIE WÄHLER", "FREIE WÄHLER",
           ("FW", "FREIE WÄHLER", "Freie Wähler")),
)
|
|
||||||
|
|
||||||
|
|
||||||
# Government/administration — not a party, but we must be able to extract
# it as a marker from author texts (when a ministry appears as the motion's
# author, the document is a government bill).
_GOVERNMENT_MARKER_RE = re.compile(
    r"LANDESREGIERUNG|SENAT VON BERLIN|REGIERENDE[RN]?\s+BÜRGERMEISTER"
    r"|\bMINISTER|STAATSKANZLEI|MINISTERPRÄSIDENT",
    re.IGNORECASE,
)

# Canonical key returned for government authors (matches the DB spelling).
GOVERNMENT_KEY = "Landesregierung"
|
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
# Lookup-Tabellen werden einmalig aus PARTEIEN abgeleitet
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def _build_canonical_index() -> dict[str, Partei]:
    """Map canonical key → Partei entry, derived once from PARTEIEN."""
    index: dict[str, Partei] = {}
    for partei in PARTEIEN:
        index[partei.canonical] = partei
    return index


_CANONICAL_INDEX: dict[str, Partei] = _build_canonical_index()
|
|
||||||
|
|
||||||
|
|
||||||
def all_canonical_keys() -> list[str]:
    """All known canonical party keys plus the government marker."""
    keys = [partei.canonical for partei in PARTEIEN]
    keys.append(GOVERNMENT_KEY)
    return keys
|
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
# Regex-Patterns pro Alias — vorab kompiliert für die Volltext-Extraktion
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def _alias_to_pattern(alias: str) -> re.Pattern[str]:
    """Convert an alias into a case-insensitive word-boundary regex.

    Dots and slashes are escaped, with optional whitespace allowed in
    between — ``F.D.P.`` then matches ``F. D. P.``, ``FDP``, ``F.D.P``.
    The latter is the historical SH/HB spelling. ``LL/PDS`` matches
    itself and nothing else.
    """
    # Escape first, then make whitespace between individual tokens flexible
    escaped = re.escape(alias)
    # Dots: optional, with optional whitespace next to them
    flex = escaped.replace(r"\.", r"\.?\s*")
    # Lookaround instead of \b so umlauts count as word characters too.
    return re.compile(rf"(?<![A-Za-zÄÖÜäöüß]){flex}(?![A-Za-zÄÖÜäöüß])", re.IGNORECASE)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class _CompiledAlias:
    """One pre-compiled alias pattern, linked back to its canonical key."""
    canonical: str
    bundesland_scope: Optional[str]  # None = nationwide
    pattern: re.Pattern[str]
|
|
||||||
|
|
||||||
|
|
||||||
def _build_compiled_aliases() -> list[_CompiledAlias]:
    """Flatten PARTEIEN into one compiled-regex entry per alias (order kept)."""
    return [
        _CompiledAlias(
            canonical=partei.canonical,
            bundesland_scope=partei.bundesland_scope,
            pattern=_alias_to_pattern(alias),
        )
        for partei in PARTEIEN
        for alias in partei.aliases
    ]


_COMPILED_ALIASES: list[_CompiledAlias] = _build_compiled_aliases()
|
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
# Public API
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def normalize_partei(raw: str, *, bundesland: Optional[str] = None) -> Optional[str]:
    """Map a single raw party string to its canonical key.

    ``bundesland`` is required whenever the raw string can only be
    disambiguated by state (e.g. ``"FREIE WÄHLER"`` -> BVB-FW in BB,
    FW-BAYERN in BY, FREIE WÄHLER in RP).  For unambiguous strings such
    as ``"CDU"`` it is irrelevant.

    Returns ``None`` when nothing matches — the caller decides whether
    that is a skip or a hard failure.
    """
    if not raw:
        return None

    # Government markers win outright, because "Ministerium der
    # Finanzen" is not a party alias and must never become one.
    if _GOVERNMENT_MARKER_RE.search(raw):
        return GOVERNMENT_KEY

    hits = [ca for ca in _COMPILED_ALIASES if ca.pattern.search(raw)]
    if not hits:
        return None

    if bundesland:
        # First choice: an alias scoped to exactly this state.
        scoped = next((h for h in hits if h.bundesland_scope == bundesland), None)
        if scoped is not None:
            return scoped.canonical
        # Second choice: the first nationwide alias (scope is None).
        generic = next((h for h in hits if h.bundesland_scope is None), None)
        if generic is not None:
            return generic.canonical

    # Fallback: the very first hit.  This can happen for a state-scoped
    # party in the "wrong" state (e.g. "SSW" in an MV motion, which
    # would be odd) — we still return it and let the caller log it.
    return hits[0].canonical
|
|
||||||
|
|
||||||
|
|
||||||
def extract_fraktionen(text: str, *, bundesland: Optional[str] = None) -> list[str]:
    """Split a free-form author string into a list of canonical keys.

    Replaces the four ``_normalize_fraktion()`` methods of the adapters
    (PortalaAdapter, ParLDokAdapter, StarFinderCGIAdapter, PARLISAdapter).
    Finds every party alias in the text, deduplicates, and keeps the
    order of first occurrence.

    ``bundesland`` drives the FW-family disambiguation: a BB paper
    authored by ``"FREIE WÄHLER"`` yields ``"BVB-FW"``, a BY paper
    ``"FW-BAYERN"``, and an RP paper stays ``"FREIE WÄHLER"``.
    """
    if not text:
        return []

    out: list[str] = []
    seen: set[str] = set()

    # Government marker first.  It does NOT suppress party matching —
    # "Antrag der Fraktion CDU und der Landesregierung" contains both —
    # but the government key is always recorded.
    if _GOVERNMENT_MARKER_RE.search(text):
        out.append(GOVERNMENT_KEY)
        seen.add(GOVERNMENT_KEY)

    # Collect every alias hit, grouped by canonical key.
    matches_by_canonical: dict[str, list[_CompiledAlias]] = {}
    for ca in _COMPILED_ALIASES:
        if ca.pattern.search(text):
            matches_by_canonical.setdefault(ca.canonical, []).append(ca)

    # FW-family special case: BVB-FW / FW-BAYERN / FW-SL / FREIE WÄHLER
    # have overlapping aliases, but only one of them may end up in the
    # output.  Pick by bundesland scope.
    #
    # NOTE(review): a previous revision also pre-scanned alias strings
    # against ``ca.pattern.pattern`` (the full regex source) to build an
    # ``fw_family`` list — that comparison could never succeed and the
    # list was never read, so the dead block was removed.
    FW_CANONICAL_FAMILY = {"BVB-FW", "FW-BAYERN", "FW-SL", "FREIE WÄHLER"}
    fw_in_match = FW_CANONICAL_FAMILY & set(matches_by_canonical.keys())
    if fw_in_match:
        chosen_fw: Optional[str] = None
        if bundesland:
            # Prefer the entry scoped to exactly this state.
            for p in PARTEIEN:
                if p.canonical in fw_in_match and p.bundesland_scope == bundesland:
                    chosen_fw = p.canonical
                    break
        if not chosen_fw:
            # Generic fallback (bundesland_scope is None).
            for p in PARTEIEN:
                if p.canonical in fw_in_match and p.bundesland_scope is None:
                    chosen_fw = p.canonical
                    break
        if not chosen_fw:
            # Last resort: deterministic pick.
            chosen_fw = sorted(fw_in_match)[0]
        if chosen_fw not in seen:
            out.append(chosen_fw)
            seen.add(chosen_fw)
        # Drop the losing FW-family members so the generic loop below
        # cannot add them a second time.
        for k in list(matches_by_canonical.keys()):
            if k in FW_CANONICAL_FAMILY and k != chosen_fw:
                del matches_by_canonical[k]

    # Remaining parties in their table order.
    for p in PARTEIEN:
        if p.canonical in matches_by_canonical and p.canonical not in seen:
            # State-scoped parties may only appear when the paper comes
            # from that state (or no state was given — then be lenient).
            if p.bundesland_scope is not None and bundesland is not None:
                if p.bundesland_scope != bundesland:
                    continue
            out.append(p.canonical)
            seen.add(p.canonical)

    return out
|
|
||||||
|
|
||||||
|
|
||||||
def display_name(canonical: str, *, long: bool = False) -> str:
    """Render a canonical key for display in UI/PDF/reports.

    With ``long=True`` the official long name is returned (e.g.
    ``"BÜNDNIS 90/DIE GRÜNEN"`` for ``"GRÜNE"``); otherwise the short,
    familiar canonical key itself.
    """
    if canonical == GOVERNMENT_KEY:
        return "Landesregierung"
    partei = _CANONICAL_INDEX.get(canonical)
    if partei is None:
        # Unknown key: pass through instead of raising.
        return canonical
    return partei.display_long if long else partei.canonical
|
|
||||||
@ -1,11 +0,0 @@
|
|||||||
"""Ports (Protocols) für externe Dienste — Teil der Hexagonal-Migration (ADR 0008).
|
|
||||||
|
|
||||||
Ein „Port" ist hier ein ``typing.Protocol``, das einen Infrastruktur-
|
|
||||||
Zugang beschreibt (LLM-Call, Embedding-Search, Mail-Versand) ohne
|
|
||||||
konkrete Implementierung. Adapter in ``app/adapters/`` implementieren
|
|
||||||
die Ports gegen reale Systeme; Tests nutzen Fake-Implementierungen.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from .llm_bewerter import LlmBewerter, LlmRequest
|
|
||||||
|
|
||||||
__all__ = ["LlmBewerter", "LlmRequest"]
|
|
||||||
@ -1,48 +0,0 @@
|
|||||||
"""LlmBewerter — Port für den LLM-Call in der Antragsbewertung.
|
|
||||||
|
|
||||||
Trennt die *Rohantwort* des LLMs (JSON-String) vom umgebenden
|
|
||||||
Application-Flow (Retry, Prompt-Composition, Citation-Binding). Die
|
|
||||||
Retry-Logik samt Temperatur-Escalation bleibt Adapter-Detail — ein
|
|
||||||
zweiter Adapter (Claude, OpenAI-kompatible Proxies) kann eine ganz
|
|
||||||
andere Strategie wählen.
|
|
||||||
|
|
||||||
Ein späterer Tag-Schritt (Kapitel 10.5 der DDD-Bewertung) kapselt
|
|
||||||
zusätzlich die JSON-Parse-Kaskade hinter dem Port; heute bekommt der
|
|
||||||
Caller noch einen JSON-String zurück.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from typing import Protocol, runtime_checkable
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class LlmRequest:
    """Everything the adapter needs to generate the evaluation —
    including the retry behaviour applied on the adapter side."""

    system_prompt: str
    user_prompt: str
    # Model and retry knobs; the defaults mirror the production setup.
    model: str = "qwen-plus"
    max_retries: int = 3
    max_tokens: int = 4000
    base_temperature: float = 0.3
|
|
||||||
|
|
||||||
|
|
||||||
@runtime_checkable
class LlmBewerter(Protocol):
    """Port: turns a prompt into the parsed LLM response.

    NOTE(review): the surrounding module docs describe the result as a
    raw JSON *string*, but the signature declares ``-> dict`` — confirm
    which contract is current; the signature is kept as-is here.

    The adapter is responsible for:

    - stripping markdown fences,
    - JSON-parse retries with increasing temperature,
    - content-fingerprint logging for forensics.

    Raises:
        json.JSONDecodeError: when all retries fail.  Higher layers
            treat this as a failed analysis.
    """

    async def bewerte(self, request: LlmRequest) -> dict: ...
|
|
||||||
@ -1,71 +0,0 @@
|
|||||||
"""BL-uebergreifende Plenarprotokoll-Abstimmungsparser (#126).
|
|
||||||
|
|
||||||
Architektur (vgl. ADR 0009): pro Bundesland eine Modul-Datei
|
|
||||||
``app/protokoll_parsers/<bl-code>.py``, die mindestens eine Funktion
|
|
||||||
``parse_protocol(pdf_path: str) -> list[dict]`` exportiert. Die Registry
|
|
||||||
``PROTOKOLL_PARSERS`` mappt BL-Code → Parser-Funktion.
|
|
||||||
|
|
||||||
Erwartetes Result-Schema pro Eintrag in der Liste::
|
|
||||||
|
|
||||||
{
|
|
||||||
"drucksache": str | None, # z.B. "18/1234"; None bei nicht aufloesbar
|
|
||||||
"ergebnis": str, # angenommen | abgelehnt | ueberwiesen | ...
|
|
||||||
"einstimmig": bool, # explizit als einstimmig markiert
|
|
||||||
"kind": str, # parser-intern, fuer Debug
|
|
||||||
"votes": { # fraktions-Listen pro Vote-Kategorie
|
|
||||||
"ja": list[str],
|
|
||||||
"nein": list[str],
|
|
||||||
"enthaltung": list[str],
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
NRW ist die Referenz-Implementierung. Folge-BL (HE/BB/MV/BE/...) bekommen
|
|
||||||
eigene Module mit demselben Funktions-Vertrag — neue Eintraege in der
|
|
||||||
Registry sind reine Tippelarbeit, das Reverse-Engineering pro Landtag
|
|
||||||
ist die eigentliche Arbeit.
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from typing import Callable
|
|
||||||
|
|
||||||
from .nrw import parse_protocol as _parse_nrw
|
|
||||||
from .bund import parse_protocol as _parse_bund
|
|
||||||
|
|
||||||
# Readability alias; the parser signature is deliberately minimal.
ProtokollParser = Callable[[str], list[dict]]

# Registry: state code -> parser function.  Stub modules (BB/BE/BW/BY/…)
# are deliberately absent until they are implemented.
PROTOKOLL_PARSERS: dict[str, ProtokollParser] = {
    "NRW": _parse_nrw,
    "BUND": _parse_bund,
}
|
|
||||||
|
|
||||||
|
|
||||||
def parse_protocol(bundesland: str, pdf_path: str) -> list[dict]:
    """Cross-state entry point: dispatch to the registered parser.

    Raises:
        NotImplementedError: when no parser is registered for the state
            (yet).  Follow-up issue: add a state module plus an entry
            in the registry.
    """
    parser = PROTOKOLL_PARSERS.get(bundesland)
    if parser is not None:
        return parser(pdf_path)
    supported = ", ".join(sorted(PROTOKOLL_PARSERS)) or "(keine)"
    raise NotImplementedError(
        f"Kein Plenarprotokoll-Parser fuer {bundesland!r}. "
        f"Unterstuetzt: {supported}. Siehe #126."
    )
|
|
||||||
|
|
||||||
|
|
||||||
def supported_bundeslaender() -> list[str]:
    """Return the sorted state codes that have a registered parser."""
    return sorted(PROTOKOLL_PARSERS.keys())
|
|
||||||
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
"ProtokollParser",
|
|
||||||
"PROTOKOLL_PARSERS",
|
|
||||||
"parse_protocol",
|
|
||||||
"supported_bundeslaender",
|
|
||||||
]
|
|
||||||
@ -1,47 +0,0 @@
|
|||||||
"""Brandenburg (BB) — Plenarprotokoll-Parser STUB (#106 Folge, ADR 0009).
|
|
||||||
|
|
||||||
**Status: noch nicht implementiert.** Dieser Modul-Stub enthaelt
|
|
||||||
Recherche-Findings vom 2026-04-28, sodass die Implementer-Session
|
|
||||||
direkt produktiv loslegen kann. Der Stub wird **nicht** in
|
|
||||||
``app.protokoll_parsers.PROTOKOLL_PARSERS`` registriert — der
|
|
||||||
Auto-Ingest-Cron ueberspringt BB solange.
|
|
||||||
|
|
||||||
## Recherche
|
|
||||||
|
|
||||||
| Feld | Wert |
|
|
||||||
|---|---|
|
|
||||||
| **Doku-System** | portala |
|
|
||||||
| **Base-URL** | https://www.parlamentsdokumentation.brandenburg.de |
|
|
||||||
| **Familie** | RP/HE-Familie |
|
|
||||||
| **Format** | PDF (Vote-Tabellen erwartet); BB-Adapter PortalaAdapter |
|
|
||||||
|
|
||||||
## URL-Discovery
|
|
||||||
|
|
||||||
https://www.parlamentsdokumentation.brandenburg.de/parladoku/w8/plpr/PlPr8-{n}.pdf (HTTP 403 ohne Referer)
|
|
||||||
|
|
||||||
## Bezug
|
|
||||||
|
|
||||||
- Architektur: ADR 0009 (Plenarprotokoll-Parser-Registry)
|
|
||||||
- Roadmap: ``docs/protokoll-parser-roadmap.md``
|
|
||||||
- Referenz-Implementation: ``app/protokoll_parsers/nrw.py``
|
|
||||||
(38 Tests, 19/19-Fixture-Garantie)
|
|
||||||
- Folge-Issue: https://repo.toppyr.de/tobias/gwoe-antragspruefer/issues/149 (Titel: "protokoll-parser: BB (Brandenburg)")
|
|
||||||
|
|
||||||
## Aufwand
|
|
||||||
|
|
||||||
Geschaetzt 1-3 Tage konzentrierte Arbeit:
|
|
||||||
- 2-4h URL-Discovery + Format-Inspektion (Sample-Protokoll inhaltlich anschauen)
|
|
||||||
- 4-8h Anchor-Phrasen-Reverse-Engineering + Parser-Implementierung
|
|
||||||
- 4h Tests mit Fixture-Pinning
|
|
||||||
- 1h Eintrag in PROTOKOLL_PARSERS + auto-ingest-protocols.sh
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
|
|
||||||
def parse_protocol(path: str) -> list[dict]:
    """STUB — see the module docstring for the research findings."""
    message = (
        "BB-Plenarprotokoll-Parser ist noch nicht implementiert. "
        "Siehe app/protokoll_parsers/bb.py-Docstring fuer Recherche-Findings "
        "und docs/protokoll-parser-roadmap.md."
    )
    raise NotImplementedError(message)
|
|
||||||
@ -1,47 +0,0 @@
|
|||||||
"""Berlin (BE) — Plenarprotokoll-Parser STUB (#106 Folge, ADR 0009).
|
|
||||||
|
|
||||||
**Status: noch nicht implementiert.** Dieser Modul-Stub enthaelt
|
|
||||||
Recherche-Findings vom 2026-04-28, sodass die Implementer-Session
|
|
||||||
direkt produktiv loslegen kann. Der Stub wird **nicht** in
|
|
||||||
``app.protokoll_parsers.PROTOKOLL_PARSERS`` registriert — der
|
|
||||||
Auto-Ingest-Cron ueberspringt BE solange.
|
|
||||||
|
|
||||||
## Recherche
|
|
||||||
|
|
||||||
| Feld | Wert |
|
|
||||||
|---|---|
|
|
||||||
| **Doku-System** | PARDOK |
|
|
||||||
| **Base-URL** | https://pardok.parlament-berlin.de |
|
|
||||||
| **Familie** | LSA-Familie |
|
|
||||||
| **Format** | PDF erwartet |
|
|
||||||
|
|
||||||
## URL-Discovery
|
|
||||||
|
|
||||||
Plenum-PDF-URLs ueber PARDOK-Search-API zu ermitteln; direktes Pattern noch nicht bekannt
|
|
||||||
|
|
||||||
## Bezug
|
|
||||||
|
|
||||||
- Architektur: ADR 0009 (Plenarprotokoll-Parser-Registry)
|
|
||||||
- Roadmap: ``docs/protokoll-parser-roadmap.md``
|
|
||||||
- Referenz-Implementation: ``app/protokoll_parsers/nrw.py``
|
|
||||||
(38 Tests, 19/19-Fixture-Garantie)
|
|
||||||
- Folge-Issue: https://repo.toppyr.de/tobias/gwoe-antragspruefer/issues/150 (Titel: "protokoll-parser: BE (Berlin)")
|
|
||||||
|
|
||||||
## Aufwand
|
|
||||||
|
|
||||||
Geschaetzt 1-3 Tage konzentrierte Arbeit:
|
|
||||||
- 2-4h URL-Discovery + Format-Inspektion (Sample-Protokoll inhaltlich anschauen)
|
|
||||||
- 4-8h Anchor-Phrasen-Reverse-Engineering + Parser-Implementierung
|
|
||||||
- 4h Tests mit Fixture-Pinning
|
|
||||||
- 1h Eintrag in PROTOKOLL_PARSERS + auto-ingest-protocols.sh
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
|
|
||||||
def parse_protocol(path: str) -> list[dict]:
    """STUB — see the module docstring for the research findings."""
    message = (
        "BE-Plenarprotokoll-Parser ist noch nicht implementiert. "
        "Siehe app/protokoll_parsers/be.py-Docstring fuer Recherche-Findings "
        "und docs/protokoll-parser-roadmap.md."
    )
    raise NotImplementedError(message)
|
|
||||||
@ -1,202 +0,0 @@
|
|||||||
"""Bundestag (BUND) — Plenarprotokoll-Parser (#106 / #148, ADR 0009).
|
|
||||||
|
|
||||||
XML-basierter Parser für Bundestags-Plenarprotokolle. Quelle:
|
|
||||||
``https://dserver.bundestag.de/btp/{wp}/{wp}{n:03}.xml`` (auch .pdf
|
|
||||||
verfuegbar; XML ist strukturierter, daher bevorzugt).
|
|
||||||
|
|
||||||
## Anchor-Sprache (verifiziert WP20 Sitzungen 30, 100)
|
|
||||||
|
|
||||||
Bundestag formuliert Beschluesse mit:
|
|
||||||
|
|
||||||
```
|
|
||||||
Die Beschlussempfehlung ist mit den Stimmen der Koalitionsfraktionen
|
|
||||||
und der Fraktion Die Linke gegen die Stimmen der CDU/CSU-Fraktion
|
|
||||||
bei Enthaltung der AfD-Fraktion angenommen.
|
|
||||||
```
|
|
||||||
|
|
||||||
Pattern:
|
|
||||||
- Subjekt: "Die Beschlussempfehlung", "Der Überweisungsvorschlag",
|
|
||||||
"Der Antrag", "Der Gesetzentwurf"
|
|
||||||
- Vote-Block: "mit den Stimmen X gegen die Stimmen Y bei Enthaltung Z"
|
|
||||||
- Anchor-Verb: "angenommen" oder "abgelehnt"
|
|
||||||
|
|
||||||
## Fraktions-Mapping
|
|
||||||
|
|
||||||
Koalitions-/Oppositions-Bezeichnungen aendern sich pro Wahlperiode.
|
|
||||||
Aktuell hardcoded fuer **WP20** (2021-2025, Ampel):
|
|
||||||
|
|
||||||
- "Koalitionsfraktionen" → SPD + GRÜNE + FDP
|
|
||||||
- "Oppositionsfraktionen" → CDU/CSU + AfD + LINKE
|
|
||||||
|
|
||||||
WP21 (ab 2025) wuerde anderes Mapping brauchen. Folge-Issue notwendig.
|
|
||||||
|
|
||||||
## Drucksachen-Aufloesung
|
|
||||||
|
|
||||||
Vor dem Anchor wird rueckwaerts nach "Drucksache 20/N" oder
|
|
||||||
"auf Drucksache 20/N" gesucht. Der naechste Match in einem 1500-Zeichen-
|
|
||||||
Fenster gewinnt.
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import re
|
|
||||||
import xml.etree.ElementTree as ET
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
|
|
||||||
# WP20 (2021-2025) coalition: SPD + GRÜNE + FDP; opposition: CDU/CSU +
# AfD + LINKE.  WP21 needs a separate per-term mapping — add it as soon
# as it is required.
WP20_KOALITIONSFRAKTIONEN = ["SPD", "GRÜNE", "FDP"]
WP20_OPPOSITIONSFRAKTIONEN = ["CDU/CSU", "AfD", "LINKE"]

# Phrase -> canonical fraction codes.  Order matters: longer aliases
# come first so that e.g. "SPD-Fraktion" is consumed before bare "SPD".
FRAKTIONEN_MAP_BT = [
    ("Koalitionsfraktionen", WP20_KOALITIONSFRAKTIONEN),
    ("Koalitionsfraktion", WP20_KOALITIONSFRAKTIONEN),
    ("Oppositionsfraktionen", WP20_OPPOSITIONSFRAKTIONEN),
    ("Oppositionsfraktion", WP20_OPPOSITIONSFRAKTIONEN),
    ("Fraktion Bündnis 90/Die Grünen", ["GRÜNE"]),
    ("Bündnis 90/Die Grünen", ["GRÜNE"]),
    ("Fraktion Die Linke", ["LINKE"]),
    ("Die Linke", ["LINKE"]),
    ("CDU/CSU-Fraktion", ["CDU/CSU"]),
    ("Fraktion der CDU/CSU", ["CDU/CSU"]),
    ("CDU/CSU", ["CDU/CSU"]),
    ("SPD-Fraktion", ["SPD"]),
    ("Fraktion der SPD", ["SPD"]),
    ("SPD", ["SPD"]),
    ("FDP-Fraktion", ["FDP"]),
    ("Fraktion der FDP", ["FDP"]),
    ("FDP", ["FDP"]),
    ("AfD-Fraktion", ["AfD"]),
    ("Fraktion der AfD", ["AfD"]),
    ("AfD", ["AfD"]),
]

# Every fraction present in WP20 (used e.g. by the unanimity heuristic).
ALL_BT_FRAKTIONEN = ["CDU/CSU", "SPD", "GRÜNE", "FDP", "AfD", "LINKE"]
|
|
||||||
|
|
||||||
|
|
||||||
def _normalize_fraktionen_bt(text: str) -> list[str]:
    """Extract BT fraction codes from a vote phrase.

    Longer phrases are consumed first (FRAKTIONEN_MAP_BT order), and
    each matched phrase is blanked out so that a substring alias (e.g.
    bare "SPD" inside "SPD-Fraktion") cannot double-match.
    """
    codes: set[str] = set()
    rest = text
    for phrase, fraktionen in FRAKTIONEN_MAP_BT:
        if phrase not in rest:
            continue
        codes.update(fraktionen)
        rest = rest.replace(phrase, " ")
    return sorted(codes)
|
|
||||||
|
|
||||||
|
|
||||||
# Result anchor: subject + "ist mit den Stimmen [...] (angenommen|abgelehnt)".
# The generous 20-500 character vote window exists because BT vote blocks
# can get long; ``[^.]`` keeps the window inside a single sentence.
RESULT_ANCHOR_RE = re.compile(
    r"(?P<subject>Die Beschlussempfehlung|Der Überweisungsvorschlag|Der Antrag"
    r"|Der Gesetzentwurf|Diese Beschlussempfehlung)"
    r"\s+ist\s+mit den Stimmen(?P<votes>[^.]{20,500}?)"
    r"\s+(?P<ergebnis>angenommen|abgelehnt)\s*\.",
    re.DOTALL,
)
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_vote_block_bt(votes_text: str) -> dict:
    """Parse a BT vote phrase: 'X gegen die Stimmen Y bei Enthaltung Z'.

    Sections that are absent simply yield empty lists.
    """
    nein_at = votes_text.find("gegen die Stimmen")
    enth_at = votes_text.find("bei Enthaltung")

    # The "ja" section runs from the start to the earliest marker that
    # is actually present (or to the end when neither occurs).
    present = [i for i in (nein_at, enth_at) if i >= 0]
    ja_end = min(present) if present else len(votes_text)

    parsed = {
        "ja": _normalize_fraktionen_bt(votes_text[:ja_end]),
        "nein": [],
        "enthaltung": [],
    }

    if nein_at >= 0:
        # "nein" ends where "bei Enthaltung" starts (when it follows),
        # otherwise at the end of the text.
        nein_end = enth_at if enth_at > nein_at else len(votes_text)
        nein_part = votes_text[nein_at + len("gegen die Stimmen"):nein_end]
        parsed["nein"] = _normalize_fraktionen_bt(nein_part)

    if enth_at >= 0:
        enth_part = votes_text[enth_at + len("bei Enthaltung"):]
        parsed["enthaltung"] = _normalize_fraktionen_bt(enth_part)

    return parsed
|
|
||||||
|
|
||||||
|
|
||||||
# Drucksache-Pattern fuer rueckwaerts-Lookup: "Drucksache 20/123" oder
|
|
||||||
# "auf Drucksache 20/123(neu)" — nehmen die letzten 1500 Zeichen vor dem
|
|
||||||
# Anchor.
|
|
||||||
DS_RE_BT = re.compile(r"Drucksache\s+(\d{1,2}/\d{2,5}(?:\(neu\))?)")
|
|
||||||
|
|
||||||
|
|
||||||
def _resolve_drucksache_bt(text: str, anchor_start: int) -> Optional[str]:
|
|
||||||
"""Rueckwaerts vom Anchor die letzte erwaehnte Drucksache finden."""
|
|
||||||
window_start = max(0, anchor_start - 1500)
|
|
||||||
window = text[window_start:anchor_start]
|
|
||||||
matches = list(DS_RE_BT.finditer(window))
|
|
||||||
if matches:
|
|
||||||
return matches[-1].group(1)
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_full_text(xml_path: str) -> str:
|
|
||||||
"""Extrahiere den Volltext aus einem BT-Plenarprotokoll-XML."""
|
|
||||||
tree = ET.parse(xml_path)
|
|
||||||
text = ET.tostring(tree.getroot(), encoding="unicode", method="text")
|
|
||||||
# Whitespace normalisieren: alles auf Single-Space, wie im NRW-Parser
|
|
||||||
text = re.sub(r"\s+", " ", text)
|
|
||||||
return text
|
|
||||||
|
|
||||||
|
|
||||||
def parse_protocol(xml_path: str) -> list[dict]:
    """Parse a Bundestag plenary-protocol XML into vote records.

    Anchors without a resolvable Drucksache in the lookback window are
    dropped.
    """
    text = _extract_full_text(xml_path)

    records: list[dict] = []
    for match in RESULT_ANCHOR_RE.finditer(text):
        drucksache = _resolve_drucksache_bt(text, match.start())
        if not drucksache:
            # No Drucksache nearby — the record would be unusable.
            continue

        ergebnis = match.group("ergebnis")  # angenommen | abgelehnt
        votes = _parse_vote_block_bt(match.group("votes"))

        # Unanimity heuristic: at least 5 of the 6 BT fractions voted
        # "ja" and nobody voted "nein" or abstained.
        einstimmig = (
            not votes["nein"]
            and not votes["enthaltung"]
            and len(votes["ja"]) >= 5
        )

        # Subject phrase -> kind classification.
        subject = match.group("subject")
        if "Überweisungsvorschlag" in subject:
            kind = "ueberweisung"
            # Referrals are effectively ergebnis="ueberwiesen".
            if ergebnis == "angenommen":
                ergebnis = "überwiesen"
        elif "Gesetzentwurf" in subject:
            kind = "gesetzentwurf"
        else:
            kind = "direct"

        records.append({
            "drucksache": drucksache,
            "ergebnis": ergebnis,
            "einstimmig": einstimmig,
            "kind": kind,
            "votes": votes,
            "anchor_pos": match.start(),
        })

    # Dedup on (drucksache, anchor_pos) in case an anchor matched twice.
    unique: list[dict] = []
    seen_keys: set = set()
    for rec in records:
        key = (rec["drucksache"], rec["anchor_pos"])
        if key not in seen_keys:
            seen_keys.add(key)
            unique.append(rec)

    return unique
|
|
||||||
@ -1,103 +0,0 @@
|
|||||||
"""Baden-Württemberg (BW) — Plenarprotokoll-Parser STUB (#106 Folge, ADR 0009).
|
|
||||||
|
|
||||||
**Status: noch nicht implementiert.** Dieser Modul-Stub enthaelt
|
|
||||||
Recherche-Findings vom 2026-04-28, sodass die Implementer-Session
|
|
||||||
direkt produktiv loslegen kann. Der Stub wird **nicht** in
|
|
||||||
``app.protokoll_parsers.PROTOKOLL_PARSERS`` registriert — der
|
|
||||||
Auto-Ingest-Cron ueberspringt BW solange.
|
|
||||||
|
|
||||||
## Recherche
|
|
||||||
|
|
||||||
| Feld | Wert |
|
|
||||||
|---|---|
|
|
||||||
| **Doku-System** | PARLIS |
|
|
||||||
| **Base-URL** | https://parlis.landtag-bw.de |
|
|
||||||
| **Familie** | eigenstaendig (PARLIS-spezifisch, eigene Pattern) |
|
|
||||||
| **Format** | PDF; URL-Pattern bekannt mit 4-stelliger Sitzungs-Nr |
|
|
||||||
|
|
||||||
## URL-Discovery
|
|
||||||
|
|
||||||
```
|
|
||||||
https://www.landtag-bw.de/files/live/sites/LTBW/files/dokumente/WP{wp}/Plp/{wp}_{n:04}.pdf
|
|
||||||
```
|
|
||||||
|
|
||||||
Verifiziert HTTP 200 fuer WP17 Sitzungen 0001, 0050, 0100. WP17 endet
|
|
||||||
ungefaehr Sitzung 130 (404 ab ~150). Pattern: 4-stellige Sitzungs-Nr
|
|
||||||
mit fuehrenden Nullen (anders als NRW `MMP18-N` ohne Padding).
|
|
||||||
|
|
||||||
## Anchor-Phrasen-Befunde (vom Sample WP17 Sitzung 50)
|
|
||||||
|
|
||||||
Stichprobe-Analyse von `17_0050.pdf` (618 KB, ~617k Zeichen):
|
|
||||||
|
|
||||||
| Pattern | Treffer | Kommentar |
|
|
||||||
|---|---:|---|
|
|
||||||
| ``angenommen`` | 1 | nur in einer Rede, **kein** Beschluss-Anchor |
|
|
||||||
| ``abgelehnt`` | 5 | gemischt Reden/Beschluesse |
|
|
||||||
| ``einstimmig`` | 7 | als Anchor-Phrase brauchbar |
|
|
||||||
| ``Drucksache 17/\d+`` | 35 | Drucksachen-Nrn werden referenziert |
|
|
||||||
| ``namentliche Abstimmung`` | 3 | namentliche Abstimmungen kommen vor |
|
|
||||||
| ``zugestimmt`` | 19 | **dominierende Vote-Phrase** |
|
|
||||||
| ``einstimmig zugestimmt`` | 5 | hochsignifikante Anchor-Phrase |
|
|
||||||
| ``Damit ist [...] einstimmig`` | 2 | NRW-aehnliche Anchor-Form |
|
|
||||||
| ``Wer dem [...] seine Zustimmung gibt`` | 0 | Bundestag-Pattern, in BW NICHT genutzt |
|
|
||||||
|
|
||||||
**Konsequenz fuer Parser:** BW-Vote-Sprache ist:
|
|
||||||
- ``Damit ist [Artikel/Antrag X] einstimmig (zu)gestimmt`` als
|
|
||||||
Haupt-Anchor (statt NRW ``angenommen``)
|
|
||||||
- ``Drucksache 17/N`` als DS-Pattern (analog NRW)
|
|
||||||
- Detaillierte Fraktions-Auflistung pro Vote ist **deutlich** weniger
|
|
||||||
vorhanden als in NRW — der Parser kann oft nur ``einstimmig`` /
|
|
||||||
``mit Mehrheit`` extrahieren, kein ja/nein/enthaltung-Breakdown.
|
|
||||||
- Fuer namentliche Abstimmungen (3/Sitzung) ist eigene Logik noetig
|
|
||||||
(separate Tabelle im PDF).
|
|
||||||
|
|
||||||
## **WICHTIG — Datenmodell-Inkompatibilitaet**
|
|
||||||
|
|
||||||
Vertiefte Probe (Sitzung 17_0050):
|
|
||||||
- Anchor-Phrase ``Damit ist Artikel 1 einstimmig zugestimmt`` (3x)
|
|
||||||
- ``Damit ist Artikel 2 mehrheitlich zugestimmt`` (1x)
|
|
||||||
- Aber **keine direkten** ``Damit ist Drucksache X angenommen``-Anchors
|
|
||||||
|
|
||||||
**BW stimmt pro Gesetzentwurf-Artikel ab**, nicht pro Drucksache. Das
|
|
||||||
ist eine andere Datenmodellierung als NRW (Drucksache → Vote) und
|
|
||||||
BUND (Beschlussempfehlung → Vote). Ein BW-Parser muesste:
|
|
||||||
|
|
||||||
1. Pro Gesetzentwurf alle Artikel-Anchors sammeln
|
|
||||||
2. Aggregat bilden: Gesetzentwurf X = wenn alle Artikel angenommen
|
|
||||||
3. Drucksache aus dem zugehoerigen Gesetzentwurf-Header oben rueckwaerts
|
|
||||||
suchen (nicht trivial — der Bezug ist mehrere Seiten zurueck)
|
|
||||||
|
|
||||||
Alternative-Modellierung: Schema ``plenum_vote_results`` um optionalen
|
|
||||||
``artikel``-Spalten-Pfad erweitern, um pro-Artikel-Records zu speichern.
|
|
||||||
|
|
||||||
**Empfehlung fuer Implementer:** vor Parser-Start mit Maintainer
|
|
||||||
abstimmen, ob BW-Datenmodell ggf. eigenen Tabellen-Anbau noetig macht
|
|
||||||
oder eine BW-spezifische Aggregations-Heuristik (alle Artikel zugestimmt
|
|
||||||
→ Gesetzentwurf-DS=angenommen) genuegt.
|
|
||||||
|
|
||||||
## Bezug
|
|
||||||
|
|
||||||
- Architektur: ADR 0009 (Plenarprotokoll-Parser-Registry)
|
|
||||||
- Roadmap: ``docs/protokoll-parser-roadmap.md``
|
|
||||||
- Referenz-Implementation: ``app/protokoll_parsers/nrw.py``
|
|
||||||
(38 Tests, 19/19-Fixture-Garantie)
|
|
||||||
- Folge-Issue: https://repo.toppyr.de/tobias/gwoe-antragspruefer/issues/151 (Titel: "protokoll-parser: BW (Baden-Württemberg)")
|
|
||||||
|
|
||||||
## Aufwand
|
|
||||||
|
|
||||||
Geschaetzt 1-3 Tage konzentrierte Arbeit:
|
|
||||||
- 2-4h URL-Discovery + Format-Inspektion (Sample-Protokoll inhaltlich anschauen)
|
|
||||||
- 4-8h Anchor-Phrasen-Reverse-Engineering + Parser-Implementierung
|
|
||||||
- 4h Tests mit Fixture-Pinning
|
|
||||||
- 1h Eintrag in PROTOKOLL_PARSERS + auto-ingest-protocols.sh
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
|
|
||||||
def parse_protocol(path: str) -> list[dict]:
    """STUB — see the module docstring for the research findings."""
    message = (
        "BW-Plenarprotokoll-Parser ist noch nicht implementiert. "
        "Siehe app/protokoll_parsers/bw.py-Docstring fuer Recherche-Findings "
        "und docs/protokoll-parser-roadmap.md."
    )
    raise NotImplementedError(message)
|
|
||||||
@ -1,47 +0,0 @@
|
|||||||
"""Bayern (BY) — Plenarprotokoll-Parser STUB (#106 Folge, ADR 0009).
|
|
||||||
|
|
||||||
**Status: noch nicht implementiert.** Dieser Modul-Stub enthaelt
|
|
||||||
Recherche-Findings vom 2026-04-28, sodass die Implementer-Session
|
|
||||||
direkt produktiv loslegen kann. Der Stub wird **nicht** in
|
|
||||||
``app.protokoll_parsers.PROTOKOLL_PARSERS`` registriert — der
|
|
||||||
Auto-Ingest-Cron ueberspringt BY solange.
|
|
||||||
|
|
||||||
## Recherche
|
|
||||||
|
|
||||||
| Feld | Wert |
|
|
||||||
|---|---|
|
|
||||||
| **Doku-System** | Eigensystem |
|
|
||||||
| **Base-URL** | https://www.bayern.landtag.de |
|
|
||||||
| **Familie** | eigenstaendig |
|
|
||||||
| **Format** | PDF erwartet, eigenes ElanTextAblage-System |
|
|
||||||
|
|
||||||
## URL-Discovery
|
|
||||||
|
|
||||||
URL-Pattern nicht trivial vorhersagbar — direkte Probe schlug 404
|
|
||||||
|
|
||||||
## Bezug
|
|
||||||
|
|
||||||
- Architektur: ADR 0009 (Plenarprotokoll-Parser-Registry)
|
|
||||||
- Roadmap: ``docs/protokoll-parser-roadmap.md``
|
|
||||||
- Referenz-Implementation: ``app/protokoll_parsers/nrw.py``
|
|
||||||
(38 Tests, 19/19-Fixture-Garantie)
|
|
||||||
- Folge-Issue: https://repo.toppyr.de/tobias/gwoe-antragspruefer/issues/152 (Titel: "protokoll-parser: BY (Bayern)")
|
|
||||||
|
|
||||||
## Aufwand
|
|
||||||
|
|
||||||
Geschaetzt 1-3 Tage konzentrierte Arbeit:
|
|
||||||
- 2-4h URL-Discovery + Format-Inspektion (Sample-Protokoll inhaltlich anschauen)
|
|
||||||
- 4-8h Anchor-Phrasen-Reverse-Engineering + Parser-Implementierung
|
|
||||||
- 4h Tests mit Fixture-Pinning
|
|
||||||
- 1h Eintrag in PROTOKOLL_PARSERS + auto-ingest-protocols.sh
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
|
|
||||||
def parse_protocol(path: str) -> list[dict]:
|
|
||||||
"""STUB — siehe Modul-Docstring."""
|
|
||||||
raise NotImplementedError(
|
|
||||||
"BY-Plenarprotokoll-Parser ist noch nicht implementiert. "
|
|
||||||
"Siehe app/protokoll_parsers/by.py-Docstring fuer Recherche-Findings "
|
|
||||||
"und docs/protokoll-parser-roadmap.md."
|
|
||||||
)
|
|
||||||
@ -1,47 +0,0 @@
|
|||||||
"""Bremen (HB) — Plenarprotokoll-Parser STUB (#106 Folge, ADR 0009).
|
|
||||||
|
|
||||||
**Status: noch nicht implementiert.** Dieser Modul-Stub enthaelt
|
|
||||||
Recherche-Findings vom 2026-04-28, sodass die Implementer-Session
|
|
||||||
direkt produktiv loslegen kann. Der Stub wird **nicht** in
|
|
||||||
``app.protokoll_parsers.PROTOKOLL_PARSERS`` registriert — der
|
|
||||||
Auto-Ingest-Cron ueberspringt HB solange.
|
|
||||||
|
|
||||||
## Recherche
|
|
||||||
|
|
||||||
| Feld | Wert |
|
|
||||||
|---|---|
|
|
||||||
| **Doku-System** | PARiS |
|
|
||||||
| **Base-URL** | https://paris.bremische-buergerschaft.de |
|
|
||||||
| **Familie** | StarWeb-Familie |
|
|
||||||
| **Format** | PDF (oder HTML) |
|
|
||||||
|
|
||||||
## URL-Discovery
|
|
||||||
|
|
||||||
URL-Pattern unbekannt — PARiS-Skin-Search-API noetig
|
|
||||||
|
|
||||||
## Bezug
|
|
||||||
|
|
||||||
- Architektur: ADR 0009 (Plenarprotokoll-Parser-Registry)
|
|
||||||
- Roadmap: ``docs/protokoll-parser-roadmap.md``
|
|
||||||
- Referenz-Implementation: ``app/protokoll_parsers/nrw.py``
|
|
||||||
(38 Tests, 19/19-Fixture-Garantie)
|
|
||||||
- Folge-Issue: https://repo.toppyr.de/tobias/gwoe-antragspruefer/issues/153 (Titel: "protokoll-parser: HB (Bremen)")
|
|
||||||
|
|
||||||
## Aufwand
|
|
||||||
|
|
||||||
Geschaetzt 1-3 Tage konzentrierte Arbeit:
|
|
||||||
- 2-4h URL-Discovery + Format-Inspektion (Sample-Protokoll inhaltlich anschauen)
|
|
||||||
- 4-8h Anchor-Phrasen-Reverse-Engineering + Parser-Implementierung
|
|
||||||
- 4h Tests mit Fixture-Pinning
|
|
||||||
- 1h Eintrag in PROTOKOLL_PARSERS + auto-ingest-protocols.sh
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
|
|
||||||
def parse_protocol(path: str) -> list[dict]:
|
|
||||||
"""STUB — siehe Modul-Docstring."""
|
|
||||||
raise NotImplementedError(
|
|
||||||
"HB-Plenarprotokoll-Parser ist noch nicht implementiert. "
|
|
||||||
"Siehe app/protokoll_parsers/hb.py-Docstring fuer Recherche-Findings "
|
|
||||||
"und docs/protokoll-parser-roadmap.md."
|
|
||||||
)
|
|
||||||
@ -1,47 +0,0 @@
|
|||||||
"""Hessen (HE) — Plenarprotokoll-Parser STUB (#106 Folge, ADR 0009).
|
|
||||||
|
|
||||||
**Status: noch nicht implementiert.** Dieser Modul-Stub enthaelt
|
|
||||||
Recherche-Findings vom 2026-04-28, sodass die Implementer-Session
|
|
||||||
direkt produktiv loslegen kann. Der Stub wird **nicht** in
|
|
||||||
``app.protokoll_parsers.PROTOKOLL_PARSERS`` registriert — der
|
|
||||||
Auto-Ingest-Cron ueberspringt HE solange.
|
|
||||||
|
|
||||||
## Recherche
|
|
||||||
|
|
||||||
| Feld | Wert |
|
|
||||||
|---|---|
|
|
||||||
| **Doku-System** | portala |
|
|
||||||
| **Base-URL** | https://starweb.hessen.de/portal |
|
|
||||||
| **Familie** | BB/RP-Familie |
|
|
||||||
| **Format** | HTML bevorzugt; ggf. PDF als Fallback |
|
|
||||||
|
|
||||||
## URL-Discovery
|
|
||||||
|
|
||||||
Plenum-Protokolle wahrscheinlich als HTML mit semantischen Tags pro Beschluss — wenn HTML zugaenglich, EINFACHER als PDF-Parser
|
|
||||||
|
|
||||||
## Bezug
|
|
||||||
|
|
||||||
- Architektur: ADR 0009 (Plenarprotokoll-Parser-Registry)
|
|
||||||
- Roadmap: ``docs/protokoll-parser-roadmap.md``
|
|
||||||
- Referenz-Implementation: ``app/protokoll_parsers/nrw.py``
|
|
||||||
(38 Tests, 19/19-Fixture-Garantie)
|
|
||||||
- Folge-Issue: https://repo.toppyr.de/tobias/gwoe-antragspruefer/issues/154 (Titel: "protokoll-parser: HE (Hessen)")
|
|
||||||
|
|
||||||
## Aufwand
|
|
||||||
|
|
||||||
Geschaetzt 1-3 Tage konzentrierte Arbeit:
|
|
||||||
- 2-4h URL-Discovery + Format-Inspektion (Sample-Protokoll inhaltlich anschauen)
|
|
||||||
- 4-8h Anchor-Phrasen-Reverse-Engineering + Parser-Implementierung
|
|
||||||
- 4h Tests mit Fixture-Pinning
|
|
||||||
- 1h Eintrag in PROTOKOLL_PARSERS + auto-ingest-protocols.sh
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
|
|
||||||
def parse_protocol(path: str) -> list[dict]:
|
|
||||||
"""STUB — siehe Modul-Docstring."""
|
|
||||||
raise NotImplementedError(
|
|
||||||
"HE-Plenarprotokoll-Parser ist noch nicht implementiert. "
|
|
||||||
"Siehe app/protokoll_parsers/he.py-Docstring fuer Recherche-Findings "
|
|
||||||
"und docs/protokoll-parser-roadmap.md."
|
|
||||||
)
|
|
||||||
@ -1,47 +0,0 @@
|
|||||||
"""Hamburg (HH) — Plenarprotokoll-Parser STUB (#106 Folge, ADR 0009).
|
|
||||||
|
|
||||||
**Status: noch nicht implementiert.** Dieser Modul-Stub enthaelt
|
|
||||||
Recherche-Findings vom 2026-04-28, sodass die Implementer-Session
|
|
||||||
direkt produktiv loslegen kann. Der Stub wird **nicht** in
|
|
||||||
``app.protokoll_parsers.PROTOKOLL_PARSERS`` registriert — der
|
|
||||||
Auto-Ingest-Cron ueberspringt HH solange.
|
|
||||||
|
|
||||||
## Recherche
|
|
||||||
|
|
||||||
| Feld | Wert |
|
|
||||||
|---|---|
|
|
||||||
| **Doku-System** | ParlDok |
|
|
||||||
| **Base-URL** | https://www.buergerschaft-hh.de/parldok |
|
|
||||||
| **Familie** | MV/TH-Familie |
|
|
||||||
| **Format** | PDF via ParlDok-Search |
|
|
||||||
|
|
||||||
## URL-Discovery
|
|
||||||
|
|
||||||
ParlDok 5.x oder 8.x — Live-Format vor Implementierung verifizieren (curl -s buergerschaft-hh.de/parldok/ | grep ParlDok)
|
|
||||||
|
|
||||||
## Bezug
|
|
||||||
|
|
||||||
- Architektur: ADR 0009 (Plenarprotokoll-Parser-Registry)
|
|
||||||
- Roadmap: ``docs/protokoll-parser-roadmap.md``
|
|
||||||
- Referenz-Implementation: ``app/protokoll_parsers/nrw.py``
|
|
||||||
(38 Tests, 19/19-Fixture-Garantie)
|
|
||||||
- Folge-Issue: https://repo.toppyr.de/tobias/gwoe-antragspruefer/issues/155 (Titel: "protokoll-parser: HH (Hamburg)")
|
|
||||||
|
|
||||||
## Aufwand
|
|
||||||
|
|
||||||
Geschaetzt 1-3 Tage konzentrierte Arbeit:
|
|
||||||
- 2-4h URL-Discovery + Format-Inspektion (Sample-Protokoll inhaltlich anschauen)
|
|
||||||
- 4-8h Anchor-Phrasen-Reverse-Engineering + Parser-Implementierung
|
|
||||||
- 4h Tests mit Fixture-Pinning
|
|
||||||
- 1h Eintrag in PROTOKOLL_PARSERS + auto-ingest-protocols.sh
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
|
|
||||||
def parse_protocol(path: str) -> list[dict]:
|
|
||||||
"""STUB — siehe Modul-Docstring."""
|
|
||||||
raise NotImplementedError(
|
|
||||||
"HH-Plenarprotokoll-Parser ist noch nicht implementiert. "
|
|
||||||
"Siehe app/protokoll_parsers/hh.py-Docstring fuer Recherche-Findings "
|
|
||||||
"und docs/protokoll-parser-roadmap.md."
|
|
||||||
)
|
|
||||||
@ -1,47 +0,0 @@
|
|||||||
"""Sachsen-Anhalt (LSA) — Plenarprotokoll-Parser STUB (#106 Folge, ADR 0009).
|
|
||||||
|
|
||||||
**Status: noch nicht implementiert.** Dieser Modul-Stub enthaelt
|
|
||||||
Recherche-Findings vom 2026-04-28, sodass die Implementer-Session
|
|
||||||
direkt produktiv loslegen kann. Der Stub wird **nicht** in
|
|
||||||
``app.protokoll_parsers.PROTOKOLL_PARSERS`` registriert — der
|
|
||||||
Auto-Ingest-Cron ueberspringt LSA solange.
|
|
||||||
|
|
||||||
## Recherche
|
|
||||||
|
|
||||||
| Feld | Wert |
|
|
||||||
|---|---|
|
|
||||||
| **Doku-System** | PARDOK |
|
|
||||||
| **Base-URL** | https://padoka.landtag.sachsen-anhalt.de |
|
|
||||||
| **Familie** | BE-Familie |
|
|
||||||
| **Format** | PDF erwartet |
|
|
||||||
|
|
||||||
## URL-Discovery
|
|
||||||
|
|
||||||
Padoka-Plattform; URL-Pattern via Search-API zu ermitteln
|
|
||||||
|
|
||||||
## Bezug
|
|
||||||
|
|
||||||
- Architektur: ADR 0009 (Plenarprotokoll-Parser-Registry)
|
|
||||||
- Roadmap: ``docs/protokoll-parser-roadmap.md``
|
|
||||||
- Referenz-Implementation: ``app/protokoll_parsers/nrw.py``
|
|
||||||
(38 Tests, 19/19-Fixture-Garantie)
|
|
||||||
- Folge-Issue: https://repo.toppyr.de/tobias/gwoe-antragspruefer/issues/156 (Titel: "protokoll-parser: LSA (Sachsen-Anhalt)")
|
|
||||||
|
|
||||||
## Aufwand
|
|
||||||
|
|
||||||
Geschaetzt 1-3 Tage konzentrierte Arbeit:
|
|
||||||
- 2-4h URL-Discovery + Format-Inspektion (Sample-Protokoll inhaltlich anschauen)
|
|
||||||
- 4-8h Anchor-Phrasen-Reverse-Engineering + Parser-Implementierung
|
|
||||||
- 4h Tests mit Fixture-Pinning
|
|
||||||
- 1h Eintrag in PROTOKOLL_PARSERS + auto-ingest-protocols.sh
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
|
|
||||||
def parse_protocol(path: str) -> list[dict]:
|
|
||||||
"""STUB — siehe Modul-Docstring."""
|
|
||||||
raise NotImplementedError(
|
|
||||||
"LSA-Plenarprotokoll-Parser ist noch nicht implementiert. "
|
|
||||||
"Siehe app/protokoll_parsers/lsa.py-Docstring fuer Recherche-Findings "
|
|
||||||
"und docs/protokoll-parser-roadmap.md."
|
|
||||||
)
|
|
||||||
@ -1,47 +0,0 @@
|
|||||||
"""Mecklenburg-Vorpommern (MV) — Plenarprotokoll-Parser STUB (#106 Folge, ADR 0009).
|
|
||||||
|
|
||||||
**Status: noch nicht implementiert.** Dieser Modul-Stub enthaelt
|
|
||||||
Recherche-Findings vom 2026-04-28, sodass die Implementer-Session
|
|
||||||
direkt produktiv loslegen kann. Der Stub wird **nicht** in
|
|
||||||
``app.protokoll_parsers.PROTOKOLL_PARSERS`` registriert — der
|
|
||||||
Auto-Ingest-Cron ueberspringt MV solange.
|
|
||||||
|
|
||||||
## Recherche
|
|
||||||
|
|
||||||
| Feld | Wert |
|
|
||||||
|---|---|
|
|
||||||
| **Doku-System** | ParlDok |
|
|
||||||
| **Base-URL** | https://www.dokumentation.landtag-mv.de |
|
|
||||||
| **Familie** | TH-Familie (synergie-fähig) |
|
|
||||||
| **Format** | PDF nach ID-Discovery |
|
|
||||||
|
|
||||||
## URL-Discovery
|
|
||||||
|
|
||||||
ParlDok 8.x SPA — Plenum-PDFs hinter Search-API. Doc-IDs nicht direkt vorhersagbar. Fuer URL-Discovery: ParlDok-Search mit Plenarprotokoll-Filter durchparsen.
|
|
||||||
|
|
||||||
## Bezug
|
|
||||||
|
|
||||||
- Architektur: ADR 0009 (Plenarprotokoll-Parser-Registry)
|
|
||||||
- Roadmap: ``docs/protokoll-parser-roadmap.md``
|
|
||||||
- Referenz-Implementation: ``app/protokoll_parsers/nrw.py``
|
|
||||||
(38 Tests, 19/19-Fixture-Garantie)
|
|
||||||
- Folge-Issue: https://repo.toppyr.de/tobias/gwoe-antragspruefer/issues/157 (Titel: "protokoll-parser: MV (Mecklenburg-Vorpommern)")
|
|
||||||
|
|
||||||
## Aufwand
|
|
||||||
|
|
||||||
Geschaetzt 1-3 Tage konzentrierte Arbeit:
|
|
||||||
- 2-4h URL-Discovery + Format-Inspektion (Sample-Protokoll inhaltlich anschauen)
|
|
||||||
- 4-8h Anchor-Phrasen-Reverse-Engineering + Parser-Implementierung
|
|
||||||
- 4h Tests mit Fixture-Pinning
|
|
||||||
- 1h Eintrag in PROTOKOLL_PARSERS + auto-ingest-protocols.sh
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
|
|
||||||
def parse_protocol(path: str) -> list[dict]:
|
|
||||||
"""STUB — siehe Modul-Docstring."""
|
|
||||||
raise NotImplementedError(
|
|
||||||
"MV-Plenarprotokoll-Parser ist noch nicht implementiert. "
|
|
||||||
"Siehe app/protokoll_parsers/mv.py-Docstring fuer Recherche-Findings "
|
|
||||||
"und docs/protokoll-parser-roadmap.md."
|
|
||||||
)
|
|
||||||
@ -1,47 +0,0 @@
|
|||||||
"""Niedersachsen (NI) — Plenarprotokoll-Parser STUB (#106 Folge, ADR 0009).
|
|
||||||
|
|
||||||
**Status: noch nicht implementiert.** Dieser Modul-Stub enthaelt
|
|
||||||
Recherche-Findings vom 2026-04-28, sodass die Implementer-Session
|
|
||||||
direkt produktiv loslegen kann. Der Stub wird **nicht** in
|
|
||||||
``app.protokoll_parsers.PROTOKOLL_PARSERS`` registriert — der
|
|
||||||
Auto-Ingest-Cron ueberspringt NI solange.
|
|
||||||
|
|
||||||
## Recherche
|
|
||||||
|
|
||||||
| Feld | Wert |
|
|
||||||
|---|---|
|
|
||||||
| **Doku-System** | StarWeb |
|
|
||||||
| **Base-URL** | https://www.landtag-niedersachsen.de |
|
|
||||||
| **Familie** | HB/SH-Familie |
|
|
||||||
| **Format** | PDF erwartet, ggf. mit Auth-Workaround |
|
|
||||||
|
|
||||||
## URL-Discovery
|
|
||||||
|
|
||||||
nilas-Portal Login-protected fuer Drucksachen — fuer Plenarprotokolle ggf. eigener oeffentlicher Endpunkt; HAR-Capture noetig (siehe Issue #22)
|
|
||||||
|
|
||||||
## Bezug
|
|
||||||
|
|
||||||
- Architektur: ADR 0009 (Plenarprotokoll-Parser-Registry)
|
|
||||||
- Roadmap: ``docs/protokoll-parser-roadmap.md``
|
|
||||||
- Referenz-Implementation: ``app/protokoll_parsers/nrw.py``
|
|
||||||
(38 Tests, 19/19-Fixture-Garantie)
|
|
||||||
- Folge-Issue: https://repo.toppyr.de/tobias/gwoe-antragspruefer/issues/158 (Titel: "protokoll-parser: NI (Niedersachsen)")
|
|
||||||
|
|
||||||
## Aufwand
|
|
||||||
|
|
||||||
Geschaetzt 1-3 Tage konzentrierte Arbeit:
|
|
||||||
- 2-4h URL-Discovery + Format-Inspektion (Sample-Protokoll inhaltlich anschauen)
|
|
||||||
- 4-8h Anchor-Phrasen-Reverse-Engineering + Parser-Implementierung
|
|
||||||
- 4h Tests mit Fixture-Pinning
|
|
||||||
- 1h Eintrag in PROTOKOLL_PARSERS + auto-ingest-protocols.sh
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
|
|
||||||
def parse_protocol(path: str) -> list[dict]:
|
|
||||||
"""STUB — siehe Modul-Docstring."""
|
|
||||||
raise NotImplementedError(
|
|
||||||
"NI-Plenarprotokoll-Parser ist noch nicht implementiert. "
|
|
||||||
"Siehe app/protokoll_parsers/ni.py-Docstring fuer Recherche-Findings "
|
|
||||||
"und docs/protokoll-parser-roadmap.md."
|
|
||||||
)
|
|
||||||
@ -1,348 +0,0 @@
|
|||||||
"""NRW-Plenarprotokoll Abstimmungs-Parser v5 (deterministisch, anchor-basiert).
|
|
||||||
|
|
||||||
Neue Architektur: Statt pro Drucksache zu suchen, findet der Parser zuerst
|
|
||||||
alle **Result-Anchors** im Volltext ("Damit ist ... angenommen/abgelehnt/...")
|
|
||||||
und extrahiert pro Anchor rückwärts:
|
|
||||||
1. die zugehörige Drucksache (nächste 18/XXXXX davor, innerhalb ~500 chars)
|
|
||||||
2. den Vote-Block (letztes "Wer stimmt ... zu?" vor dem Anchor)
|
|
||||||
|
|
||||||
Fixture-basierte Tests. Ziel: 18/19 (17824 ist bewusst nicht_gesondert).
|
|
||||||
|
|
||||||
Migriert nach app/ aus dem POC-Skript parser_v5_iteration15.py
|
|
||||||
(2026-04-28, #134/#106). Fitz-Import ist optional — pure-string-Funktionen
|
|
||||||
laufen ohne, parse_protocol() braucht das echte fitz.
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import re
|
|
||||||
import json
|
|
||||||
import sys
|
|
||||||
|
|
||||||
try: # fitz ist optional — pure-string-Funktionen laufen ohne
|
|
||||||
import fitz
|
|
||||||
except ImportError:
|
|
||||||
fitz = None
|
|
||||||
|
|
||||||
# Raw faction spellings as they appear in NRW protocol text, mapped to
# canonical names. Ordered longest-first so that e.g. the full
# "Bündnis 90/Die Grünen" is consumed before the shorter "Grünen".
FRAKTIONEN_MAP = [
    ("Bündnis 90/Die Grünen", "GRÜNE"),
    ("Bündnis 90", "GRÜNE"),
    ("Grünen", "GRÜNE"),
    ("GRÜNE", "GRÜNE"),
    ("F.D.P.", "FDP"),
    ("FDP", "FDP"),
    ("CDU", "CDU"),
    ("SPD", "SPD"),
    ("AfD", "AfD"),
    ("LINKE", "LINKE"),
    ("BSW", "BSW"),
    ("Landesregierung", "Landesregierung"),
]

# All factions of the NRW parliament (used to fill unanimous votes).
ALLE_FRAKTIONEN_NRW = ["CDU", "SPD", "GRÜNE", "FDP", "AfD"]


def normalize_fraktionen(txt):
    """Extract the canonical faction tokens mentioned in *txt*.

    Each matched spelling is stripped from a working copy of the text so
    that a longer variant cannot also trigger one of its substrings
    (double matching). The canonical names are returned sorted.
    """
    hits = set()
    scratch = txt
    for spelling, canonical in FRAKTIONEN_MAP:
        if spelling in scratch:
            hits.add(canonical)
            scratch = scratch.replace(spelling, "")
    return sorted(hits)
|
|
||||||
|
|
||||||
|
|
||||||
def _is_empty_phrase(txt):
|
|
||||||
"""Prüft ob der Text eine Negation ausdrückt (niemand, nicht, keine)."""
|
|
||||||
neg = ["niemand", "Niemand", "Keine", "keine", "nicht der Fall",
|
|
||||||
"Auch nicht", "ist nicht", "ist auch nicht", "nicht vor"]
|
|
||||||
return any(n in txt for n in neg)
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_vote_block(block: str) -> dict:
    """Extract ja/nein/enthaltung faction lists from the text before a result anchor.

    Simplified approach: each question pattern captures the answer text up
    to the next '?' (bounded length). The LAST occurrence of a question
    wins, which handles re-votes inside the same segment.
    """
    result = {"ja": [], "nein": [], "enthaltung": []}

    def _last_answer(patterns):
        # Return the captured answer of the last match of the first
        # pattern that matches at all, or None if nothing matched.
        for pattern in patterns:
            hits = list(re.finditer(pattern, block))
            if hits:
                return hits[-1].group(1)
        return None

    # YES — one broad pattern; the "(?! dagegen)" lookahead keeps it from
    # swallowing the NO question. Negated answers leave the list empty.
    answer = _last_answer([
        r"Wer stimmt(?! dagegen)[^?]{0,80}zu\?\s*[–-]?\s*([^?]{1,250})",
    ])
    if answer is not None and not _is_empty_phrase(answer):
        result["ja"] = normalize_fraktionen(answer)

    # NO — several phrasing variants, first matching pattern wins.
    answer = _last_answer([
        r"Wer stimmt dagegen\?\s*[–-]?\s*([^?]{1,200})",
        r"Wer lehnt[^?]{0,30}ab\?\s*[–-]?\s*([^?]{1,200})",
        r"Stimmt jemand dagegen\?\s*[–-]?\s*([^?]{1,120})",
        r"Ist jemand dagegen\?\s*[–-]?\s*([^?]{1,120})",
    ])
    if answer is not None:
        result["nein"] = [] if _is_empty_phrase(answer) else normalize_fraktionen(answer)

    # ABSTENTION — several phrasing variants, first matching pattern wins.
    answer = _last_answer([
        r"Wer enthält sich\?\s*[–-]?\s*([^?]{1,200})",
        r"Gibt es Enthaltungen\?\s*[–-]?\s*([^?]{1,200})",
        r"Enthält sich jemand\?\s*[–-]?\s*([^?]{1,120})",
        r"Möchte sich jemand enthalten\?\s*[–-]?\s*([^?]{1,120})",
    ])
    if answer is not None:
        result["enthaltung"] = [] if _is_empty_phrase(answer) else normalize_fraktionen(answer)

    # Implicit empty abstention: "Enthaltungen gibt es damit nicht".
    if not result["enthaltung"] and re.search(
        r"Enthaltungen\s+gibt\s+es\s+damit\s+nicht", block
    ):
        result["enthaltung"] = []

    return result
|
|
||||||
|
|
||||||
|
|
||||||
# Result anchors. Each entry is (regex, kind); the kind steers how
# find_results() extracts the Drucksache number and the ergebnis.
# v6: broad anchor matches for all direct variants.
# Kind 'direct_broad' matches "Damit/Somit ist der/dieser/die Antrag/
# Gesetzentwurf/... angenommen/abgelehnt/überwiesen/verabschiedet" — the
# Drucksache is extracted separately out of the match span (or from the
# preceding segment).
RESULT_ANCHORS = [
    # Broad direct-result pattern (covers almost every variant).
    # "beschlossen" in a direct vote on a motion counts as "angenommen".
    (r"(?:Damit|Somit) ist (?:der|dieser|die|diese) (?:Antrag|Gesetzentwurf|Änderungsantrag|Wahlvorschlag|Entschließungsantrag|Beschlussempfehlung)[^.]{0,200}?(angenommen|abgelehnt|überwiesen|zurückgezogen|verabschiedet|beschlossen)", "direct_broad"),
    # Variant without a leading "Damit/Somit ist":
    # "Dieser Antrag Drucksache X ist somit ... abgelehnt"
    (r"Dieser (?:Antrag|Gesetzentwurf|Änderungsantrag|Wahlvorschlag)[^.]{0,200}?(angenommen|abgelehnt|überwiesen|zurückgezogen|verabschiedet|beschlossen)", "direct_broad"),
    # Referral anchors (the Drucksache must be searched backwards).
    (r"(?:Damit|Somit) ist (?:diese|die)\s+Überweisungsempfehlung\s+(einstimmig\s+|ebenso\s+)?(angenommen)", "ueber"),
    (r"Somit ist das so beschlossen()()", "ueber"),
    (r"Damit ist das so beschlossen()()", "ueber"),
    # "Damit schließt sich der Landtag der Empfehlung des Rechtsausschusses an"
    # — the chamber joins a committee recommendation.
    (r"Damit schließt sich der Landtag der Empfehlung[^.]{0,100}?an()()", "ueber"),
    # Petitions-committee bulk vote.
    (r"Damit sind die Beschlüsse des Petitionsausschusses[^.]{0,100}?bestätigt()()", "petition"),
    # Overview confirmation (§ 82 Abs. 2 GO).
    (r"Damit sind die in Drucksache (\d+/\d+(?:\(neu\))?) enthaltenen[^.]{0,150}?bestätigt()", "uebersicht"),
]
|
|
||||||
|
|
||||||
|
|
||||||
def find_results(text: str) -> list[dict]:
    """Find all result anchors in the (normalised) protocol text.

    Every pattern in RESULT_ANCHORS is matched over the full text; each hit
    yields one result dict. Drucksache resolution depends on the anchor
    kind — for "ueber"-style anchors it may stay None here and is resolved
    backwards by the caller (see resolve_drucksache_for_ueber).

    Returns:
        List of {drucksache, ergebnis, kind, einstimmig, anchor_start,
        anchor_end}, sorted by anchor_start and de-duplicated by position.
    """
    results = []
    for pat, kind in RESULT_ANCHORS:
        for m in re.finditer(pat, text):
            groups = m.groups()
            ds = None
            einstimmig = False
            span_text = text[m.start():m.end()]

            # For kind "direct": the first group that looks like a
            # Drucksache number IS the Drucksache.
            # NOTE(review): no entry in the visible RESULT_ANCHORS carries
            # kind "direct" — this branch looks vestigial; confirm before
            # removing.
            if kind == "direct":
                for g in groups:
                    if g and re.match(r"^\d+/\d+(?:\(neu\))?$", g):
                        ds = g
                        break
            # For "direct_broad": search for the Drucksache inside the
            # matched span itself.
            elif kind == "direct_broad":
                ds_match = re.search(r"Drucksache\s+(\d+/\d+(?:\(neu\))?)", span_text)
                if ds_match:
                    ds = ds_match.group(1)
            # Ergebnis: look for a known result word in any capture group.
            ergebnis = None
            for g in groups:
                if g and g.strip() == "einstimmig":
                    einstimmig = True
                if g and g.strip() in ("angenommen", "abgelehnt", "überwiesen", "zurückgezogen", "verabschiedet", "beschlossen"):
                    ergebnis = g.strip()
            # "verabschiedet" = adopted and passed (a bill);
            # "beschlossen" (direct vote) also normalises to "angenommen".
            if ergebnis in ("verabschiedet", "beschlossen"):
                ergebnis = "angenommen"
            if kind == "ueber":
                ergebnis = "überwiesen"
                # Peek slightly past the match end for an "einstimmig" tail.
                if "einstimmig" in text[m.start():m.end() + 5]:
                    einstimmig = True
                # "Damit/Somit ist das so beschlossen" = implicitly unanimous.
                if "so beschlossen" in text[m.start():m.end() + 5]:
                    einstimmig = True
            if kind == "petition":
                ergebnis = "sammel"
                einstimmig = True
            if kind == "uebersicht":
                ergebnis = "bestätigt"
                einstimmig = True
                # The Drucksache sits in group[0] of the uebersicht pattern.
                for g in groups:
                    if g and re.match(r"^\d+/\d+(?:\(neu\))?$", g):
                        ds = g
                        break
            if not ergebnis:
                continue
            results.append({
                "drucksache": ds,
                "ergebnis": ergebnis,
                "kind": kind,
                "einstimmig": einstimmig,
                "anchor_start": m.start(),
                "anchor_end": m.end(),
            })
    # Sort by position; the stable sort plus first-wins dedup below means
    # earlier RESULT_ANCHORS entries take precedence at equal positions.
    results.sort(key=lambda r: r["anchor_start"])
    dedup = []
    seen_positions = set()
    for r in results:
        if r["anchor_start"] in seen_positions:
            continue
        seen_positions.add(r["anchor_start"])
        dedup.append(r)
    return dedup
|
|
||||||
|
|
||||||
|
|
||||||
def resolve_drucksache_for_ueber(text: str, anchor_start: int) -> str | None:
|
|
||||||
"""Für Überweisungs-Anchors: rückwärts die nächste Drucksache-Nr suchen."""
|
|
||||||
# Schaue bis 2000 chars zurück
|
|
||||||
window_start = max(0, anchor_start - 2000)
|
|
||||||
window = text[window_start:anchor_start]
|
|
||||||
# Letzte Drucksache vor dem Anchor
|
|
||||||
matches = list(re.finditer(r"Drucksache\s+(\d+/\d+(?:\(neu\))?)", window))
|
|
||||||
if not matches:
|
|
||||||
return None
|
|
||||||
return matches[-1].group(1)
|
|
||||||
|
|
||||||
|
|
||||||
def normalize_text(text: str) -> str:
    """Normalise raw PDF text: undo end-of-line hyphenation, flatten whitespace.

    "Überweisungs-\\nempfehlung" becomes "Überweisungsempfehlung"; every
    remaining run of whitespace (newlines included) collapses to a single
    space.
    """
    dehyphenated = re.sub(r"-\s*\n\s*", "", text)
    return re.sub(r"\s+", " ", dehyphenated)
|
|
||||||
|
|
||||||
|
|
||||||
def parse_protocol(pdf_path: str) -> list[dict]:
    """Parse one NRW plenary protocol PDF into a list of vote records.

    Pipeline: extract the full text with fitz (PyMuPDF), normalise it,
    find all result anchors, then for each anchor cut out the vote block
    (from the last segment boundary to the anchor end) and parse the
    ja/nein/enthaltung lists from it.

    Returns:
        List of {"drucksache", "ergebnis", "votes", "anchor_pos"} dicts;
        anchors without a resolvable Drucksache are dropped.
    """
    doc = fitz.open(pdf_path)
    full = "".join(page.get_text() for page in doc)
    doc.close()
    full = normalize_text(full)

    anchors = find_results(full)
    parsed = []

    # Segment boundaries: every vote begins with one of these phrases.
    segment_starts = [m.start() for m in re.finditer(
        r"(?:(?:Damit|Somit) kommen wir (?:zur|somit zur) Abstimmung|Wir kommen (?:somit )?zur Abstimmung|Wir stimmen(?!\s+zu\?)|(?:Somit|Damit) kommen wir (?:direkt )?zu den Abstimmungen|Wir stimmen zweitens|gehen (?:wir )?zur Abstimmung über|Somit kommen wir sofort zur Abstimmung)",
        full
    )]

    def segment_start_for(anchor_pos: int) -> int:
        """Last segment boundary before the anchor (1500-char fallback window)."""
        candidates = [s for s in segment_starts if s < anchor_pos]
        return candidates[-1] if candidates else max(0, anchor_pos - 1500)

    for a in anchors:
        ds = a["drucksache"]
        if not ds:
            # Referral-style anchors carry no Drucksache in the match span;
            # resolve it backwards from the anchor position.
            ds = resolve_drucksache_for_ueber(full, a["anchor_start"])
        if not ds:
            continue

        # Vote block: from the last segment start up to the anchor end.
        block_start = segment_start_for(a["anchor_start"])
        block = full[block_start:a["anchor_end"]]

        # Unanimous: always all factions "ja", regardless of the window.
        if a["einstimmig"]:
            votes = {"ja": list(ALLE_FRAKTIONEN_NRW), "nein": [], "enthaltung": []}
        else:
            votes = _parse_vote_block(block)
            # Unanimity fallback: a referral anchor with no own
            # "Wer stimmt ... zu?" block (only the inverse form
            # "Wer stimmt gegen ...?") is unanimous in practice.
            if a["kind"] == "ueber" and not votes["ja"] and not votes["nein"] and not votes["enthaltung"]:
                votes = {"ja": list(ALLE_FRAKTIONEN_NRW), "nein": [], "enthaltung": []}

        parsed.append({
            "drucksache": ds,
            "ergebnis": a["ergebnis"],
            "votes": votes,
            "anchor_pos": a["anchor_start"],
        })

    return parsed
|
|
||||||
|
|
||||||
|
|
||||||
def compare_to_fixture(parsed: list[dict], fixture: dict) -> tuple[int, list]:
    """Compare parser output against a ground-truth fixture.

    Args:
        parsed: list of {"drucksache", "ergebnis", "votes": {...}} dicts as
            produced by parse_protocol().
        fixture: {"drucksachen": [...]} where each entry carries the expected
            "ergebnis" plus "ja"/"nein"/"enthaltung" faction lists. The
            sentinel ergebnis "nicht_gesondert_abgestimmt" means the parser
            is expected NOT to find that Drucksache at all.

    Returns:
        (matches, errors): count of fully matching fixture entries and a
        list of human-readable mismatch descriptions.

    Fix vs. previous version: removed the duplicated re-read of
    gt["ergebnis"] and the unreachable second "nicht_gesondert" check after
    the candidate pick (the earlier branch had already `continue`d).
    """
    # Group parsed entries by Drucksache (duplicates are possible when a
    # Drucksache is voted on more than once in a session).
    parsed_map = {}
    for p in parsed:
        parsed_map.setdefault(p["drucksache"], []).append(p)

    errors = []
    matches = 0
    for gt in fixture["drucksachen"]:
        ds = gt["drucksache"]
        gt_erg = gt["ergebnis"]
        if ds not in parsed_map:
            if gt_erg == "nicht_gesondert_abgestimmt":
                # Correctly NOT found.
                matches += 1
                continue
            errors.append(f"{ds}: NOT FOUND")
            continue
        if gt_erg == "nicht_gesondert_abgestimmt":
            errors.append(f"{ds}: expected nicht_gesondert, but parser found it")
            continue
        # If the parser produced several entries, compare against the first.
        p = parsed_map[ds][0]

        ok = True
        if p["ergebnis"] != gt_erg:
            errors.append(f"{ds}: ergebnis {p['ergebnis']} != {gt_erg}")
            ok = False
        if sorted(p["votes"]["ja"]) != sorted(gt["ja"]):
            errors.append(f"{ds}: ja {p['votes']['ja']} != {gt['ja']}")
            ok = False
        if sorted(p["votes"]["nein"]) != sorted(gt["nein"]):
            errors.append(f"{ds}: nein {p['votes']['nein']} != {gt['nein']}")
            ok = False
        if sorted(p["votes"]["enthaltung"]) != sorted(gt["enthaltung"]):
            errors.append(f"{ds}: enth {p['votes']['enthaltung']} != {gt['enthaltung']}")
            ok = False
        if ok:
            matches += 1
    return matches, errors
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
pdf = "/tmp/mmp18-119.pdf"
|
|
||||||
fixture_path = "/tmp/nrw_fixture.json"
|
|
||||||
fixture = json.load(open(fixture_path))
|
|
||||||
|
|
||||||
parsed = parse_protocol(pdf)
|
|
||||||
print(f"Parsed {len(parsed)} Abstimmungen gesamt")
|
|
||||||
|
|
||||||
matches, errors = compare_to_fixture(parsed, fixture)
|
|
||||||
print(f"Match gegen Fixture: {matches}/{len(fixture['drucksachen']) - 1} (ohne nicht_gesondert)")
|
|
||||||
print()
|
|
||||||
if errors:
|
|
||||||
print("Fehler:")
|
|
||||||
for e in errors:
|
|
||||||
print(f" {e}")
|
|
||||||
@ -1,47 +0,0 @@
|
|||||||
"""Rheinland-Pfalz (RP) — Plenarprotokoll-Parser STUB (#106 Folge, ADR 0009).
|
|
||||||
|
|
||||||
**Status: noch nicht implementiert.** Dieser Modul-Stub enthaelt
|
|
||||||
Recherche-Findings vom 2026-04-28, sodass die Implementer-Session
|
|
||||||
direkt produktiv loslegen kann. Der Stub wird **nicht** in
|
|
||||||
``app.protokoll_parsers.PROTOKOLL_PARSERS`` registriert — der
|
|
||||||
Auto-Ingest-Cron ueberspringt RP solange.
|
|
||||||
|
|
||||||
## Recherche
|
|
||||||
|
|
||||||
| Feld | Wert |
|
|
||||||
|---|---|
|
|
||||||
| **Doku-System** | portala |
|
|
||||||
| **Base-URL** | https://opal.rlp.de |
|
|
||||||
| **Familie** | BB/HE-Familie |
|
|
||||||
| **Format** | PDF erwartet |
|
|
||||||
|
|
||||||
## URL-Discovery
|
|
||||||
|
|
||||||
OPAL_extern (NICHT identisch mit NRW-OPAL); URL-Pattern via Search-API
|
|
||||||
|
|
||||||
## Bezug
|
|
||||||
|
|
||||||
- Architektur: ADR 0009 (Plenarprotokoll-Parser-Registry)
|
|
||||||
- Roadmap: ``docs/protokoll-parser-roadmap.md``
|
|
||||||
- Referenz-Implementation: ``app/protokoll_parsers/nrw.py``
|
|
||||||
(38 Tests, 19/19-Fixture-Garantie)
|
|
||||||
- Folge-Issue: https://repo.toppyr.de/tobias/gwoe-antragspruefer/issues/159 (Titel: "protokoll-parser: RP (Rheinland-Pfalz)")
|
|
||||||
|
|
||||||
## Aufwand
|
|
||||||
|
|
||||||
Geschaetzt 1-3 Tage konzentrierte Arbeit:
|
|
||||||
- 2-4h URL-Discovery + Format-Inspektion (Sample-Protokoll inhaltlich anschauen)
|
|
||||||
- 4-8h Anchor-Phrasen-Reverse-Engineering + Parser-Implementierung
|
|
||||||
- 4h Tests mit Fixture-Pinning
|
|
||||||
- 1h Eintrag in PROTOKOLL_PARSERS + auto-ingest-protocols.sh
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
|
|
||||||
def parse_protocol(path: str) -> list[dict]:
|
|
||||||
"""STUB — siehe Modul-Docstring."""
|
|
||||||
raise NotImplementedError(
|
|
||||||
"RP-Plenarprotokoll-Parser ist noch nicht implementiert. "
|
|
||||||
"Siehe app/protokoll_parsers/rp.py-Docstring fuer Recherche-Findings "
|
|
||||||
"und docs/protokoll-parser-roadmap.md."
|
|
||||||
)
|
|
||||||
@ -1,47 +0,0 @@
|
|||||||
"""Schleswig-Holstein (SH) — Plenarprotokoll-Parser STUB (#106 Folge, ADR 0009).
|
|
||||||
|
|
||||||
**Status: noch nicht implementiert.** Dieser Modul-Stub enthaelt
|
|
||||||
Recherche-Findings vom 2026-04-28, sodass die Implementer-Session
|
|
||||||
direkt produktiv loslegen kann. Der Stub wird **nicht** in
|
|
||||||
``app.protokoll_parsers.PROTOKOLL_PARSERS`` registriert — der
|
|
||||||
Auto-Ingest-Cron ueberspringt SH solange.
|
|
||||||
|
|
||||||
## Recherche
|
|
||||||
|
|
||||||
| Feld | Wert |
|
|
||||||
|---|---|
|
|
||||||
| **Doku-System** | StarWeb |
|
|
||||||
| **Base-URL** | http://lissh.lvn.parlanet.de |
|
|
||||||
| **Familie** | HB/NI-Familie |
|
|
||||||
| **Format** | PDF erwartet |
|
|
||||||
|
|
||||||
## URL-Discovery
|
|
||||||
|
|
||||||
Starfinder-CGI-Backend; Plenum-Protokolle als PDF-Direktlinks moeglich
|
|
||||||
|
|
||||||
## Bezug
|
|
||||||
|
|
||||||
- Architektur: ADR 0009 (Plenarprotokoll-Parser-Registry)
|
|
||||||
- Roadmap: ``docs/protokoll-parser-roadmap.md``
|
|
||||||
- Referenz-Implementation: ``app/protokoll_parsers/nrw.py``
|
|
||||||
(38 Tests, 19/19-Fixture-Garantie)
|
|
||||||
- Folge-Issue: https://repo.toppyr.de/tobias/gwoe-antragspruefer/issues/160 (Titel: "protokoll-parser: SH (Schleswig-Holstein)")
|
|
||||||
|
|
||||||
## Aufwand
|
|
||||||
|
|
||||||
Geschaetzt 1-3 Tage konzentrierte Arbeit:
|
|
||||||
- 2-4h URL-Discovery + Format-Inspektion (Sample-Protokoll inhaltlich anschauen)
|
|
||||||
- 4-8h Anchor-Phrasen-Reverse-Engineering + Parser-Implementierung
|
|
||||||
- 4h Tests mit Fixture-Pinning
|
|
||||||
- 1h Eintrag in PROTOKOLL_PARSERS + auto-ingest-protocols.sh
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
|
|
||||||
def parse_protocol(path: str) -> list[dict]:
    """STUB — see the module docstring for research findings and effort estimate.

    Args:
        path: Filesystem path to a downloaded SH plenary-protocol file.

    Raises:
        NotImplementedError: always — the SH parser has not been written yet.
            Per the module docstring, this stub is deliberately NOT registered
            in ``PROTOKOLL_PARSERS``, so the auto-ingest cron skips SH.
    """
    raise NotImplementedError(
        "SH-Plenarprotokoll-Parser ist noch nicht implementiert. "
        "Siehe app/protokoll_parsers/sh.py-Docstring fuer Recherche-Findings "
        "und docs/protokoll-parser-roadmap.md."
    )
|
|
||||||
@ -1,47 +0,0 @@
|
|||||||
"""Saarland (SL) — Plenarprotokoll-Parser STUB (#106 Folge, ADR 0009).
|
|
||||||
|
|
||||||
**Status: noch nicht implementiert.** Dieser Modul-Stub enthaelt
|
|
||||||
Recherche-Findings vom 2026-04-28, sodass die Implementer-Session
|
|
||||||
direkt produktiv loslegen kann. Der Stub wird **nicht** in
|
|
||||||
``app.protokoll_parsers.PROTOKOLL_PARSERS`` registriert — der
|
|
||||||
Auto-Ingest-Cron ueberspringt SL solange.
|
|
||||||
|
|
||||||
## Recherche
|
|
||||||
|
|
||||||
| Feld | Wert |
|
|
||||||
|---|---|
|
|
||||||
| **Doku-System** | Eigensystem |
|
|
||||||
| **Base-URL** | https://www.landtag-saar.de |
|
|
||||||
| **Familie** | eigenstaendig |
|
|
||||||
| **Format** | PDF erwartet ueber Umbraco-Filter |
|
|
||||||
|
|
||||||
## URL-Discovery
|
|
||||||
|
|
||||||
Umbraco-Backend; siehe SaarlandAdapter — Plenum-Protokolle ggf. analog Drucksachen via aawSearchSurfaceController-Pattern
|
|
||||||
|
|
||||||
## Bezug
|
|
||||||
|
|
||||||
- Architektur: ADR 0009 (Plenarprotokoll-Parser-Registry)
|
|
||||||
- Roadmap: ``docs/protokoll-parser-roadmap.md``
|
|
||||||
- Referenz-Implementation: ``app/protokoll_parsers/nrw.py``
|
|
||||||
(38 Tests, 19/19-Fixture-Garantie)
|
|
||||||
- Folge-Issue: https://repo.toppyr.de/tobias/gwoe-antragspruefer/issues/161 (Titel: "protokoll-parser: SL (Saarland)")
|
|
||||||
|
|
||||||
## Aufwand
|
|
||||||
|
|
||||||
Geschaetzt 1-3 Tage konzentrierte Arbeit:
|
|
||||||
- 2-4h URL-Discovery + Format-Inspektion (Sample-Protokoll inhaltlich anschauen)
|
|
||||||
- 4-8h Anchor-Phrasen-Reverse-Engineering + Parser-Implementierung
|
|
||||||
- 4h Tests mit Fixture-Pinning
|
|
||||||
- 1h Eintrag in PROTOKOLL_PARSERS + auto-ingest-protocols.sh
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
|
|
||||||
def parse_protocol(path: str) -> list[dict]:
    """STUB — see the module docstring for research findings and effort estimate.

    Args:
        path: Filesystem path to a downloaded SL plenary-protocol file.

    Raises:
        NotImplementedError: always — the SL parser has not been written yet.
            Per the module docstring, this stub is deliberately NOT registered
            in ``PROTOKOLL_PARSERS``, so the auto-ingest cron skips SL.
    """
    raise NotImplementedError(
        "SL-Plenarprotokoll-Parser ist noch nicht implementiert. "
        "Siehe app/protokoll_parsers/sl.py-Docstring fuer Recherche-Findings "
        "und docs/protokoll-parser-roadmap.md."
    )
|
|
||||||
@ -1,47 +0,0 @@
|
|||||||
"""Sachsen (SN) — Plenarprotokoll-Parser STUB (#106 Folge, ADR 0009).
|
|
||||||
|
|
||||||
**Status: noch nicht implementiert.** Dieser Modul-Stub enthaelt
|
|
||||||
Recherche-Findings vom 2026-04-28, sodass die Implementer-Session
|
|
||||||
direkt produktiv loslegen kann. Der Stub wird **nicht** in
|
|
||||||
``app.protokoll_parsers.PROTOKOLL_PARSERS`` registriert — der
|
|
||||||
Auto-Ingest-Cron ueberspringt SN solange.
|
|
||||||
|
|
||||||
## Recherche
|
|
||||||
|
|
||||||
| Feld | Wert |
|
|
||||||
|---|---|
|
|
||||||
| **Doku-System** | EDAS-XML-Export |
|
|
||||||
| **Base-URL** | https://edas.landtag.sachsen.de |
|
|
||||||
| **Familie** | eigenstaendig |
|
|
||||||
| **Format** | XML bevorzugt |
|
|
||||||
|
|
||||||
## URL-Discovery
|
|
||||||
|
|
||||||
EDAS macht XML-Export — Plenarprotokolle ggf. ebenfalls strukturiert verfuegbar; Synergie mit bestehendem `app/sn_xml_export`-Workflow pruefen
|
|
||||||
|
|
||||||
## Bezug
|
|
||||||
|
|
||||||
- Architektur: ADR 0009 (Plenarprotokoll-Parser-Registry)
|
|
||||||
- Roadmap: ``docs/protokoll-parser-roadmap.md``
|
|
||||||
- Referenz-Implementation: ``app/protokoll_parsers/nrw.py``
|
|
||||||
(38 Tests, 19/19-Fixture-Garantie)
|
|
||||||
- Folge-Issue: https://repo.toppyr.de/tobias/gwoe-antragspruefer/issues/162 (Titel: "protokoll-parser: SN (Sachsen)")
|
|
||||||
|
|
||||||
## Aufwand
|
|
||||||
|
|
||||||
Geschaetzt 1-3 Tage konzentrierte Arbeit:
|
|
||||||
- 2-4h URL-Discovery + Format-Inspektion (Sample-Protokoll inhaltlich anschauen)
|
|
||||||
- 4-8h Anchor-Phrasen-Reverse-Engineering + Parser-Implementierung
|
|
||||||
- 4h Tests mit Fixture-Pinning
|
|
||||||
- 1h Eintrag in PROTOKOLL_PARSERS + auto-ingest-protocols.sh
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
|
|
||||||
def parse_protocol(path: str) -> list[dict]:
    """STUB — see the module docstring for research findings and effort estimate.

    Args:
        path: Filesystem path to a downloaded SN plenary-protocol file.

    Raises:
        NotImplementedError: always — the SN parser has not been written yet.
            Per the module docstring, this stub is deliberately NOT registered
            in ``PROTOKOLL_PARSERS``, so the auto-ingest cron skips SN.
    """
    raise NotImplementedError(
        "SN-Plenarprotokoll-Parser ist noch nicht implementiert. "
        "Siehe app/protokoll_parsers/sn.py-Docstring fuer Recherche-Findings "
        "und docs/protokoll-parser-roadmap.md."
    )
|
|
||||||
@ -1,47 +0,0 @@
|
|||||||
"""Thüringen (TH) — Plenarprotokoll-Parser STUB (#106 Folge, ADR 0009).
|
|
||||||
|
|
||||||
**Status: noch nicht implementiert.** Dieser Modul-Stub enthaelt
|
|
||||||
Recherche-Findings vom 2026-04-28, sodass die Implementer-Session
|
|
||||||
direkt produktiv loslegen kann. Der Stub wird **nicht** in
|
|
||||||
``app.protokoll_parsers.PROTOKOLL_PARSERS`` registriert — der
|
|
||||||
Auto-Ingest-Cron ueberspringt TH solange.
|
|
||||||
|
|
||||||
## Recherche
|
|
||||||
|
|
||||||
| Feld | Wert |
|
|
||||||
|---|---|
|
|
||||||
| **Doku-System** | ParlDok |
|
|
||||||
| **Base-URL** | https://parldok.thueringer-landtag.de |
|
|
||||||
| **Familie** | MV-Familie (Synergien hoch) |
|
|
||||||
| **Format** | PDF nach ID-Discovery |
|
|
||||||
|
|
||||||
## URL-Discovery
|
|
||||||
|
|
||||||
ParlDok-Plattform analog MV — sobald MV-Parser steht, ist TH eine Iteration der MV-Logik mit anderem Base-URL-Praefix
|
|
||||||
|
|
||||||
## Bezug
|
|
||||||
|
|
||||||
- Architektur: ADR 0009 (Plenarprotokoll-Parser-Registry)
|
|
||||||
- Roadmap: ``docs/protokoll-parser-roadmap.md``
|
|
||||||
- Referenz-Implementation: ``app/protokoll_parsers/nrw.py``
|
|
||||||
(38 Tests, 19/19-Fixture-Garantie)
|
|
||||||
- Folge-Issue: https://repo.toppyr.de/tobias/gwoe-antragspruefer/issues/163 (Titel: "protokoll-parser: TH (Thüringen)")
|
|
||||||
|
|
||||||
## Aufwand
|
|
||||||
|
|
||||||
Geschaetzt 1-3 Tage konzentrierte Arbeit:
|
|
||||||
- 2-4h URL-Discovery + Format-Inspektion (Sample-Protokoll inhaltlich anschauen)
|
|
||||||
- 4-8h Anchor-Phrasen-Reverse-Engineering + Parser-Implementierung
|
|
||||||
- 4h Tests mit Fixture-Pinning
|
|
||||||
- 1h Eintrag in PROTOKOLL_PARSERS + auto-ingest-protocols.sh
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
|
|
||||||
def parse_protocol(path: str) -> list[dict]:
    """STUB — see the module docstring for research findings and effort estimate.

    Args:
        path: Filesystem path to a downloaded TH plenary-protocol file.

    Raises:
        NotImplementedError: always — the TH parser has not been written yet.
            Per the module docstring, this stub is deliberately NOT registered
            in ``PROTOKOLL_PARSERS``, so the auto-ingest cron skips TH.
    """
    raise NotImplementedError(
        "TH-Plenarprotokoll-Parser ist noch nicht implementiert. "
        "Siehe app/protokoll_parsers/th.py-Docstring fuer Recherche-Findings "
        "und docs/protokoll-parser-roadmap.md."
    )
|
|
||||||
303
app/queue.py
303
app/queue.py
@ -1,303 +0,0 @@
|
|||||||
"""Analysis job queue with configurable parallel workers (#95, #99).
|
|
||||||
|
|
||||||
Processes jobs via an asyncio.Queue with N concurrent workers (Semaphore).
|
|
||||||
Tracks per-job status for live UI visualization.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
from typing import Any, Callable, Coroutine, Optional
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Konfiguration
|
|
||||||
MAX_QUEUE_SIZE = 50
|
|
||||||
CONCURRENCY = int(os.environ.get("QUEUE_CONCURRENCY", "3"))
|
|
||||||
MIN_PAUSE_SECONDS = 3
|
|
||||||
_shutting_down = False # Sperrt neue Jobs bei Graceful Shutdown
|
|
||||||
BACKOFF_BASE = 15
|
|
||||||
BACKOFF_MAX = 300
|
|
||||||
|
|
||||||
# In-Memory Queue + Job-Tracking
|
|
||||||
_queue: asyncio.Queue = asyncio.Queue(maxsize=MAX_QUEUE_SIZE)
|
|
||||||
_worker_tasks: list[asyncio.Task] = []
|
|
||||||
_stats = {
|
|
||||||
"processed": 0,
|
|
||||||
"failed": 0,
|
|
||||||
"started_at": None,
|
|
||||||
"avg_duration": 60.0,
|
|
||||||
}
|
|
||||||
# Live Job-Tracking: job_id → {status, drucksache, started_at, duration, error}
|
|
||||||
_jobs: dict[str, dict] = {}
|
|
||||||
_MAX_TRACKED_JOBS = 100 # Älteste Jobs werden verworfen
|
|
||||||
|
|
||||||
|
|
||||||
class QueueFullError(Exception):
    """Raised when a job cannot be enqueued — either the queue has reached
    MAX_QUEUE_SIZE or the server is in graceful-shutdown mode."""
|
|
||||||
|
|
||||||
|
|
||||||
async def enqueue(
    job_id: str,
    callback: Callable[..., Coroutine],
    *args: Any,
    drucksache: str = "",
    **kwargs: Any,
) -> int:
    """Put a job onto the in-memory queue and start tracking it.

    Returns the queue position (qsize after insertion).

    Raises:
        QueueFullError: if the server is shutting down or the queue is at
            MAX_QUEUE_SIZE.
    """
    if _shutting_down:
        raise QueueFullError("Server wird neu gestartet. Bitte in Kürze erneut versuchen.")

    try:
        _queue.put_nowait((job_id, callback, args, kwargs))
    except asyncio.QueueFull:
        raise QueueFullError(f"Queue voll ({MAX_QUEUE_SIZE} Jobs).")

    _jobs[job_id] = dict(
        status="queued",
        drucksache=drucksache,
        enqueued_at=time.time(),
        started_at=None,
        duration=None,
        error=None,
    )

    # Evict the oldest tracked jobs once the cap is exceeded.
    overflow = len(_jobs) - _MAX_TRACKED_JOBS
    if overflow > 0:
        by_age = sorted(_jobs, key=lambda jid: _jobs[jid].get("enqueued_at", 0))
        for stale_id in by_age[:overflow]:
            del _jobs[stale_id]

    position = _queue.qsize()
    logger.info("Job %s enqueued at position %d (concurrency=%d)", job_id, position, CONCURRENCY)
    return position
|
|
||||||
|
|
||||||
|
|
||||||
def get_queue_status() -> dict:
    """Return queue status plus per-job details for the UI visualization.

    Combines three sources:
    - live counters from the in-memory queue and ``_stats``,
    - the last 30 tracked jobs from ``_jobs``,
    - 'stale' jobs read from the DB (leftovers after a container restart).

    Never raises: the DB lookup is best-effort and falls back to an empty
    stale list if the database (or its schema) is unavailable.
    """
    pending = _queue.qsize()
    avg = _stats["avg_duration"]
    # With N workers the expected wait divides across workers.
    estimated_wait = (pending / max(CONCURRENCY, 1)) * (avg + MIN_PAUSE_SECONDS)

    # Load stale jobs from the DB (survivors of a container restart).
    stale_jobs = []
    try:
        import sqlite3
        from .config import settings
        conn = sqlite3.connect(settings.db_path)
        conn.row_factory = sqlite3.Row
        # Fix: SELECT * so the optional 'drucksache' column is actually
        # present when the schema has it. The previous query listed columns
        # explicitly and omitted drucksache, so the r.keys() guard below
        # could never fire and the field was always "".
        rows = conn.execute(
            "SELECT * FROM jobs "
            "WHERE status IN ('stale', 'queued', 'processing') ORDER BY created_at DESC LIMIT 20"
        ).fetchall()
        conn.close()
        stale_jobs = [{"job_id": r["id"], "bundesland": r["bundesland"] or "",
                       "status": "stale", "drucksache": r["drucksache"] if "drucksache" in r.keys() else "",
                       "duration": None, "error": "Container-Restart"} for r in rows]
    except Exception:
        # Best-effort only — a missing DB must not break the status endpoint.
        pass

    # Note: a previously computed 'recent_jobs' sorted list was dead code
    # (never referenced) and has been removed; the payload below uses the
    # insertion-ordered tail of _jobs, as before.
    return {
        "pending": pending,
        "max_size": MAX_QUEUE_SIZE,
        "concurrency": CONCURRENCY,
        "shutting_down": _shutting_down,
        "processed_total": _stats["processed"],
        "failed_total": _stats["failed"],
        "estimated_wait_seconds": round(estimated_wait),
        "avg_job_duration_seconds": round(avg, 1),
        "workers_running": sum(1 for t in _worker_tasks if not t.done()),
        "jobs": [{
            "job_id": jid,
            "drucksache": j.get("drucksache", ""),
            "status": j["status"],
            "duration": round(j["duration"], 1) if j.get("duration") else None,
            "error": j.get("error"),
        } for jid, j in list(_jobs.items())[-30:]] + stale_jobs,
    }
|
|
||||||
|
|
||||||
|
|
||||||
async def _worker(worker_id: int):
    """Worker coroutine: pulls jobs off the shared queue and runs them one at
    a time. Parallelism comes from starting CONCURRENCY instances of this
    coroutine (see start_worker); there is no per-job semaphore.

    Tracks status/duration/error in the module-level _jobs dict, maintains an
    exponential moving average of job duration in _stats, and backs off
    exponentially after consecutive failures.
    """
    logger.info("Worker %d started", worker_id)
    consecutive_failures = 0

    while True:
        # Blocks until a job is available.
        job_id, callback, args, kwargs = await _queue.get()
        t0 = time.time()

        # Job may have been evicted from tracking (enqueue trims old entries).
        if job_id in _jobs:
            _jobs[job_id]["status"] = "processing"
            _jobs[job_id]["started_at"] = t0

        try:
            logger.info("Worker %d processing %s (queue: %d)", worker_id, job_id, _queue.qsize())
            await callback(*args, **kwargs)
            duration = time.time() - t0
            _stats["processed"] += 1
            # Exponential moving average (alpha = 0.2) of job duration.
            _stats["avg_duration"] = (_stats["avg_duration"] * 0.8) + (duration * 0.2)
            consecutive_failures = 0

            if job_id in _jobs:
                _jobs[job_id]["status"] = "completed"
                _jobs[job_id]["duration"] = duration
            logger.info("Worker %d completed %s in %.1fs", worker_id, job_id, duration)

        except Exception as e:
            _stats["failed"] += 1
            consecutive_failures += 1
            if job_id in _jobs:
                _jobs[job_id]["status"] = "failed"
                _jobs[job_id]["duration"] = time.time() - t0
                # Truncate so the UI payload stays small.
                _jobs[job_id]["error"] = str(e)[:100]
            logger.exception("Worker %d failed %s", worker_id, job_id)

            # Exponential backoff starting at the second consecutive failure:
            # 15s, 30s, 60s, ... capped at BACKOFF_MAX.
            if consecutive_failures > 1:
                backoff = min(BACKOFF_BASE * (2 ** (consecutive_failures - 2)), BACKOFF_MAX)
                logger.warning("Worker %d backoff %ds", worker_id, backoff)
                await asyncio.sleep(backoff)

        finally:
            # Always acknowledge, so _queue.join() callers are not stuck.
            _queue.task_done()

        # Minimum pause between jobs on this worker.
        await asyncio.sleep(MIN_PAUSE_SECONDS)
|
|
||||||
|
|
||||||
|
|
||||||
def start_worker() -> list[asyncio.Task]:
    """Ensure CONCURRENCY worker coroutines are running.

    Idempotent: slots that already hold a live (not done) task are left
    untouched; finished slots are replaced and missing slots appended.
    Returns the list of worker tasks.
    """
    global _worker_tasks
    _stats["started_at"] = time.time()

    for slot in range(CONCURRENCY):
        slot_exists = slot < len(_worker_tasks)
        if slot_exists and not _worker_tasks[slot].done():
            # Live worker already occupies this slot — nothing to do.
            continue
        fresh_task = asyncio.create_task(_worker(slot))
        if slot_exists:
            _worker_tasks[slot] = fresh_task
        else:
            _worker_tasks.append(fresh_task)

    logger.info("Queue: %d workers started (QUEUE_CONCURRENCY=%d)", CONCURRENCY, CONCURRENCY)
    return _worker_tasks
|
|
||||||
|
|
||||||
|
|
||||||
async def graceful_shutdown(timeout: int = 900):
    """Drain currently running jobs, then allow the process to exit.

    1. Blocks new enqueues (_shutting_down = True).
    2. Polls every 2s until no job is in 'processing' state, up to *timeout*
       seconds.
    3. Still-queued jobs are left behind; they show up as 'stale' in the DB
       after restart so the user can re-trigger them.

    The 15-minute default (900s) generously covers a single LLM call
    (~120s max), even with 3 workers busy at once.
    """
    global _shutting_down
    _shutting_down = True

    def _running() -> int:
        # Number of jobs a worker is currently executing.
        return sum(1 for j in _jobs.values() if j.get("status") == "processing")

    processing = _running()
    pending = _queue.qsize()

    if processing == 0:
        logger.info("Graceful shutdown: keine laufenden Jobs, sofort beenden (%d queued verworfen)", pending)
        return

    logger.warning("Graceful shutdown: warte auf %d laufende Jobs (max %ds). %d queued werden beim Restart stale.",
                   processing, timeout, pending)

    # Wait only for the in-flight jobs, not for the whole queue.
    start = time.time()
    deadline = start + timeout
    while time.time() < deadline:
        if _running() == 0:
            logger.info("Graceful shutdown: alle laufenden Jobs beendet nach %.0fs", time.time() - start)
            return
        await asyncio.sleep(2)

    logger.error("Graceful shutdown: Timeout nach %ds, %d Jobs noch aktiv",
                 timeout, _running())
|
|
||||||
|
|
||||||
|
|
||||||
async def re_enqueue_pending(analysis_callback=None):
    """Re-enqueue jobs that were queued or processing when the container died.

    Jobs WITH a drucksache column get re-enqueued automatically (if callback provided).
    Jobs WITHOUT drucksache (legacy) get marked as stale and cleaned up.

    Args:
        analysis_callback: async function(job_id, drucksache, text, bundesland, model, doc)
    """
    # Local imports keep module import light; aiosqlite is only needed here.
    import aiosqlite
    from .config import settings

    # Collect everything the previous container run left unfinished.
    async with aiosqlite.connect(settings.db_path) as db:
        db.row_factory = aiosqlite.Row
        rows = await db.execute(
            "SELECT id, bundesland, drucksache, model FROM jobs "
            "WHERE status IN ('queued', 'processing') ORDER BY created_at"
        )
        pending = await rows.fetchall()

    if not pending:
        # Clean up legacy stale jobs that have no drucksache (cannot ever
        # be re-enqueued).
        async with aiosqlite.connect(settings.db_path) as db:
            deleted = await db.execute(
                "DELETE FROM jobs WHERE status='stale' AND (drucksache IS NULL OR drucksache='')"
            )
            if deleted.rowcount > 0:
                logger.info("Cleaned up %d legacy stale jobs without drucksache", deleted.rowcount)
            await db.commit()
        return

    logger.info("Found %d pending jobs from previous run", len(pending))

    # Imported here to avoid an import cycle at module load time
    # (presumably — TODO confirm against app.parlamente).
    from .parlamente import get_adapter

    re_enqueued = 0
    marked_stale = 0
    for row in pending:
        job_id = row["id"]
        bundesland = row["bundesland"] or "NRW"
        drucksache = row["drucksache"]
        model = row["model"] or "qwen-plus"

        if not drucksache or not analysis_callback:
            # Legacy job without drucksache, or no callback supplied →
            # mark as stale so the user can re-trigger manually.
            async with aiosqlite.connect(settings.db_path) as db:
                await db.execute(
                    "UPDATE jobs SET status='stale', updated_at=datetime('now') WHERE id=?",
                    (job_id,),
                )
                await db.commit()
            marked_stale += 1
            continue

        # Job carries a drucksache → re-fetch the document and re-enqueue.
        try:
            adapter = get_adapter(bundesland)
            doc = await adapter.get_document(drucksache)
            if not doc:
                raise ValueError(f"Drucksache {drucksache} nicht gefunden")
            text = await adapter.download_text(drucksache)
            if not text:
                raise ValueError(f"PDF-Text für {drucksache} leer")

            position = await enqueue(
                job_id,
                analysis_callback,
                job_id, drucksache, text, bundesland, model, doc,
                drucksache=drucksache,
            )
            re_enqueued += 1
            logger.info("Re-enqueued %s (%s) at position %d", drucksache, bundesland, position)

        except Exception as e:
            # Anything that prevents re-enqueue (adapter error, missing PDF,
            # full queue) downgrades the job to stale instead of crashing.
            logger.warning("Could not re-enqueue %s (%s): %s — marking stale", drucksache, bundesland, e)
            async with aiosqlite.connect(settings.db_path) as db:
                await db.execute(
                    "UPDATE jobs SET status='stale', error=?, updated_at=datetime('now') WHERE id=?",
                    (str(e)[:200], job_id),
                )
                await db.commit()
            marked_stale += 1

    logger.info("Re-enqueued %d jobs, marked %d stale", re_enqueued, marked_stale)
|
|
||||||
@ -1,90 +0,0 @@
|
|||||||
"""Redline-Parser-Hilfsfunktionen — keine FastAPI-Abhängigkeiten.
|
|
||||||
|
|
||||||
Wird von app.main._row_to_detail() und von Tests direkt importiert.
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import re
|
|
||||||
from urllib.parse import quote_plus
|
|
||||||
|
|
||||||
|
|
||||||
def parse_redline_segments(vorschlag: str | None) -> list[dict]:
|
|
||||||
"""Parst §INS§text§INS§/§DEL§text§DEL§-Marker sowie **text**- und
|
|
||||||
~~text~~-Markdown in eine Liste von {type, text}-Segmenten (ctx/ins/del).
|
|
||||||
|
|
||||||
Toleriert beide Formate gleichzeitig. Unausgewogene Marker bleiben als ctx.
|
|
||||||
Leerer oder None-Input liefert [].
|
|
||||||
|
|
||||||
Beispiel:
|
|
||||||
>>> parse_redline_segments("§ 3 §DEL§alt§DEL§ §INS§neu§INS§ Ende")
|
|
||||||
[{'type': 'ctx', 'text': '§ 3 '}, {'type': 'del', 'text': 'alt'},
|
|
||||||
{'type': 'ctx', 'text': ' '}, {'type': 'ins', 'text': 'neu'},
|
|
||||||
{'type': 'ctx', 'text': ' Ende'}]
|
|
||||||
"""
|
|
||||||
if not vorschlag:
|
|
||||||
return []
|
|
||||||
|
|
||||||
# Normalisierung: §INS§...§INS§ und §DEL§...§DEL§ → interne Tags
|
|
||||||
text = vorschlag
|
|
||||||
text = re.sub(r"§INS§(.*?)§INS§", r"<INS>\1</INS>", text, flags=re.DOTALL)
|
|
||||||
text = re.sub(r"§DEL§(.*?)§DEL§", r"<DEL>\1</DEL>", text, flags=re.DOTALL)
|
|
||||||
# Markdown-Konvention: **...** → ins, ~~...~~ → del
|
|
||||||
text = re.sub(r"\*\*(.*?)\*\*", r"<INS>\1</INS>", text, flags=re.DOTALL)
|
|
||||||
text = re.sub(r"~~(.*?)~~", r"<DEL>\1</DEL>", text, flags=re.DOTALL)
|
|
||||||
|
|
||||||
# Splitten an Tags, Typen zuordnen
|
|
||||||
segments: list[dict] = []
|
|
||||||
parts = re.split(r"(<INS>.*?</INS>|<DEL>.*?</DEL>)", text, flags=re.DOTALL)
|
|
||||||
for part in parts:
|
|
||||||
if not part:
|
|
||||||
continue
|
|
||||||
ins_m = re.fullmatch(r"<INS>(.*)</INS>", part, re.DOTALL)
|
|
||||||
del_m = re.fullmatch(r"<DEL>(.*)</DEL>", part, re.DOTALL)
|
|
||||||
if ins_m:
|
|
||||||
segments.append({"type": "ins", "text": ins_m.group(1)})
|
|
||||||
elif del_m:
|
|
||||||
segments.append({"type": "del", "text": del_m.group(1)})
|
|
||||||
else:
|
|
||||||
segments.append({"type": "ctx", "text": part})
|
|
||||||
return segments
|
|
||||||
|
|
||||||
|
|
||||||
def build_pdf_href(zitat: dict, bundesland: str = "") -> str:
    """Return the pdf_href for a citation.

    Prefers the already-maintained ``url`` field. If that is empty, the URL
    is reconstructed from the ``quelle`` field (format: 'Titel · S. N' or
    'Titel, S. N') via the WAHLPROGRAMME registry. Returns "" when no page
    number or no matching registry entry can be found.
    """
    url = zitat.get("url", "")
    if url:
        return url

    quelle = zitat.get("quelle", "")
    seite_m = re.search(r"[·,]?\s*S\.\s*(\d+)", quelle)
    if not seite_m:
        return ""
    seite = seite_m.group(1)

    # Determine pid from the WAHLPROGRAMME registry: file name without .pdf
    from .wahlprogramme import WAHLPROGRAMME
    pid = ""
    for bl_data in WAHLPROGRAMME.values():
        for partei_data in bl_data.values():
            titel = partei_data.get("titel", "")
            partei_name = partei_data.get("partei", "")
            file_name = partei_data.get("file", "")
            # Bug fix: previously `titel and (titel in quelle or partei_name
            # in quelle)` — with an empty partei_name, `"" in quelle` is
            # always True, so the first entry with a non-empty titel matched
            # regardless of the quelle text. Require a non-empty match on
            # either alternative.
            if (titel and titel in quelle) or (partei_name and partei_name in quelle):
                pid = file_name.replace(".pdf", "")
                break
        if pid:
            break

    if not pid:
        return ""

    text = zitat.get("text", "")
    q = " ".join(text.split()[:5])
    # #page=N as URL hash so the browser PDF viewer jumps straight to the
    # page — OpenAction inside the PDF is ignored by Chrome/Firefox.
    return f"/api/wahlprogramm-cite?pid={pid}&seite={seite}&q={quote_plus(q)}#page={seite}"
|
|
||||||
@ -1,234 +0,0 @@
|
|||||||
"""Reindex-Script für die Embedding-Modell-Migration v3 → v4 (Issue #123).
|
|
||||||
|
|
||||||
Läuft im Container:
|
|
||||||
docker exec gwoe-antragspruefer python -m app.reindex_embeddings
|
|
||||||
|
|
||||||
Was es macht:
|
|
||||||
1. Alle Wahlprogramme + Grundsatzprogramme mit dem aktuellen EMBEDDING_MODEL
|
|
||||||
(aus settings.embedding_model_write, default 'text-embedding-v4') neu
|
|
||||||
indexieren. Schreibt neue Rows in chunks mit model='text-embedding-v4',
|
|
||||||
die bestehenden v3-Rows bleiben unberührt.
|
|
||||||
2. Alle Assessments backfillen: summary_embedding erzeugen wo NULL oder wo
|
|
||||||
embedding_model vom aktuellen abweicht.
|
|
||||||
3. Rate-Limit: 100ms zwischen Calls (= max 10 req/sec).
|
|
||||||
4. Fortschritts-Logging pro Programm/Assessment.
|
|
||||||
|
|
||||||
Nach erfolgreichem Lauf:
|
|
||||||
- settings.embedding_model_read auf 'text-embedding-v4' flippen (via ENV),
|
|
||||||
Container neu starten
|
|
||||||
- Script `cleanup_v3_rows.py` läuft DELETE FROM chunks WHERE model='text-embedding-v3'
|
|
||||||
"""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import sqlite3
|
|
||||||
import time
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import aiosqlite
|
|
||||||
|
|
||||||
from .config import settings
|
|
||||||
from .embeddings import (
|
|
||||||
EMBEDDING_BATCH_SIZE,
|
|
||||||
EMBEDDING_MODEL,
|
|
||||||
EMBEDDINGS_DB,
|
|
||||||
PROGRAMME,
|
|
||||||
create_embedding,
|
|
||||||
create_embeddings_batch,
|
|
||||||
init_embeddings_db,
|
|
||||||
)
|
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
RATE_LIMIT_SLEEP = 0.1 # 100ms = 10 req/sec
|
|
||||||
|
|
||||||
|
|
||||||
def reindex_programme(pdf_dir: Path) -> dict:
    """Re-index every program in PROGRAMME with the current WRITE model.

    Programs that already have chunks under EMBEDDING_MODEL are skipped, so
    the script can be re-run after a crash and resumes where it stopped.

    Args:
        pdf_dir: Directory holding the program PDFs.

    Returns:
        Counter dict: {"reindexed", "skipped", "failed", "total_chunks"}.
    """
    init_embeddings_db()

    # Which programs already carry chunks for the current model?
    db = sqlite3.connect(EMBEDDINGS_DB)
    db.row_factory = sqlite3.Row
    done_rows = db.execute(
        "SELECT programm_id, COUNT(*) AS n FROM chunks WHERE model = ? GROUP BY programm_id",
        (EMBEDDING_MODEL,),
    ).fetchall()
    already_done = {row["programm_id"]: row["n"] for row in done_rows}
    db.close()

    stats = {"reindexed": 0, "skipped": 0, "failed": 0, "total_chunks": 0}

    for prog_id, info in PROGRAMME.items():
        if prog_id in already_done:
            logger.info(
                "SKIP %s — bereits %d chunks mit %s",
                prog_id, already_done[prog_id], EMBEDDING_MODEL,
            )
            stats["skipped"] += 1
            continue

        pdf_path = pdf_dir / info["pdf"]
        if not pdf_path.exists():
            logger.warning("MISS %s — PDF fehlt: %s", prog_id, pdf_path)
            stats["failed"] += 1
            continue

        try:
            logger.info("INDEX %s (%s)", prog_id, info["pdf"])
            chunk_count = _index_programm_with_ratelimit(prog_id, pdf_dir)
        except Exception:
            logger.exception("FAIL %s", prog_id)
            stats["failed"] += 1
        else:
            stats["reindexed"] += 1
            stats["total_chunks"] += chunk_count
            logger.info("DONE %s — %d chunks", prog_id, chunk_count)

    return stats
|
|
||||||
|
|
||||||
|
|
||||||
def _index_programm_with_ratelimit(programm_id: str, pdf_dir: Path) -> int:
    """Batch re-index one program: collect all chunks first, then embed them
    in batches of EMBEDDING_BATCH_SIZE (10) texts per API call — roughly 10x
    faster than a single-call loop.

    Args:
        programm_id: Key into the PROGRAMME registry.
        pdf_dir: Directory containing the program PDFs.

    Returns:
        Number of chunk rows written for the current EMBEDDING_MODEL.
    """
    import fitz

    info = PROGRAMME[programm_id]
    pdf_path = pdf_dir / info["pdf"]

    conn = sqlite3.connect(EMBEDDINGS_DB)
    # Only delete the current model's rows (migration-safe: rows written by
    # the previous model stay readable until the READ model is flipped).
    conn.execute(
        "DELETE FROM chunks WHERE programm_id = ? AND model = ?",
        (programm_id, EMBEDDING_MODEL),
    )

    # Phase 1: collect all chunks; embedding happens batched afterwards.
    doc = fitz.open(pdf_path)
    pending: list[tuple[int, str]] = []  # (page_num, chunk_text)
    for page_num in range(len(doc)):
        page = doc[page_num]
        text = page.get_text()
        if not text.strip():
            continue
        words = text.split()
        i = 0
        # 400-word chunks with a 50-word overlap between neighbours.
        chunk_size, overlap = 400, 50
        while i < len(words):
            chunk = " ".join(words[i : i + chunk_size])
            i += chunk_size - overlap
            # Skip tail fragments too short to carry meaning.
            if len(chunk.split()) < 20:
                continue
            pending.append((page_num + 1, chunk))
    doc.close()

    total = 0
    # Phase 2: embed in batches of EMBEDDING_BATCH_SIZE.
    for start in range(0, len(pending), EMBEDDING_BATCH_SIZE):
        batch = pending[start : start + EMBEDDING_BATCH_SIZE]
        texts = [t for _, t in batch]
        try:
            vecs = create_embeddings_batch(texts, model=EMBEDDING_MODEL)
            time.sleep(RATE_LIMIT_SLEEP)  # 100 ms between batch calls
        except Exception:
            # A failed batch is logged and skipped; the rest still indexes.
            logger.exception("batch failed (programm %s, start %d)", programm_id, start)
            continue
        for (page_num, chunk), vec in zip(batch, vecs):
            conn.execute(
                "INSERT INTO chunks (programm_id, partei, typ, seite, text, embedding, bundesland, model) "
                "VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
                (
                    programm_id,
                    info["partei"],
                    info["typ"],
                    page_num,
                    chunk,
                    json.dumps(vec).encode(),
                    info.get("bundesland"),
                    EMBEDDING_MODEL,
                ),
            )
            total += 1
        # Commit per batch so a crash doesn't lose everything.
        conn.commit()

    conn.close()
    return total
|
|
||||||
|
|
||||||
|
|
||||||
async def backfill_assessment_embeddings() -> dict:
    """Backfill summary embeddings for all assessments that have none yet or
    whose embedding was created with an older model.

    Returns:
        Counter dict: {"backfilled", "skipped", "failed"}.
    """
    from .embeddings import create_assessment_embedding

    stats = {"backfilled": 0, "skipped": 0, "failed": 0}

    async with aiosqlite.connect(settings.db_path) as db:
        db.row_factory = aiosqlite.Row
        cur = await db.execute(
            "SELECT drucksache, title, antrag_zusammenfassung, themen, bundesland, embedding_model "
            "FROM assessments"
        )
        rows = await cur.fetchall()

    for row in rows:
        # Already embedded with the current model — nothing to do.
        if row["embedding_model"] == EMBEDDING_MODEL:
            stats["skipped"] += 1
            continue

        try:
            themen = json.loads(row["themen"] or "[]")
        except Exception:
            # Malformed JSON in the themen column — fall back to no topics.
            themen = []

        blob, model = create_assessment_embedding(
            title=row["title"] or "",
            zusammenfassung=row["antrag_zusammenfassung"],
            themen=themen,
            bundesland=row["bundesland"],
        )
        # Fix: rate-limit with asyncio.sleep instead of time.sleep — this is
        # a coroutine, and time.sleep would block the whole event loop.
        await asyncio.sleep(RATE_LIMIT_SLEEP)

        if blob is None:
            stats["failed"] += 1
            logger.warning("backfill FAIL %s", row["drucksache"])
            continue

        async with aiosqlite.connect(settings.db_path) as db:
            await db.execute(
                "UPDATE assessments SET summary_embedding = ?, embedding_model = ? WHERE drucksache = ?",
                (blob, model, row["drucksache"]),
            )
            await db.commit()
        stats["backfilled"] += 1
        if stats["backfilled"] % 20 == 0:
            logger.info("backfill progress: %d", stats["backfilled"])

    return stats
|
|
||||||
|
|
||||||
|
|
||||||
async def main():
    """Run the full reindex: party-programme PDFs first, then the assessment backfill."""
    rule = "=" * 60
    pdf_dir = Path(__file__).resolve().parent / "static" / "referenzen"

    logger.info(rule)
    logger.info("Reindex mit WRITE-Modell: %s", EMBEDDING_MODEL)
    logger.info("PDF-Verzeichnis: %s", pdf_dir)
    logger.info(rule)

    programme_stats = reindex_programme(pdf_dir)
    logger.info("Programme fertig: %s", programme_stats)

    logger.info("Backfill Assessment-Embeddings …")
    assessment_stats = await backfill_assessment_embeddings()
    logger.info("Assessments fertig: %s", assessment_stats)

    # Final summary banner so the operator sees all counters in one place.
    logger.info(rule)
    logger.info("REINDEX KOMPLETT")
    logger.info("Programme: %s", programme_stats)
    logger.info("Assessments: %s", assessment_stats)
    logger.info("Nächster Schritt: settings.embedding_model_read auf %s setzen", EMBEDDING_MODEL)
    logger.info("(ENV: EMBEDDING_MODEL_READ=%s, Container neu starten)", EMBEDDING_MODEL)


if __name__ == "__main__":
    asyncio.run(main())
|
|
||||||
647
app/report.py
647
app/report.py
@ -1,647 +0,0 @@
|
|||||||
"""Report generation for HTML and PDF output.
|
|
||||||
|
|
||||||
All LLM-generated fields are HTML-escaped before being interpolated into
|
|
||||||
the report template. WeasyPrint will happily resolve ``<img src="file://...">``
|
|
||||||
or ``<link rel=stylesheet href="file://...">`` against the container
|
|
||||||
filesystem, so unescaped LLM output is a Local-File-Read primitive — see
|
|
||||||
issue #57 (audit findings #2 and #6). The ``_e`` helper is the single
|
|
||||||
funnel through which all LLM strings must pass on their way into the HTML.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import subprocess
|
|
||||||
from html import escape as _e
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
from .models import Assessment, MATRIX_LABELS, EMPFEHLUNG_CONFIG
|
|
||||||
from .bundeslaender import BUNDESLAENDER
|
|
||||||
|
|
||||||
# ECOnGOOD corporate color palette (hex) referenced throughout the report CSS.
COLORS = {
    "darkgray": "#5a5a5a",
    "green": "#889e33",
    "blue": "#009da5",
    "lightgray": "#bfbfbf",
    "orange": "#F7941D",
    "red": "#d00000",
}
|
|
||||||
|
|
||||||
|
|
||||||
def get_score_color(score: float) -> str:
    """Map a 0–10 GWÖ score onto its traffic-light display color.

    Thresholds are checked from best to worst; anything below 1 is red.
    """
    bands = (
        (7, COLORS["blue"]),
        (4, COLORS["green"]),
        (2, "#FFC20E"),
        (1, COLORS["orange"]),
    )
    for lower_bound, color in bands:
        if score >= lower_bound:
            return color
    return COLORS["red"]
|
|
||||||
|
|
||||||
|
|
||||||
def get_rating_symbol(rating: int) -> str:
    """Convert a numeric matrix rating into its display symbol.

    Ratings at or above 2 collapse to "++", at or below -2 to "−−";
    the three middle values map one-to-one.
    """
    if rating >= 2:
        return "++"
    if rating <= -2:
        return "−−"
    return {1: "+", 0: "○", -1: "−"}[rating]
|
|
||||||
|
|
||||||
|
|
||||||
def format_redline_html(text: str) -> str:
    """Render redline markup (``**inserted**`` / ``~~deleted~~``) as safe HTML.

    The input is HTML-escaped up front, so any markup in the LLM output
    (e.g. ``<img src="file:///etc/passwd">``) becomes inert text. The
    ``**`` and ``~~`` markers are not HTML-special and survive escaping,
    so the substitutions below still match. The ``<span>`` wrappers are
    the only live HTML in the result and are produced by us.
    """
    import re

    escaped = _e(text or "")
    escaped = re.sub(r"\*\*([^*]+)\*\*", r'<span class="inserted">\1</span>', escaped)
    return re.sub(r"~~([^~]+)~~", r'<span class="deleted">\1</span>', escaped)
|
|
||||||
|
|
||||||
|
|
||||||
def build_matrix_html(assessment: Assessment) -> str:
    """Render the assessment's GWÖ matrix as an HTML table.

    Each cell's ``title`` tooltip carries LLM-provided aspect text, which
    is escaped so a stray double-quote cannot break out of the attribute.
    """
    entries_by_field = {entry.field: entry for entry in assessment.gwoe_matrix}

    row_labels = {
        "A": "Lieferant:innen",
        "B": "Finanzen",
        "C": "Führung/Verwaltung",
        "D": "Bürger:innen",
        "E": "Gesellschaft/Natur",
    }

    parts = ['<table class="matrix-table">', '<thead><tr>', '<th></th>']
    parts.extend(f'<th>{col}</th>' for col in range(1, 6))
    parts.append('</tr></thead>')
    parts.append('<tbody>')

    for row_key in "ABCDE":
        parts.append(f'<tr><th>{row_key}: {row_labels[row_key]}</th>')
        for col in range(1, 6):
            entry = entries_by_field.get(f"{row_key}{col}")
            if entry is None:
                parts.append('<td></td>')
                continue
            symbol = get_rating_symbol(entry.rating)
            if entry.rating > 0:
                css_class = "positive"
            elif entry.rating < 0:
                css_class = "negative"
            else:
                css_class = "neutral"
            parts.append(f'<td class="{css_class}" title="{_e(entry.aspect)}">{symbol}</td>')
        parts.append('</tr>')

    parts.append('</tbody></table>')
    return '\n'.join(parts)
|
|
||||||
|
|
||||||
|
|
||||||
async def generate_html_report(
    assessment: Assessment,
    output_path: Path,
    bundesland: Optional[str] = None,
) -> None:
    """Generate HTML report.

    ``bundesland`` is the optional state code (e.g. ``"NRW"``, ``"LSA"``).
    When set and known in ``BUNDESLAENDER``, the resulting report carries
    the parlament name in its header so the source parliament is always
    visible — important since assessments from multiple bundesländer share
    the same Drucksachen-ID space.

    All LLM-derived fields are escaped via ``_e`` on their way into the
    template (see module docstring for the file:// LFR rationale).
    """

    empf_config = EMPFEHLUNG_CONFIG.get(assessment.empfehlung.value, {})

    parlament_name = ""
    if bundesland and bundesland in BUNDESLAENDER:
        parlament_name = BUNDESLAENDER[bundesland].parlament_name

    html = f"""<!DOCTYPE html>
<html lang="de">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>GWÖ-Antragsprüfung: {_e(assessment.title or "")}</title>
    <style>
        :root {{
            --color-darkgray: {COLORS['darkgray']};
            --color-green: {COLORS['green']};
            --color-blue: {COLORS['blue']};
            --color-lightgray: {COLORS['lightgray']};
            --color-orange: {COLORS['orange']};
            --color-red: {COLORS['red']};
        }}

        body {{
            font-family: 'Avenir', Arial, sans-serif;
            max-width: 800px;
            margin: 0 auto;
            padding: 1.5rem 2rem;
            color: var(--color-darkgray);
            line-height: 1.5;
            font-size: 10pt;
        }}

        .header {{
            text-align: center;
            border-bottom: 2px solid var(--color-blue);
            padding-bottom: 0.75rem;
            margin-bottom: 1.25rem;
        }}

        .header img {{
            max-width: 150px;
        }}

        .header-label {{
            font-size: 8pt;
            letter-spacing: 0.5px;
            color: var(--color-blue);
            margin-bottom: 0.5rem;
        }}

        .header-parlament {{
            font-size: 9pt;
            color: var(--color-blue);
            font-weight: bold;
            margin-top: 0.4rem;
            letter-spacing: 0.3px;
        }}

        h1 {{
            color: var(--color-darkgray);
            font-size: 14pt;
            margin: 0.75rem 0;
            line-height: 1.3;
        }}

        h2 {{
            color: var(--color-blue);
            font-size: 11pt;
            border-bottom: 1px solid var(--color-lightgray);
            padding-bottom: 0.3rem;
            margin-top: 1.25rem;
            margin-bottom: 0.5rem;
        }}

        h3 {{
            color: var(--color-green);
            font-size: 10pt;
            margin-top: 0.75rem;
            margin-bottom: 0.3rem;
        }}

        .meta-box {{
            background: #f5f5f5;
            padding: 0.6rem 0.8rem;
            border-radius: 3px;
            margin-bottom: 0.75rem;
            font-size: 9pt;
        }}

        .empfehlung-box {{
            background: {empf_config.get('color', COLORS['blue'])}15;
            border: 1px solid {empf_config.get('color', COLORS['blue'])};
            padding: 0.5rem 0.75rem;
            text-align: center;
            border-radius: 3px;
            margin: 0.75rem 0;
        }}

        .empfehlung-box .symbol {{
            font-size: 12pt;
            color: {empf_config.get('color', COLORS['blue'])};
            font-weight: bold;
            display: inline;
            margin-right: 0.5rem;
        }}

        .empfehlung-box .text {{
            font-size: 10pt;
            display: inline;
        }}

        .score-bar {{
            background: var(--color-lightgray);
            height: 12px;
            border-radius: 6px;
            overflow: hidden;
            margin: 0.3rem 0;
        }}

        .score-bar-fill {{
            height: 100%;
        }}

        .matrix-table {{
            width: 100%;
            border-collapse: collapse;
            margin: 0.5rem 0;
            font-size: 8pt;
        }}

        .matrix-table th, .matrix-table td {{
            border: 1px solid var(--color-lightgray);
            padding: 0.25rem 0.4rem;
            text-align: center;
        }}

        .matrix-table thead th {{
            background: var(--color-blue);
            color: white;
            font-size: 8pt;
            font-weight: normal;
        }}

        .matrix-table tbody th {{
            background: #f5f5f5;
            text-align: left;
            font-weight: normal;
            font-size: 8pt;
        }}

        .matrix-table .positive {{
            background: var(--color-green);
            color: white;
            font-weight: bold;
        }}

        .matrix-table .negative {{
            background: var(--color-red);
            color: white;
            font-weight: bold;
        }}

        .matrix-table .neutral {{
            background: #f0f0f0;
        }}

        .verbesserung {{
            margin: 0.5rem 0;
            padding: 0.5rem;
            border: 1px solid var(--color-lightgray);
            border-radius: 3px;
            font-size: 9pt;
        }}

        .verbesserung .original {{
            background: #f9f9f9;
            padding: 0.4rem;
            margin-bottom: 0.3rem;
        }}

        .verbesserung .vorschlag {{
            background: rgba(136, 158, 51, 0.1);
            border-left: 2px solid var(--color-green);
            padding: 0.4rem;
        }}

        .inserted {{
            color: var(--color-green);
            font-weight: bold;
        }}

        .deleted {{
            color: var(--color-red);
            text-decoration: line-through;
        }}

        .two-columns {{
            display: grid;
            grid-template-columns: 1fr 1fr;
            gap: 0.75rem;
        }}

        .staerken {{
            border-left: 2px solid var(--color-green);
            padding-left: 0.5rem;
        }}

        .schwaechen {{
            border-left: 2px solid var(--color-orange);
            padding-left: 0.5rem;
        }}

        ul {{
            margin: 0.3rem 0;
            padding-left: 1.2rem;
        }}

        li {{
            margin-bottom: 0.2rem;
        }}

        p {{
            margin: 0.4rem 0;
        }}

        .footer {{
            margin-top: 1.5rem;
            padding-top: 0.5rem;
            border-top: 1px solid var(--color-lightgray);
            text-align: center;
            color: var(--color-lightgray);
            font-size: 7pt;
        }}

        @media print {{
            body {{ max-width: none; }}
        }}
    </style>
</head>
<body>
    <div class="header">
        <div class="header-label">GEMEINWOHL-ÖKONOMIE | ANTRAGSBEWERTUNG</div>
        <h1>{_e(assessment.title or "")}</h1>
        {f'<div class="header-parlament">{_e(parlament_name)}</div>' if parlament_name else ''}
    </div>

    <div class="meta-box">
        <strong>Drucksache:</strong> {_e(assessment.drucksache or "")} |
        <strong>Datum:</strong> {_e(assessment.datum or "")} |
        <strong>Fraktion(en):</strong> {_e(', '.join(assessment.fraktionen))} |
        <strong>GWÖ-Score:</strong> <span style="color: {get_score_color(assessment.gwoe_score)}; font-weight: bold;">{assessment.gwoe_score}/10</span>
    </div>

    <div class="empfehlung-box">
        <span class="symbol">{_e(empf_config.get('symbol', '[?]'))}</span>
        <span class="text"><strong>Empfehlung:</strong> {_e(assessment.empfehlung.value)}</span>
    </div>

    <h2>Der Antrag im Überblick</h2>
    <p>{_e(assessment.antrag_zusammenfassung or 'Keine Zusammenfassung verfügbar.')}</p>

    {('<ul>' + ''.join(f'<li>{_e(k)}</li>' for k in assessment.antrag_kernpunkte) + '</ul>') if assessment.antrag_kernpunkte else ''}

    <h2>GWÖ-Treue</h2>
    <p style="font-size: 9pt;"><strong>Score:</strong> <span style="color: {get_score_color(assessment.gwoe_score)};">{assessment.gwoe_score}/10</span></p>

    <div class="score-bar">
        <div class="score-bar-fill" style="width: {assessment.gwoe_score * 10}%; background: {get_score_color(assessment.gwoe_score)};"></div>
    </div>

    <p><strong>Begründung:</strong> {_e(assessment.gwoe_begruendung or "")}</p>
    <p><strong>Schwerpunkte:</strong> {_e(', '.join(assessment.gwoe_schwerpunkt))}</p>

    <h2>Matrix-Zuordnung (Matrix 2.0 für Gemeinden)</h2>

    {build_matrix_html(assessment)}

    <p style="font-size: 7pt; color: #999;">
        <strong>Legende:</strong> ++ stark fördernd, + fördernd, ○ neutral, − widersprechend, −− stark widersprechend
    </p>

    <h3>Berührte Themenfelder</h3>
    <ul>
        {''.join(f'<li><strong>{_e(e.field)}:</strong> {_e(e.aspect)} [{get_rating_symbol(e.rating)}]</li>' for e in assessment.gwoe_matrix)}
    </ul>

    <h2>Programmtreue</h2>

    {''.join(f'''
    <h3>{_e(s.fraktion)} {' (Antragsteller)' if s.ist_antragsteller else ''}{' (Regierung)' if s.ist_regierung else ''}</h3>
    <p><strong>Wahlprogramm:</strong> {s.wahlprogramm.score}/10 — {_e(s.wahlprogramm.begruendung or "")}</p>
    <p><strong>Parteiprogramm:</strong> {s.parteiprogramm.score}/10 — {_e(s.parteiprogramm.begruendung or "")}</p>
    ''' for s in assessment.wahlprogramm_scores)}

    <h2>Verbesserungsvorschläge</h2>

    {''.join(f'''
    <div class="verbesserung">
        <div class="original"><strong>Original:</strong><br>{_e(v.original or "")}</div>
        <div class="vorschlag"><strong>Vorschlag:</strong><br>{format_redline_html(v.vorschlag)}</div>
        <div style="font-style: italic; margin-top: 0.5rem;">{_e(v.begruendung or "")}</div>
    </div>
    ''' for v in assessment.verbesserungen) or '<p>Keine Verbesserungsvorschläge.</p>'}

    <h2>Zusammenfassung</h2>

    <div class="two-columns">
        <div class="staerken">
            <h3 style="color: var(--color-green);">Stärken</h3>
            <ul>
                {''.join(f'<li>{_e(s)}</li>' for s in assessment.staerken) or '<li>(keine)</li>'}
            </ul>
        </div>
        <div class="schwaechen">
            <h3 style="color: var(--color-orange);">Schwächen</h3>
            <ul>
                {''.join(f'<li>{_e(s)}</li>' for s in assessment.schwaechen) or '<li>(keine)</li>'}
            </ul>
        </div>
    </div>

    <div class="footer">
        <p>Erstellt mit GWÖ-Antragsprüfer v4.1 | Matrix 2.0 für Gemeinden</p>
        <p style="color: var(--color-blue);">germany.econgood.org</p>
    </div>
</body>
</html>"""

    # Fix: without an explicit encoding, write_text uses the platform locale
    # encoding — the template is full of umlauts and would raise or mojibake
    # on non-UTF-8 locales (e.g. Windows cp1252).
    output_path.write_text(html, encoding="utf-8")
|
|
||||||
|
|
||||||
|
|
||||||
async def generate_pdf_report(
    assessment: Assessment,
    output_path: Path,
    bundesland: Optional[str] = None,
) -> None:
    """Generate PDF report using WeasyPrint, then append the original Antrag.

    Two-step pipeline:

    1. Render the GWÖ-Report HTML and convert to PDF via WeasyPrint.
    2. If ``assessment.link`` is a fetchable PDF URL, download it and append
       it after a separator page, so the resulting single file contains both
       the analysis and its source document (issue #9).

    The append step is best-effort: a missing/empty link is silently skipped,
    and network/parse errors fall back to a single placeholder page so the
    report itself is always delivered.

    ``bundesland`` is forwarded to ``generate_html_report`` so the source
    parlament name appears in the report header.
    """
    # Step 1 — render the report HTML, convert it, and clean up the temp file.
    tmp_html = output_path.with_suffix('.tmp.html')
    await generate_html_report(assessment, tmp_html, bundesland=bundesland)

    try:
        from weasyprint import HTML

        HTML(filename=str(tmp_html)).write_pdf(str(output_path))
    finally:
        tmp_html.unlink(missing_ok=True)

    # Step 2 — append the original Antrag (best-effort).
    await _append_original_antrag(assessment, output_path)
|
|
||||||
|
|
||||||
|
|
||||||
async def _append_original_antrag(
    assessment: Assessment,
    report_path: Path,
) -> None:
    """Try to download the original Antrag PDF and append it to ``report_path``.

    Failure modes (download error, non-PDF content, parse error) are
    handled gracefully: a single placeholder page is appended noting the
    issue, so the user always sees that an attempt was made. A hard
    failure leaves the original report file untouched.
    """
    import fitz  # PyMuPDF
    import httpx

    link = (assessment.link or "").strip()
    if not link or not link.startswith(("http://", "https://")):
        # Manual upload / pasted text — nothing to append.
        return

    download_error: Optional[str] = None
    pdf_bytes: Optional[bytes] = None
    try:
        async with httpx.AsyncClient(
            timeout=30,
            follow_redirects=True,
            headers={"User-Agent": "Mozilla/5.0 GWOE-Antragspruefer"},
        ) as client:
            resp = await client.get(link)
        if resp.status_code != 200:
            download_error = f"HTTP {resp.status_code}"
        elif not resp.content[:5].startswith(b"%PDF-"):
            # Magic-number check; Content-Type alone is unreliable.
            download_error = f"kein PDF (Content-Type: {resp.headers.get('content-type', 'unknown')})"
        else:
            pdf_bytes = resp.content
    except Exception as e:
        download_error = f"Download-Fehler: {e}"

    try:
        report_doc = fitz.open(report_path)
        try:
            # Always insert a divider page so the user sees what comes next.
            _insert_divider_page(report_doc, assessment, download_error)

            if pdf_bytes is not None:
                try:
                    src_doc = fitz.open(stream=pdf_bytes, filetype="pdf")
                    try:
                        report_doc.insert_pdf(src_doc)
                    finally:
                        src_doc.close()
                except Exception:
                    # Unused exception binding removed; logger.exception
                    # already records the traceback.
                    logger.exception("_append_original_antrag: PDF-Parse-Fehler für %s", assessment.drucksache)

            # PyMuPDF refuses to overwrite the source file in non-incremental
            # mode — write to a sibling temp file and atomically replace.
            tmp_path = report_path.with_suffix(report_path.suffix + ".tmp")
            report_doc.save(
                str(tmp_path),
                deflate=True,
                garbage=3,
            )
        finally:
            report_doc.close()
        # Only reached when save() succeeded, so tmp_path is bound and
        # complete; the original report is never replaced by a partial file.
        tmp_path.replace(report_path)
    except Exception:
        # Hard failure — leave the original report file untouched.
        logger.exception("_append_original_antrag: Konnte Report nicht erweitern für %s", assessment.drucksache)
|
|
||||||
|
|
||||||
|
|
||||||
def _insert_divider_page(
    report_doc,  # fitz.Document
    assessment: Assessment,
    download_error: Optional[str],
) -> None:
    """Append a single A4 separator page that introduces the original Antrag.

    Uses PyMuPDF's text drawing API directly so we don't need a second
    WeasyPrint round-trip just for one page.
    """
    page = report_doc.new_page(width=595, height=842)  # A4 in PDF points
    left = 60
    y = 200

    # Palette mirroring the HTML report's CSS variables.
    blue = (0 / 255, 157 / 255, 165 / 255)  # var(--color-blue)
    gray = (0.35, 0.35, 0.35)
    dim = (0.5, 0.5, 0.5)
    red = (0.82, 0.0, 0.0)

    def put(text: str, size: int, color, advance: int) -> None:
        # Draw one line at the current cursor position, then move down.
        nonlocal y
        page.insert_text((left, y), text, fontsize=size, fontname="helv", color=color)
        y += advance

    put("Original-Antrag", 24, blue, 38)
    put(f"Drucksache {assessment.drucksache}", 14, gray, 22)

    # Title, truncated to roughly one line (~75 chars).
    title = assessment.title or ""
    if len(title) > 75:
        title = title[:72] + "…"
    put(title, 11, gray, 40)

    if download_error:
        put("⚠ Original-PDF konnte nicht angehängt werden.", 11, red, 18)
        put(f"Grund: {download_error}", 10, dim, 18)
        if assessment.link:
            put(f"Quelle: {assessment.link[:90]}", 9, dim, 0)
    else:
        put("Die folgenden Seiten enthalten den unveränderten Originalantrag.", 11, gray, 0)
|
|
||||||
@ -1,44 +0,0 @@
|
|||||||
"""Repository-Pattern für Persistenz-Zugriff (ADR 0008).
|
|
||||||
|
|
||||||
Die Repositories kapseln direkte ``database.py``-Aufrufe hinter Protocols,
|
|
||||||
sodass Tests `InMemory*Repository` verwenden können und Callsites nicht
|
|
||||||
mehr jedes Schema-Detail kennen müssen.
|
|
||||||
|
|
||||||
Die konkreten `Sqlite*Repository`-Implementierungen delegieren heute noch
|
|
||||||
an die bestehenden Funktionen in ``database.py`` — kein Big-Bang-Rewrite.
|
|
||||||
Schritt für Schritt wandern die direkten DB-Aufrufe in die Repositories.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from .antrag_repository import (
|
|
||||||
AntragRepository,
|
|
||||||
SqliteAntragRepository,
|
|
||||||
InMemoryAntragRepository,
|
|
||||||
get_antrag_repository,
|
|
||||||
)
|
|
||||||
from .bewertung_repository import (
|
|
||||||
BewertungRepository,
|
|
||||||
SqliteBewertungRepository,
|
|
||||||
InMemoryBewertungRepository,
|
|
||||||
get_bewertung_repository,
|
|
||||||
)
|
|
||||||
from .abonnement_repository import (
|
|
||||||
AbonnementRepository,
|
|
||||||
SqliteAbonnementRepository,
|
|
||||||
InMemoryAbonnementRepository,
|
|
||||||
get_abonnement_repository,
|
|
||||||
)
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
"AntragRepository",
|
|
||||||
"SqliteAntragRepository",
|
|
||||||
"InMemoryAntragRepository",
|
|
||||||
"get_antrag_repository",
|
|
||||||
"BewertungRepository",
|
|
||||||
"SqliteBewertungRepository",
|
|
||||||
"InMemoryBewertungRepository",
|
|
||||||
"get_bewertung_repository",
|
|
||||||
"AbonnementRepository",
|
|
||||||
"SqliteAbonnementRepository",
|
|
||||||
"InMemoryAbonnementRepository",
|
|
||||||
"get_abonnement_repository",
|
|
||||||
]
|
|
||||||
@ -1,138 +0,0 @@
|
|||||||
"""AbonnementRepository — Port für E-Mail-Digest-Abos (#124).
|
|
||||||
|
|
||||||
Kapselt die `email_subscriptions`-Tabelle. Der Name „Abonnement" ist die
|
|
||||||
Ubiquitous-Language-Form (Kapitel 4 der DDD-Bewertung); intern heißt die
|
|
||||||
Tabelle weiter `email_subscriptions`.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from typing import Optional, Protocol, runtime_checkable
|
|
||||||
|
|
||||||
from .. import database
|
|
||||||
|
|
||||||
|
|
||||||
@runtime_checkable
class AbonnementRepository(Protocol):
    """Port for e-mail digest subscriptions (``email_subscriptions`` table)."""

    async def create(
        self,
        user_id: str,
        email: str,
        bundesland: Optional[str] = None,
        partei: Optional[str] = None,
        frequency: str = "daily",
    ) -> int: ...

    async def list_by_user(self, user_id: str) -> list[dict]: ...

    async def list_all(self) -> list[dict]: ...

    async def list_due(self, frequency: str = "daily") -> list[dict]: ...

    async def delete(self, user_id: str, sub_id: int) -> bool: ...

    async def delete_by_id(self, sub_id: int) -> bool: ...

    async def mark_sent(self, sub_id: int) -> None: ...
|
|
||||||
|
|
||||||
|
|
||||||
class SqliteAbonnementRepository:
    """Production implementation — thin delegation onto ``database.py``."""

    async def create(
        self,
        user_id: str,
        email: str,
        bundesland: Optional[str] = None,
        partei: Optional[str] = None,
        frequency: str = "daily",
    ) -> int:
        return await database.create_subscription(
            user_id, email, bundesland, partei, frequency,
        )

    async def list_by_user(self, user_id: str) -> list[dict]:
        return await database.list_subscriptions(user_id)

    async def list_all(self) -> list[dict]:
        return await database.list_all_subscriptions()

    async def list_due(self, frequency: str = "daily") -> list[dict]:
        return await database.get_all_subscriptions_due(frequency)

    async def delete(self, user_id: str, sub_id: int) -> bool:
        return await database.delete_subscription(user_id, sub_id)

    async def delete_by_id(self, sub_id: int) -> bool:
        return await database.delete_subscription_by_id(sub_id)

    async def mark_sent(self, sub_id: int) -> None:
        await database.mark_subscription_sent(sub_id)
|
|
||||||
|
|
||||||
|
|
||||||
class InMemoryAbonnementRepository:
    """In-memory test double.

    ``list_due`` ignores real ``last_sent`` time arithmetic and simply
    returns every subscription whose ``last_sent`` is still ``None``;
    time-sensitive tests should call ``mark_sent`` explicitly.
    """

    def __init__(self) -> None:
        # Each entry is a plain dict mirroring an email_subscriptions row.
        self._subs: list[dict] = []
        self._next_id = 1

    async def create(
        self,
        user_id: str,
        email: str,
        bundesland: Optional[str] = None,
        partei: Optional[str] = None,
        frequency: str = "daily",
    ) -> int:
        sub_id = self._next_id
        self._next_id += 1
        record = {
            "id": sub_id,
            "user_id": user_id,
            "email": email,
            "bundesland": bundesland,
            "partei": partei,
            "frequency": frequency,
            "last_sent": None,
            "created_at": "",
        }
        self._subs.append(record)
        return sub_id

    async def list_by_user(self, user_id: str) -> list[dict]:
        # Return copies so callers cannot mutate stored state.
        return [dict(sub) for sub in self._subs if sub["user_id"] == user_id]

    async def list_all(self) -> list[dict]:
        return [dict(sub) for sub in self._subs]

    async def list_due(self, frequency: str = "daily") -> list[dict]:
        def is_due(sub: dict) -> bool:
            return sub["frequency"] == frequency and sub.get("last_sent") is None

        return [dict(sub) for sub in self._subs if is_due(sub)]

    async def delete(self, user_id: str, sub_id: int) -> bool:
        idx = next(
            (i for i, sub in enumerate(self._subs)
             if sub["id"] == sub_id and sub["user_id"] == user_id),
            None,
        )
        if idx is None:
            return False
        del self._subs[idx]
        return True

    async def delete_by_id(self, sub_id: int) -> bool:
        idx = next((i for i, sub in enumerate(self._subs) if sub["id"] == sub_id), None)
        if idx is None:
            return False
        del self._subs[idx]
        return True

    async def mark_sent(self, sub_id: int) -> None:
        for sub in self._subs:
            if sub["id"] == sub_id:
                sub["last_sent"] = "sent"
|
|
||||||
|
|
||||||
|
|
||||||
# Module-level default: the production (SQLite-backed) repository instance.
_default_abonnement_repo: AbonnementRepository = SqliteAbonnementRepository()


def get_abonnement_repository() -> AbonnementRepository:
    """Return the process-wide default ``AbonnementRepository``."""
    return _default_abonnement_repo
|
|
||||||
@ -1,135 +0,0 @@
|
|||||||
"""AntragRepository — Persistenz-Port für Assessment-Datensätze (#136, ADR 0008).
|
|
||||||
|
|
||||||
Der Name `AntragRepository` ist bewusst auf die Domäne bezogen: aus Sicht
|
|
||||||
der Anwendung speichern wir eine Bewertung *zu einem Antrag* — die
|
|
||||||
Drucksachen-ID ist der Identifier. Intern zugreifen wir auf die
|
|
||||||
`assessments`-Tabelle.
|
|
||||||
|
|
||||||
Für Bewertungs-Versionen (assessment_versions) siehe `BewertungRepository`.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from typing import Optional, Protocol, runtime_checkable
|
|
||||||
|
|
||||||
from .. import database
|
|
||||||
|
|
||||||
|
|
||||||
@runtime_checkable
class AntragRepository(Protocol):
    """Port for accessing motion (Antrag) assessments.

    The return type stays ``dict`` for now (as delivered today by
    ``database.get_assessment``) to keep the migration as diff-light as
    possible.  A domain-object wrapper (chapter 3.2 of the DDD review)
    follows as a day-6 step.  Important: call sites must *not* keep
    importing ``database.*`` directly.
    """

    async def save(self, data: dict) -> bool: ...

    async def get(self, drucksache: str) -> Optional[dict]: ...

    async def list(self, bundesland: Optional[str] = None) -> list[dict]: ...

    async def search(
        self, query: str, bundesland: Optional[str] = None, limit: int = 50,
    ) -> list[dict]: ...

    async def delete(self, drucksache: str) -> bool: ...
|
|
||||||
|
|
||||||
|
|
||||||
class SqliteAntragRepository:
    """Production implementation.  Delegates to ``database.py``.

    Deliberately holds *no* connection pool -- ``database.py`` opens a
    connection per call (``aiosqlite.connect``).  Centralize later if
    performance regressions show up.
    """

    async def save(self, data: dict) -> bool:
        # Upsert the assessment record; the database layer reports success.
        ok = await database.upsert_assessment(data)
        return ok

    async def get(self, drucksache: str) -> Optional[dict]:
        # Single assessment for one Drucksache, or None when absent.
        row = await database.get_assessment(drucksache)
        return row

    async def list(self, bundesland: Optional[str] = None) -> list[dict]:
        # All assessments, optionally restricted to one Bundesland.
        rows = await database.get_all_assessments(bundesland)
        return rows

    async def search(
        self, query: str, bundesland: Optional[str] = None, limit: int = 50,
    ) -> list[dict]:
        # Text search is implemented in the database layer; pass through.
        hits = await database.search_assessments(query, bundesland, limit)
        return hits

    async def delete(self, drucksache: str) -> bool:
        # Remove the assessment; True when something was deleted.
        removed = await database.delete_assessment(drucksache)
        return removed
|
|
||||||
|
|
||||||
|
|
||||||
class InMemoryAntragRepository:
|
|
||||||
"""Test-Fake. Keine Datei, kein I/O — in-process Dict.
|
|
||||||
|
|
||||||
Bei mehrfachem ``save`` für dieselbe Drucksache wird überschrieben
|
|
||||||
(wie im produktiven UPSERT). Versionierung simuliert das Fake bewusst
|
|
||||||
nicht — dafür gibt es ``BewertungRepository`` als separaten Port.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, initial: Optional[list[dict]] = None) -> None:
|
|
||||||
self._store: dict[str, dict] = {}
|
|
||||||
for d in initial or []:
|
|
||||||
ds = d.get("drucksache")
|
|
||||||
if ds:
|
|
||||||
self._store[ds] = dict(d)
|
|
||||||
|
|
||||||
async def save(self, data: dict) -> bool:
|
|
||||||
ds = data.get("drucksache")
|
|
||||||
if not ds:
|
|
||||||
raise ValueError("save(): data.drucksache ist Pflicht")
|
|
||||||
self._store[ds] = dict(data)
|
|
||||||
return True
|
|
||||||
|
|
||||||
async def get(self, drucksache: str) -> Optional[dict]:
|
|
||||||
row = self._store.get(drucksache)
|
|
||||||
return dict(row) if row else None
|
|
||||||
|
|
||||||
async def list(self, bundesland: Optional[str] = None) -> list[dict]:
|
|
||||||
rows = list(self._store.values())
|
|
||||||
if bundesland and bundesland != "ALL":
|
|
||||||
rows = [r for r in rows if r.get("bundesland") == bundesland]
|
|
||||||
# Sortierung analog zu database.get_all_assessments: gwoe_score desc
|
|
||||||
rows.sort(key=lambda r: (r.get("gwoe_score") or 0), reverse=True)
|
|
||||||
return [dict(r) for r in rows]
|
|
||||||
|
|
||||||
async def search(
|
|
||||||
self, query: str, bundesland: Optional[str] = None, limit: int = 50,
|
|
||||||
) -> list[dict]:
|
|
||||||
q = (query or "").lower()
|
|
||||||
out: list[dict] = []
|
|
||||||
for r in self._store.values():
|
|
||||||
if bundesland and bundesland != "ALL" and r.get("bundesland") != bundesland:
|
|
||||||
continue
|
|
||||||
hay = " ".join([
|
|
||||||
str(r.get("title") or ""),
|
|
||||||
str(r.get("drucksache") or ""),
|
|
||||||
" ".join(r.get("fraktionen") or []) if isinstance(r.get("fraktionen"), list) else str(r.get("fraktionen") or ""),
|
|
||||||
" ".join(r.get("themen") or []) if isinstance(r.get("themen"), list) else str(r.get("themen") or ""),
|
|
||||||
]).lower()
|
|
||||||
if q in hay:
|
|
||||||
out.append(dict(r))
|
|
||||||
out.sort(key=lambda r: (r.get("gwoe_score") or 0), reverse=True)
|
|
||||||
return out[:limit]
|
|
||||||
|
|
||||||
async def delete(self, drucksache: str) -> bool:
|
|
||||||
return self._store.pop(drucksache, None) is not None
|
|
||||||
|
|
||||||
|
|
||||||
# ─── FastAPI dependency ─────────────────────────────────────────────────────

# Module-level default; get_antrag_repository() hands this instance to
# FastAPI's Depends().  Tests override the provider instead of this global.
_default_antrag_repo: AntragRepository = SqliteAntragRepository()
|
|
||||||
|
|
||||||
|
|
||||||
def get_antrag_repository() -> AntragRepository:
    """FastAPI ``Depends()`` provider.  Overridable in tests via
    ``app.dependency_overrides[get_antrag_repository] = lambda: InMemoryAntragRepository()``.
    """
    return _default_antrag_repo
|
|
||||||
@ -1,64 +0,0 @@
|
|||||||
"""BewertungRepository — Port für die Versionshistorie einer Bewertung.
|
|
||||||
|
|
||||||
Eine „Bewertung“ ist die vollständige Assessment-Instanz; der
|
|
||||||
`BewertungRepository` greift auf die Snapshot-Tabelle
|
|
||||||
``assessment_versions`` zu. Für die aktuellste Bewertung siehe
|
|
||||||
``AntragRepository``.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from typing import Protocol, runtime_checkable
|
|
||||||
|
|
||||||
from .. import database
|
|
||||||
|
|
||||||
|
|
||||||
@runtime_checkable
class BewertungRepository(Protocol):
    """Port for reading the version history of one assessment.

    ``versions`` returns snapshot rows for a Drucksache.  The in-memory
    fake orders them newest-version-first; presumably the SQLite query in
    ``database.get_assessment_history`` matches — confirm before relying
    on ordering.
    """

    async def versions(self, drucksache: str) -> list[dict]: ...
|
|
||||||
|
|
||||||
|
|
||||||
class SqliteBewertungRepository:
    """Production implementation.  Delegates to ``database.py``."""

    async def versions(self, drucksache: str) -> list[dict]:
        # The history query (incl. any ordering) lives in the database layer.
        history = await database.get_assessment_history(drucksache)
        return history
|
|
||||||
|
|
||||||
|
|
||||||
class InMemoryBewertungRepository:
    """Test fake.  Lets tests seed history by hand via ``add_version``.

    Production versioning happens implicitly inside ``upsert_assessment``
    (see database.py:580-598).  The fake separates that deliberately,
    because tests usually want to populate an explicit version history.
    """

    def __init__(self) -> None:
        # drucksache -> list of snapshot rows, in insertion order.
        self._history: dict[str, list[dict]] = {}

    def add_version(
        self,
        drucksache: str,
        version: int,
        gwoe_score: float,
        model: str,
        created_at: str = "",
    ) -> None:
        """Append one snapshot row for *drucksache* (seeding helper, sync)."""
        snapshot = {
            "version": version,
            "gwoe_score": gwoe_score,
            "model": model,
            "created_at": created_at,
        }
        self._history.setdefault(drucksache, []).append(snapshot)

    async def versions(self, drucksache: str) -> list[dict]:
        """Return all snapshots for *drucksache*, newest version first."""
        stored = self._history.get(drucksache, [])
        return sorted(stored, key=lambda row: row["version"], reverse=True)
|
|
||||||
|
|
||||||
|
|
||||||
# Module-level default instance handed out by the provider below.
_default_bewertung_repo: BewertungRepository = SqliteBewertungRepository()


def get_bewertung_repository() -> BewertungRepository:
    """FastAPI ``Depends()`` provider; override in tests with an
    ``InMemoryBewertungRepository``.
    """
    return _default_bewertung_repo
|
|
||||||
20
app/static/chart.umd.min.js
vendored
20
app/static/chart.umd.min.js
vendored
File diff suppressed because one or more lines are too long
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user