feat(#138): SHA-Lock-File schuetzt vor stillem PDF-Tausch
Hintergrund: abgeordnetenwatch hatte das CDU-BE-2023-PDF unter dem alten
Slug-Namen gegen das CDU-BE-2026-Wahlprogramm ersetzt — ohne den
Datei-Namen zu aendern. Die Embedding-Indexierung haette das anachronistische
Programm uebernommen, ohne dass es jemand bemerkt.
Loesung: app/wahlprogramm-shas.lock.json pinnt nach erstem erfolgreichen
Download den SHA-256 jedes Programmes. Spaetere Aufrufe von
fetch_and_verify() vergleichen den Server-Inhalt gegen den Lock; bei
Abweichung wird abgebrochen mit klarer Fehlermeldung. Nur mit explizitem
Maintainer-Override (--accept-new-sha) wird der Lock aktualisiert.
CLI:
python -m app.wahlprogramm_fetch --pin-existing
seedet den Lock einmalig aus den vorhandenen PDFs (52 Eintraege).
python -m app.wahlprogramm_fetch --fetch BL PARTEI [--accept-new-sha]
laedt mit Lock-Pruefung; --accept-new-sha bei bewusstem Update.
5 neue Tests in test_wahlprogramm_fetch.py decken den Pferdetausch-
Block, das initiale Pinnen, das Migration-Szenario (PDF da, Lock leer)
und den --accept-new-sha-Override ab.
Closes #138
This commit is contained in:
parent
d0d941444d
commit
5559f42c92
55
app/wahlprogramm-shas.lock.json
Normal file
55
app/wahlprogramm-shas.lock.json
Normal file
@ -0,0 +1,55 @@
|
||||
{
|
||||
"afd-bb-2024.pdf": "da5cd04cc66128b2f0df35b47775fce850ed2f4145ee15d74ec8bf501ce043f1",
|
||||
"afd-be-2023.pdf": "d2b5997b1bc0d3fb590cc354d8ed1ac879e8de4a74518f4089436a2fa12615f1",
|
||||
"afd-bw-2021.pdf": "a438e09279c6c5766171a213715ed0a9d60248ff86f648227e8bb6ec59a591c7",
|
||||
"afd-hh-2025.pdf": "6aae3ad00cd07824bcd99473e130d1b894e2174a89fcafece51865c51fdcd4c8",
|
||||
"afd-lsa-2021.pdf": "dd2651af2a9423039b1c5a39760be2332025d569a878453f09e0302e252edc23",
|
||||
"afd-mv-2021.pdf": "953c39941a1f997233daaf0cec01bc82b1e86ba895b43e8d34b015cc72799648",
|
||||
"afd-nrw-2022.pdf": "36c4bc55c3239e3f7e69568e19d7f074ce2f1cf018653d493767ec09df637282",
|
||||
"afd-rp-2021.pdf": "3ec39eb08a073244813a51f260e18fe52aab791bea26bf8079546b6e189ec2b3",
|
||||
"afd-th-2024.pdf": "26e61fdc3456e7ce18f7a3d2ea1eada303f93cad0b9698797f83a671574eaf51",
|
||||
"bsw-bb-2024.pdf": "548c9bda01af176586606fae708c9f3b3ba98e1e128f1e2ff39e482289faab42",
|
||||
"bsw-th-2024.pdf": "5ace33912083048a759ee2af9288248447363dafa21f569c5c056df22751ba69",
|
||||
"cdu-bb-2024.pdf": "460b1463483429f9e8b84e4ae6ef9cf878dd228e108411bed3c153169a0001e8",
|
||||
"cdu-be-2023.pdf": "813d0d08ac8ce7381e9a7b9472e0616aaf684b1632c9d4a7f4e940a33455f29a",
|
||||
"cdu-bw-2021.pdf": "a92c104c456ce06d8bad6649071551e0ec0d525a1bc0bc31e9fa6a0566da4db0",
|
||||
"cdu-hh-2025.pdf": "8d29e514b8bce5c2f3f497dc5b97f6f8ab95a7bdbf619abf258e9582d57f2dbd",
|
||||
"cdu-lsa-2021.pdf": "63b6cf42ce97834d5d105fb7b8cc7fb7a2aa96928d4153bd3a5858c196ee0797",
|
||||
"cdu-mv-2021.pdf": "605a2211bef8666c2103771ebffd97a088e7cdb1545401087ef125155e7e4db2",
|
||||
"cdu-nrw-2022.pdf": "49d97a6f30fbacad3a0b770c182ed0527bc5d347dc4cacd65f85e7e4e9644566",
|
||||
"cdu-rp-2021.pdf": "54c50d88bdf5c5f7dee5abcc981ffb4d1cfd5c86fbf2a29f4f2f4a8a3dd4797a",
|
||||
"cdu-sh-2022.pdf": "39b79a22e904b300cf1bbc25752b618195683c90c31e6b10c3bc0e8408aa6a1a",
|
||||
"cdu-th-2024.pdf": "cde8d2222bd8ce04aee24883a38dab8a30f5d60cda115b8bb2f43ceffa08b730",
|
||||
"fdp-bw-2021.pdf": "bdcbb1b2e5748922c8347bd69ea6f81c954fd02cd220d448400f9a5a86ce914b",
|
||||
"fdp-lsa-2021.pdf": "3d4275e36e29c0b191dcc4a29061a1072920f868cc52bee954bf81491ad15224",
|
||||
"fdp-mv-2021.pdf": "8dc341dd017f1d82c51608a26e1fd6c3d8acd1281dc37409e375389999b37b55",
|
||||
"fdp-nrw-2022.pdf": "576b42a26c29ca5d8b7469d417ae709c8d0699aed5195d4ca16dd696dcff8bea",
|
||||
"fdp-rp-2021.pdf": "fba792d8d43842f33ae8f0aa94b0d4e50838908c217402b4c5cb4707f958e1ae",
|
||||
"fdp-sh-2022.pdf": "4c49da411bb3c8e008f4b57dd20dc005104515b56056ff746cf5403529728d09",
|
||||
"fw-rp-2021.pdf": "c7f26d553f24c9d9fcf1c2edb1dbe558edc1ca65af68b289a1541e77f7bbeea8",
|
||||
"gruene-be-2023.pdf": "2b14a319cdcd2ca022399254ea285714f872eddd166f3f537861eeb2dc5ade80",
|
||||
"gruene-bw-2021.pdf": "9af526705cb10b91be0690b26c9c033668a8082eeefca482dc4e7a46f2d671f9",
|
||||
"gruene-hh-2025.pdf": "4428d1cdc16b4e74588f0bd51145ab7371f9e0871a2fc9d25a1f94e4f5aeb662",
|
||||
"gruene-lsa-2021.pdf": "7b5cea92cd600283d7edf18dc0d358c0b7d78d7269589d9ef05de7d5f8b35998",
|
||||
"gruene-mv-2021.pdf": "40f0070743ef9ae7808cab319234b4c83faa53a8a098ba8a82f28023bee4d9f6",
|
||||
"gruene-nrw-2022.pdf": "2d7eaf2f4b73e0b7cdccf8641208b86d306b654ead5706d72c446965f82e5769",
|
||||
"gruene-rp-2021.pdf": "4fd68629d1560c28d61b2b913fd20ce6ad9a76b22823fd8496e51bfaf70dc19c",
|
||||
"gruene-sh-2022.pdf": "62870c948c9e05663125b051d3a6401d63952ea6a64e4140dcece7bd1b1aea52",
|
||||
"linke-be-2023.pdf": "7d6a9166f6a1d87ba26cc1a2818ae2b844ee9df6ed6668673f329dd5186fd956",
|
||||
"linke-hh-2025.pdf": "15e68efe3818758a7cefc0a3e3095a5a5fb191111c00a1202c563cee43ce6e40",
|
||||
"linke-lsa-2021.pdf": "f269c014416b213785badf7bea5928fdb847fc902e09f52ec66a140a37e03d75",
|
||||
"linke-mv-2021.pdf": "160dad56ab4de8f641c21f51cbf3c33953f2f3d91b4de792c4e725f3975fdfbe",
|
||||
"linke-th-2024.pdf": "2d8ca99ef60cbe1b59cf33b1e37320d66a057e5136c2f49aa8cde77e4a19533a",
|
||||
"spd-bb-2024.pdf": "4131f63fbb9d67cd8948ca7a54f1c140b47968c77454a3dabe6bcdc4384f63d3",
|
||||
"spd-be-2023.pdf": "4ee84e969e97894742673f940ec030883216ce852b729507327f8bced637d03b",
|
||||
"spd-bw-2021.pdf": "d888ae92bb62a61aaa4d6ac8dc22c2c98d1a2227b6ba223b6422770672825072",
|
||||
"spd-hh-2025.pdf": "5e8c57969cb3b159b9299c173831f7863ab81bd206c2a87ae232ba96f23156ee",
|
||||
"spd-lsa-2021.pdf": "59140aa1921ab0ee85142d74e1d72b1af7254da3f7870a30460abd605d280333",
|
||||
"spd-mv-2021.pdf": "c8c671c2e60f1a4f8048bd74e379eb8edc69ab2daeb09581fe83f25f6c87d529",
|
||||
"spd-nrw-2022.pdf": "6f1375add74a532cb084dee10c3e5a6215e7d4118ddd26ef0d27bf39765d19a6",
|
||||
"spd-rp-2021.pdf": "13966815b8870b30e3480673437634fb90882bf5410c652694a6579492e32707",
|
||||
"spd-sh-2022.pdf": "3acd3ed6c42a0e0a8f49abd76610b536c7d5fdf13fcc4499e391bc9b1a3d0f0f",
|
||||
"spd-th-2024.pdf": "dbd96a51134c8c13dabe18807fe233e9a43f45c2fefeead2ea500ecc3d63de6b",
|
||||
"ssw-sh-2022.pdf": "3020762a1c33a09bc51f7fa49ede1c2d5dd7574ea74ef262076e59d5e3a9a41b",
|
||||
"test.pdf": "71630b3ce93b3fd91aefa095908c8070d07e0eca8ad3071c60ae7375da2e7e17"
|
||||
}
|
||||
@ -16,6 +16,7 @@ CLI:
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
@ -26,9 +27,39 @@ import yaml
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_LINKS_FILE = Path(__file__).parent / "wahlprogramm-links.yaml"
|
||||
_LOCK_FILE = Path(__file__).parent / "wahlprogramm-shas.lock.json"
|
||||
_REFERENZEN_DIR = Path(__file__).parent / "static" / "referenzen"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SHA-Lock — schuetzt vor stillem PDF-Austausch unter gleicher URL.
|
||||
# Hintergrund: abgeordnetenwatch hat die CDU-BE-2023-Datei intern gegen den
|
||||
# 2026-Berlin-Plan ersetzt, ohne den Slug zu aendern. Nach dem ersten
|
||||
# erfolgreichen Download wird der SHA-256 hier gepinnt; spaetere fetches
|
||||
# vergleichen gegen den Lock und brechen bei Abweichung ab.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _load_lock() -> dict[str, str]:
    """Read the SHA lock file and return its ``{file name: sha256}`` mapping.

    A missing lock file is the normal first-run state and yields an empty
    dict without logging. A lock file that cannot be read, is not valid
    UTF-8/JSON, or whose top-level value is not a JSON object is logged as
    an error and likewise treated as empty, so callers can always use the
    result as a dict.

    Returns:
        The parsed mapping, or ``{}`` when the file is missing or unusable.
    """
    try:
        data = json.loads(_LOCK_FILE.read_text(encoding="utf-8"))
        if not isinstance(data, dict):
            # e.g. "[]" or "null": valid JSON, but callers rely on dict methods.
            raise ValueError(
                f"Top-Level ist {type(data).__name__}, kein JSON-Objekt"
            )
    except FileNotFoundError:
        # No lock written yet — not an error.
        return {}
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        logger.error("Lock-File %s ist kaputt: %s — leerer Lock genutzt", _LOCK_FILE, exc)
        return {}
    return data
|
||||
|
||||
|
||||
def _save_lock(lock: dict[str, str]) -> None:
    """Persist ``lock`` to the lock file as sorted, indented UTF-8 JSON.

    The JSON is written to a sibling ``<name>.tmp`` file first and then
    moved over the real lock file. An interrupted write therefore cannot
    leave a truncated lock behind (which would be read back as an empty
    lock and drop every pinned SHA).

    Args:
        lock: Mapping of file name to SHA-256 hex digest to store.
    """
    payload = json.dumps(lock, indent=2, sort_keys=True, ensure_ascii=False) + "\n"
    staging = _LOCK_FILE.with_name(_LOCK_FILE.name + ".tmp")
    staging.write_text(payload, encoding="utf-8")
    # Path.replace overwrites the destination in a single rename step.
    staging.replace(_LOCK_FILE)
|
||||
|
||||
|
||||
def _lock_key(dateiname: str) -> str:
    """Return the lock-file key for a PDF file name.

    The key is the file name itself, unchanged.
    """
    key = dateiname
    return key
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# YAML-Quelle laden
|
||||
# ---------------------------------------------------------------------------
|
||||
@ -78,32 +109,45 @@ def fetch_and_verify(
|
||||
url: str,
|
||||
dest_path: Path,
|
||||
expected_sha: Optional[str] = None,
|
||||
*,
|
||||
accept_new_sha: bool = False,
|
||||
) -> dict:
|
||||
"""Lädt eine Datei herunter und prüft optional den SHA-256-Hash.
|
||||
"""Lädt eine Datei herunter und prüft den SHA-256-Hash gegen den Lock.
|
||||
|
||||
SHA-Gate-Logik:
|
||||
- Existiert ``dest_path`` bereits, wird der bisherige Hash gespeichert.
|
||||
- Nach dem Download wird der neue Hash verglichen.
|
||||
- Bei Abweichung wird die temporäre Datei gelöscht und ein Fehler zurückgegeben
|
||||
(niemals stillschweigend überschreiben).
|
||||
SHA-Gate-Logik (Pferdetausch-Schutz):
|
||||
- Beim ersten erfolgreichen Download wird der SHA in
|
||||
``wahlprogramm-shas.lock.json`` gepinnt.
|
||||
- Spätere fetches vergleichen gegen diesen gepinnten SHA. Abweichung →
|
||||
Abbruch, ausser ``accept_new_sha=True`` ist gesetzt (dann wird der Lock
|
||||
explizit aktualisiert).
|
||||
- ``expected_sha`` (z.B. aus YAML) wird zusaetzlich und vor dem Lock geprueft; es ersetzt die Lock-Pruefung nicht.
|
||||
|
||||
Args:
|
||||
url: Download-URL der PDF-Datei.
|
||||
dest_path: Ziel-Pfad (typischerweise in app/static/referenzen/).
|
||||
expected_sha: Wenn angegeben, muss der Download-Hash übereinstimmen.
|
||||
expected_sha: Wenn angegeben, muss der Download-Hash übereinstimmen
|
||||
(haerter als der Lock-Vergleich).
|
||||
accept_new_sha: Wenn True, wird der Lock auf den neuen SHA aktualisiert
|
||||
statt bei Abweichung abzubrechen. NICHT default — Maintainer-Override.
|
||||
|
||||
Returns:
|
||||
Dict mit den Schlüsseln:
|
||||
- ``ok`` (bool): True bei Erfolg.
|
||||
- ``sha256`` (str): SHA-256 der heruntergeladenen Datei.
|
||||
- ``prev_sha256`` (str|None): SHA-256 der bisherigen Datei, falls vorhanden.
|
||||
- ``locked_sha256`` (str|None): SHA aus dem Lock-File (vor diesem Call).
|
||||
- ``error`` (str|None): Fehlermeldung bei Misserfolg.
|
||||
- ``changed`` (bool): True, wenn sich die Datei gegenüber der bisherigen Version geändert hat.
|
||||
- ``changed`` (bool): True, wenn sich die Datei geaendert hat.
|
||||
- ``lock_updated`` (bool): True, wenn der Lock-Eintrag neu/ersetzt wurde.
|
||||
"""
|
||||
prev_sha: Optional[str] = None
|
||||
if dest_path.exists():
|
||||
prev_sha = sha256_of_file(dest_path)
|
||||
|
||||
lock = _load_lock()
|
||||
lock_key = _lock_key(dest_path.name)
|
||||
locked_sha = lock.get(lock_key)
|
||||
|
||||
tmp_path = dest_path.with_suffix(".tmp")
|
||||
try:
|
||||
logger.info("Lade %s → %s", url, tmp_path)
|
||||
@ -119,39 +163,71 @@ def fetch_and_verify(
|
||||
|
||||
new_sha = sha256_of_file(tmp_path)
|
||||
|
||||
# SHA-Gate gegen expected_sha
|
||||
# SHA-Gate gegen expected_sha (haerter, aus YAML kuratiert)
|
||||
if expected_sha and new_sha != expected_sha:
|
||||
tmp_path.unlink(missing_ok=True)
|
||||
return {
|
||||
"ok": False,
|
||||
"sha256": new_sha,
|
||||
"prev_sha256": prev_sha,
|
||||
"locked_sha256": locked_sha,
|
||||
"changed": False,
|
||||
"lock_updated": False,
|
||||
"error": (
|
||||
f"SHA-Prüfung fehlgeschlagen: erwartet {expected_sha[:12]}…, "
|
||||
f"erhalten {new_sha[:12]}…"
|
||||
f"SHA-Pruefung gegen erwarteten Hash fehlgeschlagen: "
|
||||
f"erwartet {expected_sha[:12]}…, erhalten {new_sha[:12]}…"
|
||||
),
|
||||
}
|
||||
|
||||
# SHA-Gate gegen bisherige Datei
|
||||
# SHA-Gate gegen Lock-File (Pferdetausch-Schutz)
|
||||
if locked_sha and new_sha != locked_sha and not accept_new_sha:
|
||||
tmp_path.unlink(missing_ok=True)
|
||||
return {
|
||||
"ok": False,
|
||||
"sha256": new_sha,
|
||||
"prev_sha256": prev_sha,
|
||||
"locked_sha256": locked_sha,
|
||||
"changed": False,
|
||||
"lock_updated": False,
|
||||
"error": (
|
||||
f"Lock-Pruefung fehlgeschlagen: gepinnt {locked_sha[:12]}…, "
|
||||
f"jetzt {new_sha[:12]}…. Pferdetausch-Verdacht — Inhalt manuell "
|
||||
f"pruefen, dann mit --accept-new-sha bestaetigen."
|
||||
),
|
||||
}
|
||||
|
||||
# SHA-Gate gegen bisherige Datei (no-op)
|
||||
if prev_sha and new_sha == prev_sha:
|
||||
tmp_path.unlink(missing_ok=True)
|
||||
logger.info("Datei unverändert (SHA %s…), kein Überschreiben.", new_sha[:12])
|
||||
lock_updated = False
|
||||
if locked_sha != new_sha:
|
||||
# Datei war schon korrekt, Lock fehlte — initialer Pin.
|
||||
lock[lock_key] = new_sha
|
||||
_save_lock(lock)
|
||||
lock_updated = True
|
||||
logger.info("Datei unveraendert (SHA %s…), kein Ueberschreiben.", new_sha[:12])
|
||||
return {
|
||||
"ok": True,
|
||||
"sha256": new_sha,
|
||||
"prev_sha256": prev_sha,
|
||||
"locked_sha256": locked_sha,
|
||||
"changed": False,
|
||||
"lock_updated": lock_updated,
|
||||
"error": None,
|
||||
}
|
||||
|
||||
tmp_path.rename(dest_path)
|
||||
# Lock aktualisieren — initialer Pin oder bewusstes Update via accept_new_sha
|
||||
lock[lock_key] = new_sha
|
||||
_save_lock(lock)
|
||||
logger.info("Gespeichert: %s (SHA %s…)", dest_path.name, new_sha[:12])
|
||||
return {
|
||||
"ok": True,
|
||||
"sha256": new_sha,
|
||||
"prev_sha256": prev_sha,
|
||||
"locked_sha256": locked_sha,
|
||||
"changed": True,
|
||||
"lock_updated": True,
|
||||
"error": None,
|
||||
}
|
||||
|
||||
@ -162,7 +238,9 @@ def fetch_and_verify(
|
||||
"ok": False,
|
||||
"sha256": "",
|
||||
"prev_sha256": prev_sha,
|
||||
"locked_sha256": locked_sha,
|
||||
"changed": False,
|
||||
"lock_updated": False,
|
||||
"error": str(exc),
|
||||
}
|
||||
|
||||
@ -225,8 +303,39 @@ def _cli() -> None:
|
||||
parser.add_argument("--url", help="URL überschreiben (statt erster Kandidat aus YAML)")
|
||||
parser.add_argument("--yes", action="store_true",
|
||||
help="Nicht interaktiv bestätigen (gefährlich)")
|
||||
parser.add_argument("--accept-new-sha", action="store_true",
|
||||
help="Bei Lock-Mismatch: neuen SHA in den Lock uebernehmen (Pferdetausch-Override)")
|
||||
parser.add_argument("--pin-existing", action="store_true",
|
||||
help="Alle bereits vorhandenen PDFs in static/referenzen/ in den Lock pinnen "
|
||||
"(einmalig nach Einfuehrung des Lock-Files)")
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.pin_existing:
|
||||
from .wahlprogramme import WAHLPROGRAMME
|
||||
|
||||
lock = _load_lock()
|
||||
added = 0
|
||||
for bl, parteien in WAHLPROGRAMME.items():
|
||||
for partei, info in parteien.items():
|
||||
dateiname = info.get("file") if isinstance(info, dict) else None
|
||||
if not dateiname:
|
||||
continue
|
||||
pdf_path = _REFERENZEN_DIR / dateiname
|
||||
if not pdf_path.exists():
|
||||
continue
|
||||
key = _lock_key(dateiname)
|
||||
if key in lock:
|
||||
continue
|
||||
lock[key] = sha256_of_file(pdf_path)
|
||||
added += 1
|
||||
print(f" pinned {bl}/{partei}: {dateiname} → {lock[key][:12]}…")
|
||||
if added:
|
||||
_save_lock(lock)
|
||||
print(f"\n{added} neue Eintraege in {_LOCK_FILE.name}.")
|
||||
else:
|
||||
print("Keine neuen Eintraege — alle vorhandenen PDFs sind bereits gepinnt.")
|
||||
sys.exit(0)
|
||||
|
||||
if args.check:
|
||||
missing = get_missing_programmes(args.bl)
|
||||
if not missing:
|
||||
@ -272,12 +381,14 @@ def _cli() -> None:
|
||||
print("Abgebrochen.")
|
||||
sys.exit(0)
|
||||
|
||||
result = fetch_and_verify(url, dest)
|
||||
result = fetch_and_verify(url, dest, accept_new_sha=args.accept_new_sha)
|
||||
if result["ok"]:
|
||||
change_note = "geändert" if result["changed"] else "unverändert"
|
||||
change_note = "geaendert" if result["changed"] else "unveraendert"
|
||||
print(f"OK ({change_note}) — SHA-256: {result['sha256'][:16]}…")
|
||||
if result["lock_updated"]:
|
||||
print(f"Lock aktualisiert in {_LOCK_FILE.name}.")
|
||||
if result["changed"]:
|
||||
print("Hinweis: Embeddings müssen neu indexiert werden (python -m app.reindex_embeddings).")
|
||||
print("Hinweis: Embeddings muessen neu indexiert werden (python -m app.reindex_embeddings).")
|
||||
else:
|
||||
print(f"FEHLER: {result['error']}")
|
||||
sys.exit(1)
|
||||
|
||||
@ -177,6 +177,129 @@ class TestFetchAndVerify:
|
||||
assert result["changed"] is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 4: SHA-Lock-File — Pferdetausch-Schutz (#138)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestShaLock:
    """Regression tests for the SHA lock file (issue #138).

    abgeordnetenwatch replaced the CDU-BE-2023 PDF with the CDU-BE-2026 PDF
    under the old slug name. The lock-file mechanism must catch such silent
    swaps.
    """

    def _patch_lock_file(self, tmp_path):
        """Redirect the module's lock-file path to ``tmp_path / "lock.json"``."""
        return patch("app.wahlprogramm_fetch._LOCK_FILE", tmp_path / "lock.json")

    def _write_lock(self, tmp_path, entries):
        """Create the temporary lock file with the given entries."""
        import json

        (tmp_path / "lock.json").write_text(json.dumps(entries), encoding="utf-8")

    def _read_lock(self, tmp_path):
        """Parse and return the temporary lock file."""
        import json

        return json.loads((tmp_path / "lock.json").read_text(encoding="utf-8"))

    def _urlopen_with(self, content: bytes):
        """Build a ``urlopen`` stand-in whose response body is ``content``."""

        class _Response:
            def read(self_inner):
                return content

            def __enter__(self_inner):
                return self_inner

            def __exit__(self_inner, *a):
                pass

        def _fake_urlopen(url_or_req, timeout=None):
            return _Response()

        return _fake_urlopen

    def test_first_download_pins_sha(self, tmp_path):
        """First download creates the lock file with the new SHA."""
        dest = tmp_path / "cdu-be.pdf"
        content = b"%PDF original CDU BE 2021"

        with self._patch_lock_file(tmp_path), \
                patch("urllib.request.urlopen", self._urlopen_with(content)):
            result = fetch_and_verify("https://example.com/cdu-be.pdf", dest)

        assert result["ok"] is True
        assert result["lock_updated"] is True
        assert result["locked_sha256"] is None
        assert (tmp_path / "lock.json").exists()
        assert self._read_lock(tmp_path)["cdu-be.pdf"] == _sha(content)
        assert dest.read_bytes() == content

    def test_second_download_with_same_content_passes(self, tmp_path):
        """Second download with identical content: ok, nothing changes."""
        dest = tmp_path / "cdu-be.pdf"
        content = b"%PDF original CDU BE 2021"
        dest.write_bytes(content)
        self._write_lock(tmp_path, {"cdu-be.pdf": _sha(content)})

        with self._patch_lock_file(tmp_path), \
                patch("urllib.request.urlopen", self._urlopen_with(content)):
            result = fetch_and_verify("https://example.com/cdu-be.pdf", dest)

        assert result["ok"] is True
        assert result["changed"] is False
        # The pin already matched, so the lock must not be rewritten.
        assert result["lock_updated"] is False
        assert self._read_lock(tmp_path) == {"cdu-be.pdf": _sha(content)}

    def test_pferdetausch_blocks_silent_replacement(self, tmp_path):
        """CRITICAL: the server serves different content than the pinned file.

        The lock holds the SHA of the original, so the fetch must ABORT and
        leave both the local file and the lock untouched.
        """
        dest = tmp_path / "cdu-be-2023.pdf"
        original_content = b"%PDF CDU Berlin 2021-2026 Wahlprogramm"
        replaced_content = b"%PDF CDU Berlin-Plan 2026 (replaced!)"
        dest.write_bytes(original_content)
        self._write_lock(tmp_path, {"cdu-be-2023.pdf": _sha(original_content)})

        with self._patch_lock_file(tmp_path), \
                patch("urllib.request.urlopen", self._urlopen_with(replaced_content)):
            result = fetch_and_verify("https://example.com/cdu-be-2023.pdf", dest)

        assert result["ok"] is False
        assert "Lock-Pruefung" in result["error"]
        assert result["lock_updated"] is False
        assert result["locked_sha256"] == _sha(original_content)
        # The file must NOT have been overwritten ...
        assert dest.read_bytes() == original_content
        # ... the pin must still point at the original ...
        assert self._read_lock(tmp_path) == {
            "cdu-be-2023.pdf": _sha(original_content)
        }
        # ... and the rejected download must not be left lying around.
        assert not dest.with_suffix(".tmp").exists()

    def test_accept_new_sha_overrides_lock(self, tmp_path):
        """With accept_new_sha=True the lock is deliberately updated."""
        dest = tmp_path / "linke-bb.pdf"
        original_content = b"%PDF v1"
        new_content = b"%PDF v2 - intentional update"
        dest.write_bytes(original_content)
        self._write_lock(tmp_path, {"linke-bb.pdf": _sha(original_content)})

        with self._patch_lock_file(tmp_path), \
                patch("urllib.request.urlopen", self._urlopen_with(new_content)):
            result = fetch_and_verify(
                "https://example.com/linke-bb.pdf", dest,
                accept_new_sha=True,
            )

        assert result["ok"] is True
        assert result["changed"] is True
        assert result["lock_updated"] is True
        # The lock must carry the new SHA and the file the new content.
        assert self._read_lock(tmp_path)["linke-bb.pdf"] == _sha(new_content)
        assert dest.read_bytes() == new_content

    def test_existing_file_without_lock_pins_silently(self, tmp_path):
        """Migration scenario: the file exists but has no lock entry.

        On the next identical fetch the SHA is pinned and nothing is blocked.
        """
        dest = tmp_path / "spd-mv.pdf"
        content = b"%PDF SPD MV 2021"
        dest.write_bytes(content)
        # Deliberately no lock file.

        with self._patch_lock_file(tmp_path), \
                patch("urllib.request.urlopen", self._urlopen_with(content)):
            result = fetch_and_verify("https://example.com/spd-mv.pdf", dest)

        assert result["ok"] is True
        assert result["changed"] is False
        assert result["lock_updated"] is True
        assert self._read_lock(tmp_path)["spd-mv.pdf"] == _sha(content)
        assert dest.read_bytes() == content
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 4: og_card — cache_key Determinismus und Cache-Miss/Hit
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
Loading…
Reference in New Issue
Block a user