#57 Security: print() → logger.exception für alle Module
Befund #4 aus dem Security-Audit (PII/LLM-Content im Container-Log): Die letzten 10 print()-Aufrufe in app/{report,embeddings,parlamente}.py wurden durch strukturiertes Logging (logger.warning/error/exception/info) ersetzt.

Betroffen:
- report.py: 2× print in _append_original_antrag → logger.exception
- embeddings.py: 3× print in index_programm → logger.warning/info/exception
- parlamente.py: 5× print in NRWAdapter → logger.error/exception

logger.exception statt print+traceback: Der Stack-Trace wird automatisch angehängt, ohne den LLM-Content oder Antrags-Details als Volltext zu leaken (nur die Drucksache-ID als Kontext-Parameter).

Audit-Status nach diesem Commit: Alle 7 adressierbaren Befunde aus #57 sind gefixt (1 Rate-Limit, 2/6 XSS/XXE, 3 Path-Traversal, 4 PII-Log, 5 CSRF via Auth, 7 Search-DoS). Befund 8 (Secrets als ENV) ist akzeptiertes Risiko für Single-Server-Docker.

Tests: 201 passed, 5 skipped.
This commit is contained in:
parent
0870e8a910
commit
1a82f8294c
@ -1,7 +1,10 @@
|
|||||||
"""Semantic search for Wahlprogramme and Parteiprogramme using Qwen embeddings."""
|
"""Semantic search for Wahlprogramme and Parteiprogramme using Qwen embeddings."""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@ -349,7 +352,7 @@ def index_programm(programm_id: str, pdf_dir: Path) -> int:
|
|||||||
pdf_path = pdf_dir / info["pdf"]
|
pdf_path = pdf_dir / info["pdf"]
|
||||||
|
|
||||||
if not pdf_path.exists():
|
if not pdf_path.exists():
|
||||||
print(f"PDF not found: {pdf_path}")
|
logger.warning("PDF not found: %s", pdf_path)
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
conn = sqlite3.connect(EMBEDDINGS_DB)
|
conn = sqlite3.connect(EMBEDDINGS_DB)
|
||||||
@ -386,13 +389,13 @@ def index_programm(programm_id: str, pdf_dir: Path) -> int:
|
|||||||
))
|
))
|
||||||
total_chunks += 1
|
total_chunks += 1
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error embedding chunk: {e}")
|
logger.exception("Error embedding chunk")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
conn.commit()
|
conn.commit()
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
print(f"Indexed {total_chunks} chunks from {programm_id}")
|
logger.info("Indexed %d chunks from %s", total_chunks, programm_id)
|
||||||
return total_chunks
|
return total_chunks
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -109,7 +109,7 @@ class NRWAdapter(ParlamentAdapter):
|
|||||||
# First, get the page to establish session
|
# First, get the page to establish session
|
||||||
initial = await client.get(self.search_url)
|
initial = await client.get(self.search_url)
|
||||||
if initial.status_code != 200:
|
if initial.status_code != 200:
|
||||||
print(f"NRW search initial request failed: {initial.status_code}")
|
logger.error("NRW search initial request failed: %s", initial.status_code)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# Parse for webflow token from pagination links
|
# Parse for webflow token from pagination links
|
||||||
@ -161,7 +161,7 @@ class NRWAdapter(ParlamentAdapter):
|
|||||||
)
|
)
|
||||||
|
|
||||||
if search_resp.status_code != 200:
|
if search_resp.status_code != 200:
|
||||||
print(f"NRW search request failed: {search_resp.status_code}")
|
logger.error("NRW search request failed: %s", search_resp.status_code)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# Parse results
|
# Parse results
|
||||||
@ -246,11 +246,11 @@ class NRWAdapter(ParlamentAdapter):
|
|||||||
results.append(doc)
|
results.append(doc)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error parsing item: {e}")
|
logger.exception("NRW error parsing item")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"NRW search error: {e}")
|
logger.exception("NRW search error")
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
@ -312,7 +312,7 @@ class NRWAdapter(ParlamentAdapter):
|
|||||||
|
|
||||||
return text
|
return text
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error downloading {drucksache}: {e}")
|
logger.exception("NRW download error for %s", drucksache)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -8,11 +8,14 @@ issue #57 (audit findings #2 and #6). The ``_e`` helper is the single
|
|||||||
funnel through which all LLM strings must pass on their way into the HTML.
|
funnel through which all LLM strings must pass on their way into the HTML.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
import subprocess
|
import subprocess
|
||||||
from html import escape as _e
|
from html import escape as _e
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
from .models import Assessment, MATRIX_LABELS, EMPFEHLUNG_CONFIG
|
from .models import Assessment, MATRIX_LABELS, EMPFEHLUNG_CONFIG
|
||||||
from .bundeslaender import BUNDESLAENDER
|
from .bundeslaender import BUNDESLAENDER
|
||||||
|
|
||||||
@ -544,7 +547,7 @@ async def _append_original_antrag(
|
|||||||
finally:
|
finally:
|
||||||
src_doc.close()
|
src_doc.close()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"_append_original_antrag: PDF-Parse-Fehler für {assessment.drucksache}: {e}")
|
logger.exception("_append_original_antrag: PDF-Parse-Fehler für %s", assessment.drucksache)
|
||||||
|
|
||||||
# PyMuPDF refuses to overwrite the source file in non-incremental
|
# PyMuPDF refuses to overwrite the source file in non-incremental
|
||||||
# mode — write to a sibling temp file and atomically replace.
|
# mode — write to a sibling temp file and atomically replace.
|
||||||
@ -559,7 +562,7 @@ async def _append_original_antrag(
|
|||||||
tmp_path.replace(report_path)
|
tmp_path.replace(report_path)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Hard failure — leave the original report file untouched.
|
# Hard failure — leave the original report file untouched.
|
||||||
print(f"_append_original_antrag: Konnte Report nicht erweitern für {assessment.drucksache}: {e}")
|
logger.exception("_append_original_antrag: Konnte Report nicht erweitern für %s", assessment.drucksache)
|
||||||
|
|
||||||
|
|
||||||
def _insert_divider_page(
|
def _insert_divider_page(
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user