- TestUnionFindRankSwap: rank-Asymmetrie-Branch (Line 69) - TestLoadAssessmentItems: tmp-DB mit korrekten + kaputten Embeddings, bundesland-Filter, vollstaendiges Item-Schema - TestBuildHierarchySubclusters: - max_cluster_size=3 zwingt grossen Cluster zu sub-clustern - kleiner Cluster bekommt subclusters=None Total Coverage: 49.9% → 50.4% (50%-Marke ueberschritten), 718 → 724 Tests.
592 lines
22 KiB
Python
592 lines
22 KiB
Python
"""Unit-Tests für app/clustering.py (#134 Phase 2).
|
|
|
|
Testet reine Python-Funktionen (_cosine, UnionFind, _cluster_indices,
|
|
_cluster_label, _dominant_fraktion, _cluster_summary) mit synthetischen
|
|
Fixtures. DB-abhängige async-Funktionen (load_assessment_items,
|
|
build_hierarchy, find_similar_assessments) werden mit gemocktem DB-Lader
|
|
getestet.
|
|
|
|
Fixture-Corpus: normalisierte Vektoren per Pure-Python (kein numpy nötig).
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import math
|
|
import random
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
|
|
|
|
# ─── Hilfsfunktionen ─────────────────────────────────────────────────────────
|
|
|
|
def run(coro):
    """Run *coro* to completion on a fresh event loop and return its result.

    Uses ``asyncio.run()`` instead of the legacy
    ``get_event_loop().run_until_complete(...)`` pattern, which emits a
    DeprecationWarning since Python 3.10 and raises on 3.12+ when no event
    loop is set in the current thread (the normal situation under pytest).
    """
    return asyncio.run(coro)
|
|
|
|
|
|
def _norm_py(v: list[float]) -> list[float]:
|
|
"""Normalisiert einen Vektor auf Länge 1 (pure Python)."""
|
|
n = math.sqrt(sum(x * x for x in v))
|
|
return [x / n for x in v] if n > 0 else v
|
|
|
|
|
|
def _make_items(n: int = 5, dim: int = 16, seed: int = 42) -> list[dict]:
    """Build *n* reproducible assessment-item dicts with unit-length random embeddings."""
    rng = random.Random(seed)
    corpus: list[dict] = []
    for idx in range(n):
        raw_vec = [rng.gauss(0, 1) for _ in range(dim)]
        corpus.append(
            {
                "drucksache": f"18/{1000 + idx}",
                "title": f"Testantrag {idx}",
                "bundesland": "NRW",
                "fraktionen": ["SPD"] if idx % 2 == 0 else ["CDU"],
                "datum": "2026-04-20",
                "link": f"https://example.com/{idx}",
                "gwoe_score": 5.0 + idx * 0.5,
                "empfehlung": "Empfohlen",
                "empfehlung_symbol": "✓",
                "themen": [f"Thema{idx % 3}"],
                "embedding": _norm_py(raw_vec),
            }
        )
    return corpus
|
|
|
|
|
|
# ─── _cosine ─────────────────────────────────────────────────────────────────
|
|
|
|
class TestCosine:
    """Cosine similarity over plain float lists."""

    def test_identical_vectors_give_one(self):
        from app.clustering import _cosine
        vec = [1.0, 0.0, 0.0]
        assert abs(_cosine(vec, vec) - 1.0) < 1e-9

    def test_orthogonal_vectors_give_zero(self):
        from app.clustering import _cosine
        assert abs(_cosine([1.0, 0.0], [0.0, 1.0])) < 1e-9

    def test_opposite_vectors_give_minus_one(self):
        from app.clustering import _cosine
        assert abs(_cosine([1.0, 0.0], [-1.0, 0.0]) + 1.0) < 1e-9

    def test_zero_vector_returns_zero(self):
        from app.clustering import _cosine
        assert _cosine([0.0, 0.0], [1.0, 0.0]) == 0.0

    def test_symmetry(self):
        from app.clustering import _cosine
        lhs, rhs = [0.6, 0.8], [0.8, 0.6]
        assert abs(_cosine(lhs, rhs) - _cosine(rhs, lhs)) < 1e-12

    def test_range_normalized_vectors(self):
        from app.clustering import _cosine
        rng = random.Random(1)
        for _ in range(10):
            u = _norm_py([rng.gauss(0, 1) for _ in range(8)])
            w = _norm_py([rng.gauss(0, 1) for _ in range(8)])
            # Similarity of unit vectors must stay in [-1, 1] up to float noise.
            assert -1.0 - 1e-9 <= _cosine(u, w) <= 1.0 + 1e-9
|
|
|
|
|
|
# ─── UnionFind ────────────────────────────────────────────────────────────────
|
|
|
|
class TestUnionFind:
    """Disjoint-set structure used by the clustering step."""

    def test_initial_all_separate(self):
        from app.clustering import UnionFind
        dsu = UnionFind(4)
        roots = {dsu.find(i) for i in range(4)}
        assert len(roots) == 4

    def test_union_merges_components(self):
        from app.clustering import UnionFind
        dsu = UnionFind(4)
        dsu.union(0, 1)
        dsu.union(2, 3)
        assert dsu.find(0) == dsu.find(1)
        assert dsu.find(2) == dsu.find(3)
        assert dsu.find(0) != dsu.find(2)

    def test_union_find_path_compression(self):
        from app.clustering import UnionFind
        dsu = UnionFind(5)
        for a, b in ((0, 1), (1, 2), (2, 3), (3, 4)):
            dsu.union(a, b)
        representative = dsu.find(0)
        assert all(dsu.find(i) == representative for i in range(5))

    def test_union_self_no_error(self):
        from app.clustering import UnionFind
        dsu = UnionFind(3)
        dsu.union(1, 1)
        assert dsu.find(1) == dsu.find(1)

    def test_empty_union_find(self):
        from app.clustering import UnionFind
        assert UnionFind(0).parent == []
|
|
|
|
|
|
# ─── _cluster_indices ────────────────────────────────────────────────────────
|
|
|
|
class TestClusterIndices:
    """Threshold-based clustering of item indices by embedding similarity."""

    def test_empty_corpus_returns_empty(self):
        from app.clustering import _cluster_indices
        assert _cluster_indices([], 0.5) == []

    def test_single_item_is_singleton(self):
        from app.clustering import _cluster_indices
        items = _make_items(1)
        groups = _cluster_indices(items, 0.5)
        assert len(groups) == 1
        assert len(groups[0]) == 1

    def test_all_identical_items_one_cluster(self):
        # NOTE: the original also imported _cosine here but never used it.
        from app.clustering import _cluster_indices
        # Same vector everywhere -> cosine = 1.0 -> everything in one cluster
        v = [1.0, 0.0, 0.0]
        items = [
            {**_make_items(1)[0], "drucksache": f"18/{i}", "embedding": v}
            for i in range(4)
        ]
        groups = _cluster_indices(items, 0.5)
        assert len(groups) == 1
        assert len(groups[0]) == 4

    def test_orthogonal_items_all_singletons(self):
        """Orthogonal unit vectors -> cosine=0 -> all singletons."""
        from app.clustering import _cluster_indices
        identity_vecs = [[1 if i == j else 0 for j in range(4)] for i in range(4)]
        items = [
            {**_make_items(1)[0], "drucksache": f"18/{i}", "embedding": v}
            for i, v in enumerate(identity_vecs)
        ]
        groups = _cluster_indices(items, 0.5)
        # Every group is a singleton
        assert all(len(g) == 1 for g in groups)

    def test_higher_threshold_fewer_clusters(self):
        """Higher threshold -> more singletons, fewer large clusters."""
        from app.clustering import _cluster_indices
        items = _make_items(8, seed=99)
        groups_low = _cluster_indices(items, 0.1)
        groups_high = _cluster_indices(items, 0.99)
        # At low threshold: at least one group > 1 is possible
        # At high threshold (0.99): almost everything is a singleton
        singleton_low = sum(1 for g in groups_low if len(g) == 1)
        singleton_high = sum(1 for g in groups_high if len(g) == 1)
        assert singleton_high >= singleton_low

    def test_sorted_by_size_descending(self):
        from app.clustering import _cluster_indices
        v = [1.0, 0.0]
        items = [
            {**_make_items(1)[0], "drucksache": f"18/{i}", "embedding": v}
            for i in range(3)
        ] + [
            {**_make_items(1)[0], "drucksache": "18/solo", "embedding": [0.0, 1.0]}
        ]
        groups = _cluster_indices(items, 0.5)
        sizes = [len(g) for g in groups]
        assert sizes == sorted(sizes, reverse=True)
|
|
|
|
|
|
# ─── _dominant_fraktion ───────────────────────────────────────────────────────
|
|
|
|
class TestDominantFraktion:
    """Most frequent fraktion across a cluster's items."""

    def test_majority_fraktion_wins(self):
        from app.clustering import _dominant_fraktion
        members = [
            {"fraktionen": ["SPD"]},
            {"fraktionen": ["SPD"]},
            {"fraktionen": ["CDU"]},
        ]
        assert _dominant_fraktion(members) == "SPD"

    def test_empty_items_returns_none(self):
        from app.clustering import _dominant_fraktion
        assert _dominant_fraktion([]) is None

    def test_empty_fraktionen_lists_returns_none(self):
        from app.clustering import _dominant_fraktion
        members = [{"fraktionen": []}, {"fraktionen": None}]
        assert _dominant_fraktion(members) is None
|
|
|
|
|
|
# ─── _cluster_label ───────────────────────────────────────────────────────────
|
|
|
|
class TestClusterLabel:
    """Label selection: dominant theme first, then shortest title, then fallback."""

    def test_top_theme_used_as_label(self):
        from app.clustering import _cluster_label
        members = [
            {"themen": ["Klimaschutz", "Energie"], "title": "A"},
            {"themen": ["Klimaschutz"], "title": "B"},
        ]
        assert "Klimaschutz" in _cluster_label(members)

    def test_fallback_to_shortest_title(self):
        from app.clustering import _cluster_label
        members = [
            {"themen": [], "title": "Kurz"},
            {"themen": [], "title": "Sehr langer Titel"},
        ]
        assert _cluster_label(members) == "Kurz"

    def test_fallback_cluster_label(self):
        from app.clustering import _cluster_label
        assert _cluster_label([{"themen": [], "title": None}]) == "Cluster"
|
|
|
|
|
|
# ─── _cluster_summary ────────────────────────────────────────────────────────
|
|
|
|
class TestClusterSummary:
    """Summary dict produced for a cluster of items."""

    def test_basic_fields_present(self):
        from app.clustering import _cluster_summary
        summary = _cluster_summary(_make_items(3))
        expected = ("size", "label", "dominant_fraktion", "avg_gwoe_score", "drucksachen")
        assert all(key in summary for key in expected)

    def test_size_correct(self):
        from app.clustering import _cluster_summary
        assert _cluster_summary(_make_items(4))["size"] == 4

    def test_avg_score_calculated(self):
        from app.clustering import _cluster_summary
        members = [
            {**_make_items(1)[0], "gwoe_score": 4.0},
            {**_make_items(1)[0], "gwoe_score": 6.0},
        ]
        assert _cluster_summary(members)["avg_gwoe_score"] == 5.0

    def test_include_edges_adds_nodes_and_edges(self):
        from app.clustering import _cluster_summary
        summary = _cluster_summary(_make_items(3), include_edges=True)
        assert "nodes" in summary
        assert "edges" in summary
        assert len(summary["nodes"]) == 3
        # Complete graph on 3 nodes: edges 0-1, 0-2, 1-2
        assert len(summary["edges"]) == 3

    def test_no_edges_without_flag(self):
        from app.clustering import _cluster_summary
        summary = _cluster_summary(_make_items(3), include_edges=False)
        assert "edges" not in summary
        assert "nodes" not in summary
|
|
|
|
|
|
# ─── build_hierarchy (async, DB gemockt) ─────────────────────────────────────
|
|
|
|
class TestBuildHierarchy:
    """build_hierarchy with the DB loader patched out."""

    def test_empty_corpus_structure(self):
        """An empty corpus yields the bare result skeleton."""
        from app import clustering

        async def fake_load(bundesland=None):
            return []

        with patch.object(clustering, "load_assessment_items", side_effect=fake_load):
            result = run(clustering.build_hierarchy())

        assert result["meta"]["total"] == 0
        assert result["clusters"] == []
        assert result["singletons"] == []

    def test_single_item_becomes_singleton(self):
        from app import clustering
        corpus = _make_items(1)

        async def fake_load(bundesland=None):
            return corpus

        with patch.object(clustering, "load_assessment_items", side_effect=fake_load):
            result = run(clustering.build_hierarchy(threshold=0.5))

        assert len(result["singletons"]) == 1
        assert result["clusters"] == []

    def test_meta_fields_present(self):
        from app import clustering
        corpus = _make_items(4)

        async def fake_load(bundesland=None):
            return corpus

        with patch.object(clustering, "load_assessment_items", side_effect=fake_load):
            result = run(clustering.build_hierarchy())

        expected = ("total", "threshold", "num_clusters", "num_singletons")
        assert all(key in result["meta"] for key in expected)

    def test_threshold_affects_cluster_count(self):
        """Identical embeddings always collapse into one cluster below threshold 1.0."""
        from app import clustering
        shared = [1.0, 0.0, 0.0]
        corpus = [
            {**_make_items(1)[0], "drucksache": f"18/{i}", "embedding": shared}
            for i in range(3)
        ]

        async def fake_load(bundesland=None):
            return corpus

        with patch.object(clustering, "load_assessment_items", side_effect=fake_load):
            result = run(clustering.build_hierarchy(threshold=0.5))

        assert len(result["clusters"]) == 1
        assert result["clusters"][0]["size"] == 3
|
|
|
|
|
|
# ─── find_similar_assessments (async, DB gemockt) ────────────────────────────
|
|
|
|
class TestFindSimilarAssessments:
    """find_similar_assessments with the DB loader patched out."""

    @staticmethod
    def _query(corpus, drucksache, **kwargs):
        """Patch the loader to serve *corpus* and run the similarity query."""
        from app import clustering

        async def fake_load(bundesland=None):
            return corpus

        with patch.object(clustering, "load_assessment_items", side_effect=fake_load):
            return run(clustering.find_similar_assessments(drucksache, **kwargs))

    def test_returns_empty_for_unknown_drucksache(self):
        assert self._query(_make_items(3), "99/9999") == []

    def test_returns_top_k_results(self):
        corpus = _make_items(5)
        hits = self._query(corpus, corpus[0]["drucksache"], top_k=3)
        assert len(hits) == 3

    def test_excludes_self(self):
        corpus = _make_items(5)
        target = corpus[0]["drucksache"]
        hits = self._query(corpus, target, top_k=10)
        assert target not in [hit["drucksache"] for hit in hits]

    def test_result_sorted_by_similarity_descending(self):
        corpus = _make_items(5)
        hits = self._query(corpus, corpus[0]["drucksache"], top_k=4)
        scores = [hit["similarity"] for hit in hits]
        assert scores == sorted(scores, reverse=True)

    def test_result_fields_present(self):
        corpus = _make_items(3)
        expected = ("drucksache", "title", "bundesland", "fraktionen",
                    "gwoe_score", "empfehlung", "similarity")
        for hit in self._query(corpus, corpus[0]["drucksache"], top_k=2):
            assert all(key in hit for key in expected)

    def test_single_item_corpus_returns_empty(self):
        """A one-item corpus leaves nothing after excluding the item itself."""
        corpus = _make_items(1)
        assert self._query(corpus, corpus[0]["drucksache"]) == []
|
|
|
|
|
|
# ─── Coverage-Backfill (#134) ────────────────────────────────────────────────
|
|
|
|
|
|
class TestUnionFindRankSwap:
    """Covers the rank-asymmetry branch: if rank[ra] < rank[rb], parent[ra] = rb (line 69).

    The original test also built a second, smaller UnionFind(4) whose state
    was never asserted on — dead setup code, removed here.
    """

    def test_smaller_rank_attaches_to_larger(self):
        from app.clustering import UnionFind
        uf = UnionFind(8)
        # Build a rank-2 tree: merge two rank-1 trees on a rank tie.
        uf.union(0, 1)
        uf.union(2, 3)
        uf.union(0, 2)
        # Node 4 is still a singleton (rank 0); union(4, 0) therefore hits
        # the rank[ra] < rank[rb] branch and hangs 4 under 0's root.
        uf.union(4, 0)
        assert uf.find(4) == uf.find(0)
|
|
|
|
|
|
class TestLoadAssessmentItems:
    """Async DB loader; exercised against a temporary SQLite database."""

    def _build_db(self, tmp_path):
        """Create a tmp DB with one valid row, one corrupt embedding, one other state."""
        import json
        import sqlite3
        db_path = tmp_path / "clust.db"
        conn = sqlite3.connect(str(db_path))
        conn.execute("""
            CREATE TABLE assessments (
                drucksache TEXT PRIMARY KEY, title TEXT,
                fraktionen TEXT, datum TEXT, bundesland TEXT,
                gwoe_score REAL, link TEXT,
                empfehlung TEXT, empfehlung_symbol TEXT,
                themen TEXT, summary_embedding BLOB
            )
        """)
        valid_embedding = json.dumps([0.1, 0.2, 0.3]).encode()
        rows = [
            # Well-formed embedding
            ("18/1", "T1", '["CDU"]', "2026-04-01", "NRW",
             7.0, "x", "Empfohlen", "+", '["Klima"]', valid_embedding),
            # Corrupt embedding (invalid JSON) — must be skipped by the loader
            ("18/2", "T2", '["SPD"]', "2026-04-02", "NRW",
             5.0, "y", "Empfohlen", "+", '["Klima"]', b"not-json"),
            # Different Bundesland (exercises the bundesland filter)
            ("8/1", "T3", '["AfD"]', "2026-04-03", "MV",
             3.0, "z", "Ablehnen", "-", "[]", valid_embedding),
        ]
        conn.executemany(
            "INSERT INTO assessments VALUES (?,?,?,?,?,?,?,?,?,?,?)", rows
        )
        conn.commit()
        conn.close()
        return db_path

    def test_loads_only_valid_embeddings(self, tmp_path, monkeypatch):
        from app.config import settings
        from app import clustering
        monkeypatch.setattr(settings, "db_path", str(self._build_db(tmp_path)))

        loaded = run(clustering.load_assessment_items())
        # 18/2 carries a corrupt embedding and is skipped
        ids = sorted(item["drucksache"] for item in loaded)
        assert "18/2" not in ids
        assert "18/1" in ids
        assert "8/1" in ids

    def test_bundesland_filter(self, tmp_path, monkeypatch):
        from app.config import settings
        from app import clustering
        monkeypatch.setattr(settings, "db_path", str(self._build_db(tmp_path)))

        loaded = run(clustering.load_assessment_items(bundesland="NRW"))
        assert [item["drucksache"] for item in loaded] == ["18/1"]

    def test_loaded_item_fields_present(self, tmp_path, monkeypatch):
        from app.config import settings
        from app import clustering
        monkeypatch.setattr(settings, "db_path", str(self._build_db(tmp_path)))

        loaded = run(clustering.load_assessment_items(bundesland="NRW"))
        assert loaded
        expected = ("drucksache", "title", "fraktionen", "datum", "link",
                    "bundesland", "gwoe_score", "empfehlung",
                    "empfehlung_symbol", "themen", "embedding")
        assert all(key in loaded[0] for key in expected)
|
|
|
|
|
|
class TestBuildHierarchySubclusters:
    """Clusters larger than max_cluster_size are sub-clustered (lines 256-262).

    Note: the original methods re-imported ``patch`` locally although it is
    already imported at module level — removed for consistency with the
    other test classes.
    """

    def test_large_cluster_gets_subclustered(self):
        from app import clustering

        # 6 near-identical items -> one large cluster whose sub-clusters split it
        v = [1.0, 0.0, 0.0]
        items = [
            {**_make_items(1)[0], "drucksache": f"18/{i}",
             "embedding": [v[0] + 0.01 * i, v[1], v[2]]}
            for i in range(6)
        ]

        async def fake_load(bundesland=None):
            return items

        with patch.object(clustering, "load_assessment_items", side_effect=fake_load):
            # max_cluster_size=3 forces sub-clustering
            result = run(clustering.build_hierarchy(
                threshold=0.95, max_cluster_size=3, subcluster_threshold=0.999,
            ))
        assert result["clusters"]
        # At least one cluster must carry subclusters
        assert any(c.get("subclusters") for c in result["clusters"])

    def test_small_cluster_has_subclusters_none(self):
        from app import clustering

        items = _make_items(2)
        # Give both items the same embedding so they land in one cluster
        items[0]["embedding"] = [1.0, 0.0, 0.0]
        items[1]["embedding"] = [1.0, 0.0, 0.0]

        async def fake_load(bundesland=None):
            return items

        with patch.object(clustering, "load_assessment_items", side_effect=fake_load):
            result = run(clustering.build_hierarchy(
                threshold=0.5, max_cluster_size=10,
            ))
        for c in result["clusters"]:
            assert c["subclusters"] is None
|