# gwoe-antragspruefer/tests/test_analyzer.py

"""Tests for analyzer.py JSON-stripping logic.

Reproduces the markdown-codeblock-stripping in the LLM retry loop. Real
Qwen responses sometimes wrap their JSON in ```json …``` fences (despite
the prompt asking for raw JSON), and the analyzer must tolerate that
without resorting to retries.
"""
import json
import sys
import types

# Stub openai before importing analyzer.
# A minimal stand-in module is registered under the name "openai" in
# sys.modules, but only when no entry of that name exists yet, so an already
# registered openai module is left untouched.
# NOTE(review): analyzer is not imported in the visible part of this file —
# confirm the stub is still required.
if "openai" not in sys.modules:
    openai_stub = types.ModuleType("openai")
    # The stand-in exposes a single attribute, OpenAI: a callable that accepts
    # arbitrary keyword arguments (no positional ones) and returns None.
    openai_stub.OpenAI = lambda **kw: None
    sys.modules["openai"] = openai_stub


def _strip_markdown_fences(content: str) -> str:
    """Mirror the analyzer's markdown-stripping snippet so we can unit-test
    the parsing rules without actually invoking the LLM.

    Keep this in sync with analyzer.py around the `if content.startswith("```")`
    branch — if the analyzer changes, this helper changes too. The point of
    the duplication is that the analyzer's stripping is buried in an async
    LLM call that we cannot easily unit-test directly.
    """
    content = content.strip()
    if content.startswith("```"):
        content = content.split("\n", 1)[1]
    if content.endswith("```"):
        content = content.rsplit("```", 1)[0]
    if content.startswith("```json"):
        content = content[7:]
    return content.strip()


SAMPLE_JSON = '{"gwoeScore": 7.0, "title": "Test"}'


class TestMarkdownStripping:
    """Checks for `_strip_markdown_fences` on fenced and unfenced replies."""

    def test_plain_json_unchanged(self):
        """Raw JSON without any fence passes through untouched."""
        result = _strip_markdown_fences(SAMPLE_JSON)
        assert result == SAMPLE_JSON

    def test_json_in_markdown_fence(self):
        """A fence carrying the `json` language tag is removed."""
        fenced = f"```json\n{SAMPLE_JSON}\n```"
        parsed = json.loads(_strip_markdown_fences(fenced))
        assert parsed["gwoeScore"] == 7.0

    def test_json_in_plain_fence(self):
        """A fence without a language tag is removed."""
        fenced = f"```\n{SAMPLE_JSON}\n```"
        parsed = json.loads(_strip_markdown_fences(fenced))
        assert parsed["gwoeScore"] == 7.0

    def test_leading_whitespace_stripped(self):
        """Whitespace and blank lines around the payload are ignored."""
        padded = f"   \n  {SAMPLE_JSON}  \n  "
        stripped = _strip_markdown_fences(padded)
        parsed = json.loads(stripped)
        assert parsed["gwoeScore"] == 7.0

    def test_trailing_fence_stripped(self):
        """A closing fence with no opening fence is still removed."""
        dangling = f"{SAMPLE_JSON}\n```"
        parsed = json.loads(_strip_markdown_fences(dangling))
        assert parsed["gwoeScore"] == 7.0