"""Tests for analyzer.py JSON-stripping logic.

Reproduces the markdown-codeblock-stripping in the LLM retry loop. Real
Qwen responses sometimes wrap their JSON in ```json …``` fences (despite
the prompt asking for raw JSON), and the analyzer must tolerate that
without resorting to retries.
"""
|
||
|
|
import json
|
||
|
|
import sys
|
||
|
|
import types
|
||
|
|
|
||
|
|
# Install a placeholder ``openai`` module before analyzer is imported.
# A module that is already present in sys.modules is left untouched.
if "openai" not in sys.modules:
    openai_stub = types.ModuleType("openai")
    sys.modules["openai"] = openai_stub
    # Stand-in client factory: accepts any keyword arguments, returns None.
    openai_stub.OpenAI = lambda **kw: None
|
||
|
|
|
||
|
|
|
||
|
|
def _strip_markdown_fences(content: str) -> str:
|
||
|
|
"""Mirror the analyzer's markdown-stripping snippet so we can unit-test
|
||
|
|
the parsing rules without actually invoking the LLM.
|
||
|
|
|
||
|
|
Keep this in sync with analyzer.py around the `if content.startswith("```")`
|
||
|
|
branch — if the analyzer changes, this helper changes too. The point of
|
||
|
|
the duplication is that the analyzer's stripping is buried in an async
|
||
|
|
LLM call that we cannot easily unit-test directly.
|
||
|
|
"""
|
||
|
|
content = content.strip()
|
||
|
|
if content.startswith("```"):
|
||
|
|
content = content.split("\n", 1)[1]
|
||
|
|
if content.endswith("```"):
|
||
|
|
content = content.rsplit("```", 1)[0]
|
||
|
|
if content.startswith("```json"):
|
||
|
|
content = content[7:]
|
||
|
|
return content.strip()
|
||
|
|
|
||
|
|
|
||
|
|
# Small JSON document shared by the tests below; they parse it back and
# check the "gwoeScore" value after fence stripping.
SAMPLE_JSON = '{"gwoeScore": 7.0, "title": "Test"}'
|
||
|
|
|
||
|
|
|
||
|
|
class TestMarkdownStripping:
    """Checks ``_strip_markdown_fences`` on fenced and unfenced payloads."""

    def test_plain_json_unchanged(self):
        """Unfenced JSON is returned exactly as it was passed in."""
        result = _strip_markdown_fences(SAMPLE_JSON)
        assert result == SAMPLE_JSON

    def test_json_in_markdown_fence(self):
        """A payload inside a ```json fence parses once stripped."""
        fenced = f"```json\n{SAMPLE_JSON}\n```"
        payload = json.loads(_strip_markdown_fences(fenced))
        assert payload["gwoeScore"] == 7.0

    def test_json_in_plain_fence(self):
        """A payload inside an untagged ``` fence parses once stripped."""
        fenced = f"```\n{SAMPLE_JSON}\n```"
        payload = json.loads(_strip_markdown_fences(fenced))
        assert payload["gwoeScore"] == 7.0

    def test_leading_whitespace_stripped(self):
        """Whitespace padding around the payload does not break parsing."""
        padded = f" \n {SAMPLE_JSON} \n "
        stripped = _strip_markdown_fences(padded)
        payload = json.loads(stripped)
        assert payload["gwoeScore"] == 7.0

    def test_trailing_fence_stripped(self):
        """A closing fence with no opening fence is still removed."""
        dangling = f"{SAMPLE_JSON}\n```"
        payload = json.loads(_strip_markdown_fences(dangling))
        assert payload["gwoeScore"] == 7.0
|