-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_scoring.py
More file actions
117 lines (96 loc) · 3.69 KB
/
test_scoring.py
File metadata and controls
117 lines (96 loc) · 3.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
"""Integration-level parametrized tests for scoring logic."""
import sys
import os
import types
import pytest
# Allow imports from backend/
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
# Stub config
# A throwaway ``config`` module carrying fixed test values. It is registered
# with ``setdefault`` so an already-loaded ``config`` module is left in place.
_config = types.ModuleType("config")
vars(_config).update(
    MISTRAL_API_KEY="test-key",
    AUDIO_MODEL="voxtral-mini-latest",
    TEXT_MODEL="mistral-large-latest",
    MAX_AUDIO_SIZE_MB=25,
    MAX_TRANSCRIPT_LENGTH=10000,
    THRESHOLD_SAFE=0.30,
    THRESHOLD_SUSPICIOUS=0.60,
    THRESHOLD_LIKELY_SCAM=0.85,
)
sys.modules.setdefault("config", _config)

# Stub ``mistralai`` the same way; its ``Mistral`` factory accepts any keyword
# arguments and returns None.
_mistralai = types.ModuleType("mistralai")
_mistralai.Mistral = lambda **kw: None
sys.modules.setdefault("mistralai", _mistralai)
from services.response_formatter import score_to_verdict, build_scam_report
from models.schemas import AnalysisResult
# ── Verdict thresholds (12 parametrized cases) ───────────────────────────────
@pytest.mark.parametrize(
    ("score", "expected"),
    [
        (0.00, "SAFE"),
        (0.10, "SAFE"),
        (0.29, "SAFE"),
        (0.30, "SUSPICIOUS"),
        (0.45, "SUSPICIOUS"),
        (0.59, "SUSPICIOUS"),
        (0.60, "LIKELY_SCAM"),
        (0.72, "LIKELY_SCAM"),
        (0.84, "LIKELY_SCAM"),
        (0.85, "SCAM"),
        (0.95, "SCAM"),
        (1.00, "SCAM"),
    ],
)
def test_verdict_thresholds(score, expected):
    """Check that ``score_to_verdict`` maps each score to the expected label.

    The table covers values just below, at, and above 0.30, 0.60 and 0.85,
    plus the extremes 0.00 and 1.00.
    """
    verdict = score_to_verdict(score)
    assert verdict == expected
# ── Combined weights ─────────────────────────────────────────────────────────
def _result(score: float) -> AnalysisResult:
    """Build an ``AnalysisResult`` whose ``scam_score`` is ``score``.

    All other fields are fixed filler values so that tests vary only the score.
    """
    fields = {
        "scam_score": score,
        "confidence": 0.9,
        "verdict": "SAFE",
        "signals": [],
        "recommendation": "test",
    }
    return AnalysisResult(**fields)
@pytest.mark.parametrize(
    ("audio", "text", "expected"),
    [
        (0.0, 0.0, 0.0),
        (1.0, 1.0, 1.0),
        (0.5, 0.5, 0.5),
        (0.8, 0.2, round(0.8 * 0.6 + 0.2 * 0.4, 4)),
        (0.3, 0.9, round(0.3 * 0.6 + 0.9 * 0.4, 4)),
        (1.0, 0.0, 0.6),
        (0.0, 1.0, 0.4),
    ],
)
def test_combined_weights(audio, text, expected):
    """Check ``combined_score`` of a report built from two scored results.

    The expected values in the table weight ``audio`` by 0.6 and ``text`` by
    0.4, rounded to four decimal places where computed.
    """
    audio_result = _result(audio)
    text_result = _result(text)
    report = build_scam_report("audio", audio_result, text_result)
    assert report.combined_score == expected
# ── Exponential weighting formula ─────────────────────────────────────────────
def test_exponential_weighting_five_chunks():
    """Step through 5 chunk scores and verify 0.7*chunk + 0.3*prev.

    Every intermediate value is compared with a hand-computed constant rather
    than with a second evaluation of the same expression, so an error in the
    recurrence cannot cancel itself out.
    """
    # NOTE(review): the recurrence is evaluated inline here; no project
    # function is called by this test.
    chunk_scores = [0.1, 0.4, 0.9, 0.2, 0.6]

    # Hand-computed running values, starting from a cumulative score of 0.0:
    #   0.7 * 0.1 + 0.3 * 0.0     = 0.07
    #   0.7 * 0.4 + 0.3 * 0.07    = 0.301
    #   0.7 * 0.9 + 0.3 * 0.301   = 0.7203
    #   0.7 * 0.2 + 0.3 * 0.7203  = 0.35609
    #   0.7 * 0.6 + 0.3 * 0.35609 = 0.526827
    expected_steps = [0.07, 0.301, 0.7203, 0.35609, 0.526827]

    cumulative = 0.0
    for score, expected in zip(chunk_scores, expected_steps):
        cumulative = 0.7 * score + 0.3 * cumulative
        # Round to 6 places to absorb binary floating-point noise.
        assert round(cumulative, 6) == expected
# ── Score clamping ────────────────────────────────────────────────────────────
@pytest.mark.parametrize(
    ("raw_score", "clamped"),
    [
        (-1.0, 0.0),
        (0.0, 0.0),
        (0.5, 0.5),
        (1.0, 1.0),
        (999.0, 1.0),
    ],
)
def test_score_clamping(raw_score, clamped):
    """Check the ``scam_score`` that ``parse_analysis_result`` yields from JSON.

    The table expects values below 0.0 to come back as 0.0, values above 1.0
    to come back as 1.0, and in-range values to pass through unchanged.
    """
    import json
    from services.response_formatter import parse_analysis_result

    payload = {
        "scam_score": raw_score,
        "confidence": 0.5,
        "verdict": "SAFE",
        "signals": [],
        "recommendation": "test",
    }
    parsed = parse_analysis_result(json.dumps(payload))
    assert parsed.scam_score == clamped