Skip to content

Commit e2db2fb

Browse files
committed
fix(core): raise on more than one compatible endpoint-benchmark type combination
Signed-off-by: Tomasz Grzegorzek <[email protected]>
1 parent 72a316d commit e2db2fb

File tree

2 files changed

+45
-9
lines changed

2 files changed

+45
-9
lines changed

packages/nemo-evaluator/src/nemo_evaluator/core/input.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,12 @@ def check_type_compatibility(evaluation: Evaluation):
371371
benchmark_type_combination = [benchmark_type_combination]
372372

373373
if model_types.issuperset(set(benchmark_type_combination)):
374-
is_target_compatible = True
374+
if is_target_compatible:
375+
raise MisconfigurationError(
376+
f"The benchmark {evaluation.config.type} is compatible with more than one combination of model capabilities {evaluation.target.api_endpoint.type} and needs a specification. Please override model capabilities for this benchmark to match only one combination."
377+
)
378+
else:
379+
is_target_compatible = True
375380

376381
if evaluation.target.api_endpoint.type is None:
377382
raise MisconfigurationError(

packages/nemo-evaluator/tests/unit_tests/core/test_input.py

Lines changed: 39 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -88,14 +88,6 @@ def test_empty_dicts():
8888
([EndpointType.CHAT, EndpointType.COMPLETIONS], [EndpointType.CHAT]),
8989
([EndpointType.CHAT, EndpointType.COMPLETIONS], EndpointType.CHAT),
9090
(EndpointType.CHAT, [[EndpointType.CHAT], [EndpointType.COMPLETIONS]]),
91-
(
92-
[EndpointType.CHAT, EndpointType.COMPLETIONS],
93-
[EndpointType.CHAT, EndpointType.COMPLETIONS],
94-
),
95-
(
96-
[EndpointType.CHAT, EndpointType.COMPLETIONS, EndpointType.VLM],
97-
[EndpointType.CHAT, EndpointType.COMPLETIONS],
98-
),
9991
(
10092
[EndpointType.CHAT, EndpointType.VLM],
10193
[
@@ -148,3 +140,42 @@ def test_endpoint_type_single_incompatible(model_types, benchmark_types):
148140
MisconfigurationError, match=r".* does not support the model type .*"
149141
):
150142
check_type_compatibility(evaluation)
143+
144+
145+
@pytest.mark.parametrize(
146+
"model_types,benchmark_types",
147+
[
148+
(
149+
[EndpointType.CHAT, EndpointType.COMPLETIONS],
150+
[EndpointType.CHAT, EndpointType.COMPLETIONS],
151+
),
152+
(
153+
[EndpointType.CHAT, EndpointType.COMPLETIONS, EndpointType.VLM],
154+
[EndpointType.CHAT, EndpointType.COMPLETIONS],
155+
),
156+
(
157+
[EndpointType.CHAT, EndpointType.COMPLETIONS, EndpointType.VLM],
158+
[
159+
[EndpointType.COMPLETIONS, EndpointType.VLM],
160+
[EndpointType.CHAT, EndpointType.VLM],
161+
],
162+
),
163+
],
164+
)
165+
def test_endpoint_type_raise_on_more_than_one(model_types, benchmark_types):
166+
evaluation_config = EvaluationConfig(supported_endpoint_types=benchmark_types)
167+
target_config = EvaluationTarget(
168+
api_endpoint=ApiEndpoint(type=model_types, url="localhost", model_id="my_model")
169+
)
170+
evaluation = Evaluation(
171+
config=evaluation_config,
172+
target=target_config,
173+
command="",
174+
pkg_name="",
175+
framework_name="",
176+
)
177+
with pytest.raises(
178+
MisconfigurationError,
179+
match=r".* is compatible with more than one combination of model capabilities .*",
180+
):
181+
check_type_compatibility(evaluation)

0 commit comments

Comments (0)