test: improve test reliability and model compatibility

derekhiggins · derekhiggins · commit 903cffdb954e · 2025-09-16T16:05:37.000+01:00
- Update the earth question to be more specific, using a multiple-choice
  format to prevent Llama-3.2-1B-Instruct from rambling about other planets
- Skip test_text_chat_completion_structured_output on the vllm setup, as it
  is again sometimes timing out during CI execution with Llama-3.2-1B-Instruct

Signed-off-by: Derek Higgins <derekh@redhat.com>
diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh
@@ -214,7 +214,7 @@ EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag
 
 # Additional exclusions for vllm setup
 if [[ "$TEST_SETUP" == "vllm" ]]; then
-    EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
+    EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls or test_text_chat_completion_structured_output"
 fi
 
 PYTEST_PATTERN="not( $EXCLUDE_TESTS )"
diff --git a/tests/integration/responses/fixtures/test_cases.py b/tests/integration/responses/fixtures/test_cases.py
@@ -29,7 +29,7 @@ class ResponsesTestCase(BaseModel):
 basic_test_cases = [
     pytest.param(
         ResponsesTestCase(
-            input="Which planet do humans live on?",
+            input="Humans live on which planet: Mars, Venus, or Earth?",
             expected="earth",
         ),
         id="earth",
@@ -76,7 +76,7 @@ class ResponsesTestCase(BaseModel):
             input="",  # Not used for multi-turn
             expected="",  # Not used for multi-turn
             turns=[
-                ("Which planet do humans live on?", "earth"),
+                ("Humans live on which planet: Mars, Venus, or Earth?", "earth"),
                 ("What is the name of the planet from your previous response?", "earth"),
             ],
         ),
diff --git a/tests/integration/test_cases/inference/chat_completion.json b/tests/integration/test_cases/inference/chat_completion.json
@@ -1,7 +1,7 @@
 {
   "non_streaming_01": {
     "data": {
-      "question": "Which planet do humans live on?",
+      "question": "Humans live on which planet: Mars, Venus, or Earth?",
       "expected": "Earth"
     }
   },
diff --git a/tests/integration/test_cases/openai/responses.json b/tests/integration/test_cases/openai/responses.json
@@ -1,7 +1,7 @@
 {
   "non_streaming_01": {
     "data": {
-      "question": "Which planet do humans live on?",
+      "question": "Humans live on which planet: Mars, Venus, or Earth?",
       "expected": "Earth"
     }
   },

Original file line number	Diff line number	Diff line change
`@@ -1,7 +1,7 @@`
`1`	`1`	`{`
`2`	`2`	`"non_streaming_01": {`
`3`	`3`	`"data": {`
`4`		`- "question": "Which planet do humans live on?",`
	`4`	`+ "question": "Humans live on which planet: Mars, Venus, or Earth?",`
`5`	`5`	`"expected": "Earth"`
`6`	`6`	`}`
`7`	`7`	`},`