Skip to content

Commit a43b36c

Browse files
authored
fix some ut failures on XPU w/ torch 2.9 (#41923)
* fix 6 UT failures on XPU w/ torch 2.9 — Signed-off-by: Yao, Matrix <[email protected]>
* fix UT failures for 4 models on XPU — Signed-off-by: Yao, Matrix <[email protected]>

Signed-off-by: Yao, Matrix <[email protected]>
1 parent 10d5571 commit a43b36c

File tree

7 files changed

+35
-16
lines changed

7 files changed

+35
-16
lines changed

tests/models/aria/test_modeling_aria.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -520,7 +520,6 @@ def test_generation_no_images(self):
520520
quantization_config=BitsAndBytesConfig(load_in_4bit=True, llm_int8_skip_modules=["multihead_attn"]),
521521
)
522522
processor = AutoProcessor.from_pretrained(model_id)
523-
assert model.device.type == "cuda", "This test is only supported on CUDA" # TODO: remove this
524523
# Prepare inputs with no images
525524
inputs = processor(text="Hello, I am", return_tensors="pt").to(torch_device)
526525

tests/models/aya_vision/test_modeling_aya_vision.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ def test_small_model_integration_forward(self):
267267

268268
EXPECTED_LOGITS = Expectations(
269269
{
270-
("xpu", 3): [0.4109, 0.1532, 0.8018, 2.1328, 0.5483],
270+
("xpu", 3): [1.6699, 0.6260, 3.2266, 8.5547, 2.209],
271271
# 4-bit
272272
("cuda", 7): [0.1097, 0.3481, 3.8340, 9.7969, 2.0488],
273273
("cuda", 8): [1.6396, 0.6094, 3.1992, 8.5234, 2.1875],
@@ -308,7 +308,7 @@ def test_small_model_integration_generate_text_only(self):
308308

309309
expected_outputs = Expectations(
310310
{
311-
("xpu", 3): "Whispers on the breeze,\nLeaves dance under moonlit skies,\nNature's quiet song.",
311+
("xpu", 3): "Whispers on the breeze,\nLeaves dance under moonlit sky,\nNature's quiet song.",
312312
# 4-bit
313313
("cuda", 7): "Sure, here's a haiku for you:\n\nMorning dew sparkles,\nPetals unfold in sunlight,\n",
314314
("cuda", 8): "Whispers on the breeze,\nLeaves dance under moonlit skies,\nNature's quiet song.",
@@ -474,7 +474,7 @@ def test_small_model_integration_batched_generate_multi_image(self):
474474
# Batching seems to alter the output slightly, but it is also the case in the original implementation. This seems to be expected: https://github.com/huggingface/transformers/issues/23017#issuecomment-1649630232
475475
expected_outputs = Expectations(
476476
{
477-
("xpu", 3): "Wooden path to water,\nMountains echo in stillness,\nPeaceful forest lake.",
477+
("xpu", 3): "Wooden path to water,\nMountains echo in stillness,\nPeaceful forest scene.",
478478
("cuda", 7): 'Wooden bridge stretches\nMirrored lake below, mountains rise\nPeaceful, serene',
479479
("cuda", 8): 'Wooden path to water,\nMountains echo in stillness,\nPeaceful forest scene.',
480480
}

tests/models/gemma3/test_modeling_gemma3.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,7 @@ def test_model_4b_bf16(self):
499499

500500
EXPECTED_TEXTS = Expectations(
501501
{
502-
("xpu", 3): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nCertainly! \n\nThe image shows a brown and white cow standing on a sandy beach with turquoise water in the background. It looks like a lovely,'],
502+
("xpu", 3): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nCertainly! \n\nThe image shows a brown cow standing on a sandy beach with turquoise water and a blue sky in the background. It looks like a'],
503503
("cuda", (8, 0)): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nCertainly! \n\nThe image shows a brown cow standing on a sandy beach with clear turquoise water and a blue sky in the background. It looks like'],
504504
("cuda", (8, 6)): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nCertainly! \n\nThe image shows a brown cow standing on a sandy beach with clear blue water and a blue sky in the background. It looks like'],
505505
("rocm", (9, 4)): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nCertainly! \n\nThe image shows a brown cow standing on a sandy beach with turquoise water and a blue sky in the background. It looks like a'],
@@ -610,7 +610,7 @@ def test_model_4b_crops(self):
610610
EXPECTED_NUM_IMAGES = 3 # one for the origin image and two crops of images
611611
EXPECTED_TEXTS = Expectations(
612612
{
613-
("xpu", 3): ['user\nYou are a helpful assistant.\n\nHere is the original image \n\n\n\n and here are some crops to help you see better \n\n\n\n \n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown cow standing on a sandy beach next to a turquoise ocean. There are clouds in the blue sky above.'],
613+
("xpu", 3): ["user\nYou are a helpful assistant.\n\nHere is the original image \n\n\n\n and here are some crops to help you see better \n\n\n\n \n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown cow standing on a sandy beach next to a turquoise ocean. There's a bright blue sky with some white clouds in the"],
614614
("cuda", 7): [],
615615
("cuda", (8, 6)): ["user\nYou are a helpful assistant.\n\nHere is the original image \n\n\n\n and here are some crops to help you see better \n\n\n\n \n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown cow standing on a sandy beach next to a turquoise ocean. There's a clear blue sky with some white clouds above."],
616616
("cuda", (8, 0)): ["user\nYou are a helpful assistant.\n\nHere is the original image \n\n\n\n and here are some crops to help you see better \n\n\n\n \n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown cow standing on a sandy beach next to a turquoise ocean. There's a blue sky with some white clouds in the background"],

tests/models/glm4v/test_modeling_glm4v.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,9 @@
2424
is_torch_available,
2525
)
2626
from transformers.testing_utils import (
27+
Expectations,
2728
cleanup,
29+
require_deterministic_for_xpu,
2830
require_flash_attn,
2931
require_torch,
3032
require_torch_gpu,
@@ -413,6 +415,7 @@ def test_small_model_integration_test_with_video(self):
413415
)
414416

415417
@slow
418+
@require_deterministic_for_xpu
416419
def test_small_model_integration_test_expand(self):
417420
model = Glm4vForConditionalGeneration.from_pretrained(
418421
"THUDM/GLM-4.1V-9B-Thinking", dtype="auto", device_map="auto"
@@ -426,14 +429,23 @@ def test_small_model_integration_test_expand(self):
426429

427430
output = model.generate(**inputs, max_new_tokens=30, do_sample=False, num_beams=2, num_return_sequences=2)
428431

429-
EXPECTED_DECODED_TEXT = [
430-
"\nWhat kind of dog is this?\n<think>Got it, let's look at the image. The animal in the picture doesn't look like a dog; it's actually a cat. Specifically",
431-
"\nWhat kind of dog is this?\n<think>Got it, let's look at the image. The animal in the picture doesn't look like a dog; it's actually a cat, specifically"
432-
] # fmt: skip
433-
self.assertEqual(
434-
self.processor.batch_decode(output, skip_special_tokens=True),
435-
EXPECTED_DECODED_TEXT,
432+
# fmt: off
433+
EXPECTED_DECODED_TEXTS = Expectations(
434+
{
435+
436+
(None, None): ["\nWhat kind of dog is this?\n<think>Got it, let's look at the image. The animal in the picture doesn't look like a dog; it's actually a cat. Specifically",
437+
"\nWhat kind of dog is this?\n<think>Got it, let's look at the image. The animal in the picture doesn't look like a dog; it's actually a cat, specifically"
438+
],
439+
("xpu", None): ["\nWhat kind of dog is this?\n<think>Got it, let's look at the image. The animal in the picture is not a dog; it's a cat. Specifically, it looks",
440+
"\nWhat kind of dog is this?\n<think>Got it, let's look at the image. The animal in the picture is not a dog; it's a cat, specifically a Pallas"
441+
],
442+
}
436443
)
444+
# fmt: on
445+
EXPECTED_DECODED_TEXT = EXPECTED_DECODED_TEXTS.get_expectation()
446+
447+
decoded_text = self.processor.batch_decode(output, skip_special_tokens=True)
448+
self.assertEqual(decoded_text, EXPECTED_DECODED_TEXT)
437449

438450
@slow
439451
def test_small_model_integration_test_batch_wo_image(self):

tests/models/mistral3/test_modeling_mistral3.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ def test_mistral3_integration_generate_text_only(self):
275275
self.assertEqual(decoded_output, expected_output)
276276

277277
@require_read_token
278+
@require_deterministic_for_xpu
278279
def test_mistral3_integration_generate(self):
279280
processor = AutoProcessor.from_pretrained(self.model_checkpoint)
280281
processor.chat_template = processor.chat_template.replace('strftime_now("%Y-%m-%d")', '"2025-06-20"')
@@ -299,7 +300,7 @@ def test_mistral3_integration_generate(self):
299300

300301
expected_outputs = Expectations(
301302
{
302-
("xpu", 3): "The image features two cats resting on a pink blanket. The cat on the left is a kitten",
303+
("xpu", 3): "The image features two tabby cats lying on a pink surface, which appears to be a cushion or",
303304
("cuda", 8): 'The image features two cats lying on a pink surface, which appears to be a couch or a bed',
304305
("rocm", (9, 4)): "The image features two cats lying on a pink surface, which appears to be a couch or a bed",
305306
("rocm", (9, 5)): "The image features two tabby cats lying on a pink surface, which appears to be a cushion or"

tests/models/mllama/test_modeling_mllama.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -547,7 +547,7 @@ def test_11b_model_integration_generate_text_only(self):
547547
decoded_output = processor.decode(output[0], skip_special_tokens=True)
548548
expected_outputs = Expectations(
549549
{
550-
("xpu", 3): "If I had to write a haiku about my life, I would write:\nLife is a messy tapestry\n Threads of joy and sorrow\nWeft of memories",
550+
("xpu", 3): "If I had to write a haiku about my life, I would write:\nLife is a messy stream\nRipples of joy and pain\nFlowing, ever",
551551
("cuda", 7): "If I had to write a haiku about my life, I would write:\nLife is a messy stream\nRipples of joy and pain\nFlowing, ever",
552552
("cuda", 8): "If I had to write a haiku about my life, I would write:\nLife is a messy stream\nRipples of joy and pain\nFlowing, ever",
553553
}

tests/pipelines/test_pipelines_automatic_speech_recognition.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
from transformers.pipelines.audio_utils import chunk_bytes_iter, ffmpeg_microphone_live
3636
from transformers.pipelines.automatic_speech_recognition import chunk_iter
3737
from transformers.testing_utils import (
38+
Expectations,
3839
compare_pipeline_output_to_hub_spec,
3940
is_pipeline_test,
4041
is_torch_available,
@@ -1443,8 +1444,14 @@ def test_whisper_prompted(self):
14431444
@slow
14441445
def test_whisper_longform(self):
14451446
# fmt: off
1446-
EXPECTED_RESULT = " Folks, if you watch the show, you know, I spent a lot of time right over there. Patiently and astutely scrutinizing the boxwood and mahogany chest set of the day's biggest stories developing the central headline pawns, definitely maneuvering an oso topical night to F6, fainting a classic Sicilian, nade door variation on the news, all the while seeing eight moves deep and patiently marshalling the latest press releases into a fisher's shows in Lip Nitsky attack that culminates in the elegant lethal slow-played, all-passant checkmate that is my nightly monologue. But sometimes, sometimes, folks, I. CHEERING AND APPLAUSE Sometimes I startle away, cubside down in the monkey bars of a condemned playground on a super fun site. Get all hept up on goofballs. Rummage that were discarded tag bag of defective toys. Yank out a fist bowl of disembodied doll limbs, toss them on Saturday, Rusty Cargo, container down by the Wharf, and challenge toothless drifters to the godless bughouse lets of tournament that is my segment. MUSIC Meanwhile!"
1447+
EXPECTED_RESULTS = Expectations(
1448+
{
1449+
(None, None): " Folks, if you watch the show, you know, I spent a lot of time right over there. Patiently and astutely scrutinizing the boxwood and mahogany chest set of the day's biggest stories developing the central headline pawns, definitely maneuvering an oso topical night to F6, fainting a classic Sicilian, nade door variation on the news, all the while seeing eight moves deep and patiently marshalling the latest press releases into a fisher's shows in Lip Nitsky attack that culminates in the elegant lethal slow-played, all-passant checkmate that is my nightly monologue. But sometimes, sometimes, folks, I. CHEERING AND APPLAUSE Sometimes I startle away, cubside down in the monkey bars of a condemned playground on a super fun site. Get all hept up on goofballs. Rummage that were discarded tag bag of defective toys. Yank out a fist bowl of disembodied doll limbs, toss them on Saturday, Rusty Cargo, container down by the Wharf, and challenge toothless drifters to the godless bughouse lets of tournament that is my segment. MUSIC Meanwhile!",
1450+
("xpu", None): " Folks, if you watch the show, you know, I spent a lot of time right over there. Patiently and astutely scrutinizing the boxwood and mahogany chest set of the day's biggest stories developing the central headline pawns, definitely maneuvering an oso topical night to F6, fainting of classics, Sicilian, nade door variation on the news, all the while seeing eight moves deep and patiently marshalling the latest press releases into a Fisher shows in Lip Nitsky attack that culminates in the elegant lethal slow-played, all-passant checkmate that is my nightly monologue. But sometimes, sometimes, folks, I... APPLAUSE Sometimes I... Startle away, upside down on the monkey bars of a condemned playground on a superfund site. Get all heaped up on goofballs, rummaged that would discard a tag bag of defective toys, yank out a fist bowl of disembodied doll limbs, toss them on a stain kid's place mat from a defunct denys, set up a table inside a rusty cargo container down by the Wharf and challenge toothless drifters to the godless bug house blitz of tournament that is my segment.",
1451+
}
1452+
)
14471453
# fmt: on
1454+
EXPECTED_RESULT = EXPECTED_RESULTS.get_expectation()
14481455

14491456
processor = AutoProcessor.from_pretrained("openai/whisper-tiny.en")
14501457
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")

0 commit comments

Comments (0)