=== INFERENCE PIPELINE DIAGNOSTICS ===
Loading model: models--mlx-community--OpenELM-3B/snapshots/06bc1f24cf2423809d6d31ba847db0679268c210
Model loaded.
--- TEST 1: Basic GPU ops ---
[DIAG] matmul(ones, 2*ones) expect=8 shape=(4,4) min=8.000000 max=8.000000 mean=8.000000 |mean|=8.000000
[VALS] matmul result: [8.0000, 8.0000, 8.0000, 8.0000, 8.0000, 8.0000, 8.0000, 8.0000]
[hipBLASLt] first call
[hipBLASLt] M=4 N=4 K=4 ta=0 tb=0 lda=4 ldb=4 ldc=4
[DIAG] bf16 matmul expect=8 shape=(4,4) min=8.000000 max=8.000000 mean=8.000000 |mean|=8.000000
--- TEST 2: quantized_matmul vs dequant ---
[DIAG] q_proj weights not found (w=0 s=0 b=0)
--- TEST 3: RMS Norm ---
[DIAG] rms_norm([1,2,3,4]) shape=(1,1,4) min=0.365148 max=1.460593 mean=0.912871 |mean|=0.912871
[VALS] rms_norm([1,2,3,4]) expect≈[.365,.730,1.095,1.461]: [0.3651, 0.7303, 1.0954, 1.4606]
[DIAG] rms_norm(rand bf16 4096) shape=(1,3,4096) min=-3.515625 max=4.187500 mean=0.001857 |mean|=0.797060
--- TEST 4: RoPE ---
[DIAG] rope(ones, off=0) shape=(1,1,1,128) min=1.000000 max=1.000000 mean=1.000000 |mean|=1.000000
[VALS] rope(ones, off=0): [1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000]
[DIAG] rope(ones, off=100) shape=(1,1,1,128) min=-1.414062 max=1.406250 mean=0.586235 |mean|=0.953492
[VALS] rope(ones, off=100): [1.3672, 1.3438, -1.3672, -1.3516, 0.7305, -1.3828, -1.4062, -0.9219, 1.3594, -1.1719, 1.3750, -1.1094, -0.5898, 1.2109, 1.1406, -0.0040, -0.9805, -1.3906, -1.3516, -1.0781]
--- TEST 5: Full forward pass ---
[DIAG] logits(token=1) shape=(1,1,32000) min=-nan max=-nan mean=0.000000 |mean|=0.000000 NaN=32000
[VALS] logits(token=1): [-nan, -nan, -nan, -nan, -nan, -nan, -nan, -nan, -nan, -nan, -nan, -nan, -nan, -nan, -nan, -nan, -nan, -nan, -nan, -nan, -nan, -nan, -nan, -nan, -nan, -nan, -nan, -nan, -nan, -nan]
[DIAG] Top-10:
token=31999 logit=-nan
token=31998 logit=-nan
token=31997 logit=-nan
token=31996 logit=-nan
token=31995 logit=-nan
token=31994 logit=-nan
token=31993 logit=-nan
token=31992 logit=-nan
token=31991 logit=-nan
token=31990 logit=-nan
[DIAG] logits(step2) shape=(1,1,32000) min=-nan max=-nan mean=0.000000 |mean|=0.000000 NaN=32000
--- TEST 6: dequantize() sanity ---
[DIAG] dequant([0..7],s=1,b=0) shape=(1,8) min=0.000000 max=7.000000 mean=3.500000 |mean|=3.500000
[VALS] dequant expect=[0,1,2,3,4,5,6,7]: [0.0000, 1.0000, 2.0000, 3.0000, 4.0000, 5.0000, 6.0000, 7.0000]
--- TEST 6b: Warmup pass ---
[DIAG] warmup logits shape=(1,1,32000) min=-nan max=-nan mean=0.000000 |mean|=0.000000 NaN=32000
[DIAG] Warmup complete
--- TEST 7: Token-level generation trace ---
[DIAG] encode("What is 2+2?") = [1724, 338, 29871, 29906, 29974, 29906, 29973] (7 tokens)
[DIAG] Token-by-token decode:
token 1724 -> "What"
token 338 -> "is"
token 29871 -> ""
token 29906 -> "2"
token 29974 -> "+"
token 29906 -> "2"
token 29973 -> "?"
[DIAG] No chat template available
--- TEST 8: random::categorical ---
[DIAG] categorical([..., 10, ...]) = 2 (expect 2)
[DIAG] categorical([..., 10, ...]) = 2 (expect 2)
[DIAG] categorical([..., 10, ...]) = 2 (expect 2)
[DIAG] categorical([..., 10, ...]) = 2 (expect 2)
[DIAG] categorical([..., 10, ...]) = 2 (expect 2)
[DIAG] categorical(peak@17, V=151936) = 17 (expect 17)
[DIAG] categorical(peak@17, V=151936) = 17 (expect 17)
[DIAG] categorical(peak@17, V=151936) = 17 (expect 17)
[DIAG] Testing categorical with real model logits...
[hipBLASLt] M=19 N=2304 K=3072 ta=0 tb=1 lda=3072 ldb=3072 ldc=2304
./test-mlx.sh: line 7: 16378 Segmentation fault (core dumped)