Skip to content

Commit 6c8ab20

Browse files
authored
Remove tzcnt from audited BMI1 instructions (#1380)
* Remove `tzcnt` from audited BMI1 instructions As it turns out [^1], compilers will emit this instruction in some circumstances even when BMI1 is not technically available on your architecture, since by happy accident older CPUs decode this instruction in a backward-compatible way for non-zero inputs. [^1]: https://stackoverflow.com/questions/61422827/does-x64-support-imply-bmi1-support * Add fix to `generate_instructions_list.jl` (+ re-run!)
1 parent 2a5e711 commit 6c8ab20

File tree

2 files changed

+115
-4
lines changed

2 files changed

+115
-4
lines changed

contrib/generate_instructions_list.jl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,13 @@ function generate_dict()
5757
end
5858
end
5959
end
60+
if name == "bmi1"
61+
# `tzcnt` can be legally emitted by compilers in some cases even when
62+
# the BMI1 feature is not available, so do not audit it.
63+
#
64+
# see: https://stackoverflow.com/questions/61422827/does-x64-support-imply-bmi1-support
65+
deleteat!(instructions, findfirst(==("tzcnt"), instructions))
66+
end
6067
dict[name] = instructions
6168
end
6269
free(xml)

src/auditor/instructions.json

Lines changed: 108 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,6 @@
200200
"pmulld",
201201
"popcnt",
202202
"ptest",
203-
"rex crc32",
204203
"roundpd",
205204
"roundps",
206205
"roundsd",
@@ -301,6 +300,8 @@
301300
"v4fmaddss",
302301
"v4fnmaddps",
303302
"v4fnmaddss",
303+
"vaddph",
304+
"vaddsh",
304305
"vaesdec",
305306
"vaesdeclast",
306307
"vaesenc",
@@ -319,35 +320,72 @@
319320
"vbroadcasti32x8",
320321
"vbroadcasti64x2",
321322
"vbroadcasti64x4",
323+
"vcmpph",
324+
"vcmpsh",
325+
"vcomish",
322326
"vcompresspd",
323327
"vcompressps",
328+
"vcvtdq2ph",
324329
"vcvtne2ps2bf16",
325330
"vcvtneps2bf16",
331+
"vcvtpd2ph",
326332
"vcvtpd2qq",
327333
"vcvtpd2udq",
328334
"vcvtpd2uqq",
335+
"vcvtph2dq",
336+
"vcvtph2pd",
337+
"vcvtph2psx",
338+
"vcvtph2qq",
339+
"vcvtph2udq",
340+
"vcvtph2uqq",
341+
"vcvtph2uw",
342+
"vcvtph2w",
343+
"vcvtps2phx",
329344
"vcvtps2qq",
330345
"vcvtps2udq",
331346
"vcvtps2uqq",
332347
"vcvtqq2pd",
348+
"vcvtqq2ph",
333349
"vcvtqq2ps",
350+
"vcvtsd2sh",
334351
"vcvtsd2usi",
352+
"vcvtsh2sd",
353+
"vcvtsh2si",
354+
"vcvtsh2ss",
355+
"vcvtsh2usi",
356+
"vcvtsi2sh",
357+
"vcvtss2sh",
335358
"vcvtss2usi",
336359
"vcvttpd2qq",
337360
"vcvttpd2udq",
338361
"vcvttpd2uqq",
362+
"vcvttph2dq",
363+
"vcvttph2qq",
364+
"vcvttph2udq",
365+
"vcvttph2uqq",
366+
"vcvttph2uw",
367+
"vcvttph2w",
339368
"vcvttps2qq",
340369
"vcvttps2udq",
341370
"vcvttps2uqq",
342371
"vcvttsd2usi",
372+
"vcvttsh2si",
373+
"vcvttsh2usi",
343374
"vcvttss2usi",
344375
"vcvtudq2pd",
376+
"vcvtudq2ph",
345377
"vcvtudq2ps",
346378
"vcvtuqq2pd",
379+
"vcvtuqq2ph",
347380
"vcvtuqq2ps",
348381
"vcvtusi2sd",
382+
"vcvtusi2sh",
349383
"vcvtusi2ss",
384+
"vcvtuw2ph",
385+
"vcvtw2ph",
350386
"vdbpsadbw",
387+
"vdivph",
388+
"vdivsh",
351389
"vdpbf16ps",
352390
"vexp2pd",
353391
"vexp2ps",
@@ -361,13 +399,53 @@
361399
"vextracti32x8",
362400
"vextracti64x2",
363401
"vextracti64x4",
402+
"vfcmaddcph",
403+
"vfcmaddcsh",
404+
"vfcmulcph",
405+
"vfcmulcsh",
364406
"vfixupimmpd",
365407
"vfixupimmps",
366408
"vfixupimmsd",
367409
"vfixupimmss",
410+
"vfmadd132ph",
411+
"vfmadd132sh",
412+
"vfmadd213ph",
413+
"vfmadd213sh",
414+
"vfmadd231ph",
415+
"vfmadd231sh",
416+
"vfmaddcph",
417+
"vfmaddcsh",
418+
"vfmaddsub132ph",
419+
"vfmaddsub213ph",
420+
"vfmaddsub231ph",
421+
"vfmsub132ph",
422+
"vfmsub132sh",
423+
"vfmsub213ph",
424+
"vfmsub213sh",
425+
"vfmsub231ph",
426+
"vfmsub231sh",
427+
"vfmsubadd132ph",
428+
"vfmsubadd213ph",
429+
"vfmsubadd231ph",
430+
"vfmulcph",
431+
"vfmulcsh",
432+
"vfnmadd132ph",
433+
"vfnmadd132sh",
434+
"vfnmadd213ph",
435+
"vfnmadd213sh",
436+
"vfnmadd231ph",
437+
"vfnmadd231sh",
438+
"vfnmsub132ph",
439+
"vfnmsub132sh",
440+
"vfnmsub213ph",
441+
"vfnmsub213sh",
442+
"vfnmsub231ph",
443+
"vfnmsub231sh",
368444
"vfpclasspd",
445+
"vfpclassph",
369446
"vfpclassps",
370447
"vfpclasssd",
448+
"vfpclasssh",
371449
"vfpclassss",
372450
"vgatherdpd",
373451
"vgatherdps",
@@ -382,12 +460,16 @@
382460
"vgatherqpd",
383461
"vgatherqps",
384462
"vgetexppd",
463+
"vgetexpph",
385464
"vgetexpps",
386465
"vgetexpsd",
466+
"vgetexpsh",
387467
"vgetexpss",
388468
"vgetmantpd",
469+
"vgetmantph",
389470
"vgetmantps",
390471
"vgetmantsd",
472+
"vgetmantsh",
391473
"vgetmantss",
392474
"vgf2p8affineinvqb",
393475
"vgf2p8affineqb",
@@ -400,12 +482,20 @@
400482
"vinserti32x8",
401483
"vinserti64x2",
402484
"vinserti64x4",
485+
"vmaxph",
486+
"vmaxsh",
487+
"vminph",
488+
"vminsh",
403489
"vmovdqa32",
404490
"vmovdqa64",
405491
"vmovdqu16",
406492
"vmovdqu32",
407493
"vmovdqu64",
408494
"vmovdqu8",
495+
"vmovsh",
496+
"vmovw",
497+
"vmulph",
498+
"vmulsh",
409499
"vp2intersectd",
410500
"vp2intersectq",
411501
"vp4dpwssd",
@@ -557,13 +647,19 @@
557647
"vrcp28ps",
558648
"vrcp28sd",
559649
"vrcp28ss",
650+
"vrcpph",
651+
"vrcpsh",
560652
"vreducepd",
653+
"vreduceph",
561654
"vreduceps",
562655
"vreducesd",
656+
"vreducesh",
563657
"vreducess",
564658
"vrndscalepd",
659+
"vrndscaleph",
565660
"vrndscaleps",
566661
"vrndscalesd",
662+
"vrndscalesh",
567663
"vrndscaless",
568664
"vrsqrt14pd",
569665
"vrsqrt14ps",
@@ -573,9 +669,13 @@
573669
"vrsqrt28ps",
574670
"vrsqrt28sd",
575671
"vrsqrt28ss",
672+
"vrsqrtph",
673+
"vrsqrtsh",
576674
"vscalefpd",
675+
"vscalefph",
577676
"vscalefps",
578677
"vscalefsd",
678+
"vscalefsh",
579679
"vscalefss",
580680
"vscatterdpd",
581681
"vscatterdps",
@@ -592,7 +692,12 @@
592692
"vshuff32x4",
593693
"vshuff64x2",
594694
"vshufi32x4",
595-
"vshufi64x2"
695+
"vshufi64x2",
696+
"vsqrtph",
697+
"vsqrtsh",
698+
"vsubph",
699+
"vsubsh",
700+
"vucomish"
596701
],
597702
"f16c": [
598703
"vcvtph2ps",
@@ -1035,8 +1140,7 @@
10351140
"bextr",
10361141
"blsi",
10371142
"blsmsk",
1038-
"blsr",
1039-
"tzcnt"
1143+
"blsr"
10401144
],
10411145
"adcx": []
10421146
}

0 commit comments

Comments
 (0)