Skip to content

Commit 40fc7df

Browse files
committed
Add runtime detection for APX-F and AVX10
1 parent 99ba556 commit 40fc7df

File tree

2 files changed

+47
-25
lines changed

2 files changed

+47
-25
lines changed

library/std_detect/src/detect/arch/x86.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,12 @@ features! {
233233
/// AMX-TF32 (TensorFloat32 Operations)
234234
@FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_transpose: "amx-transpose";
235235
/// AMX-TRANSPOSE (Matrix Transpose Operations)
236+
@FEATURE: #[unstable(feature = "apx_target_feature", issue = "139284")] apxf: "apxf";
237+
/// APX-F (Advanced Performance Extensions - Foundation)
238+
@FEATURE: #[unstable(feature = "avx10_target_feature", issue = "138843")] avx10_1: "avx10.1";
239+
/// AVX10.1
240+
@FEATURE: #[unstable(feature = "avx10_target_feature", issue = "138843")] avx10_2: "avx10.2";
241+
/// AVX10.2
236242
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] f16c: "f16c";
237243
/// F16C (Conversions between IEEE-754 `binary16` and `binary32` formats)
238244
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] fma: "fma";

library/std_detect/src/detect/os/x86.rs

Lines changed: 41 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,32 @@ pub(crate) fn detect_features() -> cache::Initializer {
137137
enable(ebx, 2, Feature::widekl);
138138
}
139139

140+
// This detects ABM on AMD CPUs and LZCNT on Intel CPUs.
141+
// On Intel CPUs with popcnt, lzcnt implements the
142+
// "missing part" of ABM, so we map both to the same
143+
// internal feature.
144+
//
145+
// The `is_x86_feature_detected!("lzcnt")` macro then
146+
// internally maps to Feature::abm.
147+
enable(extended_proc_info_ecx, 5, Feature::lzcnt);
148+
149+
// Hygon Dhyana originates from AMD technology and shares most of its architecture with
150+
// AMD's family 17h, but uses a different CPU Vendor ID ("HygonGenuine") and Family series
151+
// number (Family 18h).
152+
//
153+
// For CPUID feature bits, Hygon Dhyana (family 18h) shares the same definitions as AMD
154+
// family 17h.
155+
//
156+
// Related AMD CPUID specification is https://www.amd.com/system/files/TechDocs/25481.pdf.
157+
// Related Hygon kernel patch can be found on
158+
// http://lkml.kernel.org/r/5ce86123a7b9dad925ac583d88d2f921040e859b.1538583282.git.puwen@hygon.cn
159+
if vendor_id == *b"AuthenticAMD" || vendor_id == *b"HygonGenuine" {
160+
// These features are available on AMD arch CPUs:
161+
enable(extended_proc_info_ecx, 6, Feature::sse4a);
162+
enable(extended_proc_info_ecx, 21, Feature::tbm);
163+
enable(extended_proc_info_ecx, 11, Feature::xop);
164+
}
165+
140166
// `XSAVE` and `AVX` support:
141167
let cpu_xsave = bit::test(proc_info_ecx as usize, 26);
142168
if cpu_xsave {
@@ -161,6 +187,7 @@ pub(crate) fn detect_features() -> cache::Initializer {
161187
// * AVX -> `XCR0.AVX[2]`
162188
// * AVX-512 -> `XCR0.AVX-512[7:5]`.
163189
// * AMX -> `XCR0.AMX[18:17]`
190+
// * APX -> `XCR0.APX[19]`
164191
//
165192
// by setting the corresponding bits of `XCR0` to `1`.
166193
//
@@ -173,6 +200,8 @@ pub(crate) fn detect_features() -> cache::Initializer {
173200
let os_avx512_support = xcr0 & 0xe0 == 0xe0;
174201
// Test `XCR0.AMX[18:17]` with the mask `0b110_0000_0000_0000_0000 == 0x60000`
175202
let os_amx_support = xcr0 & 0x60000 == 0x60000;
203+
// Test `XCR0.APX[19]` with the mask `0b1000_0000_0000_0000_0000 == 0x80000`
204+
let os_apx_support = xcr0 & 0x80000 == 0x80000;
176205

177206
// Only if the OS and the CPU support saving/restoring the AVX
178207
// registers we enable `xsave` support:
@@ -262,33 +291,20 @@ pub(crate) fn detect_features() -> cache::Initializer {
262291
enable(amx_feature_flags_eax, 8, Feature::amx_movrs);
263292
}
264293
}
265-
}
266-
}
267294

268-
// This detects ABM on AMD CPUs and LZCNT on Intel CPUs.
269-
// On intel CPUs with popcnt, lzcnt implements the
270-
// "missing part" of ABM, so we map both to the same
271-
// internal feature.
272-
//
273-
// The `is_x86_feature_detected!("lzcnt")` macro then
274-
// internally maps to Feature::abm.
275-
enable(extended_proc_info_ecx, 5, Feature::lzcnt);
295+
if os_apx_support {
296+
enable(extended_features_edx_leaf_1, 21, Feature::apxf);
297+
}
276298

277-
// As Hygon Dhyana originates from AMD technology and shares most of the architecture with
278-
// AMD's family 17h, but with different CPU Vendor ID("HygonGenuine")/Family series
279-
// number(Family 18h).
280-
//
281-
// For CPUID feature bits, Hygon Dhyana(family 18h) share the same definition with AMD
282-
// family 17h.
283-
//
284-
// Related AMD CPUID specification is https://www.amd.com/system/files/TechDocs/25481.pdf.
285-
// Related Hygon kernel patch can be found on
286-
// http://lkml.kernel.org/r/5ce86123a7b9dad925ac583d88d2f921040e859b.1538583282.git.puwen@hygon.cn
287-
if vendor_id == *b"AuthenticAMD" || vendor_id == *b"HygonGenuine" {
288-
// These features are available on AMD arch CPUs:
289-
enable(extended_proc_info_ecx, 6, Feature::sse4a);
290-
enable(extended_proc_info_ecx, 21, Feature::tbm);
291-
enable(extended_proc_info_ecx, 11, Feature::xop);
299+
let avx10_1 = enable(extended_features_edx_leaf_1, 19, Feature::avx10_1);
300+
if avx10_1 {
301+
let CpuidResult { ebx, .. } = unsafe { __cpuid(0x24) };
302+
let avx10_version = ebx & 0xff;
303+
if avx10_version >= 2 {
304+
value.set(Feature::avx10_2 as u32);
305+
}
306+
}
307+
}
292308
}
293309
}
294310

0 commit comments

Comments
 (0)