From d3d439dc083d5c7f83a49754230b4a376adf5982 Mon Sep 17 00:00:00 2001 From: sarah <> Date: Sun, 12 Nov 2023 15:07:13 +0100 Subject: [PATCH 1/4] Create 0000-struct-target-feature.md --- text/0000-struct-target-feature.md | 280 +++++++++++++++++++++++++++++ 1 file changed, 280 insertions(+) create mode 100644 text/0000-struct-target-feature.md diff --git a/text/0000-struct-target-feature.md b/text/0000-struct-target-feature.md new file mode 100644 index 00000000000..58712becc20 --- /dev/null +++ b/text/0000-struct-target-feature.md @@ -0,0 +1,280 @@ +# Summary + +[summary]: #summary + +Allow adding `#[target_feature(enable = "...")]` attributes to unit structs, and +enable the corresponding target features to functions taking those structs as +parameters. + +# Motivation + +[motivation]: #motivation + +Currently, the only way to tell the compiler it can assume the availability of +hardware features is by annotating a function with the corresponding +`#[target_feature]` attribute. This requires that the annotated function be +marked as unsafe as the caller must check whether the features are available at +runtime. +This also makes it difficult for library authors to use in certain situations, as +they may not know which features the library user wants to detect, and at what +level the dynamic dispatch should be done. + +Assume we want to implement a library function that multiplies a slice of `f64` +values by `2.0`. + +```rust +pub fn times_two(v: &mut [f64]) { + for v in v { + *v *= 2.0; + } +} +``` + +Generally speaking, during code generation, the compiler will only assume the +availability of globally enabled target features (e.g., `sse2` on `x86-64` +unless additional feature flags are passed to the compiler). + +This means that if the code is run on a machine with more efficient features +such as `avx2`, the function will not be able to make good use of them. + +To improve performance, the library author may decide to add runtime feature +detection to their implementation, choosing subsets of features to detect. + +```rust +#[inline(always)] +fn times_two_generic(v: &mut [f64]) { + for v in v { + *v *= 2.0; + } +} + +#[target_feature(enable = "avx")] +unsafe fn times_two_avx(v: &mut [f64]) { + times_two_generic(v); +} + +#[target_feature(enable = "avx512f")] +unsafe fn times_two_avx512f(v: &mut [f64]) { + times_two_generic(v); +} + +pub fn times_two(v: &mut[f64]) { + if is_x86_feature_detected!("avx512f") { + times_two_avx512f(v); + } else if is_x86_feature_detected!("avx") { + times_two_avx(v); + } else { + times_two_generic(v); + } +} +``` + +This decision, however, comes with a few drawbacks: + +- The runtime dispatch now implies that the code has some additional overhead + to detect the hardware features, which can harm performance for small + slices. +- The addition of more code paths increases binary size. +- The dispatch acts as a barrier that prevents inlining, which can prevent + compiler optimizations at the call-site. +- This requires adding unsafe code to the library, which has a maintenance cost. + +The proposed alternative offers solutions for these issues. + +# Guide-level explanation + +[guide-level-explanation]: #guide-level-explanation + +Suppose we define the following structs: + +```rust +#[target_feature(enable = "avx")] +#[derive(Clone, Copy, Debug)] +pub struct Avx; + +#[target_feature(enable = "avx512f")] +#[derive(Clone, Copy, Debug)] +pub struct Avx512f; +``` + +The `#[target_feature(enable = "avx")]` annotation informs the compiler that +instances of this struct can only be created if the `avx` target feature is +available, and allows it to optimize code based on that assumption. + +Note that this makes the creation of instances of type `Avx` unsafe. + +Now assume that the following methods are defined. + +```rust +#[inline] +pub fn try_new_avx() -> Option { + if is_x86_feature_detected!("avx") { + Some(unsafe { Avx }) + } else { + None + } +} + +#[inline] +pub fn try_new_avx512f() -> Option { + if is_x86_feature_detected!("avxf") { + Some(unsafe { Avx512f }) + } else { + None + } +} +``` + +Then the library code can now be written as + +```rust +pub fn times_two(simd: S, v: &mut [f64]) { + for v in v { + *v *= 2.0; + } +} +``` + +The user can now call this function in this manner. + +```rust +fn main() { + let mut v = [1.0; 1024]; + + if let Some(simd) = try_new_avx512f() { + times_two(simd, &mut v); // 1 + } else if let Some(simd) = try_new_avx() { + times_two(simd, &mut v); // 2 + } else { + times_two((), &mut v); // 3 + } +} +``` + +In the first branch, the compiler instantiates and calls the function +`times_two::`, which has the signature `fn(Avx512f, &mut [f64])`. +Since the function takes as an input parameter `Avx512f`, that means that +calling this function implies that the `avx512f` feature is available, which +allows the compiler to perform optimizations that wouldn't otherwise be +possible (in this case, automatically vectorizing the code with AVX512 +instructions). + +In the second branch, the same logic applies but for the `Avx` struct and the +`avx` feature. + +In the third branch, the called function has the signature `fn((), &mut [f64])`. +None of its parameters have types that were annotated with the +`#[target_feature]` attribute, so the compiler can't assume the availability of +features other than those that are enabled at the global scope. + +Moving the dispatch responsibility to the caller allows more control over how +the dispatch is performed, whether to optimize for code size or performance. + +Additionally, the process no longer requires any unsafe code. + +# Reference-level explanation + +[reference-level-explanation]: #reference-level-explanation + +This RFC proposes that unit structs be allowed to have one or several +`#[target_feature(enable = "...")]` attributes. + +Structs with such annotations are unsafe to construct. Creating an instance of +such a struct has the same safety requirements as calling a function marked with +the same `#[target_feature]` attribute. + +This RFC additionally proposes that functions taking parameters with a type +that has been annotated with a `#[target_feature]`, +also behave as if they have been annotated with the corresponding +`#[target_feature(enable = "...")]`, except that this doesn't impose on them +the requirement of having to be marked `unsafe`. + +# Drawbacks + +[drawbacks]: #drawbacks + +Implicitly annotating the functions with the `#[target_feature]` attribute may +cause them to be uninlined in certain situations, which may pessimize +performance. + +Since the proposed API is opt-in, this has no effect on existing code. + +# Rationale and alternatives + +[rationale-and-alternatives]: #rationale-and-alternatives + +One alternative for automatically enabling target features is determining them +based on the function body. If the compiler can prove that a function `outer` +unconditionally calls a function `inner`, then it could hypotheticall inherit `inner`'s +target features. This would make the target features of `outer` depend on the +contents of its body and may possibly affect its ABI due to enabling target +features based on that. +Our approach on the other hand makes the implicitly enabled target features of +a function knowable from those of its parameters. This avoids the issue of the ABI of the +function's ABI changing without changing its interface, or the ABI of its parameter types. + +An alternative option to automatically enabling target features could be to make them also opt-in +at the level of function declaration. Let us take our previous example: + +```rust +pub fn times_two(simd: S, v: &mut [f64]) { + // ... +} +``` + +This RFC suggests that all of the input parameters of `times_two` are scanned +during monomorphization, and target features are inherited from them +appropriately. The alternative is to explicitly mark which parameters +`times_two` is allowed to inherit target features from. Perhaps through the use +of a second attribute. + +```rust +pub fn times_two(#[inherit_target_feature] simd: S, v: &mut [f64]) { + // ... +} +``` + +It is not clear if there are any advantages to this approach, other than being +more explicit. + +# Unresolved questions + +[unresolved-questions]: #unresolved-questions + +The main unresolved part of this RFC is whether the target features should be +inherited implicitly or explicitly. +The implicit approach could allow for more aggressive optimizations, but the explicit one +also sufficiently covers most use cases. + +# Future possibilities + +[future-possibilities]: #future-possibilities + +We do not propose the following in this RFC, but references to a struct marked +with a `#[target_feature]` attribute, as well as tuples and other structs/tuple structs +containing that struct may also implicitly inherit its `#[target_feature]` +attribute. Unlike the explicitly annotated struct, they remain safe to +construct. This is sound because creating them requires creating an instance of +the target feature type to exist, which guarantees the target features' availability. + +Note: `PhantomData` must not inherit target feature attributes from ``, +as it is always safe to construct, despite acting like it contains `T`. + +The advantage of this extension is that it allows target features to naturally compose. +If a user wants to define a structure that enables both of `avx` and `fma`, then they could +define the structures of `Avx` and `Fma` separately, marked with the appropriate target features. +And then simply pass them together in a tuple or a struct containing both. + +```rust +pub fn times_two(simd: S, v: &mut [f64]) { + for v in v { + *v *= 2.0; + } +} +fn main() { + let mut v = [1.0; 1024]; + if let (Some(avx), Some(fma)) = (try_new_avx(), try_new_fma()) { + times_two((avx, fma), &mut v); + } +} +``` From 98a175fe69e26a242bca0c4e72a176f5c5e1f584 Mon Sep 17 00:00:00 2001 From: sarah <> Date: Sun, 12 Nov 2023 15:09:50 +0100 Subject: [PATCH 2/4] Add 3525 PR number --- ...000-struct-target-feature.md => 3525-struct-target-feature.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename text/{0000-struct-target-feature.md => 3525-struct-target-feature.md} (100%) diff --git a/text/0000-struct-target-feature.md b/text/3525-struct-target-feature.md similarity index 100% rename from text/0000-struct-target-feature.md rename to text/3525-struct-target-feature.md From 07c63e5c8aa77c6c788170f58fa5a4d2093df2c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?sarah=20qui=C3=B1ones?= Date: Mon, 21 Oct 2024 05:32:18 +0200 Subject: [PATCH 3/4] update wording to make syntax more explicit, and propose to allow inheriting target features --- text/3525-struct-target-feature.md | 99 ++++++++++++++---------------- 1 file changed, 46 insertions(+), 53 deletions(-) diff --git a/text/3525-struct-target-feature.md b/text/3525-struct-target-feature.md index 58712becc20..d631ce68074 100644 --- a/text/3525-struct-target-feature.md +++ b/text/3525-struct-target-feature.md @@ -128,6 +128,7 @@ pub fn try_new_avx512f() -> Option { Then the library code can now be written as ```rust +#[target_feature(inherit)] pub fn times_two(simd: S, v: &mut [f64]) { for v in v { *v *= 2.0; @@ -176,47 +177,64 @@ Additionally, the process no longer requires any unsafe code. [reference-level-explanation]: #reference-level-explanation -This RFC proposes that unit structs be allowed to have one or several +This RFC proposes that structs and tuple structs be allowed to have one or several `#[target_feature(enable = "...")]` attributes. -Structs with such annotations are unsafe to construct. Creating an instance of +Structs with such annotations are unsafe to construct, except in the body +of a function with the same target features. Creating an instance of such a struct has the same safety requirements as calling a function marked with the same `#[target_feature]` attribute. -This RFC additionally proposes that functions taking parameters with a type -that has been annotated with a `#[target_feature]`, +This RFC additionally proposes that functions annotated with `#[target_feature(inherit)]`, and +taking parameters with a type that has been annotated with a `#[target_feature]`, also behave as if they have been annotated with the corresponding `#[target_feature(enable = "...")]`, except that this doesn't impose on them the requirement of having to be marked `unsafe`. +Structs and tuple structs +containing members with such types can also opt into inheriting its members' `#[target_feature]` +attributes with `#[target_feature(inherit)]`. Unlike the structs annotated, they remain safe to +construct. This is sound because creating them requires creating an instance of +the target feature type to exist, which guarantees the target features' availability. + +Note: `PhantomData` must not inherit target feature attributes from ``, +as it is always safe to construct, despite acting like it contains `T`. + +The advantage of this extension is that it allows target features to naturally compose. +If a user wants to define a structure that enables both of `avx` and `fma`, then they could +define the structures of `Avx` and `Fma` separately, marked with the appropriate target features. +And then simply pass them together in a tuple or a struct containing both. + +```rust +#[target_feature(inherit)] +pub fn times_two(simd: S, v: &mut [f64]) { + for v in v { + *v *= 2.0; + } +} +fn main() { + let mut v = [1.0; 1024]; + if let (Some(avx), Some(fma)) = (try_new_avx(), try_new_fma()) { + times_two((avx, fma), &mut v); + } +} +``` + # Drawbacks [drawbacks]: #drawbacks -Implicitly annotating the functions with the `#[target_feature]` attribute may -cause them to be uninlined in certain situations, which may pessimize -performance. - Since the proposed API is opt-in, this has no effect on existing code. # Rationale and alternatives [rationale-and-alternatives]: #rationale-and-alternatives -One alternative for automatically enabling target features is determining them -based on the function body. If the compiler can prove that a function `outer` -unconditionally calls a function `inner`, then it could hypotheticall inherit `inner`'s -target features. This would make the target features of `outer` depend on the -contents of its body and may possibly affect its ABI due to enabling target -features based on that. -Our approach on the other hand makes the implicitly enabled target features of -a function knowable from those of its parameters. This avoids the issue of the ABI of the -function's ABI changing without changing its interface, or the ABI of its parameter types. - -An alternative option to automatically enabling target features could be to make them also opt-in -at the level of function declaration. Let us take our previous example: +An alternative option to inheriting all target features could be to make them also opt-in +at the parameter level. Let us take our previous example: ```rust +#[target_feature(inherit)] pub fn times_two(simd: S, v: &mut [f64]) { // ... } @@ -229,7 +247,8 @@ appropriately. The alternative is to explicitly mark which parameters of a second attribute. ```rust -pub fn times_two(#[inherit_target_feature] simd: S, v: &mut [f64]) { +#[target_feature(inherit)] +pub fn times_two(#[target_feature] simd: S, v: &mut [f64]) { // ... } ``` @@ -241,40 +260,14 @@ more explicit. [unresolved-questions]: #unresolved-questions -The main unresolved part of this RFC is whether the target features should be -inherited implicitly or explicitly. -The implicit approach could allow for more aggressive optimizations, but the explicit one -also sufficiently covers most use cases. +Should we also allow `#[target_feature(disable = "...")]` to be used with structs? + +Should references implicitly inherit their pointee's target features? This would +potentially impose more strict validity requirements for references of such types +than other types, which may break unsafe generic code that creates references pointing +to uninit data, without dereferencing them. # Future possibilities [future-possibilities]: #future-possibilities -We do not propose the following in this RFC, but references to a struct marked -with a `#[target_feature]` attribute, as well as tuples and other structs/tuple structs -containing that struct may also implicitly inherit its `#[target_feature]` -attribute. Unlike the explicitly annotated struct, they remain safe to -construct. This is sound because creating them requires creating an instance of -the target feature type to exist, which guarantees the target features' availability. - -Note: `PhantomData` must not inherit target feature attributes from ``, -as it is always safe to construct, despite acting like it contains `T`. - -The advantage of this extension is that it allows target features to naturally compose. -If a user wants to define a structure that enables both of `avx` and `fma`, then they could -define the structures of `Avx` and `Fma` separately, marked with the appropriate target features. -And then simply pass them together in a tuple or a struct containing both. - -```rust -pub fn times_two(simd: S, v: &mut [f64]) { - for v in v { - *v *= 2.0; - } -} -fn main() { - let mut v = [1.0; 1024]; - if let (Some(avx), Some(fma)) = (try_new_avx(), try_new_fma()) { - times_two((avx, fma), &mut v); - } -} -``` From 01c44d38f5e22ea79a09aa91e067955891a61dea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?sarah=20qui=C3=B1ones?= Date: Mon, 21 Oct 2024 05:39:11 +0200 Subject: [PATCH 4/4] fix example for inheriting from struct members --- text/3525-struct-target-feature.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/text/3525-struct-target-feature.md b/text/3525-struct-target-feature.md index d631ce68074..5878227d8a6 100644 --- a/text/3525-struct-target-feature.md +++ b/text/3525-struct-target-feature.md @@ -212,10 +212,14 @@ pub fn times_two(simd: S, v: &mut [f64]) { *v *= 2.0; } } + +#[target_feature(inherit)] +struct AvxFma(Avx, Fma); + fn main() { let mut v = [1.0; 1024]; if let (Some(avx), Some(fma)) = (try_new_avx(), try_new_fma()) { - times_two((avx, fma), &mut v); + times_two(AvxFma(avx, fma), &mut v); } } ```