From 90dbc318c17f306f4c3666cc54fa81f6a1af347c Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Mon, 3 Mar 2025 10:26:58 +0100 Subject: [PATCH] Revert "add fma to native builtins for AMD (#764)" This reverts commit 5ab41528149411866d0ea2254e286e6f2f971a07. Older AMD chips do not have an OpenCL-compliant native fma implementation. But clpeak performance will be fixed for those with https://github.com/google/clspv/commit/eddae278f03941c3da99bbc9c89d370188fb6831 --- src/device_properties.cpp | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/src/device_properties.cpp b/src/device_properties.cpp index 45b6a086..c61c59de 100644 --- a/src/device_properties.cpp +++ b/src/device_properties.cpp @@ -133,20 +133,17 @@ struct cvk_device_properties_amd : public cvk_device_properties { cl_uint get_max_cmd_group_size() const override final { return 1; } const std::set get_native_builtins() const override final { return std::set({ - "ceil", "copysign", "exp2", - "fdim", "floor", "fma", - "fmax", "fmin", "frexp", - "half_exp", "half_exp10", "half_exp2", - "half_log", "half_log10", "half_log2", - "half_powr", "half_rsqrt", "half_sqrt", - "isequal", "isfinite", "isgreater", - "isgreaterequal", "isinf", "isless", - "islessequal", "islessgreater", "isnan", - "isnormal", "isnotequal", "isordered", - "isunordered", "ldexp", "log", - "log10", "log2", "mad", - "rint", "round", "rsqrt", - "signbit", "sqrt", "trunc", + "ceil", "copysign", "exp2", "fdim", + "floor", "fmax", "fmin", "frexp", + "half_exp", "half_exp10", "half_exp2", "half_log", + "half_log10", "half_log2", "half_powr", "half_rsqrt", + "half_sqrt", "isequal", "isfinite", "isgreater", + "isgreaterequal", "isinf", "isless", "islessequal", + "islessgreater", "isnan", "isnormal", "isnotequal", + "isordered", "isunordered", "ldexp", "log", + "log10", "log2", "mad", "rint", + "round", "rsqrt", "signbit", "sqrt", + "trunc", }); } std::string get_compile_options() const override final {