From bf764ccd21c0a524a89c4919821542009bd8b150 Mon Sep 17 00:00:00 2001 From: Sam Schweigel Date: Fri, 24 Jan 2025 10:36:38 -0800 Subject: [PATCH 1/7] Add additional fpext/fptrunc tests for Float16/BFloat16 --- test/intrinsics.jl | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/test/intrinsics.jl b/test/intrinsics.jl index bc1838ce2c68b..e5f3816b05fd6 100644 --- a/test/intrinsics.jl +++ b/test/intrinsics.jl @@ -18,9 +18,25 @@ truncbool(u) = reinterpret(UInt8, reinterpret(Bool, u)) @testset "runtime intrinsics" begin @test Core.Intrinsics.add_int(1, 1) == 2 @test Core.Intrinsics.sub_int(1, 1) == 0 + + @test_throws ErrorException("fpext: output bitsize must be >= input bitsize") Core.Intrinsics.fpext(Float32, 1.0) + @test_throws ErrorException("fpext: output bitsize must be >= input bitsize") Core.Intrinsics.fpext(Float32, 1.0) @test_throws ErrorException("fpext: output bitsize must be >= input bitsize") Core.Intrinsics.fpext(Int32, 0x0000_0000_0000_0000) + @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize") Core.Intrinsics.fptrunc(Int32, 0x0000_0000) @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize") Core.Intrinsics.fptrunc(Int64, 0x0000_0000) + @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize") Core.Intrinsics.fptrunc(Float16, Float16(1.0)) + @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize") Core.Intrinsics.fptrunc(Core.BFloat16, Float16(1.0)) + @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize") Core.Intrinsics.fptrunc(Float32, Float16(1.0)) + @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize") Core.Intrinsics.fptrunc(Float32, 1.0f0) + @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize") Core.Intrinsics.fptrunc(Float64, 1.0) + + let bf16_1 = Core.Intrinsics.bitcast(Core.BFloat16, 0x3f80) + @test_throws ErrorException("fptrunc: output 
bitsize must be < input bitsize") Core.Intrinsics.fptrunc(Core.BFloat16, bf16_1) + @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize") Core.Intrinsics.fptrunc(Float16, bf16_1) + @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize") Core.Intrinsics.fptrunc(Float32, bf16_1) + end + @test_throws ErrorException("ZExt: output bitsize must be > input bitsize") Core.Intrinsics.zext_int(Int8, 0x00) @test_throws ErrorException("SExt: output bitsize must be > input bitsize") Core.Intrinsics.sext_int(Int8, 0x00) @test_throws ErrorException("ZExt: output bitsize must be > input bitsize") Core.Intrinsics.zext_int(Int8, 0x0000) @@ -220,8 +236,7 @@ end @test_intrinsic Core.Intrinsics.abs_float Float16(-3.3) Float16(3.3) @test_intrinsic Core.Intrinsics.neg_float Float16(3.3) Float16(-3.3) # See - #broken @test_intrinsic Core.Intrinsics.fpext Float16 Float16(3.3) Float16(3.3) - @test_broken Core.Intrinsics.fpext(Float16, Float16(3.3)) === Float16(3.3) + @test_intrinsic Core.Intrinsics.fpext Float16 Float16(3.3) Float16(3.3) @test_intrinsic Core.Intrinsics.fpext Float32 Float16(3.3) 3.3007812f0 @test_intrinsic Core.Intrinsics.fpext Float64 Float16(3.3) 3.30078125 @test_intrinsic Core.Intrinsics.fptrunc Float16 Float32(3.3) Float16(3.3) From d4ad742aa3d6beb8a9dcd153f298a18bfa18b20b Mon Sep 17 00:00:00 2001 From: Sam Schweigel Date: Fri, 24 Jan 2025 13:49:15 -0800 Subject: [PATCH 2/7] Enforce bitsize restrictions on fptrunc/fpext (fixes #57130) --- src/runtime_intrinsics.c | 149 +++++++++++++++++++++++++-------------- 1 file changed, 97 insertions(+), 52 deletions(-) diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c index 49d510cc48c34..3bb1efba039a8 100644 --- a/src/runtime_intrinsics.c +++ b/src/runtime_intrinsics.c @@ -761,33 +761,25 @@ static inline void name(unsigned osize, jl_value_t *ty, void *pa, void *pr) JL_N OP(ty, (c_type*)pr, a); \ } -#define un_fintrinsic_half(OP, name) \ -static inline void name(unsigned 
osize, jl_value_t *ty, void *pa, void *pr) JL_NOTSAFEPOINT \ -{ \ - uint16_t a = *(uint16_t*)pa; \ - float A = half_to_float(a); \ - if (osize == 16) { \ - float R; \ - OP(ty, &R, A); \ - *(uint16_t*)pr = float_to_half(R); \ - } else { \ - OP(ty, (uint16_t*)pr, A); \ - } \ -} +#define un_fintrinsic_half(OP, name) \ + static inline void name(unsigned osize, jl_value_t *ty, void *pa, void *pr) \ + JL_NOTSAFEPOINT \ + { \ + uint16_t a = *(uint16_t *)pa; \ + float R, A = half_to_float(a); \ + OP(ty, &R, A); \ + *(uint16_t *)pr = float_to_half(R); \ + } -#define un_fintrinsic_bfloat(OP, name) \ -static inline void name(unsigned osize, jl_value_t *ty, void *pa, void *pr) JL_NOTSAFEPOINT \ -{ \ - uint16_t a = *(uint16_t*)pa; \ - float A = bfloat_to_float(a); \ - if (osize == 16) { \ - float R; \ - OP(ty, &R, A); \ - *(uint16_t*)pr = float_to_bfloat(R); \ - } else { \ - OP(ty, (uint16_t*)pr, A); \ - } \ -} +#define un_fintrinsic_bfloat(OP, name) \ + static inline void name(unsigned osize, jl_value_t *ty, void *pa, void *pr) \ + JL_NOTSAFEPOINT \ + { \ + uint16_t a = *(uint16_t *)pa; \ + float R, A = bfloat_to_float(a); \ + OP(ty, &R, A); \ + *(uint16_t *)pr = float_to_bfloat(R); \ + } // float or integer inputs // OP::Function macro(inputa, inputb) @@ -1629,32 +1621,85 @@ cvt_iintrinsic(LLVMUItoFP, uitofp) cvt_iintrinsic(LLVMFPtoSI, fptosi) cvt_iintrinsic(LLVMFPtoUI, fptoui) -#define fptrunc(tr, pr, a) \ - if (!(osize < 8 * sizeof(a))) \ - jl_error("fptrunc: output bitsize must be < input bitsize"); \ - else if (osize == 16) { \ - if ((jl_datatype_t*)tr == jl_float16_type) \ - *(uint16_t*)pr = float_to_half(a); \ - else /*if ((jl_datatype_t*)tr == jl_bfloat16_type)*/ \ - *(uint16_t*)pr = float_to_bfloat(a); \ - } \ - else if (osize == 32) \ - *(float*)pr = a; \ - else if (osize == 64) \ - *(double*)pr = a; \ - else \ - jl_error("fptrunc: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64"); -#define fpext(tr, pr, a) \ - if (!(osize 
>= 8 * sizeof(a))) \ - jl_error("fpext: output bitsize must be >= input bitsize"); \ - if (osize == 32) \ - *(float*)pr = a; \ - else if (osize == 64) \ - *(double*)pr = a; \ - else \ - jl_error("fpext: runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64"); -un_fintrinsic_withtype(fptrunc,fptrunc) -un_fintrinsic_withtype(fpext,fpext) +#define fintrinsic_read_float16(p) half_to_float(*(uint16_t *)p) +#define fintrinsic_read_bfloat16(p) bfloat_to_float(*(uint16_t *)p) +#define fintrinsic_read_float32(p) *(float *)p +#define fintrinsic_read_float64(p) *(double *)p + +#define fintrinsic_write_float16(p, x) *(uint16_t *)p = float_to_half(x) +#define fintrinsic_write_bfloat16(p, x) *(uint16_t *)p = float_to_bfloat(x) +#define fintrinsic_write_float32(p, x) *(float *)p = x +#define fintrinsic_write_float64(p, x) *(double *)p = x + +/* + * aty: Type of value argument (input) + * pa: Pointer to value argument data + * ty: Type argument (output) + * pr: Pointer to result data + */ + +static inline void fptrunc(jl_datatype_t *aty, void *pa, jl_datatype_t *ty, void *pr) +{ + unsigned isize = jl_datatype_size(aty), osize = jl_datatype_size(ty); + if (!(osize < isize)) { + jl_error("fptrunc: output bitsize must be < input bitsize"); + return; + } + +#define fptrunc_convert(in, out) \ + else if (aty == jl_##in##_type && ty == jl_##out##_type) \ + fintrinsic_write_##out(pr, fintrinsic_read_##in(pa)) + + if (0) + ; + fptrunc_convert(float32, float16); + fptrunc_convert(float64, float16); + fptrunc_convert(float32, bfloat16); + fptrunc_convert(float64, bfloat16); + fptrunc_convert(float64, float32); + else + jl_error("fptrunc: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64"); +#undef fptrunc_convert +} + +static inline void fpext(jl_datatype_t *aty, void *pa, jl_datatype_t *ty, void *pr) +{ + unsigned isize = jl_datatype_size(aty), osize = jl_datatype_size(ty); + if (!(osize >= isize)) { + 
jl_error("fpext: output bitsize must be >= input bitsize"); + return; + } + +#define fpext_convert(in, out) \ + else if (aty == jl_##in##_type && ty == jl_##out##_type) \ + fintrinsic_write_##out(pr, fintrinsic_read_##in(pa)) + + if (0) + ; + + fpext_convert(float16, float16); + fpext_convert(float16, bfloat16); + fpext_convert(float16, float32); + fpext_convert(float16, float64); + + fpext_convert(bfloat16, float16); + fpext_convert(bfloat16, bfloat16); + fpext_convert(bfloat16, float32); + fpext_convert(bfloat16, float64); + + fpext_convert(float32, float32); + fpext_convert(float32, float64); + + fpext_convert(float64, float64); + + else + jl_error("fpext: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64"); +#undef fpext_convert +} + +cvt_iintrinsic(fptrunc, fptrunc) +cvt_iintrinsic(fpext, fpext) + // checked arithmetic /** From 9f2a30dc4042fd8f3701de40d232c1b84bde179d Mon Sep 17 00:00:00 2001 From: Sam Schweigel Date: Tue, 28 Jan 2025 15:21:57 -0800 Subject: [PATCH 3/7] Prohibit fpext with input = output bitsize to match LLVM We also throw an exception when using fpext/fptrunc on integer types, because with the addition of BFloat16, there is no unambiguous floating point format with which to interpret the input when it is 16 bits wide. 
--- src/runtime_intrinsics.c | 15 ++------------- test/intrinsics.jl | 8 ++------ 2 files changed, 4 insertions(+), 19 deletions(-) diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c index 3bb1efba039a8..8aba0332a33ee 100644 --- a/src/runtime_intrinsics.c +++ b/src/runtime_intrinsics.c @@ -1665,8 +1665,8 @@ static inline void fptrunc(jl_datatype_t *aty, void *pa, jl_datatype_t *ty, void static inline void fpext(jl_datatype_t *aty, void *pa, jl_datatype_t *ty, void *pr) { unsigned isize = jl_datatype_size(aty), osize = jl_datatype_size(ty); - if (!(osize >= isize)) { - jl_error("fpext: output bitsize must be >= input bitsize"); + if (!(osize > isize)) { + jl_error("fpext: output bitsize must be > input bitsize"); return; } @@ -1676,22 +1676,11 @@ static inline void fpext(jl_datatype_t *aty, void *pa, jl_datatype_t *ty, void * if (0) ; - - fpext_convert(float16, float16); - fpext_convert(float16, bfloat16); fpext_convert(float16, float32); fpext_convert(float16, float64); - - fpext_convert(bfloat16, float16); - fpext_convert(bfloat16, bfloat16); fpext_convert(bfloat16, float32); fpext_convert(bfloat16, float64); - - fpext_convert(float32, float32); fpext_convert(float32, float64); - - fpext_convert(float64, float64); - else jl_error("fpext: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64"); #undef fpext_convert diff --git a/test/intrinsics.jl b/test/intrinsics.jl index e5f3816b05fd6..b215f1cba0df8 100644 --- a/test/intrinsics.jl +++ b/test/intrinsics.jl @@ -19,9 +19,8 @@ truncbool(u) = reinterpret(UInt8, reinterpret(Bool, u)) @test Core.Intrinsics.add_int(1, 1) == 2 @test Core.Intrinsics.sub_int(1, 1) == 0 - @test_throws ErrorException("fpext: output bitsize must be >= input bitsize") Core.Intrinsics.fpext(Float32, 1.0) - @test_throws ErrorException("fpext: output bitsize must be >= input bitsize") Core.Intrinsics.fpext(Float32, 1.0) - @test_throws ErrorException("fpext: output bitsize must be >= input 
bitsize") Core.Intrinsics.fpext(Int32, 0x0000_0000_0000_0000) + @test_throws ErrorException("fpext: output bitsize must be > input bitsize") Core.Intrinsics.fpext(Float32, 1.0) + @test_throws ErrorException("fpext: output bitsize must be > input bitsize") Core.Intrinsics.fpext(Float32, 1.0) @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize") Core.Intrinsics.fptrunc(Int32, 0x0000_0000) @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize") Core.Intrinsics.fptrunc(Int64, 0x0000_0000) @@ -167,7 +166,6 @@ end # unary @test_intrinsic Core.Intrinsics.abs_float Float64(-3.3) Float64(3.3) @test_intrinsic Core.Intrinsics.neg_float Float64(3.3) Float64(-3.3) - @test_intrinsic Core.Intrinsics.fpext Float64 Float64(3.3) Float64(3.3) # binary @test_intrinsic Core.Intrinsics.add_float Float64(3.3) Float64(2) Float64(5.3) @@ -200,7 +198,6 @@ end # unary @test_intrinsic Core.Intrinsics.abs_float Float32(-3.3) Float32(3.3) @test_intrinsic Core.Intrinsics.neg_float Float32(3.3) Float32(-3.3) - @test_intrinsic Core.Intrinsics.fpext Float32 Float32(3.3) Float32(3.3) @test_intrinsic Core.Intrinsics.fpext Float64 Float32(3.3) 3.299999952316284 @test_intrinsic Core.Intrinsics.fptrunc Float32 Float64(3.3) Float32(3.3) @@ -236,7 +233,6 @@ end @test_intrinsic Core.Intrinsics.abs_float Float16(-3.3) Float16(3.3) @test_intrinsic Core.Intrinsics.neg_float Float16(3.3) Float16(-3.3) # See - @test_intrinsic Core.Intrinsics.fpext Float16 Float16(3.3) Float16(3.3) @test_intrinsic Core.Intrinsics.fpext Float32 Float16(3.3) 3.3007812f0 @test_intrinsic Core.Intrinsics.fpext Float64 Float16(3.3) 3.30078125 @test_intrinsic Core.Intrinsics.fptrunc Float16 Float32(3.3) Float16(3.3) From 31ecec5983098633fc1bd6be102b457f8f4f9d47 Mon Sep 17 00:00:00 2001 From: Sam Schweigel Date: Wed, 29 Jan 2025 16:14:02 -0800 Subject: [PATCH 4/7] Update effect inference, codegen for new fpext/fptrunc behaviour BFloat16 tests that trigger codegen are disabled for now, 
pending an LLVM fix. --- Compiler/src/tfuncs.jl | 14 +++++++++++ Compiler/test/effects.jl | 11 +++++++++ src/intrinsics.cpp | 23 +++++++++++------ test/intrinsics.jl | 53 +++++++++++++++++++++++++++++++++++----- 4 files changed, 87 insertions(+), 14 deletions(-) diff --git a/Compiler/src/tfuncs.jl b/Compiler/src/tfuncs.jl index 74c8026ca0cf5..ef3c524275130 100644 --- a/Compiler/src/tfuncs.jl +++ b/Compiler/src/tfuncs.jl @@ -2454,6 +2454,9 @@ const _SPECIAL_BUILTINS = Any[ Core._apply_iterate, ] +# Types compatible with fpext/fptrunc +const _FLOAT_TYPES = Any[Core.BFloat16, Float16, Float32, Float64] + function isdefined_effects(𝕃::AbstractLattice, argtypes::Vector{Any}) # consistent if the first arg is immutable na = length(argtypes) @@ -2867,6 +2870,17 @@ function intrinsic_exct(𝕃::AbstractLattice, f::IntrinsicFunction, argtypes::V if !(isprimitivetype(ty) && isprimitivetype(xty)) return ErrorException end + + # fpext and fptrunc have further restrictions on the allowed types. + if f === Intrinsics.fpext && + !(ty in _FLOAT_TYPES && xty in _FLOAT_TYPES && Core.sizeof(ty) > Core.sizeof(xty)) + return ErrorException + end + if f === Intrinsics.fptrunc && + !(ty in _FLOAT_TYPES && xty in _FLOAT_TYPES && Core.sizeof(ty) < Core.sizeof(xty)) + return ErrorException + end + return Union{} end diff --git a/Compiler/test/effects.jl b/Compiler/test/effects.jl index a7a1d18159137..b8a841b6b74b7 100644 --- a/Compiler/test/effects.jl +++ b/Compiler/test/effects.jl @@ -1384,3 +1384,14 @@ end |> Compiler.is_nothrow @test Base.infer_effects() do @ccall unsafecall()::Cvoid end == Compiler.EFFECTS_UNKNOWN + +# fpext +@test Compiler.intrinsic_nothrow(Core.Intrinsics.fpext, Any[Type{Float32}, Float16]) +@test Compiler.intrinsic_nothrow(Core.Intrinsics.fpext, Any[Type{Float64}, Float16]) +@test Compiler.intrinsic_nothrow(Core.Intrinsics.fpext, Any[Type{Float64}, Float32]) +@test !Compiler.intrinsic_nothrow(Core.Intrinsics.fpext, Any[Type{Float16}, Float16]) +@test 
!Compiler.intrinsic_nothrow(Core.Intrinsics.fpext, Any[Type{Float16}, Float32]) +@test !Compiler.intrinsic_nothrow(Core.Intrinsics.fpext, Any[Type{Float32}, Float32]) +@test !Compiler.intrinsic_nothrow(Core.Intrinsics.fpext, Any[Type{Float32}, Float64]) +@test !Compiler.intrinsic_nothrow(Core.Intrinsics.fpext, Any[Type{Int32}, Float16]) +@test !Compiler.intrinsic_nothrow(Core.Intrinsics.fpext, Any[Type{Float32}, Int16]) diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index 7b5aa7c397129..e4dc2459e8db6 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -672,16 +672,23 @@ static jl_cgval_t generic_cast( uint32_t nb = jl_datatype_size(jlto); Type *to = bitstype_to_llvm((jl_value_t*)jlto, ctx.builder.getContext(), true); Type *vt = bitstype_to_llvm(v.typ, ctx.builder.getContext(), true); - if (toint) - to = INTT(to, DL); - else - to = FLOATT(to); - if (fromint) - vt = INTT(vt, DL); - else - vt = FLOATT(vt); + + // fptrunc fpext depend on the specific floating point format to work + // correctly, and so do not pun their argument types. 
+ if (!(f == fpext || f == fptrunc)) { + if (toint) + to = INTT(to, DL); + else + to = FLOATT(to); + if (fromint) + vt = INTT(vt, DL); + else + vt = FLOATT(vt); + } + if (!to || !vt) return emit_runtime_call(ctx, f, argv, 2); + Value *from = emit_unbox(ctx, vt, v, v.typ); if (!CastInst::castIsValid(Op, from, to)) return emit_runtime_call(ctx, f, argv, 2); diff --git a/test/intrinsics.jl b/test/intrinsics.jl index b215f1cba0df8..0c6792fa8ac22 100644 --- a/test/intrinsics.jl +++ b/test/intrinsics.jl @@ -102,16 +102,57 @@ function compiled_conv(::Type{T}, x) where T t = Core.Intrinsics.trunc_int(T, x) z = Core.Intrinsics.zext_int(typeof(x), t) s = Core.Intrinsics.sext_int(typeof(x), t) - fpt = Core.Intrinsics.fptrunc(T, x) - fpe = Core.Intrinsics.fpext(typeof(x), fpt) - return (t, z, s, fpt, fpe) + return (t, z, s) end @test compiled_conv(UInt32, Int64(0x8000_0000)) == - (0x80000000, Int64(0x80000000), -Int64(0x80000000), 0x00000000, 0) + (0x80000000, Int64(0x80000000), -Int64(0x80000000)) @test compiled_conv(UInt32, UInt64(0xC000_BA98_8765_4321)) == - (0x87654321, 0x0000000087654321, 0xffffffff87654321, 0xc005d4c4, 0xc000ba9880000000) + (0x87654321, 0x0000000087654321, 0xffffffff87654321) @test_throws ErrorException compiled_conv(Bool, im) +function compiled_fptrunc(::Type{T}, x) where T + return Core.Intrinsics.fptrunc(T, x) + +end +# 1.234 +# 0 01111111 00111011111001110110110 +# float32 0 01111111 00111011111001110110110 +# float16 0 01111 0011101111 (truncated/rtz) +# float16 0 01111 0011110000 (round-to-nearest) +# bfloat16 0 01111111 0011110 (round-to-nearest) +@test compiled_fptrunc(Float16, 1.234) === reinterpret(Float16, 0b0_01111_0011110000) +# On arm64, LLVM gives an assertion failure when compiling this: +# LLVM ERROR: Cannot select: 0x106c8e570: bf16 = fp_round 0x106c8df50, TargetConstant:i64<0>, intrinsics.jl:114 +# 0x106c8df50: f64,ch = CopyFromReg 0x104545960, Register:f64 %1 +# 0x106c8dee0: f64 = Register %1 +# 0x106c8e3b0: i64 = TargetConstant<0> +# 
In function: julia_compiled_fptrunc_3480 +# @test compiled_fptrunc(Core.BFloat16, 1.234) === reinterpret(Core.BFloat16, 0b0_01111111_0011110) +@test compiled_fptrunc(Float32, 1.234) === 1.234f0 +@test_throws ErrorException compiled_fptrunc(Float64, 1.234f0) +@test_throws ErrorException compiled_fptrunc(Int32, 1.234) +@test_throws ErrorException compiled_fptrunc(Float32, 1234) + +function compiled_fpext(::Type{T}, x) where T + return Core.Intrinsics.fpext(T, x) +end +# 1.234 +# float16 0 01111 0011110000 +# 0 01111111 00111100000000000000000 = 1.234375 + +# 1.234 +# float32 0 01111111 00111011111001110110110 +# float64 0 01111111111 0011101111100111011011000000000000000000000000000000 +# 3be76c +@test compiled_fpext(Float32, reinterpret(Float16, 0b0_01111_0011110000)) === 1.234375f0 +@test compiled_fpext(Float64, reinterpret(Float16, 0b0_01111_0011110000)) === 1.234375 +@test compiled_fpext(Float64, 1.234f0) === 0x1.3be76cp0 +@test_throws ErrorException compiled_fpext(Float16, Float16(1.0)) +@test_throws ErrorException compiled_fpext(Float16, 1.0f0) +@test_throws ErrorException compiled_fpext(Float32, 1.0f0) +@test_throws ErrorException compiled_fpext(Float32, 1.0) +@test_throws ErrorException compiled_fpext(Float64, 1.0) + let f = Core.Intrinsics.ashr_int @test f(Int8(-17), 1) == -9 @test f(Int32(-1), 33) == -1 @@ -158,7 +199,7 @@ macro test_intrinsic(intr, args...) 
$intr($(inputs...)) end @test f() === Base.invokelatest($intr, $(inputs...)) - @test f() == $output + @test f() === $output end end From d604e8e03b4954d85660d44773b39684544df749 Mon Sep 17 00:00:00 2001 From: Sam Schweigel Date: Thu, 30 Jan 2025 12:36:57 -0800 Subject: [PATCH 5/7] Add tests for fptrunc on NaNs, fix float_to_half NaN handling double_to_half, float/double_to_bfloat TODO --- src/runtime_intrinsics.c | 7 +++-- test/intrinsics.jl | 56 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c index 8aba0332a33ee..f5b281f9e92ed 100644 --- a/src/runtime_intrinsics.c +++ b/src/runtime_intrinsics.c @@ -161,8 +161,11 @@ static inline uint16_t float_to_half(float param) JL_NOTSAFEPOINT uint32_t f; memcpy(&f, &param, sizeof(float)); if (isnan(param)) { - uint32_t t = 0x8000 ^ (0x8000 & ((uint16_t)(f >> 0x10))); - return t ^ ((uint16_t)(f >> 0xd)); + // Match the behaviour of arm64's fcvt or x86's vcvtps2ph by quieting + // all NaNs (avoids creating infinities), preserving the sign, and using + // the upper bits of the payload. + // sign exp quiet payload + return (f>>16 & 0x8000) | 0x7c00 | 0x0200 | (f>>13 & 0x03ff); } int i = ((f & ~0x007fffff) >> 23); uint8_t sh = shifttable[i]; diff --git a/test/intrinsics.jl b/test/intrinsics.jl index 0c6792fa8ac22..e57eca96bf7d9 100644 --- a/test/intrinsics.jl +++ b/test/intrinsics.jl @@ -203,6 +203,18 @@ macro test_intrinsic(intr, args...) end end +macro test_intrinsic_pred(intr, args...) 
+ p = args[end] + inputs = args[1:end-1] + quote + function f() + $intr($(inputs...)) + end + @test $(p)(Base.invokelatest($intr, $(inputs...))) + @test $(p)(f()) + end +end + @testset "Float64 intrinsics" begin # unary @test_intrinsic Core.Intrinsics.abs_float Float64(-3.3) Float64(3.3) @@ -269,6 +281,19 @@ end @test_intrinsic Core.Intrinsics.fptoui UInt Float32(3.3) UInt(3) end +function f16(sign, exp, sig) + x = (sign&1)<<15 | (exp&((1<<5)-1))<<10 | sig&((1<<10)-1) + return reinterpret(Float16, UInt16(x)) +end +function f32(sign, exp, sig) + x = (sign&1)<<31 | (exp&((1<<8)-1))<<23 | sig&((1<<23)-1) + return reinterpret(Float32, UInt32(x)) +end +function f64(sign, exp, sig) + x = (sign&1)<<63 | (exp&((1<<11)-1))<<52 | sig&((1<<52)-1) + return reinterpret(Float64, UInt64(x)) +end + @testset "Float16 intrinsics" begin # unary @test_intrinsic Core.Intrinsics.abs_float Float16(-3.3) Float16(3.3) @@ -279,6 +304,37 @@ end @test_intrinsic Core.Intrinsics.fptrunc Float16 Float32(3.3) Float16(3.3) @test_intrinsic Core.Intrinsics.fptrunc Float16 Float64(3.3) Float16(3.3) + # float_to_half/bfloat_to_float special cases + @test_intrinsic Core.Intrinsics.fptrunc Float16 Inf32 Inf16 + @test_intrinsic Core.Intrinsics.fptrunc Float16 -Inf32 -Inf16 + @test_intrinsic Core.Intrinsics.fptrunc Float16 Inf64 Inf16 + @test_intrinsic Core.Intrinsics.fptrunc Float16 -Inf64 -Inf16 + + # LLVM gives us three things that may happen to NaNs in an fptrunc on + # "normal" platforms (x86, ARM): + # - Return a canonical NaN (quiet, all-zero payload) + # - Copy high bits of payload to output, and: + # - Set the quiet bit + # - Leave the quiet bit as-is. This option isn't possible if doing so + # would result in an infinity (all-zero payload and quiet bit clear) + # + # We'll just test a NaN is returned at all. 
+ # + # Refer to #49353 and https://llvm.org/docs/LangRef.html#floatnan + + # Canonical NaN + @test_intrinsic_pred Core.Intrinsics.fptrunc Float16 NaN32 isnan + @test_intrinsic_pred Core.Intrinsics.fptrunc Float16 NaN isnan + # Quiet NaN + @test_intrinsic_pred Core.Intrinsics.fptrunc Float16 f32(0, 0xff, 1<<22 | 1<<13) isnan + @test_intrinsic_pred Core.Intrinsics.fptrunc Float16 f64(0, 0x7ff, 1<<51 | 1<<42) isnan + # Signalling NaN that can be propagated to Float16 + @test_intrinsic_pred Core.Intrinsics.fptrunc Float16 f32(0, 0xff, 1<<13) isnan + @test_intrinsic_pred Core.Intrinsics.fptrunc Float16 f64(0, 0x7ff, 1<<42) isnan + # Signalling NaN that cannot be propagated to Float16 + @test_intrinsic_pred Core.Intrinsics.fptrunc Float16 f32(0, 0xff, 1) isnan + @test_intrinsic_pred Core.Intrinsics.fptrunc Float16 f64(0, 0x7ff, 1) isnan + # binary @test_intrinsic Core.Intrinsics.add_float Float16(3.3) Float16(2) Float16(5.3) @test_intrinsic Core.Intrinsics.sub_float Float16(3.3) Float16(2) Float16(1.301) From e2d3cd987091e7322f0a2c0c2a58878a5a5f5090 Mon Sep 17 00:00:00 2001 From: Sam Schweigel <33556084+xal-0@users.noreply.github.com> Date: Fri, 31 Jan 2025 12:17:20 -0800 Subject: [PATCH 6/7] Rename _FLOAT_TYPES to CORE_FLOAT_TYPES, make it a Union{...} Co-authored-by: Jameson Nash --- Compiler/src/tfuncs.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Compiler/src/tfuncs.jl b/Compiler/src/tfuncs.jl index ef3c524275130..50b88bb0222ce 100644 --- a/Compiler/src/tfuncs.jl +++ b/Compiler/src/tfuncs.jl @@ -2455,7 +2455,7 @@ const _SPECIAL_BUILTINS = Any[ ] # Types compatible with fpext/fptrunc -const _FLOAT_TYPES = Any[Core.BFloat16, Float16, Float32, Float64] +const CORE_FLOAT_TYPES = Union{Core.BFloat16, Float16, Float32, Float64} function isdefined_effects(𝕃::AbstractLattice, argtypes::Vector{Any}) # consistent if the first arg is immutable @@ -2873,11 +2873,11 @@ function intrinsic_exct(𝕃::AbstractLattice, f::IntrinsicFunction, argtypes::V # 
fpext and fptrunc have further restrictions on the allowed types. if f === Intrinsics.fpext && - !(ty in _FLOAT_TYPES && xty in _FLOAT_TYPES && Core.sizeof(ty) > Core.sizeof(xty)) + !(ty <: CORE_FLOAT_TYPES && xty <: CORE_FLOAT_TYPES && Core.sizeof(ty) > Core.sizeof(xty)) return ErrorException end if f === Intrinsics.fptrunc && - !(ty in _FLOAT_TYPES && xty in _FLOAT_TYPES && Core.sizeof(ty) < Core.sizeof(xty)) + !(ty <: CORE_FLOAT_TYPES && xty <: CORE_FLOAT_TYPES && Core.sizeof(ty) < Core.sizeof(xty)) return ErrorException end From fa82f94ec2b72e954512b761376b494761ae7a1c Mon Sep 17 00:00:00 2001 From: Sam Schweigel Date: Mon, 3 Feb 2025 09:58:41 -0800 Subject: [PATCH 7/7] Fix test/intrinsics.jl when Int = Int32 --- test/intrinsics.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/intrinsics.jl b/test/intrinsics.jl index e57eca96bf7d9..c3e9bb1680d48 100644 --- a/test/intrinsics.jl +++ b/test/intrinsics.jl @@ -290,7 +290,7 @@ function f32(sign, exp, sig) return reinterpret(Float32, UInt32(x)) end function f64(sign, exp, sig) - x = (sign&1)<<63 | (exp&((1<<11)-1))<<52 | sig&((1<<52)-1) + x = UInt64(sign&1)<<63 | UInt64(exp&((1<<11)-1))<<52 | sig&((Int64(1)<<52)-1) return reinterpret(Float64, UInt64(x)) end @@ -327,10 +327,10 @@ end @test_intrinsic_pred Core.Intrinsics.fptrunc Float16 NaN isnan # Quiet NaN @test_intrinsic_pred Core.Intrinsics.fptrunc Float16 f32(0, 0xff, 1<<22 | 1<<13) isnan - @test_intrinsic_pred Core.Intrinsics.fptrunc Float16 f64(0, 0x7ff, 1<<51 | 1<<42) isnan + @test_intrinsic_pred Core.Intrinsics.fptrunc Float16 f64(0, 0x7ff, Int64(1)<<51 | Int64(1)<<42) isnan # Signalling NaN that can be propagated to Float16 @test_intrinsic_pred Core.Intrinsics.fptrunc Float16 f32(0, 0xff, 1<<13) isnan - @test_intrinsic_pred Core.Intrinsics.fptrunc Float16 f64(0, 0x7ff, 1<<42) isnan + @test_intrinsic_pred Core.Intrinsics.fptrunc Float16 f64(0, 0x7ff, Int64(1)<<42) isnan # Signalling NaN that cannot be propagated to Float16 
@test_intrinsic_pred Core.Intrinsics.fptrunc Float16 f32(0, 0xff, 1) isnan @test_intrinsic_pred Core.Intrinsics.fptrunc Float16 f64(0, 0x7ff, 1) isnan