-
-
Notifications
You must be signed in to change notification settings - Fork 5.6k
Runtime intrinsics: fix fpext and fptrunc behaviour on Float16/BFloat16 #57160
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
oscardssmith
merged 7 commits into
JuliaLang:master
from
xal-0:fix-float16-fptrunc-fpext
Feb 3, 2025
Merged
Changes from 5 commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
bf764cc
Add additional fpext/fptrunc tests for Float16/BFloat16
xal-0 d4ad742
Enforce bitsize restrictions on fptrunc/fpext (fixes #57130)
xal-0 9f2a30d
Prohibit fpext with input = output bitsize to match LLVM
xal-0 31ecec5
Update effect inference, codegen for new fpext/fptrunc behaviour
xal-0 d604e8e
Add tests for fptrunc on NaNs, fix float_to_half NaN handling
xal-0 e2d3cd9
Rename _FLOAT_TYPES to CORE_FLOAT_TYPES, make it a Union{...}
xal-0 fa82f94
Fix test/intrinsics.jl when Int = Int32
xal-0 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -161,8 +161,11 @@ static inline uint16_t float_to_half(float param) JL_NOTSAFEPOINT | |
uint32_t f; | ||
memcpy(&f, &param, sizeof(float)); | ||
if (isnan(param)) { | ||
uint32_t t = 0x8000 ^ (0x8000 & ((uint16_t)(f >> 0x10))); | ||
return t ^ ((uint16_t)(f >> 0xd)); | ||
// Match the behaviour of arm64's fcvt or x86's vcvtps2ph by quieting | ||
// all NaNs (avoids creating infinities), preserving the sign, and using | ||
// the upper bits of the payload. | ||
// sign exp quiet payload | ||
return (f>>16 & 0x8000) | 0x7c00 | 0x0200 | (f>>13 & 0x03ff); | ||
} | ||
int i = ((f & ~0x007fffff) >> 23); | ||
uint8_t sh = shifttable[i]; | ||
|
@@ -761,33 +764,25 @@ static inline void name(unsigned osize, jl_value_t *ty, void *pa, void *pr) JL_N | |
OP(ty, (c_type*)pr, a); \ | ||
} | ||
|
||
#define un_fintrinsic_half(OP, name) \ | ||
static inline void name(unsigned osize, jl_value_t *ty, void *pa, void *pr) JL_NOTSAFEPOINT \ | ||
{ \ | ||
uint16_t a = *(uint16_t*)pa; \ | ||
float A = half_to_float(a); \ | ||
if (osize == 16) { \ | ||
float R; \ | ||
OP(ty, &R, A); \ | ||
*(uint16_t*)pr = float_to_half(R); \ | ||
} else { \ | ||
OP(ty, (uint16_t*)pr, A); \ | ||
} \ | ||
} | ||
// Defines `name`, a unary runtime intrinsic operating on 16-bit half-float
// data. The input at `pa` is widened to `float` with half_to_float, OP is
// applied as OP(ty, &result, input), and the float result is narrowed back
// with float_to_half and stored at `pr`. `osize` is accepted but not used.
#define un_fintrinsic_half(OP, name)                                            \
    static inline void name(unsigned osize, jl_value_t *ty, void *pa, void *pr) \
        JL_NOTSAFEPOINT                                                         \
    {                                                                           \
        float A = half_to_float(*(uint16_t *)pa);                               \
        float R;                                                                \
        OP(ty, &R, A);                                                          \
        *(uint16_t *)pr = float_to_half(R);                                     \
    }
|
||
#define un_fintrinsic_bfloat(OP, name) \ | ||
static inline void name(unsigned osize, jl_value_t *ty, void *pa, void *pr) JL_NOTSAFEPOINT \ | ||
{ \ | ||
uint16_t a = *(uint16_t*)pa; \ | ||
float A = bfloat_to_float(a); \ | ||
if (osize == 16) { \ | ||
float R; \ | ||
OP(ty, &R, A); \ | ||
*(uint16_t*)pr = float_to_bfloat(R); \ | ||
} else { \ | ||
OP(ty, (uint16_t*)pr, A); \ | ||
} \ | ||
} | ||
// Defines `name`, a unary runtime intrinsic operating on 16-bit bfloat data.
// The input at `pa` is widened to `float` with bfloat_to_float, OP is applied
// as OP(ty, &result, input), and the float result is narrowed back with
// float_to_bfloat and stored at `pr`. `osize` is accepted but not used.
#define un_fintrinsic_bfloat(OP, name)                                          \
    static inline void name(unsigned osize, jl_value_t *ty, void *pa, void *pr) \
        JL_NOTSAFEPOINT                                                         \
    {                                                                           \
        float A = bfloat_to_float(*(uint16_t *)pa);                             \
        float R;                                                                \
        OP(ty, &R, A);                                                          \
        *(uint16_t *)pr = float_to_bfloat(R);                                   \
    }
|
||
// float or integer inputs | ||
// OP::Function macro(inputa, inputb) | ||
|
@@ -1629,32 +1624,74 @@ cvt_iintrinsic(LLVMUItoFP, uitofp) | |
cvt_iintrinsic(LLVMFPtoSI, fptosi) | ||
cvt_iintrinsic(LLVMFPtoUI, fptoui) | ||
|
||
#define fptrunc(tr, pr, a) \ | ||
if (!(osize < 8 * sizeof(a))) \ | ||
jl_error("fptrunc: output bitsize must be < input bitsize"); \ | ||
else if (osize == 16) { \ | ||
if ((jl_datatype_t*)tr == jl_float16_type) \ | ||
*(uint16_t*)pr = float_to_half(a); \ | ||
else /*if ((jl_datatype_t*)tr == jl_bfloat16_type)*/ \ | ||
*(uint16_t*)pr = float_to_bfloat(a); \ | ||
} \ | ||
else if (osize == 32) \ | ||
*(float*)pr = a; \ | ||
else if (osize == 64) \ | ||
*(double*)pr = a; \ | ||
else \ | ||
jl_error("fptrunc: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64"); | ||
#define fpext(tr, pr, a) \ | ||
if (!(osize >= 8 * sizeof(a))) \ | ||
jl_error("fpext: output bitsize must be >= input bitsize"); \ | ||
if (osize == 32) \ | ||
*(float*)pr = a; \ | ||
else if (osize == 64) \ | ||
*(double*)pr = a; \ | ||
else \ | ||
jl_error("fpext: runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64"); | ||
un_fintrinsic_withtype(fptrunc,fptrunc) | ||
un_fintrinsic_withtype(fpext,fpext) | ||
// Typed load/store helpers for the floating-point conversion intrinsics.
// Each read macro loads the value stored at `p`; the 16-bit formats are
// widened through half_to_float / bfloat_to_float. Each write macro stores
// `x` at `p`; the 16-bit formats are narrowed through float_to_half /
// float_to_bfloat.
//
// Arguments and whole expansions are parenthesized so that an expression
// argument such as `buf + 1` is cast and dereferenced as a unit rather than
// expanding to `*(float *)buf + 1`.
#define fintrinsic_read_float16(p)  half_to_float(*(uint16_t *)(p))
#define fintrinsic_read_bfloat16(p) bfloat_to_float(*(uint16_t *)(p))
#define fintrinsic_read_float32(p)  (*(float *)(p))
#define fintrinsic_read_float64(p)  (*(double *)(p))

#define fintrinsic_write_float16(p, x)  (*(uint16_t *)(p) = float_to_half(x))
#define fintrinsic_write_bfloat16(p, x) (*(uint16_t *)(p) = float_to_bfloat(x))
#define fintrinsic_write_float32(p, x)  (*(float *)(p) = (x))
#define fintrinsic_write_float64(p, x)  (*(double *)(p) = (x))
|
||
/* | ||
* aty: Type of value argument (input) | ||
* pa: Pointer to value argument data | ||
* ty: Type argument (output) | ||
* pr: Pointer to result data | ||
*/ | ||
|
||
static inline void fptrunc(jl_datatype_t *aty, void *pa, jl_datatype_t *ty, void *pr) | ||
{ | ||
unsigned isize = jl_datatype_size(aty), osize = jl_datatype_size(ty); | ||
if (!(osize < isize)) { | ||
jl_error("fptrunc: output bitsize must be < input bitsize"); | ||
return; | ||
} | ||
|
||
#define fptrunc_convert(in, out) \ | ||
else if (aty == jl_##in##_type && ty == jl_##out##_type) \ | ||
fintrinsic_write_##out(pr, fintrinsic_read_##in(pa)) | ||
|
||
if (0) | ||
; | ||
fptrunc_convert(float32, float16); | ||
fptrunc_convert(float64, float16); | ||
fptrunc_convert(float32, bfloat16); | ||
fptrunc_convert(float64, bfloat16); | ||
fptrunc_convert(float64, float32); | ||
else | ||
jl_error("fptrunc: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64"); | ||
Comment on lines
+1663
to
+1664
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since you are changing the permitted combination of behaviors, you will also need to update Compiler/src/tfuncs.jl and src/intrinsics.cpp to match these new rules, so that they all agree on exactly which errors are thrown and for what cases they can occur. |
||
#undef fptrunc_convert | ||
} | ||
|
||
/*
 * Runtime implementation of the `fpext` intrinsic: converts a floating-point
 * value to a strictly larger floating-point type.
 *
 * aty: Type of value argument (input)
 * pa:  Pointer to value argument data
 * ty:  Type argument (output)
 * pr:  Pointer to result data
 *
 * Calls jl_error when the output type is not strictly larger than the input
 * type, or when the (input, output) pair is not one of the conversions
 * listed in the dispatch chain below.
 */
static inline void fpext(jl_datatype_t *aty, void *pa, jl_datatype_t *ty, void *pr)
{
    unsigned isize = jl_datatype_size(aty), osize = jl_datatype_size(ty);
    if (!(osize > isize)) {
        jl_error("fpext: output bitsize must be > input bitsize");
        return;
    }

    // Each use expands to one `else if` arm that reads `pa` as type `in` and
    // writes the converted value to `pr` as type `out`.
#define fpext_convert(in, out)                                   \
    else if (aty == jl_##in##_type && ty == jl_##out##_type)     \
        fintrinsic_write_##out(pr, fintrinsic_read_##in(pa))

    // `if (0);` opens the chain so that every conversion can be an `else if`.
    if (0)
        ;
    fpext_convert(float16, float32);
    fpext_convert(float16, float64);
    fpext_convert(bfloat16, float32);
    fpext_convert(bfloat16, float64);
    fpext_convert(float32, float64);
    else
        // Report the failure under this intrinsic's own name (was "fptrunc:").
        jl_error("fpext: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64");
#undef fpext_convert
}
|
||
cvt_iintrinsic(fptrunc, fptrunc) | ||
cvt_iintrinsic(fpext, fpext) | ||
|
||
|
||
// checked arithmetic | ||
/** | ||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.