Skip to content

Commit bc9708f

Browse files
niaowdeadprogram
authored andcommitted
compiler: fix min/max on floats by using intrinsics
This fixes two edge cases for min/max: min/max(+0.0, -0.0) = -0.0, +0.0 min/max(number, NaN) = NaN, NaN The compare and select method does not work here. I switched to the llvm.minimum/llvm.maximum intrinsics which match the intended behavior. The integer min/max were also swapped over to using intrinsics. The compare and select path is now only used by strings.
1 parent 8ef36ed commit bc9708f

4 files changed

Lines changed: 104 additions & 40 deletions

File tree

compiler/compiler.go

Lines changed: 58 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1716,20 +1716,66 @@ func (b *builder) createBuiltin(argTypes []types.Type, argValues []llvm.Value, c
17161716
return llvmLen, nil
17171717
case "min", "max":
17181718
// min and max builtins, added in Go 1.21.
1719-
// We can simply reuse the existing binop comparison code, which has all
1720-
// the edge cases figured out already.
1721-
tok := token.LSS
1722-
if callName == "max" {
1723-
tok = token.GTR
1724-
}
1719+
// Find the corresponding intrinsic name.
1720+
ty := argTypes[0].Underlying().(*types.Basic)
1721+
llvmType := b.getLLVMType(ty)
1722+
info := ty.Info()
1723+
var prefix, delimeter, typeName string
1724+
if info&types.IsInteger != 0 {
1725+
// This is an integer value.
1726+
// Use the LLVM int min/max intrinsics.
1727+
prefix = "llvm.s"
1728+
if info&types.IsUnsigned != 0 {
1729+
prefix = "llvm.u"
1730+
}
1731+
delimeter = ".i"
1732+
typeName = strconv.Itoa(llvmType.IntTypeWidth())
1733+
} else {
1734+
switch ty.Kind() {
1735+
case types.String:
1736+
// Strings do not have an equivalent intrinsic.
1737+
// Implement with compares and selects.
1738+
tok := token.LSS
1739+
if callName == "max" {
1740+
tok = token.GTR
1741+
}
1742+
result := argValues[0]
1743+
typ := argTypes[0]
1744+
for _, arg := range argValues[1:] {
1745+
cmp, err := b.createBinOp(tok, typ, typ, result, arg, pos)
1746+
if err != nil {
1747+
return result, err
1748+
}
1749+
result = b.CreateSelect(cmp, result, arg, "")
1750+
}
1751+
return result, nil
1752+
case types.Float32:
1753+
typeName = "f32"
1754+
case types.Float64:
1755+
typeName = "f64"
1756+
default:
1757+
return llvm.Value{}, b.makeError(pos, "todo: min/max: unknown type")
1758+
}
1759+
// There are a few edge cases with floating point min/max:
1760+
// min(-0.0, +0.0) = -0.0
1761+
// min(NaN, number) = NaN
1762+
// The llvm.minimum.*/llvm.maximum.* intrinsics match this behavior.
1763+
// Neither Go nor LLVM defines the bit representation of resulting NaNs.
1764+
prefix = "llvm."
1765+
delimeter = "imum."
1766+
}
1767+
intrinsicName := prefix + callName + delimeter + typeName
1768+
// Find or create the intrinsic.
1769+
llvmFn := b.mod.NamedFunction(intrinsicName)
1770+
if llvmFn.IsNil() {
1771+
fnType := llvm.FunctionType(llvmType, []llvm.Type{llvmType, llvmType}, false)
1772+
llvmFn = llvm.AddFunction(b.mod, intrinsicName, fnType)
1773+
}
1774+
// Call the intrinsic repeatedly to merge the arguments.
1775+
callType := llvmFn.GlobalValueType()
17251776
result := argValues[0]
1726-
typ := argTypes[0]
17271777
for _, arg := range argValues[1:] {
1728-
cmp, err := b.createBinOp(tok, typ, typ, result, arg, pos)
1729-
if err != nil {
1730-
return result, err
1731-
}
1732-
result = b.CreateSelect(cmp, result, arg, "")
1778+
result = b.CreateCall(callType, llvmFn, []llvm.Value{result, arg}, "")
17331779
}
17341780
return result, nil
17351781
case "panic":

compiler/testdata/go1.21.ll

Lines changed: 33 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ entry:
2222
ret i32 %a
2323
}
2424

25+
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
26+
declare i32 @llvm.smin.i32(i32, i32) #3
27+
2528
; Function Attrs: nounwind
2629
define hidden i32 @main.min2(i32 %a, i32 %b, ptr %context) unnamed_addr #2 {
2730
entry:
@@ -53,29 +56,39 @@ entry:
5356
ret i8 %0
5457
}
5558

59+
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
60+
declare i8 @llvm.umin.i8(i8, i8) #3
61+
5662
; Function Attrs: nounwind
5763
define hidden i32 @main.minUnsigned(i32 %a, i32 %b, ptr %context) unnamed_addr #2 {
5864
entry:
5965
%0 = call i32 @llvm.umin.i32(i32 %a, i32 %b)
6066
ret i32 %0
6167
}
6268

69+
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
70+
declare i32 @llvm.umin.i32(i32, i32) #3
71+
6372
; Function Attrs: nounwind
6473
define hidden float @main.minFloat32(float %a, float %b, ptr %context) unnamed_addr #2 {
6574
entry:
66-
%0 = fcmp olt float %a, %b
67-
%1 = select i1 %0, float %a, float %b
68-
ret float %1
75+
%0 = call float @llvm.minimum.f32(float %a, float %b)
76+
ret float %0
6977
}
7078

79+
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
80+
declare float @llvm.minimum.f32(float, float) #3
81+
7182
; Function Attrs: nounwind
7283
define hidden double @main.minFloat64(double %a, double %b, ptr %context) unnamed_addr #2 {
7384
entry:
74-
%0 = fcmp olt double %a, %b
75-
%1 = select i1 %0, double %a, double %b
76-
ret double %1
85+
%0 = call double @llvm.minimum.f64(double %a, double %b)
86+
ret double %0
7787
}
7888

89+
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
90+
declare double @llvm.minimum.f64(double, double) #3
91+
7992
; Function Attrs: nounwind
8093
define hidden %runtime._string @main.minString(ptr readonly %a.data, i32 %a.len, ptr readonly %b.data, i32 %b.len, ptr %context) unnamed_addr #2 {
8194
entry:
@@ -100,21 +113,29 @@ entry:
100113
ret i32 %0
101114
}
102115

116+
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
117+
declare i32 @llvm.smax.i32(i32, i32) #3
118+
103119
; Function Attrs: nounwind
104120
define hidden i32 @main.maxUint(i32 %a, i32 %b, ptr %context) unnamed_addr #2 {
105121
entry:
106122
%0 = call i32 @llvm.umax.i32(i32 %a, i32 %b)
107123
ret i32 %0
108124
}
109125

126+
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
127+
declare i32 @llvm.umax.i32(i32, i32) #3
128+
110129
; Function Attrs: nounwind
111130
define hidden float @main.maxFloat32(float %a, float %b, ptr %context) unnamed_addr #2 {
112131
entry:
113-
%0 = fcmp ogt float %a, %b
114-
%1 = select i1 %0, float %a, float %b
115-
ret float %1
132+
%0 = call float @llvm.maximum.f32(float %a, float %b)
133+
ret float %0
116134
}
117135

136+
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
137+
declare float @llvm.maximum.f32(float, float) #3
138+
118139
; Function Attrs: nounwind
119140
define hidden %runtime._string @main.maxString(ptr readonly %a.data, i32 %a.len, ptr readonly %b.data, i32 %b.len, ptr %context) unnamed_addr #2 {
120141
entry:
@@ -139,7 +160,7 @@ entry:
139160
}
140161

141162
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
142-
declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1 immarg) #3
163+
declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1 immarg) #4
143164

144165
; Function Attrs: nounwind
145166
define hidden void @main.clearZeroSizedSlice(ptr %s.data, i32 %s.len, i32 %s.cap, ptr %context) unnamed_addr #2 {
@@ -156,24 +177,9 @@ entry:
156177

157178
declare void @runtime.hashmapClear(ptr dereferenceable_or_null(40), ptr) #1
158179

159-
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
160-
declare i32 @llvm.smin.i32(i32, i32) #4
161-
162-
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
163-
declare i8 @llvm.umin.i8(i8, i8) #4
164-
165-
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
166-
declare i32 @llvm.umin.i32(i32, i32) #4
167-
168-
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
169-
declare i32 @llvm.smax.i32(i32, i32) #4
170-
171-
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
172-
declare i32 @llvm.umax.i32(i32, i32) #4
173-
174180
attributes #0 = { allockind("alloc,zeroed") allocsize(0) "alloc-family"="runtime.alloc" "target-features"="+bulk-memory,+bulk-memory-opt,+call-indirect-overlong,+mutable-globals,+nontrapping-fptoint,+sign-ext,-multivalue,-reference-types" }
175181
attributes #1 = { "target-features"="+bulk-memory,+bulk-memory-opt,+call-indirect-overlong,+mutable-globals,+nontrapping-fptoint,+sign-ext,-multivalue,-reference-types" }
176182
attributes #2 = { nounwind "target-features"="+bulk-memory,+bulk-memory-opt,+call-indirect-overlong,+mutable-globals,+nontrapping-fptoint,+sign-ext,-multivalue,-reference-types" }
177-
attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: write) }
178-
attributes #4 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
183+
attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
184+
attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) }
179185
attributes #5 = { nounwind }

testdata/go1.21.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,25 @@
11
package main
22

3+
import "math"
4+
35
func main() {
46
// The new min/max builtins.
7+
// With int:
58
ia := 1
69
ib := 5
710
ic := -3
11+
println("min/max:", min(ia, ib, ic), max(ia, ib, ic))
12+
// With float:
813
fa := 1.0
914
fb := 5.0
1015
fc := -3.0
11-
println("min/max:", min(ia, ib, ic), max(ia, ib, ic))
1216
println("min/max:", min(fa, fb, fc), max(fa, fb, fc))
17+
// Float +/- 0.0:
18+
pos0 := 0.0
19+
neg0 := -pos0
20+
println("min/max:", min(pos0, neg0), max(pos0, neg0))
21+
// Float NaN:
22+
println("min/max:", min(math.NaN(), 12.0), max(math.NaN(), 12.0))
1323

1424
// The clear builtin, for slices.
1525
s := []int{1, 2, 3, 4, 5}

testdata/go1.21.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
min/max: -3 5
22
min/max: -3.000000e+000 +5.000000e+000
3+
min/max: -0.000000e+000 +0.000000e+000
4+
min/max: NaN NaN
35
cleared s[:3]: 0 0 0 4 5
46
cleared map: 0
57
added to cleared map: four 1

0 commit comments

Comments
 (0)