Skip to content

Commit 0b52213

Browse files
Lucas Mateus Castro (alqotel)Lucas Mateus Castro (alqotel)
authored and committed
target/ppc: Implemented [pm]xvbf16ger2*
Implemented the prefixed and non-prefixed bfloat16 GER instructions, both without accumulation and with positive or negative multiply and accumulate. --- There is a discrepancy between this implementation and mambo/the hardware: implementing it with float32_mul followed by float32_muladd produces the wrong sign on zero/infinity results, whereas performing the multiplication and then the muladd on FloatParts64 produces a different result after rounding when an underflow occurs. I have not been able to resolve this.
1 parent 068f847 commit 0b52213

6 files changed

Lines changed: 34 additions & 0 deletions

File tree

target/ppc/cpu.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ typedef union _ppc_vsr_t {
224224
int16_t s16[8];
225225
int32_t s32[4];
226226
int64_t s64[2];
227+
bfloat16 bf16[8];
227228
float16 f16[8];
228229
float32 f32[4];
229230
float64 f64[2];
@@ -2652,6 +2653,7 @@ static inline bool lsw_reg_in_range(int start, int nregs, int rx)
26522653
#define VsrSW(i) s32[i]
26532654
#define VsrD(i) u64[i]
26542655
#define VsrSD(i) s64[i]
2656+
#define VsrBF(i) bf16[i]
26552657
#define VsrHF(i) f16[i]
26562658
#define VsrSF(i) f32[i]
26572659
#define VsrDF(i) f64[i]
@@ -2664,6 +2666,7 @@ static inline bool lsw_reg_in_range(int start, int nregs, int rx)
26642666
#define VsrSW(i) s32[3 - (i)]
26652667
#define VsrD(i) u64[1 - (i)]
26662668
#define VsrSD(i) s64[1 - (i)]
2669+
#define VsrBF(i) bf16[7 - (i)]
26672670
#define VsrHF(i) f16[7 - (i)]
26682671
#define VsrSF(i) f32[3 - (i)]
26692672
#define VsrDF(i) f64[1 - (i)]

target/ppc/fpu_helper.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3609,6 +3609,7 @@ static inline bool ger_neg_acc_flag(uint32_t flag)
36093609
compute_fp_flags(env, GETPC()); \
36103610
}
36113611

3612+
VSXGER16(helper_XVBF16GER2, bfloat16, BF)
36123613
VSXGER16(helper_XVF16GER2, float16, HF)
36133614
VSXGER(helper_XVF32GER, float32, SF)
36143615
VSXGER(helper_XVF64GER, float64, DF)

target/ppc/helper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,7 @@ DEF_HELPER_5(XXBLENDVD, void, vsr, vsr, vsr, vsr, i32)
536536
DEF_HELPER_6(XVI4GER8, void, env, i32, i32, i32, i32, i32)
537537
DEF_HELPER_6(XVI8GER4, void, env, i32, i32, i32, i32, i32)
538538
DEF_HELPER_6(XVI16GER2, void, env, i32, i32, i32, i32, i32)
539+
DEF_HELPER_6(XVBF16GER2, void, env, i32, i32, i32, i32, i32)
539540
DEF_HELPER_6(XVF16GER2, void, env, i32, i32, i32, i32, i32)
540541
DEF_HELPER_6(XVF32GER, void, env, i32, i32, i32, i32, i32)
541542
DEF_HELPER_6(XVF64GER, void, env, i32, i32, i32, i32, i32)

target/ppc/insn32.decode

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -729,6 +729,12 @@ XVI8GER4SPP 111011 ... -- ..... ..... 01100011 ..- @XX3_at xa=%xx_xa
729729
XVI16GER2S 111011 ... -- ..... ..... 00101011 ..- @XX3_at xa=%xx_xa
730730
XVI16GER2SPP 111011 ... -- ..... ..... 00101010 ..- @XX3_at xa=%xx_xa
731731

732+
XVBF16GER2 111011 ... -- ..... ..... 00110011 ..- @XX3_at xa=%xx_xa
733+
XVBF16GER2PP 111011 ... -- ..... ..... 00110010 ..- @XX3_at xa=%xx_xa
734+
XVBF16GER2PN 111011 ... -- ..... ..... 10110010 ..- @XX3_at xa=%xx_xa
735+
XVBF16GER2NP 111011 ... -- ..... ..... 01110010 ..- @XX3_at xa=%xx_xa
736+
XVBF16GER2NN 111011 ... -- ..... ..... 11110010 ..- @XX3_at xa=%xx_xa
737+
732738
XVF16GER2 111011 ... -- ..... ..... 00010011 ..- @XX3_at xa=%xx_xa
733739
XVF16GER2PP 111011 ... -- ..... ..... 00010010 ..- @XX3_at xa=%xx_xa
734740
XVF16GER2PN 111011 ... -- ..... ..... 10010010 ..- @XX3_at xa=%xx_xa

target/ppc/insn64.decode

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,17 @@ PMXVI16GER2S 000001 11 1001 -- - - pmsk:2 ------ ........ \
151151
PMXVI16GER2SPP 000001 11 1001 -- - - pmsk:2 ------ ........ \
152152
111011 ... -- ..... ..... 00101010 ..- @MMIRR_XX3
153153

154+
PMXVBF16GER2 000001 11 1001 -- - - pmsk:2 ------ ........ \
155+
111011 ... -- ..... ..... 00110011 ..- @MMIRR_XX3
156+
PMXVBF16GER2PP 000001 11 1001 -- - - pmsk:2 ------ ........ \
157+
111011 ... -- ..... ..... 00110010 ..- @MMIRR_XX3
158+
PMXVBF16GER2PN 000001 11 1001 -- - - pmsk:2 ------ ........ \
159+
111011 ... -- ..... ..... 10110010 ..- @MMIRR_XX3
160+
PMXVBF16GER2NP 000001 11 1001 -- - - pmsk:2 ------ ........ \
161+
111011 ... -- ..... ..... 01110010 ..- @MMIRR_XX3
162+
PMXVBF16GER2NN 000001 11 1001 -- - - pmsk:2 ------ ........ \
163+
111011 ... -- ..... ..... 11110010 ..- @MMIRR_XX3
164+
154165
PMXVF16GER2 000001 11 1001 -- - - pmsk:2 ------ ........ \
155166
111011 ... -- ..... ..... 00010011 ..- @MMIRR_XX3
156167
PMXVF16GER2PP 000001 11 1001 -- - - pmsk:2 ------ ........ \

target/ppc/translate/vsx-impl.c.inc

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2911,6 +2911,12 @@ TRANS64(PMXVI16GER2SPP, do_ger_MMIRR_XX3, GER_SPP, gen_helper_XVI16GER2)
29112911
#define GER_PN ger_pack_flags_xvf(true, false, true)
29122912
#define GER_NN ger_pack_flags_xvf(true, true, true)
29132913

2914+
TRANS(XVBF16GER2, do_ger_XX3, GER_NOP, gen_helper_XVBF16GER2)
2915+
TRANS(XVBF16GER2PP, do_ger_XX3, GER_PP, gen_helper_XVBF16GER2)
2916+
TRANS(XVBF16GER2PN, do_ger_XX3, GER_PN, gen_helper_XVBF16GER2)
2917+
TRANS(XVBF16GER2NP, do_ger_XX3, GER_NP, gen_helper_XVBF16GER2)
2918+
TRANS(XVBF16GER2NN, do_ger_XX3, GER_NN, gen_helper_XVBF16GER2)
2919+
29142920
TRANS(XVF16GER2, do_ger_XX3, GER_NOP, gen_helper_XVF16GER2)
29152921
TRANS(XVF16GER2PP, do_ger_XX3, GER_PP, gen_helper_XVF16GER2)
29162922
TRANS(XVF16GER2PN, do_ger_XX3, GER_PN, gen_helper_XVF16GER2)
@@ -2935,6 +2941,12 @@ TRANS64(PMXVF16GER2PN, do_ger_MMIRR_XX3, GER_PN, gen_helper_XVF16GER2)
29352941
TRANS64(PMXVF16GER2NP, do_ger_MMIRR_XX3, GER_NP, gen_helper_XVF16GER2)
29362942
TRANS64(PMXVF16GER2NN, do_ger_MMIRR_XX3, GER_NN, gen_helper_XVF16GER2)
29372943

2944+
TRANS64(PMXVBF16GER2, do_ger_MMIRR_XX3, GER_NOP, gen_helper_XVBF16GER2)
2945+
TRANS64(PMXVBF16GER2PP, do_ger_MMIRR_XX3, GER_PP, gen_helper_XVBF16GER2)
2946+
TRANS64(PMXVBF16GER2PN, do_ger_MMIRR_XX3, GER_PN, gen_helper_XVBF16GER2)
2947+
TRANS64(PMXVBF16GER2NP, do_ger_MMIRR_XX3, GER_NP, gen_helper_XVBF16GER2)
2948+
TRANS64(PMXVBF16GER2NN, do_ger_MMIRR_XX3, GER_NN, gen_helper_XVBF16GER2)
2949+
29382950
TRANS64(PMXVF32GER, do_ger_MMIRR_XX3_NO_PMSK, GER_NOP, gen_helper_XVF32GER)
29392951
TRANS64(PMXVF32GERPP, do_ger_MMIRR_XX3_NO_PMSK, GER_PP, gen_helper_XVF32GER)
29402952
TRANS64(PMXVF32GERPN, do_ger_MMIRR_XX3_NO_PMSK, GER_PN, gen_helper_XVF32GER)

0 commit comments

Comments
 (0)