diff --git a/.gitignore b/.gitignore index 10e20410..0ca0892d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,9 @@ # ignore python compiled object *.pyc -# ignore vi temporary files -*.swo -*.swp .sconsign.dblite obj/ cctest* bench_* libvixl* example-* +vixl_stats.csv diff --git a/README.md b/README.md index e3c3a0ad..239a0d95 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -VIXL: AArch64 Runtime Code Generation Library Version 1.1 +VIXL: AArch64 Runtime Code Generation Library Version 1.2 ========================================================= Contents: @@ -54,7 +54,7 @@ were deemed unnecessary: * No Advanced SIMD support. * Limited rounding mode support for floating point. - * No support for synchronisation instructions. + * Limited support for synchronisation instructions. * Limited support for system instructions. * A few miscellaneous integer and floating point instructions are missing. diff --git a/doc/changelog.md b/doc/changelog.md index 8bab9323..09491e4e 100644 --- a/doc/changelog.md +++ b/doc/changelog.md @@ -1,6 +1,25 @@ VIXL Change Log =============== +* 1.2 + + Added support for `fmadd`, `fnmadd`, `fnmsub`, `fminnm`, `fmaxnm`, + `frinta`, `fcvtau` and `fcvtas`. + + Added support for assembling and disassembling `isb`, `dsb` and `dmb`. + + Added support for automatic inversion of compare instructions when using + negative immediates. + + Added support for using `movn` when generating immediates. + + Added explicit flag-setting 'S' instructions, and removed + `SetFlags` and `LeaveFlags` arguments. + + Added support for `Movk` in macro assembler. + + Added support for W register parameters to `Tbz` and `Tbnz`. + + Added support for using immediate operands with `Csel`. + + Added new debugger syntax for memory inspection. + + Fixed `smull`, `fmsub` and `sdiv` simulation. + + Fixed sign extension for W->X conversions using `sxtb`, `sxth` and `sxtw`. + + Prevented code generation for certain side-effect free operations, + such as `add r, r, #0`, in the macro assembler. + + Other small bug fixes. + * 1.1 + Improved robustness of instruction decoder and disassembler. + Added support for double-to-float conversions using `fcvt`. diff --git a/doc/supported-instructions.md b/doc/supported-instructions.md index 90d63ec9..71839d4e 100644 --- a/doc/supported-instructions.md +++ b/doc/supported-instructions.md @@ -15,8 +15,16 @@ Add with carry bit. void adc(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S = LeaveFlags) + const Operand& operand) + + +### adcs ### + +Add with carry bit and update status flags. + + void adcs(const Register& rd, + const Register& rn, + const Operand& operand) ### add ### @@ -25,8 +33,16 @@ Add. void add(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S = LeaveFlags) + const Operand& operand) + + +### adds ### + +Add and update status flags. + + void adds(const Register& rd, + const Register& rn, + const Operand& operand) ### adr ### @@ -43,6 +59,24 @@ Calculate the address of a label. void adr(const Register& rd, Label* label) +### and ### + +Bitwise and (A & B). + + void and_(const Register& rd, + const Register& rn, + const Operand& operand) + + +### ands ### + +Bitwise and (A & B) and update status flags. + + void ands(const Register& rd, + const Register& rn, + const Operand& operand) + + ### asr ### Arithmetic shift right. @@ -59,16 +93,30 @@ Arithmetic shift right by variable. ### b ### -Branch to PC offset. +Conditional branch to PC offset. + + void b(int imm19, Condition cond) + + +### b ### + +Conditional branch to label. + + void b(Label* label, Condition cond) + + +### b ### + +Unconditional branch to PC offset. - void b(int imm26, Condition cond = al) + void b(int imm26) ### b ### -Branch to label. +Unconditional branch to label. - void b(Label* label, Condition cond = al) + void b(Label* label) ### bfi ### @@ -107,8 +155,16 @@ Bit clear (A & ~B). void bic(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S = LeaveFlags) + const Operand& operand) + + +### bics ### + +Bit clear (A & ~B) and update status flags. + + void bics(const Register& rd, + const Register& rn, + const Operand& operand) ### bl ### @@ -297,6 +353,20 @@ Conditional select negation: rd = cond ? rn : -rm. Condition cond) +### dmb ### + +Data memory barrier. + + void dmb(BarrierDomain domain, BarrierType type) + + +### dsb ### + +Data synchronization barrier. + + void dsb(BarrierDomain domain, BarrierType type) + + ### eon ### Bitwise enor/xnor (A ^ ~B). @@ -335,6 +405,13 @@ Halting debug-mode breakpoint. void hlt(int code) +### isb ### + +Instruction synchronization barrier. + + void isb() + + ### ldnp ### Load integer or FP register pair, non-temporal. @@ -530,8 +607,15 @@ Move inverted operand to register. Negate. void neg(const Register& rd, - const Operand& operand, - FlagsUpdate S = LeaveFlags) + const Operand& operand) + + +### negs ### + +Negate and update status flags. + + void negs(const Register& rd, + const Operand& operand) ### ngc ### @@ -539,8 +623,15 @@ Negate. Negate with carry bit. void ngc(const Register& rd, - const Operand& operand, - FlagsUpdate S = LeaveFlags) + const Operand& operand) + + +### ngcs ### + +Negate with carry bit and update status flags. + + void ngcs(const Register& rd, + const Operand& operand) ### nop ### @@ -619,8 +710,16 @@ Subtract with carry bit. void sbc(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S = LeaveFlags) + const Operand& operand) + + +### sbcs ### + +Subtract with carry bit and update status flags. + + void sbcs(const Register& rd, + const Register& rn, + const Operand& operand) ### sbfiz ### @@ -744,8 +843,16 @@ Subtract. void sub(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S = LeaveFlags) + const Operand& operand) + + +### subs ### + +Subtract and update status flags. + + void subs(const Register& rd, + const Register& rn, + const Operand& operand) ### sxtb ### @@ -943,11 +1050,25 @@ FP conditional select. ### fcvt ### -FP convert single to double precision. +FP convert between single and double precision. void fcvt(const FPRegister& fd, const FPRegister& fn) +### fcvtas ### + +Convert FP to signed integer (nearest with ties to away). + + void fcvtas(const Register& rd, const FPRegister& fn) + + +### fcvtau ### + +Convert FP to unsigned integer (nearest with ties to away). + + void fcvtau(const Register& rd, const FPRegister& fn) + + ### fcvtms ### Convert FP to signed integer (round towards -infinity). @@ -997,6 +1118,16 @@ FP divide. void fdiv(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm) +### fmadd ### + +FP fused multiply and add. + + void fmadd(const FPRegister& fd, + const FPRegister& fn, + const FPRegister& fm, + const FPRegister& fa) + + ### fmax ### FP maximum. @@ -1004,6 +1135,13 @@ FP maximum. void fmax(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm) +### fmaxnm ### + +FP maximum number. + + void fmaxnm(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm) + + ### fmin ### FP minimum. @@ -1011,6 +1149,13 @@ FP minimum. void fmin(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm) +### fminnm ### + +FP minimum number. + + void fminnm(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm) + + ### fmov ### Move FP register to FP register. @@ -1041,7 +1186,7 @@ Move register to FP register. ### fmsub ### -FP multiply and subtract. +FP fused multiply and subtract. void fmsub(const FPRegister& fd, const FPRegister& fn, @@ -1063,6 +1208,33 @@ FP negate. void fneg(const FPRegister& fd, const FPRegister& fn) +### fnmadd ### + +FP fused multiply, add and negate. + + void fnmadd(const FPRegister& fd, + const FPRegister& fn, + const FPRegister& fm, + const FPRegister& fa) + + +### fnmsub ### + +FP fused multiply, subtract and negate. + + void fnmsub(const FPRegister& fd, + const FPRegister& fn, + const FPRegister& fm, + const FPRegister& fa) + + +### frinta ### + +FP round to integer (nearest with ties to away). + + void frinta(const FPRegister& fd, const FPRegister& fn) + + ### frintn ### FP round to integer (nearest with ties to even). @@ -1123,11 +1295,4 @@ Emit raw instructions into the instruction stream. inline void dci(Instr raw_inst) -### debug ### - -Debug control pseudo instruction, only supported by the debugger. - - void debug(const char* message, uint32_t code, Instr params = BREAK) - - diff --git a/src/a64/assembler-a64.cc b/src/a64/assembler-a64.cc index 89f74063..eaee2758 100644 --- a/src/a64/assembler-a64.cc +++ b/src/a64/assembler-a64.cc @@ -176,28 +176,24 @@ const FPRegister& FPRegister::DRegFromCode(unsigned code) { const Register& CPURegister::W() const { ASSERT(IsValidRegister()); - ASSERT(Is64Bits()); return Register::WRegFromCode(code_); } const Register& CPURegister::X() const { ASSERT(IsValidRegister()); - ASSERT(Is32Bits()); return Register::XRegFromCode(code_); } const FPRegister& CPURegister::S() const { ASSERT(IsValidFPRegister()); - ASSERT(Is64Bits()); return FPRegister::SRegFromCode(code_); } const FPRegister& CPURegister::D() const { ASSERT(IsValidFPRegister()); - ASSERT(Is32Bits()); return FPRegister::DRegFromCode(code_); } @@ -230,6 +226,9 @@ Operand::Operand(Register reg, Extend extend, unsigned shift_amount) ASSERT(reg.IsValid()); ASSERT(shift_amount <= 4); ASSERT(!reg.IsSP()); + + // Extend modes SXTX and UXTX require a 64-bit register. + ASSERT(reg.Is64Bits() || ((extend != SXTX) && (extend != UXTX))); } @@ -248,6 +247,15 @@ bool Operand::IsExtendedRegister() const { } +bool Operand::IsZero() const { + if (IsImmediate()) { + return immediate() == 0; + } else { + return reg().IsZero(); + } +} + + Operand Operand::ToExtendedRegister() const { ASSERT(IsShiftedRegister()); ASSERT((shift_ == LSL) && (shift_amount_ <= 4)); @@ -271,6 +279,9 @@ MemOperand::MemOperand(Register base, ASSERT(base.Is64Bits() && !base.IsZero()); ASSERT(!regoffset.IsSP()); ASSERT((extend == UXTW) || (extend == SXTW) || (extend == SXTX)); + + // SXTX extend mode requires a 64-bit offset register. + ASSERT(regoffset.Is64Bits() || (extend != SXTX)); } @@ -281,7 +292,7 @@ MemOperand::MemOperand(Register base, : base_(base), regoffset_(regoffset), offset_(0), addrmode_(Offset), shift_(shift), extend_(NO_EXTEND), shift_amount_(shift_amount) { ASSERT(base.Is64Bits() && !base.IsZero()); - ASSERT(!regoffset.IsSP()); + ASSERT(regoffset.Is64Bits() && !regoffset.IsSP()); ASSERT(shift == LSL); } @@ -303,7 +314,7 @@ MemOperand::MemOperand(Register base, const Operand& offset, AddrMode addrmode) offset_ = 0; // These assertions match those in the shifted-register constructor. - ASSERT(!regoffset_.IsSP()); + ASSERT(regoffset_.Is64Bits() && !regoffset_.IsSP()); ASSERT(shift_ == LSL); } else { ASSERT(offset.IsExtendedRegister()); @@ -319,6 +330,7 @@ MemOperand::MemOperand(Register base, const Operand& offset, AddrMode addrmode) // These assertions match those in the extended-register constructor. ASSERT(!regoffset_.IsSP()); ASSERT((extend_ == UXTW) || (extend_ == SXTW) || (extend_ == SXTX)); + ASSERT((regoffset_.Is64Bits() || (extend_ != SXTX))); } } @@ -493,7 +505,7 @@ void Assembler::cbnz(const Register& rt, void Assembler::tbz(const Register& rt, unsigned bit_pos, int imm14) { - ASSERT(rt.Is64Bits()); + ASSERT(rt.Is64Bits() || (rt.Is32Bits() && (bit_pos < kWRegSize))); Emit(TBZ | ImmTestBranchBit(bit_pos) | ImmTestBranch(imm14) | Rt(rt)); } @@ -508,7 +520,7 @@ void Assembler::tbz(const Register& rt, void Assembler::tbnz(const Register& rt, unsigned bit_pos, int imm14) { - ASSERT(rt.Is64Bits()); + ASSERT(rt.Is64Bits() || (rt.Is32Bits() && (bit_pos < kWRegSize))); Emit(TBNZ | ImmTestBranchBit(bit_pos) | ImmTestBranch(imm14) | Rt(rt)); } @@ -533,81 +545,129 @@ void Assembler::adr(const Register& rd, Label* label) { void Assembler::add(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S) { - AddSub(rd, rn, operand, S, ADD); + const Operand& operand) { + AddSub(rd, rn, operand, LeaveFlags, ADD); +} + + +void Assembler::adds(const Register& rd, + const Register& rn, + const Operand& operand) { + AddSub(rd, rn, operand, SetFlags, ADD); } void Assembler::cmn(const Register& rn, const Operand& operand) { Register zr = AppropriateZeroRegFor(rn); - add(zr, rn, operand, SetFlags); + adds(zr, rn, operand); } void Assembler::sub(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S) { - AddSub(rd, rn, operand, S, SUB); + const Operand& operand) { + AddSub(rd, rn, operand, LeaveFlags, SUB); +} + + +void Assembler::subs(const Register& rd, + const Register& rn, + const Operand& operand) { + AddSub(rd, rn, operand, SetFlags, SUB); } void Assembler::cmp(const Register& rn, const Operand& operand) { Register zr = AppropriateZeroRegFor(rn); - sub(zr, rn, operand, SetFlags); + subs(zr, rn, operand); +} + + +void Assembler::neg(const Register& rd, const Operand& operand) { + Register zr = AppropriateZeroRegFor(rd); + sub(rd, zr, operand); } -void Assembler::neg(const Register& rd, const Operand& operand, FlagsUpdate S) { +void Assembler::negs(const Register& rd, const Operand& operand) { Register zr = AppropriateZeroRegFor(rd); - sub(rd, zr, operand, S); + subs(rd, zr, operand); } void Assembler::adc(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S) { - AddSubWithCarry(rd, rn, operand, S, ADC); + const Operand& operand) { + AddSubWithCarry(rd, rn, operand, LeaveFlags, ADC); +} + + +void Assembler::adcs(const Register& rd, + const Register& rn, + const Operand& operand) { + AddSubWithCarry(rd, rn, operand, SetFlags, ADC); } void Assembler::sbc(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S) { - AddSubWithCarry(rd, rn, operand, S, SBC); + const Operand& operand) { + AddSubWithCarry(rd, rn, operand, LeaveFlags, SBC); +} + + +void Assembler::sbcs(const Register& rd, + const Register& rn, + const Operand& operand) { + AddSubWithCarry(rd, rn, operand, SetFlags, SBC); +} + + +void Assembler::ngc(const Register& rd, const Operand& operand) { + Register zr = AppropriateZeroRegFor(rd); + sbc(rd, zr, operand); } -void Assembler::ngc(const Register& rd, const Operand& operand, FlagsUpdate S) { +void Assembler::ngcs(const Register& rd, const Operand& operand) { Register zr = AppropriateZeroRegFor(rd); - sbc(rd, zr, operand, S); + sbcs(rd, zr, operand); } // Logical instructions. void Assembler::and_(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S) { - Logical(rd, rn, operand, (S == SetFlags) ? ANDS : AND); + const Operand& operand) { + Logical(rd, rn, operand, AND); +} + + +void Assembler::ands(const Register& rd, + const Register& rn, + const Operand& operand) { + Logical(rd, rn, operand, ANDS); } void Assembler::tst(const Register& rn, const Operand& operand) { - and_(AppropriateZeroRegFor(rn), rn, operand, SetFlags); + ands(AppropriateZeroRegFor(rn), rn, operand); } void Assembler::bic(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S) { - Logical(rd, rn, operand, (S == SetFlags) ? BICS : BIC); + const Operand& operand) { + Logical(rd, rn, operand, BIC); +} + + +void Assembler::bics(const Register& rd, + const Register& rn, + const Operand& operand) { + Logical(rd, rn, operand, BICS); } @@ -683,7 +743,7 @@ void Assembler::bfm(const Register& rd, ASSERT(rd.size() == rn.size()); Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset); Emit(SF(rd) | BFM | N | - ImmR(immr, rd.size()) | ImmS(imms, rd.size()) | Rn(rn) | Rd(rd)); + ImmR(immr, rd.size()) | ImmS(imms, rn.size()) | Rn(rn) | Rd(rd)); } @@ -691,10 +751,10 @@ void Assembler::sbfm(const Register& rd, const Register& rn, unsigned immr, unsigned imms) { - ASSERT(rd.size() == rn.size()); + ASSERT(rd.Is64Bits() || rn.Is32Bits()); Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset); Emit(SF(rd) | SBFM | N | - ImmR(immr, rd.size()) | ImmS(imms, rd.size()) | Rn(rn) | Rd(rd)); + ImmR(immr, rd.size()) | ImmS(imms, rn.size()) | Rn(rn) | Rd(rd)); } @@ -705,7 +765,7 @@ void Assembler::ubfm(const Register& rd, ASSERT(rd.size() == rn.size()); Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset); Emit(SF(rd) | UBFM | N | - ImmR(immr, rd.size()) | ImmS(imms, rd.size()) | Rn(rn) | Rd(rd)); + ImmR(immr, rd.size()) | ImmS(imms, rn.size()) | Rn(rn) | Rd(rd)); } @@ -716,7 +776,7 @@ void Assembler::extr(const Register& rd, ASSERT(rd.size() == rn.size()); ASSERT(rd.size() == rm.size()); Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset); - Emit(SF(rd) | EXTR | N | Rm(rm) | ImmS(lsb, rd.size()) | Rn(rn) | Rd(rd)); + Emit(SF(rd) | EXTR | N | Rm(rm) | ImmS(lsb, rn.size()) | Rn(rn) | Rd(rd)); } @@ -1146,6 +1206,17 @@ void Assembler::hint(SystemHint code) { Emit(HINT | ImmHint(code) | Rt(xzr)); } +void Assembler::dmb(BarrierDomain domain, BarrierType type) { + Emit(DMB | ImmBarrierDomain(domain) | ImmBarrierType(type)); +} + +void Assembler::dsb(BarrierDomain domain, BarrierType type) { + Emit(DSB | ImmBarrierDomain(domain) | ImmBarrierType(type)); +} + +void Assembler::isb() { + Emit(ISB | ImmBarrierDomain(FullSystem) | ImmBarrierType(BarrierAll)); +} void Assembler::fmov(FPRegister fd, double imm) { if (fd.Is64Bits() && IsImmFP64(imm)) { @@ -1202,6 +1273,14 @@ void Assembler::fmul(const FPRegister& fd, } +void Assembler::fmadd(const FPRegister& fd, + const FPRegister& fn, + const FPRegister& fm, + const FPRegister& fa) { + FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FMADD_s : FMADD_d); +} + + void Assembler::fmsub(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm, @@ -1210,6 +1289,22 @@ void Assembler::fmsub(const FPRegister& fd, } +void Assembler::fnmadd(const FPRegister& fd, + const FPRegister& fn, + const FPRegister& fm, + const FPRegister& fa) { + FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FNMADD_s : FNMADD_d); +} + + +void Assembler::fnmsub(const FPRegister& fd, + const FPRegister& fn, + const FPRegister& fm, + const FPRegister& fa) { + FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FNMSUB_s : FNMSUB_d); +} + + void Assembler::fdiv(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm) { @@ -1224,6 +1319,13 @@ void Assembler::fmax(const FPRegister& fd, } +void Assembler::fmaxnm(const FPRegister& fd, + const FPRegister& fn, + const FPRegister& fm) { + FPDataProcessing2Source(fd, fn, fm, FMAXNM); +} + + void Assembler::fmin(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm) { @@ -1231,6 +1333,13 @@ void Assembler::fmin(const FPRegister& fd, } +void Assembler::fminnm(const FPRegister& fd, + const FPRegister& fn, + const FPRegister& fm) { + FPDataProcessing2Source(fd, fn, fm, FMINNM); +} + + void Assembler::fabs(const FPRegister& fd, const FPRegister& fn) { ASSERT(fd.SizeInBits() == fn.SizeInBits()); @@ -1252,6 +1361,13 @@ void Assembler::fsqrt(const FPRegister& fd, } +void Assembler::frinta(const FPRegister& fd, + const FPRegister& fn) { + ASSERT(fd.SizeInBits() == fn.SizeInBits()); + FPDataProcessing1Source(fd, fn, FRINTA); +} + + void Assembler::frintn(const FPRegister& fd, const FPRegister& fn) { ASSERT(fd.SizeInBits() == fn.SizeInBits()); @@ -1324,6 +1440,16 @@ void Assembler::fcvt(const FPRegister& fd, } +void Assembler::fcvtau(const Register& rd, const FPRegister& fn) { + FPConvertToInt(rd, fn, FCVTAU); +} + + +void Assembler::fcvtas(const Register& rd, const FPRegister& fn) { + FPConvertToInt(rd, fn, FCVTAS); +} + + void Assembler::fcvtmu(const Register& rd, const FPRegister& fn) { FPConvertToInt(rd, fn, FCVTMU); } @@ -1334,26 +1460,22 @@ void Assembler::fcvtms(const Register& rd, const FPRegister& fn) { } -void Assembler::fcvtnu(const Register& rd, - const FPRegister& fn) { +void Assembler::fcvtnu(const Register& rd, const FPRegister& fn) { FPConvertToInt(rd, fn, FCVTNU); } -void Assembler::fcvtns(const Register& rd, - const FPRegister& fn) { +void Assembler::fcvtns(const Register& rd, const FPRegister& fn) { FPConvertToInt(rd, fn, FCVTNS); } -void Assembler::fcvtzu(const Register& rd, - const FPRegister& fn) { +void Assembler::fcvtzu(const Register& rd, const FPRegister& fn) { FPConvertToInt(rd, fn, FCVTZU); } -void Assembler::fcvtzs(const Register& rd, - const FPRegister& fn) { +void Assembler::fcvtzs(const Register& rd, const FPRegister& fn) { FPConvertToInt(rd, fn, FCVTZS); } diff --git a/src/a64/assembler-a64.h b/src/a64/assembler-a64.h index 93b30118..43d31590 100644 --- a/src/a64/assembler-a64.h +++ b/src/a64/assembler-a64.h @@ -471,6 +471,7 @@ class Operand { bool IsImmediate() const; bool IsShiftedRegister() const; bool IsExtendedRegister() const; + bool IsZero() const; // This returns an LSL shift (<= 4) operand as an equivalent extend operand, // which helps in the encoding of instructions that use the stack pointer. @@ -716,8 +717,12 @@ class Assembler { // Add. void add(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S = LeaveFlags); + const Operand& operand); + + // Add and update status flags. + void adds(const Register& rd, + const Register& rn, + const Operand& operand); // Compare negative. void cmn(const Register& rn, const Operand& operand); @@ -725,40 +730,62 @@ class Assembler { // Subtract. void sub(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S = LeaveFlags); + const Operand& operand); + + // Subtract and update status flags. + void subs(const Register& rd, + const Register& rn, + const Operand& operand); // Compare. void cmp(const Register& rn, const Operand& operand); // Negate. void neg(const Register& rd, - const Operand& operand, - FlagsUpdate S = LeaveFlags); + const Operand& operand); + + // Negate and update status flags. + void negs(const Register& rd, + const Operand& operand); // Add with carry bit. void adc(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S = LeaveFlags); + const Operand& operand); + + // Add with carry bit and update status flags. + void adcs(const Register& rd, + const Register& rn, + const Operand& operand); // Subtract with carry bit. void sbc(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S = LeaveFlags); + const Operand& operand); + + // Subtract with carry bit and update status flags. + void sbcs(const Register& rd, + const Register& rn, + const Operand& operand); // Negate with carry bit. void ngc(const Register& rd, - const Operand& operand, - FlagsUpdate S = LeaveFlags); + const Operand& operand); + + // Negate with carry bit and update status flags. + void ngcs(const Register& rd, + const Operand& operand); // Logical instructions. // Bitwise and (A & B). void and_(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S = LeaveFlags); + const Operand& operand); + + // Bitwise and (A & B) and update status flags. + void ands(const Register& rd, + const Register& rn, + const Operand& operand); // Bit test and set flags. void tst(const Register& rn, const Operand& operand); @@ -766,8 +793,12 @@ class Assembler { // Bit clear (A & ~B). void bic(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S = LeaveFlags); + const Operand& operand); + + // Bit clear (A & ~B) and update status flags. + void bics(const Register& rd, + const Register& rn, + const Operand& operand); // Bitwise or (A | B). void orr(const Register& rd, const Register& rn, const Operand& operand); @@ -1160,6 +1191,15 @@ class Assembler { // System hint. void hint(SystemHint code); + // Data memory barrier. + void dmb(BarrierDomain domain, BarrierType type); + + // Data synchronization barrier. + void dsb(BarrierDomain domain, BarrierType type); + + // Instruction synchronization barrier. + void isb(); + // Alias for system instructions. // No-op. void nop() { @@ -1188,12 +1228,30 @@ class Assembler { // FP multiply. void fmul(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm); - // FP multiply and subtract. + // FP fused multiply and add. + void fmadd(const FPRegister& fd, + const FPRegister& fn, + const FPRegister& fm, + const FPRegister& fa); + + // FP fused multiply and subtract. void fmsub(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm, const FPRegister& fa); + // FP fused multiply, add and negate. + void fnmadd(const FPRegister& fd, + const FPRegister& fn, + const FPRegister& fm, + const FPRegister& fa); + + // FP fused multiply, subtract and negate. + void fnmsub(const FPRegister& fd, + const FPRegister& fn, + const FPRegister& fm, + const FPRegister& fa); + // FP divide. void fdiv(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm); @@ -1203,6 +1261,12 @@ class Assembler { // FP minimum. void fmin(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm); + // FP maximum number. + void fmaxnm(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm); + + // FP minimum number. + void fminnm(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm); + // FP absolute. void fabs(const FPRegister& fd, const FPRegister& fn); @@ -1212,6 +1276,9 @@ class Assembler { // FP square root. void fsqrt(const FPRegister& fd, const FPRegister& fn); + // FP round to integer (nearest with ties to away). + void frinta(const FPRegister& fd, const FPRegister& fn); + // FP round to integer (nearest with ties to even). void frintn(const FPRegister& fd, const FPRegister& fn); @@ -1244,6 +1311,12 @@ class Assembler { // FP convert between single and double precision. void fcvt(const FPRegister& fd, const FPRegister& fn); + // Convert FP to unsigned integer (nearest with ties to away). + void fcvtau(const Register& rd, const FPRegister& fn); + + // Convert FP to signed integer (nearest with ties to away). + void fcvtas(const Register& rd, const FPRegister& fn); + // Convert FP to unsigned integer (round towards -infinity). void fcvtmu(const Register& rd, const FPRegister& fn); @@ -1517,6 +1590,16 @@ class Assembler { return imm7 << ImmHint_offset; } + static Instr ImmBarrierDomain(int imm2) { + ASSERT(is_uint2(imm2)); + return imm2 << ImmBarrierDomain_offset; + } + + static Instr ImmBarrierType(int imm2) { + ASSERT(is_uint2(imm2)); + return imm2 << ImmBarrierType_offset; + } + static LSDataSize CalcLSDataSize(LoadStoreOp op) { ASSERT((SizeLS_offset + SizeLS_width) == (kInstructionSize * 8)); return static_cast(op >> SizeLS_offset); diff --git a/src/a64/constants-a64.h b/src/a64/constants-a64.h index 2e0336dd..1ac26f4a 100644 --- a/src/a64/constants-a64.h +++ b/src/a64/constants-a64.h @@ -116,6 +116,8 @@ V_(ImmCmpBranch, 23, 5, SignedBits) \ V_(ImmLLiteral, 23, 5, SignedBits) \ V_(ImmException, 20, 5, Bits) \ V_(ImmHint, 11, 5, Bits) \ +V_(ImmBarrierDomain, 11, 10, Bits) \ +V_(ImmBarrierType, 9, 8, Bits) \ \ /* System (MRS, MSR) */ \ V_(ImmSystemRegister, 19, 5, Bits) \ @@ -246,6 +248,20 @@ enum SystemHint { SEVL = 5 }; +enum BarrierDomain { + OuterShareable = 0, + NonShareable = 1, + InnerShareable = 2, + FullSystem = 3 +}; + +enum BarrierType { + BarrierOther = 0, + BarrierReads = 1, + BarrierWrites = 2, + BarrierAll = 3 +}; + // System/special register names. // This information is not encoded as one field but as the concatenation of // multiple fields (Op0<0>, Op1, Crn, Crm, Op2). @@ -560,6 +576,15 @@ enum ExceptionOp { DCPS3 = ExceptionFixed | 0x00A00003 }; +enum MemBarrierOp { + MemBarrierFixed = 0xD503309F, + MemBarrierFMask = 0xFFFFF09F, + MemBarrierMask = 0xFFFFF0FF, + DSB = MemBarrierFixed | 0x00000000, + DMB = MemBarrierFixed | 0x00000020, + ISB = MemBarrierFixed | 0x00000040 +}; + // Any load or store. enum LoadStoreAnyOp { LoadStoreAnyFMask = 0x0a000000, @@ -927,17 +952,22 @@ enum FPDataProcessing1SourceOp { FRINTN = FRINTN_s, FRINTP_s = FPDataProcessing1SourceFixed | 0x00048000, FRINTP_d = FPDataProcessing1SourceFixed | FP64 | 0x00048000, + FRINTP = FRINTP_s, FRINTM_s = FPDataProcessing1SourceFixed | 0x00050000, FRINTM_d = FPDataProcessing1SourceFixed | FP64 | 0x00050000, + FRINTM = FRINTM_s, FRINTZ_s = FPDataProcessing1SourceFixed | 0x00058000, FRINTZ_d = FPDataProcessing1SourceFixed | FP64 | 0x00058000, FRINTZ = FRINTZ_s, FRINTA_s = FPDataProcessing1SourceFixed | 0x00060000, FRINTA_d = FPDataProcessing1SourceFixed | FP64 | 0x00060000, + FRINTA = FRINTA_s, FRINTX_s = FPDataProcessing1SourceFixed | 0x00070000, FRINTX_d = FPDataProcessing1SourceFixed | FP64 | 0x00070000, + FRINTX = FRINTX_s, FRINTI_s = FPDataProcessing1SourceFixed | 0x00078000, - FRINTI_d = FPDataProcessing1SourceFixed | FP64 | 0x00078000 + FRINTI_d = FPDataProcessing1SourceFixed | FP64 | 0x00078000, + FRINTI = FRINTI_s }; // Floating point data processing 2 source. diff --git a/src/a64/debugger-a64.cc b/src/a64/debugger-a64.cc index ecdc8355..c146a452 100644 --- a/src/a64/debugger-a64.cc +++ b/src/a64/debugger-a64.cc @@ -35,7 +35,7 @@ C(ContinueCommand) \ C(StepCommand) \ C(DisasmCommand) \ C(PrintCommand) \ -C(MemCommand) +C(ExamineCommand) // Debugger command lines are broken up in token of different type to make // processing easier later on. @@ -173,26 +173,22 @@ class IntegerToken : public ValueToken { }; // Literal describing how to print a chunk of data (up to 64 bits). -// Format: %qt -// where q (qualifier) is one of +// Format: .ln +// where l (letter) is one of +// * x: hexadecimal // * s: signed integer // * u: unsigned integer -// * a: hexadecimal floating point -// and t (type) is one of -// * x: 64-bit integer -// * w: 32-bit integer -// * h: 16-bit integer -// * b: 8-bit integer -// * c: character -// * d: double -// * s: float -// When no qualifier is given for integers, they are printed in hexadecinal. +// * f: floating point +// * i: instruction +// and n (size) is one of 8, 16, 32 and 64. n should be omitted for +// instructions. class FormatToken : public Token { public: FormatToken() {} virtual bool IsFormat() const { return true; } virtual int SizeOf() const = 0; + virtual char type_code() const = 0; virtual void PrintData(void* data, FILE* out = stdout) const = 0; virtual void Print(FILE* out = stdout) const = 0; @@ -206,9 +202,10 @@ class FormatToken : public Token { template class Format : public FormatToken { public: - explicit Format(const char* fmt) : fmt_(fmt) {} + Format(const char* fmt, char type_code) : fmt_(fmt), type_code_(type_code) {} virtual int SizeOf() const { return sizeof(T); } + virtual char type_code() const { return type_code_; } virtual void PrintData(void* data, FILE* out = stdout) const { T value; memcpy(&value, data, sizeof(value)); @@ -218,6 +215,7 @@ template class Format : public FormatToken { private: const char* fmt_; + char type_code_; }; // Tokens which don't fit any of the above. @@ -314,37 +312,25 @@ class StepCommand : public DebugCommand { class DisasmCommand : public DebugCommand { public: - DisasmCommand(Token* name, Token* target, IntegerToken* count) - : DebugCommand(name), target_(target), count_(count) {} - virtual ~DisasmCommand() { - delete target_; - delete count_; - } - - Token* target() { return target_; } - int64_t count() { return count_->value(); } - virtual bool Run(Debugger* debugger); - virtual void Print(FILE* out = stdout); - static DebugCommand* Build(std::vector args); static const char* kHelp; static const char* kAliases[]; static const char* kArguments; - - private: - Token* target_; - IntegerToken* count_; }; class PrintCommand : public DebugCommand { public: - PrintCommand(Token* name, Token* target) - : DebugCommand(name), target_(target) {} - virtual ~PrintCommand() { delete target_; } + PrintCommand(Token* name, Token* target, FormatToken* format) + : DebugCommand(name), target_(target), format_(format) {} + virtual ~PrintCommand() { + delete target_; + delete format_; + } Token* target() { return target_; } + FormatToken* format() { return format_; } virtual bool Run(Debugger* debugger); virtual void Print(FILE* out = stdout); @@ -356,24 +342,25 @@ class PrintCommand : public DebugCommand { private: Token* target_; + FormatToken* format_; }; -class MemCommand : public DebugCommand { +class ExamineCommand : public DebugCommand { public: - MemCommand(Token* name, - Token* target, - IntegerToken* count, - FormatToken* format) - : DebugCommand(name), target_(target), count_(count), format_(format) {} - virtual ~MemCommand() { + ExamineCommand(Token* name, + Token* target, + FormatToken* format, + IntegerToken* count) + : DebugCommand(name), target_(target), format_(format), count_(count) {} + virtual ~ExamineCommand() { delete target_; - delete count_; delete format_; + delete count_; } Token* target() { return target_; } - int64_t count() { return count_->value(); } FormatToken* format() { return format_; } + IntegerToken* count() { return count_; } virtual bool Run(Debugger* debugger); virtual void Print(FILE* out = stdout); @@ -385,8 +372,8 @@ class MemCommand : public DebugCommand { private: Token* target_; - IntegerToken* count_; FormatToken* format_; + IntegerToken* count_; }; // Commands which name does not match any of the known commnand. @@ -418,40 +405,48 @@ class InvalidCommand : public DebugCommand { const char* HelpCommand::kAliases[] = { "help", NULL }; const char* HelpCommand::kArguments = NULL; -const char* HelpCommand::kHelp = " print this help"; +const char* HelpCommand::kHelp = " Print this help."; const char* ContinueCommand::kAliases[] = { "continue", "c", NULL }; const char* ContinueCommand::kArguments = NULL; -const char* ContinueCommand::kHelp = " resume execution"; +const char* ContinueCommand::kHelp = " Resume execution."; const char* StepCommand::kAliases[] = { "stepi", "si", NULL }; const char* StepCommand::kArguments = "[n = 1]"; -const char* StepCommand::kHelp = " execute n next instruction(s)"; +const char* StepCommand::kHelp = " Execute n next instruction(s)."; -const char* DisasmCommand::kAliases[] = { "dis", "d", NULL }; -const char* DisasmCommand::kArguments = "[addr = pc] [n = 1]"; +const char* DisasmCommand::kAliases[] = { "disasm", "di", NULL }; +const char* DisasmCommand::kArguments = "[n = 10]"; const char* DisasmCommand::kHelp = - " disassemble n instruction(s) at address addr.\n" - " addr can be an immediate address, a register or the pc." + " Disassemble n instruction(s) at pc.\n" + " This command is equivalent to x pc.i [n = 10]." ; const char* PrintCommand::kAliases[] = { "print", "p", NULL }; -const char* PrintCommand::kArguments = ""; +const char* PrintCommand::kArguments = "[.format]"; const char* PrintCommand::kHelp = - " print the given entity\n" - " entity can be 'regs' for W and X registers, 'fpregs' for S and D\n" - " registers, 'sysregs' for system registers (including NZCV) or 'pc'." + " Print the given entity according to the given format.\n" + " The format parameter only affects individual registers; it is ignored\n" + " for other entities.\n" + " can be one of the following:\n" + " * A register name (such as x0, s1, ...).\n" + " * 'regs', to print all integer (W and X) registers.\n" + " * 'fpregs' to print all floating-point (S and D) registers.\n" + " * 'sysregs' to print all system registers (including NZCV).\n" + " * 'pc' to print the current program counter.\n" ; -const char* MemCommand::kAliases[] = { "mem", "m", NULL }; -const char* MemCommand::kArguments = " [n = 1] [format = %x]"; -const char* MemCommand::kHelp = - " print n memory item(s) at address addr according to the given format.\n" - " addr can be an immediate address, a register or the pc.\n" - " format is made of a qualifer: 's', 'u', 'a' (signed, unsigned, hexa)\n" - " and a type 'x', 'w', 'h', 'b' (64- to 8-bit integer), 'c' (character),\n" - " 's' (float) or 'd' (double). E.g 'mem sp %w' will print a 32-bit word\n" - " from the stack as an hexadecimal number." +const char* ExamineCommand::kAliases[] = { "m", "mem", "x", NULL }; +const char* ExamineCommand::kArguments = "[.format] [n = 10]"; +const char* ExamineCommand::kHelp = + " Examine memory. Print n items of memory at address according to\n" + " the given [.format].\n" + " Addr can be an immediate address, a register name or pc.\n" + " Format is made of a type letter: 'x' (hexadecimal), 's' (signed), 'u'\n" + " (unsigned), 'f' (floating point), i (instruction) and a size in bits\n" + " when appropriate (8, 16, 32, 64)\n" + " E.g 'x sp.x64' will print 10 64-bit words from the stack in\n" + " hexadecimal format." ; const char* RegisterToken::kXAliases[kNumberOfRegisters][kMaxAliasNumber] = { @@ -539,6 +534,7 @@ Debugger::Debugger(Decoder* decoder, FILE* stream) void Debugger::Run() { + pc_modified_ = false; while (pc_ != kEndOfSimAddress) { if (pending_request()) { LogProcessorState(); @@ -571,8 +567,8 @@ void Debugger::PrintInstructions(void* address, int64_t count) { void Debugger::PrintMemory(const uint8_t* address, - int64_t count, - const FormatToken* format) { + const FormatToken* format, + int64_t count) { if (count == 0) { return; } @@ -586,7 +582,7 @@ void Debugger::PrintMemory(const uint8_t* address, const uint8_t* to = from + count * size; for (const uint8_t* current = from; current < to; current += size) { - if (((current - from) % 16) == 0) { + if (((current - from) % 8) == 0) { printf("\n%p: ", current); } @@ -598,6 +594,54 @@ void Debugger::PrintMemory(const uint8_t* address, } +void Debugger::PrintRegister(const Register& target_reg, + const char* name, + const FormatToken* format) { + const uint64_t reg_size = target_reg.SizeInBits(); + const uint64_t format_size = format->SizeOf() * 8; + const uint64_t count = reg_size / format_size; + const uint64_t mask = 0xffffffffffffffff >> (64 - format_size); + const uint64_t reg_value = reg(reg_size, + target_reg.code(), + Reg31IsStackPointer); + ASSERT(count > 0); + + printf("%s = ", name); + for (uint64_t i = 1; i <= count; i++) { + uint64_t data = reg_value >> (reg_size - (i * format_size)); + data &= mask; + format->PrintData(&data); + printf(" "); + } + printf("\n"); +} + + +void Debugger::PrintFPRegister(const FPRegister& target_fpreg, + const FormatToken* format) { + const uint64_t fpreg_size = target_fpreg.SizeInBits(); + const uint64_t format_size = format->SizeOf() * 8; + const uint64_t count = fpreg_size / format_size; + const uint64_t mask = 0xffffffffffffffff >> (64 - format_size); + const uint64_t fpreg_value = fpreg(fpreg_size, + target_fpreg.code()); + ASSERT(count > 0); + + if (target_fpreg.Is32Bits()) { + printf("s%u = ", target_fpreg.code()); + } else { + printf("d%u = ", target_fpreg.code()); + } + for (uint64_t i = 1; i <= count; i++) { + uint64_t data = fpreg_value >> (fpreg_size - (i * format_size)); + data &= mask; + format->PrintData(&data); + printf(" "); + } + printf("\n"); +} + + void Debugger::VisitException(Instruction* instr) { switch (instr->Mask(ExceptionMask)) { case BRK: @@ -873,11 +917,6 @@ Token* Token::Tokenize(const char* arg) { return token; } - token = FormatToken::Tokenize(arg); - if (token != NULL) { - return token; - } - return new UnknownToken(arg); } @@ -1039,61 +1078,79 @@ Token* IntegerToken::Tokenize(const char* arg) { Token* FormatToken::Tokenize(const char* arg) { - if (arg[0] != '%') { + int length = strlen(arg); + switch (arg[0]) { + case 'x': + case 's': + case 'u': + case 'f': + if (length == 1) return NULL; + break; + case 'i': + if (length == 1) return new Format("%08" PRIx32, 'i'); + default: return NULL; + } + + char* endptr = NULL; + errno = 0; // Reset errors. + uint64_t count = strtoul(arg + 1, &endptr, 10); + + if (errno != 0) { + // Overflow, etc. return NULL; } - int length = strlen(arg); - if ((length < 2) || (length > 3)) { + if (endptr == arg) { + // No digits were parsed. return NULL; } - char type = arg[length - 1]; - if (length == 2) { - switch (type) { - case 'x': return new Format("%016" PRIx64); - case 'w': return new Format("%08" PRIx32); - case 'h': return new Format("%04" PRIx16); - case 'b': return new Format("%02" PRIx8); - case 'c': return new Format("%c"); - case 'd': return new Format("%g"); - case 's': return new Format("%g"); - default: return NULL; - } + if (*endptr != '\0') { + // There are unexpected (non-digit) characters after the number. + return NULL; } - ASSERT(length == 3); - switch (arg[1]) { + switch (arg[0]) { + case 'x': + switch (count) { + case 8: return new Format("%02" PRIx8, 'x'); + case 16: return new Format("%04" PRIx16, 'x'); + case 32: return new Format("%08" PRIx32, 'x'); + case 64: return new Format("%016" PRIx64, 'x'); + default: return NULL; + } case 's': - switch (type) { - case 'x': return new Format("%+20" PRId64); - case 'w': return new Format("%+11" PRId32); - case 'h': return new Format("%+6" PRId16); - case 'b': return new Format("%+4" PRId8); + switch (count) { + case 8: return new Format("%4" PRId8, 's'); + case 16: return new Format("%6" PRId16, 's'); + case 32: return new Format("%11" PRId32, 's'); + case 64: return new Format("%20" PRId64, 's'); default: return NULL; } case 'u': - switch (type) { - case 'x': return new Format("%20" PRIu64); - case 'w': return new Format("%10" PRIu32); - case 'h': return new Format("%5" PRIu16); - case 'b': return new Format("%3" PRIu8); + switch (count) { + case 8: return new Format("%3" PRIu8, 'u'); + case 16: return new Format("%5" PRIu16, 'u'); + case 32: return new Format("%10" PRIu32, 'u'); + case 64: return new Format("%20" PRIu64, 'u'); default: return NULL; } - case 'a': - switch (type) { - case 'd': return new Format("%a"); - case 's': return new Format("%a"); + case 'f': + switch (count) { + case 32: return new Format("%13g", 'f'); + case 64: return new Format("%13g", 'f'); default: return NULL; } - default: return NULL; + default: + UNREACHABLE(); + return NULL; } } template void Format::Print(FILE* out) const { - fprintf(out, "[Format %s - %lu byte(s)]", fmt_, sizeof(T)); + fprintf(out, "[Format %c%lu - %s]", type_code_, sizeof(T) * 8, fmt_); } @@ -1121,10 +1178,25 @@ bool DebugCommand::Match(const char* name, const char** aliases) { DebugCommand* DebugCommand::Parse(char* line) { std::vector args; - for (char* chunk = strtok(line, " "); + for (char* chunk = strtok(line, " \t"); chunk != NULL; - chunk = strtok(NULL, " ")) { - args.push_back(Token::Tokenize(chunk)); + chunk = strtok(NULL, " \t")) { + char* dot = strchr(chunk, '.'); + if (dot != NULL) { + // 'Token.format'. + Token* format = FormatToken::Tokenize(dot + 1); + if (format != NULL) { + *dot = '\0'; + args.push_back(Token::Tokenize(chunk)); + args.push_back(format); + } else { + // Error while parsing the format, push the UnknownToken so an error + // can be accurately reported. + args.push_back(Token::Tokenize(chunk)); + } + } else { + args.push_back(Token::Tokenize(chunk)); + } } if (args.size() == 0) { @@ -1132,7 +1204,7 @@ DebugCommand* DebugCommand::Parse(char* line) { } if (!args[0]->IsIdentifier()) { - return new InvalidCommand(args, 0, "command name is not an identifier"); + return new InvalidCommand(args, 0, "command name is not valid"); } const char* name = IdentifierToken::Cast(args[0])->value(); @@ -1249,66 +1321,36 @@ DebugCommand* StepCommand::Build(std::vector args) { } -bool DisasmCommand::Run(Debugger* debugger) { - ASSERT(debugger->IsDebuggerRunning()); - - uint8_t* from = target()->ToAddress(debugger); - debugger->PrintInstructions(from, count()); - - return false; -} - - -void DisasmCommand::Print(FILE* out) { - fprintf(out, "%s ", name()); - target()->Print(out); - fprintf(out, " %" PRId64 "", count()); -} - - DebugCommand* DisasmCommand::Build(std::vector args) { - Token* address = NULL; IntegerToken* count = NULL; switch (args.size()) { - case 1: { // disasm [pc] [1] - address = new IdentifierToken("pc"); - count = new IntegerToken(1); - break; - } - case 2: { // disasm [pc] n or disasm address [1] - Token* first = args[1]; - if (first->IsInteger()) { - address = new IdentifierToken("pc"); - count = IntegerToken::Cast(first); - } else if (first->CanAddressMemory()) { - address = first; - count = new IntegerToken(1); - } else { - return new InvalidCommand(args, 1, "expects int or addr"); - } + case 1: { // disasm [10] + count = new IntegerToken(10); break; } - case 3: { // disasm address count + case 2: { // disasm n Token* first = args[1]; - Token* second = args[2]; - if (!first->CanAddressMemory() || !second->IsInteger()) { - return new InvalidCommand(args, -1, "disasm addr int"); + if (!first->IsInteger()) { + return new InvalidCommand(args, 1, "expects int"); } - address = first; - count = IntegerToken::Cast(second); + + count = IntegerToken::Cast(first); break; } default: - return new InvalidCommand(args, -1, "wrong arguments number"); + return new InvalidCommand(args, -1, "too many arguments"); } - return new DisasmCommand(args[0], address, count); + Token* target = new IdentifierToken("pc"); + FormatToken* format = new Format("%08" PRIx32, 'i'); + return new ExamineCommand(args[0], target, format, count); } void PrintCommand::Print(FILE* out) { fprintf(out, "%s ", name()); target()->Print(out); + if (format() != NULL) format()->Print(out); } @@ -1333,30 +1375,24 @@ bool PrintCommand::Run(Debugger* debugger) { return false; } + FormatToken* format_tok = format(); + ASSERT(format_tok != NULL); + if (format_tok->type_code() == 'i') { + // TODO(all): Add support for instruction disassembly. + printf(" ** unsupported format: instructions **\n"); + return false; + } + if (tok->IsRegister()) { RegisterToken* reg_tok = RegisterToken::Cast(tok); Register reg = reg_tok->value(); - if (reg.Is32Bits()) { - printf("%s = %" PRId32 "\n", - reg_tok->Name(), - debugger->wreg(reg.code(), Reg31IsStackPointer)); - } else { - printf("%s = %" PRId64 "\n", - reg_tok->Name(), - debugger->xreg(reg.code(), Reg31IsStackPointer)); - } - + debugger->PrintRegister(reg, reg_tok->Name(), format_tok); return false; } if (tok->IsFPRegister()) { FPRegister fpreg = FPRegisterToken::Cast(tok)->value(); - if (fpreg.Is32Bits()) { - printf("s%u = %g\n", fpreg.code(), debugger->sreg(fpreg.code())); - } else { - printf("d%u = %g\n", fpreg.code(), debugger->dreg(fpreg.code())); - } - + debugger->PrintFPRegister(fpreg, format_tok); return false; } @@ -1366,91 +1402,144 @@ bool PrintCommand::Run(Debugger* debugger) { DebugCommand* PrintCommand::Build(std::vector args) { - Token* target = NULL; + if (args.size() < 2) { + return new InvalidCommand(args, -1, "too few arguments"); + } + + Token* target = args[1]; + if (!target->IsRegister() && + !target->IsFPRegister() && + !target->IsIdentifier()) { + return new InvalidCommand(args, 1, "expects reg or identifier"); + } + + FormatToken* format = NULL; + int target_size = 0; + if (target->IsRegister()) { + Register reg = RegisterToken::Cast(target)->value(); + target_size = reg.SizeInBytes(); + } else if (target->IsFPRegister()) { + FPRegister fpreg = FPRegisterToken::Cast(target)->value(); + target_size = fpreg.SizeInBytes(); + } + // If the target is an identifier there must be no format. This is checked + // in the switch statement below. + switch (args.size()) { case 2: { - target = args[1]; - if (!target->IsRegister() - && !target->IsFPRegister() - && !target->IsIdentifier()) { - return new InvalidCommand(args, 1, "expects reg or identifier"); + if (target->IsRegister()) { + switch (target_size) { + case 4: format = new Format("%08" PRIx32, 'x'); break; + case 8: format = new Format("%016" PRIx64, 'x'); break; + default: UNREACHABLE(); + } + } else if (target->IsFPRegister()) { + switch (target_size) { + case 4: format = new Format("%8g", 'f'); break; + case 8: format = new Format("%8g", 'f'); break; + default: UNREACHABLE(); + } } break; } + case 3: { + if (target->IsIdentifier()) { + return new InvalidCommand(args, 2, + "format is only allowed with registers"); + } + + Token* second = args[2]; + if (!second->IsFormat()) { + return new InvalidCommand(args, 2, "expects format"); + } + format = FormatToken::Cast(second); + + if (format->SizeOf() > target_size) { + return new InvalidCommand(args, 2, "format too wide"); + } + + break; + } default: return new InvalidCommand(args, -1, "too many arguments"); } - return new PrintCommand(args[0], target); + return new PrintCommand(args[0], target, format); } -bool MemCommand::Run(Debugger* debugger) { +bool ExamineCommand::Run(Debugger* debugger) { ASSERT(debugger->IsDebuggerRunning()); uint8_t* address = target()->ToAddress(debugger); - debugger->PrintMemory(address, count(), format()); + int64_t amount = count()->value(); + if (format()->type_code() == 'i') { + debugger->PrintInstructions(address, amount); + } else { + debugger->PrintMemory(address, format(), amount); + } return false; } -void MemCommand::Print(FILE* out) { +void ExamineCommand::Print(FILE* out) { fprintf(out, "%s ", name()); - target()->Print(out); - fprintf(out, " %" PRId64 " ", count()); format()->Print(out); + target()->Print(out); } -DebugCommand* MemCommand::Build(std::vector args) { +DebugCommand* ExamineCommand::Build(std::vector args) { if (args.size() < 2) { return new InvalidCommand(args, -1, "too few arguments"); } Token* target = args[1]; - IntegerToken* count = NULL; - FormatToken* format = NULL; - if (!target->CanAddressMemory()) { return new InvalidCommand(args, 1, "expects address"); } + FormatToken* format = NULL; + IntegerToken* count = NULL; + switch (args.size()) { - case 2: { // mem addressable [1] [%x] - count = new IntegerToken(1); - format = new Format("%016x"); + case 2: { // mem addr[.x64] [10] + format = new Format("%016" PRIx64, 'x'); + count = new IntegerToken(10); break; } - case 3: { // mem addr n [%x] or mem addr [n] %f + case 3: { // mem addr.format [10] + // mem addr[.x64] n Token* second = args[2]; - if (second->IsInteger()) { - count = IntegerToken::Cast(second); - format = new Format("%016x"); - } else if (second->IsFormat()) { - count = new IntegerToken(1); + if (second->IsFormat()) { format = FormatToken::Cast(second); + count = new IntegerToken(10); + break; + } else if (second->IsInteger()) { + format = new Format("%016" PRIx64, 'x'); + count = IntegerToken::Cast(second); } else { - return new InvalidCommand(args, 2, "expects int or format"); + return new InvalidCommand(args, 2, "expects format or integer"); } + UNREACHABLE(); break; } - case 4: { // mem addr n %f + case 4: { // mem addr.format n Token* second = args[2]; Token* third = args[3]; - if (!second->IsInteger() || !third->IsFormat()) { - return new InvalidCommand(args, -1, "mem addr >>int<< %F"); + if (!second->IsFormat() || !third->IsInteger()) { + return new InvalidCommand(args, -1, "expects addr[.format] [n]"); } - - count = IntegerToken::Cast(second); - format = FormatToken::Cast(third); + format = FormatToken::Cast(second); + count = IntegerToken::Cast(third); break; } default: return new InvalidCommand(args, -1, "too many arguments"); } - return new MemCommand(args[0], target, count, format); + return new ExamineCommand(args[0], target, format, count); } diff --git a/src/a64/debugger-a64.h b/src/a64/debugger-a64.h index 542d2025..1f7888a6 100644 --- a/src/a64/debugger-a64.h +++ b/src/a64/debugger-a64.h @@ -156,8 +156,13 @@ class Debugger : public Simulator { void PrintInstructions(void* address, int64_t count = 1); void PrintMemory(const uint8_t* address, - int64_t count, - const FormatToken* format); + const FormatToken* format, + int64_t count = 1); + void PrintRegister(const Register& target_reg, + const char* name, + const FormatToken* format); + void PrintFPRegister(const FPRegister& target_fpreg, + const FormatToken* format); private: void LogSystemRegisters(); diff --git a/src/a64/disasm-a64.cc b/src/a64/disasm-a64.cc index 4a497480..616fca56 100644 --- a/src/a64/disasm-a64.cc +++ b/src/a64/disasm-a64.cc @@ -1082,6 +1082,14 @@ void Disassembler::VisitFPIntegerConvert(Instruction* instr) { case FMOV_xd: mnemonic = "fmov"; form = form_rf; break; case FMOV_sw: case FMOV_dx: mnemonic = "fmov"; form = form_fr; break; + case FCVTAS_ws: + case FCVTAS_xs: + case FCVTAS_wd: + case FCVTAS_xd: mnemonic = "fcvtas"; form = form_rf; break; + case FCVTAU_ws: + case FCVTAU_xs: + case FCVTAU_wd: + case FCVTAU_xd: mnemonic = "fcvtau"; form = form_rf; break; case FCVTMS_ws: case FCVTMS_xs: case FCVTMS_wd: @@ -1184,6 +1192,24 @@ void Disassembler::VisitSystem(Instruction* instr) { break; } } + } else if (instr->Mask(MemBarrierFMask) == MemBarrierFixed) { + switch (instr->Mask(MemBarrierMask)) { + case DMB: { + mnemonic = "dmb"; + form = "'M"; + break; + } + case DSB: { + mnemonic = "dsb"; + form = "'M"; + break; + } + case ISB: { + mnemonic = "isb"; + form = NULL; + break; + } + } } Format(instr, mnemonic, form); @@ -1268,6 +1294,7 @@ int Disassembler::SubstituteField(Instruction* instr, const char* format) { case 'A': return SubstitutePCRelAddressField(instr, format); case 'B': return SubstituteBranchTargetField(instr, format); case 'O': return SubstituteLSRegOffsetField(instr, format); + case 'M': return SubstituteBarrierField(instr, format); default: { UNREACHABLE(); return 1; @@ -1654,6 +1681,23 @@ int Disassembler::SubstitutePrefetchField(Instruction* instr, return 6; } +int Disassembler::SubstituteBarrierField(Instruction* instr, + const char* format) { + ASSERT(format[0] == 'M'); + USE(format); + + static const char* options[4][4] = { + { "sy (0b0000)", "oshld", "oshst", "osh" }, + { "sy (0b0100)", "nshld", "nshst", "nsh" }, + { "sy (0b1000)", "ishld", "ishst", "ish" }, + { "sy (0b1100)", "ld", "st", "sy" } + }; + int domain = instr->ImmBarrierDomain(); + int type = instr->ImmBarrierType(); + + AppendToOutput("%s", options[domain][type]); + return 1; +} void Disassembler::ResetOutput() { buffer_pos_ = 0; diff --git a/src/a64/disasm-a64.h b/src/a64/disasm-a64.h index 857a5aca..3a56e155 100644 --- a/src/a64/disasm-a64.h +++ b/src/a64/disasm-a64.h @@ -64,6 +64,7 @@ class Disassembler: public DecoderVisitor { int SubstituteBranchTargetField(Instruction* instr, const char* format); int SubstituteLSRegOffsetField(Instruction* instr, const char* format); int SubstitutePrefetchField(Instruction* instr, const char* format); + int SubstituteBarrierField(Instruction* instr, const char* format); inline bool RdIsZROrSP(Instruction* instr) const { return (instr->Rd() == kZeroRegCode); diff --git a/src/a64/instructions-a64.h b/src/a64/instructions-a64.h index 0f31fcd7..a7558b2a 100644 --- a/src/a64/instructions-a64.h +++ b/src/a64/instructions-a64.h @@ -93,6 +93,7 @@ static const float kFP32SignallingNaN = rawbits_to_float(0x7f800001); static const double kFP64QuietNaN = rawbits_to_double(0x7ff800007fc00001); static const float kFP32QuietNaN = rawbits_to_float(0x7fc00001); + enum LSDataSize { LSByte = 0, LSHalfword = 1, diff --git a/src/a64/instrument-a64.cc b/src/a64/instrument-a64.cc index 507410d0..841173c3 100644 --- a/src/a64/instrument-a64.cc +++ b/src/a64/instrument-a64.cc @@ -151,7 +151,7 @@ Instrument::~Instrument() { // Free all the counter objects. std::list::iterator it; for (it = counters_.begin(); it != counters_.end(); it++) { - free(*it); + delete *it; } if (output_stream_ != stdout) { diff --git a/src/a64/instrument-a64.h b/src/a64/instrument-a64.h index bee965ba..d8ddb46b 100644 --- a/src/a64/instrument-a64.h +++ b/src/a64/instrument-a64.h @@ -54,7 +54,6 @@ enum CounterType { class Counter { public: Counter(const char* name, CounterType type = Gauge); - ~Counter(); void Increment(); void Enable(); diff --git a/src/a64/macro-assembler-a64.cc b/src/a64/macro-assembler-a64.cc index 39a925c2..a7e2c2ef 100644 --- a/src/a64/macro-assembler-a64.cc +++ b/src/a64/macro-assembler-a64.cc @@ -29,26 +29,40 @@ namespace vixl { void MacroAssembler::And(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S) { + const Operand& operand) { ASSERT(allow_macro_instructions_); - LogicalMacro(rd, rn, operand, (S == SetFlags) ? ANDS : AND); + LogicalMacro(rd, rn, operand, AND); +} + + +void MacroAssembler::Ands(const Register& rd, + const Register& rn, + const Operand& operand) { + ASSERT(allow_macro_instructions_); + LogicalMacro(rd, rn, operand, ANDS); } void MacroAssembler::Tst(const Register& rn, const Operand& operand) { ASSERT(allow_macro_instructions_); - And(AppropriateZeroRegFor(rn), rn, operand, SetFlags); + Ands(AppropriateZeroRegFor(rn), rn, operand); } void MacroAssembler::Bic(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S) { + const Operand& operand) { + ASSERT(allow_macro_instructions_); + LogicalMacro(rd, rn, operand, BIC); +} + + +void MacroAssembler::Bics(const Register& rd, + const Register& rn, + const Operand& operand) { ASSERT(allow_macro_instructions_); - LogicalMacro(rd, rn, operand, (S == SetFlags) ? BICS : BIC); + LogicalMacro(rd, rn, operand, BICS); } @@ -174,7 +188,9 @@ void MacroAssembler::LogicalMacro(const Register& rd, } -void MacroAssembler::Mov(const Register& rd, const Operand& operand) { +void MacroAssembler::Mov(const Register& rd, + const Operand& operand, + DiscardMoveMode discard_mode) { ASSERT(allow_macro_instructions_); if (operand.IsImmediate()) { // Call the macro assembler for generic immediates. @@ -191,10 +207,16 @@ void MacroAssembler::Mov(const Register& rd, const Operand& operand) { operand.shift_amount()); } else { // Otherwise, emit a register move only if the registers are distinct, or - // if they are not X registers. Note that mov(w0, w0) is not a no-op - // because it clears the top word of x0. + // if they are not X registers. + // + // Note that mov(w0, w0) is not a no-op because it clears the top word of + // x0. A flag is provided (kDiscardForSameWReg) if a move between the same W + // registers is not required to clear the top word of the X register. In + // this case, the instruction is discarded. + // // If the sp is an operand, add #0 is emitted, otherwise, orr #0. - if (!rd.Is(operand.reg()) || !rd.Is64Bits()) { + if (!rd.Is(operand.reg()) || (rd.Is32Bits() && + (discard_mode == kDontDiscardForSameWReg))) { mov(rd, operand.reg()); } } @@ -230,19 +252,21 @@ void MacroAssembler::Mov(const Register& rd, uint64_t imm) { // // Initial values can be generated with: // 1. 64-bit move zero (movz). - // 2. 32-bit move negative (movn). - // 3. 64-bit move negative. + // 2. 32-bit move inverted (movn). + // 3. 64-bit move inverted. // 4. 32-bit orr immediate. // 5. 64-bit orr immediate. - // Move-keep may then be used to modify each of the 16-bit nybbles. + // Move-keep may then be used to modify each of the 16-bit half words. // // The code below supports all five initial value generators, and - // applying move-keep operations to move-zero initial values only. + // applying move-keep operations to move-zero and move-inverted initial + // values. unsigned reg_size = rd.size(); unsigned n, imm_s, imm_r; if (IsImmMovz(imm, reg_size) && !rd.IsSP()) { - // Immediate can be represented in a move zero instruction. + // Immediate can be represented in a move zero instruction. Movz can't + // write to the stack pointer. movz(rd, imm); } else if (IsImmMovn(imm, reg_size) && !rd.IsSP()) { // Immediate can be represented in a move negative instruction. Movn can't @@ -255,20 +279,36 @@ void MacroAssembler::Mov(const Register& rd, uint64_t imm) { } else { // Generic immediate case. Imm will be represented by // [imm3, imm2, imm1, imm0], where each imm is 16 bits. - // A move-zero is generated for the first non-zero immX, and a move-keep - // for subsequent non-zero immX. + // A move-zero or move-inverted is generated for the first non-zero or + // non-0xffff immX, and a move-keep for subsequent non-zero immX. + + uint64_t ignored_halfword = 0; + bool invert_move = false; + // If the number of 0xffff halfwords is greater than the number of 0x0000 + // halfwords, it's more efficient to use move-inverted. + if (CountClearHalfWords(~imm, reg_size) > + CountClearHalfWords(imm, reg_size)) { + ignored_halfword = 0xffffL; + invert_move = true; + } - // Use a temporary register when moving to the stack pointer. + // Mov instructions can't move values into the stack pointer, so set up a + // temporary register, if needed. Register temp = rd.IsSP() ? AppropriateTempFor(rd) : rd; + // Iterate through the halfwords. Use movn/movz for the first non-ignored + // halfword, and movk for subsequent halfwords. ASSERT((reg_size % 16) == 0); bool first_mov_done = false; for (unsigned i = 0; i < (temp.size() / 16); i++) { uint64_t imm16 = (imm >> (16 * i)) & 0xffffL; - if (imm16 != 0) { + if (imm16 != ignored_halfword) { if (!first_mov_done) { - // Move the first non-zero 16-bit chunk into the destination register. - movz(temp, imm16, 16 * i); + if (invert_move) { + movn(temp, (~imm16) & 0xffffL, 16 * i); + } else { + movz(temp, imm16, 16 * i); + } first_mov_done = true; } else { // Construct a wider constant. @@ -277,34 +317,35 @@ void MacroAssembler::Mov(const Register& rd, uint64_t imm) { } } + ASSERT(first_mov_done); + + // Move the temporary if the original destination register was the stack + // pointer. if (rd.IsSP()) { mov(rd, temp); } + } +} - ASSERT(first_mov_done); + +unsigned MacroAssembler::CountClearHalfWords(uint64_t imm, unsigned reg_size) { + ASSERT((reg_size % 8) == 0); + int count = 0; + for (unsigned i = 0; i < (reg_size / 16); i++) { + if ((imm & 0xffff) == 0) { + count++; + } + imm >>= 16; } + return count; } -// The movz instruction can generate immediates containing an arbitrary 16-bit +// The movn instruction can generate immediates containing an arbitrary 16-bit // value, with remaining bits set, eg. 0x00001234, 0x0000123400000000. bool MacroAssembler::IsImmMovz(uint64_t imm, unsigned reg_size) { - if (reg_size == kXRegSize) { - if (((imm & 0xffffffffffff0000UL) == 0UL) || - ((imm & 0xffffffff0000ffffUL) == 0UL) || - ((imm & 0xffff0000ffffffffUL) == 0UL) || - ((imm & 0x0000ffffffffffffUL) == 0UL)) { - return true; - } - } else { - ASSERT(reg_size == kWRegSize); - imm &= kWRegMask; - if (((imm & 0xffff0000) == 0) || - ((imm & 0x0000ffff) == 0)) { - return true; - } - } - return false; + ASSERT((reg_size == kXRegSize) || (reg_size == kWRegSize)); + return CountClearHalfWords(imm, reg_size) >= ((reg_size / 16) - 1); } @@ -320,7 +361,11 @@ void MacroAssembler::Ccmp(const Register& rn, StatusFlags nzcv, Condition cond) { ASSERT(allow_macro_instructions_); - ConditionalCompareMacro(rn, operand, nzcv, cond, CCMP); + if (operand.IsImmediate() && (operand.immediate() < 0)) { + ConditionalCompareMacro(rn, -operand.immediate(), nzcv, cond, CCMN); + } else { + ConditionalCompareMacro(rn, operand, nzcv, cond, CCMP); + } } @@ -329,7 +374,11 @@ void MacroAssembler::Ccmn(const Register& rn, StatusFlags nzcv, Condition cond) { ASSERT(allow_macro_instructions_); - ConditionalCompareMacro(rn, operand, nzcv, cond, CCMN); + if (operand.IsImmediate() && (operand.immediate() < 0)) { + ConditionalCompareMacro(rn, -operand.immediate(), nzcv, cond, CCMP); + } else { + ConditionalCompareMacro(rn, operand, nzcv, cond, CCMN); + } } @@ -347,88 +396,138 @@ void MacroAssembler::ConditionalCompareMacro(const Register& rn, } else { // The operand isn't directly supported by the instruction: perform the // operation on a temporary register. - Register temp(NoReg); - if (operand.IsImmediate()) { - temp = AppropriateTempFor(rn); - Mov(temp, operand.immediate()); - } else if (operand.IsShiftedRegister()) { - ASSERT(operand.shift() != ROR); - ASSERT(is_uintn(rn.size() == kXRegSize ? kXRegSizeLog2 : kWRegSizeLog2, - operand.shift_amount())); - temp = AppropriateTempFor(rn, operand.reg()); - EmitShift(temp, operand.reg(), operand.shift(), operand.shift_amount()); + Register temp = AppropriateTempFor(rn); + Mov(temp, operand); + ConditionalCompare(rn, temp, nzcv, cond, op); + } +} + + +void MacroAssembler::Csel(const Register& rd, + const Register& rn, + const Operand& operand, + Condition cond) { + ASSERT(allow_macro_instructions_); + ASSERT(!rd.IsZero()); + ASSERT(!rn.IsZero()); + ASSERT((cond != al) && (cond != nv)); + if (operand.IsImmediate()) { + // Immediate argument. Handle special cases of 0, 1 and -1 using zero + // register. + int64_t imm = operand.immediate(); + Register zr = AppropriateZeroRegFor(rn); + if (imm == 0) { + csel(rd, rn, zr, cond); + } else if (imm == 1) { + csinc(rd, rn, zr, cond); + } else if (imm == -1) { + csinv(rd, rn, zr, cond); } else { - ASSERT(operand.IsExtendedRegister()); - ASSERT(operand.reg().size() <= rn.size()); - // Add/sub extended support a shift <= 4. We want to support exactly the - // same modes. - ASSERT(operand.shift_amount() <= 4); - ASSERT(operand.reg().Is64Bits() || - ((operand.extend() != UXTX) && (operand.extend() != SXTX))); - temp = AppropriateTempFor(rn, operand.reg()); - EmitExtendShift(temp, operand.reg(), operand.extend(), - operand.shift_amount()); + Register temp = AppropriateTempFor(rn); + Mov(temp, operand.immediate()); + csel(rd, rn, temp, cond); } - ConditionalCompare(rn, Operand(temp), nzcv, cond, op); + } else if (operand.IsShiftedRegister() && (operand.shift_amount() == 0)) { + // Unshifted register argument. + csel(rd, rn, operand.reg(), cond); + } else { + // All other arguments. + Register temp = AppropriateTempFor(rn); + Mov(temp, operand); + csel(rd, rn, temp, cond); } } void MacroAssembler::Add(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S) { + const Operand& operand) { ASSERT(allow_macro_instructions_); if (operand.IsImmediate() && (operand.immediate() < 0)) { - AddSubMacro(rd, rn, -operand.immediate(), S, SUB); + AddSubMacro(rd, rn, -operand.immediate(), LeaveFlags, SUB); } else { - AddSubMacro(rd, rn, operand, S, ADD); + AddSubMacro(rd, rn, operand, LeaveFlags, ADD); + } +} + + +void MacroAssembler::Adds(const Register& rd, + const Register& rn, + const Operand& operand) { + ASSERT(allow_macro_instructions_); + if (operand.IsImmediate() && (operand.immediate() < 0)) { + AddSubMacro(rd, rn, -operand.immediate(), SetFlags, SUB); + } else { + AddSubMacro(rd, rn, operand, SetFlags, ADD); } } void MacroAssembler::Sub(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S) { + const Operand& operand) { + ASSERT(allow_macro_instructions_); + if (operand.IsImmediate() && (operand.immediate() < 0)) { + AddSubMacro(rd, rn, -operand.immediate(), LeaveFlags, ADD); + } else { + AddSubMacro(rd, rn, operand, LeaveFlags, SUB); + } +} + + +void MacroAssembler::Subs(const Register& rd, + const Register& rn, + const Operand& operand) { ASSERT(allow_macro_instructions_); if (operand.IsImmediate() && (operand.immediate() < 0)) { - AddSubMacro(rd, rn, -operand.immediate(), S, ADD); + AddSubMacro(rd, rn, -operand.immediate(), SetFlags, ADD); } else { - AddSubMacro(rd, rn, operand, S, SUB); + AddSubMacro(rd, rn, operand, SetFlags, SUB); } } void MacroAssembler::Cmn(const Register& rn, const Operand& operand) { ASSERT(allow_macro_instructions_); - Add(AppropriateZeroRegFor(rn), rn, operand, SetFlags); + Adds(AppropriateZeroRegFor(rn), rn, operand); } void MacroAssembler::Cmp(const Register& rn, const Operand& operand) { ASSERT(allow_macro_instructions_); - Sub(AppropriateZeroRegFor(rn), rn, operand, SetFlags); + Subs(AppropriateZeroRegFor(rn), rn, operand); } void MacroAssembler::Neg(const Register& rd, - const Operand& operand, - FlagsUpdate S) { + const Operand& operand) { ASSERT(allow_macro_instructions_); if (operand.IsImmediate()) { Mov(rd, -operand.immediate()); } else { - Sub(rd, AppropriateZeroRegFor(rd), operand, S); + Sub(rd, AppropriateZeroRegFor(rd), operand); } } +void MacroAssembler::Negs(const Register& rd, + const Operand& operand) { + ASSERT(allow_macro_instructions_); + Subs(rd, AppropriateZeroRegFor(rd), operand); +} + + void MacroAssembler::AddSubMacro(const Register& rd, const Register& rn, const Operand& operand, FlagsUpdate S, AddSubOp op) { + if (operand.IsZero() && rd.Is(rn) && rd.Is64Bits() && rn.Is64Bits() && + (S == LeaveFlags)) { + // The instruction would be a nop. Avoid generating useless code. + return; + } + if ((operand.IsImmediate() && !IsImmAddSub(operand.immediate())) || (rn.IsZero() && !operand.IsShiftedRegister()) || (operand.IsShiftedRegister() && (operand.shift() == ROR))) { @@ -443,28 +542,49 @@ void MacroAssembler::AddSubMacro(const Register& rd, void MacroAssembler::Adc(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S) { + const Operand& operand) { ASSERT(allow_macro_instructions_); - AddSubWithCarryMacro(rd, rn, operand, S, ADC); + AddSubWithCarryMacro(rd, rn, operand, LeaveFlags, ADC); +} + + +void MacroAssembler::Adcs(const Register& rd, + const Register& rn, + const Operand& operand) { + ASSERT(allow_macro_instructions_); + AddSubWithCarryMacro(rd, rn, operand, SetFlags, ADC); } void MacroAssembler::Sbc(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S) { + const Operand& operand) { ASSERT(allow_macro_instructions_); - AddSubWithCarryMacro(rd, rn, operand, S, SBC); + AddSubWithCarryMacro(rd, rn, operand, LeaveFlags, SBC); +} + + +void MacroAssembler::Sbcs(const Register& rd, + const Register& rn, + const Operand& operand) { + ASSERT(allow_macro_instructions_); + AddSubWithCarryMacro(rd, rn, operand, SetFlags, SBC); } void MacroAssembler::Ngc(const Register& rd, - const Operand& operand, - FlagsUpdate S) { + const Operand& operand) { ASSERT(allow_macro_instructions_); Register zr = AppropriateZeroRegFor(rd); - Sbc(rd, zr, operand, S); + Sbc(rd, zr, operand); +} + + +void MacroAssembler::Ngcs(const Register& rd, + const Operand& operand) { + ASSERT(allow_macro_instructions_); + Register zr = AppropriateZeroRegFor(rd); + Sbcs(rd, zr, operand); } @@ -771,8 +891,13 @@ void MacroAssembler::Peek(const Register& dst, const Operand& offset) { void MacroAssembler::Claim(const Operand& size) { ASSERT(allow_macro_instructions_); + + if (size.IsZero()) { + return; + } + if (size.IsImmediate()) { - ASSERT(size.immediate() >= 0); + ASSERT(size.immediate() > 0); if (sp.Is(StackPointer())) { ASSERT((size.immediate() % 16) == 0); } @@ -788,8 +913,13 @@ void MacroAssembler::Claim(const Operand& size) { void MacroAssembler::Drop(const Operand& size) { ASSERT(allow_macro_instructions_); + + if (size.IsZero()) { + return; + } + if (size.IsImmediate()) { - ASSERT(size.immediate() >= 0); + ASSERT(size.immediate() > 0); if (sp.Is(StackPointer())) { ASSERT((size.immediate() % 16) == 0); } diff --git a/src/a64/macro-assembler-a64.h b/src/a64/macro-assembler-a64.h index f2660637..3c52c995 100644 --- a/src/a64/macro-assembler-a64.h +++ b/src/a64/macro-assembler-a64.h @@ -45,6 +45,8 @@ namespace vixl { +enum DiscardMoveMode { kDontDiscardForSameWReg, kDiscardForSameWReg }; + class MacroAssembler : public Assembler { public: MacroAssembler(byte * buffer, unsigned buffer_size) @@ -57,12 +59,16 @@ class MacroAssembler : public Assembler { // Logical macros. void And(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S = LeaveFlags); + const Operand& operand); + void Ands(const Register& rd, + const Register& rn, + const Operand& operand); void Bic(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S = LeaveFlags); + const Operand& operand); + void Bics(const Register& rd, + const Register& rn, + const Operand& operand); void Orr(const Register& rd, const Register& rn, const Operand& operand); @@ -84,17 +90,23 @@ class MacroAssembler : public Assembler { // Add and sub macros. void Add(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S = LeaveFlags); + const Operand& operand); + void Adds(const Register& rd, + const Register& rn, + const Operand& operand); void Sub(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S = LeaveFlags); + const Operand& operand); + void Subs(const Register& rd, + const Register& rn, + const Operand& operand); void Cmn(const Register& rn, const Operand& operand); void Cmp(const Register& rn, const Operand& operand); void Neg(const Register& rd, - const Operand& operand, - FlagsUpdate S = LeaveFlags); + const Operand& operand); + void Negs(const Register& rd, + const Operand& operand); + void AddSubMacro(const Register& rd, const Register& rn, const Operand& operand, @@ -104,15 +116,20 @@ class MacroAssembler : public Assembler { // Add/sub with carry macros. void Adc(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S = LeaveFlags); + const Operand& operand); + void Adcs(const Register& rd, + const Register& rn, + const Operand& operand); void Sbc(const Register& rd, const Register& rn, - const Operand& operand, - FlagsUpdate S = LeaveFlags); + const Operand& operand); + void Sbcs(const Register& rd, + const Register& rn, + const Operand& operand); void Ngc(const Register& rd, - const Operand& operand, - FlagsUpdate S = LeaveFlags); + const Operand& operand); + void Ngcs(const Register& rd, + const Operand& operand); void AddSubWithCarryMacro(const Register& rd, const Register& rn, const Operand& operand, @@ -121,15 +138,18 @@ class MacroAssembler : public Assembler { // Move macros. void Mov(const Register& rd, uint64_t imm); - void Mov(const Register& rd, const Operand& operand); + void Mov(const Register& rd, + const Operand& operand, + DiscardMoveMode discard_mode = kDontDiscardForSameWReg); void Mvn(const Register& rd, uint64_t imm) { - Mov(rd, ~imm); + Mov(rd, (rd.size() == kXRegSize) ? ~imm : (~imm & kWRegMask)); }; void Mvn(const Register& rd, const Operand& operand); - bool IsImmMovn(uint64_t imm, unsigned reg_size); bool IsImmMovz(uint64_t imm, unsigned reg_size); + bool IsImmMovn(uint64_t imm, unsigned reg_size); + unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size); - // Conditional compare macros. + // Conditional macros. void Ccmp(const Register& rn, const Operand& operand, StatusFlags nzcv, @@ -143,6 +163,10 @@ class MacroAssembler : public Assembler { StatusFlags nzcv, Condition cond, ConditionalCompareOp op); + void Csel(const Register& rd, + const Register& rn, + const Operand& operand, + Condition cond); // Load/store macros. #define DECLARE_FUNCTION(FN, REGTYPE, REG, OP) \ @@ -384,17 +408,6 @@ class MacroAssembler : public Assembler { ASSERT(!rn.IsZero()); cneg(rd, rn, cond); } - void Csel(const Register& rd, - const Register& rn, - const Register& rm, - Condition cond) { - ASSERT(allow_macro_instructions_); - ASSERT(!rd.IsZero()); - ASSERT(!rn.IsZero()); - ASSERT(!rm.IsZero()); - ASSERT((cond != al) && (cond != nv)); - csel(rd, rn, rm, cond); - } void Cset(const Register& rd, Condition cond) { ASSERT(allow_macro_instructions_); ASSERT(!rd.IsZero()); @@ -438,6 +451,14 @@ class MacroAssembler : public Assembler { ASSERT((cond != al) && (cond != nv)); csneg(rd, rn, rm, cond); } + void Dmb(BarrierDomain domain, BarrierType type) { + ASSERT(allow_macro_instructions_); + dmb(domain, type); + } + void Dsb(BarrierDomain domain, BarrierType type) { + ASSERT(allow_macro_instructions_); + dsb(domain, type); + } void Extr(const Register& rd, const Register& rn, const Register& rm, @@ -490,6 +511,16 @@ class MacroAssembler : public Assembler { ASSERT(allow_macro_instructions_); fcvt(fd, fn); } + void Fcvtas(const Register& rd, const FPRegister& fn) { + ASSERT(allow_macro_instructions_); + ASSERT(!rd.IsZero()); + fcvtas(rd, fn); + } + void Fcvtau(const Register& rd, const FPRegister& fn) { + ASSERT(allow_macro_instructions_); + ASSERT(!rd.IsZero()); + fcvtau(rd, fn); + } void Fcvtms(const Register& rd, const FPRegister& fn) { ASSERT(allow_macro_instructions_); ASSERT(!rd.IsZero()); @@ -528,10 +559,22 @@ class MacroAssembler : public Assembler { ASSERT(allow_macro_instructions_); fmax(fd, fn, fm); } + void Fmaxnm(const FPRegister& fd, + const FPRegister& fn, + const FPRegister& fm) { + ASSERT(allow_macro_instructions_); + fmaxnm(fd, fn, fm); + } void Fmin(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm) { ASSERT(allow_macro_instructions_); fmin(fd, fn, fm); } + void Fminnm(const FPRegister& fd, + const FPRegister& fn, + const FPRegister& fm) { + ASSERT(allow_macro_instructions_); + fminnm(fd, fn, fm); + } void Fmov(FPRegister fd, FPRegister fn) { ASSERT(allow_macro_instructions_); // Only emit an instruction if fd and fn are different, and they are both D @@ -560,6 +603,13 @@ class MacroAssembler : public Assembler { ASSERT(allow_macro_instructions_); fmul(fd, fn, fm); } + void Fmadd(const FPRegister& fd, + const FPRegister& fn, + const FPRegister& fm, + const FPRegister& fa) { + ASSERT(allow_macro_instructions_); + fmadd(fd, fn, fm, fa); + } void Fmsub(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm, @@ -567,10 +617,28 @@ class MacroAssembler : public Assembler { ASSERT(allow_macro_instructions_); fmsub(fd, fn, fm, fa); } + void Fnmadd(const FPRegister& fd, + const FPRegister& fn, + const FPRegister& fm, + const FPRegister& fa) { + ASSERT(allow_macro_instructions_); + fnmadd(fd, fn, fm, fa); + } + void Fnmsub(const FPRegister& fd, + const FPRegister& fn, + const FPRegister& fm, + const FPRegister& fa) { + ASSERT(allow_macro_instructions_); + fnmsub(fd, fn, fm, fa); + } void Fneg(const FPRegister& fd, const FPRegister& fn) { ASSERT(allow_macro_instructions_); fneg(fd, fn); } + void Frinta(const FPRegister& fd, const FPRegister& fn) { + ASSERT(allow_macro_instructions_); + frinta(fd, fn); + } void Frintn(const FPRegister& fd, const FPRegister& fn) { ASSERT(allow_macro_instructions_); frintn(fd, fn); @@ -595,6 +663,10 @@ class MacroAssembler : public Assembler { ASSERT(allow_macro_instructions_); hlt(code); } + void Isb() { + ASSERT(allow_macro_instructions_); + isb(); + } void Ldnp(const CPURegister& rt, const CPURegister& rt2, const MemOperand& src) { @@ -668,6 +740,11 @@ class MacroAssembler : public Assembler { ASSERT(allow_macro_instructions_); mov(rd, rn); } + void Movk(const Register& rd, uint64_t imm, int shift = -1) { + ASSERT(allow_macro_instructions_); + ASSERT(!rd.IsZero()); + movk(rd, imm, shift); + } void Mrs(const Register& rt, SystemRegister sysreg) { ASSERT(allow_macro_instructions_); ASSERT(!rt.IsZero()); diff --git a/src/a64/simulator-a64.cc b/src/a64/simulator-a64.cc index f08e0ed1..211acc59 100644 --- a/src/a64/simulator-a64.cc +++ b/src/a64/simulator-a64.cc @@ -57,22 +57,27 @@ SimSystemRegister SimSystemRegister::DefaultValueFor(SystemRegister id) { Simulator::Simulator(Decoder* decoder, FILE* stream) { - // Ensure shift operations act as the simulator expects. + // Ensure that shift operations act as the simulator expects. ASSERT((static_cast(-1) >> 1) == -1); ASSERT((static_cast(-1) >> 1) == 0x7FFFFFFF); - // Setup the decoder. + // Set up the decoder. decoder_ = decoder; decoder_->AppendVisitor(this); ResetState(); - // Allocate and setup the simulator stack. - stack_ = reinterpret_cast(malloc(stack_size_)); + // Allocate and set up the simulator stack. + stack_ = new byte[stack_size_]; stack_limit_ = stack_ + stack_protection_size_; - byte* tos = stack_ + stack_size_ - stack_protection_size_; - // The stack pointer must be 16 bytes aligned. - set_sp(reinterpret_cast(tos) & ~0xfUL); + // Configure the starting stack pointer. + // - Find the top of the stack. + uintptr_t tos = reinterpret_cast(stack_) + stack_size_; + // - There's a protection region at both ends of the stack. + tos -= stack_protection_size_; + // - The stack pointer must be 16-byte aligned. + tos &= ~0xfUL; + set_sp(tos); stream_ = stream; print_disasm_ = new PrintDisassembler(stream_); @@ -100,12 +105,12 @@ void Simulator::ResetState() { set_dreg_bits(i, 0x7ff000007f800001UL); } // Returning to address 0 exits the Simulator. - set_lr(reinterpret_cast(kEndOfSimAddress)); + set_lr(kEndOfSimAddress); } Simulator::~Simulator() { - free(stack_); + delete [] stack_; // The decoder may outlive the simulator. decoder_->RemoveVisitor(print_disasm_); delete print_disasm_; @@ -116,6 +121,7 @@ Simulator::~Simulator() { void Simulator::Run() { + pc_modified_ = false; while (pc_ != kEndOfSimAddress) { ExecuteInstruction(); } @@ -123,8 +129,7 @@ void Simulator::Run() { void Simulator::RunFrom(Instruction* first) { - pc_ = first; - pc_modified_ = false; + set_pc(first); Run(); } @@ -394,17 +399,18 @@ void Simulator::PrintRegisters(bool print_all_regs) { char const * const clr_reg_value = (coloured_trace_) ? ("\033[1;36m") : (""); for (unsigned i = 0; i < kNumberOfRegisters; i++) { - if (print_all_regs || first_run || (last_regs[i] != registers_[i].x)) { + if (print_all_regs || first_run || + (last_regs[i] != xreg(i, Reg31IsStackPointer))) { fprintf(stream_, "# %s%4s:%s 0x%016" PRIx64 "%s\n", clr_reg_name, XRegNameForCode(i, Reg31IsStackPointer), clr_reg_value, - registers_[i].x, + xreg(i, Reg31IsStackPointer), clr_normal); } // Cache the new register value so the next run can detect any changes. - last_regs[i] = registers_[i].x; + last_regs[i] = xreg(i, Reg31IsStackPointer); } first_run = false; } @@ -424,27 +430,26 @@ void Simulator::PrintFPRegisters(bool print_all_regs) { // register in the same column each time (to make it easy to visually scan // for changes). for (unsigned i = 0; i < kNumberOfFPRegisters; i++) { - if (print_all_regs || first_run || - (last_regs[i] != double_to_rawbits(fpregisters_[i].d))) { + if (print_all_regs || first_run || (last_regs[i] != dreg_bits(i))) { fprintf(stream_, "# %s %4s:%s 0x%016" PRIx64 "%s (%s%s:%s %g%s %s:%s %g%s)\n", clr_reg_name, VRegNameForCode(i), clr_reg_value, - double_to_rawbits(fpregisters_[i].d), + dreg_bits(i), clr_normal, clr_reg_name, DRegNameForCode(i), clr_reg_value, - fpregisters_[i].d, + dreg(i), clr_reg_name, SRegNameForCode(i), clr_reg_value, - fpregisters_[i].s, + sreg(i), clr_normal); } // Cache the new register value so the next run can detect any changes. - last_regs[i] = double_to_rawbits(fpregisters_[i].d); + last_regs[i] = dreg_bits(i); } first_run = false; } @@ -492,7 +497,7 @@ void Simulator::VisitPCRelAddressing(Instruction* instr) { void Simulator::VisitUnconditionalBranch(Instruction* instr) { switch (instr->Mask(UnconditionalBranchMask)) { case BL: - set_lr(reinterpret_cast(instr->NextInstruction())); + set_lr(instr->NextInstruction()); // Fall through. case B: set_pc(instr->ImmPCOffsetTarget()); @@ -515,7 +520,7 @@ void Simulator::VisitUnconditionalBranchToRegister(Instruction* instr) { switch (instr->Mask(UnconditionalBranchToRegisterMask)) { case BLR: - set_lr(reinterpret_cast(instr->NextInstruction())); + set_lr(instr->NextInstruction()); // Fall through. case BR: case RET: set_pc(target); break; @@ -1112,18 +1117,52 @@ void Simulator::VisitDataProcessing2Source(Instruction* instr) { Shift shift_op = NO_SHIFT; int64_t result = 0; switch (instr->Mask(DataProcessing2SourceMask)) { - case SDIV_w: result = wreg(instr->Rn()) / wreg(instr->Rm()); break; - case SDIV_x: result = xreg(instr->Rn()) / xreg(instr->Rm()); break; + case SDIV_w: { + int32_t rn = wreg(instr->Rn()); + int32_t rm = wreg(instr->Rm()); + if ((rn == kWMinInt) && (rm == -1)) { + result = kWMinInt; + } else if (rm == 0) { + // Division by zero can be trapped, but not on A-class processors. + result = 0; + } else { + result = rn / rm; + } + break; + } + case SDIV_x: { + int64_t rn = xreg(instr->Rn()); + int64_t rm = xreg(instr->Rm()); + if ((rn == kXMinInt) && (rm == -1)) { + result = kXMinInt; + } else if (rm == 0) { + // Division by zero can be trapped, but not on A-class processors. + result = 0; + } else { + result = rn / rm; + } + break; + } case UDIV_w: { uint32_t rn = static_cast(wreg(instr->Rn())); uint32_t rm = static_cast(wreg(instr->Rm())); - result = rn / rm; + if (rm == 0) { + // Division by zero can be trapped, but not on A-class processors. + result = 0; + } else { + result = rn / rm; + } break; } case UDIV_x: { uint64_t rn = static_cast(xreg(instr->Rn())); uint64_t rm = static_cast(xreg(instr->Rm())); - result = rn / rm; + if (rm == 0) { + // Division by zero can be trapped, but not on A-class processors. + result = 0; + } else { + result = rn / rm; + } break; } case LSLV_w: @@ -1176,8 +1215,11 @@ void Simulator::VisitDataProcessing3Source(Instruction* instr) { unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize; int64_t result = 0; - uint64_t rn; - uint64_t rm; + // Extract and sign- or zero-extend 32-bit arguments for widening operations. + uint64_t rn_u32 = reg(instr->Rn()); + uint64_t rm_u32 = reg(instr->Rm()); + int64_t rn_s32 = reg(instr->Rn()); + int64_t rm_s32 = reg(instr->Rm()); switch (instr->Mask(DataProcessing3SourceMask)) { case MADD_w: case MADD_x: @@ -1187,22 +1229,10 @@ void Simulator::VisitDataProcessing3Source(Instruction* instr) { case MSUB_x: result = xreg(instr->Ra()) - (xreg(instr->Rn()) * xreg(instr->Rm())); break; - case SMADDL_x: - result = xreg(instr->Ra()) + (wreg(instr->Rn()) * wreg(instr->Rm())); - break; - case SMSUBL_x: - result = xreg(instr->Ra()) - (wreg(instr->Rn()) * wreg(instr->Rm())); - break; - case UMADDL_x: - rn = static_cast(wreg(instr->Rn())); - rm = static_cast(wreg(instr->Rm())); - result = xreg(instr->Ra()) + (rn * rm); - break; - case UMSUBL_x: - rn = static_cast(wreg(instr->Rn())); - rm = static_cast(wreg(instr->Rm())); - result = xreg(instr->Ra()) - (rn * rm); - break; + case SMADDL_x: result = xreg(instr->Ra()) + (rn_s32 * rm_s32); break; + case SMSUBL_x: result = xreg(instr->Ra()) - (rn_s32 * rm_s32); break; + case UMADDL_x: result = xreg(instr->Ra()) + (rn_u32 * rm_u32); break; + case UMSUBL_x: result = xreg(instr->Ra()) - (rn_u32 * rm_u32); break; case SMULH_x: result = MultiplyHighSigned(xreg(instr->Rn()), xreg(instr->Rm())); break; @@ -1297,6 +1327,14 @@ void Simulator::VisitFPIntegerConvert(Instruction* instr) { FPRounding round = RMode(); switch (instr->Mask(FPIntegerConvertMask)) { + case FCVTAS_ws: set_wreg(dst, FPToInt32(sreg(src), FPTieAway)); break; + case FCVTAS_xs: set_xreg(dst, FPToInt64(sreg(src), FPTieAway)); break; + case FCVTAS_wd: set_wreg(dst, FPToInt32(dreg(src), FPTieAway)); break; + case FCVTAS_xd: set_xreg(dst, FPToInt64(dreg(src), FPTieAway)); break; + case FCVTAU_ws: set_wreg(dst, FPToUInt32(sreg(src), FPTieAway)); break; + case FCVTAU_xs: set_xreg(dst, FPToUInt64(sreg(src), FPTieAway)); break; + case FCVTAU_wd: set_wreg(dst, FPToUInt32(dreg(src), FPTieAway)); break; + case FCVTAU_xd: set_xreg(dst, FPToUInt64(dreg(src), FPTieAway)); break; case FCVTMS_ws: set_wreg(dst, FPToInt32(sreg(src), FPNegativeInfinity)); break; @@ -1494,18 +1532,16 @@ void Simulator::VisitFPConditionalCompare(Instruction* instr) { void Simulator::VisitFPConditionalSelect(Instruction* instr) { AssertSupportedFPCR(); - unsigned reg_size = instr->FPType() == FP32 ? kSRegSize : kDRegSize; - - double selected_val; + Instr selected; if (ConditionPassed(static_cast(instr->Condition()))) { - selected_val = fpreg(reg_size, instr->Rn()); + selected = instr->Rn(); } else { - selected_val = fpreg(reg_size, instr->Rm()); + selected = instr->Rm(); } switch (instr->Mask(FPConditionalSelectMask)) { - case FCSEL_s: - case FCSEL_d: set_fpreg(reg_size, instr->Rd(), selected_val); break; + case FCSEL_s: set_sreg(instr->Rd(), sreg(selected)); break; + case FCSEL_d: set_dreg(instr->Rd(), dreg(selected)); break; default: UNIMPLEMENTED(); } } @@ -1526,6 +1562,8 @@ void Simulator::VisitFPDataProcessing1Source(Instruction* instr) { case FNEG_d: set_dreg(fd, -dreg(fn)); break; case FSQRT_s: set_sreg(fd, sqrt(sreg(fn))); break; case FSQRT_d: set_dreg(fd, sqrt(dreg(fn))); break; + case FRINTA_s: set_sreg(fd, FPRoundInt(sreg(fn), FPTieAway)); break; + case FRINTA_d: set_dreg(fd, FPRoundInt(dreg(fn), FPTieAway)); break; case FRINTN_s: set_sreg(fd, FPRoundInt(sreg(fn), FPTieEven)); break; case FRINTN_d: set_dreg(fd, FPRoundInt(dreg(fn), FPTieEven)); break; case FRINTZ_s: set_sreg(fd, FPRoundInt(sreg(fn), FPZero)); break; @@ -1789,6 +1827,14 @@ double Simulator::FPRoundInt(double value, FPRounding round_mode) { double int_result = floor(value); double error = value - int_result; switch (round_mode) { + case FPTieAway: { + // If the error is greater than 0.5, or is equal to 0.5 and the integer + // result is positive, round up. + if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) { + int_result++; + } + break; + } case FPTieEven: { // If the error is greater than 0.5, or is equal to 0.5 and the integer // result is odd, round up. @@ -1924,6 +1970,10 @@ void Simulator::VisitFPDataProcessing2Source(Instruction* instr) { case FMAX_d: set_dreg(fd, FPMax(dreg(fn), dreg(fm))); break; case FMIN_s: set_sreg(fd, FPMin(sreg(fn), sreg(fm))); break; case FMIN_d: set_dreg(fd, FPMin(dreg(fn), dreg(fm))); break; + case FMAXNM_s: set_sreg(fd, FPMaxNM(sreg(fn), sreg(fm))); break; + case FMAXNM_d: set_dreg(fd, FPMaxNM(dreg(fn), dreg(fm))); break; + case FMINNM_s: set_sreg(fd, FPMinNM(sreg(fn), sreg(fm))); break; + case FMINNM_d: set_dreg(fd, FPMinNM(dreg(fn), dreg(fm))); break; default: UNIMPLEMENTED(); } } @@ -1937,25 +1987,34 @@ void Simulator::VisitFPDataProcessing3Source(Instruction* instr) { unsigned fm = instr->Rm(); unsigned fa = instr->Ra(); - // Note: The FMSUB implementation here is not precisely the same as the - // instruction definition. In full implementation rounding of results would - // occur once at the end, here rounding will occur after the first multiply - // and then after the subsequent addition. A full implementation here would - // be possible but would require an effort isn't immediately justified given - // the small differences we expect to see in most cases. - + // The C99 (and C++11) fma function performs a fused multiply-accumulate. switch (instr->Mask(FPDataProcessing3SourceMask)) { - case FMSUB_s: set_sreg(fd, sreg(fa) + (-sreg(fn))*sreg(fm)); break; - case FMSUB_d: set_dreg(fd, dreg(fa) + (-dreg(fn))*dreg(fm)); break; + // fd = fa +/- (fn * fm) + case FMADD_s: set_sreg(fd, fmaf(sreg(fn), sreg(fm), sreg(fa))); break; + case FMSUB_s: set_sreg(fd, fmaf(-sreg(fn), sreg(fm), sreg(fa))); break; + case FMADD_d: set_dreg(fd, fma(dreg(fn), dreg(fm), dreg(fa))); break; + case FMSUB_d: set_dreg(fd, fma(-dreg(fn), dreg(fm), dreg(fa))); break; + // Variants of the above where the result is negated. + case FNMADD_s: set_sreg(fd, -fmaf(sreg(fn), sreg(fm), sreg(fa))); break; + case FNMSUB_s: set_sreg(fd, -fmaf(-sreg(fn), sreg(fm), sreg(fa))); break; + case FNMADD_d: set_dreg(fd, -fma(dreg(fn), dreg(fm), dreg(fa))); break; + case FNMSUB_d: set_dreg(fd, -fma(-dreg(fn), dreg(fm), dreg(fa))); break; default: UNIMPLEMENTED(); } } -double Simulator::FPMax(double a, double b) { - if (isnan(a)) { +template +T Simulator::FPMax(T a, T b) { + if (IsSignallingNaN(a)) { + return a; + } else if (IsSignallingNaN(b)) { + return b; + } else if (isnan(a)) { + ASSERT(IsQuietNaN(a)); return a; } else if (isnan(b)) { + ASSERT(IsQuietNaN(b)); return b; } @@ -1969,10 +2028,28 @@ double Simulator::FPMax(double a, double b) { } -double Simulator::FPMin(double a, double b) { - if (isnan(a)) { +template +T Simulator::FPMaxNM(T a, T b) { + if (IsQuietNaN(a) && !IsQuietNaN(b)) { + a = kFP64NegativeInfinity; + } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { + b = kFP64NegativeInfinity; + } + return FPMax(a, b); +} + + +template +T Simulator::FPMin(T a, T b) { + if (IsSignallingNaN(a)) { + return a; + } else if (IsSignallingNaN(b)) { + return b; + } else if (isnan(a)) { + ASSERT(IsQuietNaN(a)); return a; } else if (isnan(b)) { + ASSERT(IsQuietNaN(b)); return b; } @@ -1985,6 +2062,16 @@ double Simulator::FPMin(double a, double b) { } } +template +T Simulator::FPMinNM(T a, T b) { + if (IsQuietNaN(a) && !IsQuietNaN(b)) { + a = kFP64PositiveInfinity; + } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { + b = kFP64PositiveInfinity; + } + return FPMin(a, b); +} + void Simulator::VisitSystem(Instruction* instr) { // Some system instructions hijack their Op and Cp fields to represent a @@ -2015,6 +2102,8 @@ void Simulator::VisitSystem(Instruction* instr) { case NOP: break; default: UNIMPLEMENTED(); } + } else if (instr->Mask(MemBarrierFMask) == MemBarrierFixed) { + __sync_synchronize(); } else { UNIMPLEMENTED(); } @@ -2048,21 +2137,23 @@ void Simulator::DoPrintf(Instruction* instr) { ASSERT(sizeof(*instr) == 1); memcpy(&type, instr + kPrintfTypeOffset, sizeof(type)); - const char * format = reinterpret_cast(x0()); + const char * format = reg(0); ASSERT(format != NULL); // Pass all of the relevant PCS registers onto printf. It doesn't matter // if we pass too many as the extra ones won't be read. int result = 0; if (type == CPURegister::kRegister) { - result = printf(format, x1(), x2(), x3(), x4(), x5(), x6(), x7()); + result = printf(format, xreg(1), xreg(2), xreg(3), xreg(4), + xreg(5), xreg(6), xreg(7)); } else if (type == CPURegister::kFPRegister) { - result = printf(format, d0(), d1(), d2(), d3(), d4(), d5(), d6(), d7()); + result = printf(format, dreg(0), dreg(1), dreg(2), dreg(3), + dreg(4), dreg(5), dreg(6), dreg(7)); } else { ASSERT(type == CPURegister::kNoRegister); result = printf("%s", format); } - set_x0(result); + set_xreg(0, result); // TODO: Clobber all caller-saved registers here, to ensure no assumptions // are made about preserved state. @@ -2071,7 +2162,7 @@ void Simulator::DoPrintf(Instruction* instr) { set_pc(instr->InstructionAtOffset(kPrintfLength)); // Set LR as if we'd just called a native printf function. - set_lr(reinterpret_cast(pc())); + set_lr(pc()); } } // namespace vixl diff --git a/src/a64/simulator-a64.h b/src/a64/simulator-a64.h index 0c22f9e7..efdb9bc2 100644 --- a/src/a64/simulator-a64.h +++ b/src/a64/simulator-a64.h @@ -115,6 +115,38 @@ class SimSystemRegister { }; +// Represent a register (r0-r31, v0-v31). +template +class SimRegisterBase { + public: + template + void Set(T new_value, unsigned size = sizeof(T)) { + ASSERT(size <= kSizeInBytes); + ASSERT(size <= sizeof(new_value)); + // All AArch64 registers are zero-extending; Writing a W register clears the + // top bits of the corresponding X register. + memset(value_, 0, kSizeInBytes); + memcpy(value_, &new_value, size); + } + + // Copy 'size' bytes of the register to the result, and zero-extend to fill + // the result. + template + T Get(unsigned size = sizeof(T)) const { + ASSERT(size <= kSizeInBytes); + T result; + memset(&result, 0, sizeof(result)); + memcpy(&result, value_, size); + return result; + } + + protected: + uint8_t value_[kSizeInBytes]; +}; +typedef SimRegisterBase SimRegister; // r0-r31 +typedef SimRegisterBase SimFPRegister; // v0-v31 + + class Simulator : public DecoderVisitor { public: explicit Simulator(Decoder* decoder, FILE* stream = stdout); @@ -122,19 +154,6 @@ class Simulator : public DecoderVisitor { void ResetState(); - // TODO: We assume little endianness, and the way in which the members of this - // union overlay. Add tests to ensure this, or fix accessors to no longer - // require this assumption. - union SimRegister { - int64_t x; - int32_t w; - }; - - union SimFPRegister { - double d; - float s; - }; - // Run the simulator. virtual void Run(); void RunFrom(Instruction* first); @@ -167,172 +186,169 @@ class Simulator : public DecoderVisitor { #undef DECLARE // Register accessors. - inline int32_t wreg(unsigned code, - Reg31Mode r31mode = Reg31IsZeroRegister) const { + + // Return 'size' bits of the value of an integer register, as the specified + // type. The value is zero-extended to fill the result. + // + // The only supported values of 'size' are kXRegSize and kWRegSize. + template + inline T reg(unsigned size, unsigned code, + Reg31Mode r31mode = Reg31IsZeroRegister) const { + unsigned size_in_bytes = size / 8; + ASSERT(size_in_bytes <= sizeof(T)); + ASSERT((size == kXRegSize) || (size == kWRegSize)); ASSERT(code < kNumberOfRegisters); + if ((code == 31) && (r31mode == Reg31IsZeroRegister)) { - return 0; + T result; + memset(&result, 0, sizeof(result)); + return result; } - return registers_[code].w; + return registers_[code].Get(size_in_bytes); + } + + // Like reg(), but infer the access size from the template type. + template + inline T reg(unsigned code, Reg31Mode r31mode = Reg31IsZeroRegister) const { + return reg(sizeof(T) * 8, code, r31mode); + } + + // Common specialized accessors for the reg() template. + inline int32_t wreg(unsigned code, + Reg31Mode r31mode = Reg31IsZeroRegister) const { + return reg(code, r31mode); } inline int64_t xreg(unsigned code, Reg31Mode r31mode = Reg31IsZeroRegister) const { + return reg(code, r31mode); + } + + inline int64_t reg(unsigned size, unsigned code, + Reg31Mode r31mode = Reg31IsZeroRegister) const { + return reg(size, code, r31mode); + } + + // Write 'size' bits of 'value' into an integer register. The value is + // zero-extended. This behaviour matches AArch64 register writes. + // + // The only supported values of 'size' are kXRegSize and kWRegSize. + template + inline void set_reg(unsigned size, unsigned code, T value, + Reg31Mode r31mode = Reg31IsZeroRegister) { + unsigned size_in_bytes = size / 8; + ASSERT(size_in_bytes <= sizeof(T)); + ASSERT((size == kXRegSize) || (size == kWRegSize)); ASSERT(code < kNumberOfRegisters); + if ((code == 31) && (r31mode == Reg31IsZeroRegister)) { - return 0; + return; } - return registers_[code].x; + return registers_[code].Set(value, size_in_bytes); } - inline int64_t reg(unsigned size, - unsigned code, - Reg31Mode r31mode = Reg31IsZeroRegister) const { - switch (size) { - case kWRegSize: return wreg(code, r31mode) & kWRegMask; - case kXRegSize: return xreg(code, r31mode); - default: - UNREACHABLE(); - return 0; - } + // Like set_reg(), but infer the access size from the template type. + template + inline void set_reg(unsigned code, T value, + Reg31Mode r31mode = Reg31IsZeroRegister) { + set_reg(sizeof(value) * 8, code, value, r31mode); } + // Common specialized accessors for the set_reg() template. inline void set_wreg(unsigned code, int32_t value, Reg31Mode r31mode = Reg31IsZeroRegister) { - ASSERT(code < kNumberOfRegisters); - if ((code == kZeroRegCode) && (r31mode == Reg31IsZeroRegister)) { - return; - } - registers_[code].x = 0; // First clear the register top bits. - registers_[code].w = value; + set_reg(kWRegSize, code, value, r31mode); } inline void set_xreg(unsigned code, int64_t value, Reg31Mode r31mode = Reg31IsZeroRegister) { - ASSERT(code < kNumberOfRegisters); - if ((code == kZeroRegCode) && (r31mode == Reg31IsZeroRegister)) { - return; - } - registers_[code].x = value; + set_reg(kXRegSize, code, value, r31mode); } - inline void set_reg(unsigned size, unsigned code, int64_t value, - Reg31Mode r31mode = Reg31IsZeroRegister) { - switch (size) { - case kWRegSize: - return set_wreg(code, static_cast(value & 0xffffffff), - r31mode); - case kXRegSize: - return set_xreg(code, value, r31mode); - default: - UNREACHABLE(); - break; - } + // Commonly-used special cases. + template + inline void set_lr(T value) { + set_reg(kLinkRegCode, value); } - #define REG_ACCESSORS(N) \ - inline int32_t w##N() { return wreg(N); } \ - inline int64_t x##N() { return xreg(N); } \ - inline void set_w##N(int32_t val) { set_wreg(N, val); } \ - inline void set_x##N(int64_t val) { set_xreg(N, val); } - REGISTER_CODE_LIST(REG_ACCESSORS) - #undef REG_ACCESSORS - - // Aliases. - #define REG_ALIAS_ACCESSORS(N, wname, xname) \ - inline int32_t wname() { return wreg(N); } \ - inline int64_t xname() { return xreg(N); } \ - inline void set_##wname(int32_t val) { set_wreg(N, val); } \ - inline void set_##xname(int64_t val) { set_xreg(N, val); } - REG_ALIAS_ACCESSORS(30, wlr, lr); - #undef REG_ALIAS_ACCESSORS + template + inline void set_sp(T value) { + set_reg(31, value, Reg31IsStackPointer); + } - // The stack is a special case in aarch64. - inline int32_t wsp() { return wreg(31, Reg31IsStackPointer); } - inline int64_t sp() { return xreg(31, Reg31IsStackPointer); } - inline void set_wsp(int32_t val) { - set_wreg(31, val, Reg31IsStackPointer); + // Return 'size' bits of the value of a floating-point register, as the + // specified type. The value is zero-extended to fill the result. + // + // The only supported values of 'size' are kDRegSize and kSRegSize. + template + inline T fpreg(unsigned size, unsigned code) const { + unsigned size_in_bytes = size / 8; + ASSERT(size_in_bytes <= sizeof(T)); + ASSERT((size == kDRegSize) || (size == kSRegSize)); + ASSERT(code < kNumberOfFPRegisters); + return fpregisters_[code].Get(size_in_bytes); } - inline void set_sp(int64_t val) { - set_xreg(31, val, Reg31IsStackPointer); + + // Like fpreg(), but infer the access size from the template type. + template + inline T fpreg(unsigned code) const { + return fpreg(sizeof(T) * 8, code); } - // FPRegister accessors. + // Common specialized accessors for the fpreg() template. inline float sreg(unsigned code) const { - ASSERT(code < kNumberOfFPRegisters); - return fpregisters_[code].s; + return fpreg(code); } inline uint32_t sreg_bits(unsigned code) const { - return float_to_rawbits(sreg(code)); + return fpreg(code); } inline double dreg(unsigned code) const { - ASSERT(code < kNumberOfFPRegisters); - return fpregisters_[code].d; + return fpreg(code); } inline uint64_t dreg_bits(unsigned code) const { - return double_to_rawbits(dreg(code)); + return fpreg(code); } inline double fpreg(unsigned size, unsigned code) const { switch (size) { case kSRegSize: return sreg(code); case kDRegSize: return dreg(code); - default: { + default: UNREACHABLE(); return 0.0; - } } } - inline void set_sreg(unsigned code, float val) { + // Write 'value' into a floating-point register. The value is zero-extended. + // This behaviour matches AArch64 register writes. + template + inline void set_fpreg(unsigned code, T value) { + ASSERT((sizeof(value) == kDRegSizeInBytes) || + (sizeof(value) == kSRegSizeInBytes)); ASSERT(code < kNumberOfFPRegisters); - // Ensure that the upper word is set to 0. - set_dreg_bits(code, 0); - - fpregisters_[code].s = val; + fpregisters_[code].Set(value, sizeof(value)); } - inline void set_sreg_bits(unsigned code, uint32_t rawbits) { - ASSERT(code < kNumberOfFPRegisters); - // Ensure that the upper word is set to 0. - set_dreg_bits(code, 0); - - set_sreg(code, rawbits_to_float(rawbits)); + // Common specialized accessors for the set_fpreg() template. + inline void set_sreg(unsigned code, float value) { + set_fpreg(code, value); } - inline void set_dreg(unsigned code, double val) { - ASSERT(code < kNumberOfFPRegisters); - fpregisters_[code].d = val; + inline void set_sreg_bits(unsigned code, uint32_t value) { + set_fpreg(code, value); } - inline void set_dreg_bits(unsigned code, uint64_t rawbits) { - ASSERT(code < kNumberOfFPRegisters); - set_dreg(code, rawbits_to_double(rawbits)); + inline void set_dreg(unsigned code, double value) { + set_fpreg(code, value); } - inline void set_fpreg(unsigned size, unsigned code, double value) { - switch (size) { - case kSRegSize: - return set_sreg(code, value); - case kDRegSize: - return set_dreg(code, value); - default: - UNREACHABLE(); - break; - } + inline void set_dreg_bits(unsigned code, uint64_t value) { + set_fpreg(code, value); } - #define FPREG_ACCESSORS(N) \ - inline float s##N() { return sreg(N); } \ - inline double d##N() { return dreg(N); } \ - inline void set_s##N(float val) { set_sreg(N, val); } \ - inline void set_d##N(double val) { set_dreg(N, val); } - REGISTER_CODE_LIST(FPREG_ACCESSORS) - #undef FPREG_ACCESSORS - bool N() { return nzcv_.N() != 0; } bool Z() { return nzcv_.Z() != 0; } bool C() { return nzcv_.C() != 0; } @@ -486,8 +502,18 @@ class Simulator : public DecoderVisitor { int64_t FPToInt64(double value, FPRounding rmode); uint32_t FPToUInt32(double value, FPRounding rmode); uint64_t FPToUInt64(double value, FPRounding rmode); - double FPMax(double a, double b); - double FPMin(double a, double b); + + template + T FPMax(T a, T b); + + template + T FPMin(T a, T b); + + template + T FPMaxNM(T a, T b); + + template + T FPMinNM(T a, T b); // Pseudo Printf instruction void DoPrintf(Instruction* instr); diff --git a/src/utils.h b/src/utils.h index 4e0b367e..15d144af 100644 --- a/src/utils.h +++ b/src/utils.h @@ -27,7 +27,7 @@ #ifndef VIXL_UTILS_H #define VIXL_UTILS_H - +#include #include #include "globals.h" @@ -90,12 +90,40 @@ inline int64_t signed_bitextract_64(int msb, int lsb, int64_t x) { return (x << (63 - msb)) >> (lsb + 63 - msb); } -// floating point representation +// Floating point representation. uint32_t float_to_rawbits(float value); uint64_t double_to_rawbits(double value); float rawbits_to_float(uint32_t bits); double rawbits_to_double(uint64_t bits); + +// NaN tests. +inline bool IsSignallingNaN(double num) { + const uint64_t kFP64QuietNaNMask = 0x0008000000000000UL; + uint64_t raw = double_to_rawbits(num); + if (isnan(num) && ((raw & kFP64QuietNaNMask) == 0)) { + return true; + } + return false; +} + + +inline bool IsSignallingNaN(float num) { + const uint64_t kFP32QuietNaNMask = 0x00400000UL; + uint32_t raw = float_to_rawbits(num); + if (isnan(num) && ((raw & kFP32QuietNaNMask) == 0)) { + return true; + } + return false; +} + + +template +inline bool IsQuietNaN(T num) { + return isnan(num) && !IsSignallingNaN(num); +} + + // Bits counting. int CountLeadingZeros(uint64_t value, int width); int CountLeadingSignBits(int64_t value, int width); diff --git a/test/test-assembler-a64.cc b/test/test-assembler-a64.cc index c2a021a9..c135c525 100644 --- a/test/test-assembler-a64.cc +++ b/test/test-assembler-a64.cc @@ -25,6 +25,7 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include #include @@ -295,6 +296,100 @@ TEST(mvn) { } +TEST(mov_imm_w) { + SETUP(); + + START(); + __ Mov(w0, 0xffffffffL); + __ Mov(w1, 0xffff1234L); + __ Mov(w2, 0x1234ffffL); + __ Mov(w3, 0x00000000L); + __ Mov(w4, 0x00001234L); + __ Mov(w5, 0x12340000L); + __ Mov(w6, 0x12345678L); + END(); + + RUN(); + + ASSERT_EQUAL_64(0xffffffffL, x0); + ASSERT_EQUAL_64(0xffff1234L, x1); + ASSERT_EQUAL_64(0x1234ffffL, x2); + ASSERT_EQUAL_64(0x00000000L, x3); + ASSERT_EQUAL_64(0x00001234L, x4); + ASSERT_EQUAL_64(0x12340000L, x5); + ASSERT_EQUAL_64(0x12345678L, x6); + + TEARDOWN(); +} + + +TEST(mov_imm_x) { + SETUP(); + + START(); + __ Mov(x0, 0xffffffffffffffffL); + __ Mov(x1, 0xffffffffffff1234L); + __ Mov(x2, 0xffffffff12345678L); + __ Mov(x3, 0xffff1234ffff5678L); + __ Mov(x4, 0x1234ffffffff5678L); + __ Mov(x5, 0x1234ffff5678ffffL); + __ Mov(x6, 0x12345678ffffffffL); + __ Mov(x7, 0x1234ffffffffffffL); + __ Mov(x8, 0x123456789abcffffL); + __ Mov(x9, 0x12345678ffff9abcL); + __ Mov(x10, 0x1234ffff56789abcL); + __ Mov(x11, 0xffff123456789abcL); + __ Mov(x12, 0x0000000000000000L); + __ Mov(x13, 0x0000000000001234L); + __ Mov(x14, 0x0000000012345678L); + __ Mov(x15, 0x0000123400005678L); + __ Mov(x18, 0x1234000000005678L); + __ Mov(x19, 0x1234000056780000L); + __ Mov(x20, 0x1234567800000000L); + __ Mov(x21, 0x1234000000000000L); + __ Mov(x22, 0x123456789abc0000L); + __ Mov(x23, 0x1234567800009abcL); + __ Mov(x24, 0x1234000056789abcL); + __ Mov(x25, 0x0000123456789abcL); + __ Mov(x26, 0x123456789abcdef0L); + __ Mov(x27, 0xffff000000000001L); + __ Mov(x28, 0x8000ffff00000000L); + END(); + + RUN(); + + ASSERT_EQUAL_64(0xffffffffffff1234L, x1); + ASSERT_EQUAL_64(0xffffffff12345678L, x2); + ASSERT_EQUAL_64(0xffff1234ffff5678L, x3); + ASSERT_EQUAL_64(0x1234ffffffff5678L, x4); + ASSERT_EQUAL_64(0x1234ffff5678ffffL, x5); + ASSERT_EQUAL_64(0x12345678ffffffffL, x6); + ASSERT_EQUAL_64(0x1234ffffffffffffL, x7); + ASSERT_EQUAL_64(0x123456789abcffffL, x8); + ASSERT_EQUAL_64(0x12345678ffff9abcL, x9); + ASSERT_EQUAL_64(0x1234ffff56789abcL, x10); + ASSERT_EQUAL_64(0xffff123456789abcL, x11); + ASSERT_EQUAL_64(0x0000000000000000L, x12); + ASSERT_EQUAL_64(0x0000000000001234L, x13); + ASSERT_EQUAL_64(0x0000000012345678L, x14); + ASSERT_EQUAL_64(0x0000123400005678L, x15); + ASSERT_EQUAL_64(0x1234000000005678L, x18); + ASSERT_EQUAL_64(0x1234000056780000L, x19); + ASSERT_EQUAL_64(0x1234567800000000L, x20); + ASSERT_EQUAL_64(0x1234000000000000L, x21); + ASSERT_EQUAL_64(0x123456789abc0000L, x22); + ASSERT_EQUAL_64(0x1234567800009abcL, x23); + ASSERT_EQUAL_64(0x1234000056789abcL, x24); + ASSERT_EQUAL_64(0x0000123456789abcL, x25); + ASSERT_EQUAL_64(0x123456789abcdef0L, x26); + ASSERT_EQUAL_64(0xffff000000000001L, x27); + ASSERT_EQUAL_64(0x8000ffff00000000L, x28); + + + TEARDOWN(); +} + + TEST(mov) { SETUP(); @@ -341,6 +436,9 @@ TEST(mov) { __ Mov(w25, Operand(w13, UXTH, 2)); __ Mov(x26, Operand(x13, SXTH, 3)); __ Mov(x27, Operand(w13, UXTW, 4)); + + __ Mov(x28, 0x0123456789abcdefL); + __ Mov(w28, w28, kDiscardForSameWReg); END(); RUN(); @@ -370,6 +468,7 @@ TEST(mov) { ASSERT_EQUAL_64(0x00007ff8, x25); ASSERT_EQUAL_64(0x000000000000fff0UL, x26); ASSERT_EQUAL_64(0x000000000001ffe0UL, x27); + ASSERT_EQUAL_64(0x0123456789abcdefL, x28); TEARDOWN(); } @@ -603,7 +702,7 @@ TEST(ands) { START(); __ Mov(x1, 0xf00000ff); - __ And(w0, w1, Operand(w1), SetFlags); + __ Ands(w0, w1, Operand(w1)); END(); RUN(); @@ -614,7 +713,7 @@ TEST(ands) { START(); __ Mov(x0, 0xfff0); __ Mov(x1, 0xf00000ff); - __ And(w0, w0, Operand(w1, LSR, 4), SetFlags); + __ Ands(w0, w0, Operand(w1, LSR, 4)); END(); RUN(); @@ -625,7 +724,7 @@ TEST(ands) { START(); __ Mov(x0, 0x8000000000000000L); __ Mov(x1, 0x00000001); - __ And(x0, x0, Operand(x1, ROR, 1), SetFlags); + __ Ands(x0, x0, Operand(x1, ROR, 1)); END(); RUN(); @@ -635,7 +734,7 @@ TEST(ands) { START(); __ Mov(x0, 0xfff0); - __ And(w0, w0, Operand(0xf), SetFlags); + __ Ands(w0, w0, Operand(0xf)); END(); RUN(); @@ -645,7 +744,7 @@ TEST(ands) { START(); __ Mov(x0, 0xff000000); - __ And(w0, w0, Operand(0x80000000), SetFlags); + __ Ands(w0, w0, Operand(0x80000000)); END(); RUN(); @@ -741,7 +840,7 @@ TEST(bics) { START(); __ Mov(x1, 0xffff); - __ Bic(w0, w1, Operand(w1), SetFlags); + __ Bics(w0, w1, Operand(w1)); END(); RUN(); @@ -751,7 +850,7 @@ TEST(bics) { START(); __ Mov(x0, 0xffffffff); - __ Bic(w0, w0, Operand(w0, LSR, 1), SetFlags); + __ Bics(w0, w0, Operand(w0, LSR, 1)); END(); RUN(); @@ -762,7 +861,7 @@ TEST(bics) { START(); __ Mov(x0, 0x8000000000000000L); __ Mov(x1, 0x00000001); - __ Bic(x0, x0, Operand(x1, ROR, 1), SetFlags); + __ Bics(x0, x0, Operand(x1, ROR, 1)); END(); RUN(); @@ -772,7 +871,7 @@ TEST(bics) { START(); __ Mov(x0, 0xffffffffffffffffL); - __ Bic(x0, x0, Operand(0x7fffffffffffffffL), SetFlags); + __ Bics(x0, x0, Operand(0x7fffffffffffffffL)); END(); RUN(); @@ -782,7 +881,7 @@ TEST(bics) { START(); __ Mov(w0, 0xffff0000); - __ Bic(w0, w0, Operand(0xfffffff0), SetFlags); + __ Bics(w0, w0, Operand(0xfffffff0)); END(); RUN(); @@ -983,6 +1082,29 @@ TEST(mul) { } +static void SmullHelper(int64_t expected, int64_t a, int64_t b) { + SETUP(); + START(); + __ Mov(w0, a); + __ Mov(w1, b); + __ Smull(x2, w0, w1); + END(); + RUN(); + ASSERT_EQUAL_64(expected, x2); + TEARDOWN(); +} + + +TEST(smull) { + SmullHelper(0, 0, 0); + SmullHelper(1, 1, 1); + SmullHelper(-1, -1, 1); + SmullHelper(1, -1, -1); + SmullHelper(0xffffffff80000000, 0x80000000, 1); + SmullHelper(0x0000000080000000, 0x00010000, 0x00008000); +} + + TEST(madd) { SETUP(); @@ -1268,6 +1390,22 @@ TEST(div) { __ Sdiv(x13, x19, x21); __ Udiv(x14, x20, x21); __ Sdiv(x15, x20, x21); + + __ Udiv(w22, w19, w17); + __ Sdiv(w23, w19, w17); + __ Udiv(x24, x20, x18); + __ Sdiv(x25, x20, x18); + + __ Udiv(x26, x16, x21); + __ Sdiv(x27, x16, x21); + __ Udiv(x28, x18, x21); + __ Sdiv(x29, x18, x21); + + __ Mov(x17, 0); + __ Udiv(w18, w16, w17); + __ Sdiv(w19, w16, w17); + __ Udiv(x20, x16, x17); + __ Sdiv(x21, x16, x17); END(); RUN(); @@ -1288,6 +1426,18 @@ TEST(div) { ASSERT_EQUAL_64(0x40000000, x13); ASSERT_EQUAL_64(0x4000000000000000UL, x14); ASSERT_EQUAL_64(0xC000000000000000UL, x15); + ASSERT_EQUAL_64(0, x22); + ASSERT_EQUAL_64(0x80000000, x23); + ASSERT_EQUAL_64(0, x24); + ASSERT_EQUAL_64(0x8000000000000000UL, x25); + ASSERT_EQUAL_64(0, x26); + ASSERT_EQUAL_64(0, x27); + ASSERT_EQUAL_64(0x7fffffffffffffffUL, x28); + ASSERT_EQUAL_64(0, x29); + ASSERT_EQUAL_64(0, x18); + ASSERT_EQUAL_64(0, x19); + ASSERT_EQUAL_64(0, x20); + ASSERT_EQUAL_64(0, x21); TEARDOWN(); } @@ -1664,7 +1814,7 @@ TEST(test_branch) { __ Mov(x16, 0xaaaaaaaaaaaaaaaaUL); Label bz, bz_end; - __ Tbz(x16, 0, &bz); + __ Tbz(w16, 0, &bz); __ B(&bz_end); __ Bind(&bz); __ Mov(x0, 1); @@ -1685,7 +1835,7 @@ TEST(test_branch) { __ Bind(&nbz_end); Label nbo, nbo_end; - __ Tbnz(x16, 2, &nbo); + __ Tbnz(w16, 2, &nbo); __ B(&nbo_end); __ Bind(&nbo); __ Mov(x3, 1); @@ -2868,6 +3018,64 @@ TEST(add_sub_negative) { } +TEST(add_sub_zero) { + SETUP(); + + START(); + __ Mov(x0, 0); + __ Mov(x1, 0); + __ Mov(x2, 0); + + Label blob1; + __ Bind(&blob1); + __ Add(x0, x0, 0); + __ Sub(x1, x1, 0); + __ Sub(x2, x2, xzr); + CHECK(__ SizeOfCodeGeneratedSince(&blob1) == 0); + + Label blob2; + __ Bind(&blob2); + __ Add(w3, w3, 0); + CHECK(__ SizeOfCodeGeneratedSince(&blob2) != 0); + + Label blob3; + __ Bind(&blob3); + __ Sub(w3, w3, wzr); + CHECK(__ SizeOfCodeGeneratedSince(&blob3) != 0); + + END(); + + RUN(); + + ASSERT_EQUAL_64(0, x0); + ASSERT_EQUAL_64(0, x1); + ASSERT_EQUAL_64(0, x2); + + TEARDOWN(); +} + + +TEST(claim_drop_zero) { + SETUP(); + + START(); + + Label start; + __ Bind(&start); + __ Claim(Operand(0)); + __ Drop(Operand(0)); + __ Claim(Operand(xzr)); + __ Drop(Operand(xzr)); + CHECK(__ SizeOfCodeGeneratedSince(&start) == 0); + + END(); + + RUN(); + + TEARDOWN(); +} + + TEST(neg) { SETUP(); @@ -2927,7 +3135,7 @@ TEST(adc_sbc_shift) { __ Mov(x4, 0xffffffffffffffffL); // Clear the C flag. - __ Add(x0, x0, Operand(0), SetFlags); + __ Adds(x0, x0, Operand(0)); __ Adc(x5, x2, Operand(x3)); __ Adc(x6, x0, Operand(x1, LSL, 60)); @@ -2988,37 +3196,126 @@ TEST(adc_sbc_shift) { __ Mov(x0, 1); __ Mov(x1, 0xffffffffffffffffL); // Clear the C flag. - __ Add(x0, x0, Operand(0), SetFlags); - __ Adc(x10, x0, Operand(x1), SetFlags); + __ Adds(x0, x0, Operand(0)); + __ Adcs(x10, x0, Operand(x1)); END(); RUN(); ASSERT_EQUAL_NZCV(ZCFlag); + ASSERT_EQUAL_64(0, x10); START(); __ Mov(x0, 1); __ Mov(x1, 0x8000000000000000L); // Clear the C flag. - __ Add(x0, x0, Operand(0), SetFlags); - __ Adc(x10, x0, Operand(x1, ASR, 63), SetFlags); + __ Adds(x0, x0, Operand(0)); + __ Adcs(x10, x0, Operand(x1, ASR, 63)); END(); RUN(); ASSERT_EQUAL_NZCV(ZCFlag); + ASSERT_EQUAL_64(0, x10); START(); __ Mov(x0, 0x10); __ Mov(x1, 0x07ffffffffffffffL); // Clear the C flag. - __ Add(x0, x0, Operand(0), SetFlags); - __ Adc(x10, x0, Operand(x1, LSL, 4), SetFlags); + __ Adds(x0, x0, Operand(0)); + __ Adcs(x10, x0, Operand(x1, LSL, 4)); END(); RUN(); ASSERT_EQUAL_NZCV(NVFlag); + ASSERT_EQUAL_64(0x8000000000000000L, x10); + + // Check that sbc correctly sets the condition flags. + START(); + __ Mov(x0, 0); + __ Mov(x1, 0xffffffffffffffffL); + // Clear the C flag. + __ Adds(x0, x0, Operand(0)); + __ Sbcs(x10, x0, Operand(x1)); + END(); + + RUN(); + + ASSERT_EQUAL_NZCV(ZFlag); + ASSERT_EQUAL_64(0, x10); + + START(); + __ Mov(x0, 1); + __ Mov(x1, 0xffffffffffffffffL); + // Clear the C flag. + __ Adds(x0, x0, Operand(0)); + __ Sbcs(x10, x0, Operand(x1, LSR, 1)); + END(); + + RUN(); + + ASSERT_EQUAL_NZCV(NFlag); + ASSERT_EQUAL_64(0x8000000000000001L, x10); + + START(); + __ Mov(x0, 0); + // Clear the C flag. + __ Adds(x0, x0, Operand(0)); + __ Sbcs(x10, x0, Operand(0xffffffffffffffffL)); + END(); + + RUN(); + + ASSERT_EQUAL_NZCV(ZFlag); + ASSERT_EQUAL_64(0, x10); + + START() + __ Mov(w0, 0x7fffffff); + // Clear the C flag. + __ Adds(x0, x0, Operand(0)); + __ Ngcs(w10, w0); + END(); + + RUN(); + + ASSERT_EQUAL_NZCV(NFlag); + ASSERT_EQUAL_64(0x80000000, x10); + + START(); + // Clear the C flag. + __ Adds(x0, x0, Operand(0)); + __ Ngcs(x10, 0x7fffffffffffffffL); + END(); + + RUN(); + + ASSERT_EQUAL_NZCV(NFlag); + ASSERT_EQUAL_64(0x8000000000000000L, x10); + + START() + __ Mov(x0, 0); + // Set the C flag. + __ Cmp(x0, Operand(x0)); + __ Sbcs(x10, x0, Operand(1)); + END(); + + RUN(); + + ASSERT_EQUAL_NZCV(NFlag); + ASSERT_EQUAL_64(0xffffffffffffffffL, x10); + + START() + __ Mov(x0, 0); + // Set the C flag. + __ Cmp(x0, Operand(x0)); + __ Ngcs(x10, 0x7fffffffffffffffL); + END(); + + RUN(); + + ASSERT_EQUAL_NZCV(NFlag); + ASSERT_EQUAL_64(0x8000000000000001L, x10); TEARDOWN(); } @@ -3029,7 +3326,7 @@ TEST(adc_sbc_extend) { START(); // Clear the C flag. - __ Add(x0, x0, Operand(0), SetFlags); + __ Adds(x0, x0, Operand(0)); __ Mov(x0, 0); __ Mov(x1, 1); @@ -3082,8 +3379,8 @@ TEST(adc_sbc_extend) { __ Mov(x0, 0xff); __ Mov(x1, 0xffffffffffffffffL); // Clear the C flag. - __ Add(x0, x0, Operand(0), SetFlags); - __ Adc(x10, x0, Operand(x1, SXTX, 1), SetFlags); + __ Adds(x0, x0, Operand(0)); + __ Adcs(x10, x0, Operand(x1, SXTX, 1)); END(); RUN(); @@ -3094,8 +3391,8 @@ TEST(adc_sbc_extend) { __ Mov(x0, 0x7fffffffffffffffL); __ Mov(x1, 1); // Clear the C flag. - __ Add(x0, x0, Operand(0), SetFlags); - __ Adc(x10, x0, Operand(x1, UXTB, 2), SetFlags); + __ Adds(x0, x0, Operand(0)); + __ Adcs(x10, x0, Operand(x1, UXTB, 2)); END(); RUN(); @@ -3105,8 +3402,8 @@ TEST(adc_sbc_extend) { START(); __ Mov(x0, 0x7fffffffffffffffL); // Clear the C flag. - __ Add(x0, x0, Operand(0), SetFlags); - __ Adc(x10, x0, Operand(1), SetFlags); + __ Adds(x0, x0, Operand(0)); + __ Adcs(x10, x0, Operand(1)); END(); RUN(); @@ -3124,24 +3421,41 @@ TEST(adc_sbc_wide_imm) { __ Mov(x0, 0); // Clear the C flag. - __ Add(x0, x0, Operand(0), SetFlags); + __ Adds(x0, x0, Operand(0)); __ Adc(x7, x0, Operand(0x1234567890abcdefUL)); __ Adc(w8, w0, Operand(0xffffffff)); + __ Sbc(x9, x0, Operand(0x1234567890abcdefUL)); + __ Sbc(w10, w0, Operand(0xffffffff)); + __ Ngc(x11, Operand(0xffffffff00000000UL)); + __ Ngc(w12, Operand(0xffff0000)); // Set the C flag. __ Cmp(w0, Operand(w0)); - __ Adc(x27, x0, Operand(0x1234567890abcdefUL)); - __ Adc(w28, w0, Operand(0xffffffff)); + __ Adc(x18, x0, Operand(0x1234567890abcdefUL)); + __ Adc(w19, w0, Operand(0xffffffff)); + __ Sbc(x20, x0, Operand(0x1234567890abcdefUL)); + __ Sbc(w21, w0, Operand(0xffffffff)); + __ Ngc(x22, Operand(0xffffffff00000000UL)); + __ Ngc(w23, Operand(0xffff0000)); END(); RUN(); ASSERT_EQUAL_64(0x1234567890abcdefUL, x7); ASSERT_EQUAL_64(0xffffffff, x8); - ASSERT_EQUAL_64(0x1234567890abcdefUL + 1, x27); - ASSERT_EQUAL_64(0, x28); + ASSERT_EQUAL_64(0xedcba9876f543210UL, x9); + ASSERT_EQUAL_64(0, x10); + ASSERT_EQUAL_64(0xffffffff, x11); + ASSERT_EQUAL_64(0xffff, x12); + + ASSERT_EQUAL_64(0x1234567890abcdefUL + 1, x18); + ASSERT_EQUAL_64(0, x19); + ASSERT_EQUAL_64(0xedcba9876f543211UL, x20); + ASSERT_EQUAL_64(1, x21); + ASSERT_EQUAL_64(0x100000000UL, x22); + ASSERT_EQUAL_64(0x10000, x23); TEARDOWN(); } @@ -3156,7 +3470,7 @@ TEST(flags) { __ Neg(x11, Operand(x1)); __ Neg(w12, Operand(w1)); // Clear the C flag. - __ Add(x0, x0, Operand(0), SetFlags); + __ Adds(x0, x0, Operand(0)); __ Ngc(x13, Operand(x0)); // Set the C flag. __ Cmp(x0, Operand(x0)); @@ -3271,8 +3585,8 @@ TEST(flags) { __ Mov(w0, 0); __ Mov(w1, 1); // Clear the C flag. - __ Add(w0, w0, Operand(0), SetFlags); - __ Ngc(w0, Operand(w1), SetFlags); + __ Adds(w0, w0, Operand(0)); + __ Ngcs(w0, Operand(w1)); END(); RUN(); @@ -3284,7 +3598,7 @@ TEST(flags) { __ Mov(w1, 0); // Set the C flag. __ Cmp(w0, Operand(w0)); - __ Ngc(w0, Operand(w1), SetFlags); + __ Ngcs(w0, Operand(w1)); END(); RUN(); @@ -3570,6 +3884,59 @@ TEST(csel) { } +TEST(csel_imm) { + SETUP(); + + START(); + __ Mov(x18, 0); + __ Mov(x19, 0x80000000); + __ Mov(x20, 0x8000000000000000UL); + + __ Cmp(x18, Operand(0)); + __ Csel(w0, w19, -2, ne); + __ Csel(w1, w19, -1, ne); + __ Csel(w2, w19, 0, ne); + __ Csel(w3, w19, 1, ne); + __ Csel(w4, w19, 2, ne); + __ Csel(w5, w19, Operand(w19, ASR, 31), ne); + __ Csel(w6, w19, Operand(w19, ROR, 1), ne); + __ Csel(w7, w19, 3, eq); + + __ Csel(x8, x20, -2, ne); + __ Csel(x9, x20, -1, ne); + __ Csel(x10, x20, 0, ne); + __ Csel(x11, x20, 1, ne); + __ Csel(x12, x20, 2, ne); + __ Csel(x13, x20, Operand(x20, ASR, 63), ne); + __ Csel(x14, x20, Operand(x20, ROR, 1), ne); + __ Csel(x15, x20, 3, eq); + + END(); + + RUN(); + + ASSERT_EQUAL_32(-2, w0); + ASSERT_EQUAL_32(-1, w1); + ASSERT_EQUAL_32(0, w2); + ASSERT_EQUAL_32(1, w3); + ASSERT_EQUAL_32(2, w4); + ASSERT_EQUAL_32(-1, w5); + ASSERT_EQUAL_32(0x40000000, w6); + ASSERT_EQUAL_32(0x80000000, w7); + + ASSERT_EQUAL_64(-2, x8); + ASSERT_EQUAL_64(-1, x9); + ASSERT_EQUAL_64(0, x10); + ASSERT_EQUAL_64(1, x11); + ASSERT_EQUAL_64(2, x12); + ASSERT_EQUAL_64(-1, x13); + ASSERT_EQUAL_64(0x4000000000000000UL, x14); + ASSERT_EQUAL_64(0x8000000000000000UL, x15); + + TEARDOWN(); +} + + TEST(lslv) { SETUP(); @@ -3846,11 +4213,11 @@ TEST(sbfm) { __ Sbfiz(x21, x2, 8, 16); __ Sbfx(x22, x1, 8, 16); __ Sbfx(x23, x2, 8, 16); - __ Sxtb(x24, x1); + __ Sxtb(x24, w1); __ Sxtb(x25, x2); - __ Sxth(x26, x1); + __ Sxth(x26, w1); __ Sxth(x27, x2); - __ Sxtw(x28, x1); + __ Sxtw(x28, w1); __ Sxtw(x29, x2); END(); @@ -4177,85 +4544,215 @@ TEST(fmul) { } -TEST(fmsub) { +static void FmaddFmsubDoubleHelper(double n, double m, double a, + double fmadd, double fmsub) { SETUP(); + START(); + + __ Fmov(d0, n); + __ Fmov(d1, m); + __ Fmov(d2, a); + __ Fmadd(d28, d0, d1, d2); + __ Fmsub(d29, d0, d1, d2); + __ Fnmadd(d30, d0, d1, d2); + __ Fnmsub(d31, d0, d1, d2); + + END(); + RUN(); + + ASSERT_EQUAL_FP64(fmadd, d28); + ASSERT_EQUAL_FP64(fmsub, d29); + ASSERT_EQUAL_FP64(-fmadd, d30); + ASSERT_EQUAL_FP64(-fmsub, d31); + + TEARDOWN(); +} + +TEST(fmadd_fmsub_double) { + double inputs[] = { + // Normal numbers, including -0.0. + DBL_MAX, DBL_MIN, 3.25, 2.0, 0.0, + -DBL_MAX, -DBL_MIN, -3.25, -2.0, -0.0, + // Infinities. + kFP64NegativeInfinity, kFP64PositiveInfinity, + // Subnormal numbers. + rawbits_to_double(0x000fffffffffffff), + rawbits_to_double(0x0000000000000001), + rawbits_to_double(0x000123456789abcd), + -rawbits_to_double(0x000fffffffffffff), + -rawbits_to_double(0x0000000000000001), + -rawbits_to_double(0x000123456789abcd), + // NaN. + kFP64QuietNaN, + -kFP64QuietNaN, + }; + const int count = sizeof(inputs) / sizeof(inputs[0]); + + for (int in = 0; in < count; in++) { + double n = inputs[in]; + for (int im = 0; im < count; im++) { + double m = inputs[im]; + for (int ia = 0; ia < count; ia++) { + double a = inputs[ia]; + double fmadd = fma(n, m, a); + double fmsub = fma(-n, m, a); + + FmaddFmsubDoubleHelper(n, m, a, fmadd, fmsub); + } + } + } +} + + +TEST(fmadd_fmsub_double_rounding) { + // Make sure we run plenty of tests where an intermediate rounding stage would + // produce an incorrect result. + const int limit = 1000; + int count_fmadd = 0; + int count_fmsub = 0; + + uint16_t seed[3] = {42, 43, 44}; + seed48(seed); + + while ((count_fmadd < limit) || (count_fmsub < limit)) { + double n, m, a; + uint32_t r[2]; + ASSERT(sizeof(r) == sizeof(n)); + + r[0] = mrand48(); + r[1] = mrand48(); + memcpy(&n, r, sizeof(r)); + r[0] = mrand48(); + r[1] = mrand48(); + memcpy(&m, r, sizeof(r)); + r[0] = mrand48(); + r[1] = mrand48(); + memcpy(&a, r, sizeof(r)); + + if (!isfinite(a) || !isfinite(n) || !isfinite(m)) { + continue; + } + + // Calculate the expected results. + double fmadd = fma(n, m, a); + double fmsub = fma(-n, m, a); + + bool test_fmadd = (fmadd != (a + n * m)); + bool test_fmsub = (fmsub != (a - n * m)); + + // If rounding would produce a different result, increment the test count. + count_fmadd += test_fmadd; + count_fmsub += test_fmsub; + + if (test_fmadd || test_fmsub) { + FmaddFmsubDoubleHelper(n, m, a, fmadd, fmsub); + } + } +} + + +static void FmaddFmsubFloatHelper(float n, float m, float a, + float fmadd, float fmsub) { + SETUP(); START(); - __ Fmov(s16, 3.25); - __ Fmov(s17, 2.0); - __ Fmov(s18, 0); - __ Fmov(s19, -0.5); - __ Fmov(s20, kFP32PositiveInfinity); - __ Fmov(s21, kFP32NegativeInfinity); - __ Fmov(s22, -0); - __ Fmov(d29, 0); - __ Fmov(d30, -2.0); - __ Fmov(d31, 2.25); - __ Fmov(d28, 4); - __ Fmov(d24, kFP64PositiveInfinity); - __ Fmov(d25, kFP64NegativeInfinity); - __ Fmov(d26, -0); - - // Normal combinations - __ Fmsub(s0, s16, s17, s18); - __ Fmsub(s1, s17, s18, s16); - __ Fmsub(s2, s17, s16, s19); - // Pos/Neg Infinity - __ Fmsub(s3, s16, s21, s19); - __ Fmsub(s4, s17, s16, s20); - __ Fmsub(s5, s20, s16, s19); - __ Fmsub(s6, s21, s16, s19); - // -0 - __ Fmsub(s7, s22, s16, s19); - __ Fmsub(s8, s19, s16, s22); - - // Normal combinations - __ Fmsub(d9, d30, d31, d29); - __ Fmsub(d10, d29, d31, d30); - __ Fmsub(d11, d30, d31, d28); - // Pos/Neg Infinity - __ Fmsub(d12, d30, d24, d28); - __ Fmsub(d13, d24, d31, d25); - __ Fmsub(d14, d24, d31, d28); - __ Fmsub(d15, d25, d31, d28); - // -0 - __ Fmsub(d16, d26, d31, d28); - __ Fmsub(d17, d30, d26, d28); - END(); - - RUN(); - - // Normal combinations - ASSERT_EQUAL_FP32(-6.5, s0); - ASSERT_EQUAL_FP32(3.25, s1); - ASSERT_EQUAL_FP32(-7, s2); - // Pos/Neg Infinity - ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s3); - ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s4); - ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s5); - ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s6); - // -0 - ASSERT_EQUAL_FP32(-0.5, s7); - ASSERT_EQUAL_FP32(1.625, s8); + __ Fmov(s0, n); + __ Fmov(s1, m); + __ Fmov(s2, a); + __ Fmadd(s30, s0, s1, s2); + __ Fmsub(s31, s0, s1, s2); + + END(); + RUN(); - // Normal combinations - ASSERT_EQUAL_FP64(4.5, d9); - ASSERT_EQUAL_FP64(-2.0, d10); - ASSERT_EQUAL_FP64(8.5, d11); - // Pos/Neg Infinity - ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d12); - ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d13); - ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d14); - ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d15); - // -0 - ASSERT_EQUAL_FP64(4.0, d16); - ASSERT_EQUAL_FP64(4.0, d17); + ASSERT_EQUAL_FP32(fmadd, s30); + ASSERT_EQUAL_FP32(fmsub, s31); TEARDOWN(); } +TEST(fmadd_fmsub_float) { + float inputs[] = { + // Normal numbers, including -0.0f. + FLT_MAX, FLT_MIN, 3.25f, 2.0f, 0.0f, + -FLT_MAX, -FLT_MIN, -3.25f, -2.0f, -0.0f, + // Infinities. + kFP32NegativeInfinity, kFP32PositiveInfinity, + // Subnormal numbers. + rawbits_to_float(0x07ffffff), + rawbits_to_float(0x00000001), + rawbits_to_float(0x01234567), + -rawbits_to_float(0x07ffffff), + -rawbits_to_float(0x00000001), + -rawbits_to_float(0x01234567), + // NaN. + kFP32QuietNaN, + -kFP32QuietNaN, + }; + const int count = sizeof(inputs) / sizeof(inputs[0]); + + for (int in = 0; in < count; in++) { + float n = inputs[in]; + for (int im = 0; im < count; im++) { + float m = inputs[im]; + for (int ia = 0; ia < count; ia++) { + float a = inputs[ia]; + float fmadd = fmaf(n, m, a); + float fmsub = fmaf(-n, m, a); + + FmaddFmsubFloatHelper(n, m, a, fmadd, fmsub); + } + } + } +} + + +TEST(fmadd_fmsub_float_rounding) { + // Make sure we run plenty of tests where an intermediate rounding stage would + // produce an incorrect result. + const int limit = 1000; + int count_fmadd = 0; + int count_fmsub = 0; + + uint16_t seed[3] = {42, 43, 44}; + seed48(seed); + + while ((count_fmadd < limit) || (count_fmsub < limit)) { + float n, m, a; + uint32_t r; + ASSERT(sizeof(r) == sizeof(n)); + + r = mrand48(); + memcpy(&n, &r, sizeof(r)); + r = mrand48(); + memcpy(&m, &r, sizeof(r)); + r = mrand48(); + memcpy(&a, &r, sizeof(r)); + + if (!isfinite(a) || !isfinite(n) || !isfinite(m)) { + continue; + } + + // Calculate the expected results. + float fmadd = fmaf(n, m, a); + float fmsub = fmaf(-n, m, a); + + bool test_fmadd = (fmadd != (a + n * m)); + bool test_fmsub = (fmsub != (a - n * m)); + + // If rounding would produce a different result, increment the test count. + count_fmadd += test_fmadd; + count_fmsub += test_fmsub; + + if (test_fmadd || test_fmsub) { + FmaddFmsubFloatHelper(n, m, a, fmadd, fmsub); + } + } +} + + TEST(fdiv) { SETUP(); @@ -4304,171 +4801,219 @@ TEST(fdiv) { } -TEST(fmin_s) { - SETUP(); +static float MinMaxHelper(float n, + float m, + bool min, + float quiet_nan_substitute = 0.0) { + const uint64_t kFP32QuietNaNMask = 0x00400000UL; + uint32_t raw_n = float_to_rawbits(n); + uint32_t raw_m = float_to_rawbits(m); + + if (isnan(n) && ((raw_n & kFP32QuietNaNMask) == 0)) { + // n is signalling NaN. + return n; + } else if (isnan(m) && ((raw_m & kFP32QuietNaNMask) == 0)) { + // m is signalling NaN. + return m; + } else if (quiet_nan_substitute == 0.0) { + if (isnan(n)) { + // n is quiet NaN. + return n; + } else if (isnan(m)) { + // m is quiet NaN. + return m; + } + } else { + // Substitute n or m if one is quiet, but not both. + if (isnan(n) && !isnan(m)) { + // n is quiet NaN: replace with substitute. + n = quiet_nan_substitute; + } else if (!isnan(n) && isnan(m)) { + // m is quiet NaN: replace with substitute. + m = quiet_nan_substitute; + } + } - START(); - __ Fmov(s25, 0.0); - __ Fneg(s26, s25); - __ Fmov(s27, kFP32PositiveInfinity); - __ Fmov(s28, 1.0); - __ Fmin(s0, s25, s26); - __ Fmin(s1, s27, s28); - __ Fmin(s2, s28, s26); - END(); + if ((n == 0.0) && (m == 0.0) && + (copysign(1.0, n) != copysign(1.0, m))) { + return min ? -0.0 : 0.0; + } - RUN(); + return min ? fminf(n, m) : fmaxf(n, m); +} + + +static double MinMaxHelper(double n, + double m, + bool min, + double quiet_nan_substitute = 0.0) { + const uint64_t kFP64QuietNaNMask = 0x0008000000000000UL; + uint64_t raw_n = double_to_rawbits(n); + uint64_t raw_m = double_to_rawbits(m); + + if (isnan(n) && ((raw_n & kFP64QuietNaNMask) == 0)) { + // n is signalling NaN. + return n; + } else if (isnan(m) && ((raw_m & kFP64QuietNaNMask) == 0)) { + // m is signalling NaN. + return m; + } else if (quiet_nan_substitute == 0.0) { + if (isnan(n)) { + // n is quiet NaN. + return n; + } else if (isnan(m)) { + // m is quiet NaN. + return m; + } + } else { + // Substitute n or m if one is quiet, but not both. + if (isnan(n) && !isnan(m)) { + // n is quiet NaN: replace with substitute. + n = quiet_nan_substitute; + } else if (!isnan(n) && isnan(m)) { + // m is quiet NaN: replace with substitute. + m = quiet_nan_substitute; + } + } - ASSERT_EQUAL_FP32(-0.0, s0); - ASSERT_EQUAL_FP32(1.0, s1); - ASSERT_EQUAL_FP32(-0.0, s2); + if ((n == 0.0) && (m == 0.0) && + (copysign(1.0, n) != copysign(1.0, m))) { + return min ? -0.0 : 0.0; + } - TEARDOWN(); + return min ? fmin(n, m) : fmax(n, m); } -TEST(fmin_d) { +static void FminFmaxDoubleHelper(double n, double m, double min, double max, + double minnm, double maxnm) { SETUP(); START(); - __ Fmov(d25, 0.0); - __ Fneg(d26, d25); - __ Fmov(d27, kFP32PositiveInfinity); - __ Fneg(d28, d27); - __ Fmov(d29, 1.0); - - for (unsigned j = 0; j < 5; j++) { - for (unsigned i = 0; i < 5; i++) { - // Test all combinations, writing results into d0 - d24. - __ Fmin(FPRegister::DRegFromCode(i + 5*j), - FPRegister::DRegFromCode(i + 25), - FPRegister::DRegFromCode(j + 25)); - } - } + __ Fmov(d0, n); + __ Fmov(d1, m); + __ Fmin(d28, d0, d1); + __ Fmax(d29, d0, d1); + __ Fminnm(d30, d0, d1); + __ Fmaxnm(d31, d0, d1); END(); RUN(); - // Second register is 0.0. - ASSERT_EQUAL_FP64(0.0, d0); - ASSERT_EQUAL_FP64(-0.0, d1); - ASSERT_EQUAL_FP64(0.0, d2); - ASSERT_EQUAL_FP64(kFP32NegativeInfinity, d3); - ASSERT_EQUAL_FP64(0.0, d4); - - // Second register is -0.0. - ASSERT_EQUAL_FP64(-0.0, d5); - ASSERT_EQUAL_FP64(-0.0, d6); - ASSERT_EQUAL_FP64(-0.0, d7); - ASSERT_EQUAL_FP64(kFP32NegativeInfinity, d8); - ASSERT_EQUAL_FP64(-0.0, d9); + ASSERT_EQUAL_FP64(min, d28); + ASSERT_EQUAL_FP64(max, d29); + ASSERT_EQUAL_FP64(minnm, d30); + ASSERT_EQUAL_FP64(maxnm, d31); - // Second register is +Inf. - ASSERT_EQUAL_FP64(0.0, d10); - ASSERT_EQUAL_FP64(-0.0, d11); - ASSERT_EQUAL_FP64(kFP32PositiveInfinity, d12); - ASSERT_EQUAL_FP64(kFP32NegativeInfinity, d13); - ASSERT_EQUAL_FP64(1.0, d14); + TEARDOWN(); +} - // Second register is -Inf. - ASSERT_EQUAL_FP64(kFP32NegativeInfinity, d15); - ASSERT_EQUAL_FP64(kFP32NegativeInfinity, d16); - ASSERT_EQUAL_FP64(kFP32NegativeInfinity, d17); - ASSERT_EQUAL_FP64(kFP32NegativeInfinity, d18); - ASSERT_EQUAL_FP64(kFP32NegativeInfinity, d19); - // Second register is 1.0. - ASSERT_EQUAL_FP64(0.0, d20); - ASSERT_EQUAL_FP64(-0.0, d21); - ASSERT_EQUAL_FP64(1.0, d22); - ASSERT_EQUAL_FP64(kFP32NegativeInfinity, d23); - ASSERT_EQUAL_FP64(1.0, d24); - - TEARDOWN(); +TEST(fmax_fmin_d) { + // Bootstrap tests. + FminFmaxDoubleHelper(0, 0, 0, 0, 0, 0); + FminFmaxDoubleHelper(0, 1, 0, 1, 0, 1); + FminFmaxDoubleHelper(kFP64PositiveInfinity, kFP64NegativeInfinity, + kFP64NegativeInfinity, kFP64PositiveInfinity, + kFP64NegativeInfinity, kFP64PositiveInfinity); + FminFmaxDoubleHelper(kFP64SignallingNaN, 0, + kFP64SignallingNaN, kFP64SignallingNaN, + kFP64SignallingNaN, kFP64SignallingNaN); + FminFmaxDoubleHelper(kFP64QuietNaN, 0, + kFP64QuietNaN, kFP64QuietNaN, + 0, 0); + FminFmaxDoubleHelper(kFP64QuietNaN, kFP64SignallingNaN, + kFP64SignallingNaN, kFP64SignallingNaN, + kFP64SignallingNaN, kFP64SignallingNaN); + + // Iterate over all combinations of inputs. + double inputs[] = { DBL_MAX, DBL_MIN, 1.0, 0.0, + -DBL_MAX, -DBL_MIN, -1.0, -0.0, + kFP64PositiveInfinity, kFP64NegativeInfinity, + kFP64QuietNaN, kFP64SignallingNaN }; + + const int count = sizeof(inputs) / sizeof(inputs[0]); + + for (int in = 0; in < count; in++) { + double n = inputs[in]; + for (int im = 0; im < count; im++) { + double m = inputs[im]; + FminFmaxDoubleHelper(n, m, + MinMaxHelper(n, m, true), + MinMaxHelper(n, m, false), + MinMaxHelper(n, m, true, kFP64PositiveInfinity), + MinMaxHelper(n, m, false, kFP64NegativeInfinity)); + } + } } -TEST(fmax_s) { +static void FminFmaxFloatHelper(float n, float m, float min, float max, + float minnm, float maxnm) { SETUP(); START(); - __ Fmov(s25, 0.0); - __ Fneg(s26, s25); - __ Fmov(s27, kFP32PositiveInfinity); - __ Fmov(s28, 1.0); - __ Fmax(s0, s25, s26); - __ Fmax(s1, s27, s28); - __ Fmax(s2, s28, s26); + // TODO: Signalling NaNs are sometimes converted by the C compiler to quiet + // NaNs on implicit casts from float to double. Here, we move the raw bits + // into a W register first, so we get the correct value. Fix Fmov so this + // additional step is no longer needed. + __ Mov(w0, float_to_rawbits(n)); + __ Fmov(s0, w0); + __ Mov(w0, float_to_rawbits(m)); + __ Fmov(s1, w0); + __ Fmin(s28, s0, s1); + __ Fmax(s29, s0, s1); + __ Fminnm(s30, s0, s1); + __ Fmaxnm(s31, s0, s1); END(); RUN(); - ASSERT_EQUAL_FP32(0.0, s0); - ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1); - ASSERT_EQUAL_FP32(1.0, s2); + ASSERT_EQUAL_FP32(min, s28); + ASSERT_EQUAL_FP32(max, s29); + ASSERT_EQUAL_FP32(minnm, s30); + ASSERT_EQUAL_FP32(maxnm, s31); TEARDOWN(); } -TEST(fmax_d) { - SETUP(); - - START(); - __ Fmov(d25, 0.0); - __ Fneg(d26, d25); - __ Fmov(d27, kFP32PositiveInfinity); - __ Fneg(d28, d27); - __ Fmov(d29, 1.0); - - for (unsigned j = 0; j < 5; j++) { - for (unsigned i = 0; i < 5; i++) { - // Test all combinations, writing results into d0 - d24. - __ Fmax(FPRegister::DRegFromCode(i + 5*j), - FPRegister::DRegFromCode(i + 25), - FPRegister::DRegFromCode(j + 25)); +TEST(fmax_fmin_s) { + // Bootstrap tests. + FminFmaxFloatHelper(0, 0, 0, 0, 0, 0); + FminFmaxFloatHelper(0, 1, 0, 1, 0, 1); + FminFmaxFloatHelper(kFP32PositiveInfinity, kFP32NegativeInfinity, + kFP32NegativeInfinity, kFP32PositiveInfinity, + kFP32NegativeInfinity, kFP32PositiveInfinity); + FminFmaxFloatHelper(kFP32SignallingNaN, 0, + kFP32SignallingNaN, kFP32SignallingNaN, + kFP32SignallingNaN, kFP32SignallingNaN); + FminFmaxFloatHelper(kFP32QuietNaN, 0, + kFP32QuietNaN, kFP32QuietNaN, + 0, 0); + FminFmaxFloatHelper(kFP32QuietNaN, kFP32SignallingNaN, + kFP32SignallingNaN, kFP32SignallingNaN, + kFP32SignallingNaN, kFP32SignallingNaN); + + // Iterate over all combinations of inputs. + float inputs[] = { FLT_MAX, FLT_MIN, 1.0, 0.0, + -FLT_MAX, -FLT_MIN, -1.0, -0.0, + kFP32PositiveInfinity, kFP32NegativeInfinity, + kFP32QuietNaN, kFP32SignallingNaN }; + + const int count = sizeof(inputs) / sizeof(inputs[0]); + + for (int in = 0; in < count; in++) { + float n = inputs[in]; + for (int im = 0; im < count; im++) { + float m = inputs[im]; + FminFmaxFloatHelper(n, m, + MinMaxHelper(n, m, true), + MinMaxHelper(n, m, false), + MinMaxHelper(n, m, true, kFP32PositiveInfinity), + MinMaxHelper(n, m, false, kFP32NegativeInfinity)); } } - END(); - - RUN(); - - // Second register is 0.0. - ASSERT_EQUAL_FP64(0.0, d0); - ASSERT_EQUAL_FP64(0.0, d1); - ASSERT_EQUAL_FP64(kFP32PositiveInfinity, d2); - ASSERT_EQUAL_FP64(0.0, d3); - ASSERT_EQUAL_FP64(1.0, d4); - - // Second register is -0.0. - ASSERT_EQUAL_FP64(0.0, d5); - ASSERT_EQUAL_FP64(-0.0, d6); - ASSERT_EQUAL_FP64(kFP32PositiveInfinity, d7); - ASSERT_EQUAL_FP64(-0.0, d8); - ASSERT_EQUAL_FP64(1.0, d9); - - // Second register is +Inf. - ASSERT_EQUAL_FP64(kFP32PositiveInfinity, d10); - ASSERT_EQUAL_FP64(kFP32PositiveInfinity, d11); - ASSERT_EQUAL_FP64(kFP32PositiveInfinity, d12); - ASSERT_EQUAL_FP64(kFP32PositiveInfinity, d13); - ASSERT_EQUAL_FP64(kFP32PositiveInfinity, d14); - - // Second register is -Inf. - ASSERT_EQUAL_FP64(0.0, d15); - ASSERT_EQUAL_FP64(-0.0, d16); - ASSERT_EQUAL_FP64(kFP32PositiveInfinity, d17); - ASSERT_EQUAL_FP64(kFP32NegativeInfinity, d18); - ASSERT_EQUAL_FP64(1.0, d19); - - // Second register is 1.0. - ASSERT_EQUAL_FP64(1.0, d20); - ASSERT_EQUAL_FP64(1.0, d21); - ASSERT_EQUAL_FP64(kFP32PositiveInfinity, d22); - ASSERT_EQUAL_FP64(1.0, d23); - ASSERT_EQUAL_FP64(1.0, d24); - - TEARDOWN(); } @@ -4542,6 +5087,11 @@ TEST(fcmp) { SETUP(); START(); + + // Some of these tests require a floating-point scratch register assigned to + // the macro assembler, but most do not. + __ SetFPScratchRegister(NoFPReg); + __ Fmov(s8, 0.0); __ Fmov(s9, 0.5); __ Mov(w18, 0x7f800001); // Single precision NaN. @@ -4559,7 +5109,9 @@ TEST(fcmp) { __ Mrs(x4, NZCV); __ Fcmp(s8, 0.0); __ Mrs(x5, NZCV); + __ SetFPScratchRegister(d0); __ Fcmp(s8, 255.0); + __ SetFPScratchRegister(NoFPReg); __ Mrs(x6, NZCV); __ Fmov(d19, 0.0); @@ -4579,7 +5131,9 @@ TEST(fcmp) { __ Mrs(x14, NZCV); __ Fcmp(d19, 0.0); __ Mrs(x15, NZCV); + __ SetFPScratchRegister(d0); __ Fcmp(d19, 12.3456); + __ SetFPScratchRegister(NoFPReg); __ Mrs(x16, NZCV); END(); @@ -4766,6 +5320,88 @@ TEST(fsqrt) { } +TEST(frinta) { + SETUP(); + + START(); + __ Fmov(s16, 1.0); + __ Fmov(s17, 1.1); + __ Fmov(s18, 1.5); + __ Fmov(s19, 1.9); + __ Fmov(s20, 2.5); + __ Fmov(s21, -1.5); + __ Fmov(s22, -2.5); + __ Fmov(s23, kFP32PositiveInfinity); + __ Fmov(s24, kFP32NegativeInfinity); + __ Fmov(s25, 0.0); + __ Fmov(s26, -0.0); + + __ Frinta(s0, s16); + __ Frinta(s1, s17); + __ Frinta(s2, s18); + __ Frinta(s3, s19); + __ Frinta(s4, s20); + __ Frinta(s5, s21); + __ Frinta(s6, s22); + __ Frinta(s7, s23); + __ Frinta(s8, s24); + __ Frinta(s9, s25); + __ Frinta(s10, s26); + + __ Fmov(d16, 1.0); + __ Fmov(d17, 1.1); + __ Fmov(d18, 1.5); + __ Fmov(d19, 1.9); + __ Fmov(d20, 2.5); + __ Fmov(d21, -1.5); + __ Fmov(d22, -2.5); + __ Fmov(d23, kFP32PositiveInfinity); + __ Fmov(d24, kFP32NegativeInfinity); + __ Fmov(d25, 0.0); + __ Fmov(d26, -0.0); + + __ Frinta(d11, d16); + __ Frinta(d12, d17); + __ Frinta(d13, d18); + __ Frinta(d14, d19); + __ Frinta(d15, d20); + __ Frinta(d16, d21); + __ Frinta(d17, d22); + __ Frinta(d18, d23); + __ Frinta(d19, d24); + __ Frinta(d20, d25); + __ Frinta(d21, d26); + END(); + + RUN(); + + ASSERT_EQUAL_FP32(1.0, s0); + ASSERT_EQUAL_FP32(1.0, s1); + ASSERT_EQUAL_FP32(2.0, s2); + ASSERT_EQUAL_FP32(2.0, s3); + ASSERT_EQUAL_FP32(3.0, s4); + ASSERT_EQUAL_FP32(-2.0, s5); + ASSERT_EQUAL_FP32(-3.0, s6); + ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7); + ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8); + ASSERT_EQUAL_FP32(0.0, s9); + ASSERT_EQUAL_FP32(-0.0, s10); + ASSERT_EQUAL_FP64(1.0, d11); + ASSERT_EQUAL_FP64(1.0, d12); + ASSERT_EQUAL_FP64(2.0, d13); + ASSERT_EQUAL_FP64(2.0, d14); + ASSERT_EQUAL_FP64(3.0, d15); + ASSERT_EQUAL_FP64(-2.0, d16); + ASSERT_EQUAL_FP64(-3.0, d17); + ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d18); + ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d19); + ASSERT_EQUAL_FP64(0.0, d20); + ASSERT_EQUAL_FP64(-0.0, d21); + + TEARDOWN(); +} + + TEST(frintn) { SETUP(); @@ -5011,7 +5647,7 @@ TEST(fcvt_sd) { {2.0, 2.0f}, {FLT_MAX, FLT_MAX}, // - The smallest normalized float. - {pow(2, -126), pow(2, -126)}, + {pow(2, -126), powf(2, -126)}, // - Normal floats that need (ties-to-even) rounding. // For normalized numbers: // bit 29 (0x0000000020000000) is the lowest-order bit which will @@ -5109,6 +5745,209 @@ TEST(fcvt_sd) { } +TEST(fcvtas) { + SETUP(); + + START(); + __ Fmov(s0, 1.0); + __ Fmov(s1, 1.1); + __ Fmov(s2, 2.5); + __ Fmov(s3, -2.5); + __ Fmov(s4, kFP32PositiveInfinity); + __ Fmov(s5, kFP32NegativeInfinity); + __ Fmov(s6, 0x7fffff80); // Largest float < INT32_MAX. + __ Fneg(s7, s6); // Smallest float > INT32_MIN. + __ Fmov(d8, 1.0); + __ Fmov(d9, 1.1); + __ Fmov(d10, 2.5); + __ Fmov(d11, -2.5); + __ Fmov(d12, kFP64PositiveInfinity); + __ Fmov(d13, kFP64NegativeInfinity); + __ Fmov(d14, kWMaxInt - 1); + __ Fmov(d15, kWMinInt + 1); + __ Fmov(s17, 1.1); + __ Fmov(s18, 2.5); + __ Fmov(s19, -2.5); + __ Fmov(s20, kFP32PositiveInfinity); + __ Fmov(s21, kFP32NegativeInfinity); + __ Fmov(s22, 0x7fffff8000000000UL); // Largest float < INT64_MAX. + __ Fneg(s23, s22); // Smallest float > INT64_MIN. + __ Fmov(d24, 1.1); + __ Fmov(d25, 2.5); + __ Fmov(d26, -2.5); + __ Fmov(d27, kFP64PositiveInfinity); + __ Fmov(d28, kFP64NegativeInfinity); + __ Fmov(d29, 0x7ffffffffffffc00UL); // Largest double < INT64_MAX. + __ Fneg(d30, d29); // Smallest double > INT64_MIN. + + __ Fcvtas(w0, s0); + __ Fcvtas(w1, s1); + __ Fcvtas(w2, s2); + __ Fcvtas(w3, s3); + __ Fcvtas(w4, s4); + __ Fcvtas(w5, s5); + __ Fcvtas(w6, s6); + __ Fcvtas(w7, s7); + __ Fcvtas(w8, d8); + __ Fcvtas(w9, d9); + __ Fcvtas(w10, d10); + __ Fcvtas(w11, d11); + __ Fcvtas(w12, d12); + __ Fcvtas(w13, d13); + __ Fcvtas(w14, d14); + __ Fcvtas(w15, d15); + __ Fcvtas(x17, s17); + __ Fcvtas(x18, s18); + __ Fcvtas(x19, s19); + __ Fcvtas(x20, s20); + __ Fcvtas(x21, s21); + __ Fcvtas(x22, s22); + __ Fcvtas(x23, s23); + __ Fcvtas(x24, d24); + __ Fcvtas(x25, d25); + __ Fcvtas(x26, d26); + __ Fcvtas(x27, d27); + __ Fcvtas(x28, d28); + __ Fcvtas(x29, d29); + __ Fcvtas(x30, d30); + END(); + + RUN(); + + ASSERT_EQUAL_64(1, x0); + ASSERT_EQUAL_64(1, x1); + ASSERT_EQUAL_64(3, x2); + ASSERT_EQUAL_64(0xfffffffd, x3); + ASSERT_EQUAL_64(0x7fffffff, x4); + ASSERT_EQUAL_64(0x80000000, x5); + ASSERT_EQUAL_64(0x7fffff80, x6); + ASSERT_EQUAL_64(0x80000080, x7); + ASSERT_EQUAL_64(1, x8); + ASSERT_EQUAL_64(1, x9); + ASSERT_EQUAL_64(3, x10); + ASSERT_EQUAL_64(0xfffffffd, x11); + ASSERT_EQUAL_64(0x7fffffff, x12); + ASSERT_EQUAL_64(0x80000000, x13); + ASSERT_EQUAL_64(0x7ffffffe, x14); + ASSERT_EQUAL_64(0x80000001, x15); + ASSERT_EQUAL_64(1, x17); + ASSERT_EQUAL_64(3, x18); + ASSERT_EQUAL_64(0xfffffffffffffffdUL, x19); + ASSERT_EQUAL_64(0x7fffffffffffffffUL, x20); + ASSERT_EQUAL_64(0x8000000000000000UL, x21); + ASSERT_EQUAL_64(0x7fffff8000000000UL, x22); + ASSERT_EQUAL_64(0x8000008000000000UL, x23); + ASSERT_EQUAL_64(1, x24); + ASSERT_EQUAL_64(3, x25); + ASSERT_EQUAL_64(0xfffffffffffffffdUL, x26); + ASSERT_EQUAL_64(0x7fffffffffffffffUL, x27); + ASSERT_EQUAL_64(0x8000000000000000UL, x28); + ASSERT_EQUAL_64(0x7ffffffffffffc00UL, x29); + ASSERT_EQUAL_64(0x8000000000000400UL, x30); + + TEARDOWN(); +} + + +TEST(fcvtau) { + SETUP(); + + START(); + __ Fmov(s0, 1.0); + __ Fmov(s1, 1.1); + __ Fmov(s2, 2.5); + __ Fmov(s3, -2.5); + __ Fmov(s4, kFP32PositiveInfinity); + __ Fmov(s5, kFP32NegativeInfinity); + __ Fmov(s6, 0xffffff00); // Largest float < UINT32_MAX. + __ Fmov(d8, 1.0); + __ Fmov(d9, 1.1); + __ Fmov(d10, 2.5); + __ Fmov(d11, -2.5); + __ Fmov(d12, kFP64PositiveInfinity); + __ Fmov(d13, kFP64NegativeInfinity); + __ Fmov(d14, 0xfffffffe); + __ Fmov(s16, 1.0); + __ Fmov(s17, 1.1); + __ Fmov(s18, 2.5); + __ Fmov(s19, -2.5); + __ Fmov(s20, kFP32PositiveInfinity); + __ Fmov(s21, kFP32NegativeInfinity); + __ Fmov(s22, 0xffffff0000000000UL); // Largest float < UINT64_MAX. + __ Fmov(d24, 1.1); + __ Fmov(d25, 2.5); + __ Fmov(d26, -2.5); + __ Fmov(d27, kFP64PositiveInfinity); + __ Fmov(d28, kFP64NegativeInfinity); + __ Fmov(d29, 0xfffffffffffff800UL); // Largest double < UINT64_MAX. + __ Fmov(s30, 0x100000000UL); + + __ Fcvtau(w0, s0); + __ Fcvtau(w1, s1); + __ Fcvtau(w2, s2); + __ Fcvtau(w3, s3); + __ Fcvtau(w4, s4); + __ Fcvtau(w5, s5); + __ Fcvtau(w6, s6); + __ Fcvtau(w8, d8); + __ Fcvtau(w9, d9); + __ Fcvtau(w10, d10); + __ Fcvtau(w11, d11); + __ Fcvtau(w12, d12); + __ Fcvtau(w13, d13); + __ Fcvtau(w14, d14); + __ Fcvtau(w15, d15); + __ Fcvtau(x16, s16); + __ Fcvtau(x17, s17); + __ Fcvtau(x18, s18); + __ Fcvtau(x19, s19); + __ Fcvtau(x20, s20); + __ Fcvtau(x21, s21); + __ Fcvtau(x22, s22); + __ Fcvtau(x24, d24); + __ Fcvtau(x25, d25); + __ Fcvtau(x26, d26); + __ Fcvtau(x27, d27); + __ Fcvtau(x28, d28); + __ Fcvtau(x29, d29); + __ Fcvtau(w30, s30); + END(); + + RUN(); + + ASSERT_EQUAL_64(1, x0); + ASSERT_EQUAL_64(1, x1); + ASSERT_EQUAL_64(3, x2); + ASSERT_EQUAL_64(0, x3); + ASSERT_EQUAL_64(0xffffffff, x4); + ASSERT_EQUAL_64(0, x5); + ASSERT_EQUAL_64(0xffffff00, x6); + ASSERT_EQUAL_64(1, x8); + ASSERT_EQUAL_64(1, x9); + ASSERT_EQUAL_64(3, x10); + ASSERT_EQUAL_64(0, x11); + ASSERT_EQUAL_64(0xffffffff, x12); + ASSERT_EQUAL_64(0, x13); + ASSERT_EQUAL_64(0xfffffffe, x14); + ASSERT_EQUAL_64(1, x16); + ASSERT_EQUAL_64(1, x17); + ASSERT_EQUAL_64(3, x18); + ASSERT_EQUAL_64(0, x19); + ASSERT_EQUAL_64(0xffffffffffffffffUL, x20); + ASSERT_EQUAL_64(0, x21); + ASSERT_EQUAL_64(0xffffff0000000000UL, x22); + ASSERT_EQUAL_64(1, x24); + ASSERT_EQUAL_64(3, x25); + ASSERT_EQUAL_64(0, x26); + ASSERT_EQUAL_64(0xffffffffffffffffUL, x27); + ASSERT_EQUAL_64(0, x28); + ASSERT_EQUAL_64(0xfffffffffffff800UL, x29); + ASSERT_EQUAL_64(0xffffffff, x30); + + TEARDOWN(); +} + + TEST(fcvtms) { SETUP(); @@ -5957,8 +6796,8 @@ static void TestUScvtf32Helper(uint64_t in, float expected_ucvtf_base = rawbits_to_float(expected_ucvtf_bits); for (int fbits = 0; fbits <= 32; fbits++) { - float expected_scvtf = expected_scvtf_base / pow(2, fbits); - float expected_ucvtf = expected_ucvtf_base / pow(2, fbits); + float expected_scvtf = expected_scvtf_base / powf(2, fbits); + float expected_ucvtf = expected_ucvtf_base / powf(2, fbits); ASSERT_EQUAL_FP32(expected_scvtf, results_scvtf_x[fbits]); ASSERT_EQUAL_FP32(expected_ucvtf, results_ucvtf_x[fbits]); if (cvtf_s32) ASSERT_EQUAL_FP32(expected_scvtf, results_scvtf_w[fbits]); @@ -5967,8 +6806,8 @@ static void TestUScvtf32Helper(uint64_t in, } for (int fbits = 33; fbits <= 64; fbits++) { break; - float expected_scvtf = expected_scvtf_base / pow(2, fbits); - float expected_ucvtf = expected_ucvtf_base / pow(2, fbits); + float expected_scvtf = expected_scvtf_base / powf(2, fbits); + float expected_ucvtf = expected_ucvtf_base / powf(2, fbits); ASSERT_EQUAL_FP32(expected_scvtf, results_scvtf_x[fbits]); ASSERT_EQUAL_FP32(expected_ucvtf, results_ucvtf_x[fbits]); } @@ -6062,7 +6901,7 @@ TEST(system_mrs) { __ Mrs(x4, NZCV); // Set the Z, C and V flags. - __ Add(w0, w2, w2, SetFlags); + __ Adds(w0, w2, w2); __ Mrs(x5, NZCV); // Read the default FPCR. @@ -6257,31 +7096,31 @@ TEST(zero_dest_setflags) { // All of these instructions should only write to the flags in these forms, // but have alternate forms which can write into the stack pointer. - __ add(xzr, x0, Operand(x1, UXTX), SetFlags); - __ add(xzr, x1, Operand(xzr, UXTX), SetFlags); - __ add(xzr, x1, 1234, SetFlags); - __ add(xzr, x0, x1, SetFlags); - __ add(xzr, x1, xzr, SetFlags); - __ add(xzr, xzr, x1, SetFlags); - - __ and_(xzr, x2, ~0xf, SetFlags); - __ and_(xzr, xzr, ~0xf, SetFlags); - __ and_(xzr, x0, x2, SetFlags); - __ and_(xzr, x2, xzr, SetFlags); - __ and_(xzr, xzr, x2, SetFlags); - - __ bic(xzr, x3, ~0xf, SetFlags); - __ bic(xzr, xzr, ~0xf, SetFlags); - __ bic(xzr, x0, x3, SetFlags); - __ bic(xzr, x3, xzr, SetFlags); - __ bic(xzr, xzr, x3, SetFlags); - - __ sub(xzr, x0, Operand(x3, UXTX), SetFlags); - __ sub(xzr, x3, Operand(xzr, UXTX), SetFlags); - __ sub(xzr, x3, 1234, SetFlags); - __ sub(xzr, x0, x3, SetFlags); - __ sub(xzr, x3, xzr, SetFlags); - __ sub(xzr, xzr, x3, SetFlags); + __ adds(xzr, x0, Operand(x1, UXTX)); + __ adds(xzr, x1, Operand(xzr, UXTX)); + __ adds(xzr, x1, 1234); + __ adds(xzr, x0, x1); + __ adds(xzr, x1, xzr); + __ adds(xzr, xzr, x1); + + __ ands(xzr, x2, ~0xf); + __ ands(xzr, xzr, ~0xf); + __ ands(xzr, x0, x2); + __ ands(xzr, x2, xzr); + __ ands(xzr, xzr, x2); + + __ bics(xzr, x3, ~0xf); + __ bics(xzr, xzr, ~0xf); + __ bics(xzr, x0, x3); + __ bics(xzr, x3, xzr); + __ bics(xzr, xzr, x3); + + __ subs(xzr, x0, Operand(x3, UXTX)); + __ subs(xzr, x3, Operand(xzr, UXTX)); + __ subs(xzr, x3, 1234); + __ subs(xzr, x0, x3); + __ subs(xzr, x3, xzr); + __ subs(xzr, xzr, x3); // Swap the saved stack pointer with the real one. If sp was written // during the test, it will show up in x30. This is done because the test @@ -7763,4 +8602,63 @@ TEST(blr_lr) { TEARDOWN(); } + +TEST(barriers) { + // Generate all supported barriers, this is just a smoke test + SETUP(); + + START(); + + // DMB + __ Dmb(FullSystem, BarrierAll); + __ Dmb(FullSystem, BarrierReads); + __ Dmb(FullSystem, BarrierWrites); + __ Dmb(FullSystem, BarrierOther); + + __ Dmb(InnerShareable, BarrierAll); + __ Dmb(InnerShareable, BarrierReads); + __ Dmb(InnerShareable, BarrierWrites); + __ Dmb(InnerShareable, BarrierOther); + + __ Dmb(NonShareable, BarrierAll); + __ Dmb(NonShareable, BarrierReads); + __ Dmb(NonShareable, BarrierWrites); + __ Dmb(NonShareable, BarrierOther); + + __ Dmb(OuterShareable, BarrierAll); + __ Dmb(OuterShareable, BarrierReads); + __ Dmb(OuterShareable, BarrierWrites); + __ Dmb(OuterShareable, BarrierOther); + + // DSB + __ Dsb(FullSystem, BarrierAll); + __ Dsb(FullSystem, BarrierReads); + __ Dsb(FullSystem, BarrierWrites); + __ Dsb(FullSystem, BarrierOther); + + __ Dsb(InnerShareable, BarrierAll); + __ Dsb(InnerShareable, BarrierReads); + __ Dsb(InnerShareable, BarrierWrites); + __ Dsb(InnerShareable, BarrierOther); + + __ Dsb(NonShareable, BarrierAll); + __ Dsb(NonShareable, BarrierReads); + __ Dsb(NonShareable, BarrierWrites); + __ Dsb(NonShareable, BarrierOther); + + __ Dsb(OuterShareable, BarrierAll); + __ Dsb(OuterShareable, BarrierReads); + __ Dsb(OuterShareable, BarrierWrites); + __ Dsb(OuterShareable, BarrierOther); + + // ISB + __ Isb(); + + END(); + + RUN(); + + TEARDOWN(); +} + } // namespace vixl diff --git a/test/test-disasm-a64.cc b/test/test-disasm-a64.cc index 408da39d..c04819e6 100644 --- a/test/test-disasm-a64.cc +++ b/test/test-disasm-a64.cc @@ -119,8 +119,8 @@ TEST(mov_mvn) { COMPARE(Mov(w14, Operand(w15, SXTH, 2)), "sbfiz w14, w15, #2, #16"); COMPARE(Mov(x16, Operand(x17, SXTW, 3)), "sbfiz x16, x17, #3, #32"); - COMPARE(Mvn(w0, Operand(0x1)), "movn w0, #0x1"); - COMPARE(Mvn(x1, Operand(0xfff)), "movn x1, #0xfff"); + COMPARE(Mvn(w0, Operand(0x101)), "movn w0, #0x101"); + COMPARE(Mvn(x1, Operand(0xfff1)), "movn x1, #0xfff1"); COMPARE(Mvn(w2, Operand(w3)), "mvn w2, w3"); COMPARE(Mvn(x4, Operand(x5)), "mvn x4, x5"); COMPARE(Mvn(w6, Operand(w7, LSL, 12)), "mvn w6, w7, lsl #12"); @@ -165,6 +165,61 @@ TEST(move_immediate) { CLEANUP(); } +TEST(move_immediate_2) { + SETUP_CLASS(MacroAssembler); + + // Move instructions expected for certain immediates. This is really a macro + // assembler test, to ensure it generates immediates efficiently. + COMPARE(Mov(w0, 0), "movz w0, #0x0"); + COMPARE(Mov(w0, 0x0000ffff), "movz w0, #0xffff"); + COMPARE(Mov(w0, 0x00010000), "movz w0, #0x10000"); + COMPARE(Mov(w0, 0xffff0000), "movz w0, #0xffff0000"); + COMPARE(Mov(w0, 0x0001ffff), "movn w0, #0xfffe0000"); + COMPARE(Mov(w0, 0xffff8000), "movn w0, #0x7fff"); + COMPARE(Mov(w0, 0xfffffffe), "movn w0, #0x1"); + COMPARE(Mov(w0, 0xffffffff), "movn w0, #0x0"); + COMPARE(Mov(w0, 0x00ffff00), "mov w0, #0xffff00"); + COMPARE(Mov(w0, 0xfffe7fff), "mov w0, #0xfffe7fff"); + COMPARE(Mov(w0, 0xfffeffff), "movn w0, #0x10000"); + COMPARE(Mov(w0, 0xffff7fff), "movn w0, #0x8000"); + + COMPARE(Mov(x0, 0), "movz x0, #0x0"); + COMPARE(Mov(x0, 0x0000ffff), "movz x0, #0xffff"); + COMPARE(Mov(x0, 0x00010000), "movz x0, #0x10000"); + COMPARE(Mov(x0, 0xffff0000), "movz x0, #0xffff0000"); + COMPARE(Mov(x0, 0x0001ffff), "mov x0, #0x1ffff"); + COMPARE(Mov(x0, 0xffff8000), "mov x0, #0xffff8000"); + COMPARE(Mov(x0, 0xfffffffe), "mov x0, #0xfffffffe"); + COMPARE(Mov(x0, 0xffffffff), "mov x0, #0xffffffff"); + COMPARE(Mov(x0, 0x00ffff00), "mov x0, #0xffff00"); + COMPARE(Mov(x0, 0xffff000000000000), "movz x0, #0xffff000000000000"); + COMPARE(Mov(x0, 0x0000ffff00000000), "movz x0, #0xffff00000000"); + COMPARE(Mov(x0, 0x00000000ffff0000), "movz x0, #0xffff0000"); + COMPARE(Mov(x0, 0xffffffffffff0000), "movn x0, #0xffff"); + COMPARE(Mov(x0, 0xffffffff0000ffff), "movn x0, #0xffff0000"); + COMPARE(Mov(x0, 0xffff0000ffffffff), "movn x0, #0xffff00000000"); + COMPARE(Mov(x0, 0x0000ffffffffffff), "movn x0, #0xffff000000000000"); + COMPARE(Mov(x0, 0xfffe7fffffffffff), "mov x0, #0xfffe7fffffffffff"); + COMPARE(Mov(x0, 0xfffeffffffffffff), "movn x0, #0x1000000000000"); + COMPARE(Mov(x0, 0xffff7fffffffffff), "movn x0, #0x800000000000"); + COMPARE(Mov(x0, 0xfffffffe7fffffff), "mov x0, #0xfffffffe7fffffff"); + COMPARE(Mov(x0, 0xfffffffeffffffff), "movn x0, #0x100000000"); + COMPARE(Mov(x0, 0xffffffff7fffffff), "movn x0, #0x80000000"); + COMPARE(Mov(x0, 0xfffffffffffe7fff), "mov x0, #0xfffffffffffe7fff"); + COMPARE(Mov(x0, 0xfffffffffffeffff), "movn x0, #0x10000"); + COMPARE(Mov(x0, 0xffffffffffff7fff), "movn x0, #0x8000"); + COMPARE(Mov(x0, 0xffffffffffffffff), "movn x0, #0x0"); + + COMPARE(Movk(w0, 0x1234, 0), "movk w0, #0x1234"); + COMPARE(Movk(x1, 0x2345, 0), "movk x1, #0x2345"); + COMPARE(Movk(w2, 0x3456, 16), "movk w2, #0x3456, lsl #16"); + COMPARE(Movk(x3, 0x4567, 16), "movk x3, #0x4567, lsl #16"); + COMPARE(Movk(x4, 0x5678, 32), "movk x4, #0x5678, lsl #32"); + COMPARE(Movk(x5, 0x6789, 48), "movk x5, #0x6789, lsl #48"); + + CLEANUP(); +} + TEST(add_immediate) { SETUP(); @@ -177,9 +232,9 @@ TEST(add_immediate) { "add x10, x11, #0x3ff000 (4190208)"); COMPARE(add(w12, w13, Operand(0xfff000)), "add w12, w13, #0xfff000 (16773120)"); - COMPARE(add(w14, w15, Operand(0xff), SetFlags), "adds w14, w15, #0xff (255)"); - COMPARE(add(x16, x17, Operand(0xaa000), SetFlags), - "adds x16, x17, #0xaa000 (696320)"); + COMPARE(adds(w14, w15, Operand(0xff)), "adds w14, w15, #0xff (255)"); + COMPARE(adds(x16, x17, Operand(0xaa000)), "adds x16, x17, #0xaa000 (696320)"); + COMPARE(cmn(w18, Operand(0xff)), "cmn w18, #0xff (255)"); COMPARE(cmn(x19, Operand(0xff000)), "cmn x19, #0xff000 (1044480)"); COMPARE(add(w0, wsp, Operand(0)), "mov w0, wsp"); @@ -189,7 +244,7 @@ TEST(add_immediate) { COMPARE(add(x2, sp, Operand(16)), "add x2, sp, #0x10 (16)"); COMPARE(add(wsp, wsp, Operand(42)), "add wsp, wsp, #0x2a (42)"); COMPARE(cmn(sp, Operand(24)), "cmn sp, #0x18 (24)"); - COMPARE(add(wzr, wsp, Operand(9), SetFlags), "cmn wsp, #0x9 (9)"); + COMPARE(adds(wzr, wsp, Operand(9)), "cmn wsp, #0x9 (9)"); CLEANUP(); } @@ -206,9 +261,8 @@ TEST(sub_immediate) { "sub x10, x11, #0x3ff000 (4190208)"); COMPARE(sub(w12, w13, Operand(0xfff000)), "sub w12, w13, #0xfff000 (16773120)"); - COMPARE(sub(w14, w15, Operand(0xff), SetFlags), "subs w14, w15, #0xff (255)"); - COMPARE(sub(x16, x17, Operand(0xaa000), SetFlags), - "subs x16, x17, #0xaa000 (696320)"); + COMPARE(subs(w14, w15, Operand(0xff)), "subs w14, w15, #0xff (255)"); + COMPARE(subs(x16, x17, Operand(0xaa000)), "subs x16, x17, #0xaa000 (696320)"); COMPARE(cmp(w18, Operand(0xff)), "cmp w18, #0xff (255)"); COMPARE(cmp(x19, Operand(0xff000)), "cmp x19, #0xff000 (1044480)"); @@ -216,7 +270,7 @@ TEST(sub_immediate) { COMPARE(sub(x2, sp, Operand(16)), "sub x2, sp, #0x10 (16)"); COMPARE(sub(wsp, wsp, Operand(42)), "sub wsp, wsp, #0x2a (42)"); COMPARE(cmp(sp, Operand(24)), "cmp sp, #0x18 (24)"); - COMPARE(sub(wzr, wsp, Operand(9), SetFlags), "cmp wsp, #0x9 (9)"); + COMPARE(subs(wzr, wsp, Operand(9)), "cmp wsp, #0x9 (9)"); CLEANUP(); } @@ -241,8 +295,8 @@ TEST(add_shifted) { COMPARE(add(x4, sp, Operand(x5, LSL, 1)), "add x4, sp, x5, lsl #1"); COMPARE(add(x4, xzr, Operand(x5, LSL, 1)), "add x4, xzr, x5, lsl #1"); COMPARE(add(w6, wsp, Operand(w7, LSL, 3)), "add w6, wsp, w7, lsl #3"); - COMPARE(add(xzr, sp, Operand(x8, LSL, 4), SetFlags), "cmn sp, x8, lsl #4"); - COMPARE(add(xzr, xzr, Operand(x8, LSL, 5), SetFlags), "cmn xzr, x8, lsl #5"); + COMPARE(adds(xzr, sp, Operand(x8, LSL, 4)), "cmn sp, x8, lsl #4"); + COMPARE(adds(xzr, xzr, Operand(x8, LSL, 5)), "cmn xzr, x8, lsl #5"); CLEANUP(); } @@ -263,16 +317,16 @@ TEST(sub_shifted) { COMPARE(cmp(x26, Operand(x27, LSL, 63)), "cmp x26, x27, lsl #63"); COMPARE(neg(w28, Operand(w29)), "neg w28, w29"); COMPARE(neg(x30, Operand(x0, LSR, 62)), "neg x30, x0, lsr #62"); - COMPARE(neg(w1, Operand(w2), SetFlags), "negs w1, w2"); - COMPARE(neg(x3, Operand(x4, ASR, 61), SetFlags), "negs x3, x4, asr #61"); + COMPARE(negs(w1, Operand(w2)), "negs w1, w2"); + COMPARE(negs(x3, Operand(x4, ASR, 61)), "negs x3, x4, asr #61"); COMPARE(sub(x0, sp, Operand(x1)), "sub x0, sp, x1"); COMPARE(sub(w2, wsp, Operand(w3)), "sub w2, wsp, w3"); COMPARE(sub(x4, sp, Operand(x5, LSL, 1)), "sub x4, sp, x5, lsl #1"); COMPARE(sub(x4, xzr, Operand(x5, LSL, 1)), "neg x4, x5, lsl #1"); COMPARE(sub(w6, wsp, Operand(w7, LSL, 3)), "sub w6, wsp, w7, lsl #3"); - COMPARE(sub(xzr, sp, Operand(x8, LSL, 4), SetFlags), "cmp sp, x8, lsl #4"); - COMPARE(sub(xzr, xzr, Operand(x8, LSL, 5), SetFlags), "cmp xzr, x8, lsl #5"); + COMPARE(subs(xzr, sp, Operand(x8, LSL, 4)), "cmp sp, x8, lsl #4"); + COMPARE(subs(xzr, xzr, Operand(x8, LSL, 5)), "cmp xzr, x8, lsl #5"); CLEANUP(); } @@ -282,20 +336,15 @@ TEST(add_extended) { SETUP(); COMPARE(add(w0, w1, Operand(w2, UXTB)), "add w0, w1, w2, uxtb"); - COMPARE(add(x3, x4, Operand(w5, UXTB, 1), SetFlags), - "adds x3, x4, w5, uxtb #1"); + COMPARE(adds(x3, x4, Operand(w5, UXTB, 1)), "adds x3, x4, w5, uxtb #1"); COMPARE(add(w6, w7, Operand(w8, UXTH, 2)), "add w6, w7, w8, uxth #2"); - COMPARE(add(x9, x10, Operand(x11, UXTW, 3), SetFlags), - "adds x9, x10, w11, uxtw #3"); + COMPARE(adds(x9, x10, Operand(x11, UXTW, 3)), "adds x9, x10, w11, uxtw #3"); COMPARE(add(x12, x13, Operand(x14, UXTX, 4)), "add x12, x13, x14, uxtx #4"); - COMPARE(add(w15, w16, Operand(w17, SXTB, 4), SetFlags), - "adds w15, w16, w17, sxtb #4"); + COMPARE(adds(w15, w16, Operand(w17, SXTB, 4)), "adds w15, w16, w17, sxtb #4"); COMPARE(add(x18, x19, Operand(x20, SXTB, 3)), "add x18, x19, w20, sxtb #3"); - COMPARE(add(w21, w22, Operand(w23, SXTH, 2), SetFlags), - "adds w21, w22, w23, sxth #2"); + COMPARE(adds(w21, w22, Operand(w23, SXTH, 2)), "adds w21, w22, w23, sxth #2"); COMPARE(add(x24, x25, Operand(x26, SXTW, 1)), "add x24, x25, w26, sxtw #1"); - COMPARE(add(x27, x28, Operand(x29, SXTX), SetFlags), - "adds x27, x28, x29, sxtx"); + COMPARE(adds(x27, x28, Operand(x29, SXTX)), "adds x27, x28, x29, sxtx"); COMPARE(cmn(w0, Operand(w1, UXTB, 2)), "cmn w0, w1, uxtb #2"); COMPARE(cmn(x2, Operand(x3, SXTH, 4)), "cmn x2, w3, sxth #4"); @@ -313,20 +362,15 @@ TEST(sub_extended) { SETUP(); COMPARE(sub(w0, w1, Operand(w2, UXTB)), "sub w0, w1, w2, uxtb"); - COMPARE(sub(x3, x4, Operand(w5, UXTB, 1), SetFlags), - "subs x3, x4, w5, uxtb #1"); + COMPARE(subs(x3, x4, Operand(w5, UXTB, 1)), "subs x3, x4, w5, uxtb #1"); COMPARE(sub(w6, w7, Operand(w8, UXTH, 2)), "sub w6, w7, w8, uxth #2"); - COMPARE(sub(x9, x10, Operand(x11, UXTW, 3), SetFlags), - "subs x9, x10, w11, uxtw #3"); + COMPARE(subs(x9, x10, Operand(x11, UXTW, 3)), "subs x9, x10, w11, uxtw #3"); COMPARE(sub(x12, x13, Operand(x14, UXTX, 4)), "sub x12, x13, x14, uxtx #4"); - COMPARE(sub(w15, w16, Operand(w17, SXTB, 4), SetFlags), - "subs w15, w16, w17, sxtb #4"); + COMPARE(subs(w15, w16, Operand(w17, SXTB, 4)), "subs w15, w16, w17, sxtb #4"); COMPARE(sub(x18, x19, Operand(x20, SXTB, 3)), "sub x18, x19, w20, sxtb #3"); - COMPARE(sub(w21, w22, Operand(w23, SXTH, 2), SetFlags), - "subs w21, w22, w23, sxth #2"); + COMPARE(subs(w21, w22, Operand(w23, SXTH, 2)), "subs w21, w22, w23, sxth #2"); COMPARE(sub(x24, x25, Operand(x26, SXTW, 1)), "sub x24, x25, w26, sxtw #1"); - COMPARE(sub(x27, x28, Operand(x29, SXTX), SetFlags), - "subs x27, x28, x29, sxtx"); + COMPARE(subs(x27, x28, Operand(x29, SXTX)), "subs x27, x28, x29, sxtx"); COMPARE(cmp(w0, Operand(w1, SXTB, 1)), "cmp w0, w1, sxtb #1"); COMPARE(cmp(x2, Operand(x3, UXTH, 3)), "cmp x2, w3, uxth #3"); @@ -345,16 +389,16 @@ TEST(adc_subc_ngc) { COMPARE(adc(w0, w1, Operand(w2)), "adc w0, w1, w2"); COMPARE(adc(x3, x4, Operand(x5)), "adc x3, x4, x5"); - COMPARE(adc(w6, w7, Operand(w8), SetFlags), "adcs w6, w7, w8"); - COMPARE(adc(x9, x10, Operand(x11), SetFlags), "adcs x9, x10, x11"); + COMPARE(adcs(w6, w7, Operand(w8)), "adcs w6, w7, w8"); + COMPARE(adcs(x9, x10, Operand(x11)), "adcs x9, x10, x11"); COMPARE(sbc(w12, w13, Operand(w14)), "sbc w12, w13, w14"); COMPARE(sbc(x15, x16, Operand(x17)), "sbc x15, x16, x17"); - COMPARE(sbc(w18, w19, Operand(w20), SetFlags), "sbcs w18, w19, w20"); - COMPARE(sbc(x21, x22, Operand(x23), SetFlags), "sbcs x21, x22, x23"); + COMPARE(sbcs(w18, w19, Operand(w20)), "sbcs w18, w19, w20"); + COMPARE(sbcs(x21, x22, Operand(x23)), "sbcs x21, x22, x23"); COMPARE(ngc(w24, Operand(w25)), "ngc w24, w25"); COMPARE(ngc(x26, Operand(x27)), "ngc x26, x27"); - COMPARE(ngc(w28, Operand(w29), SetFlags), "ngcs w28, w29"); - COMPARE(ngc(x30, Operand(x0), SetFlags), "ngcs x30, x0"); + COMPARE(ngcs(w28, Operand(w29)), "ngcs w28, w29"); + COMPARE(ngcs(x30, Operand(x0)), "ngcs x30, x0"); CLEANUP(); } @@ -445,6 +489,10 @@ TEST(bitfield) { COMPARE(sxth(w4, w5), "sxth w4, w5"); COMPARE(sxth(x6, x7), "sxth x6, w7"); COMPARE(sxtw(x8, x9), "sxtw x8, w9"); + COMPARE(sxtb(x0, w1), "sxtb x0, w1"); + COMPARE(sxth(x2, w3), "sxth x2, w3"); + COMPARE(sxtw(x4, w5), "sxtw x4, w5"); + COMPARE(uxtb(w10, w11), "uxtb w10, w11"); COMPARE(uxtb(x12, x13), "uxtb x12, w13"); COMPARE(uxth(w14, w15), "uxth w14, w15"); @@ -567,9 +615,8 @@ TEST(logical_immediate) { "eor w15, w16, #0x1"); COMPARE(eor(x17, x18, Operand(0x0000000000000003L)), "eor x17, x18, #0x3"); - COMPARE(and_(w23, w24, Operand(0x0000000f), SetFlags), - "ands w23, w24, #0xf"); - COMPARE(and_(x25, x26, Operand(0x800000000000000fL), SetFlags), + COMPARE(ands(w23, w24, Operand(0x0000000f)), "ands w23, w24, #0xf"); + COMPARE(ands(x25, x26, Operand(0x800000000000000fL)), "ands x25, x26, #0x800000000000000f"); // Test inverse. @@ -585,14 +632,13 @@ TEST(logical_immediate) { "eor w19, w20, #0x7ffffffe"); COMPARE(eon(x21, x22, Operand(0xc000000000000003L)), "eor x21, x22, #0x3ffffffffffffffc"); - COMPARE(bic(w27, w28, Operand(0xfffffff7), SetFlags), - "ands w27, w28, #0x8"); - COMPARE(bic(x29, x0, Operand(0xfffffffeffffffffL), SetFlags), + COMPARE(bics(w27, w28, Operand(0xfffffff7)), "ands w27, w28, #0x8"); + COMPARE(bics(x29, x0, Operand(0xfffffffeffffffffL)), "ands x29, x0, #0x100000000"); // Test stack pointer. COMPARE(and_(wsp, wzr, Operand(7)), "and wsp, wzr, #0x7"); - COMPARE(and_(xzr, xzr, Operand(7), SetFlags), "tst xzr, #0x7"); + COMPARE(ands(xzr, xzr, Operand(7)), "tst xzr, #0x7"); COMPARE(orr(sp, xzr, Operand(15)), "orr sp, xzr, #0xf"); COMPARE(eor(wsp, w0, Operand(31)), "eor wsp, w0, #0x1f"); @@ -656,25 +702,17 @@ TEST(logical_shifted) { COMPARE(eon(x24, x25, Operand(x26, ASR, 23)), "eon x24, x25, x26, asr #23"); COMPARE(eon(w27, w28, Operand(w29, ROR, 24)), "eon w27, w28, w29, ror #24"); - COMPARE(and_(w0, w1, Operand(w2), SetFlags), "ands w0, w1, w2"); - COMPARE(and_(x3, x4, Operand(x5, LSL, 1), SetFlags), - "ands x3, x4, x5, lsl #1"); - COMPARE(and_(w6, w7, Operand(w8, LSR, 2), SetFlags), - "ands w6, w7, w8, lsr #2"); - COMPARE(and_(x9, x10, Operand(x11, ASR, 3), SetFlags), - "ands x9, x10, x11, asr #3"); - COMPARE(and_(w12, w13, Operand(w14, ROR, 4), SetFlags), - "ands w12, w13, w14, ror #4"); - - COMPARE(bic(w15, w16, Operand(w17), SetFlags), "bics w15, w16, w17"); - COMPARE(bic(x18, x19, Operand(x20, LSL, 5), SetFlags), - "bics x18, x19, x20, lsl #5"); - COMPARE(bic(w21, w22, Operand(w23, LSR, 6), SetFlags), - "bics w21, w22, w23, lsr #6"); - COMPARE(bic(x24, x25, Operand(x26, ASR, 7), SetFlags), - "bics x24, x25, x26, asr #7"); - COMPARE(bic(w27, w28, Operand(w29, ROR, 8), SetFlags), - "bics w27, w28, w29, ror #8"); + COMPARE(ands(w0, w1, Operand(w2)), "ands w0, w1, w2"); + COMPARE(ands(x3, x4, Operand(x5, LSL, 1)), "ands x3, x4, x5, lsl #1"); + COMPARE(ands(w6, w7, Operand(w8, LSR, 2)), "ands w6, w7, w8, lsr #2"); + COMPARE(ands(x9, x10, Operand(x11, ASR, 3)), "ands x9, x10, x11, asr #3"); + COMPARE(ands(w12, w13, Operand(w14, ROR, 4)), "ands w12, w13, w14, ror #4"); + + COMPARE(bics(w15, w16, Operand(w17)), "bics w15, w16, w17"); + COMPARE(bics(x18, x19, Operand(x20, LSL, 5)), "bics x18, x19, x20, lsl #5"); + COMPARE(bics(w21, w22, Operand(w23, LSR, 6)), "bics w21, w22, w23, lsr #6"); + COMPARE(bics(x24, x25, Operand(x26, ASR, 7)), "bics x24, x25, x26, asr #7"); + COMPARE(bics(w27, w28, Operand(w29, ROR, 8)), "bics w27, w28, w29, ror #8"); COMPARE(tst(w0, Operand(w1)), "tst w0, w1"); COMPARE(tst(w2, Operand(w3, ROR, 10)), "tst w2, w3, ror #10"); @@ -745,11 +783,16 @@ TEST(branch) { COMPARE(cbz(x1, INST_OFF(-0x100000)), "cbz x1, #-0x100000"); COMPARE(cbnz(w2, INST_OFF(0xffffc)), "cbnz w2, #+0xffffc"); COMPARE(cbnz(x3, INST_OFF(-0x100000)), "cbnz x3, #-0x100000"); - COMPARE(tbz(x4, 0, INST_OFF(0x7ffc)), "tbz w4, #0, #+0x7ffc"); + COMPARE(tbz(w4, 0, INST_OFF(0x7ffc)), "tbz w4, #0, #+0x7ffc"); COMPARE(tbz(x5, 63, INST_OFF(-0x8000)), "tbz x5, #63, #-0x8000"); - COMPARE(tbnz(x6, 0, INST_OFF(0x7ffc)), "tbnz w6, #0, #+0x7ffc"); - COMPARE(tbnz(x7, 63, INST_OFF(-0x8000)), "tbnz x7, #63, #-0x8000"); - + COMPARE(tbz(w6, 31, INST_OFF(0)), "tbz w6, #31, #+0x0"); + COMPARE(tbz(x7, 31, INST_OFF(0x4)), "tbz w7, #31, #+0x4"); + COMPARE(tbz(x8, 32, INST_OFF(0x8)), "tbz x8, #32, #+0x8"); + COMPARE(tbnz(w8, 0, INST_OFF(0x7ffc)), "tbnz w8, #0, #+0x7ffc"); + COMPARE(tbnz(x9, 63, INST_OFF(-0x8000)), "tbnz x9, #63, #-0x8000"); + COMPARE(tbnz(w10, 31, INST_OFF(0)), "tbnz w10, #31, #+0x0"); + COMPARE(tbnz(x11, 31, INST_OFF(0x4)), "tbnz w11, #31, #+0x4"); + COMPARE(tbnz(x12, 32, INST_OFF(0x8)), "tbnz x12, #32, #+0x8"); COMPARE(br(x0), "br x0"); COMPARE(blr(x1), "blr x1"); COMPARE(ret(x2), "ret x2"); @@ -1229,6 +1272,19 @@ TEST(cond_select) { CLEANUP(); } +TEST(cond_select_macro) { + SETUP_CLASS(MacroAssembler); + + COMPARE(Csel(w0, w1, -1, eq), "csinv w0, w1, wzr, eq"); + COMPARE(Csel(w2, w3, 0, ne), "csel w2, w3, wzr, ne"); + COMPARE(Csel(w4, w5, 1, hs), "csinc w4, w5, wzr, hs"); + COMPARE(Csel(x6, x7, -1, lo), "csinv x6, x7, xzr, lo"); + COMPARE(Csel(x8, x9, 0, mi), "csel x8, x9, xzr, mi"); + COMPARE(Csel(x10, x11, 1, pl), "csinc x10, x11, xzr, pl"); + + CLEANUP(); +} + TEST(cond_cmp) { SETUP(); @@ -1246,6 +1302,17 @@ TEST(cond_cmp) { CLEANUP(); } +TEST(cond_cmp_macro) { + SETUP_CLASS(MacroAssembler); + + COMPARE(Ccmp(w0, -1, VFlag, hi), "ccmn w0, #1, #nzcV, hi"); + COMPARE(Ccmp(x1, -31, CFlag, ge), "ccmn x1, #31, #nzCv, ge"); + COMPARE(Ccmn(w2, -1, CVFlag, gt), "ccmp w2, #1, #nzCV, gt"); + COMPARE(Ccmn(x3, -31, ZCVFlag, ls), "ccmp x3, #31, #nZCV, ls"); + + CLEANUP(); +} + TEST(fmov_imm) { SETUP(); @@ -1286,6 +1353,10 @@ TEST(fp_dp1) { COMPARE(fsqrt(s31, s30), "fsqrt s31, s30"); COMPARE(fsqrt(d10, d11), "fsqrt d10, d11"); COMPARE(fsqrt(d31, d30), "fsqrt d31, d30"); + COMPARE(frinta(s10, s11), "frinta s10, s11"); + COMPARE(frinta(s31, s30), "frinta s31, s30"); + COMPARE(frinta(d12, d13), "frinta d12, d13"); + COMPARE(frinta(d31, d30), "frinta d31, d30"); COMPARE(frintn(s10, s11), "frintn s10, s11"); COMPARE(frintn(s31, s30), "frintn s31, s30"); COMPARE(frintn(d12, d13), "frintn d12, d13"); @@ -1316,6 +1387,10 @@ TEST(fp_dp2) { COMPARE(fmax(d22, d23, d24), "fmax d22, d23, d24"); COMPARE(fmin(s25, s26, s27), "fmin s25, s26, s27"); COMPARE(fmin(d28, d29, d30), "fmin d28, d29, d30"); + COMPARE(fmaxnm(s31, s0, s1), "fmaxnm s31, s0, s1"); + COMPARE(fmaxnm(d2, d3, d4), "fmaxnm d2, d3, d4"); + COMPARE(fminnm(s5, s6, s7), "fminnm s5, s6, s7"); + COMPARE(fminnm(d8, d9, d10), "fminnm d8, d9, d10"); CLEANUP(); } @@ -1324,9 +1399,16 @@ TEST(fp_dp2) { TEST(fp_dp3) { SETUP(); + COMPARE(fmadd(s7, s8, s9, s10), "fmadd s7, s8, s9, s10"); + COMPARE(fmadd(d10, d11, d12, d10), "fmadd d10, d11, d12, d10"); COMPARE(fmsub(s7, s8, s9, s10), "fmsub s7, s8, s9, s10"); COMPARE(fmsub(d10, d11, d12, d10), "fmsub d10, d11, d12, d10"); + COMPARE(fnmadd(s7, s8, s9, s10), "fnmadd s7, s8, s9, s10"); + COMPARE(fnmadd(d10, d11, d12, d10), "fnmadd d10, d11, d12, d10"); + COMPARE(fnmsub(s7, s8, s9, s10), "fnmsub s7, s8, s9, s10"); + COMPARE(fnmsub(d10, d11, d12, d10), "fnmsub d10, d11, d12, d10"); + CLEANUP(); } @@ -1380,6 +1462,14 @@ TEST(fp_select) { TEST(fcvt_scvtf_ucvtf) { SETUP(); + COMPARE(fcvtas(w0, s1), "fcvtas w0, s1"); + COMPARE(fcvtas(x2, s3), "fcvtas x2, s3"); + COMPARE(fcvtas(w4, d5), "fcvtas w4, d5"); + COMPARE(fcvtas(x6, d7), "fcvtas x6, d7"); + COMPARE(fcvtau(w8, s9), "fcvtau w8, s9"); + COMPARE(fcvtau(x10, s11), "fcvtau x10, s11"); + COMPARE(fcvtau(w12, d13), "fcvtau w12, d13"); + COMPARE(fcvtau(x14, d15), "fcvtau x14, d15"); COMPARE(fcvtns(w0, s1), "fcvtns w0, s1"); COMPARE(fcvtns(x2, s3), "fcvtns x2, s3"); COMPARE(fcvtns(w4, d5), "fcvtns w4, d5"); @@ -1545,6 +1635,16 @@ TEST(add_sub_negative) { COMPARE(Sub(w21, w3, -0xbc), "add w21, w3, #0xbc (188)"); COMPARE(Sub(w22, w4, -2000), "add w22, w4, #0x7d0 (2000)"); + COMPARE(Cmp(w0, -1), "cmn w0, #0x1 (1)"); + COMPARE(Cmp(x1, -1), "cmn x1, #0x1 (1)"); + COMPARE(Cmp(w2, -4095), "cmn w2, #0xfff (4095)"); + COMPARE(Cmp(x3, -4095), "cmn x3, #0xfff (4095)"); + + COMPARE(Cmn(w0, -1), "cmp w0, #0x1 (1)"); + COMPARE(Cmn(x1, -1), "cmp x1, #0x1 (1)"); + COMPARE(Cmn(w2, -4095), "cmp w2, #0xfff (4095)"); + COMPARE(Cmn(x3, -4095), "cmp x3, #0xfff (4095)"); + CLEANUP(); } @@ -1586,4 +1686,57 @@ TEST(logical_immediate_move) { CLEANUP(); } + +TEST(barriers) { + SETUP_CLASS(MacroAssembler); + + // DMB + COMPARE(Dmb(FullSystem, BarrierAll), "dmb sy"); + COMPARE(Dmb(FullSystem, BarrierReads), "dmb ld"); + COMPARE(Dmb(FullSystem, BarrierWrites), "dmb st"); + + COMPARE(Dmb(InnerShareable, BarrierAll), "dmb ish"); + COMPARE(Dmb(InnerShareable, BarrierReads), "dmb ishld"); + COMPARE(Dmb(InnerShareable, BarrierWrites), "dmb ishst"); + + COMPARE(Dmb(NonShareable, BarrierAll), "dmb nsh"); + COMPARE(Dmb(NonShareable, BarrierReads), "dmb nshld"); + COMPARE(Dmb(NonShareable, BarrierWrites), "dmb nshst"); + + COMPARE(Dmb(OuterShareable, BarrierAll), "dmb osh"); + COMPARE(Dmb(OuterShareable, BarrierReads), "dmb oshld"); + COMPARE(Dmb(OuterShareable, BarrierWrites), "dmb oshst"); + + COMPARE(Dmb(FullSystem, BarrierOther), "dmb sy (0b1100)"); + COMPARE(Dmb(InnerShareable, BarrierOther), "dmb sy (0b1000)"); + COMPARE(Dmb(NonShareable, BarrierOther), "dmb sy (0b0100)"); + COMPARE(Dmb(OuterShareable, BarrierOther), "dmb sy (0b0000)"); + + // DSB + COMPARE(Dsb(FullSystem, BarrierAll), "dsb sy"); + COMPARE(Dsb(FullSystem, BarrierReads), "dsb ld"); + COMPARE(Dsb(FullSystem, BarrierWrites), "dsb st"); + + COMPARE(Dsb(InnerShareable, BarrierAll), "dsb ish"); + COMPARE(Dsb(InnerShareable, BarrierReads), "dsb ishld"); + COMPARE(Dsb(InnerShareable, BarrierWrites), "dsb ishst"); + + COMPARE(Dsb(NonShareable, BarrierAll), "dsb nsh"); + COMPARE(Dsb(NonShareable, BarrierReads), "dsb nshld"); + COMPARE(Dsb(NonShareable, BarrierWrites), "dsb nshst"); + + COMPARE(Dsb(OuterShareable, BarrierAll), "dsb osh"); + COMPARE(Dsb(OuterShareable, BarrierReads), "dsb oshld"); + COMPARE(Dsb(OuterShareable, BarrierWrites), "dsb oshst"); + + COMPARE(Dsb(FullSystem, BarrierOther), "dsb sy (0b1100)"); + COMPARE(Dsb(InnerShareable, BarrierOther), "dsb sy (0b1000)"); + COMPARE(Dsb(NonShareable, BarrierOther), "dsb sy (0b0100)"); + COMPARE(Dsb(OuterShareable, BarrierOther), "dsb sy (0b0000)"); + + // ISB + COMPARE(Isb(), "isb"); + + CLEANUP(); +} } // namespace vixl diff --git a/tools/make_instruction_doc.pl b/tools/make_instruction_doc.pl index a244962c..5457c38e 100755 --- a/tools/make_instruction_doc.pl +++ b/tools/make_instruction_doc.pl @@ -39,7 +39,7 @@ while() { # Find a function formatted like an instruction. - if(my($t) = /^ ((?:void|inline void) [a-z0-9]{1,6})\(/mgp) + if(my($t) = /^ ((?:void|inline void) [a-z0-9]{1,6}_?)\(/mgp) { my $before = ${^PREMATCH}; my $after = ${^POSTMATCH};