From 0f7f987b1de85b27e0d3854f07394ebd5f31af65 Mon Sep 17 00:00:00 2001 From: Sam Elliott Date: Wed, 2 Jul 2025 12:33:04 -0700 Subject: [PATCH] [RISCV] Fix QC.E.LI -> C.LI with Bare Symbol Compression There's a long comment explaining this approach in RISCVInstrInfoXqci.td This is presented as an alternative to llvm/llvm-project#146184. --- .../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 4 + .../RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 44 ++++++++++- .../MCTargetDesc/RISCVELFObjectWriter.cpp | 3 + .../RISCV/MCTargetDesc/RISCVFixupKinds.h | 4 + .../RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp | 2 + llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td | 79 +++++++++++++++++++ llvm/test/MC/RISCV/Relocations/mc-dump.s | 2 +- llvm/test/MC/RISCV/xqcibi-relocations.s | 15 +++- llvm/test/MC/RISCV/xqcilb-relocations.s | 17 +++- llvm/test/MC/RISCV/xqcili-relocations.s | 17 +++- 10 files changed, 182 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index bb0f9df2032fd..eaad0bbf98f71 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -1646,6 +1646,10 @@ bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_InvalidSImm26: return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 25), (1 << 25) - 1); + // HACK: See comment before `BareSymbolQC_E_LI` in RISCVInstrInfoXqci.td. + case Match_InvalidBareSymbolQC_E_LI: + LLVM_FALLTHROUGH; + // END HACK case Match_InvalidBareSImm32: return generateImmOutOfRangeError(Operands, ErrorInfo, std::numeric_limits::min(), diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index 75ef861d58a1c..8ab6f5ef82b60 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -78,6 +78,7 @@ MCFixupKindInfo RISCVAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_riscv_branch", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, {"fixup_riscv_rvc_jump", 2, 11, MCFixupKindInfo::FKF_IsPCRel}, {"fixup_riscv_rvc_branch", 0, 16, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_riscv_rvc_imm", 0, 16, 0}, {"fixup_riscv_call", 0, 64, MCFixupKindInfo::FKF_IsPCRel}, {"fixup_riscv_call_plt", 0, 64, MCFixupKindInfo::FKF_IsPCRel}, @@ -136,6 +137,10 @@ bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, // For jump instructions the immediate must be in the range // [-1048576, 1048574] return Offset > 1048574 || Offset < -1048576; + case RISCV::fixup_riscv_rvc_imm: + // This fixup can never be emitted as a relocation, so always needs to be + // relaxed. + return true; } } @@ -154,6 +159,18 @@ static unsigned getRelaxedOpcode(const MCInst &Inst, // This only relaxes one "step" - i.e. from C.J to JAL, not from C.J to // QC.E.J, because we can always relax again if needed. return RISCV::JAL; + case RISCV::C_LI: + if (!STI.hasFeature(RISCV::FeatureVendorXqcili)) + break; + // We only need this because `QC.E.LI` can be compressed into a `C.LI`. This + // happens because the `simm6` MCOperandPredicate accepts bare symbols, and + // `QC.E.LI` is the only instruction that accepts bare symbols at parse-time + // and compresses to `C.LI`. `C.LI` does not itself accept bare symbols at + // parse time. + // + // If we have a bare symbol, we need to turn this back to a `QC.E.LI`, as we + // have no way to emit a relocation on a `C.LI` instruction. + return RISCV::QC_E_LI; case RISCV::JAL: { // We can only relax JAL if we have Xqcilb if (!STI.hasFeature(RISCV::FeatureVendorXqcilb)) @@ -226,7 +243,7 @@ void RISCVAsmBackend::relaxInstruction(MCInst &Inst, [[maybe_unused]] bool Success = RISCVRVC::uncompress(Res, Inst, STI); assert(Success && "Can't uncompress instruction"); assert(Res.getOpcode() == getRelaxedOpcode(Inst, STI) && - "Branch Relaxation Error"); + "Assembler Relaxation Error"); break; } case RISCV::JAL: { @@ -241,6 +258,23 @@ void RISCVAsmBackend::relaxInstruction(MCInst &Inst, Res.addOperand(Inst.getOperand(1)); break; } + case RISCV::C_LI: { + // This should only be hit when trying to relax a `C.LI` into a `QC.E.LI` + // because the `C.LI` has a bare symbol. We cannot use + // `RISCVRVC::uncompress` because it will use decompression patterns. The + // `QC.E.LI` compression pattern to `C.LI` is compression-only (because we + // don't want `c.li` ever printed as `qc.e.li`, which might be done if the + // pattern applied to decompression), but that doesn't help much becuase + // `C.LI` with a bare symbol will decompress to an `ADDI` anyway (because + // `simm12`'s MCOperandPredicate accepts a bare symbol and that pattern + // comes first), and we still cannot emit an `ADDI` with a bare symbol. + assert(STI.hasFeature(RISCV::FeatureVendorXqcili) && + "C.LI is only relaxable with Xqcili"); + Res.setOpcode(getRelaxedOpcode(Inst, STI)); + Res.addOperand(Inst.getOperand(0)); + Res.addOperand(Inst.getOperand(1)); + break; + } case RISCV::BEQ: case RISCV::BNE: case RISCV::BLT: @@ -539,6 +573,14 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, (Bit5 << 2); return Value; } + case RISCV::fixup_riscv_rvc_imm: { + if (!isInt<6>(Value)) + Ctx.reportError(Fixup.getLoc(), "fixup value out of range"); + unsigned Bit5 = (Value >> 5) & 0x1; + unsigned Bit4_0 = Value & 0x1f; + Value = (Bit5 << 12) | (Bit4_0 << 2); + return Value; + } case RISCV::fixup_riscv_qc_e_32: { if (!isInt<32>(Value)) Ctx.reportError(Fixup.getLoc(), "fixup value out of range"); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp index 8ab2c56ae3178..146e3ab2fc38d 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp @@ -135,6 +135,9 @@ unsigned RISCVELFObjectWriter::getRelocType(const MCFixup &Fixup, return ELF::R_RISCV_LO12_I; case RISCV::fixup_riscv_lo12_s: return ELF::R_RISCV_LO12_S; + case RISCV::fixup_riscv_rvc_imm: + reportError(Fixup.getLoc(), "No relocation for CI-type instructions"); + return ELF::R_RISCV_NONE; case RISCV::fixup_riscv_qc_e_32: return ELF::R_RISCV_QC_E_32; case RISCV::fixup_riscv_qc_abs20_u: diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h index b5c23772e6d8c..722af4100c216 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h @@ -40,12 +40,16 @@ enum Fixups { fixup_riscv_rvc_jump, // 8-bit fixup for symbol references in the compressed branch instruction fixup_riscv_rvc_branch, + // 6-bit fixup for symbol references in instructions like c.li + fixup_riscv_rvc_imm, // Fixup representing a legacy no-pic function call attached to the auipc // instruction in a pair composed of adjacent auipc+jalr instructions. fixup_riscv_call, // Fixup representing a function call attached to the auipc instruction in a // pair composed of adjacent auipc+jalr instructions. fixup_riscv_call_plt, + + // Qualcomm specific fixups // 12-bit fixup for symbol references in the 48-bit Xqcibi branch immediate // instructions fixup_riscv_qc_e_branch, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index ce0fbc0ac0654..c77f06c5ed5d0 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -636,6 +636,8 @@ uint64_t RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo, FixupKind = RISCV::fixup_riscv_rvc_jump; } else if (MIFrm == RISCVII::InstFormatCB) { FixupKind = RISCV::fixup_riscv_rvc_branch; + } else if (MIFrm == RISCVII::InstFormatCI) { + FixupKind = RISCV::fixup_riscv_rvc_imm; } else if (MIFrm == RISCVII::InstFormatI) { FixupKind = RISCV::fixup_riscv_12_i; } else if (MIFrm == RISCVII::InstFormatQC_EB) { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td index e8dd164714875..cfadb281f1326 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td @@ -1589,3 +1589,82 @@ def : CompressPat<(QC_E_BGEUI GPRNoX0:$rs1, uimm5nonzero:$imm5, bare_simm13_lsb0 def : CompressPat<(QC_E_BLTUI GPRNoX0:$rs1, uimm5nonzero:$imm5, bare_simm13_lsb0:$imm12), (QC_BLTUI GPRNoX0:$rs1, uimm5nonzero:$imm5, bare_simm13_lsb0:$imm12)>; } // let isCompressOnly = true, Predicates = [HasVendorXqcibi, IsRV32] + +// HACKS +// ----- +// The reasons for needing the definitions below are long and quite annoying. I'm writing +// this so they are explained in-line, rather than anywhere else. +// +// Emitting an instruction to an object proceeds as: +// - Compression (in emitInstruction) +// - Emit to Binary Code + Fixups +// - Assembler Relaxation +// - Fixup evaluation/application +// - If relaxed, re-emitted to Binary + Fixups +// - Relocation generation from Fixups +// +// Unfortunately, the `QC.E.LI` -> `C.LI` compression pattern has an edge case that has +// caused crashes in the past. +// +// How the bug happens is: +// - QC.E.LI is parsed with a bare symbol, which is valid + expected, and can +// be handled by fixups/relocations. +// - Compression turns this into a `C.LI` because the `simm6` +// MCOperandPredicate accepts bare symbols. +// - Binary Code emission didn't know how to create a fixup for a CI-type +// instruction containing a bare symbol. +// +// The solution to the last bullet is that we added the `fixup_riscv_rvc_imm`, +// so that we could proceed past the last error, and then use Assembler Relaxation +// to turn the `C.LI` with a bare symbol back into a `QC.E.LI`. +// +// This is good enough for emitting objects, but doesn't work for emitting +// assembly. Emitting assembly is why we need the following Hacks. +// +// Emitting an instruction to assembly proceeds as: +// - Compression (in emitInstruction) +// - Decompression (in RISCVInstPrinter::printInst) +// - InstAliases are applied +// +// So in the case of `QC.E.LI` with a bare symbol, first it is compressed to +// `C.LI` with a bare symbol, and then it is decompressed to `ADDI` with a bare +// symbol for printing, which is printed via an alias as `li , `. +// Both the decompression and the alias use the MCOperandPredicate from +// `simm12`, which accepts bare symbols. +// +// The problem here is that `li , ` fails to parse, because the +// parsers do not accept bare symbols, they only accept symbols with specifiers +// or immediates. +// +// Our solution is to add another alias, which will be prioritised above the +// `li` alias, but only when `qc.e.li` is available. We originally intended to +// use the `bare_symbol` Operand type, but this had no MCOperandPredicate, and +// adding one changed the error messages when parsing `qc.e.li` with a +// too-large constant. So instead, we add a new `AsmOperand` and `Operand` type, +// just for the alias, which parse just like a BareSymbol, but they +// have both an MCOperandPredicate, and the error message that corresponds to +// the existing one on `qc.e.li` for too-large immediates (which fail to parse +// as both an immediate, and a bare symbol). +// +// This is fairly unpleasant, but it's the least disruptive thing we can do +// and keeps all the hacks confined to the RISC-V backend code. + +def BareSymbolQC_E_LI : AsmOperandClass { + let Name = "BareSymbolQC_E_LI"; + let PredicateMethod = "isBareSymbol"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "InvalidBareSymbolQC_E_LI"; + let ParserMethod = "parseBareSymbol"; +} + +def hack_bare_symbol_qc_e_li : Operand { + let ParserMatchClass = BareSymbolQC_E_LI; + let MCOperandPredicate = [{ + return MCOp.isExpr() && MCOp.isBareSymbolRef(); + }]; +} + +let Predicates = [HasVendorXqcili, IsRV32] in { +def : InstAlias<"qc.e.li $rd, $sym", (ADDI GPR:$rd, X0, hack_bare_symbol_qc_e_li:$sym), 3>; +} // Predicates = [HasVendorXqcili, IsRV32] +// END HACKS diff --git a/llvm/test/MC/RISCV/Relocations/mc-dump.s b/llvm/test/MC/RISCV/Relocations/mc-dump.s index 2ebd249fcf49b..6662f2ae2b912 100644 --- a/llvm/test/MC/RISCV/Relocations/mc-dump.s +++ b/llvm/test/MC/RISCV/Relocations/mc-dump.s @@ -7,7 +7,7 @@ # CHECK-NEXT: Symbol @0 .text # CHECK-NEXT:0 Align Align:4 Value:0 ValueSize:1 MaxBytesToEmit:4 Nops # CHECK-NEXT:0 Data LinkerRelaxable Size:8 [97,00,00,00,e7,80,00,00] -# CHECK-NEXT: Fixup @0 Value:specifier(19,ext) Kind:4026 +# CHECK-NEXT: Fixup @0 Value:specifier(19,ext) Kind:4027 # CHECK-NEXT: Symbol @0 $x # CHECK-NEXT:8 Align Align:8 Value:0 ValueSize:1 MaxBytesToEmit:8 Nops # CHECK-NEXT:12 Data Size:4 [13,05,30,00] diff --git a/llvm/test/MC/RISCV/xqcibi-relocations.s b/llvm/test/MC/RISCV/xqcibi-relocations.s index 0f7cc8c5787a1..931cd7c9314bb 100644 --- a/llvm/test/MC/RISCV/xqcibi-relocations.s +++ b/llvm/test/MC/RISCV/xqcibi-relocations.s @@ -84,7 +84,20 @@ qc.bnei t3, 14, same_section # OBJ-NEXT: qc.e.bgeui s2, 0x18, 0x28 qc.e.bgeui s2, 24, same_section -.option norelax +## Enable compression/relaxation to check how symbols are handled. +.option noexact + +# ASM: qc.bnei t1, 10, undef +# OBJ: qc.beqi t1, 0xa, 0x42 +# OBJ-NEXT: j 0x3e +# OBJ-NEXT: R_RISCV_JAL undef{{$}} +qc.bnei t1, 10, undef + +# ASM: qc.e.bgeui s0, 40, undef +# OBJ-NEXT: qc.e.bltui s0, 0x28, 0x4c +# OBJ-NEXT: j 0x48 +# OBJ-NEXT: R_RISCV_JAL undef{{$}} +qc.e.bgeui s0, 40, undef .section .text.second, "ax", @progbits diff --git a/llvm/test/MC/RISCV/xqcilb-relocations.s b/llvm/test/MC/RISCV/xqcilb-relocations.s index ab08beed9be94..48c8c6931c8af 100644 --- a/llvm/test/MC/RISCV/xqcilb-relocations.s +++ b/llvm/test/MC/RISCV/xqcilb-relocations.s @@ -92,7 +92,22 @@ qc.e.j same_section # OBJ-NEXT: R_RISCV_RELAX qc.e.jal same_section -.option norelax +## Enable compression/relaxation to check how symbols are handled. +.option noexact + +qc.e.j undef +# ASM: j undef +# OBJ: qc.e.j 0x44 +# OBJ-NEXT: R_RISCV_VENDOR QUALCOMM{{$}} +# OBJ-NEXT: R_RISCV_CUSTOM195 undef{{$}} +# OBJ-NEXT: R_RISCV_RELAX + +qc.e.jal undef +# ASM: jal undef +# OBJ: qc.e.jal 0x4a +# OBJ-NEXT: R_RISCV_VENDOR QUALCOMM{{$}} +# OBJ-NEXT: R_RISCV_CUSTOM195 undef{{$}} +# OBJ-NEXT: R_RISCV_RELAX .section .text.other, "ax", @progbits diff --git a/llvm/test/MC/RISCV/xqcili-relocations.s b/llvm/test/MC/RISCV/xqcili-relocations.s index 866586236fa46..7eff61fc782d8 100644 --- a/llvm/test/MC/RISCV/xqcili-relocations.s +++ b/llvm/test/MC/RISCV/xqcili-relocations.s @@ -97,7 +97,22 @@ qc.li a1, %qc.abs20(undef) # OBJ-NEXT: R_RISCV_RELAX qc.e.li s1, undef -.option norelax +## Enable compression/relaxation to check how symbols are handled. +.option noexact + +# ASM: qc.li a1, %qc.abs20(undef) +# OBJ-NEXT: qc.li a1, 0x0 +# OBJ-NEXT: R_RISCV_VENDOR QUALCOMM{{$}} +# OBJ-NEXT: R_RISCV_CUSTOM192 undef{{$}} +# OBJ-NEXT: R_RISCV_RELAX +qc.li a1, %qc.abs20(undef) + +# ASM: qc.e.li a2, undef +# OBJ-NEXT: qc.e.li a2, 0x0 +# OBJ-NEXT: R_RISCV_VENDOR QUALCOMM{{$}} +# OBJ-NEXT: R_RISCV_CUSTOM194 undef{{$}} +# OBJ-NEXT: R_RISCV_RELAX +qc.e.li a2, undef .section .text.other, "ax", @progbits