Skip to content

[RISCV] Fix QC.E.LI -> C.LI with Bare Symbol Compression #146763

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1646,6 +1646,10 @@ bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidSImm26:
return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 25),
(1 << 25) - 1);
// HACK: See comment before `BareSymbolQC_E_LI` in RISCVInstrInfoXqci.td.
case Match_InvalidBareSymbolQC_E_LI:
LLVM_FALLTHROUGH;
// END HACK
case Match_InvalidBareSImm32:
return generateImmOutOfRangeError(Operands, ErrorInfo,
std::numeric_limits<int32_t>::min(),
Expand Down
44 changes: 43 additions & 1 deletion llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ MCFixupKindInfo RISCVAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"fixup_riscv_branch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
{"fixup_riscv_rvc_jump", 2, 11, MCFixupKindInfo::FKF_IsPCRel},
{"fixup_riscv_rvc_branch", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
{"fixup_riscv_rvc_imm", 0, 16, 0},
{"fixup_riscv_call", 0, 64, MCFixupKindInfo::FKF_IsPCRel},
{"fixup_riscv_call_plt", 0, 64, MCFixupKindInfo::FKF_IsPCRel},

Expand Down Expand Up @@ -136,6 +137,10 @@ bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup,
// For jump instructions the immediate must be in the range
// [-1048576, 1048574]
return Offset > 1048574 || Offset < -1048576;
case RISCV::fixup_riscv_rvc_imm:
// This fixup can never be emitted as a relocation, so always needs to be
// relaxed.
return true;
}
}

Expand All @@ -154,6 +159,18 @@ static unsigned getRelaxedOpcode(const MCInst &Inst,
// This only relaxes one "step" - i.e. from C.J to JAL, not from C.J to
// QC.E.J, because we can always relax again if needed.
return RISCV::JAL;
case RISCV::C_LI:
if (!STI.hasFeature(RISCV::FeatureVendorXqcili))
break;
// We only need this because `QC.E.LI` can be compressed into a `C.LI`. This
// happens because the `simm6` MCOperandPredicate accepts bare symbols, and
// `QC.E.LI` is the only instruction that accepts bare symbols at parse-time
// and compresses to `C.LI`. `C.LI` does not itself accept bare symbols at
// parse time.
//
// If we have a bare symbol, we need to turn this back to a `QC.E.LI`, as we
// have no way to emit a relocation on a `C.LI` instruction.
return RISCV::QC_E_LI;
case RISCV::JAL: {
// We can only relax JAL if we have Xqcilb
if (!STI.hasFeature(RISCV::FeatureVendorXqcilb))
Expand Down Expand Up @@ -226,7 +243,7 @@ void RISCVAsmBackend::relaxInstruction(MCInst &Inst,
[[maybe_unused]] bool Success = RISCVRVC::uncompress(Res, Inst, STI);
assert(Success && "Can't uncompress instruction");
assert(Res.getOpcode() == getRelaxedOpcode(Inst, STI) &&
"Branch Relaxation Error");
"Assembler Relaxation Error");
break;
}
case RISCV::JAL: {
Expand All @@ -241,6 +258,23 @@ void RISCVAsmBackend::relaxInstruction(MCInst &Inst,
Res.addOperand(Inst.getOperand(1));
break;
}
case RISCV::C_LI: {
// This should only be hit when trying to relax a `C.LI` into a `QC.E.LI`
// because the `C.LI` has a bare symbol. We cannot use
// `RISCVRVC::uncompress` because it will use decompression patterns. The
// `QC.E.LI` compression pattern to `C.LI` is compression-only (because we
// don't want `c.li` ever printed as `qc.e.li`, which might be done if the
// pattern applied to decompression), but that doesn't help much becuase
// `C.LI` with a bare symbol will decompress to an `ADDI` anyway (because
// `simm12`'s MCOperandPredicate accepts a bare symbol and that pattern
// comes first), and we still cannot emit an `ADDI` with a bare symbol.
assert(STI.hasFeature(RISCV::FeatureVendorXqcili) &&
"C.LI is only relaxable with Xqcili");
Res.setOpcode(getRelaxedOpcode(Inst, STI));
Res.addOperand(Inst.getOperand(0));
Res.addOperand(Inst.getOperand(1));
break;
}
case RISCV::BEQ:
case RISCV::BNE:
case RISCV::BLT:
Expand Down Expand Up @@ -539,6 +573,14 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
(Bit5 << 2);
return Value;
}
case RISCV::fixup_riscv_rvc_imm: {
if (!isInt<6>(Value))
Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
unsigned Bit5 = (Value >> 5) & 0x1;
unsigned Bit4_0 = Value & 0x1f;
Value = (Bit5 << 12) | (Bit4_0 << 2);
return Value;
}
case RISCV::fixup_riscv_qc_e_32: {
if (!isInt<32>(Value))
Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,9 @@ unsigned RISCVELFObjectWriter::getRelocType(const MCFixup &Fixup,
return ELF::R_RISCV_LO12_I;
case RISCV::fixup_riscv_lo12_s:
return ELF::R_RISCV_LO12_S;
case RISCV::fixup_riscv_rvc_imm:
reportError(Fixup.getLoc(), "No relocation for CI-type instructions");
return ELF::R_RISCV_NONE;
case RISCV::fixup_riscv_qc_e_32:
return ELF::R_RISCV_QC_E_32;
case RISCV::fixup_riscv_qc_abs20_u:
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,16 @@ enum Fixups {
fixup_riscv_rvc_jump,
// 8-bit fixup for symbol references in the compressed branch instruction
fixup_riscv_rvc_branch,
// 6-bit fixup for symbol references in instructions like c.li
fixup_riscv_rvc_imm,
// Fixup representing a legacy no-pic function call attached to the auipc
// instruction in a pair composed of adjacent auipc+jalr instructions.
fixup_riscv_call,
// Fixup representing a function call attached to the auipc instruction in a
// pair composed of adjacent auipc+jalr instructions.
fixup_riscv_call_plt,

// Qualcomm specific fixups
// 12-bit fixup for symbol references in the 48-bit Xqcibi branch immediate
// instructions
fixup_riscv_qc_e_branch,
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,8 @@ uint64_t RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
FixupKind = RISCV::fixup_riscv_rvc_jump;
} else if (MIFrm == RISCVII::InstFormatCB) {
FixupKind = RISCV::fixup_riscv_rvc_branch;
} else if (MIFrm == RISCVII::InstFormatCI) {
FixupKind = RISCV::fixup_riscv_rvc_imm;
} else if (MIFrm == RISCVII::InstFormatI) {
FixupKind = RISCV::fixup_riscv_12_i;
} else if (MIFrm == RISCVII::InstFormatQC_EB) {
Expand Down
79 changes: 79 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
Original file line number Diff line number Diff line change
Expand Up @@ -1589,3 +1589,82 @@ def : CompressPat<(QC_E_BGEUI GPRNoX0:$rs1, uimm5nonzero:$imm5, bare_simm13_lsb0
def : CompressPat<(QC_E_BLTUI GPRNoX0:$rs1, uimm5nonzero:$imm5, bare_simm13_lsb0:$imm12),
(QC_BLTUI GPRNoX0:$rs1, uimm5nonzero:$imm5, bare_simm13_lsb0:$imm12)>;
} // let isCompressOnly = true, Predicates = [HasVendorXqcibi, IsRV32]

// HACKS
// -----
// The reasons for needing the definitions below are long and quite annoying. I'm writing
// this so they are explained in-line, rather than anywhere else.
//
// Emitting an instruction to an object proceeds as:
// - Compression (in emitInstruction)
// - Emit to Binary Code + Fixups
// - Assembler Relaxation
// - Fixup evaluation/application
// - If relaxed, re-emitted to Binary + Fixups
// - Relocation generation from Fixups
//
// Unfortunately, the `QC.E.LI` -> `C.LI` compression pattern has an edge case that has
// caused crashes in the past.
//
// How the bug happens is:
// - QC.E.LI is parsed with a bare symbol, which is valid + expected, and can
// be handled by fixups/relocations.
// - Compression turns this into a `C.LI` because the `simm6`
// MCOperandPredicate accepts bare symbols.
// - Binary Code emission didn't know how to create a fixup for a CI-type
// instruction containing a bare symbol.
//
// The solution to the last bullet is that we added the `fixup_riscv_rvc_imm`,
// so that we could proceed past the last error, and then use Assembler Relaxation
// to turn the `C.LI` with a bare symbol back into a `QC.E.LI`.
//
// This is good enough for emitting objects, but doesn't work for emitting
// assembly. Emitting assembly is why we need the following Hacks.
//
// Emitting an instruction to assembly proceeds as:
// - Compression (in emitInstruction)
// - Decompression (in RISCVInstPrinter::printInst)
// - InstAliases are applied
//
// So in the case of `QC.E.LI` with a bare symbol, first it is compressed to
// `C.LI` with a bare symbol, and then it is decompressed to `ADDI` with a bare
// symbol for printing, which is printed via an alias as `li <reg>, <symbol>`.
// Both the decompression and the alias use the MCOperandPredicate from
// `simm12`, which accepts bare symbols.
//
// The problem here is that `li <reg>, <symbol>` fails to parse, because the
// parsers do not accept bare symbols, they only accept symbols with specifiers
// or immediates.
//
// Our solution is to add another alias, which will be prioritised above the
// `li` alias, but only when `qc.e.li` is available. We originally intended to
// use the `bare_symbol` Operand type, but this had no MCOperandPredicate, and
// adding one changed the error messages when parsing `qc.e.li` with a
// too-large constant. So instead, we add a new `AsmOperand` and `Operand` type,
// just for the alias, which parse just like a BareSymbol, but they
// have both an MCOperandPredicate, and the error message that corresponds to
// the existing one on `qc.e.li` for too-large immediates (which fail to parse
// as both an immediate, and a bare symbol).
//
// This is fairly unpleasant, but it's the least disruptive thing we can do
// and keeps all the hacks confined to the RISC-V backend code.

def BareSymbolQC_E_LI : AsmOperandClass {
let Name = "BareSymbolQC_E_LI";
let PredicateMethod = "isBareSymbol";
let RenderMethod = "addImmOperands";
let DiagnosticType = "InvalidBareSymbolQC_E_LI";
let ParserMethod = "parseBareSymbol";
}

def hack_bare_symbol_qc_e_li : Operand<XLenVT> {
let ParserMatchClass = BareSymbolQC_E_LI;
let MCOperandPredicate = [{
return MCOp.isExpr() && MCOp.isBareSymbolRef();
}];
}

let Predicates = [HasVendorXqcili, IsRV32] in {
def : InstAlias<"qc.e.li $rd, $sym", (ADDI GPR:$rd, X0, hack_bare_symbol_qc_e_li:$sym), 3>;
} // Predicates = [HasVendorXqcili, IsRV32]
// END HACKS
2 changes: 1 addition & 1 deletion llvm/test/MC/RISCV/Relocations/mc-dump.s
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# CHECK-NEXT: Symbol @0 .text
# CHECK-NEXT:0 Align Align:4 Value:0 ValueSize:1 MaxBytesToEmit:4 Nops
# CHECK-NEXT:0 Data LinkerRelaxable Size:8 [97,00,00,00,e7,80,00,00]
# CHECK-NEXT: Fixup @0 Value:specifier(19,ext) Kind:4026
# CHECK-NEXT: Fixup @0 Value:specifier(19,ext) Kind:4027
# CHECK-NEXT: Symbol @0 $x
# CHECK-NEXT:8 Align Align:8 Value:0 ValueSize:1 MaxBytesToEmit:8 Nops
# CHECK-NEXT:12 Data Size:4 [13,05,30,00]
Expand Down
15 changes: 14 additions & 1 deletion llvm/test/MC/RISCV/xqcibi-relocations.s
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,20 @@ qc.bnei t3, 14, same_section
# OBJ-NEXT: qc.e.bgeui s2, 0x18, 0x28 <same_section>
qc.e.bgeui s2, 24, same_section

.option norelax
## Enable compression/relaxation to check how symbols are handled.
.option noexact

# ASM: qc.bnei t1, 10, undef
# OBJ: qc.beqi t1, 0xa, 0x42 <same_section_extern+0x16>
# OBJ-NEXT: j 0x3e <same_section_extern+0x12>
# OBJ-NEXT: R_RISCV_JAL undef{{$}}
qc.bnei t1, 10, undef

# ASM: qc.e.bgeui s0, 40, undef
# OBJ-NEXT: qc.e.bltui s0, 0x28, 0x4c <same_section_extern+0x20>
# OBJ-NEXT: j 0x48 <same_section_extern+0x1c>
# OBJ-NEXT: R_RISCV_JAL undef{{$}}
qc.e.bgeui s0, 40, undef

.section .text.second, "ax", @progbits

Expand Down
17 changes: 16 additions & 1 deletion llvm/test/MC/RISCV/xqcilb-relocations.s
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,22 @@ qc.e.j same_section
# OBJ-NEXT: R_RISCV_RELAX
qc.e.jal same_section

.option norelax
## Enable compression/relaxation to check how symbols are handled.
.option noexact

qc.e.j undef
# ASM: j undef
# OBJ: qc.e.j 0x44 <same_section_extern+0x10>
# OBJ-NEXT: R_RISCV_VENDOR QUALCOMM{{$}}
# OBJ-NEXT: R_RISCV_CUSTOM195 undef{{$}}
# OBJ-NEXT: R_RISCV_RELAX

qc.e.jal undef
# ASM: jal undef
# OBJ: qc.e.jal 0x4a <same_section_extern+0x16>
# OBJ-NEXT: R_RISCV_VENDOR QUALCOMM{{$}}
# OBJ-NEXT: R_RISCV_CUSTOM195 undef{{$}}
# OBJ-NEXT: R_RISCV_RELAX

.section .text.other, "ax", @progbits

Expand Down
17 changes: 16 additions & 1 deletion llvm/test/MC/RISCV/xqcili-relocations.s
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,22 @@ qc.li a1, %qc.abs20(undef)
# OBJ-NEXT: R_RISCV_RELAX
qc.e.li s1, undef

.option norelax
## Enable compression/relaxation to check how symbols are handled.
.option noexact

# ASM: qc.li a1, %qc.abs20(undef)
# OBJ-NEXT: qc.li a1, 0x0
# OBJ-NEXT: R_RISCV_VENDOR QUALCOMM{{$}}
# OBJ-NEXT: R_RISCV_CUSTOM192 undef{{$}}
# OBJ-NEXT: R_RISCV_RELAX
qc.li a1, %qc.abs20(undef)

# ASM: qc.e.li a2, undef
# OBJ-NEXT: qc.e.li a2, 0x0
# OBJ-NEXT: R_RISCV_VENDOR QUALCOMM{{$}}
# OBJ-NEXT: R_RISCV_CUSTOM194 undef{{$}}
# OBJ-NEXT: R_RISCV_RELAX
qc.e.li a2, undef

.section .text.other, "ax", @progbits

Expand Down
Loading