|
14 | 14 | #include "GCNSubtarget.h"
|
15 | 15 | #include "Utils/AMDGPUBaseInfo.h"
|
16 | 16 | #include "llvm/Analysis/CycleAnalysis.h"
|
| 17 | +#include "llvm/Analysis/TargetTransformInfo.h" |
17 | 18 | #include "llvm/CodeGen/TargetPassConfig.h"
|
18 | 19 | #include "llvm/IR/IntrinsicsAMDGPU.h"
|
19 | 20 | #include "llvm/IR/IntrinsicsR600.h"
|
@@ -1295,6 +1296,114 @@ struct AAAMDGPUNoAGPR
|
1295 | 1296 |
|
1296 | 1297 | const char AAAMDGPUNoAGPR::ID = 0;
|
1297 | 1298 |
|
| 1299 | +struct AAAMDGPUUniform : public StateWrapper<BooleanState, AbstractAttribute> { |
| 1300 | + using Base = StateWrapper<BooleanState, AbstractAttribute>; |
| 1301 | + AAAMDGPUUniform(const IRPosition &IRP, Attributor &A) : Base(IRP) {} |
| 1302 | + |
| 1303 | + /// Create an abstract attribute view for the position \p IRP. |
| 1304 | + static AAAMDGPUUniform &createForPosition(const IRPosition &IRP, |
| 1305 | + Attributor &A); |
| 1306 | + |
| 1307 | + /// See AbstractAttribute::getName() |
| 1308 | + StringRef getName() const override { return "AAAMDGPUUniform"; } |
| 1309 | + |
| 1310 | + const std::string getAsStr(Attributor *A) const override { |
| 1311 | + return getAssumed() ? "uniform" : "divergent"; |
| 1312 | + } |
| 1313 | + |
| 1314 | + void trackStatistics() const override {} |
| 1315 | + |
| 1316 | + /// See AbstractAttribute::getIdAddr() |
| 1317 | + const char *getIdAddr() const override { return &ID; } |
| 1318 | + |
| 1319 | + /// This function should return true if the type of the \p AA is |
| 1320 | + /// AAAMDGPUUniform |
| 1321 | + static bool classof(const AbstractAttribute *AA) { |
| 1322 | + return (AA->getIdAddr() == &ID); |
| 1323 | + } |
| 1324 | + |
| 1325 | + /// Unique ID (due to the unique address) |
| 1326 | + static const char ID; |
| 1327 | +}; |
| 1328 | + |
| 1329 | +const char AAAMDGPUUniform::ID = 0; |
| 1330 | + |
| 1331 | +/// This AA infers the inreg attribute for a function argument. |
| 1332 | +struct AAAMDGPUUniformArgument : public AAAMDGPUUniform { |
| 1333 | + AAAMDGPUUniformArgument(const IRPosition &IRP, Attributor &A) |
| 1334 | + : AAAMDGPUUniform(IRP, A) {} |
| 1335 | + |
| 1336 | + void initialize(Attributor &A) override { |
| 1337 | + Argument *Arg = getAssociatedArgument(); |
| 1338 | + CallingConv::ID CC = Arg->getParent()->getCallingConv(); |
| 1339 | + if (Arg->hasAttribute(Attribute::InReg)) { |
| 1340 | + indicateOptimisticFixpoint(); |
| 1341 | + return; |
| 1342 | + } |
| 1343 | + |
| 1344 | + if (AMDGPU::isEntryFunctionCC(CC)) { |
| 1345 | + // We only use isArgPassedInSGPR on kernel entry function arguments, so |
| 1346 | + // even if an SGPR is used for non-uniform i1 argument passing, it will |
| 1347 | + // not affect this. |
| 1348 | + if (AMDGPU::isArgPassedInSGPR(Arg)) |
| 1349 | + indicateOptimisticFixpoint(); |
| 1350 | + else |
| 1351 | + indicatePessimisticFixpoint(); |
| 1352 | + } |
| 1353 | + } |
| 1354 | + |
| 1355 | + ChangeStatus updateImpl(Attributor &A) override { |
| 1356 | + unsigned ArgNo = getAssociatedArgument()->getArgNo(); |
| 1357 | + TargetMachine &TM = |
| 1358 | + static_cast<AMDGPUInformationCache &>(A.getInfoCache()).TM; |
| 1359 | + |
| 1360 | + auto isUniform = [&](AbstractCallSite ACS) -> bool { |
| 1361 | + CallBase *CB = ACS.getInstruction(); |
| 1362 | + Value *V = CB->getArgOperand(ArgNo); |
| 1363 | + if (auto *Arg = dyn_cast<Argument>(V)) { |
| 1364 | + auto *AA = A.getOrCreateAAFor<AAAMDGPUUniform>( |
| 1365 | + IRPosition::argument(*Arg), this, DepClassTy::REQUIRED); |
| 1366 | + return AA && AA->isValidState(); |
| 1367 | + } |
| 1368 | + TargetTransformInfo TTI = TM.getTargetTransformInfo(*CB->getFunction()); |
| 1369 | + return TTI.isAlwaysUniform(V); |
| 1370 | + }; |
| 1371 | + |
| 1372 | + bool UsedAssumedInformation = true; |
| 1373 | + if (!A.checkForAllCallSites(isUniform, *this, /*RequireAllCallSites=*/true, |
| 1374 | + UsedAssumedInformation)) |
| 1375 | + return indicatePessimisticFixpoint(); |
| 1376 | + |
| 1377 | + if (!UsedAssumedInformation) |
| 1378 | + return indicateOptimisticFixpoint(); |
| 1379 | + |
| 1380 | + return ChangeStatus::UNCHANGED; |
| 1381 | + } |
| 1382 | + |
| 1383 | + ChangeStatus manifest(Attributor &A) override { |
| 1384 | + Argument *Arg = getAssociatedArgument(); |
| 1385 | + // If the argument already has the inreg attribute, there is nothing |
| 1386 | + // left to do. |
| 1387 | + if (Arg->hasAttribute(Attribute::InReg)) |
| 1388 | + return ChangeStatus::UNCHANGED; |
| 1389 | + if (AMDGPU::isEntryFunctionCC(Arg->getParent()->getCallingConv())) |
| 1390 | + return ChangeStatus::UNCHANGED; |
| 1391 | + LLVMContext &Ctx = Arg->getContext(); |
| 1392 | + return A.manifestAttrs(getIRPosition(), |
| 1393 | + {Attribute::get(Ctx, Attribute::InReg)}); |
| 1394 | + } |
| 1395 | +}; |
| 1396 | + |
| 1397 | +AAAMDGPUUniform &AAAMDGPUUniform::createForPosition(const IRPosition &IRP, |
| 1398 | + Attributor &A) { |
| 1399 | + switch (IRP.getPositionKind()) { |
| 1400 | + case IRPosition::IRP_ARGUMENT: |
| 1401 | + return *new (A.Allocator) AAAMDGPUUniformArgument(IRP, A); |
| 1402 | + default: |
| 1403 | + llvm_unreachable("not a valid position for AAAMDGPUUniform"); |
| 1404 | + } |
| 1405 | +} |
| 1406 | + |
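
A minimal IR sketch of the intended effect (hypothetical function names, not taken from this patch's tests): when every call site of a non-entry function passes a value that is already known to be uniform, such as a kernel argument that is passed in an SGPR, the new AA lets the attributor mark the callee's argument inreg.

; Before: nothing is known about %x in @callee.
define internal void @callee(i32 %x) {
  ret void
}

define amdgpu_kernel void @kernel(i32 %v) {
  ; %v is a kernel entry argument, passed in an SGPR and therefore uniform.
  call void @callee(i32 %v)
  ret void
}

; After the attributor runs, the callee argument is expected to become:
;   define internal void @callee(i32 inreg %x) { ... }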
1298 | 1407 | /// Performs the final check and updates the 'amdgpu-waves-per-eu' attribute
|
1299 | 1408 | /// based on the finalized 'amdgpu-flat-work-group-size' attribute.
|
1300 | 1409 | /// Both attributes start with narrow ranges that expand during iteration.
|
@@ -1381,7 +1490,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
|
1381 | 1490 | &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
|
1382 | 1491 | &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
|
1383 | 1492 | &AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
|
1384 | | - &AAInstanceInfo::ID}); |
| 1493 | + &AAInstanceInfo::ID, &AAAMDGPUUniform::ID}); |
1385 | 1494 |
|
1386 | 1495 | AttributorConfig AC(CGUpdater);
|
1387 | 1496 | AC.IsClosedWorldModule = Options.IsClosedWorld;
|
@@ -1433,6 +1542,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
|
1433 | 1542 | A.getOrCreateAAFor<AAAddressSpace>(
|
1434 | 1543 | IRPosition::value(*CmpX->getPointerOperand()));
|
1435 | 1544 | }
|
| 1545 | + |
| 1546 | + if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) { |
| 1547 | + for (auto &Arg : F->args()) |
| 1548 | + A.getOrCreateAAFor<AAAMDGPUUniform>(IRPosition::argument(Arg)); |
| 1549 | + } |
1436 | 1550 | }
|
1437 | 1551 | }
|
1438 | 1552 |
|
|