-
Notifications
You must be signed in to change notification settings - Fork 11.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[BlockPlacement] Add flag to disable profile usage #102956
Conversation
@llvm/pr-subscribers-llvm-transforms
Author: Ellis Hoag (ellishg)
Changes: Create the […] When building with […]
Full diff: https://github.com/llvm/llvm-project/pull/102956.diff
5 Files Affected:
diff --git a/llvm/include/llvm/Transforms/Utils/LoopPeel.h b/llvm/include/llvm/Transforms/Utils/LoopPeel.h
index 0b78700ca71bb..987c21b7ca561 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopPeel.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopPeel.h
@@ -37,7 +37,8 @@ gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
void computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::PeelingPreferences &PP,
unsigned TripCount, DominatorTree &DT,
- ScalarEvolution &SE, AssumptionCache *AC = nullptr,
+ ScalarEvolution &SE, bool UseBranchWeights,
+ AssumptionCache *AC = nullptr,
unsigned Threshold = UINT_MAX);
} // end namespace llvm
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index be783bc4e2973..8d5cdc9c08b7f 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -219,6 +219,10 @@ static cl::opt<unsigned> ExtTspBlockPlacementMaxBlocks(
"block placement."),
cl::init(UINT_MAX), cl::Hidden);
+static cl::opt<bool>
+ UseProfileData("block-placement-use-profile", cl::init(true), cl::Hidden,
+ cl::desc("Use profile data to do precise benefit analysis"));
+
namespace llvm {
extern cl::opt<bool> EnableExtTspBlockPlacement;
extern cl::opt<bool> ApplyExtTspWithoutProfile;
@@ -1220,7 +1224,7 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
// If profile information is available, findDuplicateCandidates can do more
// precise benefit analysis.
- if (F->getFunction().hasProfileData())
+ if (UseProfileData && F->getFunction().hasProfileData())
return true;
// This is mainly for function exit BB.
@@ -1388,7 +1392,7 @@ void MachineBlockPlacement::precomputeTriangleChains() {
// When profile is available, we need to handle the triangle-shape CFG.
static BranchProbability getLayoutSuccessorProbThreshold(
const MachineBasicBlock *BB) {
- if (!BB->getParent()->getFunction().hasProfileData())
+ if (!UseProfileData || !BB->getParent()->getFunction().hasProfileData())
return BranchProbability(StaticLikelyProb, 100);
if (BB->succ_size() == 2) {
const MachineBasicBlock *Succ1 = *BB->succ_begin();
@@ -2621,7 +2625,8 @@ MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {
// will be merged into the first outer loop chain for which this block is not
// cold anymore. This needs precise profile data and we only do this when
// profile data is available.
- if (F->getFunction().hasProfileData() || ForceLoopColdBlock) {
+ if ((UseProfileData && F->getFunction().hasProfileData()) ||
+ ForceLoopColdBlock) {
BlockFrequency LoopFreq(0);
for (auto *LoopPred : L.getHeader()->predecessors())
if (!L.contains(LoopPred))
@@ -2670,8 +2675,8 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
// this loop by modeling costs more precisely which requires the profile data
// for better layout.
bool RotateLoopWithProfile =
- ForcePreciseRotationCost ||
- (PreciseRotationCost && F->getFunction().hasProfileData());
+ ForcePreciseRotationCost || (PreciseRotationCost && UseProfileData &&
+ F->getFunction().hasProfileData());
// First check to see if there is an obviously preferable top block for the
// loop. This will default to the header, but may end up as one of the
@@ -3208,7 +3213,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
bool IsSimple = TailDup.isSimpleBB(BB);
SmallVector<MachineBasicBlock *, 8> CandidatePreds;
SmallVectorImpl<MachineBasicBlock *> *CandidatePtr = nullptr;
- if (F->getFunction().hasProfileData()) {
+ if (UseProfileData && F->getFunction().hasProfileData()) {
// We can do partial duplication with precise profile information.
findDuplicateCandidates(CandidatePreds, BB, BlockFilter);
if (CandidatePreds.size() == 0)
@@ -3409,7 +3414,7 @@ void MachineBlockPlacement::findDuplicateCandidates(
void MachineBlockPlacement::initDupThreshold() {
DupThreshold = BlockFrequency(0);
- if (!F->getFunction().hasProfileData())
+ if (!UseProfileData || !F->getFunction().hasProfileData())
return;
// We prefer to use profile count.
@@ -3529,7 +3534,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
// Apply a post-processing optimizing block placement.
if (MF.size() >= 3 && EnableExtTspBlockPlacement &&
- (ApplyExtTspWithoutProfile || MF.getFunction().hasProfileData()) &&
+ (ApplyExtTspWithoutProfile ||
+ (UseProfileData && MF.getFunction().hasProfileData())) &&
MF.size() <= ExtTspBlockPlacementMaxBlocks) {
// Find a new placement and modify the layout of the blocks in the function.
applyExtTsp();
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index cbc35b6dd4292..0a446851acf2d 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -179,6 +179,12 @@ static cl::opt<unsigned> PragmaUnrollFullMaxIterations(
"pragma-unroll-full-max-iterations", cl::init(1'000'000), cl::Hidden,
cl::desc("Maximum allowed iterations to unroll under pragma unroll full."));
+static cl::opt<bool>
+ UseBranchWeights("loop-unroll-use-branch-weights", cl::init(true),
+ cl::Hidden,
+ cl::desc("Estimate loop trip counts with branch weight "
+ "metadata to help determine the peel count"));
+
/// A magic value for use with the Threshold parameter to indicate
/// that the loop unroll should be performed regardless of how much
/// code expansion would result.
@@ -1012,7 +1018,8 @@ bool llvm::computeUnrollCount(
}
// 5th priority is loop peeling.
- computePeelCount(L, LoopSize, PP, TripCount, DT, SE, AC, UP.Threshold);
+ computePeelCount(L, LoopSize, PP, TripCount, DT, SE, UseBranchWeights, AC,
+ UP.Threshold);
if (PP.PeelCount) {
UP.Runtime = false;
UP.Count = 1;
@@ -1081,7 +1088,7 @@ bool llvm::computeUnrollCount(
}
// Check if the runtime trip count is too small when profile is available.
- if (L->getHeader()->getParent()->hasProfileData()) {
+ if (UseBranchWeights && L->getHeader()->getParent()->hasProfileData()) {
if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
if (*ProfileTripCount < FlatLoopTripCountThreshold)
return false;
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 5d7c0d947facc..9557d31a122a6 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -538,8 +538,8 @@ static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::PeelingPreferences &PP,
unsigned TripCount, DominatorTree &DT,
- ScalarEvolution &SE, AssumptionCache *AC,
- unsigned Threshold) {
+ ScalarEvolution &SE, bool UseBranchWeights,
+ AssumptionCache *AC, unsigned Threshold) {
assert(LoopSize > 0 && "Zero loop size is not allowed!");
// Save the PP.PeelCount value set by the target in
// TTI.getPeelingPreferences or by the flag -unroll-peel-count.
@@ -632,7 +632,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
// hit the peeled section.
// We only do this in the presence of profile information, since otherwise
// our estimates of the trip count are not reliable enough.
- if (L->getHeader()->getParent()->hasProfileData()) {
+ if (UseBranchWeights && L->getHeader()->getParent()->hasProfileData()) {
if (violatesLegacyMultiExitLoopCheck(L))
return;
std::optional<unsigned> EstimatedTripCount = getLoopEstimatedTripCount(L);
diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll
index e3cfe53950f57..c7fb389c63595 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -S -passes=loop-unroll,loop-unroll -verify-dom-info -debug-only=loop-unroll -unroll-peel-max-count=7 2>&1 | FileCheck %s
+; RUN: opt < %s -S -passes=loop-unroll,loop-unroll -verify-dom-info -debug-only=loop-unroll -unroll-peel-max-count=7 2>&1 | FileCheck %s --check-prefixes=CHECK,PGO
+; RUN: opt < %s -S -passes=loop-unroll,loop-unroll -verify-dom-info -debug-only=loop-unroll -unroll-peel-max-count=7 -loop-unroll-use-branch-weights=false 2>&1 | FileCheck %s
; REQUIRES: asserts
declare void @f1()
@@ -11,8 +12,8 @@ declare void @f2()
define void @test1(i32 %k) !prof !4 {
; CHECK: Loop Unroll: F[test1] Loop %for.body
; CHECK: PEELING loop %for.body with iteration count 2!
-; CHECK: PEELING loop %for.body with iteration count 5!
-; CHECK: llvm.loop.unroll.disable
+; PGO: PEELING loop %for.body with iteration count 5!
+; PGO: llvm.loop.unroll.disable
for.body.lr.ph:
br label %for.body
|
b141622
to
f4f826f
Compare
Create the `-block-placement-use-profile` LLVM flag to enable/disable using profiles to make decisions. When building with `-Oz`, consuming profiles can drastically increase binary size. We found `-block-placement-use-profile=false` gives a slight text size win, which mitigates some of this regression.