From 2a3579ef3a1dc761ce22d5280e52edbe5758b7d2 Mon Sep 17 00:00:00 2001 From: Henry Jiang Date: Tue, 26 May 2026 12:19:04 -0700 Subject: [PATCH 1/2] [SimplifyCFG] Allow hoisting in the presence of pseudoprobes --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 13 +++ .../fold-branch-to-common-dest-pseudoprobe.ll | 102 +++++++++++++++++ .../hoist-common-skip-pseudoprobe.ll | 103 ++++++++++++++++++ 3 files changed, 218 insertions(+) create mode 100644 llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-pseudoprobe.ll create mode 100644 llvm/test/Transforms/SimplifyCFG/hoist-common-skip-pseudoprobe.ll diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index f1d47abe79365..307fd3fd37373 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1169,6 +1169,11 @@ static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses( if (BonusInst.isTerminator()) continue; + // Skip cloning pseudo probes into the predecessor, as it would overcount + // otherwise. + if (isa(BonusInst)) + continue; + Instruction *NewBonusInst = BonusInst.clone(); if (!NewBonusInst->getDebugLoc().isSameSourceLocation(PTI->getDebugLoc())) { @@ -1519,6 +1524,10 @@ enum SkipFlags { }; static unsigned skippedInstrFlags(Instruction *I) { + // Pseudo probes arm marked IntrInaccessibleMemOnly for profiling correctness, + // but other instructions can be hoisted around them. + if (isa(I)) + return 0; unsigned Flags = 0; if (I->mayReadFromMemory()) Flags |= SkipReadMem; @@ -4147,6 +4156,10 @@ bool llvm::foldBranchToCommonDest(CondBrInst *BI, DomTreeUpdater *DTU, // Ignore the terminator. if (isa(I)) continue; + // Pseudo probes are marked with IntrInaccessibleMemOnly. But it is + // profitable to fold and drop the probe. + if (isa(I)) + continue; // I must be safe to execute unconditionally. if (!isSafeToSpeculativelyExecute(&I)) return false; diff --git a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-pseudoprobe.ll b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-pseudoprobe.ll new file mode 100644 index 0000000000000..c3fafd5806823 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-pseudoprobe.ll @@ -0,0 +1,102 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=simplifycfg %s | FileCheck %s + +declare void @llvm.pseudoprobe(i64, i64, i32, i64) + +; Check that probe 2 does not prevent folding and is dropped +define i32 @chained_cmp(float %v, ptr %out) { +; CHECK-LABEL: @chained_cmp( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[V:%.*]], f0x35B28000 +; CHECK-NEXT: call void @llvm.pseudoprobe(i64 -503749374543619030, i64 1, i32 0, i64 -1) +; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[V]], f0xBF800001 +; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP1]], [[CMP2]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[IF_THEN:%.*]], label [[COMMON_RET:%.*]] +; CHECK: common.ret: +; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 1, [[IF_THEN]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] +; CHECK: if.then: +; CHECK-NEXT: store float [[V]], ptr [[OUT:%.*]], align 4 +; CHECK-NEXT: br label [[COMMON_RET]] +; +entry: + %cmp1 = fcmp olt float %v, 0x3EB6500000000000 + call void @llvm.pseudoprobe(i64 -503749374543619030, i64 1, i32 0, i64 -1) + br i1 %cmp1, label %land.lhs.true, label %ret_zero + +land.lhs.true: + %cmp2 = fcmp ogt float %v, 0xBFF0000020000000 + call void @llvm.pseudoprobe(i64 -503749374543619030, i64 2, i32 0, i64 -1) + br i1 %cmp2, label %if.then, label %ret_zero + +if.then: + store float %v, ptr %out, align 4 + ret i32 1 + +ret_zero: + ret i32 0 +} + +; Probe should not enable folding when another bonus instruction is not +; speculatable like udiv +define i32 @no_fold_with_unsafe_bonus(i32 %x, i32 %y, i32 %d) { +; CHECK-LABEL: @no_fold_with_unsafe_bonus( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: br i1 [[C0]], label [[BB:%.*]], label [[COMMON:%.*]] +; CHECK: bb: +; CHECK-NEXT: call void @llvm.pseudoprobe(i64 100, i64 1, i32 0, i64 -1) +; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[Y:%.*]], [[D:%.*]] +; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[DIV]], 0 +; CHECK-NEXT: br i1 [[C1]], label [[COMMON_RET:%.*]], label [[COMMON]] +; CHECK: common.ret: +; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[COMMON]] ], [ 1, [[BB]] ] +; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] +; CHECK: common: +; CHECK-NEXT: br label [[COMMON_RET]] +; +entry: + %c0 = icmp eq i32 %x, 0 + br i1 %c0, label %bb, label %common + +bb: + call void @llvm.pseudoprobe(i64 100, i64 1, i32 0, i64 -1) + %div = udiv i32 %y, %d + %c1 = icmp eq i32 %div, 0 + br i1 %c1, label %if.then, label %common + +if.then: + ret i32 1 + +common: + ret i32 0 +} + +; Multiple probes in BB are all dropped on fold and predecessor probe is preserved. +define i32 @multiple_probes_in_bb(i32 %x, i32 %y) { +; CHECK-LABEL: @multiple_probes_in_bb( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @llvm.pseudoprobe(i64 200, i64 1, i32 0, i64 -1) +; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[Y:%.*]], 0 +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false +; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = select i1 [[OR_COND]], i32 1, i32 0 +; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] +; +entry: + call void @llvm.pseudoprobe(i64 200, i64 1, i32 0, i64 -1) + %c0 = icmp eq i32 %x, 0 + br i1 %c0, label %bb, label %common + +bb: + call void @llvm.pseudoprobe(i64 200, i64 2, i32 0, i64 -1) + %c1 = icmp eq i32 %y, 0 + call void @llvm.pseudoprobe(i64 200, i64 3, i32 0, i64 -1) + br i1 %c1, label %if.then, label %common + +if.then: + ret i32 1 + +common: + ret i32 0 +} diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-common-skip-pseudoprobe.ll b/llvm/test/Transforms/SimplifyCFG/hoist-common-skip-pseudoprobe.ll new file mode 100644 index 0000000000000..9b5e8798072c2 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/hoist-common-skip-pseudoprobe.ll @@ -0,0 +1,103 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S --passes='simplifycfg' %s | FileCheck %s + +declare void @llvm.pseudoprobe(i64, i64, i32, i64) + +; Check that instructions past the pseudoprobe instrs are still hoisted into the predecessor. +define void @hoist_loads_past_pseudoprobe(i1 %c, ptr %d, ptr %m, ptr %b, i32 %v) { +; CHECK-LABEL: @hoist_loads_past_pseudoprobe( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[M:%.*]], align 2 +; CHECK-NEXT: [[CONV0:%.*]] = zext i16 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[M1:%.*]], align 2 +; CHECK-NEXT: [[CONV1:%.*]] = zext i16 [[TMP3]] to i32 +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @llvm.pseudoprobe(i64 1, i64 1, i32 0, i64 -1) +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[CONV0]], [[V:%.*]] +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[ADD]], [[CONV1]] +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[MUL]], 16 +; CHECK-NEXT: [[CONV12:%.*]] = trunc i32 [[SHR]] to i16 +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: call void @llvm.pseudoprobe(i64 1, i64 2, i32 0, i64 -1) +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[CONV0]], [[V]] +; CHECK-NEXT: [[MUL24:%.*]] = mul i32 [[SUB]], [[CONV1]] +; CHECK-NEXT: [[SHR25:%.*]] = lshr i32 [[MUL24]], 16 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[SHR25]] to i16 +; CHECK-NEXT: [[CONV27:%.*]] = sub i16 0, [[TMP2]] +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i16 [ [[CONV27]], [[IF_ELSE]] ], [ [[CONV12]], [[IF_THEN]] ] +; CHECK-NEXT: store i16 [[STOREMERGE]], ptr [[D:%.*]], align 2 +; CHECK-NEXT: ret void +; +entry: + br i1 %c, label %if.then, label %if.else + +if.then: + call void @llvm.pseudoprobe(i64 1, i64 1, i32 0, i64 -1) + %0 = load i16, ptr %b, align 2 + %conv0a = zext i16 %0 to i32 + %add = add i32 %conv0a, %v + %1 = load i16, ptr %m, align 2 + %conv1a = zext i16 %1 to i32 + %mul = mul i32 %add, %conv1a + %shr = lshr i32 %mul, 16 + %conv12 = trunc i32 %shr to i16 + br label %if.end + +if.else: + call void @llvm.pseudoprobe(i64 1, i64 2, i32 0, i64 -1) + %2 = load i16, ptr %b, align 2 + %conv0b = zext i16 %2 to i32 + %sub = sub i32 %conv0b, %v + %3 = load i16, ptr %m, align 2 + %conv1b = zext i16 %3 to i32 + %mul24 = mul i32 %sub, %conv1b + %shr25 = lshr i32 %mul24, 16 + %4 = trunc i32 %shr25 to i16 + %conv27 = sub i16 0, %4 + br label %if.end + +if.end: + %storemerge = phi i16 [ %conv27, %if.else ], [ %conv12, %if.then ] + store i16 %storemerge, ptr %d, align 2 + ret void +} + +; Stores past pseudo probes are also hoisted into the predecessor. +define void @hoist_stores_past_pseudoprobe(i1 %c, ptr %d, ptr %d2, i32 %v) { +; CHECK-LABEL: @hoist_stores_past_pseudoprobe( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 [[V:%.*]], ptr [[D:%.*]], align 4 +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @llvm.pseudoprobe(i64 2, i64 1, i32 0, i64 -1) +; CHECK-NEXT: store i32 0, ptr [[D2:%.*]], align 4 +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: call void @llvm.pseudoprobe(i64 2, i64 2, i32 0, i64 -1) +; CHECK-NEXT: store i32 1, ptr [[D2]], align 4 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: ret void +; +entry: + br i1 %c, label %if.then, label %if.else + +if.then: + call void @llvm.pseudoprobe(i64 2, i64 1, i32 0, i64 -1) + store i32 %v, ptr %d, align 4 + store i32 0, ptr %d2, align 4 + br label %if.end + +if.else: + call void @llvm.pseudoprobe(i64 2, i64 2, i32 0, i64 -1) + store i32 %v, ptr %d, align 4 + store i32 1, ptr %d2, align 4 + br label %if.end + +if.end: + ret void +} From 7cacef5b9afeb9a8dbea547fdb79649998aa9c5b Mon Sep 17 00:00:00 2001 From: Henry Jiang Date: Tue, 26 May 2026 13:15:31 -0700 Subject: [PATCH 2/2] update comments --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 307fd3fd37373..99954ee13a740 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1524,8 +1524,7 @@ enum SkipFlags { }; static unsigned skippedInstrFlags(Instruction *I) { - // Pseudo probes arm marked IntrInaccessibleMemOnly for profiling correctness, - // but other instructions can be hoisted around them. + // Pseudo probes don't constrain reordering of other instructions. if (isa(I)) return 0; unsigned Flags = 0; @@ -4156,8 +4155,7 @@ bool llvm::foldBranchToCommonDest(CondBrInst *BI, DomTreeUpdater *DTU, // Ignore the terminator. if (isa(I)) continue; - // Pseudo probes are marked with IntrInaccessibleMemOnly. But it is - // profitable to fold and drop the probe. + // Pseudo probes aren't speculatable but can be dropped on fold. if (isa(I)) continue; // I must be safe to execute unconditionally.