diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index f2bda0d3ec0b3..fed121a7c02c2 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -2529,6 +2529,9 @@ As part of the AMDGPU MC layer, AMDGPU provides the following target-specific ``max(arg, ...)`` 1 or more Variadic signed operation that returns the maximum value of all its arguments. + ``min(arg, ...)`` 1 or more Variadic signed operation that returns the minimum + value of all its arguments. + ``or(arg, ...)`` 1 or more Variadic signed operation that returns the bitwise-or result of all its arguments. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp index 2d43019a8d66b..1786a4b498607 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetMachine.h" +#include "GCNSubtarget.h" #define DEBUG_TYPE "amdgpu-mc-resource-usage" @@ -302,6 +303,10 @@ void MCResourceInfo::gatherResourceInfo( } }); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); + // {max VGPRs, max AGPRs} this function may use. Kept as one object because + // C++17 lambdas cannot capture structured bindings. + const auto MaxAllowedRegs = ST.getMaxNumVectorRegs(MF.getFunction()); auto SetMaxReg = [&](MCSymbol *MaxSym, int32_t numRegs, ResourceInfoKind RIK) { if (!FRI.HasIndirectCall) { @@ -310,11 +315,23 @@ void MCResourceInfo::gatherResourceInfo( } else { const MCExpr *SymRef = MCSymbolRefExpr::create(MaxSym, OutContext); MCSymbol *LocalNumSym = getSymbol(FnSym->getName(), RIK, OutContext); - const MCExpr *MaxWithLocal = AMDGPUMCExpr::createMax( + const MCExpr *RegExpr = AMDGPUMCExpr::createMax( {MCConstantExpr::create(numRegs, OutContext), SymRef}, OutContext); - LocalNumSym->setVariableValue(MaxWithLocal); + // The module-wide maximum may exceed what this function is allowed to + // use, so clamp VGPR/AGPR counts to the function's own limit. + if (RIK == RIK_NumVGPR) { + RegExpr = AMDGPUMCExpr::createMin( + {MCConstantExpr::create(MaxAllowedRegs.first, OutContext), RegExpr}, + OutContext); + } else if (RIK == RIK_NumAGPR) { + RegExpr = 
AMDGPUMCExpr::createMin( + {MCConstantExpr::create(MaxAllowedRegs.second, OutContext), RegExpr}, + OutContext); + } + LocalNumSym->setVariableValue(RegExpr); LLVM_DEBUG(dbgs() << "MCResUse: " << LocalNumSym->getName() - << ": Indirect callee within, using module maximum\n"); + << ": Indirect callee within, using minimum of module " + "maximum and function maximum\n"); } }; diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index bcfb105137af8..0a30f09fdeb37 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -9344,7 +9344,8 @@ void AMDGPUAsmParser::onBeginOfFile() { /// Parse AMDGPU specific expressions. /// /// expr ::= or(expr, ...) | -/// max(expr, ...) +/// max(expr, ...) | +/// min(expr, ...) /// bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { using AGVK = AMDGPUMCExpr::VariantKind; @@ -9353,6 +9354,7 @@ bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { StringRef TokenId = getTokenStr(); AGVK VK = StringSwitch<AGVK>(TokenId) .Case("max", AGVK::AGVK_Max) + .Case("min", AGVK::AGVK_Min) .Case("or", AGVK::AGVK_Or) .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs) .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs) diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp index 08871f43a42b5..90ac9147b9eeb 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp @@ -65,6 +65,9 @@ void AMDGPUMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { case AGVK_Max: OS << "max("; break; + case AGVK_Min: + OS << "min("; + break; case AGVK_ExtraSGPRs: OS << "extrasgprs("; break; @@ -103,6 +106,8 @@ static int64_t op(AMDGPUMCExpr::VariantKind Kind, int64_t Arg1, int64_t Arg2) { return std::max(Arg1, Arg2); case AMDGPUMCExpr::AGVK_Or: return Arg1 | Arg2; + case 
AMDGPUMCExpr::AGVK_Min: + return std::min(Arg1, Arg2); } } @@ -499,6 +504,18 @@ static void targetOpKnownBitsMapHelper(const MCExpr *Expr, KnownBitsMap &KBM, KBM[Expr] = std::move(KB); return; } + case AMDGPUMCExpr::VariantKind::AGVK_Min: { + // min() is evaluated as a signed operation (std::min on int64_t), so the + // known bits must be combined with the signed minimum as well. + knownBitsMapHelper(AGVK->getSubExpr(0), KBM, Depth + 1); + KnownBits KB = KBM[AGVK->getSubExpr(0)]; + for (const MCExpr *Arg : AGVK->getArgs()) { + knownBitsMapHelper(Arg, KBM, Depth + 1); + KB = KnownBits::smin(KB, KBM[Arg]); + } + KBM[Expr] = std::move(KB); + return; + } case AMDGPUMCExpr::VariantKind::AGVK_ExtraSGPRs: case AMDGPUMCExpr::VariantKind::AGVK_TotalNumVGPRs: case AMDGPUMCExpr::VariantKind::AGVK_AlignTo: diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h index 66b6fdb4b0042..33a8f5d21af4c 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h @@ -24,8 +24,9 @@ enum class LitModifier { None, Lit, Lit64 }; /// operations are: /// - (bitwise) or /// - max +/// - min /// -/// \note If the 'or'/'max' operations are provided only a single argument, the +/// \note If the 'or'/'max'/'min' operations are provided only a single argument, the /// operation will act as a no-op and simply resolve as the provided argument. /// class AMDGPUMCExpr : public MCTargetExpr { @@ -41,6 +42,7 @@ class AMDGPUMCExpr : public MCTargetExpr { AGVK_InstPrefSize, AGVK_Lit, AGVK_Lit64, + AGVK_Min, }; // Relocation specifiers. 
@@ -85,6 +87,11 @@ class AMDGPUMCExpr : public MCTargetExpr { MCContext &Ctx) { return create(VariantKind::AGVK_Max, Args, Ctx); } + /// Creates a variadic signed-minimum expression over \p Args. + static const AMDGPUMCExpr *createMin(ArrayRef<const MCExpr *> Args, + MCContext &Ctx) { + return create(VariantKind::AGVK_Min, Args, Ctx); + } static const AMDGPUMCExpr *createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, diff --git a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll index dffde8d25c5f6..46b449514c8bc 100644 --- a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll +++ b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll @@ -156,8 +156,8 @@ declare void @undef_func() ; GCN-LABEL: {{^}}kernel_call_undef_func: ; GCN: .amdhsa_next_free_vgpr max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0) ; GFX90A: .amdhsa_accum_offset (((((alignto(max(1, .Lkernel_call_undef_func.num_vgpr), 4)/4)-1)&~65536)&63)+1)*4 -; GCN: .set .Lkernel_call_undef_func.num_vgpr, max(32, amdgpu.max_num_vgpr) -; GCN: .set .Lkernel_call_undef_func.num_agpr, max(0, amdgpu.max_num_agpr) +; GCN: .set .Lkernel_call_undef_func.num_vgpr, min(128, max(32, amdgpu.max_num_vgpr)) +; GCN: .set .Lkernel_call_undef_func.num_agpr, min(128, max(0, amdgpu.max_num_agpr)) ; GCN: NumVgprs: .Lkernel_call_undef_func.num_vgpr ; GCN: NumAgprs: .Lkernel_call_undef_func.num_agpr ; GCN: TotalNumVgprs: totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr) diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll b/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll index b05c65e73d734..f4c5ec78d23a7 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll @@ -60,7 +60,7 @@ bb: declare void @undef_func() ; CHECK: .type kernel_call_undef_func -; CHECK: .set .Lkernel_call_undef_func.num_agpr, max(0, amdgpu.max_num_agpr) +; CHECK: .set 
.Lkernel_call_undef_func.num_agpr, min(128, max(0, amdgpu.max_num_agpr)) ; CHECK: NumAgprs: .Lkernel_call_undef_func.num_agpr ; CHECK: .set amdgpu.max_num_agpr, 32 define amdgpu_kernel void @kernel_call_undef_func() #0 { diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll index 57784651591b8..4ea5b04b12641 100644 --- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll +++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll @@ -547,17 +547,17 @@ define amdgpu_kernel void @f256() #256 { attributes #256 = { nounwind "amdgpu-flat-work-group-size"="256,256" } ; GCN-LABEL: {{^}}f512: -; GFX9: .set .Lf512.num_vgpr, max(128, amdgpu.max_num_vgpr) -; GFX90A: .set .Lf512.num_vgpr, max(128, amdgpu.max_num_vgpr) -; GFX90A: .set .Lf512.num_agpr, max(128, amdgpu.max_num_agpr) -; GFX10WGP-WAVE32: .set .Lf512.num_vgpr, max(256, amdgpu.max_num_vgpr) -; GFX10WGP-WAVE64: .set .Lf512.num_vgpr, max(256, amdgpu.max_num_vgpr) -; GFX10CU-WAVE32: .set .Lf512.num_vgpr, max(128, amdgpu.max_num_vgpr) -; GFX10CU-WAVE64: .set .Lf512.num_vgpr, max(128, amdgpu.max_num_vgpr) -; GFX11WGP-WAVE32: .set .Lf512.num_vgpr, max(256, amdgpu.max_num_vgpr) -; GFX11WGP-WAVE64: .set .Lf512.num_vgpr, max(256, amdgpu.max_num_vgpr) -; GFX11CU-WAVE32: .set .Lf512.num_vgpr, max(192, amdgpu.max_num_vgpr) -; GFX11CU-WAVE64: .set .Lf512.num_vgpr, max(192, amdgpu.max_num_vgpr) +; GFX9: .set .Lf512.num_vgpr, min(128, max(128, amdgpu.max_num_vgpr)) +; GFX90A: .set .Lf512.num_vgpr, min(128, max(128, amdgpu.max_num_vgpr)) +; GFX90A: .set .Lf512.num_agpr, min(128, max(128, amdgpu.max_num_agpr)) +; GFX10WGP-WAVE32: .set .Lf512.num_vgpr, min(256, max(256, amdgpu.max_num_vgpr)) +; GFX10WGP-WAVE64: .set .Lf512.num_vgpr, min(256, max(256, amdgpu.max_num_vgpr)) +; GFX10CU-WAVE32: .set .Lf512.num_vgpr, min(128, max(128, amdgpu.max_num_vgpr)) +; GFX10CU-WAVE64: .set .Lf512.num_vgpr, 
min(128, max(128, amdgpu.max_num_vgpr)) +; GFX11WGP-WAVE32: .set .Lf512.num_vgpr, min(256, max(256, amdgpu.max_num_vgpr)) +; GFX11WGP-WAVE64: .set .Lf512.num_vgpr, min(256, max(256, amdgpu.max_num_vgpr)) +; GFX11CU-WAVE32: .set .Lf512.num_vgpr, min(192, max(192, amdgpu.max_num_vgpr)) +; GFX11CU-WAVE64: .set .Lf512.num_vgpr, min(192, max(192, amdgpu.max_num_vgpr)) ; GCN: NumVgprs: .Lf512.num_vgpr ; GFX90A: NumAgprs: .Lf512.num_agpr ; GFX90A: TotalNumVgprs: totalnumvgprs(.Lf512.num_agpr, .Lf512.num_vgpr) @@ -569,17 +569,17 @@ define amdgpu_kernel void @f512() #512 { attributes #512 = { nounwind "amdgpu-flat-work-group-size"="512,512" } ; GCN-LABEL: {{^}}f1024: -; GFX9: .set .Lf1024.num_vgpr, max(64, amdgpu.max_num_vgpr) -; GFX90A: .set .Lf1024.num_vgpr, max(64, amdgpu.max_num_vgpr) -; GFX90A: .set .Lf1024.num_agpr, max(64, amdgpu.max_num_agpr) -; GFX10WGP-WAVE32: .set .Lf1024.num_vgpr, max(128, amdgpu.max_num_vgpr) -; GFX10WGP-WAVE64: .set .Lf1024.num_vgpr, max(128, amdgpu.max_num_vgpr) -; GFX10CU-WAVE32: .set .Lf1024.num_vgpr, max(64, amdgpu.max_num_vgpr) -; GFX10CU-WAVE64: .set .Lf1024.num_vgpr, max(64, amdgpu.max_num_vgpr) -; GFX11WGP-WAVE32: .set .Lf1024.num_vgpr, max(192, amdgpu.max_num_vgpr) -; GFX11WGP-WAVE64: .set .Lf1024.num_vgpr, max(192, amdgpu.max_num_vgpr) -; GFX11CU-WAVE32: .set .Lf1024.num_vgpr, max(96, amdgpu.max_num_vgpr) -; GFX11CU-WAVE64: .set .Lf1024.num_vgpr, max(96, amdgpu.max_num_vgpr) +; GFX9: .set .Lf1024.num_vgpr, min(64, max(64, amdgpu.max_num_vgpr)) +; GFX90A: .set .Lf1024.num_vgpr, min(64, max(64, amdgpu.max_num_vgpr)) +; GFX90A: .set .Lf1024.num_agpr, min(64, max(64, amdgpu.max_num_agpr)) +; GFX10WGP-WAVE32: .set .Lf1024.num_vgpr, min(128, max(128, amdgpu.max_num_vgpr)) +; GFX10WGP-WAVE64: .set .Lf1024.num_vgpr, min(128, max(128, amdgpu.max_num_vgpr)) +; GFX10CU-WAVE32: .set .Lf1024.num_vgpr, min(64, max(64, amdgpu.max_num_vgpr)) +; GFX10CU-WAVE64: .set .Lf1024.num_vgpr, min(64, max(64, amdgpu.max_num_vgpr)) +; GFX11WGP-WAVE32: .set 
.Lf1024.num_vgpr, min(192, max(192, amdgpu.max_num_vgpr)) +; GFX11WGP-WAVE64: .set .Lf1024.num_vgpr, min(192, max(192, amdgpu.max_num_vgpr)) +; GFX11CU-WAVE32: .set .Lf1024.num_vgpr, min(96, max(96, amdgpu.max_num_vgpr)) +; GFX11CU-WAVE64: .set .Lf1024.num_vgpr, min(96, max(96, amdgpu.max_num_vgpr)) ; GCN: NumVgprs: .Lf1024.num_vgpr ; GFX90A: NumAgprs: .Lf1024.num_agpr ; GFX90A: TotalNumVgprs: totalnumvgprs(.Lf1024.num_agpr, .Lf1024.num_vgpr) diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll index c3bcaf1808acf..54b7f806b2898 100644 --- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll +++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll @@ -239,7 +239,7 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 { ; Make sure there's no assert when a sgpr96 is used. ; GCN-LABEL: {{^}}count_use_sgpr96_external_call ; GCN: ; sgpr96 s[{{[0-9]+}}:{{[0-9]+}}] -; GCN: .set .Lcount_use_sgpr96_external_call.num_vgpr, max(0, amdgpu.max_num_vgpr) +; GCN: .set .Lcount_use_sgpr96_external_call.num_vgpr, min(64, max(0, amdgpu.max_num_vgpr)) ; GCN: .set .Lcount_use_sgpr96_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr) ; CI: TotalNumSgprs: .Lcount_use_sgpr96_external_call.numbered_sgpr+4 ; VI-BUG: TotalNumSgprs: 96 @@ -254,7 +254,7 @@ entry: ; Make sure there's no assert when a sgpr160 is used. ; GCN-LABEL: {{^}}count_use_sgpr160_external_call ; GCN: ; sgpr160 s[{{[0-9]+}}:{{[0-9]+}}] -; GCN: .set .Lcount_use_sgpr160_external_call.num_vgpr, max(0, amdgpu.max_num_vgpr) +; GCN: .set .Lcount_use_sgpr160_external_call.num_vgpr, min(64, max(0, amdgpu.max_num_vgpr)) ; GCN: .set .Lcount_use_sgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr) ; CI: TotalNumSgprs: .Lcount_use_sgpr160_external_call.numbered_sgpr+4 ; VI-BUG: TotalNumSgprs: 96 @@ -269,7 +269,7 @@ entry: ; Make sure there's no assert when a vgpr160 is used. 
; GCN-LABEL: {{^}}count_use_vgpr160_external_call ; GCN: ; vgpr160 v[{{[0-9]+}}:{{[0-9]+}}] -; GCN: .set .Lcount_use_vgpr160_external_call.num_vgpr, max(5, amdgpu.max_num_vgpr) +; GCN: .set .Lcount_use_vgpr160_external_call.num_vgpr, min(64, max(5, amdgpu.max_num_vgpr)) ; GCN: .set .Lcount_use_vgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr) ; CI: TotalNumSgprs: .Lcount_use_vgpr160_external_call.numbered_sgpr+4 ; VI-BUG: TotalNumSgprs: 96 diff --git a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll index 2c9778e1db5e5..d7fe355f9cf4f 100644 --- a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll +++ b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll @@ -356,8 +356,8 @@ define amdgpu_kernel void @multi_call_use_use_stack() #0 { declare void @external() #0 ; GCN-LABEL: {{^}}multi_call_with_external: -; GCN: .set .Lmulti_call_with_external.num_vgpr, max(41, amdgpu.max_num_vgpr) -; GCN: .set .Lmulti_call_with_external.num_agpr, max(0, amdgpu.max_num_agpr) +; GCN: .set .Lmulti_call_with_external.num_vgpr, min(64, max(41, amdgpu.max_num_vgpr)) +; GCN: .set .Lmulti_call_with_external.num_agpr, min(0, max(0, amdgpu.max_num_agpr)) ; GCN: .set .Lmulti_call_with_external.numbered_sgpr, max(52, amdgpu.max_num_sgpr) ; GCN: .set .Lmulti_call_with_external.private_seg_size, 0+max(.Luse_stack0.private_seg_size, .Luse_stack1.private_seg_size) ; GCN: .set .Lmulti_call_with_external.uses_vcc, 1 @@ -376,8 +376,8 @@ define amdgpu_kernel void @multi_call_with_external() #0 { } ; GCN-LABEL: {{^}}multi_call_with_external_and_duplicates: -; GCN: .set .Lmulti_call_with_external_and_duplicates.num_vgpr, max(41, amdgpu.max_num_vgpr) -; GCN: .set .Lmulti_call_with_external_and_duplicates.num_agpr, max(0, amdgpu.max_num_agpr) +; GCN: .set .Lmulti_call_with_external_and_duplicates.num_vgpr, min(64, max(41, amdgpu.max_num_vgpr)) +; GCN: .set .Lmulti_call_with_external_and_duplicates.num_agpr, min(0, max(0, 
amdgpu.max_num_agpr)) ; GCN: .set .Lmulti_call_with_external_and_duplicates.numbered_sgpr, max(54, amdgpu.max_num_sgpr) ; GCN: .set .Lmulti_call_with_external_and_duplicates.private_seg_size, 0+max(.Luse_stack0.private_seg_size, .Luse_stack1.private_seg_size) ; GCN: .set .Lmulti_call_with_external_and_duplicates.uses_vcc, 1 @@ -399,8 +399,8 @@ define amdgpu_kernel void @multi_call_with_external_and_duplicates() #0 { } ; GCN-LABEL: {{^}}usage_external: -; GCN: .set .Lusage_external.num_vgpr, max(32, amdgpu.max_num_vgpr) -; GCN: .set .Lusage_external.num_agpr, max(0, amdgpu.max_num_agpr) +; GCN: .set .Lusage_external.num_vgpr, min(64, max(32, amdgpu.max_num_vgpr)) +; GCN: .set .Lusage_external.num_agpr, min(0, max(0, amdgpu.max_num_agpr)) ; GCN: .set .Lusage_external.numbered_sgpr, max(33, amdgpu.max_num_sgpr) ; GCN: .set .Lusage_external.private_seg_size, 0 ; GCN: .set .Lusage_external.uses_vcc, 1 @@ -419,8 +419,8 @@ define amdgpu_kernel void @usage_external() #0 { declare void @external_recurse() #2 ; GCN-LABEL: {{^}}usage_external_recurse: -; GCN: .set .Lusage_external_recurse.num_vgpr, max(32, amdgpu.max_num_vgpr) -; GCN: .set .Lusage_external_recurse.num_agpr, max(0, amdgpu.max_num_agpr) +; GCN: .set .Lusage_external_recurse.num_vgpr, min(64, max(32, amdgpu.max_num_vgpr)) +; GCN: .set .Lusage_external_recurse.num_agpr, min(0, max(0, amdgpu.max_num_agpr)) ; GCN: .set .Lusage_external_recurse.numbered_sgpr, max(33, amdgpu.max_num_sgpr) ; GCN: .set .Lusage_external_recurse.private_seg_size, 0 ; GCN: .set .Lusage_external_recurse.uses_vcc, 1 @@ -614,8 +614,8 @@ define amdgpu_kernel void @multi_call_with_multi_stage_recurse(i32 %n) #0 { ; Make sure there's no assert when a sgpr96 is used. 
; GCN-LABEL: {{^}}count_use_sgpr96_external_call -; GCN: .set .Lcount_use_sgpr96_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr) -; GCN: .set .Lcount_use_sgpr96_external_call.num_agpr, max(0, amdgpu.max_num_agpr) +; GCN: .set .Lcount_use_sgpr96_external_call.num_vgpr, min(64, max(32, amdgpu.max_num_vgpr)) +; GCN: .set .Lcount_use_sgpr96_external_call.num_agpr, min(0, max(0, amdgpu.max_num_agpr)) ; GCN: .set .Lcount_use_sgpr96_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr) ; GCN: .set .Lcount_use_sgpr96_external_call.private_seg_size, 0 ; GCN: .set .Lcount_use_sgpr96_external_call.uses_vcc, 1 @@ -635,8 +635,8 @@ entry: ; Make sure there's no assert when a sgpr160 is used. ; GCN-LABEL: {{^}}count_use_sgpr160_external_call -; GCN: .set .Lcount_use_sgpr160_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr) -; GCN: .set .Lcount_use_sgpr160_external_call.num_agpr, max(0, amdgpu.max_num_agpr) +; GCN: .set .Lcount_use_sgpr160_external_call.num_vgpr, min(64, max(32, amdgpu.max_num_vgpr)) +; GCN: .set .Lcount_use_sgpr160_external_call.num_agpr, min(0, max(0, amdgpu.max_num_agpr)) ; GCN: .set .Lcount_use_sgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr) ; GCN: .set .Lcount_use_sgpr160_external_call.private_seg_size, 0 ; GCN: .set .Lcount_use_sgpr160_external_call.uses_vcc, 1 @@ -656,8 +656,8 @@ entry: ; Make sure there's no assert when a vgpr160 is used. 
; GCN-LABEL: {{^}}count_use_vgpr160_external_call -; GCN: .set .Lcount_use_vgpr160_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr) -; GCN: .set .Lcount_use_vgpr160_external_call.num_agpr, max(0, amdgpu.max_num_agpr) +; GCN: .set .Lcount_use_vgpr160_external_call.num_vgpr, min(64, max(32, amdgpu.max_num_vgpr)) +; GCN: .set .Lcount_use_vgpr160_external_call.num_agpr, min(0, max(0, amdgpu.max_num_agpr)) ; GCN: .set .Lcount_use_vgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr) ; GCN: .set .Lcount_use_vgpr160_external_call.private_seg_size, 0 ; GCN: .set .Lcount_use_vgpr160_external_call.uses_vcc, 1 diff --git a/llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll b/llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll index cd9a18571815e..1753a88e40cc5 100644 --- a/llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll +++ b/llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll @@ -6,8 +6,8 @@ ; use-after-free if the assignment operator invokes a DenseMap growth. 
; CHECK-LABEL: I_Quit: -; CHECK: .set .LI_Quit.num_vgpr, max(41, amdgpu.max_num_vgpr) -; CHECK: .set .LI_Quit.num_agpr, max(0, amdgpu.max_num_agpr) +; CHECK: .set .LI_Quit.num_vgpr, min(128, max(41, amdgpu.max_num_vgpr)) +; CHECK: .set .LI_Quit.num_agpr, min(0, max(0, amdgpu.max_num_agpr)) ; CHECK: .set .LI_Quit.numbered_sgpr, max(56, amdgpu.max_num_sgpr) ; CHECK: .set .LI_Quit.private_seg_size, 16 ; CHECK: .set .LI_Quit.uses_vcc, 1 @@ -78,8 +78,8 @@ define void @P_SetThingPosition() { } ; CHECK-LABEL: P_SetupPsprites: -; CHECK: .set .LP_SetupPsprites.num_vgpr, max(41, amdgpu.max_num_vgpr) -; CHECK: .set .LP_SetupPsprites.num_agpr, max(0, amdgpu.max_num_agpr) +; CHECK: .set .LP_SetupPsprites.num_vgpr, min(128, max(41, amdgpu.max_num_vgpr)) +; CHECK: .set .LP_SetupPsprites.num_agpr, min(0, max(0, amdgpu.max_num_agpr)) ; CHECK: .set .LP_SetupPsprites.numbered_sgpr, max(56, amdgpu.max_num_sgpr) ; CHECK: .set .LP_SetupPsprites.private_seg_size, 16 ; CHECK: .set .LP_SetupPsprites.uses_vcc, 1 @@ -126,8 +126,8 @@ define void @P_SpawnPlayer() { } ; CHECK-LABEL: I_Error: -; CHECK: .set .LI_Error.num_vgpr, max(41, amdgpu.max_num_vgpr) -; CHECK: .set .LI_Error.num_agpr, max(0, amdgpu.max_num_agpr) +; CHECK: .set .LI_Error.num_vgpr, min(128, max(41, amdgpu.max_num_vgpr)) +; CHECK: .set .LI_Error.num_agpr, min(0, max(0, amdgpu.max_num_agpr)) ; CHECK: .set .LI_Error.numbered_sgpr, max(56, amdgpu.max_num_sgpr) ; CHECK: .set .LI_Error.private_seg_size, 16 ; CHECK: .set .LI_Error.uses_vcc, 1 diff --git a/llvm/test/CodeGen/AMDGPU/object-linking-local-resources.ll b/llvm/test/CodeGen/AMDGPU/object-linking-local-resources.ll index 95214bcf7c06d..5d604705e0d25 100644 --- a/llvm/test/CodeGen/AMDGPU/object-linking-local-resources.ll +++ b/llvm/test/CodeGen/AMDGPU/object-linking-local-resources.ll @@ -28,8 +28,8 @@ define amdgpu_kernel void @my_kernel(ptr %fptr) { ; COM: callee" path. 
Register/stack-size symbols include the module-level ; COM: sinks; boolean flags are all forced to 1; HasIndirectCall is set too ; COM: (IsIndirect covers calls to declarations). -; DEFAULT: .set .Lcalls_extern.num_vgpr, max({{[0-9]+}}, amdgpu.max_num_vgpr) -; DEFAULT: .set .Lcalls_extern.num_agpr, max({{[0-9]+}}, amdgpu.max_num_agpr) +; DEFAULT: .set .Lcalls_extern.num_vgpr, min(64, max({{[0-9]+}}, amdgpu.max_num_vgpr)) +; DEFAULT: .set .Lcalls_extern.num_agpr, min(0, max({{[0-9]+}}, amdgpu.max_num_agpr)) ; DEFAULT: .set .Lcalls_extern.numbered_sgpr, max({{[0-9]+}}, amdgpu.max_num_sgpr) ; DEFAULT: .set .Lcalls_extern.num_named_barrier, max({{[0-9]+}}, amdgpu.max_num_named_barrier) ; DEFAULT: .set .Lcalls_extern.uses_vcc, 1 diff --git a/llvm/test/MC/AMDGPU/mcexpr_amd.s b/llvm/test/MC/AMDGPU/mcexpr_amd.s index d7340bb5fd2ed..4423d56f50397 100644 --- a/llvm/test/MC/AMDGPU/mcexpr_amd.s +++ b/llvm/test/MC/AMDGPU/mcexpr_amd.s @@ -17,7 +17,6 @@ // OBJDUMP-NEXT: 000000000000000a l *ABS* 0000000000000000 max_literals // OBJDUMP-NEXT: 000000000000000f l *ABS* 0000000000000000 max_with_max_sym // OBJDUMP-NEXT: 000000000000000f l *ABS* 0000000000000000 max -// OBJDUMP-NEXT: ffffffffffffffff l *ABS* 0000000000000000 neg_one // OBJDUMP-NEXT: ffffffffffffffff l *ABS* 0000000000000000 max_neg_numbers // OBJDUMP-NEXT: ffffffffffffffff l *ABS* 0000000000000000 max_neg_number // OBJDUMP-NEXT: 0000000000000003 l *ABS* 0000000000000000 max_with_subexpr @@ -29,6 +28,24 @@ // OBJDUMP-NEXT: 8000000000000000 l *ABS* 0000000000000000 max_expr_one_min // OBJDUMP-NEXT: 0000000000000003 l *ABS* 0000000000000000 max_expr_two_min // OBJDUMP-NEXT: 0000000000989680 l *ABS* 0000000000000000 max_expr_three_min +// OBJDUMP-NEXT: 0000000000000001 l *ABS* 0000000000000000 min_expression_all +// OBJDUMP-NEXT: 0000000000000001 l *ABS* 0000000000000000 min_expression_two +// OBJDUMP-NEXT: 0000000000000003 l *ABS* 0000000000000000 min_expression_one +// OBJDUMP-NEXT: 0000000000000001 l *ABS* 
0000000000000000 min_literals +// OBJDUMP-NEXT: 0000000000000000 l *ABS* 0000000000000000 min_with_min_sym +// OBJDUMP-NEXT: 0000000000000000 l *ABS* 0000000000000000 min +// OBJDUMP-NEXT: ffffffffffffffff l *ABS* 0000000000000000 neg_one +// OBJDUMP-NEXT: fffffffffffffffb l *ABS* 0000000000000000 min_neg_numbers +// OBJDUMP-NEXT: ffffffffffffffff l *ABS* 0000000000000000 min_neg_number +// OBJDUMP-NEXT: 0000000000000003 l *ABS* 0000000000000000 min_with_subexpr +// OBJDUMP-NEXT: 0000000000000004 l *ABS* 0000000000000000 min_as_subexpr +// OBJDUMP-NEXT: 0000000000000001 l *ABS* 0000000000000000 min_recursive_subexpr +// OBJDUMP-NEXT: 7fffffffffffffff l *ABS* 0000000000000000 min_expr_one_max +// OBJDUMP-NEXT: 0000000000000003 l *ABS* 0000000000000000 min_expr_two_max +// OBJDUMP-NEXT: ffffffffff676980 l *ABS* 0000000000000000 min_expr_three_max +// OBJDUMP-NEXT: 8000000000000000 l *ABS* 0000000000000000 min_expr_one_min +// OBJDUMP-NEXT: 8000000000000000 l *ABS* 0000000000000000 min_expr_two_min +// OBJDUMP-NEXT: 8000000000000000 l *ABS* 0000000000000000 min_expr_three_min // OBJDUMP-NEXT: 0000000000000007 l *ABS* 0000000000000000 or_expression_all // OBJDUMP-NEXT: 0000000000000003 l *ABS* 0000000000000000 or_expression_two // OBJDUMP-NEXT: 0000000000000001 l *ABS* 0000000000000000 or_expression_one @@ -97,6 +114,49 @@ .set max_expr_two_min, max(i64_min, three) .set max_expr_three_min, max(i64_min, three, 10000000) +// ASM: .set min_expression_all, min(1, 2, five, 3, four) +// ASM: .set min_expression_two, 1 +// ASM: .set min_expression_one, 3 +// ASM: .set min_literals, 1 +// ASM: .set min_with_min_sym, min(min, 4, 3, 1, 2) + +.set min_expression_all, min(one, two, five, three, four) +.set min_expression_two, min(one, three) +.set min_expression_one, min(three) +.set min_literals, min(1,2,3,4,5,6,7,8,9,10) +.set min_with_min_sym, min(min, 4, 3, one, two) + +// ASM: .set min_neg_numbers, -5 +// ASM: .set min_neg_number, -1 + +.set neg_one, -1 +.set min_neg_numbers, 
min(-5, -4, -3, -2, neg_one) +.set min_neg_number, min(neg_one) + +// ASM: .set min_with_subexpr, 3 +// ASM: .set min_as_subexpr, 1+min(4, 3, five) +// ASM: .set min_recursive_subexpr, min(min(1, four), 3, min_expression_all) + +.set min_with_subexpr, min(((one | 3) << 3) / 8) +.set min_as_subexpr, 1 + min(4, 3, five) +.set min_recursive_subexpr, min(min(one, four), three, min_expression_all) + +// ASM: .set min_expr_one_max, 9223372036854775807 +// ASM: .set min_expr_two_max, 3 +// ASM: .set min_expr_three_max, -10000000 + +.set min_expr_one_max, min(i64_max) +.set min_expr_two_max, min(i64_max, three) +.set min_expr_three_max, min(i64_max, three, -10000000) + +// ASM: .set min_expr_one_min, -9223372036854775808 +// ASM: .set min_expr_two_min, min(-9223372036854775808, five) +// ASM: .set min_expr_three_min, min(-9223372036854775808, five, 10000000) + +.set min_expr_one_min, min(i64_min) +.set min_expr_two_min, min(i64_min, five) +.set min_expr_three_min, min(i64_min, five, 10000000) + // ASM: .set or_expression_all, or(1, 2, five, 3, four) // ASM: .set or_expression_two, 3 // ASM: .set or_expression_one, 1 @@ -127,4 +187,5 @@ .set four, 4 .set five, 5 .set max, 0xF +.set min, 0x0 .set or, 0xFF diff --git a/llvm/test/MC/AMDGPU/mcexpr_amd_err.s b/llvm/test/MC/AMDGPU/mcexpr_amd_err.s index 834c6eee8c31f..e97479688c5fe 100644 --- a/llvm/test/MC/AMDGPU/mcexpr_amd_err.s +++ b/llvm/test/MC/AMDGPU/mcexpr_amd_err.s @@ -8,6 +8,10 @@ // ASM: :[[@LINE-1]]:{{[0-9]+}}: error: empty max expression // ASM: :[[@LINE-2]]:{{[0-9]+}}: error: missing expression +.set min_empty, min() +// ASM: :[[@LINE-1]]:{{[0-9]+}}: error: empty min expression +// ASM: :[[@LINE-2]]:{{[0-9]+}}: error: missing expression + .set or_empty, or() // ASM: :[[@LINE-1]]:{{[0-9]+}}: error: empty or expression // ASM: :[[@LINE-2]]:{{[0-9]+}}: error: missing expression @@ -40,6 +44,10 @@ // ASM: :[[@LINE-1]]:{{[0-9]+}}: error: unexpected token in or expression // ASM: :[[@LINE-2]]:{{[0-9]+}}: error: missing 
expression +.set min_expression_one, min(four,five +// ASM: :[[@LINE-1]]:{{[0-9]+}}: error: unexpected token in min expression +// ASM: :[[@LINE-2]]:{{[0-9]+}}: error: missing expression + .set max_no_lparen, max four, five) // ASM: :[[@LINE-1]]:{{[0-9]+}}: error: expected newline @@ -49,5 +57,8 @@ .set max_rparen_only, max) // ASM: :[[@LINE-1]]:{{[0-9]+}}: error: expected newline +.set min_no_lparen, min four, five) +// ASM: :[[@LINE-1]]:{{[0-9]+}}: error: expected newline + .set four, 4 .set five, 5