diff --git a/include/hip/hip_runtime_api.h b/include/hip/hip_runtime_api.h index 337dd94718..675a219bdb 100644 --- a/include/hip/hip_runtime_api.h +++ b/include/hip/hip_runtime_api.h @@ -642,6 +642,7 @@ typedef struct hipIpcEventHandle_st { typedef struct ihipModule_t* hipModule_t; typedef struct ihipModuleSymbol_t* hipFunction_t; typedef struct ihipLinkState_t* hipLinkState_t; +typedef size_t (*hipOccupancyB2DSize_t)(int); /** * HIP memory pool */ @@ -6316,11 +6317,12 @@ hipError_t hipDrvLaunchKernelEx(const HIP_LAUNCH_CONFIG* config, hipFunction_t f /** * @brief determine the grid and block sizes to achieves maximum occupancy for a kernel * - * @param [out] gridSize minimum grid size for maximum potential occupancy - * @param [out] blockSize block size for maximum potential occupancy - * @param [in] f kernel function for which occupancy is calulated - * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block - * @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit + * @param [out] gridSize minimum grid size for maximum potential occupancy + * @param [out] blockSize block size for maximum potential occupancy + * @param [in] f kernel function for which occupancy is calculated + * @param [in] blkSizeToDynSMemSize Unary function that computes the dynamic shared memory size (in bytes) for a given block size + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + * @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit * * Please note, HIP does not support kernel launch with total work items defined in dimension with * size gridDim x blockDim >= 2^32. 
@@ -6328,18 +6330,20 @@ hipError_t hipDrvLaunchKernelEx(const HIP_LAUNCH_CONFIG* config, hipFunction_t f * @returns #hipSuccess, #hipErrorInvalidValue */ //TODO - Match CUoccupancyB2DSize -hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, - hipFunction_t f, size_t dynSharedMemPerBlk, - int blockSizeLimit); +hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, hipFunction_t f, + hipOccupancyB2DSize_t blkSizeToDynSMemSize, + size_t dynSharedMemPerBlk, + int blockSizeLimit); /** * @brief determine the grid and block sizes to achieves maximum occupancy for a kernel * - * @param [out] gridSize minimum grid size for maximum potential occupancy - * @param [out] blockSize block size for maximum potential occupancy - * @param [in] f kernel function for which occupancy is calulated - * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block - * @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit - * @param [in] flags Extra flags for occupancy calculation (only default supported) + * @param [out] gridSize minimum grid size for maximum potential occupancy + * @param [out] blockSize block size for maximum potential occupancy + * @param [in] f kernel function for which occupancy is calculated + * @param [in] blkSizeToDynSMemSize Unary function that computes the dynamic shared memory size (in bytes) for a given block size + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + * @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit + * @param [in] flags Extra flags for occupancy calculation (only default supported) * * Please note, HIP does not support kernel launch with total work items defined in dimension with * size gridDim x blockDim >= 2^32. 
@@ -6348,8 +6352,12 @@ hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize */ //TODO - Match CUoccupancyB2DSize hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, - hipFunction_t f, size_t dynSharedMemPerBlk, - int blockSizeLimit, unsigned int flags); + hipFunction_t f, + hipOccupancyB2DSize_t + blkSizeToDynSMemSize, + size_t dynSharedMemPerBlk, + int blockSizeLimit, + unsigned int flags); /** * @brief Returns occupancy for a device function. *