From a5929dc201b8dcac5ee413ab9cfdac7f5ae243be Mon Sep 17 00:00:00 2001 From: Phyzait Lu Date: Tue, 16 Jun 2026 01:47:57 +0800 Subject: [PATCH] bugfix: reuse explicit GPU IDs cyclically across MPI ranks --- src/align_tiltseries_runner.cpp | 8 +++----- src/align_tiltseries_runner.h | 1 + src/amyloid_finder.cpp | 2 +- src/amyloid_finder_mpi.cpp | 4 ++-- src/args.cpp | 28 ++++++++++++++++++++++++++++ src/args.h | 10 ++++++++++ src/autopicker.cpp | 2 +- src/autopicker_mpi.cpp | 4 ++-- src/ml_optimiser.cpp | 4 ++-- src/ml_optimiser_mpi.cpp | 6 +++--- src/motioncorr_runner.cpp | 8 +++----- src/motioncorr_runner.h | 1 + 12 files changed, 57 insertions(+), 21 deletions(-) diff --git a/src/align_tiltseries_runner.cpp b/src/align_tiltseries_runner.cpp index 93b172bc9..0a2b537e7 100644 --- a/src/align_tiltseries_runner.cpp +++ b/src/align_tiltseries_runner.cpp @@ -563,13 +563,11 @@ void AlignTiltseriesRunner::executeAreTomo(long idx_tomo, int rank) if (gpu_ids.length() > 0) { - if (rank >= allThreadIDs.size()) - REPORT_ERROR("ERROR: not enough MPI nodes specified for the GPU IDs."); - command += " -Gpu " ; - for (int igpu = 0; igpu < allThreadIDs[rank].size(); igpu++) + const std::vector &rankThreadIDs = getDeviceIDsForRank(allThreadIDs, rank); + for (int igpu = 0; igpu < rankThreadIDs.size(); igpu++) { - command += allThreadIDs[rank][igpu] + " "; + command += rankThreadIDs[igpu] + " "; } } diff --git a/src/align_tiltseries_runner.h b/src/align_tiltseries_runner.h index bc926e844..9cc4013fd 100644 --- a/src/align_tiltseries_runner.h +++ b/src/align_tiltseries_runner.h @@ -27,6 +27,7 @@ #include #include #include +#include "src/args.h" #include #include #include diff --git a/src/amyloid_finder.cpp b/src/amyloid_finder.cpp index 1f1b238ba..fa68b0ce4 100644 --- a/src/amyloid_finder.cpp +++ b/src/amyloid_finder.cpp @@ -244,7 +244,7 @@ void AmyloidFinder::deviceInitialise() untangleDeviceIDs(gpu_ids, allThreadIDs); // Sequential initialisation of GPUs on all ranks - if (!std::isdigit(*gpu_ids.begin())) + if (!hasExplicitDeviceIDs(gpu_ids)) device_id = 0; else device_id = textToInteger((allThreadIDs[0][0]).c_str()); diff --git a/src/amyloid_finder_mpi.cpp b/src/amyloid_finder_mpi.cpp index 18bb14a54..459e37808 100644 --- a/src/amyloid_finder_mpi.cpp +++ b/src/amyloid_finder_mpi.cpp @@ -53,10 +53,10 @@ void AmyloidFinderMpi::deviceInitialise() untangleDeviceIDs(gpu_ids, allThreadIDs); // Sequential initialisation of GPUs on all ranks - if (!std::isdigit(*gpu_ids.begin())) + if (!hasExplicitDeviceIDs(gpu_ids)) device_id = node->rank%devCount; else - device_id = textToInteger((allThreadIDs[node->rank][0]).c_str()); + device_id = textToInteger(getDeviceIDsForRank(allThreadIDs, node->rank)[0].c_str()); for (int follower = 0; follower < node->size; follower++) { diff --git a/src/args.cpp b/src/args.cpp index c77a472e1..76e82d6c5 100644 --- a/src/args.cpp +++ b/src/args.cpp @@ -46,6 +46,7 @@ #include "src/gcc_version.h" #include "src/matrix1d.h" #include +#include // Get parameters from the command line ==================================== std::string getParameter(int argc, char **argv, const std::string param, const std::string option) @@ -472,3 +473,30 @@ void untangleDeviceIDs(std::string &tangled, std::vector < std::vector < std::st #endif } +bool hasExplicitDeviceIDs(const std::string &gpu_ids) +{ + return gpu_ids.size() > 0 && std::isdigit(static_cast(gpu_ids[0])); +} + +const std::vector& getDeviceIDsForRank( + const std::vector < std::vector < std::string > > &allThreadIDs, + int rank) +{ + if (rank < 0) + REPORT_ERROR("Negative MPI rank requested for GPU ID assignment."); + + if (allThreadIDs.size() == 0) + REPORT_ERROR("No GPU IDs parsed from --gpu."); + + const std::vector &rankThreadIDs = allThreadIDs[rank % allThreadIDs.size()]; + if (rankThreadIDs.size() == 0) + REPORT_ERROR("No GPU IDs parsed for MPI rank from --gpu."); + + for (int i = 0; i < rankThreadIDs.size(); i++) + { + if (rankThreadIDs[i] == "") + REPORT_ERROR("Empty GPU ID parsed from --gpu."); + } + + return rankThreadIDs; +} diff --git a/src/args.h b/src/args.h index 224294dd9..1c7f30182 100644 --- a/src/args.h +++ b/src/args.h @@ -218,4 +218,14 @@ class IOParser */ void untangleDeviceIDs(std::string &tangled, std::vector < std::vector < std::string > > &untangled); +bool hasExplicitDeviceIDs(const std::string &gpu_ids); + +/* + * Returns the device IDs for an MPI rank. If fewer rank-specific GPU ID groups + * are provided than MPI ranks, the groups are reused cyclically. + */ +const std::vector& getDeviceIDsForRank( + const std::vector < std::vector < std::string > > &allThreadIDs, + int rank); + #endif diff --git a/src/autopicker.cpp b/src/autopicker.cpp index f1c2fe467..b815f0e6a 100644 --- a/src/autopicker.cpp +++ b/src/autopicker.cpp @@ -967,7 +967,7 @@ void AutoPicker::deviceInitialise() untangleDeviceIDs(gpu_ids, allThreadIDs); // Sequential initialisation of GPUs on all ranks - if (!std::isdigit(*gpu_ids.begin())) + if (!hasExplicitDeviceIDs(gpu_ids)) device_id = 0; else device_id = textToInteger((allThreadIDs[0][0]).c_str()); diff --git a/src/autopicker_mpi.cpp b/src/autopicker_mpi.cpp index 20d009b27..3db6cbb7b 100644 --- a/src/autopicker_mpi.cpp +++ b/src/autopicker_mpi.cpp @@ -54,10 +54,10 @@ void AutoPickerMpi::deviceInitialise() untangleDeviceIDs(gpu_ids, allThreadIDs); // Sequential initialisation of GPUs on all ranks - if (!std::isdigit(*gpu_ids.begin())) + if (!hasExplicitDeviceIDs(gpu_ids)) device_id = node->rank%devCount; else - device_id = textToInteger((allThreadIDs[node->rank][0]).c_str()); + device_id = textToInteger(getDeviceIDsForRank(allThreadIDs, node->rank)[0].c_str()); for (int follower = 0; follower < node->size; follower++) { diff --git a/src/ml_optimiser.cpp b/src/ml_optimiser.cpp index 1179c6374..816f391dc 100644 --- a/src/ml_optimiser.cpp +++ b/src/ml_optimiser.cpp @@ -1608,7 +1608,7 @@ void MlOptimiser::initialise() // Sequential initialisation of GPUs on all ranks bool fullAutomaticMapping(true); bool semiAutomaticMapping(true); - if (allThreadIDs[0].size()==0 || (!std::isdigit(*gpu_ids.begin())) ) + if (allThreadIDs[0].size()==0 || (!hasExplicitDeviceIDs(gpu_ids)) ) std::cout << "gpu-ids not specified, threads will automatically be mapped to devices (incrementally)."<< std::endl; else { @@ -1714,7 +1714,7 @@ void MlOptimiser::initialise() bool fullAutomaticMapping; bool semiAutomaticMapping; - if (allThreadIDs[0].size()==0 || ! std::isdigit(*gpu_ids.begin()) ) + if (allThreadIDs[0].size()==0 || !hasExplicitDeviceIDs(gpu_ids) ) { fullAutomaticMapping = true; semiAutomaticMapping = true; diff --git a/src/ml_optimiser_mpi.cpp b/src/ml_optimiser_mpi.cpp index e84666a81..397264518 100644 --- a/src/ml_optimiser_mpi.cpp +++ b/src/ml_optimiser_mpi.cpp @@ -241,7 +241,7 @@ void MlOptimiserMpi::initialise() fullAutomaticMapping = true; semiAutomaticMapping = true; // possible to set fully manual for specific ranks - if ((allThreadIDs.size()isLeader()) { - if (! std::isdigit(*gpu_ids.begin())) + if (!hasExplicitDeviceIDs(gpu_ids)) { std::cout << std::string(80, '*') << std::endl; std::cout << "GPU-ids not specified and MPI/threads will automatically be mapped to available devices."<< std::endl; @@ -626,7 +626,7 @@ will still yield good performance and possibly a more stable execution. \n" << s bool fullAutomaticMapping; bool semiAutomaticMapping; - if (allThreadIDs[node->rank-1].size()==0 || ! std::isdigit(*gpu_ids.begin()) ) + if (allThreadIDs[node->rank-1].size()==0 || !hasExplicitDeviceIDs(gpu_ids) ) { fullAutomaticMapping = true; semiAutomaticMapping = true; diff --git a/src/motioncorr_runner.cpp b/src/motioncorr_runner.cpp index fa79070a0..9ee59c5ec 100644 --- a/src/motioncorr_runner.cpp +++ b/src/motioncorr_runner.cpp @@ -636,12 +636,10 @@ bool MotioncorrRunner::executeMotioncor2(Micrograph &mic, int rank) } else { - if (rank >= allThreadIDs.size()) - REPORT_ERROR("ERROR: not enough MPI nodes specified for the GPU IDs."); - command += " -Gpu "; - for (int igpu = 0; igpu < allThreadIDs[rank].size(); igpu++) - command += allThreadIDs[rank][igpu] + " "; + const std::vector &rankThreadIDs = getDeviceIDsForRank(allThreadIDs, rank); + for (int igpu = 0; igpu < rankThreadIDs.size(); igpu++) + command += rankThreadIDs[igpu] + " "; } command += " >> " + fn_out + " 2>> " + fn_err; diff --git a/src/motioncorr_runner.h b/src/motioncorr_runner.h index c9661fc24..e4ad28f4d 100644 --- a/src/motioncorr_runner.h +++ b/src/motioncorr_runner.h @@ -27,6 +27,7 @@ #include #include #include +#include "src/args.h" #include #include "src/metadata_table.h" #include "src/image.h"