Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
0fe7075
chore(vcpkg): Import vanilla intel-mkl port
timniederhausen Apr 15, 2025
b0f2b9f
chore(vcpkg): Switch MKL build to lp64 + sequential
timniederhausen Apr 15, 2025
d4a281c
refactor!(core): Move include files into gprat/ subdirectory
timniederhausen Jul 11, 2025
99ce13f
refactor!(core): Move everything into the gprat namespace
timniederhausen Jul 14, 2025
52415b5
refactor!(core): Remove unnecessary prefixes/suffixes from filenames
timniederhausen Jul 20, 2025
ba2b9ac
fix(ci): Always enable lint workflows
timniederhausen Jul 20, 2025
9925866
feat(core): Support serializing AdamParams and SEKParams
timniederhausen Jul 22, 2025
d78e6e3
feat!(core): Introduce const_tile_data + mutable_tile_data
timniederhausen Jul 20, 2025
946aeed
feat(core): Add simple-to-use per-function performance counters
timniederhausen Aug 10, 2025
0374fd3
feat(core): Use NUMA-aware allocator for tile data
timniederhausen Jul 25, 2025
b7bfc77
chore(core): Consistently use std::size_t
timniederhausen Aug 11, 2025
c4a3573
feat(examples): Add command-line arguments for all algorithm parameters
timniederhausen Apr 26, 2025
8613aff
fix(examples): Don't try to write results outside of the target direc…
timniederhausen May 6, 2025
3fdf9ff
feat(core): Track function invocation count as well
timniederhausen Sep 20, 2025
fdf500e
refactor!(core): Add scheduler type and make algorithms use it
timniederhausen Sep 20, 2025
d83828f
chore: Upgrade dependencies
timniederhausen Aug 10, 2025
488d48a
chore(core): Fix issues with CUDA / nvcc under Windows
timniederhausen Nov 9, 2025
de1e8c2
feat(core): Add optional cache eviction before BLAS operation
timniederhausen Nov 9, 2025
93ce3e1
chore: Add some minimal docs on Windows support
timniederhausen Nov 23, 2025
1e6ca27
feat(core): Add basic heuristic for tile count
timniederhausen May 6, 2025
a33d7d6
fix(spack-repo): Enable HPX networking and instrumentation
timniederhausen May 28, 2025
6b77379
feat(examples): Add work-in-progress distributed version
timniederhausen May 26, 2025
c41500b
feat(examples,core): Implement distributed GP with HPX
timniederhausen Jun 23, 2026
87ec4b0
Fix compilation and tests
constracktor Jun 23, 2026
d00861c
feat(test): add unit test suite, CBLAS adapter dedup, and fp32 support
constracktor Jun 24, 2026
a6e7120
fix(build): resolve all compiler warnings in project sources
constracktor Jun 24, 2026
0a41c8d
fix(sycl): compile without warnings and all tests passing
constracktor Jun 24, 2026
333cd57
test(sycl): add SYCL GPU tests that run on actual hardware
constracktor Jun 24, 2026
60fbd2e
fix(sycl): wire up SYCL GPU dispatch for predict/cholesky/loss
constracktor Jun 24, 2026
1ecbee9
fix(distributed): correct named_make_tile and named_async dispatch
constracktor Jun 24, 2026
05a2f52
fix(sycl): SYCL_DEVICE constructor and create() use correct device by id
constracktor Jun 24, 2026
ca0825a
fix(target,sycl): make i_stream/i_queue atomic; remove dead queue slot
constracktor Jun 24, 2026
037f07c
fix(build): fix icpx detection and GPRAT_SYCL_CUDA_PATH type
constracktor Jun 24, 2026
292799b
fix(distributed): set_async, cache-before-write, managers copy, tile_…
constracktor Jun 24, 2026
418a062
chore: strip BOM and CRLF from tiled_dataset.hpp and actions.hpp
constracktor Jun 25, 2026
6ad7a8a
fix(sycl): fix all hanging and incorrect SYCL GPU tests and wire example
constracktor Jun 25, 2026
6872461
Fix python cpu
constracktor Jun 27, 2026
11f2527
Remove spack script
constracktor Jun 29, 2026
82c3e4c
fix(cpp-example): align run_gprat_cpp.sh with run_gprat_python.sh
constracktor Jun 29, 2026
868cdb8
fix: correct three bugs found in code review
constracktor Jun 30, 2026
ec797a8
refactor(test): remove _no_optimize fields, align GPU test with CPU s…
constracktor Jun 30, 2026
26ee64b
test(data): restore output.json baseline from v0.4.0
constracktor Jun 30, 2026
b756ead
chore(core): add missing blas_enums.hpp header
constracktor Jun 30, 2026
ab68093
docs: update README for current state of project
constracktor Jun 30, 2026
100d370
docs: add Tim Niederhausen as contributor for distributed GP
constracktor Jun 30, 2026
5298e32
chore: remove CTest artifact from tracking, add Testing/ to .gitignore
constracktor Jun 30, 2026
0585f76
style: apply clang-format-18 to all C++ source files
constracktor Jun 30, 2026
7ed8543
fix(cmake): resolve dangling zlib/rapidjson/otf2 link names from HPX+…
constracktor Jul 1, 2026
8271c09
fix(distributed-example): make gprat_distributed usable as a scaling …
constracktor Jul 1, 2026
debddbc
fix(distributed-example): address code review findings
constracktor Jul 1, 2026
b409a62
feat(distributed-example): default output.csv to the example directory
constracktor Jul 1, 2026
681fd61
feat(distributed-example): support multi-locality builds and runs
constracktor Jul 3, 2026
f3dce43
test(distributed): add opt-in multi-locality CTest smoke tests
constracktor Jul 3, 2026
8bae7e0
fix(test): serialize multi-locality distributed tests
constracktor Jul 3, 2026
94f8c46
test(distributed): verify computed results across localities, not jus…
constracktor Jul 3, 2026
21a5799
fix(test): share GPRAT_ROOT-aware data-directory resolution
constracktor Jul 3, 2026
ae1bd9e
style: apply clang-format-18 and cmake-format
constracktor Jul 4, 2026
bbb69cb
ci: build with distributed support and run multi-locality tests
constracktor Jul 4, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,11 @@ jobs:

- name: Choose environment
run: |
cp spack-repo/environments/spack_cpu_gcc.yaml spack.yaml
# gprat_cpu_gcc_dist (rather than gprat_cpu_gcc) builds HPX with networking=tcp instead
# of networking=none, which --hpx:localities (used by the distributed multi-locality
# tests, see test/CMakeLists.txt) requires. It has no MKL variant, so the rest of this
# job uses OpenBLAS (-DGPRAT_ENABLE_MKL=OFF / -DUSE_MKL=OFF below) instead.
cp spack-repo/environments/spack_cpu_gcc_dist.yaml spack.yaml
cat spack-repo/environments/ci_env_settings.yaml.tpl >> spack.yaml

- name: Concretize
Expand All @@ -61,7 +65,7 @@ jobs:
shell: spack-bash {0}
run: |
spack env activate .
cmake "--preset=ci-${{ matrix.os }}"
cmake "--preset=ci-${{ matrix.os }}" -DGPRAT_ENABLE_MKL=OFF -DGPRAT_WITH_DISTRIBUTED=ON -DGPRAT_TEST_MULTI_LOCALITY=ON

- name: Build
run: cmake --build build --config Release
Expand All @@ -87,7 +91,7 @@ jobs:
shell: spack-bash {0}
run: |
spack env activate .
cmake -G "Unix Makefiles" -S examples/gprat_cpp -B build_examples -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=$PWD/prefix -DUSE_MKL=ON
cmake -G "Unix Makefiles" -S examples/gprat_cpp -B build_examples -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=$PWD/prefix -DUSE_MKL=OFF

- name: Build example project
run: cmake --build build_examples --config Release
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ name: Code linting
on:
push:
branches:
- main
pull_request:

jobs:
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -204,5 +204,8 @@ compile_commands.json
# Build files
build*

# CTest output
Testing/

# Ignore folder
ignore
11 changes: 11 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ cmake_dependent_option(GPRAT_ENABLE_TESTS "Build unit and integration tests"

cmake_dependent_option(GPRAT_ENABLE_MKL "Enable support for Intel oneMKL"
${PROJECT_IS_TOP_LEVEL} "GPRAT_BUILD_CORE" OFF)
option(GPRAT_ENABLE_BENCHMARK_CACHE_EVICTIONS
"Evict data from caches before running BLAS operations" ON)

option(GPRAT_ENABLE_FORMAT_TARGETS "Enable clang-format / cmake-format targets"
${PROJECT_IS_TOP_LEVEL})
Expand Down Expand Up @@ -86,6 +88,7 @@ if(GPRAT_BUILD_CORE)

# HPX
find_package(HPX REQUIRED)
include(cmake/hpx-apex-compat.cmake)

# Add core subdirectory
add_subdirectory(core)
Expand Down Expand Up @@ -122,6 +125,11 @@ if(NOT CMAKE_SKIP_INSTALL_RULES AND GPRAT_BUILD_CORE)
RENAME "${package}Config.cmake"
COMPONENT Development)

install(
FILES cmake/hpx-apex-compat.cmake
DESTINATION "${GPRat_INSTALL_CMAKEDIR}"
COMPONENT Development)

install(
FILES "${PROJECT_BINARY_DIR}/${package}ConfigVersion.cmake"
DESTINATION "${GPRat_INSTALL_CMAKEDIR}"
Expand All @@ -142,6 +150,9 @@ endif()
# ##############################################################################
if(GPRAT_ENABLE_EXAMPLES)
add_subdirectory(examples/gprat_cpp)
if(GPRAT_WITH_DISTRIBUTED)
add_subdirectory(examples/gprat_distributed)
endif()
endif()

# Tests
Expand Down
84 changes: 75 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ Leveraging the asynchronous many-task runtime HPX, we aim to combine the perform
with the ease of use of commonly available Python libraries.
Thus, GPRat can be conveniently integrated into Python projects without binding overheads or used directly with pure C++
code.
Computations run on CPUs as well as NVIDIA GPUs (CUDA) and Intel/AMD GPUs (SYCL), in single (fp32) and double (fp64)
precision.
GPRat further provides a NUMA-aware allocator for tile data, performance counters, and optional distributed execution
via HPX actions.

## Dependencies

Expand All @@ -21,6 +25,9 @@ A script to install and setup spack for `GPRat` is provided in [`spack-repo`](sp
Spack environment configurations and setup scripts for CPU and GPU use are provided in
[`spack-repo/environments`](spack-repo/environments).

Since Spack is not available on Windows, we also support dependency installation using vcpkg.
For now, vcpkg builds are only tested on Windows.

## How To Compile

GPRat makes use of [CMake presets][1] to simplify the process of configuring the project.
Expand All @@ -35,8 +42,9 @@ ctest --preset=dev-linux

As a developer, you may create a `CMakeUserPresets.json` file at the root of the project that contains additional
presets local to your machine.
In addition to the build configuration `dev-linux`, there are `release-linux`, `dev-linux-gpu`, `release-linux-gpu`, `dev-linux-sycl`, and `release-linux-sycl`.
The configurations suffixed with `-gpu` build the library with CUDA for NVIDIA GPUs, and those suffixed with `-sycl` build it with SYCL support for Intel and AMD GPUs.
In addition to the build configuration `dev-linux`, there are `release-linux`, `dev-linux-cuda`, `release-linux-cuda`, `dev-linux-sycl`, and `release-linux-sycl`.
For Windows, we have similar presets called `dev-windows` and `release-windows`.
The configurations suffixed with `-cuda` build the library with CUDA for NVIDIA GPUs, and those suffixed with `-sycl` build it with SYCL support for Intel and AMD GPUs.

GPRat can be built with or without Python bindings.
The following options can be set to include / exclude parts of the project:
Expand All @@ -45,14 +53,31 @@ The following options can be set to include / exclude parts of the project:
|--------------------------------|--------------------------------------------------------------------------------------|-----------------|
| GPRAT_BUILD_CORE | Enable/Disable building of the core library | ON |
| GPRAT_BUILD_BINDINGS | Enable/Disable building of the Python bindings | ON |
| GPRAT_ENABLE_FORMAT_TARGETS | Enable/Disable code formatting helper targets | ON if top-level |
| GPRAT_ENABLE_EXAMPLES | Enable/Disable example projects | ON if top-level |
| GPRAT_USE_MKL | Enable/Disable usage of MKL library | OFF |
| GPRAT_ENABLE_TESTS | Enable/Disable building of unit and integration tests | ON if top-level |
| GPRAT_ENABLE_FORMAT_TARGETS | Enable/Disable code formatting helper targets | ON if top-level |
| GPRAT_ENABLE_MKL | Enable/Disable support for Intel oneMKL | OFF |
| GPRAT_WITH_CUDA | Enable/disable compilation with CUDA support (NVIDIA GPUs) | OFF |
| GPRAT_WITH_SYCL | Enable/disable compilation with SYCL support (Intel and AMD GPUs via oneMath) | OFF |
| GPRAT_WITH_DISTRIBUTED | Enable/disable distributed GP support via HPX actions | OFF |
| GPRAT_APEX_STEPS | Enable/disable compilation for steps duration measurement with APEX | OFF |
| GPRAT_APEX_CHOLESKY | Enable/disable compilation for measuring cholesky assembly and computation with APEX | OFF |

A convenience script `compile_gprat.sh` is provided to configure, build, and install GPRat with a single command.
It takes five parameters:

```sh
./compile_gprat.sh [python/cpp] [cpu/cuda/sycl] [release/dev] [mkl/none] [steps/cholesky/none]
```

- `$1`: build the Python bindings (`python`) or the C++ library (`cpp`)
- `$2`: backend, CPU (`cpu`), CUDA for NVIDIA GPUs (`cuda`), or SYCL for Intel and AMD GPUs (`sycl`)
- `$3`: build in `release` or `dev` mode
- `$4`: enable Intel oneMKL (`mkl`) or use OpenBLAS (`none`)
- `$5`: APEX profiling, measure step durations (`steps`), cholesky assembly and computation (`cholesky`), or disable profiling (`none`)

Computations are supported in both single (fp32) and double (fp64) precision.

Respective scripts can be found in this directory.

We also provide a spack package for GPRat in [`spack-repo/packages`](spack-repo/packages) for portable and convenient compilation. When the repository is added to spack, GPRat can be installed with `spack install gprat~cuda~bindings~examples blas={mkl,openblas}`
Expand All @@ -68,23 +93,61 @@ implementations based on TensorFlow ([GPflow](https://github.com/GPflow/GPflow))
- Go to [`examples/gprat_cpp`](examples/gprat_cpp/)
- Set parameters in [`execute.cpp`](examples/gprat_cpp/src/execute.cpp)
- The example is built as part of the main project.
- Go to `build/` and execute `./gprat_cpp [--use_gpu]` to run the example.
- Go to `build/` and execute `./gprat_cpp [--use-gpu]` to run the example.
- If you want to use an installed GPRat version:
Run `./run_gprat_cpp.sh [cpu/gpu] [x86/arm/riscv]` to build and run the example.
Run `./run_gprat_cpp.sh [cpu/cuda/sycl] [nvidia/amd/intel]` to build and run the example.
The second parameter selects the SYCL device and is only required when GPRat was compiled with the SYCL backend.

### To run GPRat with Python

- Go to [`examples/gprat_python`](examples/gprat_python/)
- Set parameters in [`config.json`](examples/gprat_python/config.json)
- Run `./run_gprat_python.sh [cpu/gpu]` to run the example
- Run `./run_gprat_python.sh [cpu/cuda/sycl] [nvidia/amd/intel]` to run the example.
The second parameter selects the SYCL device and is only required when GPRat was compiled with the SYCL backend.

### To run the distributed GPRat benchmark

- Configure the main project with `-DGPRAT_WITH_DISTRIBUTED=ON` to build [`examples/gprat_distributed`](examples/gprat_distributed/).
- The example is a CLI-driven scaling benchmark (no `config.json`) rather than a single "run one example" tool,
since it sweeps over training-set sizes rather than running one fixed configuration.
- Go to `build/` and execute `./gprat_distributed [options]`, or run `./run_gprat_distributed.sh [options]` to
build and run it. Useful options:
- `--start`/`--end`/`--step`: training-set sizes to sweep over (e.g. `--start 128 --end 4096 --step 2`)
- `--tiles`, `--regressors`, `--n_test`, `--opt_iter`, `--loop`: problem size and repetition count
- `--enabled`: bitmask to select which of cholesky/optimize/predict/predict_with_uncertainty/predict_with_full_cov to run
- `--train_x_path`/`--train_y_path`/`--test_path`: point at a larger dataset (e.g. one generated via
[`data/generators`](data/generators/)) for a real scaling study; the defaults point at the small `data/data_1024`
correctness fixture
- `--output_csv`: where per-run timings are appended (defaults to `examples/gprat_distributed/output.csv`,
matching the other examples)
- By default (`GPRAT_DIST_MULTI_LOCALITY=1`, on unless you set it to `0` before running
`run_gprat_distributed.sh`) the script runs across multiple localities on one node: it builds a
networking-enabled binary and, for each locality count in `GPRAT_DIST_LOCALITIES` (default `"1 2 4"`),
launches that many processes itself with `--hpx:localities=N --hpx:node=0..N-1` (node `0` is the console
process that receives your CLI options; the others just join), waiting for each round to finish before
moving to the next. All arguments you pass are forwarded to node `0`. Set `GPRAT_DIST_MULTI_LOCALITY=0` to
opt back into a single-locality run against the default `gprat_cpu_gcc` build.
**Important:** HPX's TCP parcelport zero-copy path (`hpx.parcel.zero_copy_serialization_threshold`,
8192 bytes by default) reliably hangs once tile sizes exceed it in a multi-locality run, so the script
always raises it (`--hpx:ini=hpx.parcel.zero_copy_serialization_threshold=999999999`) for these runs.
Running across multiple actual nodes additionally requires cluster-specific network configuration
(AGAS bootstrap addresses, hostfile/job-scheduler integration) not set up here.
- The default Spack environment (`gprat_cpu_gcc`) builds HPX with `networking=none`, which rejects
`--hpx:localities` outright. `GPRAT_DIST_MULTI_LOCALITY=1` instead uses the `gprat_cpu_gcc_dist`
Spack environment (`networking=tcp`, OpenBLAS-only — see
`spack-repo/environments/setup_gprat_cpu_gcc_dist.sh`) and builds into a separate
`build/release-linux-dist` directory to avoid mixing the two toolchains.
- Enable `-DGPRAT_TEST_MULTI_LOCALITY=ON` (in addition to `-DGPRAT_WITH_DISTRIBUTED=ON`) to register
CTest smoke tests (`GPRat_test_distributed_multi_locality_{1,2,4}`) that launch `gprat_distributed`
across 1/2/4 localities; off by default since it needs the same networking-enabled HPX build.

### To run GPflow reference

- Go to [`examples/gpflow_reference`](examples/gpflow_reference/)
- Set parameters in [`config.json`](examples/gpflow_reference/config.json)
- Run `./run_gpflow.sh [cpu/gpu/arm]` to run the example.

### To run GPflow reference
### To run GPyTorch reference

- Go to [`examples/gpytorch_reference`](examples/gpytorch_reference/)
- Set parameters in [`config.json`](examples/gpytorch_reference/config.json)
Expand All @@ -107,9 +170,12 @@ We specifically thank the follow contributors:
- [Henrik Möllmann](https://www.linkedin.com/in/moellh/):
[CUDA backend via cuBLAS/cuSOLVER](tbd.).

- Marcel Graf:
- [Marcel Graf](https://github.com/MarcelGraf0710):
[SYCL backend via oneMath](tbd.).

- [Tim Niederhausen](https://github.com/timniederhausen):
[Distributed GP via HPX actions](tbd.).

## How To Cite

```
Expand Down
1 change: 0 additions & 1 deletion Testing/Temporary/CTestCostData.txt

This file was deleted.

2 changes: 1 addition & 1 deletion bindings/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# try finding pybind11
set(GPRat_pybind11_VERSION 2.10.3)
set(GPRat_pybind11_VERSION 2.13.6)
find_package(pybind11 ${GPRat_pybind11_VERSION} QUIET)
if(pybind11_FOUND)
message(STATUS "Found package pybind11.")
Expand Down
19 changes: 11 additions & 8 deletions bindings/gprat_py.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "gprat_c.hpp"
#include "gprat/gprat.hpp"

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

Expand Down Expand Up @@ -31,19 +32,19 @@ void init_gprat(py::module &m)
// Set hyperparameters to default values in `AdamParams` class, unless
// specified. Python object has full access to each hyperparameter and a
// string representation `__repr__`.
py::class_<gprat_hyper::AdamParams>(m, "AdamParams")
py::class_<gprat::AdamParams>(m, "AdamParams")
.def(py::init<double, double, double, double, int>(),
py::arg("learning_rate") = 0.001,
py::arg("beta1") = 0.9,
py::arg("beta2") = 0.999,
py::arg("epsilon") = 1e-8,
py::arg("opt_iter") = 0)
.def_readwrite("learning_rate", &gprat_hyper::AdamParams::learning_rate)
.def_readwrite("beta1", &gprat_hyper::AdamParams::beta1)
.def_readwrite("beta2", &gprat_hyper::AdamParams::beta2)
.def_readwrite("epsilon", &gprat_hyper::AdamParams::epsilon)
.def_readwrite("opt_iter", &gprat_hyper::AdamParams::opt_iter)
.def("__repr__", &gprat_hyper::AdamParams::repr);
.def_readwrite("learning_rate", &gprat::AdamParams::learning_rate)
.def_readwrite("beta1", &gprat::AdamParams::beta1)
.def_readwrite("beta2", &gprat::AdamParams::beta2)
.def_readwrite("epsilon", &gprat::AdamParams::epsilon)
.def_readwrite("opt_iter", &gprat::AdamParams::opt_iter)
.def("__repr__", &gprat::AdamParams::repr);

// Initializes Gaussian Process with `GP` class. Sets default parameters for
// squared exponential kernel, number of regressors and trainable, unless
Expand Down Expand Up @@ -136,6 +137,8 @@ n_units to a value enables computations on the GPU.
py::arg("test_data"),
py::arg("m_tiles"),
py::arg("m_tile_size"))
.def(
"cholesky", &gprat::GP::cholesky, "Compute and return the Cholesky decomposition of the covariance matrix.")
.def("optimize", &gprat::GP::optimize, py::arg("AdamParams"))
.def("optimize_step", &gprat::GP::optimize_step, py::arg("AdamParams"), py::arg("iter"))
.def("compute_loss", &gprat::GP::calculate_loss);
Expand Down
25 changes: 13 additions & 12 deletions bindings/utils_py.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "target.hpp"
#include "utils_c.hpp"
#include "gprat/target.hpp"
#include "gprat/utils.hpp"

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

Expand Down Expand Up @@ -32,7 +33,7 @@ void start_hpx_wrapper(std::vector<std::string> args, std::size_t n_cores)
}
argv.push_back(nullptr);
int argc = static_cast<int>(args.size());
utils::start_hpx_runtime(argc, argv.data());
gprat::start_hpx_runtime(argc, argv.data());
}

/**
Expand All @@ -43,7 +44,7 @@ void start_hpx_wrapper(std::vector<std::string> args, std::size_t n_cores)
void init_utils(py::module &m)
{
m.def("compute_train_tiles",
&utils::compute_train_tiles,
&gprat::compute_train_tiles,
py::arg("n_samples"),
py::arg("n_tile_size"),
R"pbdoc(
Expand All @@ -58,7 +59,7 @@ void init_utils(py::module &m)
)pbdoc");

m.def("compute_train_tile_size",
&utils::compute_train_tile_size,
&gprat::compute_train_tile_size,
py::arg("n_samples"),
py::arg("n_tiles"),
R"pbdoc(
Expand All @@ -73,7 +74,7 @@ void init_utils(py::module &m)
)pbdoc");

m.def("compute_test_tiles",
&utils::compute_test_tiles,
&gprat::compute_test_tiles,
py::arg("m_samples"),
py::arg("n_tiles"),
py::arg("n_tile_size"),
Expand All @@ -90,20 +91,20 @@ void init_utils(py::module &m)
)pbdoc");

m.def("print_vector",
&utils::print_vector,
&gprat::print_vector,
py::arg("vec"),
py::arg("start") = 0,
py::arg("end") = -1,
py::arg("separator") = " ",
"Print elements of a vector with optional start, end, and separator parameters");

m.def("start_hpx", &start_hpx_wrapper, py::arg("args"), py::arg("n_cores")); // Using the wrapper function
m.def("resume_hpx", &utils::resume_hpx_runtime);
m.def("suspend_hpx", &utils::suspend_hpx_runtime);
m.def("stop_hpx", &utils::stop_hpx_runtime);
m.def("resume_hpx", &gprat::resume_hpx_runtime);
m.def("suspend_hpx", &gprat::suspend_hpx_runtime);
m.def("stop_hpx", &gprat::stop_hpx_runtime);

m.def("compiled_with_cuda", &utils::compiled_with_cuda, "Check if the code was compiled with CUDA support");
m.def("compiled_with_sycl", &utils::compiled_with_sycl, "Check if the code was compiled with SYCL support");
m.def("compiled_with_cuda", &gprat::compiled_with_cuda, "Check if the code was compiled with CUDA support");
m.def("compiled_with_sycl", &gprat::compiled_with_sycl, "Check if the code was compiled with SYCL support");

m.def("print_available_gpus", &gprat::print_available_gpus, "Print available GPUs with their properties");
m.def("gpu_count", &gprat::gpu_count, "Return the number of available GPUs");
Expand Down
32 changes: 32 additions & 0 deletions cmake/hpx-apex-compat.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Compatibility shim for HPX builds that export APEX's private dependencies by
# bare name.
#
# HPX built with +static and instrumentation=apex embeds APEX's private
# zlib/rapidjson/otf2 dependencies into HPXTargets.cmake by bare name instead of
# as proper (exported) targets. Since no target with those names exists in a
# consuming project, CMake falls back to raw "-l<name>" linker flags, which
# fail: "-lzlib" has no matching library file (real zlib produces libz, not
# libzlib) and "-lrapidjson" is header-only and never produces a library file at
# all. Defining targets with these exact names satisfies
# target_link_libraries()'s lookup before it degrades to a linker flag.
#
# This is purely additive:
#
# * `zlib` and `otf2` are only defined when the real library can be located, so
#   builds that don't hit this HPX export bug are unaffected.
# * `rapidjson` is always defined (unless a target of that name already exists);
#   it is an empty interface target whose only job is to absorb the name.
#
# Every shim is guarded by `if(NOT TARGET ...)`, so a target of the same name
# that already exists is never overridden.
#
# Inputs:
#
# * `Otf2_ROOT` (optional): installation prefix of OTF2; `<prefix>/lib` is
#   searched first when it is set.
#
# Cache entries created: `GPRat_OTF2_LIBRARY` (advanced).

# zlib: forward the bare name to CMake's imported ZLIB::ZLIB target.
if(NOT TARGET zlib)
  find_package(ZLIB QUIET)
  # Also require the imported target itself: linking a missing `ZLIB::ZLIB`
  # would turn into a hard generate-time error.
  if(ZLIB_FOUND AND TARGET ZLIB::ZLIB)
    add_library(zlib INTERFACE IMPORTED)
    target_link_libraries(zlib INTERFACE ZLIB::ZLIB)
  endif()
endif()

# rapidjson: header-only, so an empty interface target is all that is needed to
# keep "-lrapidjson" off the link line.
if(NOT TARGET rapidjson)
  add_library(rapidjson INTERFACE IMPORTED)
endif()

# otf2: locate the library file and expose it under the bare name.
if(NOT TARGET otf2)
  # Only pass a hint when Otf2_ROOT is actually set; an empty value would
  # otherwise expand to the unrelated hint "/lib".
  set(gprat_otf2_hint_args "")
  if(DEFINED Otf2_ROOT AND NOT "${Otf2_ROOT}" STREQUAL "")
    set(gprat_otf2_hint_args HINTS "${Otf2_ROOT}/lib")
  endif()

  find_library(
    GPRat_OTF2_LIBRARY
    NAMES otf2
    ${gprat_otf2_hint_args})
  mark_as_advanced(GPRat_OTF2_LIBRARY)

  if(GPRat_OTF2_LIBRARY)
    add_library(otf2 INTERFACE IMPORTED)
    target_link_libraries(otf2 INTERFACE "${GPRat_OTF2_LIBRARY}")
  endif()

  # This file is include()d, so do not leak the helper variable into the
  # including scope.
  unset(gprat_otf2_hint_args)
endif()
Loading
Loading