From 072c143adb0d3d51d8931533854c6a06c85a651a Mon Sep 17 00:00:00 2001 From: Christoph Ortner Date: Sun, 14 Jun 2026 12:10:02 -0700 Subject: [PATCH 1/4] Add complex harmonics + ACEbase evaluate-interface extension --- julia/Project.toml | 12 ++- julia/ext/ACEbaseExt.jl | 41 ++++++++++ julia/src/SpheriCart.jl | 1 + julia/src/complex.jl | 117 ++++++++++++++++++++++++++++ julia/test/runtests.jl | 1 + julia/test/test_aceinterface.jl | 130 ++++++++++++++++++++++++++++++++ 6 files changed, 300 insertions(+), 2 deletions(-) create mode 100644 julia/ext/ACEbaseExt.jl create mode 100644 julia/src/complex.jl create mode 100644 julia/test/test_aceinterface.jl diff --git a/julia/Project.toml b/julia/Project.toml index 0f1ab7d53..80a59eb50 100644 --- a/julia/Project.toml +++ b/julia/Project.toml @@ -1,7 +1,7 @@ name = "SpheriCart" uuid = "5caf2b29-02d9-47a3-9434-5931c85ba645" authors = ["Christoph Ortner and contributors"] -version = "0.2.3" +version = "0.2.4" [deps] BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" @@ -12,7 +12,14 @@ KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" +[weakdeps] +ACEbase = "14bae519-eb20-449c-a949-9c58ed33163e" + +[extensions] +ACEbaseExt = "ACEbase" + [compat] +ACEbase = "0.4.7" BenchmarkTools = "1.6.0" Bumper = "0.7.0" ForwardDiff = "0.10, 1.0.1" @@ -23,6 +30,7 @@ StaticArrays = "1.9" julia = "1.10.0, 1.11.0" [extras] +ACEbase = "14bae519-eb20-449c-a949-9c58ed33163e" AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" @@ -34,4 +42,4 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test", "LinearAlgebra", "Random", "Printf", "Quadmath", "GPUArrays", "Metal", "CUDA", "AMDGPU"] +test = ["Test", "ACEbase", "LinearAlgebra", "Random", "Printf", "Quadmath", "GPUArrays", "Metal", "CUDA", "AMDGPU"] diff --git a/julia/ext/ACEbaseExt.jl b/julia/ext/ACEbaseExt.jl new file mode 100644 index 000000000..1e3e8d3f1 --- /dev/null +++ b/julia/ext/ACEbaseExt.jl @@ -0,0 +1,41 @@ +module ACEbaseExt + +# Provides the standard ACEbase evaluation interface (evaluate / evaluate_ed / +# evaluate! / evaluate_ed! / natural_indices) for the SpheriCart harmonics +# bases, by forwarding to the native `compute` API. Loading this extension +# (i.e. `using SpheriCart, ACEbase`) makes the harmonics usable through the +# shared ACEsuit interface with no Polynomials4ML dependency. + +using SpheriCart +import SpheriCart: SolidHarmonics, SphericalHarmonics, + ComplexSolidHarmonics, ComplexSphericalHarmonics, + compute, compute!, + compute_with_gradients, compute_with_gradients!, + idx2lm +import ACEbase: evaluate, evaluate_ed, evaluate!, evaluate_ed!, natural_indices + +const Harmonics = Union{SolidHarmonics, SphericalHarmonics, + ComplexSolidHarmonics, ComplexSphericalHarmonics} + +# allocating interface; the trailing args... swallow optional (ps, st) since +# the harmonics bases are parameter-free. +evaluate(basis::Harmonics, x, args...) = compute(basis, x) + +evaluate_ed(basis::Harmonics, x, args...) = compute_with_gradients(basis, x) + +# in-place interface +function evaluate!(Y, basis::Harmonics, x, args...) + compute!(Y, basis, x) + return Y +end + +function evaluate_ed!(Y, dY, basis::Harmonics, x, args...) + compute_with_gradients!(Y, dY, basis, x) + return Y, dY +end + +# the (l, m) spec, in the order the basis values are stored +natural_indices(basis::Harmonics) = + [ (l = lm[1], m = lm[2]) for lm in idx2lm.(1:length(basis)) ] + +end diff --git a/julia/src/SpheriCart.jl b/julia/src/SpheriCart.jl index da5e4363c..ff374d166 100644 --- a/julia/src/SpheriCart.jl +++ b/julia/src/SpheriCart.jl @@ -14,6 +14,7 @@ include("api.jl") include("generated_kernels.jl") include("batched_kernels.jl") include("spherical.jl") +include("complex.jl") include("ka_kernels.jl") diff --git a/julia/src/complex.jl b/julia/src/complex.jl new file mode 100644 index 000000000..38885834f --- /dev/null +++ b/julia/src/complex.jl @@ -0,0 +1,117 @@ + +export ComplexSolidHarmonics, ComplexSphericalHarmonics + +# Complex spherical / solid harmonics. These are synthesised from the real +# harmonics by the standard linear combination of the ±m components; the real +# kernels do all the work and the result is converted in place. (Moved here +# from Polynomials4ML `src/sphericart.jl::_convert_R2C!`.) + +""" +`struct ComplexSolidHarmonics` : complex solid harmonics basis, wrapping a real +`SolidHarmonics` basis of the same degree. See `SolidHarmonics` for the +constructor keyword arguments. +""" +struct ComplexSolidHarmonics{L, NORM, STATIC, TF} + realbasis::SolidHarmonics{L, NORM, STATIC, TF} +end + +""" +`struct ComplexSphericalHarmonics` : complex spherical harmonics basis, wrapping +a real `SphericalHarmonics` basis of the same degree. +""" +struct ComplexSphericalHarmonics{L, NORM, STATIC, TF} + realbasis::SphericalHarmonics{L, NORM, STATIC, TF} +end + +ComplexSolidHarmonics(L::Integer; kwargs...) = + ComplexSolidHarmonics(SolidHarmonics(L; kwargs...)) + +ComplexSphericalHarmonics(L::Integer; kwargs...) = + ComplexSphericalHarmonics(SphericalHarmonics(L; kwargs...)) + +const ComplexHarmonics{L} = + Union{ComplexSolidHarmonics{L}, ComplexSphericalHarmonics{L}} + +@inline (basis::ComplexHarmonics)(args...) = compute(basis, args...) + +# ---------------------- basis length + +Base.length(::SolidHarmonics{L}) where {L} = sizeY(L) +Base.length(::SphericalHarmonics{L}) where {L} = sizeY(L) +Base.length(::ComplexSolidHarmonics{L}) where {L} = sizeY(L) +Base.length(::ComplexSphericalHarmonics{L}) where {L} = sizeY(L) + +# ---------------------- real -> complex conversion (in place) + +function _convert_R2C!(Y::AbstractMatrix, LMAX::Integer) + Nx = size(Y, 1) + for l = 0:LMAX + # m = 0 => do nothing + # m ≠ 0 => linear combinations of the ± m terms + @inbounds for m = 1:l + i_lm⁺ = lm2idx(l, m) + i_lm⁻ = lm2idx(l, -m) + @simd ivdep for j = 1:Nx + Ylm⁺ = Y[j, i_lm⁺] + Ylm⁻ = Y[j, i_lm⁻] + Y[j, i_lm⁺] = (-1)^m * (Ylm⁺ + im * Ylm⁻) / sqrt(2) + Y[j, i_lm⁻] = (Ylm⁺ - im * Ylm⁻) / sqrt(2) + end + end + end + return Y +end + +# ---------------------- batched api + +function compute(basis::ComplexHarmonics{L}, + Rs::AbstractVector{<: SVector{3, T}}) where {L, T} + Z = similar(Rs, Complex{T}, (length(Rs), sizeY(L))) + compute!(Z, basis, Rs) + return Z +end + +function compute_with_gradients(basis::ComplexHarmonics{L}, + Rs::AbstractVector{<: SVector{3, T}}) where {L, T} + Z = similar(Rs, Complex{T}, (length(Rs), sizeY(L))) + dZ = similar(Rs, SVector{3, Complex{T}}, (length(Rs), sizeY(L))) + compute_with_gradients!(Z, dZ, basis, Rs) + return Z, dZ +end + +function compute!(Z::AbstractMatrix, basis::ComplexHarmonics{L}, + Rs::AbstractVector{<: SVector{3}}) where {L} + # the real kernel writes real values into the complex array Z + compute!(Z, basis.realbasis, Rs) + _convert_R2C!(Z, L) + return Z +end + +function compute_with_gradients!(Z::AbstractMatrix, dZ::AbstractMatrix, + basis::ComplexHarmonics{L}, + Rs::AbstractVector{<: SVector{3}}) where {L} + compute_with_gradients!(Z, dZ, basis.realbasis, Rs) + _convert_R2C!(Z, L) + _convert_R2C!(dZ, L) + return Z, dZ +end + +# ---------------------- single-input api +# routed through the batched (matrix) kernel, as in the original P4ML wrapper + +function compute(basis::ComplexHarmonics{L}, 𝐫::SVector{3, T}) where {L, T} + Z = zeros(Complex{T}, sizeY(L)) + Zmat = reshape(Z, 1, :) # a view, not a copy + compute!(Zmat, basis, SA[𝐫,]) + return Z +end + +function compute_with_gradients(basis::ComplexHarmonics{L}, + 𝐫::SVector{3, T}) where {L, T} + Z = zeros(Complex{T}, sizeY(L)) + dZ = zeros(SVector{3, Complex{T}}, sizeY(L)) + Zmat = reshape(Z, 1, :) + dZmat = reshape(dZ, 1, :) + compute_with_gradients!(Zmat, dZmat, basis, SA[𝐫,]) + return Z, dZ +end diff --git a/julia/test/runtests.jl b/julia/test/runtests.jl index 87886ffa9..320c9b203 100644 --- a/julia/test/runtests.jl +++ b/julia/test/runtests.jl @@ -6,4 +6,5 @@ using Test @testset "Spherical Harmonics" begin include("test_sphericalharmonics.jl"); end @testset "FloatX" begin include("test_f32.jl"); end @testset "KernelAbstractions" begin include("test_ka.jl"); end + @testset "ACEbase interface" begin include("test_aceinterface.jl"); end end diff --git a/julia/test/test_aceinterface.jl b/julia/test/test_aceinterface.jl new file mode 100644 index 000000000..8946a43a0 --- /dev/null +++ b/julia/test/test_aceinterface.jl @@ -0,0 +1,130 @@ +using SpheriCart, ACEbase, StaticArrays, LinearAlgebra, Test +using SpheriCart: SolidHarmonics, SphericalHarmonics, + ComplexSolidHarmonics, ComplexSphericalHarmonics, + compute, compute_with_gradients, lm2idx, sizeY +using ACEbase: evaluate, evaluate_ed, evaluate!, evaluate_ed!, natural_indices + +## + +# reference: original P4ML / ACE complex spherical harmonics up to L = 3, +# L2-normalised on the sphere. +function explicit_shs(θ, φ) + Y00 = 0.5 * sqrt(1/π) + Y1m1 = 0.5 * sqrt(3/(2*π))*sin(θ)*exp(-im*φ) + Y10 = 0.5 * sqrt(3/π)*cos(θ) + Y11 = -0.5 * sqrt(3/(2*π))*sin(θ)*exp(im*φ) + Y2m2 = 0.25 * sqrt(15/(2*π))*sin(θ)^2*exp(-2*im*φ) + Y2m1 = 0.5 * sqrt(15/(2*π))*sin(θ)*cos(θ)*exp(-im*φ) + Y20 = 0.25 * sqrt(5/π)*(3*cos(θ)^2 - 1) + Y21 = -0.5 * sqrt(15/(2*π))*sin(θ)*cos(θ)*exp(im*φ) + Y22 = 0.25 * sqrt(15/(2*π))*sin(θ)^2*exp(2*im*φ) + Y3m3 = 1/8 * exp(-3 * im * φ) * sqrt(35/π) * sin(θ)^3 + Y3m2 = 1/4 * exp(-2 * im * φ) * sqrt(105/(2*π)) * cos(θ) * sin(θ)^2 + Y3m1 = 1/8 * exp(-im * φ) * sqrt(21/π) * (-1 + 5 * cos(θ)^2) * sin(θ) + Y30 = 1/4 * sqrt(7/π) * (-3 * cos(θ) + 5 * cos(θ)^3) + Y31 = -(1/8) * exp(im * φ) * sqrt(21/π) * (-1 + 5 * cos(θ)^2) * sin(θ) + Y32 = 1/4 * exp(2 * im * φ) * sqrt(105/(2*π)) * cos(θ) * sin(θ)^2 + Y33 = -(1/8) * exp(3 * im * φ) * sqrt(35/π) * sin(θ)^3 + return [Y00, Y1m1, Y10, Y11, Y2m2, Y2m1, Y20, Y21, Y22, + Y3m3, Y3m2, Y3m1, Y30, Y31, Y32, Y33] +end + +# complex harmonics built from the real ones (the R2C convention) +function cY_from_rY(Yr, LMAX) + Yc = zeros(Complex{eltype(Yr)}, length(Yr)) + for l = 0:LMAX + Yc[lm2idx(l, 0)] = Yr[lm2idx(l, 0)] + for m = 1:l + i⁺ = lm2idx(l, m); i⁻ = lm2idx(l, -m) + Yc[i⁺] = (-1)^m * (Yr[i⁺] + im * Yr[i⁻]) / sqrt(2) + Yc[i⁻] = (Yr[i⁺] - im * Yr[i⁻]) / sqrt(2) + end + end + return Yc +end + +rand_sphere() = ( u = (@SVector randn(3)); u ./ norm(u) ) +function rand_angles() + θ = rand() * π; φ = (rand()-0.5) * 2*π + return SVector(sin(θ)*cos(φ), sin(θ)*sin(φ), cos(θ)), θ, φ +end + +## + +@testset "complex harmonics vs reference" begin + r_spher = SphericalHarmonics(3) + c_spher = ComplexSphericalHarmonics(3) + c_solid = ComplexSolidHarmonics(3) + for ntest = 1:30 + 𝐫, θ, φ = rand_angles() + Yref = explicit_shs(θ, φ) + Yc1 = cY_from_rY(r_spher(𝐫), 3) + @test Yc1 ≈ Yref + @test c_spher(𝐫) ≈ Yref + @test c_solid(𝐫) ≈ Yref # on the unit sphere solid == spherical + end +end + +@testset "complex == R2C(real), batched" begin + for (RB, CB) in [ (SolidHarmonics(6), ComplexSolidHarmonics(6)), + (SphericalHarmonics(6), ComplexSphericalHarmonics(6)) ] + R = [ (@SVector randn(3)) for _ = 1:20 ] + Yr = RB(R); Yc = CB(R) + for j = 1:length(R) + @test Yc[j, :] ≈ cY_from_rY(Yr[j, :], 6) + end + end +end + +## + +@testset "ACEbase interface == compute" begin + bases = [ SolidHarmonics(5), SphericalHarmonics(5), + ComplexSolidHarmonics(5), ComplexSphericalHarmonics(5) ] + for basis in bases + 𝐫 = rand_sphere() + R = [ rand_sphere() for _ = 1:13 ] + @test evaluate(basis, 𝐫) == compute(basis, 𝐫) + @test evaluate(basis, R) == compute(basis, R) + @test evaluate(basis, R, nothing, nothing) == compute(basis, R) + Yc, dYc = compute_with_gradients(basis, R) + Ye, dYe = evaluate_ed(basis, R) + @test Yc == Ye && dYc == dYe + # in-place + Y = similar(compute(basis, R)) + @test evaluate!(Y, basis, R) == compute(basis, R) + end +end + +@testset "natural_indices" begin + for L in (0, 3, 6) + for basis in (SolidHarmonics(L), ComplexSphericalHarmonics(L)) + spec = natural_indices(basis) + @test length(spec) == sizeY(L) == length(basis) + @test spec[1] == (l = 0, m = 0) + @test all( lm2idx(spec[i].l, spec[i].m) == i for i = 1:length(spec) ) + end + end +end + +## + +@testset "finite-difference gradients" begin + bases = [ SolidHarmonics(8), SphericalHarmonics(8), + ComplexSolidHarmonics(7), ComplexSphericalHarmonics(7) ] + for basis in bases + 𝐫 = rand_sphere() * (0.5 + rand()) + Y, ∇Y = evaluate_ed(basis, 𝐫) + @test Y ≈ evaluate(basis, 𝐫) + u = rand_sphere() + errs = Float64[] + for p = 2:10 + h = 0.1^p + dh = (evaluate(basis, 𝐫 + h*u) .- evaluate(basis, 𝐫 - h*u)) ./ (2h) + # note: plain (non-conjugating) contraction, since Y may be complex + da = [ sum(∇Y[i] .* u) for i = 1:length(Y) ] + push!(errs, norm(dh .- da, Inf)) + end + @test minimum(errs) < 1e-6 + end +end From 325886edc02a8b0cd1f5cb2bcc1c7c337a6abb89 Mon Sep 17 00:00:00 2001 From: Christoph Ortner Date: Sun, 14 Jun 2026 12:53:42 -0700 Subject: [PATCH 2/4] Add Julia test suite CI workflow --- .github/workflows/julia-tests.yml | 46 +++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 .github/workflows/julia-tests.yml diff --git a/.github/workflows/julia-tests.yml b/.github/workflows/julia-tests.yml new file mode 100644 index 000000000..4a9e6dc05 --- /dev/null +++ b/.github/workflows/julia-tests.yml @@ -0,0 +1,46 @@ +name: Julia Tests + +on: + push: + branches: [main, julia] + tags: ['*'] + paths: + - 'julia/**' + - '.github/workflows/julia-tests.yml' + pull_request: + paths: + - 'julia/**' + - '.github/workflows/julia-tests.yml' + +concurrency: + group: julia-tests-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' && github.ref != 'refs/heads/julia' }} + +jobs: + test: + name: Julia ${{ matrix.version }} - ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + version: + - '1.10' + - '1.11' + os: + - ubuntu-latest + arch: + - x64 + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v2 + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: julia-actions/cache@v2 + # the Julia package lives in the `julia/` subdirectory of the monorepo + - uses: julia-actions/julia-buildpkg@v1 + with: + project: julia + - uses: julia-actions/julia-runtest@v1 + with: + project: julia From ea999174cfe529e757ea4a8571ff60cf52f3b696 Mon Sep 17 00:00:00 2001 From: Christoph Ortner Date: Sun, 14 Jun 2026 12:56:21 -0700 Subject: [PATCH 3/4] Add Julia 1.12 to CI matrix --- .github/workflows/julia-tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/julia-tests.yml b/.github/workflows/julia-tests.yml index 4a9e6dc05..b0f93eed0 100644 --- a/.github/workflows/julia-tests.yml +++ b/.github/workflows/julia-tests.yml @@ -26,6 +26,7 @@ jobs: version: - '1.10' - '1.11' + - '1.12' os: - ubuntu-latest arch: From 469b163110ce811b47ce4442501f9eb1a907f525 Mon Sep 17 00:00:00 2001 From: Christoph Ortner Date: Sun, 14 Jun 2026 13:10:34 -0700 Subject: [PATCH 4/4] Auto-detect GPU backend in Julia KA tests (no hard-coded Metal) --- julia/Project.toml | 10 +-- julia/test/test_aceinterface.jl | 14 +-- julia/test/test_ka.jl | 149 ++++++++++++++------------------ julia/test/utils_gpu.jl | 63 ++++++++++++++ 4 files changed, 141 insertions(+), 95 deletions(-) create mode 100644 julia/test/utils_gpu.jl diff --git a/julia/Project.toml b/julia/Project.toml index 80a59eb50..ebd3e836f 100644 --- a/julia/Project.toml +++ b/julia/Project.toml @@ -31,15 +31,15 @@ julia = "1.10.0, 1.11.0" [extras] ACEbase = "14bae519-eb20-449c-a949-9c58ed33163e" -AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" -CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" -GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -Metal = "dde4c033-4e86-420c-a63e-0dd931031962" +Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Quadmath = "be4d8f0f-7fa4-5f49-b795-2f01399ab2dd" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +# GPU backends (CUDA / Metal / AMDGPU / oneAPI) are installed on demand by +# test/utils_gpu.jl only when a GPU is detected, so they are not listed here. + [targets] -test = ["Test", "ACEbase", "LinearAlgebra", "Random", "Printf", "Quadmath", "GPUArrays", "Metal", "CUDA", "AMDGPU"] +test = ["Test", "ACEbase", "LinearAlgebra", "Random", "Printf", "Quadmath", "Pkg"] diff --git a/julia/test/test_aceinterface.jl b/julia/test/test_aceinterface.jl index 8946a43a0..54acfdd5c 100644 --- a/julia/test/test_aceinterface.jl +++ b/julia/test/test_aceinterface.jl @@ -43,8 +43,8 @@ function cY_from_rY(Yr, LMAX) return Yc end -rand_sphere() = ( u = (@SVector randn(3)); u ./ norm(u) ) -function rand_angles() +_rand_sphere() = ( u = (@SVector randn(3)); u ./ norm(u) ) +function _rand_angles() θ = rand() * π; φ = (rand()-0.5) * 2*π return SVector(sin(θ)*cos(φ), sin(θ)*sin(φ), cos(θ)), θ, φ end @@ -56,7 +56,7 @@ end c_spher = ComplexSphericalHarmonics(3) c_solid = ComplexSolidHarmonics(3) for ntest = 1:30 - 𝐫, θ, φ = rand_angles() + 𝐫, θ, φ = _rand_angles() Yref = explicit_shs(θ, φ) Yc1 = cY_from_rY(r_spher(𝐫), 3) @test Yc1 ≈ Yref @@ -82,8 +82,8 @@ end bases = [ SolidHarmonics(5), SphericalHarmonics(5), ComplexSolidHarmonics(5), ComplexSphericalHarmonics(5) ] for basis in bases - 𝐫 = rand_sphere() - R = [ rand_sphere() for _ = 1:13 ] + 𝐫 = _rand_sphere() + R = [ _rand_sphere() for _ = 1:13 ] @test evaluate(basis, 𝐫) == compute(basis, 𝐫) @test evaluate(basis, R) == compute(basis, R) @test evaluate(basis, R, nothing, nothing) == compute(basis, R) @@ -113,10 +113,10 @@ end bases = [ SolidHarmonics(8), SphericalHarmonics(8), ComplexSolidHarmonics(7), ComplexSphericalHarmonics(7) ] for basis in bases - 𝐫 = rand_sphere() * (0.5 + rand()) + 𝐫 = _rand_sphere() * (0.5 + rand()) Y, ∇Y = evaluate_ed(basis, 𝐫) @test Y ≈ evaluate(basis, 𝐫) - u = rand_sphere() + u = _rand_sphere() errs = Float64[] for p = 2:10 h = 0.1^p diff --git a/julia/test/test_ka.jl b/julia/test/test_ka.jl index 6ef11cbf5..33f23bdae 100644 --- a/julia/test/test_ka.jl +++ b/julia/test/test_ka.jl @@ -1,35 +1,12 @@ -using SpheriCart, GPUArraysCore, StaticArrays, - LinearAlgebra, KernelAbstractions, Test +using SpheriCart, GPUArraysCore, StaticArrays, + LinearAlgebra, KernelAbstractions, Test -@info("============= Testset KernelAbstractions =============") - -## +# auto-detects a GPU backend; resolves to CPU (dev = identity) on a plain +# runner and installs no GPU package. (Mechanism adapted from EquivariantTensors.) +include(joinpath(@__DIR__, "utils_gpu.jl")) -using CUDA, Metal, AMDGPU -if CUDA.functional() - @info "Using CUDA" - CUDA.versioninfo() - gpu = CuArray -elseif AMDGPU.functional() - @info "Using AMD" - AMDGPU.versioninfo() - gpu = ROCArray -elseif Metal.functional() - @info "Using Metal" - Metal.versioninfo() - gpu = MtlArray -else - @info "No GPU is available. Using CPU." - gpu = Array -end - -## - -# uncomment for developing on Metal -using Metal -Metal.versioninfo() -gpu = MtlArray +@info("============= Testset KernelAbstractions =============") ## @@ -39,60 +16,26 @@ GRPSIZE = 8 # to be on the safe side; testing just for correctness @info("L = $L, nbatch = $nbatch") - basis = SolidHarmonics(L; T = Float32) Rs = [ @SVector randn(Float32, 3) for _=1:nbatch ] Rs = [ 𝐫 / (norm(𝐫)+2*rand(Float32)) for 𝐫 in Rs ] -Rs_gpu = gpu(Rs) -# reference calculation +# reference calculation (CPU) Z1 = compute(basis, Rs) ## -@info("test GPU-KA execution") -Z2 = gpu(zeros(Float32, size(Z1))) -SpheriCart.ka_solid_harmonics!(Z2, Val(L), Rs_gpu, basis.Flm, GRPSIZE) -@show norm(Z1 - Array(Z2), Inf) / L^2 -@test norm(Z1 - Array(Z2), Inf) / L^2 < 1e-7 - -@info("test GPU-KA execution via api") -Z2a = compute(basis, Rs_gpu) -@test Z2a isa AbstractGPUArray -@test norm(Z1 - Array(Z2a), Inf) / L^2 < 1e-7 - -## - @info("test CPU-KA execution") Z3 = zeros(Float32, size(Z1)) SpheriCart.ka_solid_harmonics!(Z3, nothing, Val{L}(), Rs, basis.Flm) @show norm(Z1 - Z3, Inf) / L^2 @test norm(Z1 - Z3, Inf) / L^2 < 1e-7 - -## - -@info("test GPU-KA gradients") -Z1, ∇Z1 = compute_with_gradients(basis, Rs) -Z4 = gpu(zeros(Float32, size(Z1))) -∇Z4 = gpu(zeros(SVector{3, Float32}, size(Z1))) -SpheriCart.ka_solid_harmonics_with_grad!(Z4, ∇Z4, Val{L}(), Rs_gpu, basis.Flm, GRPSIZE) - -Z4a, ∇Z4a = compute_with_gradients(basis, Rs_gpu) -@test Z4a isa AbstractGPUArray -@test ∇Z4a isa AbstractGPUArray - -@show norm(Z1 - Array(Z4), Inf) / L^2 -@show norm(∇Z1 - Array(∇Z4), Inf) / L^3 -@test norm(Z1 - Array(Z4), Inf) / L^2 < 1e-7 -@test norm(∇Z1 - Array(∇Z4), Inf) / L^3 < 1e-7 -@test norm(Z1 - Array(Z4a), Inf) / L^2 < 1e-7 -@test norm(∇Z1 - Array(∇Z4a), Inf) / L^3 < 1e-7 - ## @info("test CPU-KA gradients") +Z1, ∇Z1 = compute_with_gradients(basis, Rs) Z5 = zeros(Float32, size(Z1)) ∇Z5 = zeros(SVector{3, Float32}, size(Z1)) SpheriCart.ka_solid_harmonics!(Z5, ∇Z5, Val{L}(), Rs, basis.Flm) @@ -102,27 +45,67 @@ SpheriCart.ka_solid_harmonics!(Z5, ∇Z5, Val{L}(), Rs, basis.Flm) @test norm(Z1 - Array(Z5), Inf) / L^2 < 1e-7 @test norm(∇Z1 - Array(∇Z5), Inf) / L^3 < 1e-7 -## +## -@info("Test KernelAbstractions for spherical harmonics") +# the remaining tests need an actual device; they are skipped on CPU runners +# (the KA kernels themselves are already exercised on the CPU backend above). +if gpu_backend == "CPU" + @info("No GPU backend detected — skipping device KA tests.") +else -sh = SphericalHarmonics(L; T = Float32) -R̂s = [ 𝐫 / norm(𝐫) for 𝐫 in Rs ] -R̂s_gpu = gpu(R̂s) + Rs_gpu = gpu(Rs) -Y1a = compute(basis, R̂s_gpu) -Y1b = compute(sh, Rs) -Y2 = compute(sh, Rs_gpu) -@test Y2 isa AbstractGPUArray -@test norm(Y1a - Y2, Inf) / L^2 < 1e-7 -@test norm(Y1b - Array(Y2), Inf) / L^2 < 1e-7 + @info("test GPU-KA execution") + Z2 = gpu(zeros(Float32, size(Z1))) + SpheriCart.ka_solid_harmonics!(Z2, Val(L), Rs_gpu, basis.Flm, GRPSIZE) + @show norm(Z1 - Array(Z2), Inf) / L^2 + @test norm(Z1 - Array(Z2), Inf) / L^2 < 1e-7 -## + @info("test GPU-KA execution via api") + Z2a = compute(basis, Rs_gpu) + @test Z2a isa AbstractGPUArray + @test norm(Z1 - Array(Z2a), Inf) / L^2 < 1e-7 + + ## + + @info("test GPU-KA gradients") + Z4 = gpu(zeros(Float32, size(Z1))) + ∇Z4 = gpu(zeros(SVector{3, Float32}, size(Z1))) + SpheriCart.ka_solid_harmonics_with_grad!(Z4, ∇Z4, Val{L}(), Rs_gpu, basis.Flm, GRPSIZE) -Y3, ∇Y3 = compute_with_gradients(sh, Rs) -Y4, ∇Y4 = compute_with_gradients(sh, Rs_gpu) -@test Y4 isa AbstractGPUArray -@test ∇Y4 isa AbstractGPUArray -@test norm(Y3 - Array(Y4), Inf) / L^2 < 1e-7 -@test norm(∇Y3 - Array(∇Y4), Inf) / L^3 < 1e-7 + Z4a, ∇Z4a = compute_with_gradients(basis, Rs_gpu) + @test Z4a isa AbstractGPUArray + @test ∇Z4a isa AbstractGPUArray + @show norm(Z1 - Array(Z4), Inf) / L^2 + @show norm(∇Z1 - Array(∇Z4), Inf) / L^3 + @test norm(Z1 - Array(Z4), Inf) / L^2 < 1e-7 + @test norm(∇Z1 - Array(∇Z4), Inf) / L^3 < 1e-7 + @test norm(Z1 - Array(Z4a), Inf) / L^2 < 1e-7 + @test norm(∇Z1 - Array(∇Z4a), Inf) / L^3 < 1e-7 + + ## + + @info("Test KernelAbstractions for spherical harmonics") + + sh = SphericalHarmonics(L; T = Float32) + R̂s = [ 𝐫 / norm(𝐫) for 𝐫 in Rs ] + R̂s_gpu = gpu(R̂s) + + Y1a = compute(basis, R̂s_gpu) + Y1b = compute(sh, Rs) + Y2 = compute(sh, Rs_gpu) + @test Y2 isa AbstractGPUArray + @test norm(Array(Y1a) - Array(Y2), Inf) / L^2 < 1e-7 + @test norm(Y1b - Array(Y2), Inf) / L^2 < 1e-7 + + ## + + Y3, ∇Y3 = compute_with_gradients(sh, Rs) + Y4, ∇Y4 = compute_with_gradients(sh, Rs_gpu) + @test Y4 isa AbstractGPUArray + @test ∇Y4 isa AbstractGPUArray + @test norm(Y3 - Array(Y4), Inf) / L^2 < 1e-7 + @test norm(∇Y3 - Array(∇Y4), Inf) / L^3 < 1e-7 + +end diff --git a/julia/test/utils_gpu.jl b/julia/test/utils_gpu.jl new file mode 100644 index 000000000..e59372e7f --- /dev/null +++ b/julia/test/utils_gpu.jl @@ -0,0 +1,63 @@ +import Pkg + +if !isdefined(Main, :___SPHERICART_UTILS_GPU___) + + """ + detect_gpu_backend() -> String + + Pick a GPU backend by probing the *system* — no GPU package is loaded here, + so a plain CI runner resolves to `"CPU"` and installs no GPU package. Set the + `TEST_BACKEND` env var to force a choice (`"CPU"`, `"CUDA"`, `"AMDGPU"`, + `"Metal"`, `"oneAPI"`). + """ + function detect_gpu_backend() + haskey(ENV, "TEST_BACKEND") && return ENV["TEST_BACKEND"] # manual override + if Sys.isapple() && Sys.ARCH == :aarch64 + return "Metal" + elseif !isnothing(Sys.which("nvidia-smi")) && success(`nvidia-smi`) + return "CUDA" + elseif !isnothing(Sys.which("rocm-smi")) || isdir("/dev/kfd") + return "AMDGPU" + elseif !isnothing(Sys.which("sycl-ls")) # crude oneAPI probe + return "oneAPI" + else + return "CPU" + end + end + + # When a GPU is detected, install the matching backend *into the (sandboxed) + # test env* and use it; a plain CI runner resolves to "CPU" and installs + # nothing. A detected-but-unusable backend (e.g. Metal on a GPU-less macOS + # VM) degrades to CPU with a warning so the suite still runs. `gpu`/`dev` + # transfer arrays host -> device (identity on CPU). + global gpu_backend = detect_gpu_backend() + global gpu = global dev = identity + + if gpu_backend != "CPU" + try + Pkg.add(gpu_backend) # into the sandboxed test env only + @eval using $(Symbol(gpu_backend)) + if gpu_backend == "CUDA" + @assert CUDA.functional(); global gpu = global dev = CUDA.CuArray + elseif gpu_backend == "Metal" + @assert Metal.functional(); global gpu = global dev = Metal.MtlArray + elseif gpu_backend == "AMDGPU" + @assert AMDGPU.functional(); global gpu = global dev = AMDGPU.ROCArray + elseif gpu_backend == "oneAPI" + @assert oneAPI.functional(); global gpu = global dev = oneAPI.oneArray + else + error("unknown TEST_BACKEND = $(gpu_backend)") + end + @info "GPU test backend: $(gpu_backend)" + catch e + @warn "GPU backend '$(gpu_backend)' detected but not usable; using CPU." exception=(e, catch_backtrace()) + global gpu_backend = "CPU" + global gpu = global dev = identity + end + end + + gpu_backend == "CPU" && @info "GPU test backend: CPU (dev = identity)." + + global ___SPHERICART_UTILS_GPU___ = true + +end