diff --git a/fearless_simd/examples/srgb.rs b/fearless_simd/examples/srgb.rs index ca40533e1..c54c957c0 100644 --- a/fearless_simd/examples/srgb.rs +++ b/fearless_simd/examples/srgb.rs @@ -8,41 +8,41 @@ use fearless_simd::{Level, dispatch, f32x4, prelude::*}; -// This block shows how to use safe wrappers for compile-time enforcement -// of using valid SIMD intrinsics. -#[cfg(feature = "safe_wrappers")] -#[inline(always)] -fn copy_alpha(a: f32x4, b: f32x4) -> f32x4 { - // #[cfg(target_arch = "x86_64")] - // if let Some(avx2) = a.simd.level().as_avx2() { - // return avx2 - // .sse4_1 - // ._mm_blend_ps::<8>(a.into(), b.into()) - // .simd_into(a.simd); - // } - #[cfg(target_arch = "aarch64")] - if let Some(neon) = a.simd.level().as_neon() { - return neon - .neon - .vcopyq_laneq_f32::<3, 3>(a.into(), b.into()) - .simd_into(a.simd); +#[cfg(target_arch = "aarch64")] +use core::arch::aarch64::{float32x4_t, vcopyq_laneq_f32}; +#[cfg(target_arch = "x86")] +use core::arch::x86::{__m128, _mm_blend_ps}; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::{__m128, _mm_blend_ps}; + +#[cfg(target_arch = "aarch64")] +fearless_simd::neon_kernel! { + #[inline] + fn copy_alpha_neon(a: float32x4_t, b: float32x4_t) -> float32x4_t { + vcopyq_laneq_f32::<3, 3>(a, b) + } +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fearless_simd::sse4_2_kernel! { + #[inline] + fn copy_alpha_sse4_2(a: __m128, b: __m128) -> __m128 { + _mm_blend_ps::<8>(a, b) } - let mut result = a; - result[3] = b[3]; - result } -// This block lets the example compile without safe wrappers. -#[cfg(not(feature = "safe_wrappers"))] #[inline(always)] fn copy_alpha(a: f32x4, b: f32x4) -> f32x4 { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if let Some(sse4_2) = a.simd.level().as_sse4_2() { + return copy_alpha_sse4_2(sse4_2, a.into(), b.into()).simd_into(a.simd); + } + #[cfg(target_arch = "aarch64")] - if let Some(_neon) = a.simd.level().as_neon() { - unsafe { - return core::arch::aarch64::vcopyq_laneq_f32::<3, 3>(a.into(), b.into()) - .simd_into(a.simd); - } + if let Some(neon) = a.simd.level().as_neon() { + return copy_alpha_neon(neon, a.into(), b.into()).simd_into(a.simd); } + let mut result = a; result[3] = b[3]; result diff --git a/fearless_simd/src/generated.rs b/fearless_simd/src/generated.rs index 9d342539a..00f702ab5 100644 --- a/fearless_simd/src/generated.rs +++ b/fearless_simd/src/generated.rs @@ -47,6 +47,7 @@ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] mod avx2; mod fallback; +mod kernel_macros; #[cfg(target_arch = "aarch64")] mod neon; mod ops; diff --git a/fearless_simd/src/generated/kernel_macros.rs b/fearless_simd/src/generated/kernel_macros.rs new file mode 100644 index 000000000..a8331e1a2 --- /dev/null +++ b/fearless_simd/src/generated/kernel_macros.rs @@ -0,0 +1,340 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file is autogenerated by fearless_simd_gen + +#[cfg(target_arch = "aarch64")] +#[doc = "Creates a context where you can safely call intrinsics"] +#[doc = "available at the [`Neon`](crate::Neon) SIMD level."] +#[doc = ""] +#[doc = "This is useful if the portable abstractions are not enough, and you need to"] +#[doc = "use platform-specific intrinsics for parts of the computation."] +#[doc = ""] +#[doc = "See [`Neon`](crate::Neon) for the target features represented by this SIMD level."] +#[doc = ""] +#[doc = "The generated wrapper takes a SIMD token ([`Neon`](crate::Neon)) as its first argument."] +#[doc = "The macro runs your body inside an inner function annotated with the appropriate"] +#[doc = "`#[target_feature]` attributes. That makes platform-specific intrinsics from `core::arch` or"] +#[doc = "`std::arch` safe to call in the body, as long as they do not have safety"] +#[doc = "requirements beyond those target features."] +#[doc = ""] +#[doc = "## Example"] +#[doc = ""] +#[doc = "```rust"] +#[doc = "# #[allow(unused_imports)]"] +#[doc = "use fearless_simd::{f32x4, prelude::*};"] +#[doc = "#[cfg(target_arch = \"aarch64\")]"] +#[doc = "use std::arch::aarch64::{float32x4_t, vaddq_f32};"] +#[doc = ""] +#[doc = "#[cfg(target_arch = \"aarch64\")]"] +#[doc = "fearless_simd::neon_kernel! {"] +#[doc = " fn add_f32x4(a: float32x4_t, b: float32x4_t) -> float32x4_t {"] +#[doc = " vaddq_f32(a, b)"] +#[doc = " }"] +#[doc = "}"] +#[doc = ""] +#[doc = "# fn main() {"] +#[doc = "#[cfg(target_arch = \"aarch64\")]"] +#[doc = "if let Some(neon) = fearless_simd::Level::new().as_neon() {"] +#[doc = " let a: f32x4<_> = [1.0, 2.0, 3.0, 4.0].simd_into(neon);"] +#[doc = " let b: f32x4<_> = [10.0, 20.0, 30.0, 40.0].simd_into(neon);"] +#[doc = " let sum: f32x4<_> = add_f32x4(neon, a.into(), b.into()).simd_into(neon);"] +#[doc = ""] +#[doc = " assert_eq!(<[f32; 4]>::from(sum), [11.0, 22.0, 33.0, 44.0]);"] +#[doc = "}"] +#[doc = "# }"] +#[doc = "```"] +#[doc = ""] +#[doc = "See the [sRGB example] for an end-to-end use of kernel macros."] +#[doc = ""] +#[doc = "[sRGB example]: https://github.com/linebender/fearless_simd/blob/main/fearless_simd/examples/srgb.rs"] +#[doc = ""] +#[doc = "Kernel macros only accept safe functions."] +#[doc = ""] +#[doc = "```compile_fail"] +#[doc = "fearless_simd::neon_kernel! {"] +#[doc = " unsafe fn should_not_compile() {}"] +#[doc = "}"] +#[doc = "```"] +#[macro_export] +macro_rules! neon_kernel { + ( + $(#[$meta:meta])* + $vis:vis fn $name:ident( + $($arg:ident : $arg_ty:ty),* $(,)? + ) $(-> $ret:ty)? { + $($kernel_body:tt)* + } + ) => { + #[cfg(target_arch = "aarch64")] + $(#[$meta])* + $vis fn $name( + _simd: $crate::Neon, + $($arg: $arg_ty),* + ) $(-> $ret)? { + #[inline] + #[target_feature(enable = "neon")] + fn __fearless_simd_kernel( + $($arg: $arg_ty),* + ) $(-> $ret)? { + $($kernel_body)* + } + + // SAFETY: the `Neon` token proves that the required target features are available. + unsafe { __fearless_simd_kernel($($arg),*) } + } + }; +} + +#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] +#[doc = "Creates a context where you can safely call intrinsics"] +#[doc = "available at the [`WasmSimd128`](crate::WasmSimd128) SIMD level."] +#[doc = ""] +#[doc = "This is useful if the portable abstractions are not enough, and you need to"] +#[doc = "use platform-specific intrinsics for parts of the computation."] +#[doc = ""] +#[doc = "See [`WasmSimd128`](crate::WasmSimd128) for the target features represented by this SIMD level."] +#[doc = ""] +#[doc = "The generated wrapper takes a SIMD token ([`WasmSimd128`](crate::WasmSimd128)) as its first argument and is"] +#[doc = "compiled only when the required target features are enabled. That makes matching"] +#[doc = "platform-specific intrinsics from `core::arch` or `std::arch` safe to call in the"] +#[doc = "body, as long as they do not have safety requirements beyond those target features."] +#[doc = ""] +#[doc = "## Example"] +#[doc = ""] +#[doc = "```rust"] +#[doc = "# #[allow(unused_imports)]"] +#[doc = "use fearless_simd::{f32x4, prelude::*};"] +#[doc = "#[cfg(all(target_arch = \"wasm32\", target_feature = \"simd128\"))]"] +#[doc = "use std::arch::wasm32::{f32x4_add, v128};"] +#[doc = ""] +#[doc = "#[cfg(all(target_arch = \"wasm32\", target_feature = \"simd128\"))]"] +#[doc = "fearless_simd::wasm_simd128_kernel! {"] +#[doc = " fn add_f32x4(a: v128, b: v128) -> v128 {"] +#[doc = " f32x4_add(a, b)"] +#[doc = " }"] +#[doc = "}"] +#[doc = ""] +#[doc = "# fn main() {"] +#[doc = "#[cfg(all(target_arch = \"wasm32\", target_feature = \"simd128\"))]"] +#[doc = "{"] +#[doc = " if let Some(wasm) = fearless_simd::Level::new().as_wasm_simd128() {"] +#[doc = " let a: f32x4<_> = [1.0, 2.0, 3.0, 4.0].simd_into(wasm);"] +#[doc = " let b: f32x4<_> = [10.0, 20.0, 30.0, 40.0].simd_into(wasm);"] +#[doc = " let sum: f32x4<_> = add_f32x4(wasm, a.into(), b.into()).simd_into(wasm);"] +#[doc = ""] +#[doc = " assert_eq!(<[f32; 4]>::from(sum), [11.0, 22.0, 33.0, 44.0]);"] +#[doc = " }"] +#[doc = "}"] +#[doc = "# }"] +#[doc = "```"] +#[doc = ""] +#[doc = "See the [sRGB example] for an end-to-end use of kernel macros."] +#[doc = ""] +#[doc = "[sRGB example]: https://github.com/linebender/fearless_simd/blob/main/fearless_simd/examples/srgb.rs"] +#[doc = ""] +#[doc = "Kernel macros only accept safe functions."] +#[doc = ""] +#[doc = "```compile_fail"] +#[doc = "fearless_simd::wasm_simd128_kernel! {"] +#[doc = " unsafe fn should_not_compile() {}"] +#[doc = "}"] +#[doc = "```"] +#[macro_export] +macro_rules! wasm_simd128_kernel { + ( + $(#[$meta:meta])* + $vis:vis fn $name:ident( + $($arg:ident : $arg_ty:ty),* $(,)? + ) $(-> $ret:ty)? { + $($kernel_body:tt)* + } + ) => { + #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] + $(#[$meta])* + $vis fn $name( + _simd: $crate::WasmSimd128, + $($arg: $arg_ty),* + ) $(-> $ret)? { + #[inline] + fn __fearless_simd_kernel( + $($arg: $arg_ty),* + ) $(-> $ret)? { + $($kernel_body)* + } + + let _ = _simd; + __fearless_simd_kernel($($arg),*) + } + }; +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[doc = "Creates a context where you can safely call intrinsics"] +#[doc = "available at the [`Sse4_2`](crate::Sse4_2) SIMD level."] +#[doc = ""] +#[doc = "This is useful if the portable abstractions are not enough, and you need to"] +#[doc = "use platform-specific intrinsics for parts of the computation."] +#[doc = ""] +#[doc = "See [`Sse4_2`](crate::Sse4_2) for the target features represented by this SIMD level."] +#[doc = ""] +#[doc = "The generated wrapper takes a SIMD token ([`Sse4_2`](crate::Sse4_2)) as its first argument."] +#[doc = "The macro runs your body inside an inner function annotated with the appropriate"] +#[doc = "`#[target_feature]` attributes. That makes platform-specific intrinsics from `core::arch` or"] +#[doc = "`std::arch` safe to call in the body, as long as they do not have safety"] +#[doc = "requirements beyond those target features."] +#[doc = ""] +#[doc = "## Example"] +#[doc = ""] +#[doc = "```rust"] +#[doc = "# #[allow(unused_imports)]"] +#[doc = "use fearless_simd::{f32x4, prelude::*};"] +#[doc = "#[cfg(target_arch = \"x86\")]"] +#[doc = "use std::arch::x86::{__m128, _mm_add_ps};"] +#[doc = "#[cfg(target_arch = \"x86_64\")]"] +#[doc = "use std::arch::x86_64::{__m128, _mm_add_ps};"] +#[doc = ""] +#[doc = "#[cfg(any(target_arch = \"x86\", target_arch = \"x86_64\"))]"] +#[doc = "fearless_simd::sse4_2_kernel! {"] +#[doc = " fn add_f32x4(a: __m128, b: __m128) -> __m128 {"] +#[doc = " _mm_add_ps(a, b)"] +#[doc = " }"] +#[doc = "}"] +#[doc = ""] +#[doc = "# fn main() {"] +#[doc = "#[cfg(any(target_arch = \"x86\", target_arch = \"x86_64\"))]"] +#[doc = "if let Some(sse4_2) = fearless_simd::Level::new().as_sse4_2() {"] +#[doc = " let a: f32x4<_> = [1.0, 2.0, 3.0, 4.0].simd_into(sse4_2);"] +#[doc = " let b: f32x4<_> = [10.0, 20.0, 30.0, 40.0].simd_into(sse4_2);"] +#[doc = " let sum: f32x4<_> = add_f32x4(sse4_2, a.into(), b.into()).simd_into(sse4_2);"] +#[doc = ""] +#[doc = " assert_eq!(<[f32; 4]>::from(sum), [11.0, 22.0, 33.0, 44.0]);"] +#[doc = "}"] +#[doc = "# }"] +#[doc = "```"] +#[doc = ""] +#[doc = "See the [sRGB example] for an end-to-end use of kernel macros."] +#[doc = ""] +#[doc = "[sRGB example]: https://github.com/linebender/fearless_simd/blob/main/fearless_simd/examples/srgb.rs"] +#[doc = ""] +#[doc = "Kernel macros only accept safe functions."] +#[doc = ""] +#[doc = "```compile_fail"] +#[doc = "fearless_simd::sse4_2_kernel! {"] +#[doc = " unsafe fn should_not_compile() {}"] +#[doc = "}"] +#[doc = "```"] +#[macro_export] +macro_rules! sse4_2_kernel { + ( + $(#[$meta:meta])* + $vis:vis fn $name:ident( + $($arg:ident : $arg_ty:ty),* $(,)? + ) $(-> $ret:ty)? { + $($kernel_body:tt)* + } + ) => { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + $(#[$meta])* + $vis fn $name( + _simd: $crate::Sse4_2, + $($arg: $arg_ty),* + ) $(-> $ret)? { + #[inline] + #[target_feature(enable = "sse4.2,cmpxchg16b,popcnt")] + fn __fearless_simd_kernel( + $($arg: $arg_ty),* + ) $(-> $ret)? { + $($kernel_body)* + } + + // SAFETY: the `Sse4_2` token proves that the required target features are available. + unsafe { __fearless_simd_kernel($($arg),*) } + } + }; +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[doc = "Creates a context where you can safely call intrinsics"] +#[doc = "available at the [`Avx2`](crate::Avx2) SIMD level."] +#[doc = ""] +#[doc = "This is useful if the portable abstractions are not enough, and you need to"] +#[doc = "use platform-specific intrinsics for parts of the computation."] +#[doc = ""] +#[doc = "See [`Avx2`](crate::Avx2) for the target features represented by this SIMD level."] +#[doc = ""] +#[doc = "The generated wrapper takes a SIMD token ([`Avx2`](crate::Avx2)) as its first argument."] +#[doc = "The macro runs your body inside an inner function annotated with the appropriate"] +#[doc = "`#[target_feature]` attributes. That makes platform-specific intrinsics from `core::arch` or"] +#[doc = "`std::arch` safe to call in the body, as long as they do not have safety"] +#[doc = "requirements beyond those target features."] +#[doc = ""] +#[doc = "## Example"] +#[doc = ""] +#[doc = "```rust"] +#[doc = "# #[allow(unused_imports)]"] +#[doc = "use fearless_simd::{i32x8, prelude::*};"] +#[doc = "#[cfg(target_arch = \"x86\")]"] +#[doc = "use std::arch::x86::{__m256i, _mm256_add_epi32};"] +#[doc = "#[cfg(target_arch = \"x86_64\")]"] +#[doc = "use std::arch::x86_64::{__m256i, _mm256_add_epi32};"] +#[doc = ""] +#[doc = "#[cfg(any(target_arch = \"x86\", target_arch = \"x86_64\"))]"] +#[doc = "fearless_simd::avx2_kernel! {"] +#[doc = " fn add_i32x8(a: __m256i, b: __m256i) -> __m256i {"] +#[doc = " _mm256_add_epi32(a, b)"] +#[doc = " }"] +#[doc = "}"] +#[doc = ""] +#[doc = "# fn main() {"] +#[doc = "#[cfg(any(target_arch = \"x86\", target_arch = \"x86_64\"))]"] +#[doc = "if let Some(avx2) = fearless_simd::Level::new().as_avx2() {"] +#[doc = " let a: i32x8<_> = [1, 2, 3, 4, 5, 6, 7, 8].simd_into(avx2);"] +#[doc = " let b: i32x8<_> = [10, 20, 30, 40, 50, 60, 70, 80].simd_into(avx2);"] +#[doc = " let sum: i32x8<_> = add_i32x8(avx2, a.into(), b.into()).simd_into(avx2);"] +#[doc = ""] +#[doc = " assert_eq!(<[i32; 8]>::from(sum), [11, 22, 33, 44, 55, 66, 77, 88]);"] +#[doc = "}"] +#[doc = "# }"] +#[doc = "```"] +#[doc = ""] +#[doc = "See the [sRGB example] for an end-to-end use of kernel macros."] +#[doc = ""] +#[doc = "[sRGB example]: https://github.com/linebender/fearless_simd/blob/main/fearless_simd/examples/srgb.rs"] +#[doc = ""] +#[doc = "Kernel macros only accept safe functions."] +#[doc = ""] +#[doc = "```compile_fail"] +#[doc = "fearless_simd::avx2_kernel! {"] +#[doc = " unsafe fn should_not_compile() {}"] +#[doc = "}"] +#[doc = "```"] +#[macro_export] +macro_rules! avx2_kernel { + ( + $(#[$meta:meta])* + $vis:vis fn $name:ident( + $($arg:ident : $arg_ty:ty),* $(,)? + ) $(-> $ret:ty)? { + $($kernel_body:tt)* + } + ) => { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + $(#[$meta])* + $vis fn $name( + _simd: $crate::Avx2, + $($arg: $arg_ty),* + ) $(-> $ret)? { + #[inline] + #[target_feature(enable = "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,lzcnt,movbe,popcnt,xsave")] + fn __fearless_simd_kernel( + $($arg: $arg_ty),* + ) $(-> $ret)? { + $($kernel_body)* + } + + // SAFETY: the `Avx2` token proves that the required target features are available. + unsafe { __fearless_simd_kernel($($arg),*) } + } + }; +} diff --git a/fearless_simd_gen/src/level.rs b/fearless_simd_gen/src/level.rs index 0bfe06234..820c18b50 100644 --- a/fearless_simd_gen/src/level.rs +++ b/fearless_simd_gen/src/level.rs @@ -29,6 +29,11 @@ pub(crate) trait Level { /// If this SIMD level is not runtime-toggleable (for instance, the fallback implementation or WASM SIMD128), /// returns `None`. fn enabled_target_features(&self) -> Option<&'static str>; + /// The `cfg` expression under which this SIMD level token is available to generated kernel + /// macros. + fn availability_cfg(&self) -> Option<&'static str> { + None + } /// A function that takes a given vector type and returns the corresponding native vector type. For instance, /// `f32x8` would map to `__m256` on `Avx2`, and to `[f32; 8]` on `Fallback`. This will never be passed a vector /// type *larger* than [`Level::max_block_size`], since [`VecType::aligned_wrapper_ty`] will split those up into diff --git a/fearless_simd_gen/src/main.rs b/fearless_simd_gen/src/main.rs index 10efdfd99..b8446dc69 100644 --- a/fearless_simd_gen/src/main.rs +++ b/fearless_simd_gen/src/main.rs @@ -6,7 +6,7 @@ reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" )] -use std::{fs::File, io::Write, path::Path}; +use std::{fmt, fs::File, io::Write, path::Path}; use clap::{Parser, ValueEnum}; use proc_macro2::TokenStream; @@ -17,6 +17,7 @@ mod arch; mod generic; mod level; mod mk_fallback; +mod mk_kernel_macros; mod mk_neon; mod mk_ops; mod mk_simd_trait; @@ -30,6 +31,7 @@ mod types; enum Module { SimdTypes, SimdTrait, + KernelMacros, Ops, Neon, Wasm, @@ -56,16 +58,17 @@ struct Cli { } impl Module { - fn generate_code(self) -> TokenStream { + fn generate_code(self) -> GeneratedCode { match self { - Self::SimdTypes => mk_simd_types::mk_simd_types(), - Self::SimdTrait => mk_simd_trait::mk_simd_trait(), - Self::Ops => mk_ops::mk_ops(), - Self::Neon => mk_neon::Neon.make_module(), - Self::Wasm => mk_wasm::WasmSimd128.make_module(), - Self::Fallback => mk_fallback::Fallback.make_module(), - Self::Sse4_2 => mk_x86::X86::Sse4_2.make_module(), - Self::Avx2 => mk_x86::X86::Avx2.make_module(), + Self::SimdTypes => GeneratedCode::Tokens(mk_simd_types::mk_simd_types()), + Self::SimdTrait => GeneratedCode::Tokens(mk_simd_trait::mk_simd_trait()), + Self::KernelMacros => GeneratedCode::Source(mk_kernel_macros::mk_kernel_macros()), + Self::Ops => GeneratedCode::Tokens(mk_ops::mk_ops()), + Self::Neon => GeneratedCode::Tokens(mk_neon::Neon.make_module()), + Self::Wasm => GeneratedCode::Tokens(mk_wasm::WasmSimd128.make_module()), + Self::Fallback => GeneratedCode::Tokens(mk_fallback::Fallback.make_module()), + Self::Sse4_2 => GeneratedCode::Tokens(mk_x86::X86::Sse4_2.make_module()), + Self::Avx2 => GeneratedCode::Tokens(mk_x86::X86::Avx2.make_module()), } } @@ -99,6 +102,7 @@ impl Module { match self { Self::SimdTypes => "simd_types", Self::SimdTrait => "simd_trait", + Self::KernelMacros => "kernel_macros", Self::Ops => "ops", Self::Neon => "neon", Self::Fallback => "fallback", @@ -109,9 +113,24 @@ impl Module { } } +enum GeneratedCode { + Tokens(TokenStream), + Source(String), +} + +impl fmt::Display for GeneratedCode { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Tokens(tokens) => tokens.fmt(f), + Self::Source(source) => f.write_str(source), + } + } +} + const MODULES: &[Module] = &[ Module::SimdTypes, Module::SimdTrait, + Module::KernelMacros, Module::Ops, Module::Neon, Module::Fallback, diff --git a/fearless_simd_gen/src/mk_kernel_macros.rs b/fearless_simd_gen/src/mk_kernel_macros.rs new file mode 100644 index 000000000..6564418f8 --- /dev/null +++ b/fearless_simd_gen/src/mk_kernel_macros.rs @@ -0,0 +1,305 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use crate::{level::Level, mk_neon::Neon, mk_wasm::WasmSimd128, mk_x86::X86}; + +/// This emits a String rather than a TokenStream +/// because rustfmt just gives up formatting macros +/// and we end up with a completely unreadable token soup +/// if we don't impose formatting on it manually. +pub(crate) fn mk_kernel_macros() -> String { + [ + kernel_macro(&Neon), + kernel_macro(&WasmSimd128), + kernel_macro(&X86::Sse4_2), + kernel_macro(&X86::Avx2), + ] + .join("\n") +} + +fn kernel_macro(level: &dyn Level) -> String { + let macro_name = format!("{}_kernel", snake_case(level.name())); + let name = level.name(); + let cfg = level + .availability_cfg() + .expect("kernel macros should only be generated for cfg-gated SIMD levels"); + let body = kernel_body(level); + let target_feature_doc = target_feature_doc(level); + let example_doc = example_doc(level); + + KERNEL_MACRO_TEMPLATE + .replace("@MACRO_NAME@", ¯o_name) + .replace("@LEVEL_NAME@", name) + .replace("@CFG@", cfg) + .replace("@BODY@", &body) + .replace("@TARGET_FEATURE_DOC@", &target_feature_doc) + .replace("@EXAMPLE_DOC@", &example_doc) +} + +fn kernel_body(level: &dyn Level) -> String { + if let Some(features) = level.enabled_target_features() { + KERNEL_BODY_WITH_TARGET_FEATURES + .replace("@FEATURES@", features) + .replace("@LEVEL_NAME@", level.name()) + } else { + KERNEL_BODY.to_string() + } +} + +fn target_feature_doc(level: &dyn Level) -> String { + let body = if level.enabled_target_features().is_some() { + r#" +#[doc = "The generated wrapper takes a SIMD token ([`@LEVEL_NAME@`](crate::@LEVEL_NAME@)) as its first argument."] +#[doc = "The macro runs your body inside an inner function annotated with the appropriate"] +#[doc = "`#[target_feature]` attributes. That makes platform-specific intrinsics from `core::arch` or"] +#[doc = "`std::arch` safe to call in the body, as long as they do not have safety"] +#[doc = "requirements beyond those target features."] +"# + } else { + r#" +#[doc = "The generated wrapper takes a SIMD token ([`@LEVEL_NAME@`](crate::@LEVEL_NAME@)) as its first argument and is"] +#[doc = "compiled only when the required target features are enabled. That makes matching"] +#[doc = "platform-specific intrinsics from `core::arch` or `std::arch` safe to call in the"] +#[doc = "body, as long as they do not have safety requirements beyond those target features."] +"# + }; + + body.replace("@LEVEL_NAME@", level.name()) +} + +fn example_doc(level: &dyn Level) -> String { + let example = match level.name() { + "Neon" => { + r#" +## Example + +```rust +# #[allow(unused_imports)] +use fearless_simd::{f32x4, prelude::*}; +#[cfg(target_arch = "aarch64")] +use std::arch::aarch64::{float32x4_t, vaddq_f32}; + +#[cfg(target_arch = "aarch64")] +fearless_simd::neon_kernel! { + fn add_f32x4(a: float32x4_t, b: float32x4_t) -> float32x4_t { + vaddq_f32(a, b) + } +} + +# fn main() { +#[cfg(target_arch = "aarch64")] +if let Some(neon) = fearless_simd::Level::new().as_neon() { + let a: f32x4<_> = [1.0, 2.0, 3.0, 4.0].simd_into(neon); + let b: f32x4<_> = [10.0, 20.0, 30.0, 40.0].simd_into(neon); + let sum: f32x4<_> = add_f32x4(neon, a.into(), b.into()).simd_into(neon); + + assert_eq!(<[f32; 4]>::from(sum), [11.0, 22.0, 33.0, 44.0]); +} +# } +``` +"# + } + "WasmSimd128" => { + r#" +## Example + +```rust +# #[allow(unused_imports)] +use fearless_simd::{f32x4, prelude::*}; +#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] +use std::arch::wasm32::{f32x4_add, v128}; + +#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] +fearless_simd::wasm_simd128_kernel! { + fn add_f32x4(a: v128, b: v128) -> v128 { + f32x4_add(a, b) + } +} + +# fn main() { +#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] +{ + if let Some(wasm) = fearless_simd::Level::new().as_wasm_simd128() { + let a: f32x4<_> = [1.0, 2.0, 3.0, 4.0].simd_into(wasm); + let b: f32x4<_> = [10.0, 20.0, 30.0, 40.0].simd_into(wasm); + let sum: f32x4<_> = add_f32x4(wasm, a.into(), b.into()).simd_into(wasm); + + assert_eq!(<[f32; 4]>::from(sum), [11.0, 22.0, 33.0, 44.0]); + } +} +# } +``` +"# + } + "Sse4_2" => { + r#" +## Example + +```rust +# #[allow(unused_imports)] +use fearless_simd::{f32x4, prelude::*}; +#[cfg(target_arch = "x86")] +use std::arch::x86::{__m128, _mm_add_ps}; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::{__m128, _mm_add_ps}; + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fearless_simd::sse4_2_kernel! { + fn add_f32x4(a: __m128, b: __m128) -> __m128 { + _mm_add_ps(a, b) + } +} + +# fn main() { +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +if let Some(sse4_2) = fearless_simd::Level::new().as_sse4_2() { + let a: f32x4<_> = [1.0, 2.0, 3.0, 4.0].simd_into(sse4_2); + let b: f32x4<_> = [10.0, 20.0, 30.0, 40.0].simd_into(sse4_2); + let sum: f32x4<_> = add_f32x4(sse4_2, a.into(), b.into()).simd_into(sse4_2); + + assert_eq!(<[f32; 4]>::from(sum), [11.0, 22.0, 33.0, 44.0]); +} +# } +``` +"# + } + "Avx2" => { + r#" +## Example + +```rust +# #[allow(unused_imports)] +use fearless_simd::{i32x8, prelude::*}; +#[cfg(target_arch = "x86")] +use std::arch::x86::{__m256i, _mm256_add_epi32}; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::{__m256i, _mm256_add_epi32}; + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fearless_simd::avx2_kernel! { + fn add_i32x8(a: __m256i, b: __m256i) -> __m256i { + _mm256_add_epi32(a, b) + } +} + +# fn main() { +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +if let Some(avx2) = fearless_simd::Level::new().as_avx2() { + let a: i32x8<_> = [1, 2, 3, 4, 5, 6, 7, 8].simd_into(avx2); + let b: i32x8<_> = [10, 20, 30, 40, 50, 60, 70, 80].simd_into(avx2); + let sum: i32x8<_> = add_i32x8(avx2, a.into(), b.into()).simd_into(avx2); + + assert_eq!(<[i32; 8]>::from(sum), [11, 22, 33, 44, 55, 66, 77, 88]); +} +# } +``` +"# + } + _ => unreachable!("kernel macros are only generated for known SIMD levels"), + }; + + doc_block(example) +} + +fn doc_block(markdown: &str) -> String { + markdown + .trim_matches('\n') + .lines() + .map(|line| { + format!( + r#"#[doc = "{}"]"#, + line.replace('\\', "\\\\").replace('"', "\\\"") + ) + }) + .collect::>() + .join("\n") +} + +fn snake_case(name: &str) -> String { + let mut result = String::new(); + let mut prev_was_lowercase = false; + for ch in name.chars() { + if ch == '_' { + result.push(ch); + prev_was_lowercase = false; + } else if ch.is_uppercase() { + if prev_was_lowercase { + result.push('_'); + } + result.extend(ch.to_lowercase()); + prev_was_lowercase = false; + } else { + result.push(ch); + prev_was_lowercase = ch.is_lowercase(); + } + } + result +} + +const KERNEL_MACRO_TEMPLATE: &str = r#" +#[cfg(@CFG@)] +#[doc = "Creates a context where you can safely call intrinsics"] +#[doc = "available at the [`@LEVEL_NAME@`](crate::@LEVEL_NAME@) SIMD level."] +#[doc = ""] +#[doc = "This is useful if the portable abstractions are not enough, and you need to"] +#[doc = "use platform-specific intrinsics for parts of the computation."] +#[doc = ""] +#[doc = "See [`@LEVEL_NAME@`](crate::@LEVEL_NAME@) for the target features represented by this SIMD level."] +#[doc = ""] +@TARGET_FEATURE_DOC@ +#[doc = ""] +@EXAMPLE_DOC@ +#[doc = ""] +#[doc = "See the [sRGB example] for an end-to-end use of kernel macros."] +#[doc = ""] +#[doc = "[sRGB example]: https://github.com/linebender/fearless_simd/blob/main/fearless_simd/examples/srgb.rs"] +#[doc = ""] +#[doc = "Kernel macros only accept safe functions."] +#[doc = ""] +#[doc = "```compile_fail"] +#[doc = "fearless_simd::@MACRO_NAME@! {"] +#[doc = " unsafe fn should_not_compile() {}"] +#[doc = "}"] +#[doc = "```"] +#[macro_export] +macro_rules! @MACRO_NAME@ { + ( + $(#[$meta:meta])* + $vis:vis fn $name:ident( + $($arg:ident : $arg_ty:ty),* $(,)? + ) $(-> $ret:ty)? { + $($kernel_body:tt)* + } + ) => { + #[cfg(@CFG@)] + $(#[$meta])* + $vis fn $name( + _simd: $crate::@LEVEL_NAME@, + $($arg: $arg_ty),* + ) $(-> $ret)? { +@BODY@ + } + }; +} +"#; + +const KERNEL_BODY_WITH_TARGET_FEATURES: &str = r#" #[inline] + #[target_feature(enable = "@FEATURES@")] + fn __fearless_simd_kernel( + $($arg: $arg_ty),* + ) $(-> $ret)? { + $($kernel_body)* + } + + // SAFETY: the `@LEVEL_NAME@` token proves that the required target features are available. + unsafe { __fearless_simd_kernel($($arg),*) }"#; + +const KERNEL_BODY: &str = r#" #[inline] + fn __fearless_simd_kernel( + $($arg: $arg_ty),* + ) $(-> $ret)? { + $($kernel_body)* + } + + let _ = _simd; + __fearless_simd_kernel($($arg),*)"#; diff --git a/fearless_simd_gen/src/mk_neon.rs b/fearless_simd_gen/src/mk_neon.rs index ad356dfac..fc1318378 100644 --- a/fearless_simd_gen/src/mk_neon.rs +++ b/fearless_simd_gen/src/mk_neon.rs @@ -36,6 +36,10 @@ impl Level for Neon { Some("neon") } + fn availability_cfg(&self) -> Option<&'static str> { + Some(r#"target_arch = "aarch64""#) + } + fn arch_ty(&self, vec_ty: &VecType) -> TokenStream { let scalar = match vec_ty.scalar { ScalarType::Float => "float", diff --git a/fearless_simd_gen/src/mk_wasm.rs b/fearless_simd_gen/src/mk_wasm.rs index 705777769..f2fd7de38 100644 --- a/fearless_simd_gen/src/mk_wasm.rs +++ b/fearless_simd_gen/src/mk_wasm.rs @@ -37,6 +37,10 @@ impl Level for WasmSimd128 { None } + fn availability_cfg(&self) -> Option<&'static str> { + Some(r#"all(target_arch = "wasm32", target_feature = "simd128")"#) + } + fn arch_ty(&self, _vec_ty: &VecType) -> TokenStream { quote! { v128 } } diff --git a/fearless_simd_gen/src/mk_x86.rs b/fearless_simd_gen/src/mk_x86.rs index e0c269490..d1abba6d3 100644 --- a/fearless_simd_gen/src/mk_x86.rs +++ b/fearless_simd_gen/src/mk_x86.rs @@ -48,6 +48,10 @@ impl Level for X86 { }) } + fn availability_cfg(&self) -> Option<&'static str> { + Some(r#"any(target_arch = "x86", target_arch = "x86_64")"#) + } + fn arch_ty(&self, vec_ty: &VecType) -> TokenStream { let suffix = match (vec_ty.scalar, vec_ty.scalar_bits) { (ScalarType::Float, 32) => "",