Skip to content
972 changes: 18 additions & 954 deletions fearless_simd/src/generated/avx2.rs

Large diffs are not rendered by default.

1,742 changes: 663 additions & 1,079 deletions fearless_simd/src/generated/fallback.rs

Large diffs are not rendered by default.

1,152 changes: 8 additions & 1,144 deletions fearless_simd/src/generated/neon.rs

Large diffs are not rendered by default.

720 changes: 0 additions & 720 deletions fearless_simd/src/generated/ops.rs

Large diffs are not rendered by default.

879 changes: 305 additions & 574 deletions fearless_simd/src/generated/simd_trait.rs

Large diffs are not rendered by default.

1,278 changes: 125 additions & 1,153 deletions fearless_simd/src/generated/simd_types.rs

Large diffs are not rendered by default.

926 changes: 11 additions & 915 deletions fearless_simd/src/generated/sse4_2.rs

Large diffs are not rendered by default.

918 changes: 7 additions & 911 deletions fearless_simd/src/generated/wasm.rs

Large diffs are not rendered by default.

9 changes: 5 additions & 4 deletions fearless_simd/src/traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ use crate::{Level, Simd, SimdBase, seal::Seal};

/// Element-wise selection between two SIMD vectors using `self`.
pub trait Select<T: Seal>: Seal {
/// For each element of this mask, select the first operand if the element is all ones, and select the second
/// operand if the element is all zeroes.
/// For each logical lane of this mask, select the first operand if the lane is true, and select the second
/// operand if the lane is false.
///
/// If a mask element is *not* all ones or all zeroes, the result is unspecified. It may vary depending on
/// architecture, feature level, the mask elements' width, the mask vector's width, or library version.
/// Masks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those
/// conversions, false is encoded as all zeroes and true is encoded as all ones. If a mask is constructed from any
/// other integer bit pattern, the result of this operation is unspecified.
fn select(self, if_true: T, if_false: T) -> T;
}

Expand Down
54 changes: 35 additions & 19 deletions fearless_simd_gen/src/mk_fallback.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ impl Level for Fallback {
let items = make_list(
(0..vec_ty.len)
.map(|idx| {
let args = [quote! { a[#idx] }];
let args = [lane(quote! { a }, vec_ty, idx)];
let expr = fallback::expr(method, vec_ty, &args);
quote! { #expr }
})
Expand All @@ -170,7 +170,8 @@ impl Level for Fallback {
(0..vec_ty.len)
.map(|idx| {
let scalar_ty = target_ty.scalar.rust(target_ty.scalar_bits);
quote! { a[#idx] as #scalar_ty }
let a = lane(quote! { a }, vec_ty, idx);
quote! { #a as #scalar_ty }
})
.collect::<Vec<_>>(),
);
Expand All @@ -185,19 +186,20 @@ impl Level for Fallback {
let items = make_list(
(0..vec_ty.len)
.map(|idx| {
let b_lane = lane(quote! { b }, vec_ty, idx);
let b = if fallback::translate_op(
method,
vec_ty.scalar == ScalarType::Float,
)
.map(rhs_reference)
.unwrap_or(true)
{
quote! { &b[#idx] }
quote! { &#b_lane }
} else {
quote! { b[#idx] }
b_lane
};

let args = [quote! { a[#idx] }, quote! { #b }];
let args = [lane(quote! { a }, vec_ty, idx), quote! { #b }];
let expr = fallback::expr(method, vec_ty, &args);
quote! { #expr }
})
Expand All @@ -214,7 +216,7 @@ impl Level for Fallback {
let items = make_list(
(0..vec_ty.len)
.map(|idx| {
let args = [quote! { a[#idx] }, quote! { shift }];
let args = [lane(quote! { a }, vec_ty, idx), quote! { shift }];
let expr = fallback::expr(method, vec_ty, &args);
quote! { #expr }
})
Expand Down Expand Up @@ -260,7 +262,9 @@ impl Level for Fallback {
let items = make_list(
(0..vec_ty.len)
.map(|idx: usize| {
let args = [quote! { &a[#idx] }, quote! { &b[#idx] }];
let a = lane(quote! { a }, vec_ty, idx);
let b = lane(quote! { b }, vec_ty, idx);
let args = [quote! { &#a }, quote! { &#b }];
let expr = fallback::expr(method, vec_ty, &args);
let mask_ty = mask_type.scalar.rust(vec_ty.scalar_bits);
quote! { -(#expr as #mask_ty) }
Expand All @@ -275,10 +279,14 @@ impl Level for Fallback {
}
}
OpSig::Select => {
let mask_type = vec_ty.mask_ty();
let items = make_list(
(0..vec_ty.len)
.map(|idx| {
quote! { if a[#idx] != 0 { b[#idx] } else { c[#idx] } }
let a = lane(quote! { a }, &mask_type, idx);
let b = lane(quote! { b }, vec_ty, idx);
let c = lane(quote! { c }, vec_ty, idx);
quote! { if #a != 0 { #b } else { #c } }
})
.collect::<Vec<_>>(),
);
Expand Down Expand Up @@ -332,7 +340,9 @@ impl Level for Fallback {
let zip = make_list(
indices
.map(|idx| {
quote! {a[#idx], b[#idx] }
let a = lane(quote! { a }, vec_ty, idx);
let b = lane(quote! { b }, vec_ty, idx);
quote! { #a, #b }
})
.collect::<Vec<_>>(),
);
Expand All @@ -353,12 +363,8 @@ impl Level for Fallback {
let unzip = make_list(
indices
.clone()
.map(|idx| {
quote! {a[#idx]}
})
.chain(indices.map(|idx| {
quote! {b[#idx]}
}))
.map(|idx| lane(quote! { a }, vec_ty, idx))
.chain(indices.map(|idx| lane(quote! { b }, vec_ty, idx)))
.collect::<Vec<_>>(),
);

Expand Down Expand Up @@ -398,7 +404,8 @@ impl Level for Fallback {
let items = make_list(
(0..vec_ty.len)
.map(|idx| {
quote! { a[#idx] as #scalar }
let a = lane(quote! { a }, vec_ty, idx);
quote! { #a as #scalar }
})
.collect::<Vec<_>>(),
);
Expand Down Expand Up @@ -427,7 +434,6 @@ impl Level for Fallback {
quantifier,
condition,
} => {
let indices = (0..vec_ty.len).map(|idx| quote! { #idx });
let check = if condition {
quote! { != }
} else {
Expand All @@ -436,10 +442,12 @@ impl Level for Fallback {

let expr = match quantifier {
crate::ops::Quantifier::Any => {
quote! { #(a[#indices] #check 0)||* }
let lanes = (0..vec_ty.len).map(|idx| lane(quote! { a }, vec_ty, idx));
quote! { #(#lanes #check 0)||* }
}
crate::ops::Quantifier::All => {
quote! { #(a[#indices] #check 0)&&* }
let lanes = (0..vec_ty.len).map(|idx| lane(quote! { a }, vec_ty, idx));
quote! { #(#lanes #check 0)&&* }
}
};

Expand Down Expand Up @@ -546,6 +554,14 @@ fn interleave_indices(
make_list(indices.into_iter().map(func).collect::<Vec<_>>())
}

/// Builds the token stream that indexes lane `idx` of the vector expression `value`.
///
/// When `vec_ty` has a mask scalar type, the generated expression indexes through
/// `value.val.0`; for every other scalar type, `value` is indexed directly.
fn lane(value: TokenStream, vec_ty: &VecType, idx: usize) -> TokenStream {
    let is_mask = vec_ty.scalar == ScalarType::Mask;
    if !is_mask {
        // Non-mask vectors are indexed directly.
        return quote! { #value[#idx] };
    }
    // Mask vectors are indexed through their `.val.0` field.
    quote! { #value.val.0[#idx] }
}

/// Whether the second argument of the function needs to be passed by reference.
fn rhs_reference(method: &str) -> bool {
!matches!(
Expand Down
51 changes: 28 additions & 23 deletions fearless_simd_gen/src/mk_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use quote::{format_ident, quote};
use crate::{
generic::generic_op_name,
ops::{CoreOpTrait, OpKind, OpSig, TyFlavor, overloaded_ops_for},
types::{SIMD_TYPES, type_imports},
types::{SIMD_TYPES, ScalarType, type_imports},
};

pub(crate) fn mk_ops() -> TokenStream {
Expand Down Expand Up @@ -85,6 +85,32 @@ pub(crate) fn mk_ops() -> TokenStream {
}
_ => {
let scalar = ty.scalar.rust(ty.scalar_bits);
let scalar_overloads = (ty.scalar != ScalarType::Mask).then(|| {
quote! {
impl<S: Simd> core::ops::#trait_id<#scalar> for #simd<S> {
type Output = Self;
#[inline(always)]
fn #opfn(self, rhs: #scalar) -> Self::Output {
self.simd.#simd_fn(self, rhs.simd_into(self.simd))
}
}

impl<S: Simd> core::ops::#trait_assign_id<#scalar> for #simd<S> {
#[inline(always)]
fn #op_assign_fn(&mut self, rhs: #scalar) {
*self = self.simd.#simd_fn(*self, rhs.simd_into(self.simd));
}
}

impl<S: Simd> core::ops::#trait_id<#simd<S>> for #scalar {
type Output = #simd<S>;
#[inline(always)]
fn #opfn(self, rhs: #simd<S>) -> Self::Output {
rhs.simd.#simd_fn(self.simd_into(rhs.simd), rhs)
}
}
}
});
impls.push(quote! {
impl<S: Simd> core::ops::#trait_id for #simd<S> {
type Output = Self;
Expand All @@ -103,28 +129,7 @@ pub(crate) fn mk_ops() -> TokenStream {
}
}

impl<S: Simd> core::ops::#trait_id<#scalar> for #simd<S> {
type Output = Self;
#[inline(always)]
fn #opfn(self, rhs: #scalar) -> Self::Output {
self.simd.#simd_fn(self, rhs.simd_into(self.simd))
}
}

impl<S: Simd> core::ops::#trait_assign_id<#scalar> for #simd<S> {
#[inline(always)]
fn #op_assign_fn(&mut self, rhs: #scalar) {
*self = self.simd.#simd_fn(*self, rhs.simd_into(self.simd));
}
}

impl<S: Simd> core::ops::#trait_id<#simd<S>> for #scalar {
type Output = #simd<S>;
#[inline(always)]
fn #opfn(self, rhs: #simd<S>) -> Self::Output {
rhs.simd.#simd_fn(self.simd_into(rhs.simd), rhs)
}
}
#scalar_overloads
});
}
}
Expand Down
78 changes: 60 additions & 18 deletions fearless_simd_gen/src/mk_simd_trait.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
// Copyright 2025 the Fearless_SIMD Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT

use proc_macro2::TokenStream;
use quote::quote;
use proc_macro2::{Ident, Span, TokenStream};
use quote::{format_ident, quote};

use crate::{
ops::{OpKind, TyFlavor, base_trait_ops, ops_for_type, overloaded_ops_for, vec_trait_ops_for},
ops::{
CoreOpTrait, OpKind, OpSig, TyFlavor, base_trait_ops, ops_for_type, overloaded_ops_for,
vec_trait_ops_for,
},
types::{SIMD_TYPES, ScalarType, type_imports},
};

Expand Down Expand Up @@ -80,14 +83,13 @@ pub(crate) fn mk_simd_trait() -> TokenStream {
type i32s: SimdInt<Self, Element = i32, Block = i32x4<Self>, Mask = Self::mask32s, Bytes = <Self::u32s as Bytes>::Bytes> + SimdCvtTruncate<Self::f32s>
+ core::ops::Neg<Output = Self::i32s>;
/// A native-width SIMD mask with 8-bit lanes.
type mask8s: SimdMask<Self, Element = i8, Block = mask8x16<Self>, Bytes = <Self::u8s as Bytes>::Bytes> + Select<Self::u8s> + Select<Self::i8s> + Select<Self::mask8s>;
type mask8s: SimdMask<Self, Element = i8> + SimdFrom<i8, Self> + Select<Self::u8s> + Select<Self::i8s> + Select<Self::mask8s>;
Comment thread
Shnatsel marked this conversation as resolved.
Outdated
/// A native-width SIMD mask with 16-bit lanes.
type mask16s: SimdMask<Self, Element = i16, Block = mask16x8<Self>, Bytes = <Self::u16s as Bytes>::Bytes> + Select<Self::u16s> + Select<Self::i16s> + Select<Self::mask16s>;
type mask16s: SimdMask<Self, Element = i16> + SimdFrom<i16, Self> + Select<Self::u16s> + Select<Self::i16s> + Select<Self::mask16s>;
/// A native-width SIMD mask with 32-bit lanes.
type mask32s: SimdMask<Self, Element = i32, Block = mask32x4<Self>, Bytes = <Self::u32s as Bytes>::Bytes>
+ Select<Self::f32s> + Select<Self::u32s> + Select<Self::i32s> + Select<Self::mask32s>;
type mask32s: SimdMask<Self, Element = i32> + SimdFrom<i32, Self> + Select<Self::f32s> + Select<Self::u32s> + Select<Self::i32s> + Select<Self::mask32s>;
/// A native-width SIMD mask with 64-bit lanes.
type mask64s: SimdMask<Self, Element = i64, Block = mask64x2<Self>> + Select<Self::f64s> + Select<Self::mask64s>;
type mask64s: SimdMask<Self, Element = i64> + SimdFrom<i64, Self> + Select<Self::f64s> + Select<Self::mask64s>;

/// This SIMD token's feature level.
fn level(self) -> Level;
Expand Down Expand Up @@ -156,14 +158,11 @@ fn mk_simd_base() -> TokenStream {
/// working with a native-width vector (e.g. [`Simd::f32s`]) and
/// want to process data in native-width chunks.
const N: usize;
/// A SIMD vector mask with the same number of elements.
/// A SIMD vector mask with the same number of logical lanes.
///
/// The mask element is represented as an integer which is
/// all-0 for `false` and all-1 for `true`. When we get deep
/// into AVX-512, we need to think about predication masks.
///
/// One possibility to consider is that the SIMD trait grows
/// `maskAxB` associated types.
/// Masks intentionally do not implement [`SimdBase`]. SSE, NEON, WASM, and the
/// fallback backend currently store masks as all-zero/all-one integer vectors, but
/// AVX-512/RVV/SVE-style targets use compact predicate registers instead.
type Mask: SimdMask<S, Element = <Self::Element as SimdElement>::Mask>;
/// A 128-bit SIMD vector of the same scalar type.
type Block: SimdBase<S, Element = Self::Element>;
Expand Down Expand Up @@ -271,12 +270,50 @@ fn mk_simd_mask() -> TokenStream {
OpKind::Overloaded(core_op) => Some(core_op),
_ => None,
})
.flat_map(|core_op| core_op.trait_bounds());
.flat_map(|core_op| {
let trait_name = Ident::new(core_op.trait_name(), Span::call_site());
let trait_name_assign = format_ident!("{trait_name}Assign");
match core_op {
CoreOpTrait::Not => vec![quote! { core::ops::#trait_name<Output = Self> }],
_ => vec![
quote! { core::ops::#trait_name<Output = Self> },
quote! { core::ops::#trait_name_assign },
],
}
});
quote! {
/// Functionality implemented by SIMD masks.
pub trait SimdMask<S: Simd>: SimdBase<S> + Seal
///
/// A mask has one logical boolean lane per SIMD lane. Its storage is intentionally opaque:
/// current backends may use all-zero/all-one integer vectors internally, while future
/// predicate-register backends may use a compact representation.
pub trait SimdMask<S: Simd>:
Copy + Sync + Send + 'static
+ Seal
+ Select<Self>
#(+ #op_traits)*
{
/// The signed integer type used when converting this mask to and from lane values.
///
/// False lanes are encoded as all zeroes, and true lanes are encoded as all ones.
type Element: SimdElement;

/// This mask type's lane count.
const N: usize;

/// Get the [`Simd`] implementation associated with this type.
fn witness(&self) -> S;

/// Create a SIMD mask from signed integer mask lanes.
///
/// The slice must be exactly the size of the SIMD mask.
fn from_slice(simd: S, slice: &[Self::Element]) -> Self;

/// Store this SIMD mask as signed integer mask lanes.
///
/// The slice must be exactly the size of the SIMD mask.
fn store_slice(&self, slice: &mut [Self::Element]);

#( #methods )*
}
}
Expand All @@ -286,7 +323,12 @@ fn methods_for_vec_trait(scalar: ScalarType) -> Vec<TokenStream> {
let mut methods = vec![];
for op in vec_trait_ops_for(scalar) {
let doc = op.format_docstring(TyFlavor::VecImpl);
if let Some(method_sig) = op.vec_trait_method_sig() {
let method_sig = if scalar == ScalarType::Mask && matches!(op.sig, OpSig::Compare) {
Some(quote! { fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> Self })
} else {
op.vec_trait_method_sig()
};
if let Some(method_sig) = method_sig {
methods.push(quote! {
#[doc = #doc]
#method_sig;
Expand Down
Loading
Loading