linebender · Shnatsel · May 23, 2026 · May 18, 2026 · May 19, 2026 · May 19, 2026
diff --git a/fearless_simd/src/generated/avx2.rs b/fearless_simd/src/generated/avx2.rs
diff --git a/fearless_simd/src/generated/fallback.rs b/fearless_simd/src/generated/fallback.rs
diff --git a/fearless_simd/src/generated/neon.rs b/fearless_simd/src/generated/neon.rs
diff --git a/fearless_simd/src/generated/ops.rs b/fearless_simd/src/generated/ops.rs
diff --git a/fearless_simd/src/generated/simd_trait.rs b/fearless_simd/src/generated/simd_trait.rs
diff --git a/fearless_simd/src/generated/simd_types.rs b/fearless_simd/src/generated/simd_types.rs
diff --git a/fearless_simd/src/generated/sse4_2.rs b/fearless_simd/src/generated/sse4_2.rs
diff --git a/fearless_simd/src/generated/wasm.rs b/fearless_simd/src/generated/wasm.rs
diff --git a/fearless_simd/src/traits.rs b/fearless_simd/src/traits.rs
@@ -9,11 +9,12 @@ use crate::{Level, Simd, SimdBase, seal::Seal};
 
 /// Element-wise selection between two SIMD vectors using `self`.
 pub trait Select<T: Seal>: Seal {
-    /// For each element of this mask, select the first operand if the element is all ones, and select the second
-    /// operand if the element is all zeroes.
+    /// For each logical lane of this mask, select the corresponding lane of `if_true` if the mask lane is true, and
+    /// the corresponding lane of `if_false` if the mask lane is false.
     ///
-    /// If a mask element is *not* all ones or all zeroes, the result is unspecified. It may vary depending on
-    /// architecture, feature level, the mask elements' width, the mask vector's width, or library version.
+    /// Masks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those
+    /// conversions, false is encoded as all zeroes and true is encoded as all ones. If a mask is constructed from any
+    /// other integer bit pattern, the result of this operation is unspecified.
     fn select(self, if_true: T, if_false: T) -> T;
 }
 

diff --git a/fearless_simd_gen/src/mk_fallback.rs b/fearless_simd_gen/src/mk_fallback.rs
@@ -152,7 +152,7 @@ impl Level for Fallback {
                 let items = make_list(
                     (0..vec_ty.len)
                         .map(|idx| {
-                            let args = [quote! { a[#idx] }];
+                            let args = [lane(quote! { a }, vec_ty, idx)];
                             let expr = fallback::expr(method, vec_ty, &args);
                             quote! { #expr }
                         })
@@ -170,7 +170,8 @@ impl Level for Fallback {
                     (0..vec_ty.len)
                         .map(|idx| {
                             let scalar_ty = target_ty.scalar.rust(target_ty.scalar_bits);
-                            quote! { a[#idx] as #scalar_ty }
+                            let a = lane(quote! { a }, vec_ty, idx);
+                            quote! { #a as #scalar_ty }
                         })
                         .collect::<Vec<_>>(),
                 );
@@ -185,19 +186,20 @@ impl Level for Fallback {
                 let items = make_list(
                     (0..vec_ty.len)
                         .map(|idx| {
+                            let b_lane = lane(quote! { b }, vec_ty, idx);
                             let b = if fallback::translate_op(
                                 method,
                                 vec_ty.scalar == ScalarType::Float,
                             )
                             .map(rhs_reference)
                             .unwrap_or(true)
                             {
-                                quote! { &b[#idx] }
+                                quote! { &#b_lane }
                             } else {
-                                quote! { b[#idx] }
+                                b_lane
                             };
 
-                            let args = [quote! { a[#idx] }, quote! { #b }];
+                            let args = [lane(quote! { a }, vec_ty, idx), quote! { #b }];
                             let expr = fallback::expr(method, vec_ty, &args);
                             quote! { #expr }
                         })
@@ -214,7 +216,7 @@ impl Level for Fallback {
                 let items = make_list(
                     (0..vec_ty.len)
                         .map(|idx| {
-                            let args = [quote! { a[#idx] }, quote! { shift }];
+                            let args = [lane(quote! { a }, vec_ty, idx), quote! { shift }];
                             let expr = fallback::expr(method, vec_ty, &args);
                             quote! { #expr }
                         })
@@ -260,7 +262,9 @@ impl Level for Fallback {
                 let items = make_list(
                     (0..vec_ty.len)
                         .map(|idx: usize| {
-                            let args = [quote! { &a[#idx] }, quote! { &b[#idx] }];
+                            let a = lane(quote! { a }, vec_ty, idx);
+                            let b = lane(quote! { b }, vec_ty, idx);
+                            let args = [quote! { &#a }, quote! { &#b }];
                             let expr = fallback::expr(method, vec_ty, &args);
                             let mask_ty = mask_type.scalar.rust(vec_ty.scalar_bits);
                             quote! { -(#expr as #mask_ty) }
@@ -275,10 +279,14 @@ impl Level for Fallback {
                 }
             }
             OpSig::Select => {
+                let mask_type = vec_ty.mask_ty();
                 let items = make_list(
                     (0..vec_ty.len)
                         .map(|idx| {
-                            quote! { if a[#idx] != 0 { b[#idx] } else { c[#idx] } }
+                            let a = lane(quote! { a }, &mask_type, idx);
+                            let b = lane(quote! { b }, vec_ty, idx);
+                            let c = lane(quote! { c }, vec_ty, idx);
+                            quote! { if #a != 0 { #b } else { #c } }
                         })
                         .collect::<Vec<_>>(),
                 );
@@ -332,7 +340,9 @@ impl Level for Fallback {
                 let zip = make_list(
                     indices
                         .map(|idx| {
-                            quote! {a[#idx], b[#idx] }
+                            let a = lane(quote! { a }, vec_ty, idx);
+                            let b = lane(quote! { b }, vec_ty, idx);
+                            quote! { #a, #b }
                         })
                         .collect::<Vec<_>>(),
                 );
@@ -353,12 +363,8 @@ impl Level for Fallback {
                 let unzip = make_list(
                     indices
                         .clone()
-                        .map(|idx| {
-                            quote! {a[#idx]}
-                        })
-                        .chain(indices.map(|idx| {
-                            quote! {b[#idx]}
-                        }))
+                        .map(|idx| lane(quote! { a }, vec_ty, idx))
+                        .chain(indices.map(|idx| lane(quote! { b }, vec_ty, idx)))
                         .collect::<Vec<_>>(),
                 );
 
@@ -398,7 +404,8 @@ impl Level for Fallback {
                     let items = make_list(
                         (0..vec_ty.len)
                             .map(|idx| {
-                                quote! { a[#idx] as #scalar }
+                                let a = lane(quote! { a }, vec_ty, idx);
+                                quote! { #a as #scalar }
                             })
                             .collect::<Vec<_>>(),
                     );
@@ -427,7 +434,6 @@ impl Level for Fallback {
                 quantifier,
                 condition,
             } => {
-                let indices = (0..vec_ty.len).map(|idx| quote! { #idx });
                 let check = if condition {
                     quote! { != }
                 } else {
@@ -436,10 +442,12 @@ impl Level for Fallback {
 
                 let expr = match quantifier {
                     crate::ops::Quantifier::Any => {
-                        quote! { #(a[#indices] #check 0)||* }
+                        let lanes = (0..vec_ty.len).map(|idx| lane(quote! { a }, vec_ty, idx));
+                        quote! { #(#lanes #check 0)||* }
                     }
                     crate::ops::Quantifier::All => {
-                        quote! { #(a[#indices] #check 0)&&* }
+                        let lanes = (0..vec_ty.len).map(|idx| lane(quote! { a }, vec_ty, idx));
+                        quote! { #(#lanes #check 0)&&* }
                     }
                 };
 
@@ -546,6 +554,18 @@ fn interleave_indices(
     make_list(indices.into_iter().map(func).collect::<Vec<_>>())
 }
 
+/// Emits the tokens that read lane `idx` of the vector expression `value`.
+///
+/// Vectors whose scalar type is [`ScalarType::Mask`] are indexed via `.val.0`;
+/// all other vectors are indexed directly.
+fn lane(value: TokenStream, vec_ty: &VecType, idx: usize) -> TokenStream {
+    if vec_ty.scalar == ScalarType::Mask {
+        quote! { #value.val.0[#idx] }
+    } else {
+        quote! { #value[#idx] }
+    }
+}
+
 /// Whether the second argument of the function needs to be passed by reference.
 fn rhs_reference(method: &str) -> bool {
     !matches!(

diff --git a/fearless_simd_gen/src/mk_ops.rs b/fearless_simd_gen/src/mk_ops.rs
@@ -7,7 +7,7 @@ use quote::{format_ident, quote};
 use crate::{
     generic::generic_op_name,
     ops::{CoreOpTrait, OpKind, OpSig, TyFlavor, overloaded_ops_for},
-    types::{SIMD_TYPES, type_imports},
+    types::{SIMD_TYPES, ScalarType, type_imports},
 };
 
 pub(crate) fn mk_ops() -> TokenStream {
@@ -85,6 +85,32 @@ pub(crate) fn mk_ops() -> TokenStream {
                 }
                 _ => {
                     let scalar = ty.scalar.rust(ty.scalar_bits);
+                    let scalar_overloads = (ty.scalar != ScalarType::Mask).then(|| {
+                        quote! {
+                            impl<S: Simd> core::ops::#trait_id<#scalar> for #simd<S> {
+                                type Output = Self;
+                                #[inline(always)]
+                                fn #opfn(self, rhs: #scalar) -> Self::Output {
+                                    self.simd.#simd_fn(self, rhs.simd_into(self.simd))
+                                }
+                            }
+
+                            impl<S: Simd> core::ops::#trait_assign_id<#scalar> for #simd<S> {
+                                #[inline(always)]
+                                fn #op_assign_fn(&mut self, rhs: #scalar) {
+                                    *self = self.simd.#simd_fn(*self, rhs.simd_into(self.simd));
+                                }
+                            }
+
+                            impl<S: Simd> core::ops::#trait_id<#simd<S>> for #scalar {
+                                type Output = #simd<S>;
+                                #[inline(always)]
+                                fn #opfn(self, rhs: #simd<S>) -> Self::Output {
+                                    rhs.simd.#simd_fn(self.simd_into(rhs.simd), rhs)
+                                }
+                            }
+                        }
+                    });
                     impls.push(quote! {
                         impl<S: Simd> core::ops::#trait_id for #simd<S> {
                             type Output = Self;
@@ -103,28 +129,7 @@ pub(crate) fn mk_ops() -> TokenStream {
                             }
                         }
 
-                        impl<S: Simd> core::ops::#trait_id<#scalar> for #simd<S> {
-                            type Output = Self;
-                            #[inline(always)]
-                            fn #opfn(self, rhs: #scalar) -> Self::Output {
-                                self.simd.#simd_fn(self, rhs.simd_into(self.simd))
-                            }
-                        }
-
-                        impl<S: Simd> core::ops::#trait_assign_id<#scalar> for #simd<S> {
-                            #[inline(always)]
-                            fn #op_assign_fn(&mut self, rhs: #scalar) {
-                                *self = self.simd.#simd_fn(*self, rhs.simd_into(self.simd));
-                            }
-                        }
-
-                        impl<S: Simd> core::ops::#trait_id<#simd<S>> for #scalar {
-                            type Output = #simd<S>;
-                            #[inline(always)]
-                            fn #opfn(self, rhs: #simd<S>) -> Self::Output {
-                                rhs.simd.#simd_fn(self.simd_into(rhs.simd), rhs)
-                            }
-                        }
+                        #scalar_overloads
                     });
                 }
             }

diff --git a/fearless_simd_gen/src/mk_simd_trait.rs b/fearless_simd_gen/src/mk_simd_trait.rs
@@ -1,11 +1,14 @@
 // Copyright 2025 the Fearless_SIMD Authors
 // SPDX-License-Identifier: Apache-2.0 OR MIT
 
-use proc_macro2::TokenStream;
-use quote::quote;
+use proc_macro2::{Ident, Span, TokenStream};
+use quote::{format_ident, quote};
 
 use crate::{
-    ops::{OpKind, TyFlavor, base_trait_ops, ops_for_type, overloaded_ops_for, vec_trait_ops_for},
+    ops::{
+        CoreOpTrait, OpKind, OpSig, TyFlavor, base_trait_ops, ops_for_type, overloaded_ops_for,
+        vec_trait_ops_for,
+    },
     types::{SIMD_TYPES, ScalarType, type_imports},
 };
 
@@ -80,14 +83,13 @@ pub(crate) fn mk_simd_trait() -> TokenStream {
             type i32s: SimdInt<Self, Element = i32, Block = i32x4<Self>, Mask = Self::mask32s, Bytes = <Self::u32s as Bytes>::Bytes> + SimdCvtTruncate<Self::f32s>
                 + core::ops::Neg<Output = Self::i32s>;
             /// A native-width SIMD mask with 8-bit lanes.
-            type mask8s: SimdMask<Self, Element = i8, Block = mask8x16<Self>, Bytes = <Self::u8s as Bytes>::Bytes> + Select<Self::u8s> + Select<Self::i8s> + Select<Self::mask8s>;
+            type mask8s: SimdMask<Self, Element = i8> + SimdFrom<i8, Self> + Select<Self::u8s> + Select<Self::i8s> + Select<Self::mask8s>;
             /// A native-width SIMD mask with 16-bit lanes.
-            type mask16s: SimdMask<Self, Element = i16, Block = mask16x8<Self>, Bytes = <Self::u16s as Bytes>::Bytes> + Select<Self::u16s> + Select<Self::i16s> + Select<Self::mask16s>;
+            type mask16s: SimdMask<Self, Element = i16> + SimdFrom<i16, Self> + Select<Self::u16s> + Select<Self::i16s> + Select<Self::mask16s>;
             /// A native-width SIMD mask with 32-bit lanes.
-            type mask32s: SimdMask<Self, Element = i32, Block = mask32x4<Self>, Bytes = <Self::u32s as Bytes>::Bytes>
-                + Select<Self::f32s> + Select<Self::u32s> + Select<Self::i32s> + Select<Self::mask32s>;
+            type mask32s: SimdMask<Self, Element = i32> + SimdFrom<i32, Self> + Select<Self::f32s> + Select<Self::u32s> + Select<Self::i32s> + Select<Self::mask32s>;
             /// A native-width SIMD mask with 64-bit lanes.
-            type mask64s: SimdMask<Self, Element = i64, Block = mask64x2<Self>> + Select<Self::f64s> + Select<Self::mask64s>;
+            type mask64s: SimdMask<Self, Element = i64> + SimdFrom<i64, Self> + Select<Self::f64s> + Select<Self::mask64s>;
 
             /// This SIMD token's feature level.
             fn level(self) -> Level;
@@ -156,14 +158,11 @@ fn mk_simd_base() -> TokenStream {
             /// working with a native-width vector (e.g. [`Simd::f32s`]) and
             /// want to process data in native-width chunks.
             const N: usize;
-            /// A SIMD vector mask with the same number of elements.
+            /// A SIMD vector mask with the same number of logical lanes.
             ///
-            /// The mask element is represented as an integer which is
-            /// all-0 for `false` and all-1 for `true`. When we get deep
-            /// into AVX-512, we need to think about predication masks.
-            ///
-            /// One possibility to consider is that the SIMD trait grows
-            /// `maskAxB` associated types.
+            /// Masks intentionally do not implement [`SimdBase`]. SSE, NEON, WASM, and the
+            /// fallback backend currently store masks as all-zero/all-one integer vectors, but
+            /// AVX-512/RVV/SVE-style targets use compact predicate registers instead.
             type Mask: SimdMask<S, Element = <Self::Element as SimdElement>::Mask>;
             /// A 128-bit SIMD vector of the same scalar type.
             type Block: SimdBase<S, Element = Self::Element>;
@@ -271,12 +270,50 @@ fn mk_simd_mask() -> TokenStream {
             OpKind::Overloaded(core_op) => Some(core_op),
             _ => None,
         })
-        .flat_map(|core_op| core_op.trait_bounds());
+        .flat_map(|core_op| {
+            let trait_name = Ident::new(core_op.trait_name(), Span::call_site());
+            let trait_name_assign = format_ident!("{trait_name}Assign");
+            match core_op {
+                CoreOpTrait::Not => vec![quote! { core::ops::#trait_name<Output = Self> }],
+                _ => vec![
+                    quote! { core::ops::#trait_name<Output = Self> },
+                    quote! { core::ops::#trait_name_assign },
+                ],
+            }
+        });
     quote! {
         /// Functionality implemented by SIMD masks.
-        pub trait SimdMask<S: Simd>: SimdBase<S> + Seal
+        ///
+        /// A mask has one logical boolean lane per SIMD lane. Its storage is intentionally opaque:
+        /// current backends may use all-zero/all-one integer vectors internally, while future
+        /// predicate-register backends may use a compact representation.
+        pub trait SimdMask<S: Simd>:
+            Copy + Sync + Send + 'static
+            + Seal
+            + Select<Self>
             #(+ #op_traits)*
         {
+            /// The signed integer type used when converting this mask to and from lane values.
+            ///
+            /// False lanes are encoded as all zeroes, and true lanes are encoded as all ones.
+            type Element: SimdElement;
+
+            /// This mask type's lane count.
+            const N: usize;
+
+            /// Get the [`Simd`] implementation associated with this type.
+            fn witness(&self) -> S;
+
+            /// Create a SIMD mask from signed integer mask lanes.
+            ///
+            /// The slice must be exactly the size of the SIMD mask.
+            fn from_slice(simd: S, slice: &[Self::Element]) -> Self;
+
+            /// Store this SIMD mask as signed integer mask lanes.
+            ///
+            /// The slice must be exactly the size of the SIMD mask.
+            fn store_slice(&self, slice: &mut [Self::Element]);
+
             #( #methods )*
         }
     }
@@ -286,7 +323,12 @@ fn methods_for_vec_trait(scalar: ScalarType) -> Vec<TokenStream> {
     let mut methods = vec![];
     for op in vec_trait_ops_for(scalar) {
         let doc = op.format_docstring(TyFlavor::VecImpl);
-        if let Some(method_sig) = op.vec_trait_method_sig() {
+        let method_sig = if scalar == ScalarType::Mask && matches!(op.sig, OpSig::Compare) {
+            Some(quote! { fn simd_eq(self, rhs: impl SimdInto<Self, S>) -> Self })
+        } else {
+            op.vec_trait_method_sig()
+        };
+        if let Some(method_sig) = method_sig {
             methods.push(quote! {
                 #[doc = #doc]
                 #method_sig;