Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions fearless_simd/src/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
clippy::cast_possible_truncation,
clippy::unseparated_literal_suffix,
clippy::use_self,
clippy::wrong_self_convention,
Comment thread
LaurenzV marked this conversation as resolved.
reason = "TODO: https://github.com/linebender/fearless_simd/issues/40"
)]
#![cfg_attr(
Expand Down
267 changes: 267 additions & 0 deletions fearless_simd/src/generated/avx2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -895,6 +895,26 @@ impl Simd for Avx2 {
unsafe { core::mem::transmute::<__m128i, [i8; 16usize]>(a.val.0) }
}
/// Expands the low 16 bits of `bits` into a 16-lane byte mask.
///
/// Lane `i` of the result is all-ones when bit `i` of `bits` is set and all-zeros
/// otherwise. Bits 16 and above are not consulted.
#[inline(always)]
fn from_bitmask_mask8x16(self, bits: u64) -> mask8x16<Self> {
    unsafe {
        // Which byte of `bits` each lane reads: lanes 0..8 take byte 0, lanes 8..16 take byte 1.
        let source_byte = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        // The single bit each lane tests inside its source byte.
        let lane_bit =
            _mm_setr_epi8(1, 2, 4, 8, 16, 32, 64, -128, 1, 2, 4, 8, 16, 32, 64, -128);
        let packed = _mm_cvtsi32_si128(bits as i32);
        let spread = _mm_shuffle_epi8(packed, source_byte);
        let selected = _mm_and_si128(spread, lane_bit);
        // A lane is "true" exactly when its tested bit survived the AND.
        _mm_cmpeq_epi8(selected, lane_bit).simd_into(self)
    }
}
/// Collects the sign bit of each of the 16 byte lanes of `a` into the low 16 bits of
/// the result; bit `i` comes from lane `i`.
#[inline(always)]
fn to_bitmask_mask8x16(self, a: mask8x16<Self>) -> u64 {
    let sign_bits = unsafe { _mm_movemask_epi8(a.into()) };
    // The intrinsic returns `i32`; go through `u32` so the widening zero-extends.
    u64::from(sign_bits as u32)
}
/// Bitwise AND of the 128-bit vectors underlying `a` and `b`.
#[inline(always)]
fn and_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
    unsafe {
        let lhs = a.into();
        let rhs = b.into();
        _mm_and_si128(lhs, rhs).simd_into(self)
    }
}
Expand Down Expand Up @@ -1386,6 +1406,26 @@ impl Simd for Avx2 {
unsafe { core::mem::transmute::<__m128i, [i16; 8usize]>(a.val.0) }
}
/// Expands the low 8 bits of `bits` into an 8-lane mask of 16-bit lanes.
///
/// Lane `i` is all-ones when bit `i` of `bits` is set and all-zeros otherwise.
#[inline(always)]
fn from_bitmask_mask16x8(self, bits: u64) -> mask16x8<Self> {
    unsafe {
        // Lane `i` tests bit `i`.
        let lane_bit = _mm_setr_epi16(1, 2, 4, 8, 16, 32, 64, 128);
        // Every lane receives the same low 16 bits of `bits`.
        let broadcast = _mm_set1_epi16(bits as i16);
        let selected = _mm_and_si128(broadcast, lane_bit);
        _mm_cmpeq_epi16(selected, lane_bit).simd_into(self)
    }
}
/// Collects the sign bit of each of the 8 sixteen-bit lanes of `a` into the low 8 bits
/// of the result; bit `i` comes from lane `i`.
#[inline(always)]
fn to_bitmask_mask16x8(self, a: mask16x8<Self>) -> u64 {
    let byte_signs = unsafe {
        let lanes = a.into();
        // Signed saturation keeps every lane's sign, so narrowing to bytes preserves the
        // bit that `movemask` will read.
        let narrowed = _mm_packs_epi16(lanes, lanes);
        _mm_movemask_epi8(narrowed)
    };
    // Both halves of the packed vector hold the same lanes; keep one copy (8 bits).
    u64::from(byte_signs as u8)
}
/// Bitwise AND of the 128-bit vectors underlying `a` and `b`.
#[inline(always)]
fn and_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
    unsafe {
        let lhs = a.into();
        let rhs = b.into();
        _mm_and_si128(lhs, rhs).simd_into(self)
    }
}
Expand Down Expand Up @@ -1887,6 +1927,21 @@ impl Simd for Avx2 {
unsafe { core::mem::transmute::<__m128i, [i32; 4usize]>(a.val.0) }
}
/// Expands the low 4 bits of `bits` into a 4-lane mask of 32-bit lanes.
///
/// Lane `i` is all-ones when bit `i` of `bits` is set and all-zeros otherwise.
#[inline(always)]
fn from_bitmask_mask32x4(self, bits: u64) -> mask32x4<Self> {
    unsafe {
        // Lane `i` tests bit `i`.
        let lane_bit = _mm_setr_epi32(1, 2, 4, 8);
        let broadcast = _mm_set1_epi32(bits as i32);
        let selected = _mm_and_si128(broadcast, lane_bit);
        _mm_cmpeq_epi32(selected, lane_bit).simd_into(self)
    }
}
/// Collects the sign bit of each of the 4 thirty-two-bit lanes of `a` into the low
/// 4 bits of the result; bit `i` comes from lane `i`.
#[inline(always)]
fn to_bitmask_mask32x4(self, a: mask32x4<Self>) -> u64 {
    let sign_bits = unsafe {
        // Reinterpret as floats only to reach the per-32-bit-lane sign-bit extractor.
        let as_floats = _mm_castsi128_ps(a.into());
        _mm_movemask_ps(as_floats)
    };
    u64::from(sign_bits as u32)
}
/// Bitwise AND of the 128-bit vectors underlying `a` and `b`.
#[inline(always)]
fn and_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
    unsafe {
        let lhs = a.into();
        let rhs = b.into();
        _mm_and_si128(lhs, rhs).simd_into(self)
    }
}
Expand Down Expand Up @@ -2189,6 +2244,21 @@ impl Simd for Avx2 {
unsafe { core::mem::transmute::<__m128i, [i64; 2usize]>(a.val.0) }
}
/// Expands the low 2 bits of `bits` into a 2-lane mask of 64-bit lanes.
///
/// Lane `i` is all-ones when bit `i` of `bits` is set and all-zeros otherwise.
#[inline(always)]
fn from_bitmask_mask64x2(self, bits: u64) -> mask64x2<Self> {
    unsafe {
        // `_mm_set_epi64x` takes the high lane first: lane 0 tests bit 0, lane 1 tests bit 1.
        let lane_bit = _mm_set_epi64x(2, 1);
        let broadcast = _mm_set1_epi64x(bits.cast_signed());
        let selected = _mm_and_si128(broadcast, lane_bit);
        _mm_cmpeq_epi64(selected, lane_bit).simd_into(self)
    }
}
/// Collects the sign bit of each of the 2 sixty-four-bit lanes of `a` into the low
/// 2 bits of the result; bit `i` comes from lane `i`.
#[inline(always)]
fn to_bitmask_mask64x2(self, a: mask64x2<Self>) -> u64 {
    let sign_bits = unsafe {
        // Reinterpret as doubles only to reach the per-64-bit-lane sign-bit extractor.
        let as_doubles = _mm_castsi128_pd(a.into());
        _mm_movemask_pd(as_doubles)
    };
    u64::from(sign_bits as u32)
}
/// Bitwise AND of the 128-bit vectors underlying `a` and `b`.
#[inline(always)]
fn and_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
    unsafe {
        let lhs = a.into();
        let rhs = b.into();
        _mm_and_si128(lhs, rhs).simd_into(self)
    }
}
Expand Down Expand Up @@ -3270,6 +3340,31 @@ impl Simd for Avx2 {
unsafe { core::mem::transmute::<__m256i, [i8; 32usize]>(a.val.0) }
}
/// Expands the low 32 bits of `bits` into a 32-lane byte mask.
///
/// Lane `i` is all-ones when bit `i` of `bits` is set and all-zeros otherwise.
#[inline(always)]
fn from_bitmask_mask8x32(self, bits: u64) -> mask8x32<Self> {
    unsafe {
        // Which byte of `bits` each lane reads. The byte shuffle works within each
        // 128-bit half, which is why the four source bytes are first copied into both halves.
        let source_byte = _mm256_setr_epi8(
            0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
            3, 3, 3, 3,
        );
        // The single bit each lane tests inside its source byte.
        let lane_bit = _mm256_setr_epi8(
            1, 2, 4, 8, 16, 32, 64, -128, 1, 2, 4, 8, 16, 32, 64, -128, 1, 2, 4, 8, 16, 32, 64,
            -128, 1, 2, 4, 8, 16, 32, 64, -128,
        );
        let packed = _mm_cvtsi32_si128(bits as i32);
        let both_halves = _mm256_broadcastsi128_si256(packed);
        let spread = _mm256_shuffle_epi8(both_halves, source_byte);
        let selected = _mm256_and_si256(spread, lane_bit);
        _mm256_cmpeq_epi8(selected, lane_bit).simd_into(self)
    }
}
/// Collects the sign bit of each of the 32 byte lanes of `a` into the low 32 bits of
/// the result; bit `i` comes from lane `i`.
#[inline(always)]
fn to_bitmask_mask8x32(self, a: mask8x32<Self>) -> u64 {
    let sign_bits = unsafe { _mm256_movemask_epi8(a.into()) };
    // The intrinsic returns `i32`; go through `u32` so bit 31 does not sign-extend.
    u64::from(sign_bits as u32)
}
/// Bitwise AND of the 256-bit vectors underlying `a` and `b`.
#[inline(always)]
fn and_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
    unsafe {
        let lhs = a.into();
        let rhs = b.into();
        _mm256_and_si256(lhs, rhs).simd_into(self)
    }
}
Expand Down Expand Up @@ -3963,6 +4058,29 @@ impl Simd for Avx2 {
unsafe { core::mem::transmute::<__m256i, [i16; 16usize]>(a.val.0) }
}
/// Expands the low 16 bits of `bits` into a 16-lane mask of 16-bit lanes.
///
/// Lane `i` is all-ones when bit `i` of `bits` is set and all-zeros otherwise.
#[inline(always)]
fn from_bitmask_mask16x16(self, bits: u64) -> mask16x16<Self> {
    unsafe {
        // Lane `i` tests bit `i`; the last entry is bit 15 written as a signed literal.
        let lane_bit = _mm256_setr_epi16(
            1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, -32768,
        );
        let broadcast = _mm256_set1_epi16(bits as i16);
        let selected = _mm256_and_si256(broadcast, lane_bit);
        _mm256_cmpeq_epi16(selected, lane_bit).simd_into(self)
    }
}
/// Collects the sign bit of each of the 16 sixteen-bit lanes of `a` into the low
/// 16 bits of the result; bit `i` comes from lane `i`.
#[inline(always)]
fn to_bitmask_mask16x16(self, a: mask16x16<Self>) -> u64 {
    let byte_signs = unsafe {
        // View the 256-bit vector as its two 128-bit halves.
        let [front, back]: [__m128i; 2usize] = core::mem::transmute(a.val.0);
        // Signed saturation keeps each lane's sign, so one byte per lane is enough.
        let narrowed = _mm_packs_epi16(front, back);
        _mm_movemask_epi8(narrowed)
    };
    u64::from(byte_signs as u32)
}
/// Bitwise AND of the 256-bit vectors underlying `a` and `b`.
#[inline(always)]
fn and_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
    unsafe {
        let lhs = a.into();
        let rhs = b.into();
        _mm256_and_si256(lhs, rhs).simd_into(self)
    }
}
Expand Down Expand Up @@ -4601,6 +4719,21 @@ impl Simd for Avx2 {
unsafe { core::mem::transmute::<__m256i, [i32; 8usize]>(a.val.0) }
}
/// Expands the low 8 bits of `bits` into an 8-lane mask of 32-bit lanes.
///
/// Lane `i` is all-ones when bit `i` of `bits` is set and all-zeros otherwise.
#[inline(always)]
fn from_bitmask_mask32x8(self, bits: u64) -> mask32x8<Self> {
    unsafe {
        // Lane `i` tests bit `i`.
        let lane_bit = _mm256_setr_epi32(1, 2, 4, 8, 16, 32, 64, 128);
        let broadcast = _mm256_set1_epi32(bits as i32);
        let selected = _mm256_and_si256(broadcast, lane_bit);
        _mm256_cmpeq_epi32(selected, lane_bit).simd_into(self)
    }
}
/// Collects the sign bit of each of the 8 thirty-two-bit lanes of `a` into the low
/// 8 bits of the result; bit `i` comes from lane `i`.
#[inline(always)]
fn to_bitmask_mask32x8(self, a: mask32x8<Self>) -> u64 {
    let sign_bits = unsafe {
        // Reinterpret as floats only to reach the per-32-bit-lane sign-bit extractor.
        let as_floats = _mm256_castsi256_ps(a.into());
        _mm256_movemask_ps(as_floats)
    };
    u64::from(sign_bits as u32)
}
/// Bitwise AND of the 256-bit vectors underlying `a` and `b`.
#[inline(always)]
fn and_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
    unsafe {
        let lhs = a.into();
        let rhs = b.into();
        _mm256_and_si256(lhs, rhs).simd_into(self)
    }
}
Expand Down Expand Up @@ -4978,6 +5111,21 @@ impl Simd for Avx2 {
unsafe { core::mem::transmute::<__m256i, [i64; 4usize]>(a.val.0) }
}
/// Expands the low 4 bits of `bits` into a 4-lane mask of 64-bit lanes.
///
/// Lane `i` is all-ones when bit `i` of `bits` is set and all-zeros otherwise.
#[inline(always)]
fn from_bitmask_mask64x4(self, bits: u64) -> mask64x4<Self> {
    unsafe {
        // `_mm256_set_epi64x` takes the highest lane first, so lane `i` tests bit `i`.
        let lane_bit = _mm256_set_epi64x(8, 4, 2, 1);
        let broadcast = _mm256_set1_epi64x(bits.cast_signed());
        let selected = _mm256_and_si256(broadcast, lane_bit);
        _mm256_cmpeq_epi64(selected, lane_bit).simd_into(self)
    }
}
/// Collects the sign bit of each of the 4 sixty-four-bit lanes of `a` into the low
/// 4 bits of the result; bit `i` comes from lane `i`.
#[inline(always)]
fn to_bitmask_mask64x4(self, a: mask64x4<Self>) -> u64 {
    let sign_bits = unsafe {
        // Reinterpret as doubles only to reach the per-64-bit-lane sign-bit extractor.
        let as_doubles = _mm256_castsi256_pd(a.into());
        _mm256_movemask_pd(as_doubles)
    };
    u64::from(sign_bits as u32)
}
/// Bitwise AND of the 256-bit vectors underlying `a` and `b`.
#[inline(always)]
fn and_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
    unsafe {
        let lhs = a.into();
        let rhs = b.into();
        _mm256_and_si256(lhs, rhs).simd_into(self)
    }
}
Expand Down Expand Up @@ -6076,6 +6224,50 @@ impl Simd for Avx2 {
unsafe { core::mem::transmute::<[__m256i; 2usize], [i8; 64usize]>(a.val.0) }
}
/// Expands all 64 bits of `bits` into a 64-lane byte mask stored as two 256-bit halves.
///
/// Lane `i` is all-ones when bit `i` of `bits` is set and all-zeros otherwise. The first
/// stored vector covers bits 0..32 and the second covers bits 32..64.
#[inline(always)]
fn from_bitmask_mask8x64(self, bits: u64) -> mask8x64<Self> {
    unsafe {
        // The single bit each lane tests inside its source byte (same pattern for both halves).
        let lane_bit = _mm256_setr_epi8(
            1, 2, 4, 8, 16, 32, 64, -128, 1, 2, 4, 8, 16, 32, 64, -128, 1, 2, 4, 8, 16, 32, 64,
            -128, 1, 2, 4, 8, 16, 32, 64, -128,
        );
        // Source byte per lane: bytes 0..=3 of `bits` for the first half, 4..=7 for the second.
        let first_source = _mm256_setr_epi8(
            0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
            3, 3, 3, 3,
        );
        let second_source = _mm256_setr_epi8(
            4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7,
            7, 7, 7, 7,
        );
        // Every 64-bit lane holds all eight bytes of `bits`, so each 128-bit half of the
        // vector can reach any of them with the in-half byte shuffle.
        let broadcast = _mm256_set1_epi64x(bits.cast_signed());

        let first_spread = _mm256_shuffle_epi8(broadcast, first_source);
        let first = _mm256_cmpeq_epi8(_mm256_and_si256(first_spread, lane_bit), lane_bit);

        let second_spread = _mm256_shuffle_epi8(broadcast, second_source);
        let second = _mm256_cmpeq_epi8(_mm256_and_si256(second_spread, lane_bit), lane_bit);

        mask8x64 {
            val: crate::support::Aligned512([first, second]),
            simd: self,
        }
    }
}
/// Converts a 64-lane byte mask to a 64-bit bitmask by converting each 32-lane half
/// with `to_bitmask_mask8x32` and placing the second half's bits in bits 32..64.
#[inline(always)]
fn to_bitmask_mask8x64(self, a: mask8x64<Self>) -> u64 {
    let (first_half, second_half) = self.split_mask8x64(a);
    let low_bits = self.to_bitmask_mask8x32(first_half);
    let high_bits = self.to_bitmask_mask8x32(second_half);
    (high_bits << 32usize) | low_bits
}
#[inline(always)]
fn and_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_mask8x64(a);
let (b0, b1) = self.split_mask8x64(b);
Expand Down Expand Up @@ -6814,6 +7006,24 @@ impl Simd for Avx2 {
unsafe { core::mem::transmute::<[__m256i; 2usize], [i16; 32usize]>(a.val.0) }
}
/// Builds a 32-lane mask of 16-bit lanes from the low 32 bits of `bits` by building two
/// 16-lane masks (from `bits` and from `bits >> 16`) and combining them.
#[inline(always)]
fn from_bitmask_mask16x32(self, bits: u64) -> mask16x32<Self> {
    // `from_bitmask_mask16x16` only looks at its low 16 bits, so no explicit masking is needed.
    self.combine_mask16x16(
        self.from_bitmask_mask16x16(bits),
        self.from_bitmask_mask16x16(bits >> 16usize),
    )
}
/// Collects the sign bit of each of the 32 sixteen-bit lanes of `a` into the low
/// 32 bits of the result; bit `i` comes from lane `i`.
///
/// The byte-granular `movemask` yields two bits per 16-bit lane: the even bit is the top
/// bit of the lane's low byte, the odd bit is the top bit of its high byte (the lane's
/// sign bit). The odd bits are the ones kept, so this method reads the same bit as the
/// other `to_bitmask_*` conversions (which use per-lane sign-bit extraction or a
/// sign-preserving saturating pack). For masks whose lanes are all-ones or all-zeros the
/// result is the same whichever bit of the pair is taken.
#[inline(always)]
fn to_bitmask_mask16x32(self, a: mask16x32<Self>) -> u64 {
    // Selects bit `2k + 1` for every lane `k`: the sign bit of each 16-bit lane.
    const LANE_SIGN_BITS: u32 = 0xAAAA_AAAA;
    unsafe {
        let lo = _mm256_movemask_epi8(a.val.0[0]) as u32;
        let hi = _mm256_movemask_epi8(a.val.0[1]) as u32;
        // Compress the 16 selected bits of each half down to a contiguous 16-bit field.
        let lo = _pext_u32(lo, LANE_SIGN_BITS) as u64;
        let hi = _pext_u32(hi, LANE_SIGN_BITS) as u64;
        lo | (hi << 16usize)
    }
}
#[inline(always)]
fn and_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_mask16x32(a);
let (b0, b1) = self.split_mask16x32(b);
Expand Down Expand Up @@ -7516,6 +7726,35 @@ impl Simd for Avx2 {
unsafe { core::mem::transmute::<[__m256i; 2usize], [i32; 16usize]>(a.val.0) }
}
/// Expands the low 16 bits of `bits` into a 16-lane mask of 32-bit lanes stored as two
/// 256-bit halves.
///
/// Lane `i` is all-ones when bit `i` of `bits` is set and all-zeros otherwise. The first
/// stored vector covers bits 0..8 and the second covers bits 8..16.
#[inline(always)]
fn from_bitmask_mask32x16(self, bits: u64) -> mask32x16<Self> {
    unsafe {
        // Bit tested by each lane of the first and second half respectively.
        let first_bits = _mm256_setr_epi32(1, 2, 4, 8, 16, 32, 64, 128);
        let second_bits = _mm256_setr_epi32(256, 512, 1024, 2048, 4096, 8192, 16384, 32768);
        // One broadcast serves both halves; only the tested bit differs per lane.
        let broadcast = _mm256_set1_epi32(bits as i32);

        let first = _mm256_cmpeq_epi32(_mm256_and_si256(broadcast, first_bits), first_bits);
        let second = _mm256_cmpeq_epi32(_mm256_and_si256(broadcast, second_bits), second_bits);

        mask32x16 {
            val: crate::support::Aligned512([first, second]),
            simd: self,
        }
    }
}
/// Converts a 16-lane mask of 32-bit lanes to a bitmask by converting each 8-lane half
/// with `to_bitmask_mask32x8` and placing the second half's bits in bits 8..16.
#[inline(always)]
fn to_bitmask_mask32x16(self, a: mask32x16<Self>) -> u64 {
    let (first_half, second_half) = self.split_mask32x16(a);
    let low_bits = self.to_bitmask_mask32x8(first_half);
    let high_bits = self.to_bitmask_mask32x8(second_half);
    (high_bits << 8usize) | low_bits
}
#[inline(always)]
fn and_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_mask32x16(a);
let (b0, b1) = self.split_mask32x16(b);
Expand Down Expand Up @@ -7929,6 +8168,34 @@ impl Simd for Avx2 {
unsafe { core::mem::transmute::<[__m256i; 2usize], [i64; 8usize]>(a.val.0) }
}
/// Expands the low 8 bits of `bits` into an 8-lane mask of 64-bit lanes stored as two
/// 256-bit halves.
///
/// Lane `i` is all-ones when bit `i` of `bits` is set and all-zeros otherwise. The first
/// stored vector covers bits 0..4 and the second covers bits 4..8.
#[inline(always)]
fn from_bitmask_mask64x8(self, bits: u64) -> mask64x8<Self> {
    unsafe {
        // `_mm256_set_epi64x` takes the highest lane first, so these read as
        // lanes 0..4 -> bits 0..4 and lanes 4..8 -> bits 4..8.
        let first_bits = _mm256_set_epi64x(8, 4, 2, 1);
        let second_bits = _mm256_set_epi64x(128, 64, 32, 16);
        let broadcast = _mm256_set1_epi64x(bits.cast_signed());

        let first = _mm256_cmpeq_epi64(_mm256_and_si256(broadcast, first_bits), first_bits);
        let second = _mm256_cmpeq_epi64(_mm256_and_si256(broadcast, second_bits), second_bits);

        mask64x8 {
            val: crate::support::Aligned512([first, second]),
            simd: self,
        }
    }
}
/// Converts an 8-lane mask of 64-bit lanes to a bitmask by converting each 4-lane half
/// with `to_bitmask_mask64x4` and placing the second half's bits in bits 4..8.
#[inline(always)]
fn to_bitmask_mask64x8(self, a: mask64x8<Self>) -> u64 {
    let (first_half, second_half) = self.split_mask64x8(a);
    let low_bits = self.to_bitmask_mask64x4(first_half);
    let high_bits = self.to_bitmask_mask64x4(second_half);
    (high_bits << 4usize) | low_bits
}
#[inline(always)]
fn and_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
let (a0, a1) = self.split_mask64x8(a);
let (b0, b1) = self.split_mask64x8(b);
Expand Down
Loading
Loading