Skip to content

Commit 2937449

Browse files
authored
Add support for AVX (#121)
1 parent e820380 commit 2937449

8 files changed

Lines changed: 155 additions & 92 deletions

File tree

.github/workflows/rust.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
strategy:
1818
matrix:
1919
rust: [stable, nightly]
20-
features: ["+avx2", "+sse2"]
20+
features: ["+avx2", "+avx", "+sse2,+sse4.1", "+sse2"]
2121
env:
2222
RUSTFLAGS: "-C target-feature=${{matrix.features}} -D warnings"
2323
steps:

src/block/avx.rs

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
#[cfg(target_arch = "x86")]
2+
use core::arch::x86::*;
3+
#[cfg(target_arch = "x86_64")]
4+
use core::arch::x86_64::*;
5+
use core::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Not};
6+
7+
#[derive(Copy, Clone, Debug)]
8+
#[repr(transparent)]
9+
pub struct Block(pub(super) __m256d);
10+
11+
impl Block {
12+
#[inline]
13+
pub fn is_empty(self) -> bool {
14+
unsafe {
15+
let value = core::mem::transmute(self);
16+
_mm256_testz_si256(value, value) == 1
17+
}
18+
}
19+
20+
#[inline]
21+
pub fn andnot(self, other: Self) -> Self {
22+
unsafe { Self(_mm256_andnot_pd(other.0, self.0)) }
23+
}
24+
}
25+
26+
impl Not for Block {
27+
type Output = Block;
28+
#[inline]
29+
fn not(self) -> Self::Output {
30+
unsafe { Self(_mm256_xor_pd(self.0, Self::ALL.0)) }
31+
}
32+
}
33+
34+
impl BitAnd for Block {
35+
type Output = Block;
36+
#[inline]
37+
fn bitand(self, other: Self) -> Self::Output {
38+
unsafe { Self(_mm256_and_pd(self.0, other.0)) }
39+
}
40+
}
41+
42+
impl BitAndAssign for Block {
43+
#[inline]
44+
fn bitand_assign(&mut self, other: Self) {
45+
unsafe {
46+
self.0 = _mm256_and_pd(self.0, other.0);
47+
}
48+
}
49+
}
50+
51+
impl BitOr for Block {
52+
type Output = Block;
53+
#[inline]
54+
fn bitor(self, other: Self) -> Self::Output {
55+
unsafe { Self(_mm256_or_pd(self.0, other.0)) }
56+
}
57+
}
58+
59+
impl BitOrAssign for Block {
60+
#[inline]
61+
fn bitor_assign(&mut self, other: Self) {
62+
unsafe {
63+
self.0 = _mm256_or_pd(self.0, other.0);
64+
}
65+
}
66+
}
67+
68+
impl BitXor for Block {
69+
type Output = Block;
70+
#[inline]
71+
fn bitxor(self, other: Self) -> Self::Output {
72+
unsafe { Self(_mm256_xor_pd(self.0, other.0)) }
73+
}
74+
}
75+
76+
impl BitXorAssign for Block {
77+
#[inline]
78+
fn bitxor_assign(&mut self, other: Self) {
79+
unsafe { self.0 = _mm256_xor_pd(self.0, other.0) }
80+
}
81+
}
82+
83+
impl PartialEq for Block {
84+
#[inline]
85+
fn eq(&self, other: &Self) -> bool {
86+
unsafe {
87+
let new = _mm256_xor_pd(self.0, other.0);
88+
let neq = core::mem::transmute(new);
89+
_mm256_testz_si256(neq, neq) == 1
90+
}
91+
}
92+
}

src/block/avx2.rs

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,24 +6,9 @@ use core::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign,
66

77
#[derive(Copy, Clone, Debug)]
88
#[repr(transparent)]
9-
pub struct Block(__m256i);
9+
pub struct Block(pub(super) __m256i);
1010

1111
impl Block {
12-
pub const USIZE_COUNT: usize = core::mem::size_of::<Self>() / core::mem::size_of::<usize>();
13-
pub const NONE: Self = Self::from_usize_array([0; Self::USIZE_COUNT]);
14-
pub const ALL: Self = Self::from_usize_array([core::usize::MAX; Self::USIZE_COUNT]);
15-
pub const BITS: usize = core::mem::size_of::<Self>() * 8;
16-
17-
#[inline]
18-
pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] {
19-
unsafe { core::mem::transmute(self.0) }
20-
}
21-
22-
#[inline]
23-
pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self {
24-
Self(unsafe { core::mem::transmute(array) })
25-
}
26-
2712
#[inline]
2813
pub fn is_empty(self) -> bool {
2914
unsafe { _mm256_testz_si256(self.0, self.0) == 1 }
@@ -96,8 +81,8 @@ impl PartialEq for Block {
9681
#[inline]
9782
fn eq(&self, other: &Self) -> bool {
9883
unsafe {
99-
let eq = _mm256_cmpeq_epi8(self.0, other.0);
100-
_mm256_movemask_epi8(eq) == !(0i32)
84+
let neq = _mm256_xor_si256(self.0, other.0);
85+
_mm256_testz_si256(neq, neq) == 1
10186
}
10287
}
10388
}

src/block/default.rs

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,9 @@ use core::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign,
22

33
#[derive(Copy, Clone, PartialEq, Debug)]
44
#[repr(transparent)]
5-
pub struct Block(usize);
5+
pub struct Block(pub(super) usize);
66

77
impl Block {
8-
pub const USIZE_COUNT: usize = 1;
9-
pub const NONE: Self = Block(0);
10-
#[allow(dead_code)]
11-
pub const ALL: Self = Block(!0);
12-
pub const BITS: usize = core::mem::size_of::<Self>() * 8;
13-
14-
#[inline]
15-
pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] {
16-
[self.0]
17-
}
18-
19-
#[inline]
20-
#[allow(dead_code)]
21-
pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self {
22-
Self(array[0])
23-
}
24-
258
#[inline]
269
pub const fn is_empty(self) -> bool {
2710
self.0 == Self::NONE.0

src/block/mod.rs

Lines changed: 53 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,84 @@
11
#![allow(clippy::undocumented_unsafe_blocks)]
2+
#![allow(dead_code)]
23

34
use core::cmp::Ordering;
45
use core::hash::{Hash, Hasher};
56

67
#[cfg(all(
7-
not(target_arch = "wasm32"),
8+
not(all(target_family = "wasm", target_feature = "simd128")),
89
not(target_feature = "sse2"),
10+
not(target_feature = "avx"),
911
not(target_feature = "avx2"),
1012
))]
1113
mod default;
1214
#[cfg(all(
13-
not(target_arch = "wasm32"),
15+
not(all(target_family = "wasm", target_feature = "simd128")),
1416
not(target_feature = "sse2"),
17+
not(target_feature = "avx"),
1518
not(target_feature = "avx2"),
1619
))]
1720
pub use self::default::*;
1821

1922
#[cfg(all(
20-
not(target_arch = "wasm32"),
23+
any(target_arch = "x86", target_arch = "x86_64"),
2124
target_feature = "sse2",
25+
not(target_feature = "avx"),
2226
not(target_feature = "avx2"),
2327
))]
2428
mod sse2;
2529
#[cfg(all(
26-
not(target_arch = "wasm32"),
30+
any(target_arch = "x86", target_arch = "x86_64"),
2731
target_feature = "sse2",
32+
not(target_feature = "avx"),
2833
not(target_feature = "avx2"),
2934
))]
3035
pub use self::sse2::*;
3136

32-
#[cfg(all(not(target_arch = "wasm32"), target_feature = "avx2",))]
37+
#[cfg(all(
38+
any(target_arch = "x86", target_arch = "x86_64"),
39+
target_feature = "avx",
40+
not(target_feature = "avx2")
41+
))]
42+
mod avx;
43+
#[cfg(all(
44+
any(target_arch = "x86", target_arch = "x86_64"),
45+
target_feature = "avx",
46+
not(target_feature = "avx2")
47+
))]
48+
pub use self::avx::*;
49+
50+
#[cfg(all(
51+
any(target_arch = "x86", target_arch = "x86_64"),
52+
target_feature = "avx2"
53+
))]
3354
mod avx2;
34-
#[cfg(all(not(target_arch = "wasm32"), target_feature = "avx2",))]
55+
#[cfg(all(
56+
any(target_arch = "x86", target_arch = "x86_64"),
57+
target_feature = "avx2"
58+
))]
3559
pub use self::avx2::*;
3660

37-
#[cfg(target_arch = "wasm32")]
38-
mod wasm32;
39-
#[cfg(target_arch = "wasm32")]
40-
pub use self::wasm32::*;
61+
#[cfg(all(target_family = "wasm", target_feature = "simd128"))]
62+
mod wasm;
63+
#[cfg(all(target_arch = "wasm", target_feature = "simd128"))]
64+
pub use self::wasm::*;
65+
66+
impl Block {
67+
pub const USIZE_COUNT: usize = core::mem::size_of::<Self>() / core::mem::size_of::<usize>();
68+
pub const NONE: Self = Self::from_usize_array([0; Self::USIZE_COUNT]);
69+
pub const ALL: Self = Self::from_usize_array([core::usize::MAX; Self::USIZE_COUNT]);
70+
pub const BITS: usize = core::mem::size_of::<Self>() * 8;
71+
72+
#[inline]
73+
pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] {
74+
unsafe { core::mem::transmute(self.0) }
75+
}
76+
77+
#[inline]
78+
pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self {
79+
Self(unsafe { core::mem::transmute(array) })
80+
}
81+
}
4182

4283
impl Eq for Block {}
4384

@@ -51,15 +92,7 @@ impl PartialOrd for Block {
5192
impl Ord for Block {
5293
#[inline]
5394
fn cmp(&self, other: &Self) -> Ordering {
54-
let a = self.into_usize_array();
55-
let b = other.into_usize_array();
56-
for i in 0..Self::USIZE_COUNT {
57-
match a[i].cmp(&b[i]) {
58-
Ordering::Equal => continue,
59-
cmp => return cmp,
60-
}
61-
}
62-
Ordering::Equal
95+
self.into_usize_array().cmp(&other.into_usize_array())
6396
}
6497
}
6598

@@ -73,6 +106,6 @@ impl Default for Block {
73106
impl Hash for Block {
74107
#[inline]
75108
fn hash<H: Hasher>(&self, hasher: &mut H) {
76-
self.into_usize_array().hash(hasher)
109+
Hash::hash_slice(&self.into_usize_array(), hasher);
77110
}
78111
}

src/block/sse2.rs

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,24 +8,9 @@ use core::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign,
88

99
#[derive(Copy, Clone, Debug)]
1010
#[repr(transparent)]
11-
pub struct Block(__m128i);
11+
pub struct Block(pub(super) __m128i);
1212

1313
impl Block {
14-
pub const USIZE_COUNT: usize = core::mem::size_of::<Self>() / core::mem::size_of::<usize>();
15-
pub const NONE: Self = Self::from_usize_array([0; Self::USIZE_COUNT]);
16-
pub const ALL: Self = Self::from_usize_array([core::usize::MAX; Self::USIZE_COUNT]);
17-
pub const BITS: usize = core::mem::size_of::<Self>() * 8;
18-
19-
#[inline]
20-
pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] {
21-
unsafe { core::mem::transmute(self.0) }
22-
}
23-
24-
#[inline]
25-
pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self {
26-
Self(unsafe { core::mem::transmute(array) })
27-
}
28-
2914
#[inline]
3015
pub fn is_empty(self) -> bool {
3116
#[cfg(not(target_feature = "sse4.1"))]
@@ -34,7 +19,7 @@ impl Block {
3419
}
3520
#[cfg(target_feature = "sse4.1")]
3621
{
37-
unsafe { _mm_test_all_zeros(self.0, Self::ALL.0) == 1 }
22+
unsafe { _mm_test_all_zeros(self.0, self.0) == 1 }
3823
}
3924
}
4025

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,32 +8,17 @@ use core::{
88

99
#[derive(Copy, Clone, Debug)]
1010
#[repr(transparent)]
11-
pub struct Block(v128);
11+
pub struct Block(pub(super) v128);
1212

1313
impl Block {
14-
pub const USIZE_COUNT: usize = core::mem::size_of::<Self>() / core::mem::size_of::<usize>();
15-
pub const NONE: Self = Self::from_usize_array([0; Self::USIZE_COUNT]);
16-
pub const ALL: Self = Self::from_usize_array([core::usize::MAX; Self::USIZE_COUNT]);
17-
pub const BITS: usize = core::mem::size_of::<Self>() * 8;
18-
19-
#[inline]
20-
pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] {
21-
unsafe { core::mem::transmute(self.0) }
22-
}
23-
24-
#[inline]
25-
pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self {
26-
Self(unsafe { core::mem::transmute(array) })
27-
}
28-
2914
#[inline]
3015
pub fn is_empty(self) -> bool {
3116
!v128_any_true(self.0)
3217
}
3318

3419
#[inline]
3520
pub fn andnot(self, other: Self) -> Self {
36-
Self(unsafe { v128_andnot(self.0, other.0) })
21+
Self(v128_andnot(self.0, other.0))
3722
}
3823
}
3924

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
//! When SIMD is not available on the target, the crate will gracefully fallback to a default implementation. It is intended to add support for other SIMD architectures
1313
//! once they appear in stable Rust.
1414
//!
15-
//! Currently only SSE2/AVX2 on x86/x86_64 and wasm32 SIMD are supported as this is what stable Rust supports.
15+
//! Currently only SSE2/AVX/AVX2 on x86/x86_64 and wasm32 SIMD are supported as this is what stable Rust supports.
1616
#![no_std]
1717
#![deny(clippy::undocumented_unsafe_blocks)]
1818

0 commit comments

Comments
 (0)