Skip to content

Commit d7ae91f

Browse files
authored
Add SSE2/AVX2/WASM SIMD support (#86)
1 parent 61550cd commit d7ae91f

9 files changed

Lines changed: 670 additions & 152 deletions

File tree

.github/workflows/rust.yml

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@ jobs:
1818
strategy:
1919
matrix:
2020
rust: [1.56.0, stable, nightly]
21-
21+
features: ["+avx2", "+sse2", "-avx2,-sse2"]
22+
env:
23+
# Cargo reads RUSTFLAGS; the matrix value must be interpolated with a `${{ }}` expression.
RUSTFLAGS: "-C target-feature=${{ matrix.features }}"
2224
steps:
23-
- uses: actions/checkout@v2
25+
- uses: actions/checkout@v4
2426
- uses: dtolnay/rust-toolchain@stable
2527
with:
2628
profile: minimal
@@ -43,7 +45,7 @@ jobs:
4345
rust: [stable]
4446

4547
steps:
46-
- uses: actions/checkout@v2
48+
- uses: actions/checkout@v4
4749
- uses: dtolnay/rust-toolchain@stable
4850
with:
4951
profile: minimal
@@ -64,7 +66,7 @@ jobs:
6466
rust: [stable]
6567

6668
steps:
67-
- uses: actions/checkout@v2
69+
- uses: actions/checkout@v4
6870
- uses: dtolnay/rust-toolchain@stable
6971
with:
7072
profile: minimal
@@ -85,7 +87,7 @@ jobs:
8587
rust: [stable]
8688

8789
steps:
88-
- uses: actions/checkout@v2
90+
- uses: actions/checkout@v4
8991
- uses: dtolnay/rust-toolchain@stable
9092
with:
9193
profile: minimal
@@ -95,4 +97,16 @@ jobs:
9597
- name: Run Clippy
9698
run: |
9799
cd benches
98-
cargo bench --bench benches --no-run
100+
cargo bench --bench benches --no-run
101+
102+
build-wasm:
103+
runs-on: ubuntu-latest
104+
timeout-minutes: 30
105+
needs: build
106+
steps:
107+
- uses: actions/checkout@v4
108+
- uses: dtolnay/rust-toolchain@stable
109+
with:
110+
target: wasm32-unknown-unknown
111+
- name: Check wasm
112+
run: cargo check --target wasm32-unknown-unknown

src/block/avx2.rs

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
#[cfg(target_arch = "x86")]
2+
use core::arch::x86::*;
3+
#[cfg(target_arch = "x86_64")]
4+
use core::arch::x86_64::*;
5+
use core::{
6+
cmp::Ordering,
7+
hash::{Hash, Hasher},
8+
iter::Iterator,
9+
ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Not},
10+
};
11+
12+
/// A 256-bit block of bits held in a single `__m256i` vector.
///
/// This backend is only selected when the `avx2` target feature is enabled
/// (see the `cfg` gates in `block/mod.rs`).
#[derive(Copy, Clone, Debug)]
#[repr(transparent)]
pub struct Block(__m256i);

impl Block {
    /// Number of `usize` words that make up one block.
    pub const USIZE_COUNT: usize = core::mem::size_of::<Self>() / core::mem::size_of::<usize>();
    /// Number of bits stored in one block.
    pub const BITS: usize = 8 * core::mem::size_of::<Self>();
    /// Block with every bit cleared.
    pub const NONE: Self = Self::from_usize_array([0; Self::USIZE_COUNT]);
    /// Block with every bit set.
    pub const ALL: Self = Self::from_usize_array([core::usize::MAX; Self::USIZE_COUNT]);

    /// Reinterprets the vector as an array of `USIZE_COUNT` machine words.
    #[inline]
    pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] {
        let Self(raw) = self;
        // SAFETY: both types are plain integer data of identical size
        // (`USIZE_COUNT` is derived from `size_of::<Self>()`), so every bit
        // pattern of one is a valid value of the other.
        unsafe { core::mem::transmute(raw) }
    }

    /// Builds a block from an array of `USIZE_COUNT` machine words.
    #[inline]
    pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self {
        // SAFETY: same-size reinterpretation of plain integer data; see
        // `into_usize_array`.
        Self(unsafe { core::mem::transmute(array) })
    }

    /// Returns `true` when no bit in the block is set.
    #[inline]
    pub fn is_empty(self) -> bool {
        // `testz(a, a)` yields 1 exactly when `a & a` is all zeroes.
        // SAFETY: this module is only compiled with the `avx2` target feature.
        let zero_flag = unsafe { _mm256_testz_si256(self.0, self.0) };
        zero_flag == 1
    }

    /// Returns the bits of `self` that are not set in `other` (`self & !other`).
    #[inline]
    pub fn andnot(self, other: Self) -> Self {
        // The intrinsic negates its *first* operand, hence `other` goes first.
        // SAFETY: this module is only compiled with the `avx2` target feature.
        let remaining = unsafe { _mm256_andnot_si256(other.0, self.0) };
        Self(remaining)
    }
}
42+
43+
impl Not for Block {
44+
type Output = Block;
45+
#[inline]
46+
fn not(self) -> Self::Output {
47+
unsafe { Self(_mm256_xor_si256(self.0, Self::ALL.0)) }
48+
}
49+
}
50+
51+
impl BitAnd for Block {
52+
type Output = Block;
53+
#[inline]
54+
fn bitand(self, other: Self) -> Self::Output {
55+
unsafe { Self(_mm256_and_si256(self.0, other.0)) }
56+
}
57+
}
58+
59+
impl BitAndAssign for Block {
60+
#[inline]
61+
fn bitand_assign(&mut self, other: Self) {
62+
unsafe {
63+
self.0 = _mm256_and_si256(self.0, other.0);
64+
}
65+
}
66+
}
67+
68+
impl BitOr for Block {
69+
type Output = Block;
70+
#[inline]
71+
fn bitor(self, other: Self) -> Self::Output {
72+
unsafe { Self(_mm256_or_si256(self.0, other.0)) }
73+
}
74+
}
75+
76+
impl BitOrAssign for Block {
77+
#[inline]
78+
fn bitor_assign(&mut self, other: Self) {
79+
unsafe {
80+
self.0 = _mm256_or_si256(self.0, other.0);
81+
}
82+
}
83+
}
84+
85+
impl BitXor for Block {
86+
type Output = Block;
87+
#[inline]
88+
fn bitxor(self, other: Self) -> Self::Output {
89+
unsafe { Self(_mm256_xor_si256(self.0, other.0)) }
90+
}
91+
}
92+
93+
impl BitXorAssign for Block {
94+
#[inline]
95+
fn bitxor_assign(&mut self, other: Self) {
96+
unsafe { self.0 = _mm256_xor_si256(self.0, other.0) }
97+
}
98+
}
99+
100+
impl PartialEq for Block {
101+
#[inline]
102+
fn eq(&self, other: &Self) -> bool {
103+
unsafe {
104+
let eq = _mm256_cmpeq_epi8(self.0, other.0);
105+
_mm256_movemask_epi8(eq) == !(0i32)
106+
}
107+
}
108+
}

src/block/default.rs

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
use core::iter::Iterator;
2+
use core::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Not};
3+
4+
/// Fallback block backed by a single `usize`.
///
/// Selected by `block/mod.rs` when the target is not `wasm32` and neither the
/// `sse2` nor the `avx2` target feature is enabled.
#[derive(Copy, Clone, PartialEq, Debug)]
#[repr(transparent)]
pub struct Block(usize);

impl Block {
    /// Number of `usize` words that make up one block.
    pub const USIZE_COUNT: usize = 1;
    /// Block with every bit cleared.
    pub const NONE: Self = Block(0);
    /// Block with every bit set.
    pub const ALL: Self = Block(!0);
    /// Number of bits stored in one block.
    pub const BITS: usize = core::mem::size_of::<Self>() * 8;

    /// Returns the block as an array of `USIZE_COUNT` machine words.
    ///
    /// The shared `Ord` and `Hash` impls in `block/mod.rs` call this on every
    /// backend, so the fallback backend must provide it as well.
    #[inline]
    pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] {
        [self.0]
    }

    /// Builds a block from an array of `USIZE_COUNT` machine words.
    #[inline]
    pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self {
        Self(array[0])
    }

    /// Returns `true` when no bit in the block is set.
    #[inline]
    pub const fn is_empty(self) -> bool {
        self.0 == Self::NONE.0
    }

    /// Returns the bits of `self` that are not set in `other` (`self & !other`).
    #[inline]
    pub fn andnot(self, other: Self) -> Self {
        Self(!other.0 & self.0)
    }
}
24+
25+
impl Not for Block {
26+
type Output = Block;
27+
#[inline]
28+
fn not(self) -> Self::Output {
29+
Self(self.0.not())
30+
}
31+
}
32+
33+
impl BitAnd for Block {
34+
type Output = Block;
35+
#[inline]
36+
fn bitand(self, other: Self) -> Self::Output {
37+
Self(self.0.bitand(other.0))
38+
}
39+
}
40+
41+
impl BitAndAssign for Block {
42+
#[inline]
43+
fn bitand_assign(&mut self, other: Self) {
44+
self.0.bitand_assign(other.0);
45+
}
46+
}
47+
48+
impl BitOr for Block {
49+
type Output = Block;
50+
#[inline]
51+
fn bitor(self, other: Self) -> Self::Output {
52+
Self(self.0.bitor(other.0))
53+
}
54+
}
55+
56+
impl BitOrAssign for Block {
57+
#[inline]
58+
fn bitor_assign(&mut self, other: Self) {
59+
self.0.bitor_assign(other.0)
60+
}
61+
}
62+
63+
impl BitXor for Block {
64+
type Output = Block;
65+
#[inline]
66+
fn bitxor(self, other: Self) -> Self::Output {
67+
Self(self.0.bitxor(other.0))
68+
}
69+
}
70+
71+
impl BitXorAssign for Block {
72+
#[inline]
73+
fn bitxor_assign(&mut self, other: Self) {
74+
self.0.bitxor_assign(other.0)
75+
}
76+
}

src/block/mod.rs

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
use core::cmp::Ordering;
2+
use core::hash::{Hash, Hasher};
3+
4+
#[cfg(all(
5+
not(target_arch = "wasm32"),
6+
not(target_feature = "sse2"),
7+
not(target_feature = "avx2"),
8+
))]
9+
mod default;
10+
#[cfg(all(
11+
not(target_arch = "wasm32"),
12+
not(target_feature = "sse2"),
13+
not(target_feature = "avx2"),
14+
))]
15+
pub use self::default::*;
16+
17+
#[cfg(all(
18+
not(target_arch = "wasm32"),
19+
target_feature = "sse2",
20+
not(target_feature = "avx2"),
21+
))]
22+
mod sse2;
23+
#[cfg(all(
24+
not(target_arch = "wasm32"),
25+
target_feature = "sse2",
26+
not(target_feature = "avx2"),
27+
))]
28+
pub use self::sse2::*;
29+
30+
#[cfg(all(not(target_arch = "wasm32"), target_feature = "avx2",))]
31+
mod avx2;
32+
#[cfg(all(not(target_arch = "wasm32"), target_feature = "avx2",))]
33+
pub use self::avx2::*;
34+
35+
#[cfg(target_arch = "wasm32")]
36+
mod wasm32;
37+
#[cfg(target_arch = "wasm32")]
38+
pub use self::wasm32::*;
39+
40+
// Marker impl: `Eq` is a prerequisite for the `Ord` impl on `Block` in this module.
impl Eq for Block {}
41+
42+
impl PartialOrd for Block {
43+
#[inline]
44+
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
45+
Some(self.cmp(other))
46+
}
47+
}
48+
49+
impl Ord for Block {
50+
#[inline]
51+
fn cmp(&self, other: &Self) -> Ordering {
52+
let a = self.into_usize_array();
53+
let b = other.into_usize_array();
54+
for i in 0..Self::USIZE_COUNT {
55+
match a[i].cmp(&b[i]) {
56+
Ordering::Equal => continue,
57+
cmp => return cmp,
58+
}
59+
}
60+
Ordering::Equal
61+
}
62+
}
63+
64+
impl Default for Block {
65+
#[inline]
66+
fn default() -> Self {
67+
Self::NONE
68+
}
69+
}
70+
71+
impl Hash for Block {
72+
#[inline]
73+
fn hash<H: Hasher>(&self, hasher: &mut H) {
74+
self.into_usize_array().hash(hasher)
75+
}
76+
}

0 commit comments

Comments
 (0)