|
| 1 | +using System.Numerics; |
1 | 2 | using System.Runtime.Intrinsics; |
2 | 3 | using System.Runtime.Intrinsics.X86; |
3 | 4 |
|
@@ -93,6 +94,154 @@ [Benchmark]public void K16LookupSimd256() |
93 | 94 | K16SearchSimd256(_k16Keys, _k16Children, Random.Shared.Next(1, _data.Length)); |
94 | 95 | } |
95 | 96 |
|
/// <summary>
/// Benchmark: issues 10,000 membership probes against
/// <c>CompressedBloomFilterStructure_Int32_100</c>, each with a fresh random key
/// drawn from <c>[1, _data.Length)</c>.
/// </summary>
[Benchmark]
public void CompressedBloom()
{
    const int ProbeCount = 10000;

    int remaining = ProbeCount;
    while (remaining-- > 0)
    {
        // NOTE(review): the bool result is discarded; confirm the JIT is not eliding
        // the probe before trusting the measured numbers.
        CompressedBloomFilterStructure_Int32_100.Contains(Random.Shared.Next(1, _data.Length));
    }
}
| 102 | + |
/// <summary>
/// Benchmark: issues 10,000 membership probes against
/// <c>EliasFanoSetStructure_Int32_100</c>, each with a fresh random key
/// drawn from <c>[1, _data.Length)</c>.
/// </summary>
[Benchmark]
public void EliasFanoEncoded()
{
    const int ProbeCount = 10000;

    int remaining = ProbeCount;
    while (remaining-- > 0)
    {
        // NOTE(review): the bool result is discarded; confirm the JIT is not eliding
        // the probe before trusting the measured numbers.
        EliasFanoSetStructure_Int32_100.Contains(Random.Shared.Next(1, _data.Length));
    }
}
| 108 | + |
/// <summary>
/// Static membership set over <see cref="int"/> keys stored in an Elias-Fano style layout.
/// Keys are mapped to an order-preserving unsigned form (sign bit flipped) and stored
/// relative to <c>_efMinValue</c>. With the constants below the set holds 100 items whose
/// mapped values span 2147483648..2147483747, i.e. the keys 0..99.
/// </summary>
private static class EliasFanoSetStructure_Int32_100
{
    // Smallest and largest mapped values in the set; used as a fast range reject.
    private const ulong _efMinValue = 2147483648ul;
    private const ulong _efMaxValue = 2147483747ul;

    // Number of low bits stored per item. Zero here: each value is fully described by
    // its high part, so there is no separate low-bits array.
    private const int _efLowerBitCount = 0;
    private const int _efItemCount = 100;

    // Unary-coded high parts: the i-th set bit (0-based) sits at position high(i) + i.
    private static readonly ulong[] _efHighBits =
    [
        6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul, 85ul
    ];

    /// <summary>Returns whether <paramref name="key"/> is a member of the set.</summary>
    /// <param name="key">The key to look up.</param>
    /// <returns><c>true</c> if the key is stored; otherwise <c>false</c>.</returns>
    public static bool Contains(int key)
    {
        // Flip the sign bit so signed order becomes unsigned order. The explicit
        // unchecked keeps this correct even when the project enables overflow checking
        // (a plain (uint) cast of a negative int would throw there).
        ulong mapped = unchecked((uint)key) ^ 0x80000000u;

        if (mapped is < _efMinValue or > _efMaxValue)
        {
            return false;
        }

        ulong target = mapped - _efMinValue;
        int low = 0;
        int high = _efItemCount - 1;

        // Binary search over the decoded, ascending sequence of stored values.
        while (low <= high)
        {
            int mid = low + ((high - low) >> 1);
            ulong value = GetValueAt(mid);

            if (value == target)
            {
                return true;
            }

            if (value < target)
            {
                low = mid + 1;
            }
            else
            {
                high = mid - 1;
            }
        }

        return false;
    }

    /// <summary>Decodes the stored value at <paramref name="index"/> (relative to the minimum).</summary>
    private static ulong GetValueAt(int index)
    {
        // Position of the index-th set bit minus index yields the high part.
        ulong high = (ulong)(Select1(index) - index);

        // No low bits to merge because _efLowerBitCount is 0.
        return high << _efLowerBitCount;
    }

    /// <summary>Returns the bit position of the <paramref name="index"/>-th (0-based) set bit in the high-bits array.</summary>
    /// <exception cref="InvalidOperationException">The array holds fewer than <paramref name="index"/> + 1 set bits.</exception>
    private static int Select1(int index)
    {
        int remaining = index;

        for (int i = 0; i < _efHighBits.Length; i++)
        {
            ulong word = _efHighBits[i];
            int count = BitOperations.PopCount(word);

            if (remaining < count)
            {
                // i << 6 == i * 64: bit offset of this word within the array.
                return (i << 6) + SelectInWord(word, remaining);
            }

            remaining -= count;
        }

        throw new InvalidOperationException("Elias-Fano select out of range.");
    }

    /// <summary>Returns the position of the <paramref name="rank"/>-th (0-based) set bit of <paramref name="word"/>.</summary>
    /// <exception cref="InvalidOperationException"><paramref name="rank"/> is negative or not less than the number of set bits.</exception>
    private static int SelectInWord(ulong word, int rank)
    {
        if (rank >= 0)
        {
            // Drop the lowest set bit 'rank' times; the lowest bit left is the answer.
            while (rank > 0 && word != 0)
            {
                word &= word - 1;
                rank--;
            }

            if (word != 0)
            {
                return BitOperations.TrailingZeroCount(word);
            }
        }

        throw new InvalidOperationException("Elias-Fano word rank out of range.");
    }
}
| 197 | + |
/// <summary>
/// Static Bloom-style filter stored as a sparse table: <c>_compressedIndices</c> holds the
/// bucket numbers in ascending order and <c>_compressedWords</c> the 64-bit word for
/// each of them, position for position.
/// </summary>
private static class CompressedBloomFilterStructure_Int32_100
{
    // Ascending bucket numbers that have a stored word.
    private static readonly int[] _compressedIndices =
    [
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
        10, 11, 12, 13, 14, 15
    ];

    // Bit words, parallel to _compressedIndices.
    private static readonly ulong[] _compressedWords =
    [
        281479271743489ul, 562958543486979ul, 1125917086973957ul, 2251834173947913ul, 4503668347895825ul, 9007336695791649ul, 18014673391583297ul, 36029346783166593ul, 72058693566333185ul, 144117387132666369ul,
        288234774265332737ul, 576469548530665473ul, 1152939097061330945ul, 2305878194122661889ul, 4611756388245323777ul, 9223512776490647553ul
    ];

    /// <summary>
    /// Tests <paramref name="key"/> against the filter: selects the bucket from the low four
    /// bits of the key and checks that both probe bits (key bits 0-5 and key bits 8-13)
    /// are set in that bucket's word.
    /// </summary>
    /// <param name="key">The key to test.</param>
    /// <returns><c>true</c> if both probe bits are set; otherwise <c>false</c>.</returns>
    public static bool Contains(int key)
    {
        ulong hash = (ulong)key;

        int firstBit = (int)(uint)(hash & 63UL);
        int secondBit = (int)(uint)((hash >> 8) & 63UL);
        ulong probe = (1UL << firstBit) | (1UL << secondBit);

        ulong bucketWord = FindWord((int)(uint)(hash & 15));
        return (bucketWord & probe) == probe;
    }

    /// <summary>
    /// Binary-searches <c>_compressedIndices</c> for <paramref name="bucket"/> and returns the
    /// matching word, or 0 when the bucket has no entry (so every probe then fails).
    /// </summary>
    private static ulong FindWord(int bucket)
    {
        int lo = 0;
        int hi = _compressedIndices.Length; // exclusive upper bound

        while (lo < hi)
        {
            int mid = lo + ((hi - lo) >> 1);
            int candidate = _compressedIndices[mid];

            if (candidate < bucket)
            {
                lo = mid + 1;
            }
            else if (candidate > bucket)
            {
                hi = mid;
            }
            else
            {
                return _compressedWords[mid];
            }
        }

        return 0UL;
    }
}
| 244 | + |
96 | 245 | private static int[] BuildEytzinger(int[] sorted) |
97 | 246 | { |
98 | 247 | int[] eytzinger = new int[sorted.Length]; |
@@ -442,10 +591,10 @@ private static int K16SearchSimd128(int[] keys, int[] children, int value) |
442 | 591 | uint mask2 = (uint)Sse2.MoveMask(gt2.AsByte()); |
443 | 592 | uint mask3 = (uint)Sse2.MoveMask(gt3.AsByte()); |
444 | 593 |
|
445 | | - int childSlot = (System.Numerics.BitOperations.PopCount(mask0) |
446 | | - + System.Numerics.BitOperations.PopCount(mask1) |
447 | | - + System.Numerics.BitOperations.PopCount(mask2) |
448 | | - + System.Numerics.BitOperations.PopCount(mask3)) / 4; |
| 594 | + int childSlot = (BitOperations.PopCount(mask0) |
| 595 | + + BitOperations.PopCount(mask1) |
| 596 | + + BitOperations.PopCount(mask2) |
| 597 | + + BitOperations.PopCount(mask3)) / 4; |
449 | 598 |
|
450 | 599 | node = children[childBase + childSlot]; |
451 | 600 | } |
@@ -482,8 +631,8 @@ private static int K16SearchSimd256(int[] keys, int[] children, int value) |
482 | 631 | uint mask0 = (uint)Avx2.MoveMask(gt0.AsByte()); |
483 | 632 | uint mask1 = (uint)Avx2.MoveMask(gt1.AsByte()); |
484 | 633 |
|
485 | | - int childSlot = (System.Numerics.BitOperations.PopCount(mask0) |
486 | | - + System.Numerics.BitOperations.PopCount(mask1)) / 4; |
| 634 | + int childSlot = (BitOperations.PopCount(mask0) |
| 635 | + + BitOperations.PopCount(mask1)) / 4; |
487 | 636 |
|
488 | 637 | node = children[childBase + childSlot]; |
489 | 638 | } |
|
0 commit comments