Skip to content

Commit 9e4013e

Browse files
committed
Add rrrvector experiment
1 parent fd9e60e commit 9e4013e

15 files changed

Lines changed: 939 additions & 25 deletions

File tree

Lines changed: 251 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,251 @@
1+
using System.Runtime.CompilerServices;
2+
3+
namespace Genbox.FastData.Benchmarks.Benchmarks;
4+
5+
public class VectorBenchmarks
6+
{
7+
/// <summary>
/// Benchmarks one membership probe against the RRR bit-vector structure,
/// using a pseudo-random key drawn from [0, 1000).
/// </summary>
[Benchmark]
public bool Rrr()
{
    int probe = Random.Shared.Next(0, 1000);
    return RrrBitVectorStructure_Int32_1000.Contains(probe);
}
8+
/// <summary>
/// Benchmarks one membership probe against the Elias-Fano structure,
/// using a pseudo-random key drawn from [0, 1000).
/// </summary>
[Benchmark]
public bool Ef()
{
    int probe = Random.Shared.Next(0, 1000);
    return EliasFanoStructure_Int32_1000.Contains(probe);
}
9+
10+
/// <summary>
/// Membership test over a bit vector that is stored block-wise: each block of
/// <c>_rrrBlockSize</c> bits is described by the number of set bits it holds
/// (<c>_rrrClasses</c>) and a rank (<c>_rrrOffsets</c>) from which the individual
/// bits are reconstructed on demand by <c>DecodeBit</c>.
/// </summary>
private static class RrrBitVectorStructure_Int32_1000
{
    public const uint ItemCount = 1000;
    public const int MinKey = 0;
    public const int MaxKey = 999;

    // Keys are compared after flipping the sign bit, so these bounds are the
    // smallest and largest accepted key in that flipped (unsigned) form.
    private const ulong _rrrMinValue = 2147483648ul;
    private const ulong _rrrMaxValue = 2147484647ul;
    private const int _rrrBlockSize = 15;

    // Number of set bits in each block (67 blocks).
    private static readonly byte[] _rrrClasses = new byte[]
    {
        15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
        15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
        15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
        15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
        15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
        15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
        15, 15, 15, 15, 15, 15, 10
    };

    // Per-block rank handed to DecodeBit (67 blocks, all zero for this data set).
    private static readonly uint[] _rrrOffsets = new uint[]
    {
        0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u,
        0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u,
        0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u,
        0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u,
        0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u,
        0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u,
        0u, 0u, 0u, 0u, 0u, 0u, 0u
    };

    /// <summary>Returns true when <paramref name="key"/> is a member of the set.</summary>
    /// <param name="key">The key to look up.</param>
    public static bool Contains(int key)
    {
        // 4294966272 == 0xFFFFFC00: any bit above the low ten means the key is
        // negative or at least 1024, so it cannot be present.
        if (((uint)key & 4294966272u) != 0)
            return false;

        // Flip the sign bit so signed ordering becomes unsigned ordering.
        ulong biased = (ulong)(uint)(key ^ int.MinValue);

        bool inRange = biased >= _rrrMinValue && biased <= _rrrMaxValue;
        if (!inRange)
            return false;

        ulong bitIndex = biased - _rrrMinValue;
        int block = (int)(bitIndex / (ulong)_rrrBlockSize);
        int onesInBlock = _rrrClasses[block];

        // A block without any set bit cannot contain the key.
        if (onesInBlock == 0)
            return false;

        int bitWithinBlock = (int)(bitIndex % (ulong)_rrrBlockSize);
        return DecodeBit(onesInBlock, _rrrOffsets[block], bitWithinBlock);
    }

    /// <summary>
    /// Reconstructs a single bit of a block from its set-bit count and rank by
    /// walking the bit positions from the highest to the lowest.
    /// </summary>
    /// <param name="classValue">Number of set bits in the block.</param>
    /// <param name="rank">Rank of the block among blocks with that many set bits.</param>
    /// <param name="targetBit">Zero-based bit position to read.</param>
    /// <returns>True when the requested bit is set.</returns>
    private static bool DecodeBit(int classValue, uint rank, int targetBit)
    {
        int onesLeft = classValue;
        int bit = _rrrBlockSize - 1;

        // Once every set bit has been placed, all remaining positions are clear.
        while (bit >= 0 && onesLeft != 0)
        {
            uint weight = (uint)Binomial(bit, onesLeft);
            bool bitIsSet = rank >= weight;

            if (bitIsSet)
            {
                rank -= weight;
                onesLeft--;
            }

            if (bit == targetBit)
                return bitIsSet;

            bit--;
        }

        return false;
    }

    /// <summary>Computes the binomial coefficient "n choose k"; 0 when k is outside [0, n].</summary>
    private static int Binomial(int n, int k)
    {
        if (k < 0 || k > n)
            return 0;

        if (k == 0 || k == n)
            return 1;

        // Use the smaller of k and n - k to shorten the product.
        int terms = k > n - k ? n - k : k;
        int product = 1;

        for (int step = 1; step <= terms; step++)
            product = checked(product * (n - (terms - step)) / step);

        return product;
    }
}
109+
110+
/// <summary>
/// Membership test over a bit array (<c>_upperBits</c>) in which each stored item
/// contributes one set bit. A set bit at position p that is preceded by r set bits
/// represents the high value p - r. With <c>_lowerBitCount</c> equal to 0 the high
/// value is the key itself, so a matching high value means the key is present.
/// </summary>
private static class EliasFanoStructure_Int32_1000
{
    public const uint ItemCount = 1000;
    public const int MinKey = 0;
    public const int MaxKey = 999;

    private const int _lowerBitCount = 0;

    // Every full word is 0x5555555555555555 (all even bit positions set); the last
    // word, 21845 == 0x5555, carries the remaining eight set bits.
    private static readonly ulong[] _upperBits = new ulong[]
    {
        6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul,
        6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul,
        6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul, 6148914691236517205ul,
        6148914691236517205ul, 21845ul
    };

    // Number of meaningful bits in the last word of _upperBits; bits above this
    // are padding and must not be counted as zeros.
    private const int _lastWordValidBits = 15;

    private const int _sampleRateShift = 7;

    // Entry i is the bit position from which SelectZero starts scanning when the
    // requested zero rank is at least (i << _sampleRateShift).
    private static readonly int[] _samplePositions = new int[]
    {
        0, 257, 513, 769, 1025, 1281, 1537, 1793
    };

    /// <summary>Returns true when <paramref name="key"/> is a member of the set.</summary>
    /// <param name="key">The key to look up.</param>
    public static bool Contains(int key)
    {
        // 4294966272 == 0xFFFFFC00: any bit above the low ten means the key is
        // negative or at least 1024, so it cannot be present.
        if (((uint)key & 4294966272u) != 0)
            return false;

        long value = (long)key;
        long high = value >> _lowerBitCount;

        // Scanning starts right after the zero bit that closes the previous high
        // value. The SelectZero result is checked before adding 1: the failure
        // sentinel is -1, which the increment would otherwise turn into the valid
        // position 0 and hide from any later "position < 0" test.
        long position = 0;
        if (high != 0)
        {
            long previousZero = SelectZero(high - 1);
            if (previousZero < 0)
                return false;

            position = previousZero + 1;
        }

        // Number of set bits that precede the starting position.
        long rank = position - high;
        if ((ulong)rank >= ItemCount)
            return false;

        int currWord = (int)(position >> 6);

        if ((uint)currWord >= (uint)_upperBits.Length)
            return false;

        // Ignore the bits below the starting position within the first word.
        ulong window = _upperBits[currWord] & (ulong.MaxValue << (int)(position & 63));
        while (true)
        {
            // Advance to the next word that still has a set bit.
            while (window == 0)
            {
                currWord++;
                if ((uint)currWord >= (uint)_upperBits.Length)
                    return false;

                window = _upperBits[currWord];
            }

            int trailing = System.Numerics.BitOperations.TrailingZeroCount(window);
            long onePosition = ((long)currWord << 6) + trailing;
            long currentHigh = onePosition - rank;

            // High values only grow along the scan, so the first one that reaches
            // the target decides the outcome.
            if (currentHigh >= high)
                return currentHigh == high;

            window &= window - 1; // clear the lowest set bit
            rank++;

            if ((ulong)rank >= ItemCount)
                return false;
        }
    }

    /// <summary>
    /// Finds the bit position of the zero bit with the given zero-based rank in
    /// <c>_upperBits</c>.
    /// </summary>
    /// <param name="rank">Zero-based index of the zero bit to locate.</param>
    /// <returns>The bit position, or -1 when no such zero bit exists.</returns>
    private static long SelectZero(long rank)
    {
        if (rank < 0)
            return -1;

        int sampleIndex = (int)(rank >> _sampleRateShift);
        if ((uint)sampleIndex >= (uint)_samplePositions.Length)
            return -1;

        // Jump to the sampled position, then count zeros word by word from there.
        long zeroRank = (long)sampleIndex << _sampleRateShift;
        int startPosition = _samplePositions[sampleIndex];
        int wordIndex = startPosition >> 6;
        int startBit = startPosition & 63;

        for (; wordIndex < _upperBits.Length; wordIndex++)
        {
            int validBits = wordIndex == _upperBits.Length - 1 ? _lastWordValidBits : 64;
            // A shift by 64 would wrap around, so the full-word mask is special-cased.
            ulong validMask = validBits == 64 ? ulong.MaxValue : (1UL << validBits) - 1;
            ulong zeros = ~_upperBits[wordIndex] & validMask;

            // Only the first scanned word starts mid-word.
            if (startBit > 0)
            {
                zeros &= ~((1UL << startBit) - 1);
                startBit = 0;
            }

            int zeroCount = System.Numerics.BitOperations.PopCount(zeros);
            if (zeroCount == 0)
                continue;

            if (zeroRank + zeroCount > rank)
            {
                int rankInWord = (int)(rank - zeroRank);
                int bitInWord = SelectBitInWord(zeros, rankInWord);
                return ((long)wordIndex << 6) + bitInWord;
            }

            zeroRank += zeroCount;
        }

        return -1;
    }

    /// <summary>
    /// Returns the position of the set bit with the given zero-based rank inside
    /// <paramref name="word"/>, or -1 when the word has too few set bits.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int SelectBitInWord(ulong word, int rank)
    {
        if ((uint)rank >= 64)
            return -1;

        int remaining = rank;
        ulong value = word;

        // Drop the lowest set bit once for every bit that must be skipped.
        while (remaining > 0)
        {
            if (value == 0)
                return -1;

            value &= value - 1;
            remaining--;
        }

        if (value == 0)
            return -1;

        return System.Numerics.BitOperations.TrailingZeroCount(value);
    }
}
251+
}

Src/FastData.Generator.CSharp/CSharpCodeGenerator.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ protected override void AppendFooter<T>(StringBuilder sb, GeneratorConfig<T> gen
8787
HashTableCompactContext<TKey, TValue> x => new HashTableCompactCode<TKey, TValue>(x, _cfg, Shared),
8888
HashTablePerfectContext<TKey, TValue> x => new HashTablePerfectCode<TKey, TValue>(x, _cfg, Shared),
8989
EliasFanoContext<TKey, TValue> x => new EliasFanoCode<TKey, TValue>(x, _cfg),
90+
RrrBitVectorContext<TKey, TValue> x => new RrrBitVectorCode<TKey, TValue>(x, _cfg),
9091
KeyLengthContext<TValue> x => new KeyLengthCode<TKey, TValue>(x, _cfg, Shared),
9192
_ => null
9293
};

Src/FastData.Generator.CSharp/Internal/Generators/EliasFanoCode.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ public override string Generate()
134134
window &= window - 1;
135135
rank++;
136136
137-
if ((ulong)rank >= (ulong)ItemCount)
137+
if ((ulong)rank >= ItemCount)
138138
return false;
139139
}
140140
""")}}
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
using Genbox.FastData.Generator.CSharp.Internal.Framework;
2+
using Genbox.FastData.Generator.Enums;
3+
using Genbox.FastData.Generator.Extensions;
4+
using Genbox.FastData.Generators.Contexts;
5+
6+
namespace Genbox.FastData.Generator.CSharp.Internal.Generators;
7+
8+
internal sealed class RrrBitVectorCode<TKey, TValue>(RrrBitVectorContext<TKey, TValue> ctx, CSharpCodeGeneratorConfig cfg) : CSharpOutputWriter<TKey>(cfg)
9+
{
10+
/// <summary>
/// Builds the C# source text for the RRR bit-vector lookup: three constants taken
/// from <c>ctx.MinValue</c>, <c>ctx.MaxValue</c> and <c>ctx.BlockSize</c>, two arrays
/// formatted from <c>ctx.Classes</c> and <c>ctx.Offsets</c>, a <c>Contains</c> method,
/// and the <c>DecodeBit</c> and <c>Binomial</c> helpers it relies on.
/// </summary>
/// <returns>The generated source as a single string.</returns>
// NOTE(review): the emitted Binomial multiplies in checked int arithmetic before
// dividing. For larger block sizes the intermediate product could exceed
// int.MaxValue and throw OverflowException - confirm the maximum value that
// ctx.BlockSize can take.
public override string Generate()
11+
{
12+
// The helpers are emitted as static only when the field modifier itself contains " static ".
string helperModifier = FieldModifier.Contains(" static ", StringComparison.Ordinal) ? "private static " : "private ";
13+
// Expression text that converts the lookup key to an order-preserving ulong (see GetMapSource).
string mapSource = GetMapSource();
14+
15+
return $$"""
16+
private const ulong _rrrMinValue = {{ToValueLabel(ctx.MinValue)}};
17+
private const ulong _rrrMaxValue = {{ToValueLabel(ctx.MaxValue)}};
18+
private const int _rrrBlockSize = {{ctx.BlockSize.ToStringInvariant()}};
19+
{{FieldModifier}}byte[] _rrrClasses = new byte[] {
20+
{{FormatColumns(ctx.Classes, static x => ((int)x).ToStringInvariant())}}
21+
};
22+
{{FieldModifier}}uint[] _rrrOffsets = new uint[] {
23+
{{FormatColumns(ctx.Offsets, ToValueLabel)}}
24+
};
25+
26+
{{MethodAttribute}}
27+
{{MethodModifier}}bool Contains({{KeyTypeName}} {{InputKeyName}})
28+
{
29+
{{GetMethodHeader(MethodType.Contains)}}
30+
31+
ulong mapped = {{mapSource}};
32+
33+
if (mapped < _rrrMinValue || mapped > _rrrMaxValue)
34+
return false;
35+
36+
ulong normalized = mapped - _rrrMinValue;
37+
int blockIndex = (int)(normalized / (ulong)_rrrBlockSize);
38+
int bitInBlock = (int)(normalized % (ulong)_rrrBlockSize);
39+
int classValue = _rrrClasses[blockIndex];
40+
41+
if (classValue == 0)
42+
return false;
43+
44+
uint rank = _rrrOffsets[blockIndex];
45+
return DecodeBit(classValue, rank, bitInBlock);
46+
}
47+
48+
{{helperModifier}}bool DecodeBit(int classValue, uint rank, int targetBit)
49+
{
50+
int remaining = classValue;
51+
52+
for (int bit = _rrrBlockSize - 1; bit >= 0; bit--)
53+
{
54+
if (remaining == 0)
55+
return false;
56+
57+
int comb = Binomial(bit, remaining);
58+
bool isSet;
59+
60+
if (rank >= (uint)comb)
61+
{
62+
rank -= (uint)comb;
63+
remaining--;
64+
isSet = true;
65+
}
66+
else
67+
isSet = false;
68+
69+
if (bit == targetBit)
70+
return isSet;
71+
}
72+
73+
return false;
74+
}
75+
76+
{{helperModifier}}int Binomial(int n, int k)
77+
{
78+
if (k < 0 || k > n)
79+
return 0;
80+
81+
if (k == 0 || k == n)
82+
return 1;
83+
84+
if (k > n - k)
85+
k = n - k;
86+
87+
int result = 1;
88+
89+
for (int i = 1; i <= k; i++)
90+
result = checked(result * (n - (k - i)) / i);
91+
92+
return result;
93+
}
94+
""";
95+
}
96+
97+
/// <summary>
/// Returns the C# expression text that converts the lookup key to a <c>ulong</c>.
/// Unsigned key types are widened directly; signed key types are XOR-ed with their
/// type's minimum value first, which flips the sign bit so that signed ordering
/// matches unsigned ordering.
/// </summary>
/// <exception cref="InvalidOperationException">The key type is not an integral type.</exception>
private string GetMapSource()
{
    switch (KeyType)
    {
        case KeyType.Char:
        case KeyType.Byte:
        case KeyType.UInt16:
        case KeyType.UInt32:
            return $"(ulong){LookupKeyName}";

        case KeyType.UInt64:
            return LookupKeyName;

        case KeyType.SByte:
            return $"(ulong)(byte)({LookupKeyName} ^ sbyte.MinValue)";

        case KeyType.Int16:
            return $"(ulong)(ushort)({LookupKeyName} ^ short.MinValue)";

        case KeyType.Int32:
            return $"(ulong)(uint)({LookupKeyName} ^ int.MinValue)";

        case KeyType.Int64:
            return $"(ulong)({LookupKeyName} ^ long.MinValue)";

        default:
            throw new InvalidOperationException("RRR bitvector only supports integral key types.");
    }
}
113+
}

Src/FastData.InternalShared/Helpers/TestHelper.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,8 @@ private static GeneratorSpec GenerateInternal<TKey, TValue>(Func<string, ICodeGe
252252
return Generate(state, new BloomFilterStructure<TKey, TValue>(GetHashData(keySpan, keyType, generator.Encoding)));
253253
if (vector.Type == typeof(EliasFanoStructure<,>))
254254
return Generate(state, new EliasFanoStructure<TKey, TValue>((NumericKeyProperties<TKey>)props, config));
255+
if (vector.Type == typeof(RrrBitVectorStructure<,>))
256+
return Generate(state, new RrrBitVectorStructure<TKey, TValue>());
255257

256258
throw new InvalidOperationException("Unsupported structure type: " + vector.Type.Name);
257259
}

0 commit comments

Comments
 (0)