Skip to content

Commit fd9e60e

Browse files
committed
Add Elias Fano structure as experiment
1 parent cd53091 commit fd9e60e

14 files changed

Lines changed: 709 additions & 16 deletions

File tree

Src/FastData.Generator.CSharp/CSharpCodeGenerator.cs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -86,6 +86,7 @@ protected override void AppendFooter<T>(StringBuilder sb, GeneratorConfig<T> gen
8686
HashTableContext<TKey, TValue> x => new HashTableCode<TKey, TValue>(x, _cfg, Shared),
8787
HashTableCompactContext<TKey, TValue> x => new HashTableCompactCode<TKey, TValue>(x, _cfg, Shared),
8888
HashTablePerfectContext<TKey, TValue> x => new HashTablePerfectCode<TKey, TValue>(x, _cfg, Shared),
89+
EliasFanoContext<TKey, TValue> x => new EliasFanoCode<TKey, TValue>(x, _cfg),
8990
KeyLengthContext<TValue> x => new KeyLengthCode<TKey, TValue>(x, _cfg, Shared),
9091
_ => null
9192
};
Lines changed: 211 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,211 @@
1+
using Genbox.FastData.Generator.CSharp.Internal.Framework;
2+
using Genbox.FastData.Generator.Enums;
3+
using Genbox.FastData.Generator.Extensions;
4+
using Genbox.FastData.Generators.Contexts;
5+
6+
namespace Genbox.FastData.Generator.CSharp.Internal.Generators;
7+
8+
/// <summary>
/// Emits the C# source for an Elias-Fano style membership structure: a unary-coded
/// <c>_upperBits</c> bit vector, an optional packed <c>_lowerBits</c> array, sampled
/// zero positions used to accelerate select, and a <c>Contains</c> method that searches them.
/// </summary>
/// <param name="ctx">Pre-computed structure data (bit vectors, mask, sample table and their sizes).</param>
/// <param name="cfg">Generator configuration forwarded to the output writer base class.</param>
internal sealed class EliasFanoCode<TKey, TValue>(EliasFanoContext<TKey, TValue> ctx, CSharpCodeGeneratorConfig cfg) : CSharpOutputWriter<TKey>(cfg)
{
    /// <summary>
    /// Builds the member declarations (fields, <c>Contains</c>, <c>SelectZero</c> and
    /// <c>SelectBitInWord</c>) as a single source string.
    /// </summary>
    /// <returns>The generated C# member source.</returns>
    public override string Generate()
    {
        // When no low bits are stored, the generated code omits _lowerBits/_lowerMask and
        // decides membership from the upper bit vector alone.
        bool hasLowerBits = ctx.LowerBitCount != 0;

        // Number of meaningful bits in the final word of _upperBits. A bit length that is an
        // exact multiple of 64 fills the last word completely; emitting the raw remainder (0)
        // would produce an empty valid-mask and hide every zero in that word from SelectZero.
        var lastWordValidBits = ctx.UpperBitLength & 63;
        if (lastWordValidBits == 0)
        {
            lastWordValidBits = 64;
        }

        // Field declarations that only exist when low bits are stored.
        string lowerBitFields = hasLowerBits
            ? $$"""

                  {{FieldModifier}}ulong[] _lowerBits = new ulong[] {
              {{FormatColumns(ctx.LowerBits, ToValueLabel)}}
                  };

                  private const ulong _lowerMask = {{ToValueLabel(ctx.LowerMask)}};
              """
            : string.Empty;

        // Scan over the set bits of _upperBits starting at 'position'. Both variants walk the
        // ones belonging to the requested high part; the lower-bits variant additionally
        // compares the packed low bits of each candidate. The generated code must reference the
        // emitted constant by its real name, _lowerBitCount.
        string scanLoop = hasLowerBits
            ? """
                      ulong targetLow = (ulong)value & _lowerMask;
                      long lowerBitsOffset = rank * _lowerBitCount;

                      while (true)
                      {
                          while (window == 0)
                          {
                              currWord++;
                              if ((uint)currWord >= (uint)_upperBits.Length)
                                  return false;

                              window = _upperBits[currWord];
                          }

                          int trailing = System.Numerics.BitOperations.TrailingZeroCount(window);
                          long onePosition = ((long)currWord << 6) + trailing;
                          long currentHigh = onePosition - rank;

                          if (currentHigh >= high)
                          {
                              if (currentHigh > high)
                                  return false;

                              int wordIndex = (int)(lowerBitsOffset >> 6);
                              int startBit = (int)(lowerBitsOffset & 63);

                              ulong currentLow;
                              if (startBit + _lowerBitCount <= 64)
                                  currentLow = (_lowerBits[wordIndex] >> startBit) & _lowerMask;
                              else
                              {
                                  ulong lower = _lowerBits[wordIndex] >> startBit;
                                  ulong upper = _lowerBits[wordIndex + 1] << (64 - startBit);
                                  currentLow = (lower | upper) & _lowerMask;
                              }

                              if (currentLow == targetLow)
                                  return true;

                              if (currentLow > targetLow)
                                  return false;
                          }

                          window &= window - 1;
                          rank++;

                          if ((ulong)rank >= (ulong)ItemCount)
                              return false;

                          lowerBitsOffset += _lowerBitCount;
                      }
              """
            : """
                      while (true)
                      {
                          while (window == 0)
                          {
                              currWord++;
                              if ((uint)currWord >= (uint)_upperBits.Length)
                                  return false;

                              window = _upperBits[currWord];
                          }

                          int trailing = System.Numerics.BitOperations.TrailingZeroCount(window);
                          long onePosition = ((long)currWord << 6) + trailing;
                          long currentHigh = onePosition - rank;

                          if (currentHigh >= high)
                          {
                              if (currentHigh > high)
                                  return false;

                              return true;
                          }

                          window &= window - 1;
                          rank++;

                          if ((ulong)rank >= (ulong)ItemCount)
                              return false;
                      }
              """;

        // In the generated Contains, the result of SelectZero is tested BEFORE 1 is added to it.
        // SelectZero reports failure (negative or out-of-range rank) as -1; adding 1 first would
        // turn that into position 0 and let a negative high part continue into the scan.
        return $$"""
                private const int _lowerBitCount = {{ctx.LowerBitCount.ToStringInvariant()}};
                {{FieldModifier}}ulong[] _upperBits = new ulong[] {
            {{FormatColumns(ctx.UpperBits, ToValueLabel)}}
                };
            {{lowerBitFields}}

                private const int _sampleRateShift = {{ctx.SampleRateShift.ToStringInvariant()}};
                {{FieldModifier}}int[] _samplePositions = new int[] {
            {{FormatColumns(ctx.SamplePositions, static (_, x) => x.ToStringInvariant())}}
                };

                {{MethodAttribute}}
                {{MethodModifier}}bool Contains({{KeyTypeName}} {{InputKeyName}})
                {
            {{GetMethodHeader(MethodType.Contains)}}

                    long value = (long){{LookupKeyName}};
                    long high = value >> _lowerBitCount;

                    long position = 0;
                    if (high != 0)
                    {
                        long zeroPosition = SelectZero(high - 1);
                        if (zeroPosition < 0)
                            return false;

                        position = zeroPosition + 1;
                    }

                    long rank = position - high;
                    if ((ulong)rank >= (ulong)ItemCount)
                        return false;

                    int currWord = (int)(position >> 6);

                    if ((uint)currWord >= (uint)_upperBits.Length)
                        return false;

                    ulong window = _upperBits[currWord] & (ulong.MaxValue << (int)(position & 63));
            {{scanLoop}}
                }

                private static long SelectZero(long rank)
                {
                    if (rank < 0)
                        return -1;

                    int sampleIndex = (int)(rank >> _sampleRateShift);
                    if ((uint)sampleIndex >= (uint)_samplePositions.Length)
                        return -1;

                    long zeroRank = (long)sampleIndex << _sampleRateShift;
                    int startPosition = _samplePositions[sampleIndex];
                    int wordIndex = startPosition >> 6;
                    int startBit = startPosition & 63;

                    for (; wordIndex < _upperBits.Length; wordIndex++)
                    {
                        int validBits = wordIndex == _upperBits.Length - 1 ? {{lastWordValidBits}} : 64;
                        ulong validMask = validBits == 64 ? ulong.MaxValue : (1UL << validBits) - 1;
                        ulong zeros = ~_upperBits[wordIndex] & validMask;

                        if (startBit > 0)
                        {
                            zeros &= ~((1UL << startBit) - 1);
                            startBit = 0;
                        }

                        int zeroCount = System.Numerics.BitOperations.PopCount(zeros);
                        if (zeroCount == 0)
                            continue;

                        if (zeroRank + zeroCount > rank)
                        {
                            int rankInWord = (int)(rank - zeroRank);
                            int bitInWord = SelectBitInWord(zeros, rankInWord);
                            return ((long)wordIndex << 6) + bitInWord;
                        }

                        zeroRank += zeroCount;
                    }

                    return -1;
                }

                [MethodImpl(MethodImplOptions.AggressiveInlining)]
                private static int SelectBitInWord(ulong word, int rank)
                {
                    if ((uint)rank >= 64)
                        return -1;

                    int remaining = rank;
                    ulong value = word;

                    while (remaining > 0)
                    {
                        if (value == 0)
                            return -1;

                        value &= value - 1;
                        remaining--;
                    }

                    if (value == 0)
                        return -1;

                    return System.Numerics.BitOperations.TrailingZeroCount(value);
                }
            """;
    }
}

Src/FastData.InternalShared/Helpers/TestHelper.cs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -250,6 +250,8 @@ private static GeneratorSpec GenerateInternal<TKey, TValue>(Func<string, ICodeGe
250250
return Generate(state, new HashTableCompactStructure<TKey, TValue>(GetHashData(keySpan, keyType, generator.Encoding), keyType));
251251
if (vector.Type == typeof(BloomFilterStructure<,>))
252252
return Generate(state, new BloomFilterStructure<TKey, TValue>(GetHashData(keySpan, keyType, generator.Encoding)));
253+
if (vector.Type == typeof(EliasFanoStructure<,>))
254+
return Generate(state, new EliasFanoStructure<TKey, TValue>((NumericKeyProperties<TKey>)props, config));
253255

254256
throw new InvalidOperationException("Unsupported structure type: " + vector.Type.Name);
255257
}

Src/FastData.InternalShared/Helpers/TestVectorHelper.cs

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -87,6 +87,9 @@ public static IEnumerable<ITestVector> GetValueTestVectors()
8787
yield return testVector;
8888
}
8989

90+
foreach (ITestVector testVector in GenerateTestVectors([Enumerable.Range(0, 100).ToArray()], null, typeof(EliasFanoStructure<,>)))
91+
yield return testVector;
92+
9093
// We don't include a length of 1, 2 and 4 to check if uniq length structures emit null buckets correctly
9194
foreach (ITestVector testVector in GenerateTestVectors([["aaa", "aaaaa", "aaaaaa", "aaaaaaa", "aaaaaaaa", "aaaaaaaaa", "aaaaaaaaaa"]], null, typeof(KeyLengthStructure<,>)))
9295
yield return testVector;

0 commit comments

Comments
 (0)