Skip to content

Commit 58e6ae9

Browse files
committed
Make order preservation its own setting
1 parent 8ba8517 commit 58e6ae9

4 files changed

Lines changed: 46 additions & 34 deletions

File tree

Src/FastData.Tests/FastDataGeneratorTests.cs

Lines changed: 36 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,8 @@ namespace Genbox.FastData.Tests;
1010
public class FastDataGeneratorTests
1111
{
1212
[Theory]
13-
[InlineData(DeduplicationMode.HashSetPreserveOrder)]
13+
[InlineData(DeduplicationMode.HashSet)]
1414
[InlineData(DeduplicationMode.Sort)]
15-
[InlineData(DeduplicationMode.SortPreserveOrder)]
1615
public void Generate_ThrowOnDuplicate(DeduplicationMode mode)
1716
{
1817
FastDataConfig config = new FastDataConfig();
@@ -25,9 +24,8 @@ public void Generate_ThrowOnDuplicate(DeduplicationMode mode)
2524

2625
[Theory]
2726
[InlineData(DeduplicationMode.Disabled)]
28-
[InlineData(DeduplicationMode.HashSetPreserveOrder)]
27+
[InlineData(DeduplicationMode.HashSet)]
2928
[InlineData(DeduplicationMode.Sort)]
30-
[InlineData(DeduplicationMode.SortPreserveOrder)]
3129
public void Generate_NoThrowOnDuplicates(DeduplicationMode mode)
3230
{
3331
FastDataConfig config = new FastDataConfig();
@@ -54,60 +52,76 @@ public void FastDataConfig_SkipQuantum_ThrowsWhenNotPowerOfTwo()
5452
}
5553

5654
[Theory]
57-
[InlineData(DeduplicationMode.HashSetPreserveOrder)]
58-
[InlineData(DeduplicationMode.Sort)]
59-
[InlineData(DeduplicationMode.SortPreserveOrder)]
60-
public void GenerateKeyed_StringDeduplication_RemovesDuplicates(DeduplicationMode mode)
55+
[InlineData(DeduplicationMode.HashSet, true)]
56+
[InlineData(DeduplicationMode.HashSet, false)]
57+
[InlineData(DeduplicationMode.Sort, true)]
58+
[InlineData(DeduplicationMode.Sort, false)]
59+
public void GenerateKeyed_StringDeduplication_RemovesDuplicates(DeduplicationMode mode, bool preserveOrder)
6160
{
6261
string[] keys = ["b", "a", "b", "c"];
6362
string[] values = ["vb", "va", "vb", "vc"];
6463

6564
FastDataConfig config = new FastDataConfig(StructureType.Array);
6665
config.DeduplicationMode = mode;
6766
config.ThrowOnDuplicates = false;
67+
config.PreserveOrder = preserveOrder;
6868

6969
ContextCaptureGenerator generator = new ContextCaptureGenerator();
7070
FastDataGenerator.GenerateKeyed(keys, values, config, generator);
7171

7272
ArrayContext<string, string> ctx = Assert.IsType<ArrayContext<string, string>>(generator.Context);
73-
if (mode is DeduplicationMode.HashSetPreserveOrder or DeduplicationMode.SortPreserveOrder)
73+
74+
// If user asked for preserved order, we should preserve no matter what method used
75+
if (preserveOrder)
7476
{
75-
Assert.True(ctx.Keys.Span.SequenceEqual(["b", "a", "c"]));
76-
Assert.True(ctx.Values.Span.SequenceEqual(["vb", "va", "vc"]));
77+
Assert.Equal(["b", "a", "c"], ctx.Keys.Span);
78+
Assert.Equal(["vb", "va", "vc"], ctx.Values.Span);
7779
}
7880
else
7981
{
80-
Assert.True(ctx.Keys.Span.SequenceEqual(["a", "b", "c"]));
81-
Assert.True(ctx.Values.Span.SequenceEqual(["va", "vb", "vc"]));
82+
// Additionally, if the user asked for sort, but no preservation, data should be sorted
83+
if (mode == DeduplicationMode.Sort)
84+
{
85+
Assert.Equal(["a", "b", "c"], ctx.Keys.Span);
86+
Assert.Equal(["va", "vb", "vc"], ctx.Values.Span);
87+
}
8288
}
8389
}
8490

8591
[Theory]
86-
[InlineData(DeduplicationMode.HashSetPreserveOrder)]
87-
[InlineData(DeduplicationMode.Sort)]
88-
[InlineData(DeduplicationMode.SortPreserveOrder)]
89-
public void GenerateKeyed_NumericDeduplication_RemovesDuplicates(DeduplicationMode mode)
92+
[InlineData(DeduplicationMode.HashSet, true)]
93+
[InlineData(DeduplicationMode.HashSet, false)]
94+
[InlineData(DeduplicationMode.Sort, true)]
95+
[InlineData(DeduplicationMode.Sort, false)]
96+
public void GenerateKeyed_NumericDeduplication_RemovesDuplicates(DeduplicationMode mode, bool preserveOrder)
9097
{
9198
int[] keys = [3, 1, 3, 2];
9299
string[] values = ["v3", "v1", "v3", "v2"];
93100

94101
FastDataConfig config = new FastDataConfig(StructureType.Array);
95102
config.DeduplicationMode = mode;
96103
config.ThrowOnDuplicates = false;
104+
config.PreserveOrder = preserveOrder;
97105

98106
ContextCaptureGenerator generator = new ContextCaptureGenerator();
99107
FastDataGenerator.GenerateKeyed(keys, values, config, generator);
100108

101109
ArrayContext<int, string> ctx = Assert.IsType<ArrayContext<int, string>>(generator.Context);
102-
if (mode is DeduplicationMode.HashSetPreserveOrder or DeduplicationMode.SortPreserveOrder)
110+
111+
// If user asked for preserved order, we should preserve no matter what method used
112+
if (preserveOrder)
103113
{
104-
Assert.True(ctx.Keys.Span.SequenceEqual([3, 1, 2]));
105-
Assert.True(ctx.Values.Span.SequenceEqual(["v3", "v1", "v2"]));
114+
Assert.Equal([3, 1, 2], ctx.Keys.Span);
115+
Assert.Equal(["v3", "v1", "v2"], ctx.Values.Span);
106116
}
107117
else
108118
{
109-
Assert.True(ctx.Keys.Span.SequenceEqual([1, 2, 3]));
110-
Assert.True(ctx.Values.Span.SequenceEqual(["v1", "v2", "v3"]));
119+
// Additionally, if the user asked for sort, but no preservation, data should be sorted
120+
if (mode == DeduplicationMode.Sort)
121+
{
122+
Assert.Equal([1, 2, 3], ctx.Keys.Span);
123+
Assert.Equal(["v1", "v2", "v3"], ctx.Values.Span);
124+
}
111125
}
112126
}
113127

Src/FastData/DeduplicationMode.cs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,10 @@ public enum DeduplicationMode : byte
1010
/// <summary>
1111
/// Uses a hash set to deduplicate data. It is faster than sorting, but uses more memory. It does not change the order of keys.
1212
/// </summary>
13-
HashSetPreserveOrder,
13+
HashSet,
1414

1515
/// <summary>
16-
/// Uses sorting to deduplicate data. It is not as fast as <seealso cref="HashSetPreserveOrder" />, but it uses about half the memory. As a side effect, it changes the order of keys, which might be a desired side effect under certain circumstances.
16+
/// Uses sorting to deduplicate data. It is not as fast as <seealso cref="HashSet" />, but it uses about half the memory. As a side effect, it changes the order of keys, which might be a desired side effect under certain circumstances.
1717
/// </summary>
1818
Sort,
19-
20-
/// <summary>
21-
/// Same as <seealso cref="Sort" />, but input order is preserved.
22-
/// </summary>
23-
SortPreserveOrder
2419
}

Src/FastData/FastDataConfig.cs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,11 @@ public sealed class FastDataConfig(StructureType structureType = StructureType.A
99
/// <summary>The type of structure to create. Defaults to Auto.</summary>
1010
public StructureType StructureType { get; set; } = structureType;
1111

12-
/// <summary>Set the method to use for deduplication of keys. Defaults to <see cref="DeduplicationMode.HashSetPreserveOrder" />.</summary>
13-
public DeduplicationMode DeduplicationMode { get; set; } = DeduplicationMode.HashSetPreserveOrder;
12+
/// <summary>Set the method to use for deduplication of keys. Defaults to <see cref="FastData.DeduplicationMode.HashSet" />.</summary>
13+
public DeduplicationMode DeduplicationMode { get; set; } = DeduplicationMode.HashSet;
14+
15+
/// <summary>When true, FastData will only use data structures and algorithms that preserve the original data order</summary>
16+
public bool PreserveOrder { get; set; } = true;
1417

1518
/// <summary>When true, throws an exception on duplicate keys</summary>
1619
public bool ThrowOnDuplicates { get; set; } = true;

Src/FastData/FastDataGenerator.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -299,15 +299,15 @@ private static bool DeduplicateKeys<TKey, TValue>(FastDataConfig fdCfg, ref Read
299299
bool isSorted = false;
300300
int uniqueCount;
301301

302-
if (fdCfg.DeduplicationMode == DeduplicationMode.HashSetPreserveOrder)
302+
if (fdCfg.DeduplicationMode == DeduplicationMode.HashSet)
303303
DeduplicateWithHashSet(copyKeys, copyValues, fdCfg.ThrowOnDuplicates, equalityComparer, out uniqueCount);
304+
else if (fdCfg.DeduplicationMode == DeduplicationMode.Sort && fdCfg.PreserveOrder)
305+
DeduplicateWithSortPreserveInputOrder(copyKeys, copyValues, fdCfg.ThrowOnDuplicates, equalityComparer, sortComparer, out uniqueCount);
304306
else if (fdCfg.DeduplicationMode == DeduplicationMode.Sort)
305307
{
306308
DeduplicateWithSort(copyKeys, copyValues, fdCfg.ThrowOnDuplicates, equalityComparer, sortComparer, out uniqueCount);
307309
isSorted = true;
308310
}
309-
else if (fdCfg.DeduplicationMode == DeduplicationMode.SortPreserveOrder)
310-
DeduplicateWithSortPreserveInputOrder(copyKeys, copyValues, fdCfg.ThrowOnDuplicates, equalityComparer, sortComparer, out uniqueCount);
311311
else
312312
throw new InvalidOperationException("Unsupported deduplication mode: " + fdCfg.DeduplicationMode);
313313

0 commit comments

Comments
 (0)