Skip to content

Commit 85a2570

Browse files
committed
fixes to this benchmark (copilot review)
* make deserialization recurse into nested objects * include UUID and BigDecimal fields * reset the counter on benchmark setup
1 parent 63e6096 commit 85a2570

1 file changed

Lines changed: 25 additions & 10 deletions

File tree

parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/VariantBenchmark.java

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@
6464
*
6565
* <ul>
6666
* <li>{@link #fieldCount} – total number of top-level fields per object.
67-
* <li>{@link #depth} – {@code Shallow} (primitives only) or {@code Nested} (some fields are
67+
* <li>{@link #depth} – {@code Flat} (primitives only) or {@code Nested} (some fields are
6868
* 5-field sub-objects).
6969
* </ul>
7070
*
@@ -77,7 +77,7 @@
7777
* <pre>
7878
* ./mvnw --projects parquet-benchmarks -amd -DskipTests -Denforcer.skip=true clean package
7979
* ./parquet-benchmarks/run.sh all org.apache.parquet.benchmarks.VariantBenchmark \
80-
* -wi 5 -i 5 -f 1 -rff /tmp/variant-benchmark.json
80+
* -wi 5 -i 5 -f 1 -rff target/results.json
8181
* </pre>
8282
*
8383
* Change fork to 1 before merge
@@ -115,6 +115,7 @@ public enum Depth {
115115

116116
/**
117117
* A counter of strings created; used to ensure limited uniqueness in strings.
118+
* Reset to 0 in {@link #setupTrial()} so each trial is reproducible.
118119
*/
119120
private static int counter;
120121

@@ -253,6 +254,7 @@ public FieldEntry(final FieldType type, final Object value) {
253254
@Setup(Level.Trial)
254255
public void setupTrial() {
255256
random = new Random(0x1ceb1cebL);
257+
counter = 0;
256258

257259
// --- field names ---
258260
fieldNames = new ArrayList<>(fieldCount);
@@ -274,7 +276,7 @@ public void setupTrial() {
274276
for (int i = 0; i < fieldCount; i++) {
275277

276278
// slightly more than the type count as there are extra strings
277-
int typeIndex = random.nextInt(typeCount + 2);
279+
int typeIndex = random.nextInt(typeCount + 4);
278280
// based on type, create entries.
279281
FieldEntry fieldEntry;
280282
switch (typeIndex) {
@@ -304,9 +306,12 @@ public void setupTrial() {
304306
fieldEntry = new FieldEntry(FieldType.Double, random.nextDouble());
305307
break;
306308
case 8:
307-
fieldEntry = new FieldEntry(FieldType.UUID, UUID.randomUUID());
309+
fieldEntry = new FieldEntry(FieldType.BigDecimal, BigDecimal.valueOf(random.nextInt()));
308310
break;
309311
case 9:
312+
fieldEntry = new FieldEntry(FieldType.UUID, UUID.randomUUID());
313+
break;
314+
case 10:
310315
fieldEntry = new FieldEntry(FieldType.Nested, null);
311316
break;
312317
default:
@@ -380,16 +385,26 @@ public void benchmarkSerialize(Blackhole bh) {
380385

381386
/**
382387
* Read path: iterate all fields of the pre-built variant, extracting each value. This exercises
383-
* the field-name lookup and type dispatch that a query engine performs on every row.
388+
* the field-name lookup and type dispatch that a query engine performs on every row. Nested
389+
* objects are recursively traversed so that {@code depth=Nested} incurs the full deserialization
390+
* cost of sub-objects.
384391
*/
385392
@Benchmark
386393
public void benchmarkDeserialize(Blackhole bh) {
387394
for (int j = 0; j < ITERATIONS; j++) {
388-
Variant v = preBuiltVariant;
389-
int n = v.numObjectElements();
390-
for (int i = 0; i < n; i++) {
391-
Variant.ObjectField field = v.getFieldAtIndex(i);
392-
bh.consume(field.key);
395+
deserializeVariant(preBuiltVariant, bh);
396+
}
397+
}
398+
399+
/** Recursively deserialize a variant object, descending into any nested objects. */
400+
private void deserializeVariant(Variant v, Blackhole bh) {
401+
int n = v.numObjectElements();
402+
for (int i = 0; i < n; i++) {
403+
Variant.ObjectField field = v.getFieldAtIndex(i);
404+
bh.consume(field.key);
405+
if (field.value.getType() == Variant.Type.OBJECT) {
406+
deserializeVariant(field.value, bh);
407+
} else {
393408
bh.consume(field.value.getValueBuffer());
394409
}
395410
}

0 commit comments

Comments
 (0)