|
25 | 25 | import java.util.Map; |
26 | 26 | import java.util.Objects; |
27 | 27 | import java.util.Optional; |
28 | | -import java.util.Set; |
29 | 28 | import java.util.stream.Collectors; |
30 | | -import org.apache.commons.lang3.StringUtils; |
31 | 29 | import org.apache.hadoop.hive.metastore.api.FieldSchema; |
32 | 30 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; |
33 | 31 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; |
| 32 | +import org.apache.hive.iceberg.com.fasterxml.jackson.core.type.TypeReference; |
| 33 | +import org.apache.hive.iceberg.com.fasterxml.jackson.databind.ObjectMapper; |
34 | 34 | import org.apache.iceberg.PartitionSpec; |
35 | 35 | import org.apache.iceberg.Schema; |
36 | 36 | import org.apache.iceberg.data.GenericRecord; |
37 | 37 | import org.apache.iceberg.data.Record; |
38 | 38 | import org.apache.iceberg.expressions.Literal; |
39 | | -import org.apache.iceberg.relocated.com.google.common.base.Splitter; |
40 | 39 | import org.apache.iceberg.relocated.com.google.common.collect.Lists; |
41 | 40 | import org.apache.iceberg.relocated.com.google.common.collect.Maps; |
42 | 41 | import org.apache.iceberg.types.Conversions; |
|
48 | 47 |
|
49 | 48 | public final class HiveSchemaUtil { |
50 | 49 |
|
| 50 | + private static final ObjectMapper MAPPER = new ObjectMapper(); |
| 51 | + |
51 | 52 | private HiveSchemaUtil() { |
52 | 53 | } |
53 | 54 |
|
@@ -237,7 +238,7 @@ private static void getDefaultValDiff(Types.NestedField field, Map<String, Strin |
237 | 238 | difference.addDefaultChanged(field, expectedDefault); |
238 | 239 | } |
239 | 240 | } else if (field.type().isStructType()) { |
240 | | - Map<String, String> structDefaults = getDefaultValuesMap(defaultStr); |
| 241 | + Map<String, String> structDefaults = getDefaultValuesMap(field.type().asStructType().fields(), defaultStr); |
241 | 242 |
|
242 | 243 | for (Types.NestedField nested : field.type().asStructType().fields()) { |
243 | 244 | getDefaultValDiff(nested, structDefaults, difference); |
@@ -413,31 +414,48 @@ public static String convertToTypeString(Type type) { |
413 | 414 | } |
414 | 415 | } |
415 | 416 |
|
416 | | - public static void setDefaultValues(Record record, List<Types.NestedField> fields, Set<String> missingColumns) { |
417 | | - for (Types.NestedField field : fields) { |
418 | | - Object fieldValue = record.getField(field.name()); |
| 417 | + public static void setDefaultValues(Record record, List<Types.NestedField> missingFields) { |
| 418 | + for (Types.NestedField field : missingFields) { |
| 419 | + if (field.type().isStructType()) { |
| 420 | + // Build the nested struct from nested write defaults; a null result means no default was found, so the field is left unset |
| 421 | + Record nestedRecord = buildStructWithDefaults(field.type().asStructType()); |
| 422 | + if (nestedRecord != null) { |
| 423 | + record.setField(field.name(), nestedRecord); |
| 424 | + } |
| 425 | + } else if (field.writeDefault() != null) { |
| 426 | + Object defaultValue = convertToWriteType(field.writeDefault(), field.type()); |
| 427 | + record.setField(field.name(), defaultValue); |
| 428 | + } |
| 429 | + } |
| 430 | + } |
419 | 431 |
|
420 | | - if (fieldValue == null) { |
421 | | - boolean isMissing = missingColumns.contains(field.name()); |
422 | | - |
423 | | - if (isMissing) { |
424 | | - if (field.type().isStructType()) { |
425 | | - // Create struct and apply defaults to all nested fields |
426 | | - Record nestedRecord = GenericRecord.create(field.type().asStructType()); |
427 | | - record.setField(field.name(), nestedRecord); |
428 | | - // For nested fields, we consider ALL fields as "missing" to apply defaults |
429 | | - setDefaultValuesForNestedStruct(nestedRecord, field.type().asStructType().fields()); |
430 | | - } else if (field.writeDefault() != null) { |
431 | | - Object defaultValue = convertToWriteType(field.writeDefault(), field.type()); |
432 | | - record.setField(field.name(), defaultValue); |
433 | | - } |
| 432 | + /** |
| 433 | + * Recursively builds a struct populated with write defaults. |
| 434 | + * @return A populated Record, or null if no nested fields have defaults. |
| 435 | + */ |
| 436 | + private static Record buildStructWithDefaults(Types.StructType structType) { |
| 437 | + Record nestedRecord = GenericRecord.create(structType); |
| 438 | + boolean hasAnyDefault = false; |
| 439 | + |
| 440 | + for (Types.NestedField field : structType.fields()) { |
| 441 | + if (field.writeDefault() != null) { |
| 442 | + Object defaultValue = convertToWriteType(field.writeDefault(), field.type()); |
| 443 | + nestedRecord.setField(field.name(), defaultValue); |
| 444 | + hasAnyDefault = true; |
| 445 | + } else if (field.type().isStructType()) { |
| 446 | + // Recursively process deeper nested structs |
| 447 | + Record deeperRecord = buildStructWithDefaults(field.type().asStructType()); |
| 448 | + |
| 449 | + // If the deeper struct has defaults, attach it and flag this current struct as populated |
| 450 | + if (deeperRecord != null) { |
| 451 | + nestedRecord.setField(field.name(), deeperRecord); |
| 452 | + hasAnyDefault = true; |
434 | 453 | } |
435 | | - // Explicit NULLs remain NULL |
436 | | - } else if (field.type().isStructType() && fieldValue instanceof Record) { |
437 | | - // For existing structs, apply defaults to any null nested fields |
438 | | - setDefaultValuesForNestedStruct((Record) fieldValue, field.type().asStructType().fields()); |
439 | 454 | } |
440 | 455 | } |
| 456 | + |
| 457 | + // If no fields (or nested fields) had defaults, return null to avoid an empty struct |
| 458 | + return hasAnyDefault ? nestedRecord : null; |
441 | 459 | } |
442 | 460 |
|
443 | 461 | /** |
@@ -483,11 +501,10 @@ private static void setDefaultValuesForNestedStruct(Record record, List<Types.Ne |
483 | 501 | for (Types.NestedField field : fields) { |
484 | 502 | Object fieldValue = record.getField(field.name()); |
485 | 503 |
|
486 | | - if (fieldValue == null && field.writeDefault() != null) { |
487 | | - // Always apply default to null fields in nested structs |
| 504 | + if (field.writeDefault() != null) { |
488 | 505 | Object defaultValue = convertToWriteType(field.writeDefault(), field.type()); |
489 | 506 | record.setField(field.name(), defaultValue); |
490 | | - } else if (field.type().isStructType() && fieldValue instanceof Record) { |
| 507 | + } else if (field.type().isStructType()) { |
491 | 508 | // Recursively process nested structs |
492 | 509 | setDefaultValuesForNestedStruct((Record) fieldValue, field.type().asStructType().fields()); |
493 | 510 | } |
@@ -532,12 +549,43 @@ public static Object convertToWriteType(Object value, Type type) { |
532 | 549 | return value; // fallback |
533 | 550 | } |
534 | 551 |
|
535 | | - public static Map<String, String> getDefaultValuesMap(String defaultValue) { |
536 | | - if (StringUtils.isEmpty(defaultValue)) { |
| 552 | + public static Map<String, String> getDefaultValuesMap(List<Types.NestedField> fields, String json) { |
| 553 | + if (json == null || json.trim().isEmpty()) { |
537 | 554 | return Collections.emptyMap(); |
538 | 555 | } |
539 | | - // For Struct, the default value is expected to be in key:value format |
540 | | - return Splitter.on(',').trimResults().withKeyValueSeparator(':').split(stripQuotes(defaultValue)); |
| 556 | + // 1. Strip the surrounding single or double quotes passed by Hive |
| 557 | + String cleanJson = stripQuotes(json); |
| 558 | + |
| 559 | + // 2. A case-insensitive NULL literal maps every supplied field name to "NULL" (empty map when no fields are given) |
| 560 | + if (cleanJson.equalsIgnoreCase("NULL")) { |
| 561 | + if (fields == null || fields.isEmpty()) { |
| 562 | + return Collections.emptyMap(); |
| 563 | + } |
| 564 | + return fields.stream().collect(Collectors.toMap(Types.NestedField::name, f -> "NULL")); |
| 565 | + } |
| 566 | + |
| 567 | + Map<String, Object> parsed; |
| 568 | + Map<String, String> result = Maps.newHashMap(); |
| 569 | + try { |
| 570 | + // 3. Parse the cleaned JSON; null values become "NULL", nested maps/lists are re-serialized as JSON, the rest use toString() |
| 571 | + parsed = MAPPER.readValue(cleanJson, new TypeReference<>() { |
| 572 | + }); |
| 573 | + |
| 574 | + for (Map.Entry<String, Object> entry : parsed.entrySet()) { |
| 575 | + Object value = entry.getValue(); |
| 576 | + |
| 577 | + if (value == null) { |
| 578 | + result.put(entry.getKey(), "NULL"); |
| 579 | + } else if (value instanceof Map || value instanceof List) { |
| 580 | + result.put(entry.getKey(), MAPPER.writeValueAsString(value)); |
| 581 | + } else { |
| 582 | + result.put(entry.getKey(), value.toString()); |
| 583 | + } |
| 584 | + } |
| 585 | + } catch (Exception e) { |
| 586 | + throw new IllegalArgumentException("Invalid default values JSON: " + json, e); |
| 587 | + } |
| 588 | + return result; |
541 | 589 | } |
542 | 590 |
|
543 | 591 | public static String stripQuotes(String val) { |
|
0 commit comments