@@ -36,6 +36,12 @@ class ColumnXformer(
3636 companion object {
3737 const val COLUMN_PARENT_QN = " columnParentQualifiedName"
3838 const val COLUMN_NAME = " columnName"
39+ val PARENT_COLUMN_QN_HEADER = RowSerde .getHeaderForField(Column .PARENT_COLUMN_QUALIFIED_NAME , Column ::class .java)
40+ val PARENT_COLUMN_HEADER = RowSerde .getHeaderForField(Column .PARENT_COLUMN , Column ::class .java)
41+ val PARENT_COLUMN_NAME_HEADER = RowSerde .getHeaderForField(Column .PARENT_COLUMN_NAME , Column ::class .java)
42+ val NESTED_COLUMN_ORDER_HEADER = RowSerde .getHeaderForField(Column .NESTED_COLUMN_ORDER , Column ::class .java)
43+ val COLUMN_DEPTH_LEVEL_HEADER = RowSerde .getHeaderForField(Column .COLUMN_DEPTH_LEVEL , Column ::class .java)
44+ val COLUMN_HIERARCHY_HEADER = RowSerde .getHeaderForField(Column .COLUMN_HIERARCHY , Column ::class .java)
3945 val REQUIRED_HEADERS =
4046 mapOf<String , Set <String >>(
4147 Asset .TYPE_NAME .atlanFieldName to setOf (),
@@ -66,6 +72,7 @@ class ColumnXformer(
6672 val assetQN = " $connectionQN /${details.partialQN} "
6773 val parentQN = " $connectionQN /${details.parentPartialQN} "
6874 val rawDataType = trimWhitespace(inputRow.getOrElse(Column .DATA_TYPE .atlanFieldName) { " " })
75+ val displayDataType = baseTypeName(rawDataType)
6976 var precision: Int? = null
7077 var scale: Double? = null
7178 var maxLength: Long? = null
@@ -95,6 +102,7 @@ class ColumnXformer(
95102 RowSerde .getHeaderForField(Column .VIEW_QUALIFIED_NAME , Column ::class .java) to if (details.viewPQN.isNotBlank()) " $connectionQN /${details.viewPQN} " else " " ,
96103 RowSerde .getHeaderForField(Column .VIEW , Column ::class .java) to if (details.parentTypeName == View .TYPE_NAME ) " ${details.parentTypeName} @$parentQN " else " " ,
97104 RowSerde .getHeaderForField(Column .MATERIALIZED_VIEW , Column ::class .java) to if (details.parentTypeName == MaterializedView .TYPE_NAME ) " ${details.parentTypeName} @$parentQN " else " " ,
105+ RowSerde .getHeaderForField(Column .DATA_TYPE , Column ::class .java) to displayDataType,
98106 RowSerde .getHeaderForField(Column .ORDER , Column ::class .java) to inputRow.getOrElse(Column .ORDER .atlanFieldName) { " " },
99107 RowSerde .getHeaderForField(Column .RAW_DATA_TYPE_DEFINITION , Column ::class .java) to rawDataType,
100108 RowSerde .getHeaderForField(Column .PRECISION , Column ::class .java) to (precision?.toString() ? : " " ),
@@ -106,6 +114,116 @@ class ColumnXformer(
106114 }
107115 }
108116
/**
 * Reduces a raw type definition to its base type name by dropping angle-bracket type parameters.
 *
 * When [rawType] contains a `<`, the text before the first `<` is trimmed and upper-cased
 * (e.g. `STRUCT<a:INT,b:DOUBLE>` becomes `STRUCT`). A definition without `<` is returned
 * exactly as received, with no trimming or case change (e.g. `INT` stays `INT`).
 *
 * @param rawType raw data type definition
 * @return the base type name
 */
private fun baseTypeName(rawType: String): String {
    val bracketAt = rawType.indexOf('<')
    // No type parameters: hand the definition back untouched.
    if (bracketAt < 0) return rawType
    return rawType.substring(0, bracketAt).trim().uppercase()
}
120+
/**
 * {@inheritDoc}
 *
 * Extends the parent implementation: whenever [ComplexTypeParser.extractStructFields] yields a
 * non-null result for the row's data type, additional child column rows are appended after the
 * rows produced by the parent implementation. Children of nested complex types are generated
 * recursively by [buildSubColumnRows].
 *
 * @param inputRow the input row, keyed by column header
 * @return the rows from the parent implementation, followed by any generated child column rows
 */
override fun mapRow(inputRow: Map<String, String>): List<List<String>> {
    val combined = super.mapRow(inputRow).toMutableList()
    val dataType = trimWhitespace(inputRow.getOrElse(Column.DATA_TYPE.atlanFieldName) { "" })
    // Not a complex type the parser recognizes: only the parent implementation's rows apply.
    val nestedFields = ComplexTypeParser.extractStructFields(dataType) ?: return combined

    // Rebuild the column's own qualified name so the children can point back at it.
    val connectionQN = getConnectionQN(inputRow)
    val hierarchy = getSQLHierarchyDetails(inputRow, typeNameFilter, preprocessedDetails.entityQualifiedNameToType)
    val columnQN = "$connectionQN/${hierarchy.partialQN}"
    // The column's asset map is the template from which every child row inherits its context.
    combined += buildSubColumnRows(mapAsset(inputRow), columnQN, nestedFields)
    return combined
}
140+
/**
 * Produces the rows for every field in [parseResult], descending depth-first into any field
 * whose own raw type parses as a complex type.
 *
 * Each field's row is immediately followed by the rows of its descendants, before the next
 * sibling field is emitted.
 *
 * @param baseAssetMap field map of the immediate parent column, from which context fields are inherited
 * @param parentColumnQN qualified name of the parent column asset
 * @param parseResult parsed fields plus an optional synthetic path node
 * @param depth nesting depth of the generated children (1 for direct children of a top-level column)
 * @return the generated rows, in depth-first order
 */
private fun buildSubColumnRows(
    baseAssetMap: Map<String, String>,
    parentColumnQN: String,
    parseResult: ComplexTypeParser.ParseResult,
    depth: Int = 1,
): List<List<String>> {
    // A synthetic node extends the path used for child qualified names only; the parent
    // reference handed to each child remains the real parent column.
    val pathPrefix = parseResult.syntheticNode?.let { "$parentColumnQN/$it" } ?: parentColumnQN
    return parseResult.fields.withIndex().flatMap { (position, field) ->
        val childQN = "$pathPrefix/${field.name}"
        val childAssetMap = buildChildAssetMap(baseAssetMap, parentColumnQN, childQN, field, position + 1, depth)
        val ownRow = assetMapToValueList(childAssetMap)
        // The child becomes the parent for anything nested within its own type.
        val nested = ComplexTypeParser.extractStructFields(field.rawType)
        val descendantRows =
            if (nested == null) {
                emptyList<List<String>>()
            } else {
                buildSubColumnRows(childAssetMap, childQN, nested, depth + 1)
            }
        listOf(ownRow) + descendantRows
    }
}
170+
/**
 * Escapes [value] for embedding inside a double-quoted JSON string (RFC 8259, section 7).
 *
 * Quotes, backslashes and control characters are escaped; all other characters pass through
 * unchanged, so values without special characters are returned as-is.
 */
private fun escapeJsonString(value: String): String =
    buildString(value.length) {
        for (ch in value) {
            when (ch) {
                '"' -> append("\\\"")
                '\\' -> append("\\\\")
                '\n' -> append("\\n")
                '\r' -> append("\\r")
                '\t' -> append("\\t")
                '\b' -> append("\\b")
                '\u000C' -> append("\\f")
                else -> if (ch < ' ') append("\\u%04x".format(ch.code)) else append(ch)
            }
        }
    }

/**
 * Builds the asset map for a single child column: a copy of [parentAssetMap] in which the
 * column-specific fields are overridden, the table/view references are blanked, and the
 * parent-column / nesting fields are populated.
 *
 * @param parentAssetMap asset map of the immediate parent column
 * @param parentColumnQN qualified name of the parent column (for [PARENT_COLUMN_QN_HEADER])
 * @param childQN qualified name for the child column
 * @param field field definition (name and raw type) for the child column
 * @param order ordinal position of the child column within its parent
 * @param depth nesting depth of this child column (1 for direct children of a top-level column, 2 for grandchildren, etc.)
 * @return a new map; [parentAssetMap] itself is not modified
 */
private fun buildChildAssetMap(
    parentAssetMap: Map<String, String>,
    parentColumnQN: String,
    childQN: String,
    field: ComplexTypeParser.FieldDefinition,
    order: Int,
    depth: Int,
): Map<String, String> {
    // The parent's name is taken as the last path segment of its qualified name.
    val parentName = parentColumnQN.substringAfterLast('/')
    val childMap = parentAssetMap.toMutableMap()

    // Identity and type of the child itself.
    childMap[RowSerde.getHeaderForField(Asset.QUALIFIED_NAME)] = childQN
    childMap[RowSerde.getHeaderForField(Asset.NAME)] = field.name
    childMap[RowSerde.getHeaderForField(Column.DATA_TYPE, Column::class.java)] = baseTypeName(field.rawType)
    childMap[RowSerde.getHeaderForField(Column.RAW_DATA_TYPE_DEFINITION, Column::class.java)] = field.rawType
    childMap[RowSerde.getHeaderForField(Column.ORDER, Column::class.java)] = order.toString()

    // Clear numeric type-specific fields inherited from the parent; they describe the parent's
    // type, not the child's.
    childMap[RowSerde.getHeaderForField(Column.PRECISION, Column::class.java)] = ""
    childMap[RowSerde.getHeaderForField(Column.NUMERIC_SCALE, Column::class.java)] = ""
    childMap[RowSerde.getHeaderForField(Column.MAX_LENGTH, Column::class.java)] = ""

    // Clear table/view references on sub-columns so they do not appear in the table's flat
    // column list. Navigation is via the parentColumn chain instead.
    childMap[RowSerde.getHeaderForField(Column.TABLE_QUALIFIED_NAME, Column::class.java)] = ""
    childMap[RowSerde.getHeaderForField(Column.TABLE_NAME, Column::class.java)] = ""
    childMap[RowSerde.getHeaderForField(Column.TABLE, Column::class.java)] = ""
    childMap[RowSerde.getHeaderForField(Column.VIEW_QUALIFIED_NAME, Column::class.java)] = ""
    childMap[RowSerde.getHeaderForField(Column.VIEW_NAME, Column::class.java)] = ""
    childMap[RowSerde.getHeaderForField(Column.VIEW, Column::class.java)] = ""
    childMap[RowSerde.getHeaderForField(Column.MATERIALIZED_VIEW, Column::class.java)] = ""

    // Link the child to its parent column and record its position and depth in the nesting.
    childMap[PARENT_COLUMN_QN_HEADER] = parentColumnQN
    childMap[PARENT_COLUMN_HEADER] = "${Column.TYPE_NAME}@$parentColumnQN"
    childMap[PARENT_COLUMN_NAME_HEADER] = parentName
    childMap[NESTED_COLUMN_ORDER_HEADER] = order.toString()
    childMap[COLUMN_DEPTH_LEVEL_HEADER] = depth.toString()

    // columnHierarchy: the parent's own hierarchy (if any) followed by one entry for the parent,
    // each entry a JSON object {"depth":"<n>","qualifiedName":"<qn>","name":"<name>"} and entries
    // separated by a newline.
    // NOTE(review): the newline is presumably CellXformer.LIST_DELIMITER — confirm and reference
    // the constant if it is in scope.
    // Values are JSON-escaped so that a quote, backslash or control character in a column name
    // can neither produce invalid JSON nor (for a raw newline) split the entry list.
    val parentHierarchyStr = parentAssetMap.getOrElse(COLUMN_HIERARCHY_HEADER) { "" }
    val newEntry =
        """{"depth":"$depth","qualifiedName":"${escapeJsonString(parentColumnQN)}","name":"${escapeJsonString(parentName)}"}"""
    childMap[COLUMN_HIERARCHY_HEADER] = if (parentHierarchyStr.isBlank()) newEntry else "$parentHierarchyStr\n$newEntry"

    childMap[RowSerde.getHeaderForField(Asset.SUB_TYPE)] = "nested"
    return childMap
}
226+
109227 class Preprocessor (
110228 originalFile : String ,
111229 fieldSeparator : Char ,
0 commit comments