Skip to content

Commit 739ceae

Browse files
committed
[spark] Support partition statistics in SHOW TABLE EXTENDED PARTITION command
1 parent f6cb6c8 commit 739ceae

3 files changed

Lines changed: 69 additions & 2 deletions

File tree

paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonPartitionManagement.scala

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919
package org.apache.paimon.spark
2020

2121
import org.apache.paimon.CoreOptions
22+
import org.apache.paimon.partition.PartitionStatistics
2223
import org.apache.paimon.table.{FileStoreTable, Table}
24+
import org.apache.paimon.table.source.ScanMode
2325
import org.apache.paimon.types.RowType
2426
import org.apache.paimon.utils.{InternalRowPartitionComputer, TypeUtils}
2527

@@ -136,7 +138,31 @@ trait PaimonPartitionManagement extends SupportsAtomicPartitionManagement with L
136138
}
137139

138140
override def loadPartitionMetadata(ident: InternalRow): JMap[String, String] = {
139-
Map.empty[String, String].asJava
141+
table match {
142+
case fileStoreTable: FileStoreTable =>
143+
val partitionSpec = toPaimonPartitions(Array(ident)).head
144+
val partitionEntries = fileStoreTable
145+
.newSnapshotReader()
146+
.withMode(ScanMode.ALL)
147+
.withPartitionFilter(partitionSpec)
148+
.partitionEntries()
149+
150+
if (!partitionEntries.isEmpty) {
151+
val entry = partitionEntries.get(0)
152+
Map(
153+
PartitionStatistics.FIELD_RECORD_COUNT -> entry.recordCount().toString,
154+
PartitionStatistics.FIELD_FILE_SIZE_IN_BYTES -> entry.fileSizeInBytes().toString,
155+
PartitionStatistics.FIELD_FILE_COUNT -> entry.fileCount().toString,
156+
PartitionStatistics.FIELD_LAST_FILE_CREATION_TIME -> entry
157+
.lastFileCreationTime()
158+
.toString
159+
).asJava
160+
} else {
161+
Map.empty[String, String].asJava
162+
}
163+
case _ =>
164+
Map.empty[String, String].asJava
165+
}
140166
}
141167

142168
override def listPartitionIdentifiers(

paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/DescribeTableTestBase.scala

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,33 @@ abstract class DescribeTableTestBase extends PaimonSparkTestBase {
9898
)
9999
Assertions.assertTrue(
100100
res2.select("information").collect().head.getString(0).contains("Partition Values"))
101+
102+
val info2 = res2.select("information").collect().head.getString(0)
103+
Assertions.assertTrue(
104+
info2.contains("Partition Parameters"),
105+
s"SHOW TABLE EXTENDED should contain Partition Parameters, but got: $info2")
106+
Assertions.assertTrue(
107+
info2.contains(PartitionStatistics.FIELD_RECORD_COUNT),
108+
s"SHOW TABLE EXTENDED should contain recordCount, but got: $info2")
109+
Assertions.assertTrue(
110+
info2.contains(PartitionStatistics.FIELD_FILE_SIZE_IN_BYTES),
111+
s"SHOW TABLE EXTENDED should contain fileSizeInBytes, but got: $info2")
112+
Assertions.assertTrue(
113+
info2.contains(PartitionStatistics.FIELD_FILE_COUNT),
114+
s"SHOW TABLE EXTENDED should contain fileCount, but got: $info2")
115+
Assertions.assertTrue(
116+
info2.contains(PartitionStatistics.FIELD_LAST_FILE_CREATION_TIME),
117+
s"SHOW TABLE EXTENDED should contain lastFileCreationTime, but got: $info2")
118+
Assertions.assertTrue(
119+
info2.contains("Partition Statistics: 1 rows"),
120+
s"Partition Statistics should contain '1 rows', but got: $info2")
121+
122+
val res3 =
123+
spark.sql(s"SHOW TABLE EXTENDED IN $testDB LIKE 's2' PARTITION(pt = '2024')")
124+
val info3 = res3.select("information").collect().head.getString(0)
125+
Assertions.assertTrue(
126+
info3.contains("Partition Statistics: 2 rows"),
127+
s"Partition pt='2024' should have 2 rows, but got: $info3")
101128
}
102129
}
103130
}

paimon-spark/paimon-spark3-common/src/main/scala/org/apache/paimon/spark/commands/PaimonShowTablePartitionCommand.scala

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
package org.apache.paimon.spark.commands
2020

21+
import org.apache.paimon.partition.PartitionStatistics
2122
import org.apache.paimon.spark.catalyst.Compatibility
2223
import org.apache.paimon.spark.leafnode.PaimonLeafRunnableCommand
2324

@@ -87,7 +88,20 @@ case class PaimonShowTablePartitionCommand(
8788
val partitionValues = partitions.mkString("[", ", ", "]")
8889
results.put("Partition Values", s"$partitionValues")
8990

90-
// TODO "Partition Parameters", "Created Time", "Last Access", "Partition Statistics"
91+
// Partition Parameters and Partition Statistics
92+
val metadata = partitionTable.loadPartitionMetadata(row)
93+
if (!metadata.isEmpty) {
94+
val metadataMap = metadata.asScala
95+
results.put(
96+
"Partition Parameters",
97+
s"{${metadataMap.map { case (k, v) => s"$k=$v" }.mkString(", ")}}")
98+
99+
val fileSizeInBytes =
100+
metadataMap.getOrElse(PartitionStatistics.FIELD_FILE_SIZE_IN_BYTES, "0").toLong
101+
val recordCount =
102+
metadataMap.getOrElse(PartitionStatistics.FIELD_RECORD_COUNT, "0").toLong
103+
results.put("Partition Statistics", s"$recordCount rows, $fileSizeInBytes bytes")
104+
}
91105

92106
results
93107
.map {

0 commit comments

Comments
 (0)