Skip to content

Commit 9833c03

Browse files
cristhiank, Cristhian Lopez Vidal, and Copilot
authored
feat(clickhouse): support PARTITION BY after ORDER BY and ARRAY JOIN (#2283)
Co-authored-by: Cristhian Lopez Vidal <crilopez@microsoft.com>
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 9550e93 commit 9833c03

8 files changed

Lines changed: 208 additions & 1 deletion

File tree

src/ast/query.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2785,6 +2785,13 @@ impl fmt::Display for Join {
27852785
self.relation,
27862786
suffix(constraint)
27872787
)),
2788+
JoinOperator::ArrayJoin => f.write_fmt(format_args!("ARRAY JOIN {}", self.relation)),
2789+
JoinOperator::LeftArrayJoin => {
2790+
f.write_fmt(format_args!("LEFT ARRAY JOIN {}", self.relation))
2791+
}
2792+
JoinOperator::InnerArrayJoin => {
2793+
f.write_fmt(format_args!("INNER ARRAY JOIN {}", self.relation))
2794+
}
27882795
}
27892796
}
27902797
}
@@ -2839,6 +2846,14 @@ pub enum JoinOperator {
28392846
///
28402847
/// See <https://dev.mysql.com/doc/refman/8.4/en/join.html>.
28412848
StraightJoin(JoinConstraint),
2849+
/// ClickHouse: `ARRAY JOIN` for unnesting arrays inline.
2850+
///
2851+
/// See <https://clickhouse.com/docs/en/sql-reference/statements/select/array-join>.
2852+
ArrayJoin,
2853+
/// ClickHouse: `LEFT ARRAY JOIN` for unnesting arrays inline (preserves rows with empty arrays).
2854+
LeftArrayJoin,
2855+
/// ClickHouse: `INNER ARRAY JOIN` for unnesting arrays inline (filters rows with empty arrays).
2856+
InnerArrayJoin,
28422857
}
28432858

28442859
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]

src/ast/spans.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2252,6 +2252,9 @@ impl Spanned for JoinOperator {
22522252
JoinOperator::Anti(join_constraint) => join_constraint.span(),
22532253
JoinOperator::Semi(join_constraint) => join_constraint.span(),
22542254
JoinOperator::StraightJoin(join_constraint) => join_constraint.span(),
2255+
JoinOperator::ArrayJoin => Span::empty(),
2256+
JoinOperator::LeftArrayJoin => Span::empty(),
2257+
JoinOperator::InnerArrayJoin => Span::empty(),
22552258
}
22562259
}
22572260
}

src/dialect/clickhouse.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,14 @@ impl Dialect for ClickHouseDialect {
6464
true
6565
}
6666

67+
fn supports_partition_by_after_order_by(&self) -> bool {
68+
true
69+
}
70+
71+
fn supports_array_join_syntax(&self) -> bool {
72+
true
73+
}
74+
6775
// ClickHouse uses this for some FORMAT expressions in `INSERT` context, e.g. when inserting
6876
// with FORMAT JSONEachRow a raw JSON key-value expression is valid and expected.
6977
//

src/dialect/generic.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,14 @@ impl Dialect for GenericDialect {
4545
true
4646
}
4747

48+
fn supports_partition_by_after_order_by(&self) -> bool {
49+
true
50+
}
51+
52+
fn supports_array_join_syntax(&self) -> bool {
53+
true
54+
}
55+
4856
fn supports_group_by_expr(&self) -> bool {
4957
true
5058
}

src/dialect/mod.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,23 @@ pub trait Dialect: Debug + Any {
349349
false
350350
}
351351

352+
/// Returns true if the dialect supports `PARTITION BY` appearing after `ORDER BY`
353+
/// in a `CREATE TABLE` statement (in addition to the standard placement before `ORDER BY`).
354+
///
355+
/// ClickHouse DDL uses this ordering:
356+
/// <https://clickhouse.com/docs/en/sql-reference/statements/create/table#partition-by>
357+
fn supports_partition_by_after_order_by(&self) -> bool {
358+
false
359+
}
360+
361+
/// Returns true if the dialect supports ClickHouse-style `ARRAY JOIN` / `LEFT ARRAY JOIN` /
362+
/// `INNER ARRAY JOIN` syntax for unnesting arrays inline.
363+
///
364+
/// <https://clickhouse.com/docs/en/sql-reference/statements/select/array-join>
365+
fn supports_array_join_syntax(&self) -> bool {
366+
false
367+
}
368+
352369
/// Returns true if the dialect supports `group sets, roll up, or cube` expressions.
353370
fn supports_group_by_expr(&self) -> bool {
354371
false

src/keywords.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1236,6 +1236,8 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
12361236
Keyword::FOR,
12371237
// for MYSQL PARTITION SELECTION
12381238
Keyword::PARTITION,
1239+
// for ClickHouse ARRAY JOIN (ARRAY must not be parsed as a table alias)
1240+
Keyword::ARRAY,
12391241
// for Clickhouse PREWHERE
12401242
Keyword::PREWHERE,
12411243
Keyword::SETTINGS,

src/parser/mod.rs

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8564,6 +8564,17 @@ impl<'a> Parser<'a> {
85648564
None
85658565
};
85668566

8567+
// ClickHouse allows PARTITION BY after ORDER BY
8568+
// https://clickhouse.com/docs/en/sql-reference/statements/create/table#partition-by
8569+
let partition_by = if create_table_config.partition_by.is_none()
8570+
&& self.dialect.supports_partition_by_after_order_by()
8571+
&& self.parse_keywords(&[Keyword::PARTITION, Keyword::BY])
8572+
{
8573+
Some(Box::new(self.parse_expr()?))
8574+
} else {
8575+
create_table_config.partition_by
8576+
};
8577+
85678578
let on_commit = if self.parse_keywords(&[Keyword::ON, Keyword::COMMIT]) {
85688579
Some(self.parse_create_table_on_commit()?)
85698580
} else {
@@ -8634,7 +8645,7 @@ impl<'a> Parser<'a> {
86348645
.on_commit(on_commit)
86358646
.on_cluster(on_cluster)
86368647
.clustered_by(clustered_by)
8637-
.partition_by(create_table_config.partition_by)
8648+
.partition_by(partition_by)
86388649
.cluster_by(create_table_config.cluster_by)
86398650
.inherits(create_table_config.inherits)
86408651
.partition_of(partition_of)
@@ -15768,6 +15779,33 @@ impl<'a> Parser<'a> {
1576815779
constraint: self.parse_join_constraint(false)?,
1576915780
},
1577015781
}
15782+
} else if self.dialect.supports_array_join_syntax()
15783+
&& self.parse_keywords(&[Keyword::INNER, Keyword::ARRAY, Keyword::JOIN])
15784+
{
15785+
// ClickHouse: INNER ARRAY JOIN
15786+
Join {
15787+
relation: self.parse_table_factor()?,
15788+
global,
15789+
join_operator: JoinOperator::InnerArrayJoin,
15790+
}
15791+
} else if self.dialect.supports_array_join_syntax()
15792+
&& self.parse_keywords(&[Keyword::LEFT, Keyword::ARRAY, Keyword::JOIN])
15793+
{
15794+
// ClickHouse: LEFT ARRAY JOIN
15795+
Join {
15796+
relation: self.parse_table_factor()?,
15797+
global,
15798+
join_operator: JoinOperator::LeftArrayJoin,
15799+
}
15800+
} else if self.dialect.supports_array_join_syntax()
15801+
&& self.parse_keywords(&[Keyword::ARRAY, Keyword::JOIN])
15802+
{
15803+
// ClickHouse: ARRAY JOIN
15804+
Join {
15805+
relation: self.parse_table_factor()?,
15806+
global,
15807+
join_operator: JoinOperator::ArrayJoin,
15808+
}
1577115809
} else {
1577215810
let natural = self.parse_keyword(Keyword::NATURAL);
1577315811
let peek_keyword = if let Token::Word(w) = &self.peek_token_ref().token {

tests/sqlparser_clickhouse.rs

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,65 @@ fn parse_create_table() {
233233
);
234234
}
235235

236+
#[test]
237+
fn parse_create_table_partition_by_after_order_by() {
238+
// ClickHouse DDL places PARTITION BY after ORDER BY.
239+
// MergeTree() is canonicalized to MergeTree and `Int64` is rendered as `INT64` (`Int32` is kept as written).
240+
clickhouse().one_statement_parses_to(
241+
concat!(
242+
"CREATE TABLE IF NOT EXISTS \"MyTable\" (`col1` Int64, `col2` Int32) ",
243+
"ENGINE = MergeTree() ",
244+
"PRIMARY KEY (toDate(toDateTime(`col2`)), `col1`, `col2`) ",
245+
"ORDER BY (toDate(toDateTime(`col2`)), `col1`, `col2`) ",
246+
"PARTITION BY col1 % 64"
247+
),
248+
concat!(
249+
"CREATE TABLE IF NOT EXISTS \"MyTable\" (`col1` INT64, `col2` Int32) ",
250+
"ENGINE = MergeTree ",
251+
"PRIMARY KEY (toDate(toDateTime(`col2`)), `col1`, `col2`) ",
252+
"ORDER BY (toDate(toDateTime(`col2`)), `col1`, `col2`) ",
253+
"PARTITION BY col1 % 64"
254+
),
255+
);
256+
257+
// PARTITION BY after ORDER BY works with both ClickHouseDialect and GenericDialect
258+
clickhouse_and_generic()
259+
.verified_stmt("CREATE TABLE t (a INT) ENGINE = MergeTree ORDER BY a PARTITION BY a");
260+
261+
// Arithmetic expression in PARTITION BY (roundtrip)
262+
clickhouse_and_generic()
263+
.verified_stmt("CREATE TABLE t (a INT) ENGINE = MergeTree ORDER BY a PARTITION BY a % 64");
264+
265+
// AST: partition_by is populated with the correct expression
266+
match clickhouse_and_generic()
267+
.verified_stmt("CREATE TABLE t (a INT) ENGINE = MergeTree ORDER BY a PARTITION BY a % 64")
268+
{
269+
Statement::CreateTable(CreateTable { partition_by, .. }) => {
270+
assert_eq!(
271+
partition_by,
272+
Some(Box::new(BinaryOp {
273+
left: Box::new(Identifier(Ident::new("a"))),
274+
op: BinaryOperator::Modulo,
275+
right: Box::new(Expr::Value(
276+
Value::Number("64".parse().unwrap(), false).with_empty_span(),
277+
)),
278+
}))
279+
);
280+
}
281+
_ => unreachable!(),
282+
}
283+
284+
// Function call expression in PARTITION BY (ClickHouse-specific function)
285+
clickhouse().verified_stmt(
286+
"CREATE TABLE t (d DATE) ENGINE = MergeTree ORDER BY d PARTITION BY toYYYYMM(d)",
287+
);
288+
289+
// Negative: PARTITION BY with no expression should fail
290+
clickhouse_and_generic()
291+
.parse_sql_statements("CREATE TABLE t (a INT) ENGINE = MergeTree ORDER BY a PARTITION BY")
292+
.expect_err("PARTITION BY with no expression should fail");
293+
}
294+
236295
#[test]
237296
fn parse_insert_into_function() {
238297
clickhouse().verified_stmt(r#"INSERT INTO TABLE FUNCTION remote('localhost', default.simple_table) VALUES (100, 'inserted via remote()')"#);
@@ -1729,6 +1788,63 @@ fn test_parse_not_null_in_column_options() {
17291788
);
17301789
}
17311790

1791+
#[test]
1792+
fn parse_array_join() {
1793+
// ARRAY JOIN works with both ClickHouseDialect and GenericDialect (roundtrip)
1794+
clickhouse_and_generic().verified_stmt("SELECT x FROM t ARRAY JOIN arr AS x");
1795+
1796+
// AST: join_operator is the unit variant ArrayJoin (no constraint)
1797+
match clickhouse_and_generic().verified_stmt("SELECT x FROM t ARRAY JOIN arr AS x") {
1798+
Statement::Query(query) => {
1799+
let select = query.body.as_select().unwrap();
1800+
let join = &select.from[0].joins[0];
1801+
assert_eq!(join.join_operator, JoinOperator::ArrayJoin);
1802+
}
1803+
_ => unreachable!(),
1804+
}
1805+
1806+
// Combined: regular JOIN followed by ARRAY JOIN
1807+
clickhouse_and_generic()
1808+
.verified_stmt("SELECT x FROM t JOIN u ON t.id = u.id ARRAY JOIN arr AS x");
1809+
1810+
// Negative: ARRAY JOIN with no table expression should fail
1811+
clickhouse_and_generic()
1812+
.parse_sql_statements("SELECT x FROM t ARRAY JOIN")
1813+
.expect_err("ARRAY JOIN requires a table expression");
1814+
}
1815+
1816+
#[test]
1817+
fn parse_left_array_join() {
1818+
// LEFT ARRAY JOIN preserves rows with empty/null arrays (roundtrip)
1819+
clickhouse_and_generic().verified_stmt("SELECT x FROM t LEFT ARRAY JOIN arr AS x");
1820+
1821+
// AST: join_operator is LeftArrayJoin
1822+
match clickhouse_and_generic().verified_stmt("SELECT x FROM t LEFT ARRAY JOIN arr AS x") {
1823+
Statement::Query(query) => {
1824+
let select = query.body.as_select().unwrap();
1825+
let join = &select.from[0].joins[0];
1826+
assert_eq!(join.join_operator, JoinOperator::LeftArrayJoin);
1827+
}
1828+
_ => unreachable!(),
1829+
}
1830+
}
1831+
1832+
#[test]
1833+
fn parse_inner_array_join() {
1834+
// INNER ARRAY JOIN filters rows with empty/null arrays (roundtrip)
1835+
clickhouse_and_generic().verified_stmt("SELECT x FROM t INNER ARRAY JOIN arr AS x");
1836+
1837+
// AST: join_operator is InnerArrayJoin
1838+
match clickhouse_and_generic().verified_stmt("SELECT x FROM t INNER ARRAY JOIN arr AS x") {
1839+
Statement::Query(query) => {
1840+
let select = query.body.as_select().unwrap();
1841+
let join = &select.from[0].joins[0];
1842+
assert_eq!(join.join_operator, JoinOperator::InnerArrayJoin);
1843+
}
1844+
_ => unreachable!(),
1845+
}
1846+
}
1847+
17321848
fn clickhouse() -> TestedDialects {
17331849
TestedDialects::new(vec![Box::new(ClickHouseDialect {})])
17341850
}

0 commit comments

Comments
 (0)