Skip to content

Commit 3ac7a2e

Browse files
committed
Databricks: Add support for UPDATE SET * and INSERT * in MERGE statements
Add parsing support for the Databricks star shorthand syntax in MERGE statements, allowing `UPDATE SET *` and `INSERT *` to reference all columns from the source.

- Add `MergeUpdateKind` enum (`Set(Vec<Assignment>)` | `Star`) to replace the previous `assignments` field on `MergeUpdateExpr`, enforcing valid states at the type level
- Add `MergeInsertKind::Star` variant for `INSERT *` shorthand
- Add `supports_merge_star_syntax()` dialect method for Databricks and Generic
1 parent 9550e93 commit 3ac7a2e

10 files changed

Lines changed: 185 additions & 38 deletions

File tree

src/ast/dml.rs

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -649,6 +649,14 @@ pub enum MergeInsertKind {
649649
/// ```
650650
/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement)
651651
Row,
652+
/// The insert expression uses the `*` shorthand to insert all columns.
653+
///
654+
/// Example:
655+
/// ```sql
656+
/// INSERT *
657+
/// ```
658+
/// [Databricks](https://docs.databricks.com/en/sql/language-manual/delta-merge-into.html)
659+
Star,
652660
}
653661

654662
impl Display for MergeInsertKind {
@@ -660,6 +668,9 @@ impl Display for MergeInsertKind {
660668
MergeInsertKind::Row => {
661669
write!(f, "ROW")
662670
}
671+
MergeInsertKind::Star => {
672+
write!(f, "*")
673+
}
663674
}
664675
}
665676
}
@@ -710,33 +721,70 @@ impl Display for MergeInsertExpr {
710721
}
711722
}
712723

724+
/// The kind of update used within a `MERGE` statement.
725+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
726+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
727+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
728+
pub enum MergeUpdateKind {
729+
/// Standard update with explicit assignments.
730+
///
731+
/// Example:
732+
/// ```sql
733+
/// UPDATE SET quantity = source.quantity, name = source.name
734+
/// ```
735+
Set(Vec<Assignment>),
736+
/// The `*` shorthand to update all columns from the source.
737+
///
738+
/// Example:
739+
/// ```sql
740+
/// UPDATE SET *
741+
/// ```
742+
/// [Databricks](https://docs.databricks.com/en/sql/language-manual/delta-merge-into.html)
743+
Star,
744+
}
745+
746+
impl Display for MergeUpdateKind {
747+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
748+
match self {
749+
MergeUpdateKind::Set(assignments) => {
750+
write!(f, "SET {}", display_comma_separated(assignments))
751+
}
752+
MergeUpdateKind::Star => {
753+
write!(f, "SET *")
754+
}
755+
}
756+
}
757+
}
758+
713759
/// The expression used to update rows within a `MERGE` statement.
714760
///
715761
/// Examples
716762
/// ```sql
717763
/// UPDATE SET quantity = T.quantity + S.quantity
764+
/// UPDATE SET *
718765
/// ```
719766
///
720767
/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge)
721768
/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement)
722769
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/MERGE.html)
770+
/// [Databricks](https://docs.databricks.com/en/sql/language-manual/delta-merge-into.html)
723771
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
724772
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
725773
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
726774
pub struct MergeUpdateExpr {
727775
/// The `UPDATE` token that starts the sub-expression.
728776
pub update_token: AttachedToken,
729-
/// The update assiment expressions
730-
pub assignments: Vec<Assignment>,
731-
/// `where_clause` for the update (Oralce specific)
777+
/// The kind of update: explicit assignments or `*` shorthand.
778+
pub kind: MergeUpdateKind,
779+
/// `where_clause` for the update (Oracle specific)
732780
pub update_predicate: Option<Expr>,
733781
/// `delete_clause` for the update "delete where" (Oracle specific)
734782
pub delete_predicate: Option<Expr>,
735783
}
736784

737785
impl Display for MergeUpdateExpr {
738786
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
739-
write!(f, "SET {}", display_comma_separated(&self.assignments))?;
787+
write!(f, "{}", self.kind)?;
740788
if let Some(predicate) = self.update_predicate.as_ref() {
741789
write!(f, " WHERE {predicate}")?;
742790
}

src/ast/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,8 @@ pub use self::ddl::{
8585
};
8686
pub use self::dml::{
8787
Delete, Insert, Merge, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr,
88-
MergeInsertKind, MergeUpdateExpr, MultiTableInsertIntoClause, MultiTableInsertType,
88+
MergeInsertKind, MergeUpdateExpr, MergeUpdateKind, MultiTableInsertIntoClause,
89+
MultiTableInsertType,
8990
MultiTableInsertValue, MultiTableInsertValues, MultiTableInsertWhenClause, OutputClause,
9091
Update,
9192
};

src/ast/spans.rs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ use super::{
3939
IfStatement, IlikeSelectItem, IndexColumn, Insert, Interpolate, InterpolateExpr, Join,
4040
JoinConstraint, JoinOperator, JsonPath, JsonPathElem, LateralView, LimitClause,
4141
MatchRecognizePattern, Measure, Merge, MergeAction, MergeClause, MergeInsertExpr,
42-
MergeInsertKind, MergeUpdateExpr, NamedParenthesizedList, NamedWindowDefinition, ObjectName,
42+
MergeInsertKind, MergeUpdateExpr, MergeUpdateKind, NamedParenthesizedList,
43+
NamedWindowDefinition, ObjectName,
4344
ObjectNamePart, Offset, OnConflict, OnConflictAction, OnInsert, OpenStatement, OrderBy,
4445
OrderByExpr, OrderByKind, OutputClause, Parens, Partition, PartitionBoundValue,
4546
PivotValueSource, ProjectionSelect, Query, RaiseStatement, RaiseStatementValue,
@@ -2531,7 +2532,7 @@ impl Spanned for MergeInsertExpr {
25312532
self.kind_token.0.span,
25322533
match self.kind {
25332534
MergeInsertKind::Values(ref values) => values.span(),
2534-
MergeInsertKind::Row => Span::empty(), // ~ covered by `kind_token`
2535+
MergeInsertKind::Row | MergeInsertKind::Star => Span::empty(),
25352536
},
25362537
]
25372538
.into_iter()
@@ -2543,9 +2544,13 @@ impl Spanned for MergeInsertExpr {
25432544

25442545
impl Spanned for MergeUpdateExpr {
25452546
fn span(&self) -> Span {
2547+
let kind_span = match &self.kind {
2548+
MergeUpdateKind::Set(assignments) => union_spans(assignments.iter().map(Spanned::span)),
2549+
MergeUpdateKind::Star => Span::empty(),
2550+
};
25462551
union_spans(
25472552
core::iter::once(self.update_token.0.span)
2548-
.chain(self.assignments.iter().map(Spanned::span))
2553+
.chain(core::iter::once(kind_span))
25492554
.chain(self.update_predicate.iter().map(Spanned::span))
25502555
.chain(self.delete_predicate.iter().map(Spanned::span)),
25512556
)
@@ -2927,7 +2932,7 @@ WHERE id = 1
29272932
);
29282933
if let MergeAction::Update(MergeUpdateExpr {
29292934
update_token,
2930-
assignments: _,
2935+
kind: _,
29312936
update_predicate: _,
29322937
delete_predicate: _,
29332938
}) = &clauses[1].action

src/dialect/databricks.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,4 +108,9 @@ impl Dialect for DatabricksDialect {
108108
fn supports_select_item_multi_column_alias(&self) -> bool {
109109
true
110110
}
111+
112+
/// See <https://docs.databricks.com/en/sql/language-manual/delta-merge-into.html>
113+
fn supports_merge_star_syntax(&self) -> bool {
114+
true
115+
}
111116
}

src/dialect/generic.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,4 +304,8 @@ impl Dialect for GenericDialect {
304304
fn supports_xml_expressions(&self) -> bool {
305305
true
306306
}
307+
308+
fn supports_merge_star_syntax(&self) -> bool {
309+
true
310+
}
307311
}

src/dialect/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1164,6 +1164,13 @@ pub trait Dialect: Debug + Any {
11641164
false
11651165
}
11661166

1167+
/// Returns true if the dialect supports `UPDATE SET *` and `INSERT *`
1168+
/// shorthand syntax in `MERGE` statements.
1169+
/// <https://docs.databricks.com/en/sql/language-manual/delta-merge-into.html>
1170+
fn supports_merge_star_syntax(&self) -> bool {
1171+
false
1172+
}
1173+
11671174
/// Returns true if the dialect supports the `LISTEN`, `UNLISTEN` and `NOTIFY` statements
11681175
fn supports_listen_notify(&self) -> bool {
11691176
false

src/parser/merge.rs

Lines changed: 44 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,12 @@ use alloc::{boxed::Box, format, vec, vec::Vec};
1818
use crate::{
1919
ast::{
2020
Merge, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr, MergeInsertKind,
21-
MergeUpdateExpr, ObjectName, OutputClause, SetExpr,
21+
MergeUpdateExpr, MergeUpdateKind, ObjectName, OutputClause, SetExpr,
2222
},
2323
dialect::{BigQueryDialect, GenericDialect, MySqlDialect},
2424
keywords::Keyword,
2525
parser::IsOptional,
26+
tokenizer::Token,
2627
tokenizer::TokenWithSpan,
2728
};
2829

@@ -120,7 +121,13 @@ impl Parser<'_> {
120121

121122
let update_token = self.get_current_token().clone();
122123
self.expect_keyword_is(Keyword::SET)?;
123-
let assignments = self.parse_comma_separated(Parser::parse_assignment)?;
124+
let kind = if self.dialect.supports_merge_star_syntax()
125+
&& self.consume_token(&Token::Mul)
126+
{
127+
MergeUpdateKind::Star
128+
} else {
129+
MergeUpdateKind::Set(self.parse_comma_separated(Parser::parse_assignment)?)
130+
};
124131
let update_predicate = if self.parse_keyword(Keyword::WHERE) {
125132
Some(self.parse_expr()?)
126133
} else {
@@ -134,7 +141,7 @@ impl Parser<'_> {
134141
};
135142
MergeAction::Update(MergeUpdateExpr {
136143
update_token: update_token.into(),
137-
assignments,
144+
kind,
138145
update_predicate,
139146
delete_predicate,
140147
})
@@ -167,32 +174,44 @@ impl Parser<'_> {
167174
};
168175

169176
let insert_token = self.get_current_token().clone();
170-
let is_mysql = dialect_of!(self is MySqlDialect);
171177

172-
let columns = self.parse_merge_clause_insert_columns(is_mysql)?;
173-
let (kind, kind_token) = if dialect_of!(self is BigQueryDialect | GenericDialect)
174-
&& self.parse_keyword(Keyword::ROW)
178+
if self.dialect.supports_merge_star_syntax() && self.consume_token(&Token::Mul)
175179
{
176-
(MergeInsertKind::Row, self.get_current_token().clone())
177-
} else {
178-
self.expect_keyword_is(Keyword::VALUES)?;
179-
let values_token = self.get_current_token().clone();
180-
let values = self.parse_values(is_mysql, false)?;
181-
(MergeInsertKind::Values(values), values_token)
182-
};
183-
let insert_predicate = if self.parse_keyword(Keyword::WHERE) {
184-
Some(self.parse_expr()?)
180+
let star_token = self.get_current_token().clone();
181+
MergeAction::Insert(MergeInsertExpr {
182+
insert_token: insert_token.into(),
183+
columns: vec![],
184+
kind_token: star_token.into(),
185+
kind: MergeInsertKind::Star,
186+
insert_predicate: None,
187+
})
185188
} else {
186-
None
187-
};
189+
let is_mysql = dialect_of!(self is MySqlDialect);
190+
let columns = self.parse_merge_clause_insert_columns(is_mysql)?;
191+
let (kind, kind_token) = if dialect_of!(self is BigQueryDialect | GenericDialect)
192+
&& self.parse_keyword(Keyword::ROW)
193+
{
194+
(MergeInsertKind::Row, self.get_current_token().clone())
195+
} else {
196+
self.expect_keyword_is(Keyword::VALUES)?;
197+
let values_token = self.get_current_token().clone();
198+
let values = self.parse_values(is_mysql, false)?;
199+
(MergeInsertKind::Values(values), values_token)
200+
};
201+
let insert_predicate = if self.parse_keyword(Keyword::WHERE) {
202+
Some(self.parse_expr()?)
203+
} else {
204+
None
205+
};
188206

189-
MergeAction::Insert(MergeInsertExpr {
190-
insert_token: insert_token.into(),
191-
columns,
192-
kind_token: kind_token.into(),
193-
kind,
194-
insert_predicate,
195-
})
207+
MergeAction::Insert(MergeInsertExpr {
208+
insert_token: insert_token.into(),
209+
columns,
210+
kind_token: kind_token.into(),
211+
kind,
212+
insert_predicate,
213+
})
214+
}
196215
}
197216
_ => {
198217
return parser_err!(

tests/sqlparser_bigquery.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1830,7 +1830,7 @@ fn parse_merge() {
18301830
});
18311831
let update_action = MergeAction::Update(MergeUpdateExpr {
18321832
update_token: AttachedToken::empty(),
1833-
assignments: vec![
1833+
kind: MergeUpdateKind::Set(vec![
18341834
Assignment {
18351835
target: AssignmentTarget::ColumnName(ObjectName::from(vec![Ident::new("a")])),
18361836
value: Expr::value(number("1")),
@@ -1839,7 +1839,7 @@ fn parse_merge() {
18391839
target: AssignmentTarget::ColumnName(ObjectName::from(vec![Ident::new("b")])),
18401840
value: Expr::value(number("2")),
18411841
},
1842-
],
1842+
]),
18431843
update_predicate: None,
18441844
delete_predicate: None,
18451845
});

tests/sqlparser_common.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10157,7 +10157,7 @@ fn parse_merge() {
1015710157
}),
1015810158
action: MergeAction::Update(MergeUpdateExpr {
1015910159
update_token: AttachedToken::empty(),
10160-
assignments: vec![
10160+
kind: MergeUpdateKind::Set(vec![
1016110161
Assignment {
1016210162
target: AssignmentTarget::ColumnName(ObjectName::from(vec![
1016310163
Ident::new("dest"),
@@ -10178,7 +10178,7 @@ fn parse_merge() {
1017810178
Ident::new("G"),
1017910179
]),
1018010180
},
10181-
],
10181+
]),
1018210182
update_predicate: None,
1018310183
delete_predicate: None,
1018410184
}),

tests/sqlparser_databricks.rs

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -738,3 +738,61 @@ fn parse_cte_without_as() {
738738
.parse_sql_statements("WITH cte (SELECT 1) SELECT * FROM cte")
739739
.is_err());
740740
}
741+
742+
#[test]
743+
fn test_merge_update_set_star_and_insert_star() {
744+
let sql = "MERGE INTO target USING source ON target.id = source.id WHEN MATCHED THEN UPDATE SET * WHEN NOT MATCHED THEN INSERT *";
745+
databricks_and_generic().verified_stmt(sql);
746+
747+
match databricks().verified_stmt(sql) {
748+
Statement::Merge(merge) => {
749+
assert_eq!(merge.clauses.len(), 2);
750+
751+
match &merge.clauses[0].action {
752+
MergeAction::Update(update_expr) => {
753+
assert!(matches!(update_expr.kind, MergeUpdateKind::Star));
754+
}
755+
_ => panic!("Expected UPDATE action"),
756+
}
757+
758+
match &merge.clauses[1].action {
759+
MergeAction::Insert(insert_expr) => {
760+
assert!(matches!(insert_expr.kind, MergeInsertKind::Star));
761+
assert!(insert_expr.columns.is_empty());
762+
}
763+
_ => panic!("Expected INSERT action"),
764+
}
765+
}
766+
_ => panic!("Expected MERGE statement"),
767+
}
768+
}
769+
770+
#[test]
771+
fn test_merge_update_set_star_with_predicate() {
772+
let sql = "MERGE INTO target USING source ON target.id = source.id WHEN MATCHED AND source.active = true THEN UPDATE SET *";
773+
databricks_and_generic().verified_stmt(sql);
774+
}
775+
776+
#[test]
777+
fn test_merge_insert_star_with_not_matched_by_target() {
778+
let sql = "MERGE INTO target USING source ON target.id = source.id WHEN NOT MATCHED BY TARGET THEN INSERT *";
779+
databricks_and_generic().verified_stmt(sql);
780+
}
781+
782+
#[test]
783+
fn test_merge_mixed_star_and_explicit() {
784+
let sql = "MERGE INTO target USING source ON target.id = source.id WHEN MATCHED THEN UPDATE SET * WHEN NOT MATCHED THEN INSERT (a, b) VALUES (source.a, source.b)";
785+
databricks_and_generic().verified_stmt(sql);
786+
}
787+
788+
#[test]
789+
fn test_merge_star_with_subquery_source() {
790+
let sql = concat!(
791+
"MERGE INTO t1 AS target ",
792+
"USING (SELECT * FROM t2) AS source ",
793+
"ON target.id = source.id ",
794+
"WHEN MATCHED THEN UPDATE SET * ",
795+
"WHEN NOT MATCHED THEN INSERT *"
796+
);
797+
databricks_and_generic().verified_stmt(sql);
798+
}

0 commit comments

Comments
 (0)