Skip to content

Commit b86a94b

Browse files
mskrzypkowsMaciej Skrzypkowskialamb
authored andcommitted
Add Redshift dialect, handle square brackets properly (apache#471)
* Redshift square bracket handling We need to detect `[` or `"` for Redshift quotes around an identifier and at the same time exclude treating JSON paths as identifiers * RedshiftSqlDialect documentation update Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org> * Renamed _chars to chars * Fixed warnings * Missing license Co-authored-by: Maciej Skrzypkowski <maciej.skrzypkowski@satoricyber.com> Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
1 parent 4b67b56 commit b86a94b

7 files changed

Lines changed: 172 additions & 2 deletions

File tree

examples/cli.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname]
4343
"--mysql" => Box::new(MySqlDialect {}),
4444
"--snowflake" => Box::new(SnowflakeDialect {}),
4545
"--hive" => Box::new(HiveDialect {}),
46+
"--redshift" => Box::new(RedshiftSqlDialect {}),
4647
"--generic" | "" => Box::new(GenericDialect {}),
4748
s => panic!("Unexpected parameter: {}", s),
4849
};

src/ast/ddl.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
//! (commonly referred to as Data Definition Language, or DDL)
1515
1616
#[cfg(not(feature = "std"))]
17-
use alloc::{boxed::Box, string::String, string::ToString, vec::Vec};
17+
use alloc::{boxed::Box, string::String, vec::Vec};
1818
use core::fmt;
1919

2020
#[cfg(feature = "serde")]

src/dialect/mod.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,14 @@ mod hive;
1717
mod mssql;
1818
mod mysql;
1919
mod postgresql;
20+
mod redshift;
2021
mod snowflake;
2122
mod sqlite;
2223

2324
use core::any::{Any, TypeId};
2425
use core::fmt::Debug;
26+
use core::iter::Peekable;
27+
use core::str::Chars;
2528

2629
pub use self::ansi::AnsiDialect;
2730
pub use self::clickhouse::ClickHouseDialect;
@@ -30,6 +33,7 @@ pub use self::hive::HiveDialect;
3033
pub use self::mssql::MsSqlDialect;
3134
pub use self::mysql::MySqlDialect;
3235
pub use self::postgresql::PostgreSqlDialect;
36+
pub use self::redshift::RedshiftSqlDialect;
3337
pub use self::snowflake::SnowflakeDialect;
3438
pub use self::sqlite::SQLiteDialect;
3539
pub use crate::keywords;
@@ -51,6 +55,10 @@ pub trait Dialect: Debug + Any {
5155
fn is_delimited_identifier_start(&self, ch: char) -> bool {
5256
ch == '"'
5357
}
58+
/// Determine if the characters inside the quotes form a proper identifier
59+
fn is_proper_identifier_inside_quotes(&self, mut _chars: Peekable<Chars<'_>>) -> bool {
60+
true
61+
}
5462
/// Determine if a character is a valid start character for an unquoted identifier
5563
fn is_identifier_start(&self, ch: char) -> bool;
5664
/// Determine if a character is a valid unquoted identifier character

src/dialect/redshift.rs

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Licensed under the Apache License, Version 2.0 (the "License");
2+
// you may not use this file except in compliance with the License.
3+
// You may obtain a copy of the License at
4+
//
5+
// http://www.apache.org/licenses/LICENSE-2.0
6+
//
7+
// Unless required by applicable law or agreed to in writing, software
8+
// distributed under the License is distributed on an "AS IS" BASIS,
9+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
// See the License for the specific language governing permissions and
11+
// limitations under the License.
12+
13+
use crate::dialect::Dialect;
14+
use core::iter::Peekable;
15+
use core::str::Chars;
16+
17+
use super::PostgreSqlDialect;
18+
19+
#[derive(Debug)]
20+
pub struct RedshiftSqlDialect {}
21+
22+
// In most cases the redshift dialect is identical to [`PostgreSqlDialect`].
23+
//
24+
// Notable differences:
25+
// 1. Redshift treats brackets `[` and `]` differently. For example, given `SELECT a[1][2] FROM b`:
26+
// in the Postgres dialect, the query will be parsed as an array, while in the Redshift dialect it will
27+
// be a json path
28+
impl Dialect for RedshiftSqlDialect {
29+
fn is_delimited_identifier_start(&self, ch: char) -> bool {
30+
ch == '"' || ch == '['
31+
}
32+
33+
/// Determine if the characters inside the quotes form a proper identifier.
34+
/// This is needed to distinguish square brackets used as quotes from
35+
/// square brackets used as a JSON path. If there is an identifier then we assume
36+
/// there is no JSON path.
37+
fn is_proper_identifier_inside_quotes(&self, mut chars: Peekable<Chars<'_>>) -> bool {
38+
chars.next();
39+
let mut not_white_chars = chars.skip_while(|ch| ch.is_whitespace()).peekable();
40+
if let Some(&ch) = not_white_chars.peek() {
41+
return self.is_identifier_start(ch);
42+
}
43+
false
44+
}
45+
46+
fn is_identifier_start(&self, ch: char) -> bool {
47+
PostgreSqlDialect {}.is_identifier_start(ch)
48+
}
49+
50+
fn is_identifier_part(&self, ch: char) -> bool {
51+
PostgreSqlDialect {}.is_identifier_part(ch)
52+
}
53+
}

src/test_utils.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ pub fn all_dialects() -> TestedDialects {
141141
Box::new(AnsiDialect {}),
142142
Box::new(SnowflakeDialect {}),
143143
Box::new(HiveDialect {}),
144+
Box::new(RedshiftSqlDialect {}),
144145
],
145146
}
146147
}

src/tokenizer.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -437,7 +437,12 @@ impl<'a> Tokenizer<'a> {
437437
Ok(Some(Token::SingleQuotedString(s)))
438438
}
439439
// delimited (quoted) identifier
440-
quote_start if self.dialect.is_delimited_identifier_start(quote_start) => {
440+
quote_start
441+
if self.dialect.is_delimited_identifier_start(ch)
442+
&& self
443+
.dialect
444+
.is_proper_identifier_inside_quotes(chars.clone()) =>
445+
{
441446
chars.next(); // consume the opening quote
442447
let quote_end = Word::matching_end_quote(quote_start);
443448
let (s, last_char) = parse_quoted_ident(chars, quote_end);

tests/sqlparser_redshift.rs

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
// Licensed under the Apache License, Version 2.0 (the "License");
2+
// you may not use this file except in compliance with the License.
3+
// You may obtain a copy of the License at
4+
//
5+
// http://www.apache.org/licenses/LICENSE-2.0
6+
//
7+
// Unless required by applicable law or agreed to in writing, software
8+
// distributed under the License is distributed on an "AS IS" BASIS,
9+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
// See the License for the specific language governing permissions and
11+
// limitations under the License.
12+
13+
#[macro_use]
14+
mod test_utils;
15+
16+
use test_utils::*;
17+
18+
use sqlparser::ast::*;
19+
use sqlparser::dialect::RedshiftSqlDialect;
20+
21+
#[test]
22+
fn test_square_brackets_over_db_schema_table_name() {
23+
let select = redshift().verified_only_select("SELECT [col1] FROM [test_schema].[test_table]");
24+
assert_eq!(
25+
select.projection[0],
26+
SelectItem::UnnamedExpr(Expr::Identifier(Ident {
27+
value: "col1".to_string(),
28+
quote_style: Some('[')
29+
})),
30+
);
31+
assert_eq!(
32+
select.from[0],
33+
TableWithJoins {
34+
relation: TableFactor::Table {
35+
name: ObjectName(vec![
36+
Ident {
37+
value: "test_schema".to_string(),
38+
quote_style: Some('[')
39+
},
40+
Ident {
41+
value: "test_table".to_string(),
42+
quote_style: Some('[')
43+
}
44+
]),
45+
alias: None,
46+
args: vec![],
47+
with_hints: vec![],
48+
},
49+
joins: vec![],
50+
}
51+
);
52+
}
53+
54+
#[test]
55+
fn brackets_over_db_schema_table_name_with_whites_paces() {
56+
match redshift().parse_sql_statements("SELECT [ col1 ] FROM [ test_schema].[ test_table]") {
57+
Ok(statements) => {
58+
assert_eq!(statements.len(), 1);
59+
}
60+
_ => unreachable!(),
61+
}
62+
}
63+
64+
#[test]
65+
fn test_double_quotes_over_db_schema_table_name() {
66+
let select =
67+
redshift().verified_only_select("SELECT \"col1\" FROM \"test_schema\".\"test_table\"");
68+
assert_eq!(
69+
select.projection[0],
70+
SelectItem::UnnamedExpr(Expr::Identifier(Ident {
71+
value: "col1".to_string(),
72+
quote_style: Some('"')
73+
})),
74+
);
75+
assert_eq!(
76+
select.from[0],
77+
TableWithJoins {
78+
relation: TableFactor::Table {
79+
name: ObjectName(vec![
80+
Ident {
81+
value: "test_schema".to_string(),
82+
quote_style: Some('"')
83+
},
84+
Ident {
85+
value: "test_table".to_string(),
86+
quote_style: Some('"')
87+
}
88+
]),
89+
alias: None,
90+
args: vec![],
91+
with_hints: vec![],
92+
},
93+
joins: vec![],
94+
}
95+
);
96+
}
97+
98+
fn redshift() -> TestedDialects {
99+
TestedDialects {
100+
dialects: vec![Box::new(RedshiftSqlDialect {})],
101+
}
102+
}

0 commit comments

Comments
 (0)