Skip to content

Commit b04ed04

Browse files
ovralamb
andcommitted
feat: Support escaped string literals (PostgreSQL)
Signed-off-by: Dmitry Patsura <talk@dmtry.me> Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
1 parent 47a7b66 commit b04ed04

4 files changed

Lines changed: 164 additions & 0 deletions

File tree

src/ast/value.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ pub enum Value {
3535
Number(BigDecimal, bool),
3636
/// 'string value'
3737
SingleQuotedString(String),
38+
/// e'string value' (postgres extension)
39+
/// <https://www.postgresql.org/docs/8.3/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS>
40+
EscapedStringLiteral(String),
3841
/// N'string value'
3942
NationalStringLiteral(String),
4043
/// X'hex value'
@@ -74,6 +77,7 @@ impl fmt::Display for Value {
7477
Value::Number(v, l) => write!(f, "{}{long}", v, long = if *l { "L" } else { "" }),
7578
Value::DoubleQuotedString(v) => write!(f, "\"{}\"", v),
7679
Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)),
80+
Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)),
7781
Value::NationalStringLiteral(v) => write!(f, "N'{}'", v),
7882
Value::HexStringLiteral(v) => write!(f, "X'{}'", v),
7983
Value::Boolean(v) => write!(f, "{}", v),
@@ -200,6 +204,40 @@ pub fn escape_single_quote_string(s: &str) -> EscapeSingleQuoteString<'_> {
200204
EscapeSingleQuoteString(s)
201205
}
202206

207+
/// Display adapter that writes a string as the body of an escaped string
/// literal (`E'...'`), without the surrounding `E'` and `'`.
///
/// Single quotes, backslashes, newlines, tabs and carriage returns are
/// written as the two-character sequences `\'`, `\\`, `\n`, `\t` and `\r`.
/// Every other character is written unchanged.
pub struct EscapeEscapedStringLiteral<'a>(&'a str);

impl<'a> fmt::Display for EscapeEscapedStringLiteral<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Scratch space for encoding a single pass-through character.
        let mut utf8 = [0u8; 4];
        for c in self.0.chars() {
            let piece: &str = match c {
                '\'' => r"\'",
                '\\' => r"\\",
                '\n' => r"\n",
                '\t' => r"\t",
                '\r' => r"\r",
                // Not special inside `E'...'`: emit the character as-is.
                _ => &*c.encode_utf8(&mut utf8),
            };
            // `write_str` avoids the formatting machinery for fixed text.
            f.write_str(piece)?;
        }
        Ok(())
    }
}

/// Wraps `s` so that displaying it yields the escaped body of an `E'...'`
/// string literal. See [`EscapeEscapedStringLiteral`] for the escapes used.
pub fn escape_escaped_string(s: &str) -> EscapeEscapedStringLiteral<'_> {
    EscapeEscapedStringLiteral(s)
}
240+
203241
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
204242
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
205243
pub enum TrimWhereField {

src/parser.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,11 @@ impl<'a> Parser<'a> {
497497
expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?),
498498
})
499499
}
500+
Token::EscapedStringLiteral(_) if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
501+
{
502+
self.prev_token();
503+
Ok(Expr::Value(self.parse_value()?))
504+
}
500505
Token::Number(_, _)
501506
| Token::SingleQuotedString(_)
502507
| Token::NationalStringLiteral(_)
@@ -883,6 +888,7 @@ impl<'a> Parser<'a> {
883888
None
884889
}
885890
Token::SingleQuotedString(_)
891+
| Token::EscapedStringLiteral(_)
886892
| Token::NationalStringLiteral(_)
887893
| Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)),
888894
unexpected => {
@@ -2544,6 +2550,7 @@ impl<'a> Parser<'a> {
25442550
},
25452551
Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())),
25462552
Token::NationalStringLiteral(ref s) => Ok(Value::NationalStringLiteral(s.to_string())),
2553+
Token::EscapedStringLiteral(ref s) => Ok(Value::EscapedStringLiteral(s.to_string())),
25472554
Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())),
25482555
Token::Placeholder(ref s) => Ok(Value::Placeholder(s.to_string())),
25492556
unexpected => self.expected("a value", unexpected),
@@ -2575,6 +2582,9 @@ impl<'a> Parser<'a> {
25752582
match self.next_token() {
25762583
Token::Word(Word { value, keyword, .. }) if keyword == Keyword::NoKeyword => Ok(value),
25772584
Token::SingleQuotedString(s) => Ok(s),
2585+
Token::EscapedStringLiteral(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => {
2586+
Ok(s)
2587+
}
25782588
unexpected => self.expected("literal string", unexpected),
25792589
}
25802590
}

src/tokenizer.rs

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ pub enum Token {
5151
SingleQuotedString(String),
5252
/// "National" string literal: i.e: N'string'
5353
NationalStringLiteral(String),
54+
/// "Escaped" string literal, a PostgreSQL extension to the SQL standard, e.g. e'first \n second' or E'first \n second'
55+
EscapedStringLiteral(String),
5456
/// Hexadecimal string literal: i.e.: X'deadbeef'
5557
HexStringLiteral(String),
5658
/// Comma
@@ -152,6 +154,7 @@ impl fmt::Display for Token {
152154
Token::Char(ref c) => write!(f, "{}", c),
153155
Token::SingleQuotedString(ref s) => write!(f, "'{}'", s),
154156
Token::NationalStringLiteral(ref s) => write!(f, "N'{}'", s),
157+
Token::EscapedStringLiteral(ref s) => write!(f, "E'{}'", s),
155158
Token::HexStringLiteral(ref s) => write!(f, "X'{}'", s),
156159
Token::Comma => f.write_str(","),
157160
Token::Whitespace(ws) => write!(f, "{}", ws),
@@ -380,6 +383,21 @@ impl<'a> Tokenizer<'a> {
380383
}
381384
}
382385
}
386+
// PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard.
387+
x @ 'e' | x @ 'E' => {
388+
chars.next(); // consume, to check the next char
389+
match chars.peek() {
390+
Some('\'') => {
391+
let s = self.tokenize_escaped_single_quoted_string(chars)?;
392+
Ok(Some(Token::EscapedStringLiteral(s)))
393+
}
394+
_ => {
395+
// regular identifier starting with an "E" or "e"
396+
let s = self.tokenize_word(x, chars);
397+
Ok(Some(Token::make_word(&s, None)))
398+
}
399+
}
400+
}
383401
// The spec only allows an uppercase 'X' to introduce a hex
384402
// string, but PostgreSQL, at least, allows a lowercase 'x' too.
385403
x @ 'x' | x @ 'X' => {
@@ -644,6 +662,66 @@ impl<'a> Tokenizer<'a> {
644662
s
645663
}
646664

665+
/// Read a single quoted string, starting with the opening quote.
666+
fn tokenize_escaped_single_quoted_string(
667+
&self,
668+
chars: &mut Peekable<Chars<'_>>,
669+
) -> Result<String, TokenizerError> {
670+
let mut s = String::new();
671+
chars.next(); // consume the opening quote
672+
673+
// slash escaping
674+
let mut is_escaped = false;
675+
while let Some(&ch) = chars.peek() {
676+
macro_rules! escape_control_character {
677+
($ESCAPED:expr) => {{
678+
if is_escaped {
679+
s.push($ESCAPED);
680+
is_escaped = false;
681+
} else {
682+
s.push(ch);
683+
}
684+
685+
chars.next();
686+
}};
687+
}
688+
689+
match ch {
690+
'\'' => {
691+
chars.next(); // consume
692+
if is_escaped {
693+
s.push(ch);
694+
is_escaped = false;
695+
} else if chars.peek().map(|c| *c == '\'').unwrap_or(false) {
696+
s.push(ch);
697+
chars.next();
698+
} else {
699+
return Ok(s);
700+
}
701+
}
702+
'\\' => {
703+
if is_escaped {
704+
s.push('\\');
705+
is_escaped = false;
706+
} else {
707+
is_escaped = true;
708+
}
709+
710+
chars.next();
711+
}
712+
'r' => escape_control_character!('\r'),
713+
'n' => escape_control_character!('\n'),
714+
't' => escape_control_character!('\t'),
715+
_ => {
716+
is_escaped = false;
717+
chars.next(); // consume
718+
s.push(ch);
719+
}
720+
}
721+
}
722+
self.tokenizer_error("Unterminated encoded string literal")
723+
}
724+
647725
/// Read a single quoted string, starting with the opening quote.
648726
fn tokenize_single_quoted_string(
649727
&self,

tests/sqlparser_postgres.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1290,3 +1290,41 @@ fn pg_and_generic() -> TestedDialects {
12901290
dialects: vec![Box::new(PostgreSqlDialect {}), Box::new(GenericDialect {})],
12911291
}
12921292
}
1293+
1294+
#[test]
fn parse_escaped_literal_string() {
    // Each projection uses a different mix of backslash escapes; the statement
    // must also round-trip through `Display` unchanged.
    let sql =
        r#"SELECT E's1 \n s1', E's2 \\n s2', E's3 \\\n s3', E's4 \\\\n s4', E'\'', E'foo \\'"#;
    let select = pg().verified_only_select(sql);

    // Decoded value expected for each projection, in order.
    let expected = ["s1 \n s1", "s2 \\n s2", "s3 \\\n s3", "s4 \\\\n s4", "'", "foo \\"];
    assert_eq!(6, select.projection.len());
    for (item, decoded) in select.projection.iter().zip(expected.iter()) {
        assert_eq!(
            &Expr::Value(Value::EscapedStringLiteral(decoded.to_string())),
            expr_from_projection(item)
        );
    }

    // Here `\'` escapes the only remaining quote, so the literal never closes.
    let unterminated = r#"SELECT E'\'"#;
    let err = pg().parse_sql_statements(unterminated).unwrap_err();
    assert_eq!(
        err.to_string(),
        "sql parser error: Unterminated encoded string literal at Line: 1, Column 8"
    );
}

0 commit comments

Comments
 (0)