@@ -51,6 +51,8 @@ pub enum Token {
5151 SingleQuotedString ( String ) ,
5252 /// "National" string literal: i.e: N'string'
5353 NationalStringLiteral ( String ) ,
54+ /// "Escaped" string literal, a PostgreSQL extension to the SQL standard, e.g. e'first \n second' or E'first \n second'
55+ EscapedStringLiteral ( String ) ,
5456 /// Hexadecimal string literal: i.e.: X'deadbeef'
5557 HexStringLiteral ( String ) ,
5658 /// Comma
@@ -152,6 +154,7 @@ impl fmt::Display for Token {
152154 Token :: Char ( ref c) => write ! ( f, "{}" , c) ,
153155 Token :: SingleQuotedString ( ref s) => write ! ( f, "'{}'" , s) ,
154156 Token :: NationalStringLiteral ( ref s) => write ! ( f, "N'{}'" , s) ,
157+ Token :: EscapedStringLiteral ( ref s) => write ! ( f, "E'{}'" , s) ,
155158 Token :: HexStringLiteral ( ref s) => write ! ( f, "X'{}'" , s) ,
156159 Token :: Comma => f. write_str ( "," ) ,
157160 Token :: Whitespace ( ws) => write ! ( f, "{}" , ws) ,
@@ -380,6 +383,21 @@ impl<'a> Tokenizer<'a> {
380383 }
381384 }
382385 }
386+ // PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard.
387+ x @ 'e' | x @ 'E' => {
388+ chars. next ( ) ; // consume, to check the next char
389+ match chars. peek ( ) {
390+ Some ( '\'' ) => {
391+ let s = self . tokenize_escaped_single_quoted_string ( chars) ?;
392+ Ok ( Some ( Token :: EscapedStringLiteral ( s) ) )
393+ }
394+ _ => {
395+ // regular identifier starting with an "E" or "e"
396+ let s = self . tokenize_word ( x, chars) ;
397+ Ok ( Some ( Token :: make_word ( & s, None ) ) )
398+ }
399+ }
400+ }
383401 // The spec only allows an uppercase 'X' to introduce a hex
384402 // string, but PostgreSQL, at least, allows a lowercase 'x' too.
385403 x @ 'x' | x @ 'X' => {
@@ -644,6 +662,66 @@ impl<'a> Tokenizer<'a> {
644662 s
645663 }
646664
665+ /// Read a single quoted string, starting with the opening quote.
666+ fn tokenize_escaped_single_quoted_string (
667+ & self ,
668+ chars : & mut Peekable < Chars < ' _ > > ,
669+ ) -> Result < String , TokenizerError > {
670+ let mut s = String :: new ( ) ;
671+ chars. next ( ) ; // consume the opening quote
672+
673+ // slash escaping
674+ let mut is_escaped = false ;
675+ while let Some ( & ch) = chars. peek ( ) {
676+ macro_rules! escape_control_character {
677+ ( $ESCAPED: expr) => { {
678+ if is_escaped {
679+ s. push( $ESCAPED) ;
680+ is_escaped = false ;
681+ } else {
682+ s. push( ch) ;
683+ }
684+
685+ chars. next( ) ;
686+ } } ;
687+ }
688+
689+ match ch {
690+ '\'' => {
691+ chars. next ( ) ; // consume
692+ if is_escaped {
693+ s. push ( ch) ;
694+ is_escaped = false ;
695+ } else if chars. peek ( ) . map ( |c| * c == '\'' ) . unwrap_or ( false ) {
696+ s. push ( ch) ;
697+ chars. next ( ) ;
698+ } else {
699+ return Ok ( s) ;
700+ }
701+ }
702+ '\\' => {
703+ if is_escaped {
704+ s. push ( '\\' ) ;
705+ is_escaped = false ;
706+ } else {
707+ is_escaped = true ;
708+ }
709+
710+ chars. next ( ) ;
711+ }
712+ 'r' => escape_control_character ! ( '\r' ) ,
713+ 'n' => escape_control_character ! ( '\n' ) ,
714+ 't' => escape_control_character ! ( '\t' ) ,
715+ _ => {
716+ is_escaped = false ;
717+ chars. next ( ) ; // consume
718+ s. push ( ch) ;
719+ }
720+ }
721+ }
722+ self . tokenizer_error ( "Unterminated encoded string literal" )
723+ }
724+
647725 /// Read a single quoted string, starting with the opening quote.
648726 fn tokenize_single_quoted_string (
649727 & self ,
0 commit comments