66using System . Collections . Generic ;
77using System . Diagnostics ;
88using System . Globalization ;
9+ using System . Linq ;
910using System . Numerics ;
1011using System . Text ;
1112
1718
1819namespace IronPython . Runtime {
1920 public static class LiteralParser {
20- public static string ParseString ( char [ ] text , int start , int length , bool isRaw , bool isUniEscape , bool normalizeLineEndings ) {
/// <summary>
/// Callback consulted by the string parser when it meets a malformed escape sequence.
/// It receives the data being parsed together with the <c>start</c>/<c>end</c> offsets of the
/// offending sequence; the call sites compute these offsets relative to the position where
/// parsing began. A non-null result is appended to the parsed output in place of the
/// malformed sequence; a null result appends nothing.
/// </summary>
21+ internal delegate IReadOnlyList < char > ParseStringErrorHandler < T > ( IList < T > data , int start , int end ) ;
22+
23+ internal static string ParseString ( char [ ] text , int start , int length , bool isRaw , bool isUniEscape , bool normalizeLineEndings ) {
2124 Debug . Assert ( text != null ) ;
2225 Debug . Assert ( start + length <= text . Length ) ;
2326
@@ -28,32 +31,33 @@ public static string ParseString(char[] text, int start, int length, bool isRaw,
2831 return result ?? new string ( text , start , length ) ;
2932 }
3033
/// <summary>
/// Parses the escape sequences found in <paramref name="length"/> bytes of
/// <paramref name="bytes"/>, beginning at <paramref name="start"/>, and returns the resulting string.
/// </summary>
/// <param name="bytes">Buffer holding the encoded text; must not be null.</param>
/// <param name="start">Index of the first byte to parse.</param>
/// <param name="length">Number of bytes to parse; <c>start + length</c> must not exceed the buffer length.</param>
/// <param name="isRaw">Forwarded to the parser; selects raw-string escape handling.</param>
/// <param name="errorHandler">Forwarded to the parser; when null, malformed escapes raise an exception.</param>
internal static string ParseString(byte[] bytes, int start, int length, bool isRaw, ParseStringErrorHandler<byte> errorHandler) {
    Debug.Assert(bytes != null);
    Debug.Assert(start + length <= bytes.Length);

    // Unicode escapes are always recognized for byte input and line endings are left untouched.
    string parsed = DoParseString(
        bytes,
        start,
        length,
        isRaw,
        isUniEscape: true,
        normalizeLineEndings: false,
        errorHandler: errorHandler);

    if (parsed != null) {
        return parsed;
    }

    // The parser returns null when it never had to allocate an output buffer;
    // in that case the requested slice is converted directly.
    // NOTE(review): MakeString is defined elsewhere - presumably a byte-to-char widening; confirm.
    return bytes.MakeString(start, length);
}
3942
40- private static string DoParseString < T > ( T [ ] data , int start , int length , bool isRaw , bool isUniEscape , bool normalizeLineEndings ) where T : IConvertible {
43+ private static string DoParseString < T > ( T [ ] data , int start , int length , bool isRaw , bool isUniEscape , bool normalizeLineEndings , ParseStringErrorHandler < T > errorHandler = default ) where T : IConvertible {
44+ Bytes bytesData = null ;
4145 StringBuilder buf = null ;
4246 int i = start ;
4347 int l = start + length ;
4448 int val ;
4549 while ( i < l ) {
4650 char ch = data [ i ++ ] . ToChar ( null ) ;
4751 if ( ( ! isRaw || isUniEscape ) && ch == '\\ ' ) {
48- StringBuilderInit ( ref buf , data , start , i - start - 1 , length ) ;
52+ StringBuilderInit ( ref buf , data , start , i - 1 , length ) ;
4953
5054 if ( i >= l ) {
5155 if ( isRaw ) {
5256 buf . Append ( '\\ ' ) ;
53- break ;
5457 } else {
55- throw PythonOps . ValueError ( "Trailing \\ in string") ;
58+ handleError ( i - start - 1 , i - start , " \\ at end of string") ;
5659 }
60+ break ;
5761 }
5862 ch = data [ i ++ ] . ToChar ( null ) ;
5963
@@ -62,29 +66,16 @@ private static string DoParseString<T>(T[] data, int start, int length, bool isR
6266 int max = 16 ;
6367 if ( TryParseInt ( data , i , len , max , out val , out int consumed ) ) {
6468 if ( val < 0 || val > 0x10ffff ) {
65- throw PythonExceptions . CreateThrowable (
66- PythonExceptions . UnicodeDecodeError ,
67- isRaw ? "rawunicodeescape" : "unicodeescape" ,
68- data is byte [ ] byteData ? new Bytes ( byteData ) : Bytes . Empty ,
69- i - start - 2 , i - start + consumed ,
70- isRaw ? "\\ Uxxxxxxxx out of range" : "illegal Unicode character" ) ;
71- }
72-
73- if ( val < 0x010000 ) {
69+ handleError ( i - start - 2 , i - start + consumed , isRaw ? @"\Uxxxxxxxx out of range" : "illegal Unicode character" ) ;
70+ } else if ( val < 0x010000 ) {
7471 buf . Append ( ( char ) val ) ;
7572 } else {
7673 buf . Append ( char . ConvertFromUtf32 ( val ) ) ;
7774 }
78- i += len ;
7975 } else {
80- throw PythonExceptions . CreateThrowable (
81- PythonExceptions . UnicodeDecodeError ,
82- isRaw ? "rawunicodeescape" : "unicodeescape" ,
83- data is byte [ ] byteData ? new Bytes ( byteData ) : Bytes . Empty ,
84- i - start - 2 ,
85- i - start + consumed ,
86- ch == 'u' ? @"truncated \uXXXX escape" : @"truncated \UXXXXXXXX escape" ) ;
76+ handleError ( i - start - 2 , i - start + consumed , ch == 'u' ? @"truncated \uXXXX escape" : @"truncated \UXXXXXXXX escape" ) ;
8777 }
78+ i += consumed ;
8879 } else {
8980 if ( isRaw ) {
9081 buf . Append ( '\\ ' ) ;
@@ -112,10 +103,12 @@ private static string DoParseString<T>(T[] data, int start, int length, bool isR
112103 continue ;
113104 case 'N' : {
114105 IronPython . Modules . unicodedata . PerformModuleReload ( null , null ) ;
106+ StringBuilder namebuf = new StringBuilder ( ) ;
107+ bool namestarted = false ;
108+ bool namecomplete = false ;
115109 if ( i < l && data [ i ] . ToChar ( null ) == '{' ) {
110+ namestarted = true ;
116111 i ++ ;
117- StringBuilder namebuf = new StringBuilder ( ) ;
118- bool namecomplete = false ;
119112 while ( i < l ) {
120113 char namech = data [ i ++ ] . ToChar ( null ) ;
121114 if ( namech != '}' ) {
@@ -125,52 +118,33 @@ private static string DoParseString<T>(T[] data, int start, int length, bool isR
125118 break ;
126119 }
127120 }
128-
129- if ( ! namecomplete || namebuf . Length == 0 )
130- throw PythonExceptions . CreateThrowable (
131- PythonExceptions . UnicodeDecodeError ,
132- "unicodeescape" ,
133- data is byte [ ] byteData ? new Bytes ( byteData ) : Bytes . Empty ,
134- i - start - 3 - namebuf . Length - ( namecomplete ? 1 : 0 ) , // 3 for \N{ and 1 for }
135- i - start - ( namecomplete ? 1 : 0 ) , // 1 for }
136- @"malformed \N character escape" ) ;
137-
121+ }
122+ if ( ! namecomplete || namebuf . Length == 0 ) {
123+ handleError ( i - start - 2 - ( namestarted ? 1 : 0 ) - namebuf . Length - ( namecomplete ? 1 : 0 ) , // 2 for \N and 1 for { and 1 for }
124+ i - start - ( namecomplete ? 1 : 0 ) , // 1 for }
125+ @"malformed \N character escape" ) ;
126+ if ( namecomplete ) {
127+ buf . Append ( '}' ) ;
128+ }
129+ } else {
138130 try {
139131 string uval = IronPython . Modules . unicodedata . lookup ( namebuf . ToString ( ) ) ;
140132 buf . Append ( uval ) ;
141133 } catch ( KeyNotFoundException ) {
142- throw PythonExceptions . CreateThrowable (
143- PythonExceptions . UnicodeDecodeError ,
144- "unicodeescape" ,
145- data is byte [ ] byteData ? new Bytes ( byteData ) : Bytes . Empty ,
146- i - start - 4 - namebuf . Length , // 4 for \N{}
147- i - start ,
148- "unknown Unicode character name" ) ;
134+ handleError ( i - start - 4 - namebuf . Length , // 4 for \N{}
135+ i - start ,
136+ "unknown Unicode character name" ) ;
149137 }
150-
151- } else {
152- throw PythonExceptions . CreateThrowable (
153- PythonExceptions . UnicodeDecodeError ,
154- "unicodeescape" ,
155- data is byte [ ] byteData ? new Bytes ( byteData ) : Bytes . Empty ,
156- i - start - 2 , // 2 for \N
157- i - start ,
158- @"malformed \N character escape" ) ;
159138 }
160139 }
161140 continue ;
162141 case 'x' : //hex
163142 if ( ! TryParseInt ( data , i , 2 , 16 , out val , out int consumed ) ) {
164- throw PythonExceptions . CreateThrowable (
165- PythonExceptions . UnicodeDecodeError ,
166- "unicodeescape" ,
167- data is byte [ ] byteData ? new Bytes ( byteData ) : Bytes . Empty ,
168- i - start - 2 ,
169- i - start + consumed ,
170- @"truncated \xXX escape" ) ;
143+ handleError ( i - start - 2 , i - start + consumed , @"truncated \xXX escape" ) ;
144+ } else {
145+ buf . Append ( ( char ) val ) ;
171146 }
172- buf . Append ( ( char ) val ) ;
173- i += 2 ;
147+ i += consumed ;
174148 continue ;
175149 case '0' :
176150 case '1' :
@@ -200,7 +174,7 @@ private static string DoParseString<T>(T[] data, int start, int length, bool isR
200174 }
201175 }
202176 } else if ( ch == '\r ' && normalizeLineEndings ) {
203- StringBuilderInit ( ref buf , data , start , i - start - 1 , length ) ;
177+ StringBuilderInit ( ref buf , data , start , i - 1 , length ) ;
204178
205179 // normalize line endings
206180 if ( i < l && data [ i ] . ToChar ( null ) == '\n ' ) {
@@ -212,19 +186,35 @@ private static string DoParseString<T>(T[] data, int start, int length, bool isR
212186 }
213187 }
214188 return buf ? . ToString ( ) ;
189+
// Reports a malformed escape sequence encountered while parsing.
//
// `start` and `end` are the offsets supplied by the call sites, which compute them
// relative to the beginning of the parsed range (e.g. `i - start - 2`). Note that these
// parameters shadow the enclosing method's `start`.
//
// Without an error handler, a UnicodeDecodeError is thrown (codec name "rawunicodeescape"
// for raw strings, "unicodeescape" otherwise). With a handler, the handler is asked for
// replacement characters, which are appended to the output buffer; a null replacement
// appends nothing.
void handleError(int start, int end, string reason) {
    if (errorHandler == null) {
        // The Bytes wrapper is needed only as the exception's object argument, so it is
        // built on the throwing path alone. Input that is not byte[] (e.g. char[]) has no
        // byte payload to report; fall back to Bytes.Empty so callers still receive a
        // UnicodeDecodeError rather than a NotImplementedException.
        if (bytesData == null) {
            bytesData = data is byte[] ba ? new Bytes(ba) : Bytes.Empty;
        }

        throw PythonExceptions.CreateThrowable(
            PythonExceptions.UnicodeDecodeError,
            isRaw ? "rawunicodeescape" : "unicodeescape",
            bytesData,
            start,
            end,
            reason);
    }

    var substitute = errorHandler(data, start, end);
    if (substitute != null) {
        buf.Append(substitute.ToArray());
    }
}
215205 }
216206
217- private static void StringBuilderInit < T > ( ref StringBuilder sb , T [ ] data , int start , int count , int capacity ) {
207+ private static void StringBuilderInit < T > ( ref StringBuilder sb , T [ ] data , int start , int end , int capacity ) {
218208 if ( sb != null ) return ;
219209
220210 sb = new StringBuilder ( capacity ) ;
221211 switch ( data ) {
222212 case char [ ] text :
223- sb . Append ( text , start , count ) ;
213+ sb . Append ( text , start , end - start ) ;
224214 break ;
225215
226216 case byte [ ] bytes :
227- for ( int i = start ; i < start + count ; i ++ ) {
217+ for ( int i = start ; i < end ; i ++ ) {
228218 sb . Append ( ( char ) bytes [ i ] ) ;
229219 }
230220 break ;
@@ -236,7 +226,7 @@ private static void StringBuilderInit<T>(ref StringBuilder sb, T[] data, int sta
236226
/// <summary>
/// Callback consulted by the bytes parser when it meets an invalid <c>\x</c> escape and a
/// handler was supplied (without one, the parser throws a ValueError instead).
/// It receives the data being parsed and the <c>start</c>/<c>end</c> offsets of the offending
/// sequence, computed by the call site relative to the position where parsing began.
/// The parser checks the returned list for null before using it.
/// NOTE(review): how a non-null result is consumed is not visible here - presumably it is
/// appended to the output as substitute bytes; confirm against the caller.
/// </summary>
236226 internal delegate IReadOnlyList < byte > ParseBytesErrorHandler < T > ( IList < T > data , int start , int end ) ;
238228
239- internal static List < byte > ParseBytes < T > ( IList < T > data , int start , int length , bool isRaw , bool normalizeLineEndings , ParseBytesErrorHandler < T > errorHandler = null ) where T : IConvertible {
229+ internal static List < byte > ParseBytes < T > ( IList < T > data , int start , int length , bool isRaw , bool normalizeLineEndings , ParseBytesErrorHandler < T > errorHandler = default ) where T : IConvertible {
240230 Debug . Assert ( data != null ) ;
241231 Debug . Assert ( start + length <= data . Count ) ;
242232
@@ -275,7 +265,7 @@ internal static List<byte> ParseBytes<T>(IList<T> data, int start, int length, b
275265 if ( ! TryParseInt ( data , i , 2 , 16 , out val , out int consumed ) ) {
276266 int pos = i - start - 2 ;
277267 if ( errorHandler == null ) {
278- throw PythonOps . ValueError ( "invalid \ \ x escape at position {0}" , pos ) ;
268+ throw PythonOps . ValueError ( @ "invalid \x escape at position {0}", pos ) ;
279269 }
280270 var substitute = errorHandler ( data , pos , pos + consumed ) ;
281271 if ( substitute != null ) {
0 commit comments