@@ -1589,7 +1589,8 @@ private static string ReprEncode(string s, int start, int count, bool isUniEscap
15891589 // lazily create the StringBuilder only if necessary.
15901590 StringBuilder b = null ;
15911591 int i = start ;
1592- while ( i < count ) {
1592+ int end = start + count ;
1593+ while ( i < end ) {
15931594 char ch = s [ i ++ ] ;
15941595 switch ( ch ) {
15951596 case '\\ ' : StringBuilderInit ( ref b , s , start , i - 1 ) ; b . Append ( "\\ \\ " ) ; break ;
@@ -1620,28 +1621,32 @@ private static string ReprEncode(string s, int start, int count, bool isUniEscap
16201621 }
16211622 }
16221623
1623- return b ? . ToString ( ) ?? s ;
1624+ return b ? . ToString ( ) ?? s . Substring ( start , count ) ;
16241625 }
16251626
/// <summary>
/// Escapes the window of <paramref name="s"/> that starts at <paramref name="start"/> and
/// spans <paramref name="count"/> UTF-16 code units.
/// </summary>
/// <remarks>
/// A well-formed surrogate pair becomes a single <c>\UXXXXXXXX</c> escape, any other code
/// unit above 0xFF becomes <c>\uXXXX</c>, and, when <paramref name="escapeAscii"/> is true,
/// every remaining code unit (0x00-0xFF) becomes <c>\xXX</c>. Otherwise the remaining code
/// units are copied through unchanged.
/// </remarks>
/// <param name="s">Source string.</param>
/// <param name="start">Index of the first code unit to encode.</param>
/// <param name="count">Number of code units to encode.</param>
/// <param name="escapeAscii">When true, code units up to 0xFF are escaped as well.</param>
/// <returns>The encoded text for the requested window only.</returns>
private static string RawUnicodeEscapeEncode(string s, int start, int count, bool escapeAscii = false) {
    // in the common case we don't need to encode anything, so we
    // lazily create the StringBuilder only if necessary.
    StringBuilder b = null;
    int i = start;
    int end = start + count;
    while (i < end) {
        char ch = s[i++];
        // The look-ahead for the low surrogate has to be bounded by the end of the
        // window (start + count), not by count alone; otherwise a pair at the tail of
        // a window with start > 0 is never recognized and is emitted as two \u escapes.
        if ((ch & 0xFC00) == 0xD800 && i < end && (s[i] & 0xFC00) == 0xDC00) {
            // presumably seeds the builder with the untouched prefix s[start..i-1) on first use
            StringBuilderInit(ref b, s, start, i - 1);
            b.AppendFormat("\\U{0:x8}", char.ConvertToUtf32(ch, s[i++]));
        } else if (ch > 0xFF) {
            StringBuilderInit(ref b, s, start, i - 1);
            b.AppendFormat("\\u{0:x4}", (int)ch);
        } else if (escapeAscii) {
            StringBuilderInit(ref b, s, start, i - 1);
            b.AppendFormat("\\x{0:x2}", (int)ch);
        } else {
            // Only copy pass-through characters once a builder exists.
            b?.Append(ch);
        }
    }

    // Nothing needed escaping: return just the requested window.
    return b?.ToString() ?? s.Substring(start, count);
}
16461651
16471652 private static void StringBuilderInit ( ref StringBuilder sb , string s , int start , int end ) {
@@ -1953,12 +1958,20 @@ internal static Dictionary<string, object> MakeErrorHandlersDict() {
19531958 ReflectionUtils . GetMethodInfos ( typeof ( StringOps ) . GetMember ( nameof ( IgnoreErrors ) , BindingFlags . Static | BindingFlags . NonPublic ) ) ,
19541959 typeof ( StringOps ) ) ;
19551960
1956- // TODO: Implement remaining error handlers
1957- d [ "replace" ] = null ;
1961+ d [ "replace" ] = BuiltinFunction . MakeFunction (
1962+ "replace_errors" ,
1963+ ReflectionUtils . GetMethodInfos ( typeof ( StringOps ) . GetMember ( nameof ( ReplaceErrors ) , BindingFlags . Static | BindingFlags . NonPublic ) ) ,
1964+ typeof ( StringOps ) ) ;
19581965
1959- d [ "xmlcharrefreplace" ] = null ;
1966+ d [ "xmlcharrefreplace" ] = BuiltinFunction . MakeFunction (
1967+ "xmlcharrefreplace_errors" ,
1968+ ReflectionUtils . GetMethodInfos ( typeof ( StringOps ) . GetMember ( nameof ( XmlCharRefReplaceErrors ) , BindingFlags . Static | BindingFlags . NonPublic ) ) ,
1969+ typeof ( StringOps ) ) ;
19601970
1961- d [ "backslashreplace" ] = null ;
1971+ d [ "backslashreplace" ] = BuiltinFunction . MakeFunction (
1972+ "backslashreplace_errors" ,
1973+ ReflectionUtils . GetMethodInfos ( typeof ( StringOps ) . GetMember ( nameof ( BackslashReplaceErrors ) , BindingFlags . Static | BindingFlags . NonPublic ) ) ,
1974+ typeof ( StringOps ) ) ;
19621975
19631976 return d ;
19641977 }
@@ -2638,10 +2651,96 @@ private static object IgnoreErrors(object unicodeError) {
26382651 case PythonExceptions . _UnicodeEncodeError uee :
26392652 return PythonTuple . MakeTuple ( string . Empty , uee . end ) ;
26402653 case DecoderFallbackException dfe :
2641- return PythonTuple . MakeTuple ( string . Empty , dfe . Index + dfe . BytesUnknown . Length ) ;
2654+ return PythonTuple . MakeTuple ( string . Empty , dfe . Index + dfe . BytesUnknown ? . Length ?? 0 ) ;
26422655 case EncoderFallbackException efe :
26432656 return PythonTuple . MakeTuple ( string . Empty , efe . Index + ( efe . CharUnknownHigh != '\0 ' ? 2 : 1 ) ) ;
2644- default : throw PythonOps . TypeError ( "codec must pass exception instance" ) ;
2657+ default :
2658+ throw PythonOps . TypeError ( "codec must pass exception instance" ) ;
2659+ }
2660+ }
2661+
/// <summary>
/// Error callback registered under the name "replace". It substitutes a replacement
/// marker for the offending input and reports where processing should resume.
/// </summary>
/// <param name="unicodeError">
/// A <c>_UnicodeDecodeError</c>, <c>_UnicodeEncodeError</c>, <see cref="DecoderFallbackException"/>
/// or <see cref="EncoderFallbackException"/> instance.
/// </param>
/// <returns>A tuple of (replacement string, resume position).</returns>
/// <exception>
/// A Python TypeError when the argument is not one of the supported exception kinds, or
/// when a <c>_UnicodeEncodeError</c> does not carry a string object with int start/end.
/// </exception>
private static object ReplaceErrors(object unicodeError) {
    switch (unicodeError) {
        case PythonExceptions._UnicodeDecodeError ude:
            return PythonTuple.MakeTuple("\ufffd", ude.end);

        case PythonExceptions._UnicodeEncodeError uee:
            if (uee.@object is string text && uee.start is int start && uee.end is int end) {
                // Clamp the reported range to the bounds of the text.
                start = Math.Max(0, Math.Min(start, text.Length - 1));
                end = Math.Max(start, Math.Min(end, text.Length));
                // One '?' per code unit in the clamped range.
                return PythonTuple.MakeTuple(new string('?', end - start), end);
            }
            goto default;

        case DecoderFallbackException dfe:
            // Parenthesized on purpose: '+' binds tighter than '??', so the unparenthesized
            // form evaluates to 0 (not dfe.Index) whenever BytesUnknown is null.
            return PythonTuple.MakeTuple("\ufffd", dfe.Index + (dfe.BytesUnknown?.Length ?? 0));

        case EncoderFallbackException efe:
            // Skip two code units when the failure was a surrogate pair, one otherwise.
            return PythonTuple.MakeTuple("?", efe.Index + (efe.CharUnknownHigh != '\0' ? 2 : 1));

        default:
            throw PythonOps.TypeError("codec must pass exception instance");
    }
}
2685+
/// <summary>
/// Error callback registered under the name "backslashreplace". It only handles encoding
/// failures: the offending characters are rendered through
/// <c>RawUnicodeEscapeEncode(..., escapeAscii: true)</c>.
/// </summary>
/// <param name="unicodeError">The exception instance passed in by the codec.</param>
/// <returns>A tuple of (escaped replacement text, resume position).</returns>
/// <exception>
/// A Python TypeError for decoding failures, for unsupported arguments, and for a
/// <c>_UnicodeEncodeError</c> that does not carry a string object with int start/end.
/// </exception>
private static object BackslashReplaceErrors(object unicodeError) {
    if (unicodeError is PythonExceptions._UnicodeDecodeError) {
        throw PythonOps.TypeError("don't know how to handle UnicodeDecodeError in error callback");
    }

    if (unicodeError is PythonExceptions._UnicodeEncodeError encodeError) {
        if (encodeError.@object is string source && encodeError.start is int first && encodeError.end is int last) {
            // Clamp the reported range to the bounds of the source text.
            int lower = Math.Min(first, source.Length - 1);
            first = Math.Max(0, lower);
            int upper = Math.Min(last, source.Length);
            last = Math.Max(first, upper);

            string escaped = RawUnicodeEscapeEncode(source, first, last - first, escapeAscii: true);
            return PythonTuple.MakeTuple(escaped, last);
        }
        // A malformed encode error is reported the same way as an unsupported argument.
        throw PythonOps.TypeError("codec must pass exception instance");
    }

    if (unicodeError is DecoderFallbackException) {
        throw PythonOps.TypeError("don't know how to handle DecoderFallbackException in error callback");
    }

    if (unicodeError is EncoderFallbackException fallbackError) {
        // A non-NUL high surrogate means the failure covers a surrogate pair.
        string offending;
        if (fallbackError.CharUnknownHigh != '\0') {
            offending = new string(new[] { fallbackError.CharUnknownHigh, fallbackError.CharUnknownLow });
        } else {
            offending = new string(fallbackError.CharUnknown, 1);
        }

        string escaped = RawUnicodeEscapeEncode(offending, 0, offending.Length, escapeAscii: true);
        return PythonTuple.MakeTuple(escaped, fallbackError.Index + offending.Length);
    }

    throw PythonOps.TypeError("codec must pass exception instance");
}
/// <summary>
/// Error callback registered under the name "xmlcharrefreplace". It only handles encoding
/// failures: each offending code point is replaced by a decimal XML numeric character
/// reference of the form <c>&amp;#NNN;</c>.
/// </summary>
/// <param name="unicodeError">The exception instance passed in by the codec.</param>
/// <returns>A tuple of (character references, resume position).</returns>
/// <exception>
/// A Python TypeError for decoding failures, for unsupported arguments, and for a
/// <c>_UnicodeEncodeError</c> that does not carry a string object with int start/end.
/// </exception>
private static object XmlCharRefReplaceErrors(object unicodeError) {
    switch (unicodeError) {
        case PythonExceptions._UnicodeDecodeError ude:
            throw PythonOps.TypeError("don't know how to handle UnicodeDecodeError in error callback");

        case PythonExceptions._UnicodeEncodeError uee:
            if (uee.@object is string text && uee.start is int start && uee.end is int end) {
                // Clamp the reported range to the bounds of the text.
                start = Math.Max(0, Math.Min(start, text.Length - 1));
                end = Math.Max(start, Math.Min(end, text.Length));
                var sb = new StringBuilder(10 * (end - start));
                int i = start;
                while (i < end) {
                    sb.Append("&#");
                    char ch = text[i++];
                    if (char.IsHighSurrogate(ch) && i < end && char.IsLowSurrogate(text[i])) {
                        // A well-formed pair inside the range becomes one reference.
                        sb.Append(char.ConvertToUtf32(ch, text[i++]));
                    } else {
                        sb.Append((uint)ch);
                    }
                    sb.Append(';');
                }
                return PythonTuple.MakeTuple(sb.ToString(), end);
            }
            goto default;

        case DecoderFallbackException dfe:
            throw PythonOps.TypeError("don't know how to handle DecoderFallbackException in error callback");

        case EncoderFallbackException efe:
            // Both forms must end with ';' so that the reference is terminated, matching
            // what the _UnicodeEncodeError branch above produces.
            bool isPair = efe.CharUnknownHigh != '\0';
            string chars = isPair
                ? $"&#{char.ConvertToUtf32(efe.CharUnknownHigh, efe.CharUnknownLow)};"
                : $"&#{(int)efe.CharUnknown};";
            return PythonTuple.MakeTuple(chars, efe.Index + (isPair ? 2 : 1));

        default:
            throw PythonOps.TypeError("codec must pass exception instance");
    }
}
26472746#endif
0 commit comments