Skip to content

Commit 3df9890

Browse files
BCSharp authored and slozier committed
Implement codecs.ignore_errors (#711)
1 parent 655e6b5 commit 3df9890

2 files changed

Lines changed: 26 additions & 6 deletions

File tree

Src/IronPython/Runtime/Operations/StringOps.cs

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1948,9 +1948,12 @@ internal static Dictionary<string, object> MakeErrorHandlersDict() {
19481948
ReflectionUtils.GetMethodInfos(typeof(StringOps).GetMember(nameof(StrictErrors), BindingFlags.Static | BindingFlags.NonPublic)),
19491949
typeof(StringOps));
19501950

1951-
// TODO: Implement remaining error handlers
1952-
d["ignore"] = null;
1951+
d["ignore"] = BuiltinFunction.MakeFunction(
1952+
"ignore_errors",
1953+
ReflectionUtils.GetMethodInfos(typeof(StringOps).GetMember(nameof(IgnoreErrors), BindingFlags.Static | BindingFlags.NonPublic)),
1954+
typeof(StringOps));
19531955

1956+
// TODO: Implement remaining error handlers
19541957
d["replace"] = null;
19551958

19561959
d["xmlcharrefreplace"] = null;
@@ -2627,6 +2630,20 @@ private static object StrictErrors(object unicodeError) {
26272630
default: throw PythonOps.TypeError("codec must pass exception instance");
26282631
}
26292632
}
2633+
2634+
/// <summary>
/// Error handler for the "ignore" policy: drops the offending input by producing an
/// empty replacement string paired with the position at which processing resumes.
/// </summary>
/// <param name="unicodeError">
/// A Python UnicodeDecodeError or UnicodeEncodeError instance, or a .NET
/// <see cref="DecoderFallbackException"/> or <see cref="EncoderFallbackException"/>.
/// </param>
/// <returns>A tuple of (empty string, resume position).</returns>
/// <remarks>
/// Any other argument, including null, is rejected with a Python TypeError.
/// </remarks>
private static object IgnoreErrors(object unicodeError) {
    object resumeAt;

    if (unicodeError is PythonExceptions._UnicodeDecodeError decodeError) {
        resumeAt = decodeError.end;
    } else if (unicodeError is PythonExceptions._UnicodeEncodeError encodeError) {
        resumeAt = encodeError.end;
    } else if (unicodeError is DecoderFallbackException decoderFallback) {
        // Step past every byte the decoder reported as unknown.
        resumeAt = decoderFallback.Index + decoderFallback.BytesUnknown.Length;
    } else if (unicodeError is EncoderFallbackException encoderFallback) {
        // A set CharUnknownHigh means the failing input was a surrogate pair,
        // which occupies two UTF-16 code units instead of one.
        int width = encoderFallback.CharUnknownHigh == '\0' ? 1 : 2;
        resumeAt = encoderFallback.Index + width;
    } else {
        throw PythonOps.TypeError("codec must pass exception instance");
    }

    return PythonTuple.MakeTuple(string.Empty, resumeAt);
}
26302647
#endif
26312648

26322649
#endregion

Tests/modules/io_related/test_codecs.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -460,7 +460,7 @@ def test_latin_1_encode(self):
460460
def test_error_handlers(self):
461461
ude = UnicodeDecodeError('dummy', b"abcdefgh", 3, 5, "decoding testing purposes")
462462
uee = UnicodeEncodeError('dummy', "abcdefgh", 2, 6, "encoding testing purposes")
463-
unicode_data = "ab\xff\u20ac\U0001f40d"
463+
unicode_data = "ab\xff\u20ac\U0001f40d\0z"
464464
uee_unicode = UnicodeEncodeError('dummy', unicode_data, 2, len(unicode_data), "encoding testing purposes")
465465

466466
strict = codecs.lookup_error('strict')
@@ -476,28 +476,31 @@ def test_error_handlers(self):
476476
self.assertRaisesRegex(TypeError, "\w+\(\) takes exactly (one|1) argument \(2 given\)", strict, ude, uee)
477477
self.assertRaises(LookupError, codecs.lookup_error, "STRICT")
478478

479-
return # TODO: Implement remaining error handlers
480-
481479
ignore = codecs.lookup_error('ignore')
482480
self.assertEqual(ignore, codecs.ignore_errors)
483481
self.assertEqual(ignore(ude), ("", 5))
484482
self.assertEqual(ignore(uee), ("", 6))
483+
self.assertEqual(ignore(uee_unicode), ("", uee_unicode.end))
484+
485+
return # TODO: Implement remaining error handlers
485486

486487
replace = codecs.lookup_error('replace')
487488
self.assertEqual(replace, codecs.replace_errors)
488489
self.assertEqual(replace(ude), ("�", 5))
489490
self.assertEqual(replace(uee), ("????", 6))
491+
self.assertEqual(replace(uee_unicode), ("?" * (uee_unicode.end - uee_unicode.start), uee_unicode.end))
490492

491493
backslashreplace = codecs.lookup_error('backslashreplace')
492494
self.assertEqual(backslashreplace, codecs.backslashreplace_errors)
493495
self.assertRaisesRegex(TypeError, "don't know how to handle UnicodeDecodeError in error callback", backslashreplace, ude)
494496
self.assertEqual(backslashreplace(uee), (r"\x63\x64\x65\x66", 6))
495-
self.assertEqual(backslashreplace(uee_unicode), (r"\xff\u20ac\U0001f40d", uee_unicode.end))
497+
self.assertEqual(backslashreplace(uee_unicode), (r"\xff\u20ac\U0001f40d\x00\x7a", uee_unicode.end))
496498

497499
xmlcharrefreplace = codecs.lookup_error('xmlcharrefreplace')
498500
self.assertEqual(xmlcharrefreplace, codecs.xmlcharrefreplace_errors)
499501
self.assertRaisesRegex(TypeError, "don't know how to handle UnicodeDecodeError in error callback", xmlcharrefreplace, ude)
500502
self.assertEqual(xmlcharrefreplace(uee), ("&#99;&#100;&#101;&#102;", 6))
503+
self.assertEqual(xmlcharrefreplace(uee_unicode), ("&#255;&#8364;&#128013;&#0;&#122;", uee_unicode.end))
501504

502505
#TODO: @skip("multiple_execute")
503506
def test_lookup_error(self):

0 commit comments

Comments
 (0)