Skip to content

Commit 0911fb1

Browse files
BCSharp authored and slozier committed
Implement codecs.strict_errors (#696)
* Implement codecs.strict_errors
* Fixed after review
1 parent ab2fa2a commit 0911fb1

3 files changed

Lines changed: 78 additions & 2 deletions

File tree

Src/IronPython/Runtime/Operations/StringOps.cs

Lines changed: 32 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1869,8 +1869,8 @@ private static string UserDecode(PythonTuple codecInfo, IList<byte> data, string
18691869
}
18701870

18711871
#if FEATURE_ENCODING
1872-
private static class CodecsInfo {
1873-
public static readonly Dictionary<string, Lazy<Encoding>> Codecs = MakeCodecsDict();
1872+
internal static class CodecsInfo {
1873+
internal static readonly Dictionary<string, Lazy<Encoding>> Codecs = MakeCodecsDict();
18741874

18751875
private static Dictionary<string, Lazy<Encoding>> MakeCodecsDict() {
18761876
Dictionary<string, Lazy<Encoding>> d = new Dictionary<string, Lazy<Encoding>>();
@@ -1939,6 +1939,26 @@ private static Dictionary<string, Lazy<Encoding>> MakeCodecsDict() {
19391939
#endif
19401940
return d;
19411941
}
1942+
1943+
/// <summary>
/// Builds the initial table of codec error handlers, keyed by handler name.
/// Only "strict" is bound to a callable; the other standard handler names are
/// registered with a null value until they are implemented.
/// </summary>
/// <returns>A new dictionary mapping error handler names to handler objects.</returns>
internal static Dictionary<string, object> MakeErrorHandlersDict() {
    // The handler method is looked up as a non-public static member of StringOps.
    var strictMembers = typeof(StringOps).GetMember(
        nameof(StrictErrors),
        BindingFlags.Static | BindingFlags.NonPublic);

    var strictHandler = BuiltinFunction.MakeFunction(
        "strict_errors",
        ReflectionUtils.GetMethodInfos(strictMembers),
        typeof(StringOps));

    return new Dictionary<string, object> {
        ["strict"] = strictHandler,

        // TODO: Implement remaining error handlers
        ["ignore"] = null,
        ["replace"] = null,
        ["xmlcharrefreplace"] = null,
        ["backslashreplace"] = null,
    };
}
19421962
}
19431963
#endif
19441964

@@ -2677,6 +2697,16 @@ public override bool Fallback(byte[] bytesUnknown, int index) {
26772697
}
26782698
}
26792699

2700+
/// <summary>
/// Implementation of the "strict" codec error handler: throws the exception it is given.
/// </summary>
/// <param name="unicodeError">
/// The error to raise; either a CLR fallback exception or a Python exception object
/// that wraps one.
/// </param>
/// <returns>Never returns normally; every path throws.</returns>
private static object StrictErrors(object unicodeError) {
    object error = unicodeError;

    // A Python-level exception object is first unwrapped to its CLR exception.
    if (error is PythonExceptions.BaseException pythonException) {
        error = pythonException.GetClrException();
    }

    if (error is DecoderFallbackException decodeError) {
        throw decodeError;
    }

    if (error is EncoderFallbackException encodeError) {
        throw encodeError;
    }

    // Anything else (including null) is not an acceptable argument.
    throw PythonOps.TypeError("codec must pass exception instance");
}
26802710
#endif
26812711

26822712
#endregion

Src/IronPython/Runtime/PythonContext.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1887,7 +1887,11 @@ public override int ExecuteProgram(SourceUnit/*!*/ program) {
18871887
internal Dictionary<string, object> ErrorHandlers {
18881888
get {
18891889
if (_errorHandlers == null) {
1890+
#if FEATURE_ENCODING
1891+
Interlocked.CompareExchange(ref _errorHandlers, StringOps.CodecsInfo.MakeErrorHandlersDict(), null);
1892+
#else
18901893
Interlocked.CompareExchange(ref _errorHandlers, new Dictionary<string, object>(), null);
1894+
#endif
18911895
}
18921896

18931897
return _errorHandlers;

Tests/modules/io_related/test_codecs.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,48 @@ def test_latin_1_encode(self):
454454
for x in ['iso-8859-1', 'iso8859-1', '8859', 'cp819', 'latin', 'latin1', 'L1']:
455455
self.assertEqual('abc'.encode(x), b'abc')
456456

457+
def test_error_handlers(self):
458+
ude = UnicodeDecodeError('dummy', b"abcdefgh", 3, 5, "decoding testing purposes")
459+
uee = UnicodeEncodeError('dummy', "abcdefgh", 2, 6, "encoding testing purposes")
460+
unicode_data = "ab\xff\u20ac\U0001f40d"
461+
uee_unicode = UnicodeEncodeError('dummy', unicode_data, 2, len(unicode_data), "encoding testing purposes")
462+
463+
strict = codecs.lookup_error('strict')
464+
self.assertEqual(strict, codecs.strict_errors)
465+
with self.assertRaises(UnicodeDecodeError) as cm:
466+
strict(ude)
467+
self.assertEqual(cm.exception, ude)
468+
with self.assertRaises(UnicodeEncodeError) as cm:
469+
strict(uee)
470+
self.assertEqual(cm.exception, uee)
471+
self.assertRaisesRegex(TypeError, "codec must pass exception instance", strict, None)
472+
self.assertRaisesRegex(TypeError, "\w+\(\) takes exactly (one|1) argument \(0 given\)", strict)
473+
self.assertRaisesRegex(TypeError, "\w+\(\) takes exactly (one|1) argument \(2 given\)", strict, ude, uee)
474+
self.assertRaises(LookupError, codecs.lookup_error, "STRICT")
475+
476+
return # TODO: Implement remaining error handlers
477+
478+
ignore = codecs.lookup_error('ignore')
479+
self.assertEqual(ignore, codecs.ignore_errors)
480+
self.assertEqual(ignore(ude), ("", 5))
481+
self.assertEqual(ignore(uee), ("", 6))
482+
483+
replace = codecs.lookup_error('replace')
484+
self.assertEqual(replace, codecs.replace_errors)
485+
self.assertEqual(replace(ude), ("�", 5))
486+
self.assertEqual(replace(uee), ("????", 6))
487+
488+
backslashreplace = codecs.lookup_error('backslashreplace')
489+
self.assertEqual(backslashreplace, codecs.backslashreplace_errors)
490+
self.assertRaisesRegex(TypeError, "don't know how to handle UnicodeDecodeError in error callback", backslashreplace, ude)
491+
self.assertEqual(backslashreplace(uee), (r"\x63\x64\x65\x66", 6))
492+
self.assertEqual(backslashreplace(uee_unicode), (r"\xff\u20ac\U0001f40d", uee_unicode.end))
493+
494+
xmlcharrefreplace = codecs.lookup_error('xmlcharrefreplace')
495+
self.assertEqual(xmlcharrefreplace, codecs.xmlcharrefreplace_errors)
496+
self.assertRaisesRegex(TypeError, "don't know how to handle UnicodeDecodeError in error callback", xmlcharrefreplace, ude)
497+
self.assertEqual(xmlcharrefreplace(uee), ("&#99;&#100;&#101;&#102;", 6))
498+
457499
#TODO: @skip("multiple_execute")
458500
def test_lookup_error(self):
459501
#sanity

0 commit comments

Comments
 (0)