@@ -29,10 +29,6 @@ public void SetUp() {
2929 [ Test ] public void Test256WithPythonAscii ( ) => TestRoundTrip ( PythonAsciiEncoding . Instance , _bytes ) ;
3030 [ Test ] public void Test256WithUtf8 ( ) => TestRoundTrip ( Encoding . UTF8 , _bytes ) ;
3131 [ Test ] public void Test256WithDefault ( ) => TestRoundTrip ( Encoding . Default , _bytes ) ;
32- [ Test ] public void Test256WithUnicode ( ) => TestRoundTrip ( Encoding . Unicode , _bytes ) ;
33- [ Test ] public void Test256WithBigEndianUnicode ( ) => TestRoundTrip ( Encoding . BigEndianUnicode , _bytes ) ;
34- [ Test ] public void Test256WithUtf32 ( ) => TestRoundTrip ( Encoding . UTF32 , _bytes ) ;
35- [ Test ] public void Test256WithUtf32BE ( ) => TestRoundTrip ( new UTF32Encoding ( bigEndian : true , byteOrderMark : false ) , _bytes ) ;
3632 }
3733
3834 // Test decoding/encoding a valid UTF-8 sequence
@@ -42,18 +38,14 @@ public class Utf8Test {
4238
4339 [ SetUp ]
4440 public void SetUp ( ) {
45- // 12 bytes, rounded to multiply of 4 for the sake of UTF-32 test
41+ // 12 bytes: the UTF-8 encoding of "Питон" (five 2-byte Cyrillic letters) followed by "!!"
4642 _bytes = "\xd0 \x9f \xd0 \xb8 \xd1 \x82 \xd0 \xbe \xd0 \xbd !!" . AsBytes ( ) ;
4743 }
4844
4945 [ Test ] public void TestValidUtf8WithAscii ( ) => TestRoundTrip ( Encoding . ASCII , _bytes ) ;
5046 [ Test ] public void TestValidUtf8WithPythonAscii ( ) => TestRoundTrip ( PythonAsciiEncoding . Instance , _bytes ) ;
5147 [ Test ] public void TestValidUtf8WithUtf8 ( ) => TestRoundTrip ( Encoding . UTF8 , _bytes ) ;
5248 [ Test ] public void TestValidUtf8WithDefault ( ) => TestRoundTrip ( Encoding . Default , _bytes ) ;
53- [ Test ] public void TestValidUtf8WithUnicode ( ) => TestRoundTrip ( Encoding . Unicode , _bytes ) ;
54- [ Test ] public void TestValidUtf8WithBigEndianUnicode ( ) => TestRoundTrip ( Encoding . BigEndianUnicode , _bytes ) ;
55- [ Test ] public void TestValidUtf8WithUtf32 ( ) => TestRoundTrip ( Encoding . UTF32 , _bytes ) ;
56- [ Test ] public void TestValidUtf8WithUtf32BE ( ) => TestRoundTrip ( new UTF32Encoding ( bigEndian : true , byteOrderMark : false ) , _bytes ) ;
5749 }
5850
5951 // Test decoding/encoding an invalid UTF-8 sequence
@@ -66,20 +58,16 @@ public void SetUp() {
6658 // 12 bytes: two valid UTF-8 2-byte chars, one non-decodable byte,
6759 // one UTF-8 2-byte char with a non-decodable byte inserted between its two bytes,
6860 // and two final valid UTF-8 2-byte chars
69- _bytes = "\xd0 \x9f \xd0 \xb8 \x80 \xd1 \x20 \x82 \xd0 \xbe \xd0 \xbd " . AsBytes ( ) ;
61+ _bytes = "\xd0 \x9f \xd0 \xb8 \x80 \xd1 \xff \x82 \xd0 \xbe \xd0 \xbd " . AsBytes ( ) ;
7062 }
7163
7264 [ Test ] public void TestBrokenUtf8WithAscii ( ) => TestRoundTrip ( Encoding . ASCII , _bytes ) ;
7365 [ Test ] public void TestBrokenUtf8WithPythonAscii ( ) => TestRoundTrip ( PythonAsciiEncoding . Instance , _bytes ) ;
7466 [ Test ] public void TestBrokenUtf8WithUtf8 ( ) => TestRoundTrip ( Encoding . UTF8 , _bytes ) ;
7567 [ Test ] public void TestBrokenUtf8WithDefault ( ) => TestRoundTrip ( Encoding . Default , _bytes ) ;
76- [ Test ] public void TestBrokenUtf8WithUnicode ( ) => TestRoundTrip ( Encoding . Unicode , _bytes ) ;
77- [ Test ] public void TestBrokenUtf8WithBigEndianUnicode ( ) => TestRoundTrip ( Encoding . BigEndianUnicode , _bytes ) ;
78- [ Test ] public void TestBrokenUtf8WithUtf32 ( ) => TestRoundTrip ( Encoding . UTF32 , _bytes ) ;
79- [ Test ] public void TestBrokenUtf8WithUtf32BE ( ) => TestRoundTrip ( new UTF32Encoding ( bigEndian : true , byteOrderMark : false ) , _bytes ) ;
8068 }
8169
82- // Note: UTF-7 is not round-trip safe in general
70+ // Note: UTF-7, UTF-16, and UTF-32 are not round-trip safe in general
8371 private static void TestRoundTrip ( Encoding enc , byte [ ] bytes ) {
8472
8573 Encoding penc = new PythonSurrogateEscapeEncoding ( enc ) ;
@@ -344,33 +332,34 @@ public void TestEndiannessWithtUtf16BE() {
344332 public void TestEndiannessWithtUtf32LE ( ) {
345333 Encoding penc = new PythonSurrogateEscapeEncoding ( new UTF32Encoding ( bigEndian : false , byteOrderMark : false ) ) ;
346334 Assert . AreEqual ( "\u000a " , penc . GetChars ( _bytes1 ) ) ;
347- Assert . AreEqual ( " \udc00 \udc00 \udc00 \udc0a " , penc . GetChars ( _bytes2 ) ) ;
335+ Assert . Throws < DecoderFallbackException > ( ( ) => penc . GetChars ( _bytes2 ) ) ;
348336 }
349337
350338 [ Test ]
351339 public void TestEndiannessWithtUtf32BE ( ) {
352340 Encoding penc = new PythonSurrogateEscapeEncoding ( new UTF32Encoding ( bigEndian : true , byteOrderMark : false ) ) ;
353- Assert . AreEqual ( " \udc0a \udc00 \udc00 \udc00 " , penc . GetChars ( _bytes1 ) ) ;
341+ Assert . Throws < DecoderFallbackException > ( ( ) => penc . GetChars ( _bytes1 ) ) ;
354342 Assert . AreEqual ( "\u000a " , penc . GetChars ( _bytes2 ) ) ;
355343 }
356344 }
357345
358- public class IncompleteSequenceTests {
346+ public class AsciiByteTests {
359347
360348 private char [ ] _chars ;
361349
362350 [ SetUp ]
363351 public void SetUp ( ) {
364- // one surrogate escape is not enough for wide-char encodings
352+ // a surrogate escape carrying a byte < 128 (here \udc41, i.e. byte 0x41) is not allowed
365353 _chars = "+++\udc41 ++" . ToCharArray ( ) ;
366354 }
367355
368- [ Test ] public void TestIncompleteSequenceWithtUtf16LE ( ) => TestIncompleteSequence ( Encoding . Unicode , 2 ) ;
369- [ Test ] public void TestIncompleteSequenceWithtUtf16BE ( ) => TestIncompleteSequence ( Encoding . BigEndianUnicode , 2 ) ;
370- [ Test ] public void TestIncompleteSequenceWithtUtf32LE ( ) => TestIncompleteSequence ( new UTF32Encoding ( bigEndian : false , byteOrderMark : false ) , 4 ) ;
371- [ Test ] public void TestIncompleteSequenceWithtUtf32BE ( ) => TestIncompleteSequence ( new UTF32Encoding ( bigEndian : true , byteOrderMark : false ) , 4 ) ;
356+ [ Test ] public void TestAsciiByteWithtUtf8 ( ) => TestAsciiByte ( Encoding . UTF8 , 1 ) ;
357+ [ Test ] public void TestAsciiByteWithtUtf16LE ( ) => TestAsciiByte ( Encoding . Unicode , 2 ) ;
358+ [ Test ] public void TestAsciiByteWithtUtf16BE ( ) => TestAsciiByte ( Encoding . BigEndianUnicode , 2 ) ;
359+ [ Test ] public void TestAsciiByteWithtUtf32LE ( ) => TestAsciiByte ( new UTF32Encoding ( bigEndian : false , byteOrderMark : false ) , 4 ) ;
360+ [ Test ] public void TestAsciiByteWithtUtf32BE ( ) => TestAsciiByte ( new UTF32Encoding ( bigEndian : true , byteOrderMark : false ) , 4 ) ;
372361
373- public void TestIncompleteSequence ( Encoding codec , int charWidth ) {
362+ public void TestAsciiByte ( Encoding codec , int charWidth ) {
374363 Encoding penc = new PythonSurrogateEscapeEncoding ( codec ) ;
375364
376365 Assert . That ( ( ) => penc . GetBytes ( _chars ) ,
@@ -390,11 +379,9 @@ public void TestIncompleteSequence(Encoding codec, int charWidth) {
390379
391380 enc . Reset ( ) ;
392381
393- Assert . That ( enc . GetByteCount ( _chars , 0 , 4 , flush : false ) , Is . EqualTo ( 3 * charWidth ) ) ;
394- Assert . That ( ( ) => enc . GetBytes ( _chars , 0 , 4 , bytes , 0 , flush : false ) , Throws . Nothing ) ;
395- Assert . That ( ( ) => enc . GetByteCount ( _chars , 4 , 1 , flush : false ) ,
382+ Assert . That ( ( ) => enc . GetBytes ( _chars , 0 , 5 , bytes , 3 * charWidth , flush : false ) ,
396383 Throws . TypeOf < EncoderFallbackException > ( )
397- . With . Property ( "Index" ) . EqualTo ( - 1 ) // last char from previous increment
384+ . With . Property ( "Index" ) . EqualTo ( 3 )
398385 . And . Property ( "CharUnknown" ) . EqualTo ( _chars [ 3 ] ) ) ;
399386 }
400387 }
0 commit comments