Skip to content

Commit a4d3a4a

Browse files
committed
Adapt specs so Strings are always created with a #bytesize that is a multiple of the encoding's minimum character length
* Only UTF-16 and UTF-32 have an encoding minimum length > 1, so UTF-16 strings must have a #bytesize that is a multiple of 2, and UTF-32 strings must have a #bytesize that is a multiple of 4.
1 parent 257ad6a commit a4d3a4a

6 files changed

Lines changed: 21 additions & 10 deletions

File tree

core/encoding/compatible_spec.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454

5555
it "returns nil if the second's Encoding is not ASCII compatible" do
5656
a = "abc".force_encoding("UTF-8")
57-
b = "123".force_encoding("UTF-16LE")
57+
b = "1234".force_encoding("UTF-16LE")
5858
Encoding.compatible?(a, b).should be_nil
5959
end
6060
end

core/encoding/converter/putback_spec.rb

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,23 @@
3434
@ec.putback.should == ""
3535
end
3636

37+
it "returns the problematic bytes for UTF-16LE" do
38+
ec = Encoding::Converter.new("utf-16le", "iso-8859-1")
39+
src = "\x00\xd8\x61\x00"
40+
dst = ""
41+
ec.primitive_convert(src, dst).should == :invalid_byte_sequence
42+
ec.primitive_errinfo.should == [:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "a\x00"]
43+
ec.putback.should == "a\x00".force_encoding("utf-16le")
44+
ec.putback.should == ""
45+
end
46+
3747
it "accepts an integer argument corresponding to the number of bytes to be put back" do
3848
ec = Encoding::Converter.new("utf-16le", "iso-8859-1")
3949
src = "\x00\xd8\x61\x00"
4050
dst = ""
4151
ec.primitive_convert(src, dst).should == :invalid_byte_sequence
4252
ec.primitive_errinfo.should == [:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "a\x00"]
43-
ec.putback(1).should == "\x00".force_encoding("utf-16le")
44-
ec.putback.should == "a".force_encoding("utf-16le")
53+
ec.putback(2).should == "a\x00".force_encoding("utf-16le")
4554
ec.putback.should == ""
4655
end
4756
end

core/string/force_encoding_spec.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
end
6161

6262
it "does not transcode self" do
63-
str = "\u{8612}"
63+
str = "é"
6464
str.dup.force_encoding('utf-16le').should_not == str.encode('utf-16le')
6565
end
6666

core/string/scrub_spec.rb

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,9 @@
5656
it "replaces invalid byte sequences in frozen strings" do
5757
x81 = [0x81].pack('C').force_encoding('utf-8')
5858
(-"abc\u3042#{x81}").scrub("*").should == "abc\u3042*"
59-
utf16_str = ("abc".encode('UTF-16LE').bytes + [0x81]).pack('c*').force_encoding('UTF-16LE')
59+
60+
leading_surrogate = [0x00, 0xD8]
61+
utf16_str = ("abc".encode('UTF-16LE').bytes + leading_surrogate).pack('c*').force_encoding('UTF-16LE')
6062
(-(utf16_str)).scrub("*".encode('UTF-16LE')).should == "abc*".encode('UTF-16LE')
6163
end
6264

core/string/shared/eql.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
end
2222

2323
it "considers encoding compatibility" do
24-
"hello".force_encoding("utf-8").send(@method, "hello".force_encoding("utf-32le")).should be_false
24+
"abcd".force_encoding("utf-8").send(@method, "abcd".force_encoding("utf-32le")).should be_false
2525
end
2626

2727
it "ignores subclass differences" do

core/string/valid_encoding_spec.rb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,10 @@
4343
str.force_encoding('KOI8-R').valid_encoding?.should be_true
4444
str.force_encoding('KOI8-U').valid_encoding?.should be_true
4545
str.force_encoding('Shift_JIS').valid_encoding?.should be_false
46-
str.force_encoding('UTF-16BE').valid_encoding?.should be_false
47-
str.force_encoding('UTF-16LE').valid_encoding?.should be_false
48-
str.force_encoding('UTF-32BE').valid_encoding?.should be_false
49-
str.force_encoding('UTF-32LE').valid_encoding?.should be_false
46+
"\xD8\x00".force_encoding('UTF-16BE').valid_encoding?.should be_false
47+
"\x00\xD8".force_encoding('UTF-16LE').valid_encoding?.should be_false
48+
"\x04\x03\x02\x01".force_encoding('UTF-32BE').valid_encoding?.should be_false
49+
"\x01\x02\x03\x04".force_encoding('UTF-32LE').valid_encoding?.should be_false
5050
str.force_encoding('Windows-1251').valid_encoding?.should be_true
5151
str.force_encoding('IBM437').valid_encoding?.should be_true
5252
str.force_encoding('IBM737').valid_encoding?.should be_true

0 commit comments

Comments
 (0)