Skip to content
This repository was archived by the owner on Jul 24, 2023. It is now read-only.

Commit 0498ee1

Browse files
committed
Merge pull request #46 from zawaideh/master
Fix 'invalid byte sequence in UTF-8' error in parse_link_attrs
2 parents 9ce498e + d3dca2f commit 0498ee1

2 files changed

Lines changed: 21 additions & 1 deletion

File tree

lib/openid/consumer/html_parse.rb

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,15 @@ def OpenID.unescape_hash(h)
3434

3535

3636
def OpenID.parse_link_attrs(html)
37-
stripped = html.gsub(REMOVED_RE,'')
37+
begin
38+
stripped = html.gsub(REMOVED_RE,'')
39+
rescue ArgumentError
40+
begin
41+
stripped = html.encode('UTF-8', 'binary', :invalid => :replace, :undef => :replace, :replace => '').gsub(REMOVED_RE,'')
42+
rescue Encoding::UndefinedConversionError #needed for a problem in JRuby where it can't handle the conversion
43+
stripped = html.encode('UTF-8', 'ASCII', :invalid => :replace, :undef => :replace, :replace => '').gsub(REMOVED_RE,'')
44+
end
45+
end
3846
parser = HTMLTokenizer.new(stripped)
3947

4048
links = []

test/test_linkparse.rb

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -84,6 +84,7 @@ def test_linkparse
8484
assert(false, "datafile parsing error: bad header #{h}")
8585
end
8686
}
87+
html = html.force_encoding('UTF-8') if html.respond_to? :force_encoding
8788
links = OpenID::parse_link_attrs(html)
8889

8990
found = links.dup
@@ -97,5 +98,16 @@ def test_linkparse
9798
end
9899
}
99100
assert_equal(numtests, testnum, "Number of tests")
101+
102+
# test handling of invalid UTF-8 byte sequences
103+
if "".respond_to? :force_encoding
104+
html = "<html><body>hello joel\255</body></html>".force_encoding('UTF-8')
105+
else
106+
html = "<html><body>hello joel\255</body></html>"
107+
end
108+
assert_nothing_raised do
109+
OpenID::parse_link_attrs(html)
110+
end
111+
100112
end
101113
end

0 commit comments

Comments
 (0)