Skip to content

Commit cfcc6d1

Browse files
jsvdkares
authored andcommitted
[fix] SSL write loss on non-blocking partial flush (#242)
write() unconditionally called netWriteData.clear() after a non-blocking flushData() that may not have flushed all encrypted bytes; discarding unflushed TLS records, corrupting the encrypted stream and causing 'Broken pipe' or 'Connection reset' errors on subsequent writes (most commonly seen during 'gem push' over https of large artifacts) additionally, sysread needs to also flush pending writes before reading since after write_nonblock, encrypted bytes could remain unsent; without flushing first the server would never receive the complete request body (e.g. net/http POST), causing it to time out or reset
1 parent 65ad92e commit cfcc6d1

2 files changed

Lines changed: 183 additions & 4 deletions

File tree

src/main/java/org/jruby/ext/openssl/SSLSocket.java

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -689,7 +689,9 @@ public int write(ByteBuffer src, boolean blocking) throws SSLException, IOExcept
689689
if ( netWriteData.hasRemaining() ) {
690690
flushData(blocking);
691691
}
692-
netWriteData.clear();
692+
// compact() to preserve encrypted bytes flushData could not send (non-blocking partial write)
693+
// clear() would discard them, corrupting the TLS record stream:
694+
netWriteData.compact();
693695
final SSLEngineResult result = engine.wrap(src, netWriteData);
694696
if ( result.getStatus() == SSLEngineResult.Status.CLOSED ) {
695697
throw getRuntime().newIOError("closed SSL engine");
@@ -727,9 +729,8 @@ private int readAndUnwrap(final boolean blocking) throws IOException {
727729
closeInbound();
728730
return -1;
729731
}
730-
// inbound channel has been already closed but closeInbound() must
731-
// be defered till the last engine.unwrap() call.
732-
// peerNetData could not be empty.
732+
// inbound channel has been already closed but closeInbound() must be defered till
733+
// the last engine.unwrap() call; peerNetData could not be empty
733734
}
734735
appReadData.clear();
735736
netReadData.flip();
@@ -831,6 +832,13 @@ private IRubyObject sysreadImpl(final ThreadContext context, final IRubyObject l
831832
}
832833

833834
try {
835+
// flush any pending encrypted write data before reading; after write_nonblock,
836+
// encrypted bytes may remain in the buffer that haven't been sent, if we read wout flushing,
837+
// server may not have received the complete request (e.g. net/http POST body) and will not respond
838+
if ( engine != null && netWriteData.hasRemaining() ) {
839+
flushData(blocking);
840+
}
841+
834842
// So we need to make sure to only block when there is no data left to process
835843
if ( engine == null || ! ( appReadData.hasRemaining() || netReadData.position() > 0 ) ) {
836844
final Object ex = waitSelect(SelectionKey.OP_READ, blocking, exception);
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
# frozen_string_literal: false
2+
require File.expand_path('test_helper', File.dirname(__FILE__))
3+
4+
class TestSSLWriteFlush < TestCase
5+
6+
include SSLTestHelper
7+
8+
# Exercises the write_nonblock -> read transition used by net/http for POST
9+
# requests. The bug (clear() instead of compact()) loses encrypted bytes that
10+
# remain in netWriteData after a partial flushData on the *last* write_nonblock.
11+
#
12+
# We run multiple request/response rounds on the same TLS connection with
13+
# varying payload sizes to increase the probability that at least one round
14+
# triggers a partial flush at the write->read boundary.
15+
#
16+
# NOTE: On localhost the loopback interface rarely causes partial socket writes,
17+
# so this test may not reliably catch regressions to clear(). The definitive
18+
# coverage is in the Java-level SSLSocketTest which can control buffer state
19+
# directly. This test serves as an integration smoke test for the write->read
20+
# data path.
21+
#
22+
# https://github.com/jruby/jruby-openssl/issues/242
23+
def test_write_nonblock_data_integrity
24+
# Payload sizes chosen to exercise different alignments with the TLS record
25+
# layer (~16 KB records) and socket send buffer. Primes avoid lucky alignment.
26+
payload_sizes = [
27+
8_191, # just under 8 KB — fits in one TLS record
28+
16_381, # just under 16 KB — nearly one full TLS record
29+
65_521, # ~64 KB — several TLS records, common chunk size
30+
262_139, # ~256 KB — large payload, many partial flushes likely
31+
]
32+
33+
# The server reads a 4-byte big-endian length prefix, then that many bytes
34+
# of payload, and responds with "OK:<hex_digest>" where hex_digest is the
35+
# SHA-256 of the received payload. This is repeated for each payload size.
36+
server_proc = proc { |ctx, ssl|
37+
begin
38+
payload_sizes.length.times do
39+
# read 4-byte length prefix
40+
header = read_exactly(ssl, 4)
41+
break unless header && header.bytesize == 4
42+
expected_len = header.unpack('N')[0]
43+
44+
# read payload
45+
payload = read_exactly(ssl, expected_len)
46+
break unless payload && payload.bytesize == expected_len
47+
48+
digest = OpenSSL::Digest::SHA256.hexdigest(payload)
49+
response = "OK:#{digest}"
50+
ssl.write(response)
51+
end
52+
ensure
53+
ssl.close rescue nil
54+
end
55+
}
56+
57+
start_server0(PORT, OpenSSL::SSL::VERIFY_NONE, true,
58+
server_proc: server_proc) do |server, port|
59+
sock = TCPSocket.new("127.0.0.1", port)
60+
# Constrain the send buffer to make partial flushes more likely.
61+
# The kernel may round this up, but even a modest reduction helps.
62+
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDBUF, 2048)
63+
sock.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_NODELAY, 1)
64+
65+
ssl = OpenSSL::SSL::SSLSocket.new(sock)
66+
ssl.connect
67+
ssl.sync_close = true
68+
69+
payload_sizes.each do |size|
70+
data = generate_test_data(size)
71+
expected_digest = OpenSSL::Digest::SHA256.hexdigest(data)
72+
73+
# Send length-prefixed payload via write_nonblock
74+
message = [size].pack('N') + data
75+
write_nonblock_all(ssl, message)
76+
77+
# Immediately switch to reading — this is where the bug manifests:
78+
# if compact() was replaced with clear(), residual encrypted bytes
79+
# from the last write_nonblock are lost and the server never
80+
# receives the complete payload.
81+
response = read_with_timeout(ssl, 5)
82+
83+
assert_equal "OK:#{expected_digest}", response,
84+
"Data integrity failure for #{size}-byte payload: " \
85+
"server did not receive the complete payload or it was corrupted"
86+
end
87+
88+
ssl.close
89+
end
90+
end
91+
92+
private
93+
94+
# Generate non-trivial test data that won't compress well in TLS.
95+
# Uses a seeded PRNG so failures are reproducible, and avoids
96+
# OpenSSL::Random which has a per-call size limit in some BC versions.
97+
def generate_test_data(size)
98+
rng = Random.new(size) # seeded for reproducibility
99+
(0...size).map { rng.rand(256).chr }.join.b
100+
end
101+
102+
# Write all of +data+ via write_nonblock, retrying on WaitWritable.
103+
# Does NOT do any extra flushing after the last write — this is critical
104+
# for exercising the bug where clear() loses the tail of encrypted data.
105+
def write_nonblock_all(ssl, data)
106+
remaining = data
107+
while remaining.bytesize > 0
108+
begin
109+
written = ssl.write_nonblock(remaining)
110+
remaining = remaining.byteslice(written..-1)
111+
rescue IO::WaitWritable
112+
IO.select(nil, [ssl])
113+
retry
114+
end
115+
end
116+
end
117+
118+
# Read a complete response from the SSL socket with a timeout.
119+
# Returns the accumulated data, or fails the test on timeout.
120+
def read_with_timeout(ssl, timeout_sec)
121+
response = ""
122+
deadline = Time.now + timeout_sec
123+
loop do
124+
remaining = deadline - Time.now
125+
if remaining <= 0
126+
flunk "Timed out after #{timeout_sec}s waiting for server response " \
127+
"(got #{response.bytesize} bytes so far: #{response.inspect[0, 80]})"
128+
end
129+
if IO.select([ssl], nil, nil, [remaining, 0.5].min)
130+
begin
131+
chunk = ssl.read_nonblock(16384, exception: false)
132+
case chunk
133+
when :wait_readable then next
134+
when nil then break # EOF
135+
else
136+
response << chunk
137+
# Our protocol responses are short ("OK:<64 hex chars>"), so if
138+
# we've received a plausible amount we can stop.
139+
break if response.include?("OK:") && response.bytesize >= 67
140+
end
141+
rescue IO::WaitReadable
142+
# Can occur despite exception: false and IO.select — TLS protocol
143+
# data (e.g. post-handshake messages) made the socket look readable
144+
# but no application data is available yet.
145+
next
146+
rescue EOFError
147+
break
148+
end
149+
end
150+
end
151+
response
152+
end
153+
154+
# Read exactly +n+ bytes from an SSL socket, retrying partial reads.
155+
def self.read_exactly(ssl, n)
156+
buf = ""
157+
while buf.bytesize < n
158+
chunk = ssl.readpartial(n - buf.bytesize)
159+
buf << chunk
160+
end
161+
buf
162+
rescue EOFError
163+
buf
164+
end
165+
166+
# Instance method wrapper for use in server_proc
167+
def read_exactly(ssl, n)
168+
self.class.read_exactly(ssl, n)
169+
end
170+
171+
end

0 commit comments

Comments
 (0)