Skip to content

Commit 6961864

Browse files
authored
Split out reader tests (#33)
* Split out reader tests * Test descriptions
1 parent a6c3769 commit 6961864

8 files changed

Lines changed: 1021 additions & 1040 deletions

tests/conftest.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
1-
import xarray as xr
2-
31
import json
42
import time
3+
from pathlib import Path
54

65
import pytest
7-
from pathlib import Path
6+
import xarray as xr
87

98

109
@pytest.fixture(scope="session")

tests/mocks.py

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
"""Shared mock classes for tests."""
2+
3+
4+
class MockGetResult:
    """Synchronous mock of a store ``GetResult`` backed by in-memory data.

    The whole payload is exposed three ways: as one buffer, as a one-chunk
    iterator, and through ``meta`` / ``range`` describing its length.
    """

    def __init__(self, data: bytes) -> None:
        """Remember ``data`` as the payload this result returns."""
        self._data = data

    @property
    def attributes(self) -> dict:
        """Return an empty attribute mapping (the mock carries none)."""
        return {}

    def buffer(self) -> bytes:
        """Return the entire payload in one piece."""
        return self._data

    @property
    def meta(self) -> dict:
        """Return object metadata; only ``size`` is populated from the payload."""
        return {
            "path": "",
            "last_modified": None,
            "size": len(self._data),
            "e_tag": None,
            "version": None,
        }

    @property
    def range(self) -> tuple:
        """Return the ``(start, end)`` byte range covered: the full payload."""
        return (0, len(self._data))

    def __iter__(self):
        """Yield the payload as a single chunk."""
        yield self._data
33+
34+
35+
class MockGetResultAsync:
    """Asynchronous mock of a store ``GetResult`` backed by in-memory data.

    The payload is available through the ``buffer_async`` coroutine and as a
    one-chunk asynchronous iterator; ``meta`` and ``range`` describe its
    length.
    """

    def __init__(self, data: bytes) -> None:
        """Remember ``data`` as the payload this result returns."""
        self._data = data

    @property
    def attributes(self) -> dict:
        """Return an empty attribute mapping (the mock carries none)."""
        return {}

    async def buffer_async(self) -> bytes:
        """Return the entire payload in one piece."""
        return self._data

    @property
    def meta(self) -> dict:
        """Return object metadata; only ``size`` is populated from the payload."""
        return {
            "path": "",
            "last_modified": None,
            "size": len(self._data),
            "e_tag": None,
            "version": None,
        }

    @property
    def range(self) -> tuple:
        """Return the ``(start, end)`` byte range covered: the full payload."""
        return (0, len(self._data))

    async def __aiter__(self):
        """Asynchronously yield the payload as a single chunk."""
        # Written as an async generator, so calling __aiter__() hands back an
        # object that is itself the asynchronous iterator.
        yield self._data
64+
65+
66+
class MockReadableStoreWithHead:
    """A mock store that supports the Head protocol.

    Every path resolves to the single payload given to the constructor.
    ``path`` and ``options`` are accepted for signature compatibility; only
    ``head`` uses ``path``, echoing it back in the returned metadata.
    """

    def __init__(self, data: bytes = b"test data"):
        """Store ``data`` as the payload served for every path."""
        self._data = data

    @staticmethod
    def _end_offset(start, end, length):
        """Return the exclusive end offset, preferring ``end`` over ``length``."""
        if end is not None:
            return end
        if length is None:
            # Fail with a readable message rather than ``int + None``.
            raise TypeError("either 'end' or 'length' must be provided")
        return start + length

    def head(self, path):
        """Return metadata for ``path``; ``size`` is the payload length."""
        return {
            "path": path,
            "last_modified": None,
            "size": len(self._data),
            "e_tag": None,
            "version": None,
        }

    def get(self, path, *, options=None):
        """Return a synchronous result wrapping the whole payload."""
        return MockGetResult(self._data)

    async def get_async(self, path, *, options=None):
        """Return an asynchronous result wrapping the whole payload."""
        return MockGetResultAsync(self._data)

    def get_range(self, path, *, start, end=None, length=None):
        """Return payload bytes ``[start, end)`` or ``[start, start + length)``.

        Raises:
            TypeError: if neither ``end`` nor ``length`` is given.
        """
        return self._data[start:self._end_offset(start, end, length)]

    async def get_range_async(self, path, *, start, end=None, length=None):
        """Asynchronous counterpart of ``get_range``; same arguments and result."""
        return self.get_range(path, start=start, end=end, length=length)

    def get_ranges(self, path, *, starts, ends=None, lengths=None):
        """Return one payload slice per start offset.

        ``ends`` takes precedence; otherwise each end is ``start + length``.

        Raises:
            TypeError: if neither ``ends`` nor ``lengths`` is given.
        """
        if ends is None:
            if lengths is None:
                raise TypeError("either 'ends' or 'lengths' must be provided")
            ends = [s + ln for s, ln in zip(starts, lengths)]
        return [self._data[s:e] for s, e in zip(starts, ends)]

    async def get_ranges_async(self, path, *, starts, ends=None, lengths=None):
        """Asynchronous counterpart of ``get_ranges``; same arguments and result."""
        return self.get_ranges(path, starts=starts, ends=ends, lengths=lengths)
106+
107+
108+
class MockReadableStoreWithoutHead:
    """A mock store without the Head protocol.

    Every path resolves to the single payload given to the constructor.
    ``path`` and ``options`` are accepted for signature compatibility but are
    otherwise unused. No ``head`` method is defined on purpose.
    """

    def __init__(self, data: bytes = b"test data"):
        """Store ``data`` as the payload served for every path."""
        self._data = data

    @staticmethod
    def _end_offset(start, end, length):
        """Return the exclusive end offset, preferring ``end`` over ``length``."""
        if end is not None:
            return end
        if length is None:
            # Fail with a readable message rather than ``int + None``.
            raise TypeError("either 'end' or 'length' must be provided")
        return start + length

    def get(self, path, *, options=None):
        """Return a synchronous result wrapping the whole payload."""
        return MockGetResult(self._data)

    async def get_async(self, path, *, options=None):
        """Return an asynchronous result wrapping the whole payload."""
        return MockGetResultAsync(self._data)

    def get_range(self, path, *, start, end=None, length=None):
        """Return payload bytes ``[start, end)`` or ``[start, start + length)``.

        Raises:
            TypeError: if neither ``end`` nor ``length`` is given.
        """
        return self._data[start:self._end_offset(start, end, length)]

    async def get_range_async(self, path, *, start, end=None, length=None):
        """Asynchronous counterpart of ``get_range``; same arguments and result."""
        return self.get_range(path, start=start, end=end, length=length)

    def get_ranges(self, path, *, starts, ends=None, lengths=None):
        """Return one payload slice per start offset.

        ``ends`` takes precedence; otherwise each end is ``start + length``.

        Raises:
            TypeError: if neither ``ends`` nor ``lengths`` is given.
        """
        if ends is None:
            if lengths is None:
                raise TypeError("either 'ends' or 'lengths' must be provided")
            ends = [s + ln for s, ln in zip(starts, lengths)]
        return [self._data[s:e] for s, e in zip(starts, ends)]

    async def get_ranges_async(self, path, *, starts, ends=None, lengths=None):
        """Asynchronous counterpart of ``get_ranges``; same arguments and result."""
        return self.get_ranges(path, starts=starts, ends=ends, lengths=lengths)

tests/test_buffered_reader.py

Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
"""Tests specific to BufferedStoreReader."""
2+
3+
from io import BytesIO
4+
5+
import pytest
6+
from obstore.store import MemoryStore
7+
8+
from obspec_utils.obspec import BufferedStoreReader
9+
from obspec_utils.tracing import TracingReadableStore, RequestTrace
10+
11+
12+
def test_buffered_reader_buffering():
    """Test that BufferedStoreReader serves a follow-up read from its buffer."""
    memstore = MemoryStore()
    memstore.put("test.txt", b"0123456789ABCDEF")

    # Count store requests so a buffer hit can be told apart from a new fetch.
    trace = RequestTrace()
    traced_store = TracingReadableStore(memstore, trace)
    reader = BufferedStoreReader(traced_store, "test.txt", buffer_size=8)

    assert reader.read(2) == b"01"
    requests_after_first_read = trace.total_requests

    # Bytes 2-3 lie within the 8-byte buffer requested by the first read, so
    # the second read must not issue any further store request.
    assert reader.read(2) == b"23"
    assert trace.total_requests == requests_after_first_read
21+
22+
23+
class TestBufferBoundaryConditions:
    """Test buffer boundary conditions for off-by-one errors.

    Each test writes a small payload to a fresh ``MemoryStore`` and reads it
    back through ``BufferedStoreReader``. Tests that care about whether a
    read hit the buffer wrap the store in ``TracingReadableStore`` and compare
    ``trace.total_requests`` before and after.
    """

    @staticmethod
    def _store_with(data):
        """Return a fresh MemoryStore holding ``data`` under ``test.txt``."""
        memstore = MemoryStore()
        memstore.put("test.txt", data)
        return memstore

    @classmethod
    def _reader(cls, data, buffer_size):
        """Return a BufferedStoreReader over ``data`` with the given buffer size."""
        return BufferedStoreReader(
            cls._store_with(data), "test.txt", buffer_size=buffer_size
        )

    @classmethod
    def _traced_reader(cls, data, buffer_size):
        """Return ``(reader, trace)`` where ``trace`` records store requests."""
        trace = RequestTrace()
        traced_store = TracingReadableStore(cls._store_with(data), trace)
        reader = BufferedStoreReader(
            traced_store, "test.txt", buffer_size=buffer_size
        )
        return reader, trace

    def test_read_exactly_last_byte_of_buffer(self):
        """Read exactly the last byte of a buffered region."""
        reader = self._reader(b"0123456789", buffer_size=5)

        reader.read(1)
        reader.seek(4)
        assert reader.read(1) == b"4"

    def test_read_at_buffer_end_boundary(self):
        """Read starting exactly at buffer_end should trigger new fetch."""
        reader, trace = self._traced_reader(b"0123456789", buffer_size=5)

        assert reader.read(5) == b"01234"
        initial_requests = trace.total_requests

        # Position is now 5, which equals buffer_end
        # Condition: 0 <= 5 < 5 is False, so should refetch
        assert reader.read(1) == b"5"
        assert trace.total_requests > initial_requests

    def test_read_spanning_buffer_boundary(self):
        """Read that starts inside buffer but extends beyond it."""
        reader = self._reader(b"0123456789ABCDEF", buffer_size=5)

        assert reader.read(3) == b"012"
        assert reader.read(5) == b"34567"

    def test_read_exactly_available_bytes_from_buffer(self):
        """Read exactly the number of available bytes in buffer."""
        reader, trace = self._traced_reader(b"0123456789", buffer_size=5)

        assert reader.read(2) == b"01"
        assert trace.total_requests == 2  # get (size) + get_range (buffer)

        # available = 5 - 2 = 3 bytes, read exactly 3
        assert reader.read(3) == b"234"
        assert trace.total_requests == 2  # served from buffer

    def test_read_one_more_than_available(self):
        """Read one byte more than available in buffer triggers refetch."""
        reader, trace = self._traced_reader(b"0123456789", buffer_size=5)

        assert reader.read(2) == b"01"
        assert trace.total_requests == 2  # get (size) + get_range (buffer)

        # available = 3, requesting 4
        assert reader.read(4) == b"2345"
        assert trace.total_requests == 3  # refetch needed

    def test_buffer_reuse_after_backward_seek(self):
        """Seek backward within buffer should reuse buffered data."""
        reader, trace = self._traced_reader(b"0123456789", buffer_size=5)

        assert reader.read(5) == b"01234"
        assert trace.total_requests == 2  # get (size) + get_range (buffer)

        reader.seek(2)
        assert reader.read(2) == b"23"
        assert trace.total_requests == 2  # served from buffer

    def test_buffer_exactly_matches_file_size(self):
        """Buffer size equals file size - entire file in buffer."""
        reader, trace = self._traced_reader(b"12345", buffer_size=5)

        assert reader.read(3) == b"123"
        assert trace.total_requests == 2  # get (size) + get_range (buffer)

        reader.seek(0)
        assert reader.read(5) == b"12345"
        assert trace.total_requests == 2  # served from buffer

    def test_sequential_reads_consuming_entire_buffer(self):
        """Sequential reads that exactly consume the buffer."""
        data = b"0123456789ABCDEF"
        reader = self._reader(data, buffer_size=4)
        ref = BytesIO(data)  # reference implementation to compare against

        for _ in range(4):
            assert reader.read(4) == ref.read(4)
            assert reader.tell() == ref.tell()

    def test_buffer_offset_calculation_at_various_positions(self):
        """Test buffer_offset = position - buffer_start at various positions."""
        data = b"0123456789"
        reader = self._reader(data, buffer_size=5)
        ref = BytesIO(data)

        for num in range(3, 8):
            reader.seek(num)
            ref.seek(num)
            assert reader.read(1) == ref.read(1)

    def test_empty_buffer_initial_state(self):
        """Empty buffer at start should trigger fetch."""
        reader = self._reader(b"hello", buffer_size=10)

        # Buffer is empty initially (len = 0)
        # buffer_end = buffer_start + len(buffer) = 0 + 0 = 0
        # position (0) < buffer_end (0) is False
        # So should fetch from store
        assert reader.read(5) == b"hello"

    @pytest.mark.parametrize("buffer_size", [1, 2, 3, 5, 8, 10, 16, 32])
    def test_various_buffer_sizes(self, buffer_size):
        """Test buffer logic with various buffer sizes."""
        data = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnop"
        reader = self._reader(data, buffer_size=buffer_size)
        ref = BytesIO(data)

        # Read in 3-byte steps until both sides report end of data; the final
        # empty read is compared too before the loop exits.
        while True:
            reader_data = reader.read(3)
            ref_data = ref.read(3)
            assert reader_data == ref_data
            if not reader_data:
                break

    @pytest.mark.parametrize("read_size", [1, 2, 3, 4, 5])
    def test_various_read_sizes_within_buffer(self, read_size):
        """Test different read sizes that should be satisfied from buffer."""
        data = b"0123456789"
        reader = self._reader(data, buffer_size=10)
        ref = BytesIO(data)

        # Fill buffer
        reader.read(1)
        reader.seek(0)
        ref.read(1)
        ref.seek(0)

        assert reader.read(read_size) == ref.read(read_size)

    def test_buffer_offset_zero_case(self):
        """Test when buffer_offset = position - buffer_start = 0."""
        reader = self._reader(b"hello world", buffer_size=5)

        # After first read, buffer_start = 0
        # Seek to 0, so buffer_offset = 0 - 0 = 0
        reader.read(1)
        reader.seek(0)

        # available = len(buffer) - 0 = 5
        assert reader.read(5) == b"hello"

    def test_buffer_offset_max_case(self):
        """Test when buffer_offset = len(buffer) - 1 (last valid offset)."""
        reader = self._reader(b"01234", buffer_size=5)

        # Fill buffer with all 5 bytes
        reader.read(1)

        # Seek to last byte position (4)
        # buffer_offset = 4 - 0 = 4
        # available = 5 - 4 = 1
        reader.seek(4)
        assert reader.read(1) == b"4"

0 commit comments

Comments
 (0)