Skip to content

Commit c992769

Browse files
authored
Fix: Recursion when pickling (#37)
* Fix: Pickling readers and store wrappers * Test __get_attr__ * Test composition
1 parent 857cbca commit c992769

7 files changed

Lines changed: 508 additions & 80 deletions

File tree

src/obspec_utils/cache.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,18 @@ def __getattr__(self, name: str) -> Any:
109109
"""Forward unknown attributes to the underlying store.
110110
111111
This ensures CachingReadableStore is transparent for any additional
112-
methods the underlying store may have (e.g., prefix, url attributes).
112+
public methods or attributes the underlying store may have.
113+
114+
Note: Private attributes (starting with '_') are not forwarded.
113115
"""
116+
if name.startswith("_"):
117+
raise AttributeError(
118+
f"'{type(self).__name__}' object has no attribute '{name}'"
119+
)
120+
if "_store" not in self.__dict__:
121+
raise AttributeError(
122+
f"'{type(self).__name__}' object has no attribute '{name}'"
123+
)
114124
return getattr(self._store, name)
115125

116126
def __reduce__(self):

src/obspec_utils/obspec.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,9 @@ def __init__(
429429
would require more requests than this, request sizes are increased to
430430
fit within this limit.
431431
"""
432+
self._store = store
433+
self._path = path
434+
432435
# Determine file size if not provided
433436
if file_size is None:
434437
if hasattr(store, "head") and callable(store.head):

src/obspec_utils/splitting.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,21 @@ def __init__(
131131
self._max_concurrent_requests = max_concurrent_requests
132132

133133
def __getattr__(self, name: str) -> Any:
134-
"""Forward unknown attributes to the underlying store."""
134+
"""Forward unknown attributes to the underlying store.
135+
136+
This ensures SplittingReadableStore is transparent for any additional
137+
public methods or attributes the underlying store may have.
138+
139+
Note: Private attributes (starting with '_') are not forwarded.
140+
"""
141+
if name.startswith("_"):
142+
raise AttributeError(
143+
f"'{type(self).__name__}' object has no attribute '{name}'"
144+
)
145+
if "_store" not in self.__dict__:
146+
raise AttributeError(
147+
f"'{type(self).__name__}' object has no attribute '{name}'"
148+
)
135149
return getattr(self._store, name)
136150

137151
def _get_file_size(self, path: str) -> int | None:

tests/mocks.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,3 +136,65 @@ async def get_ranges_async(self, path, *, starts, ends=None, lengths=None):
136136
if ends is None:
137137
ends = [s + ln for s, ln in zip(starts, lengths)]
138138
return [self._data[s:e] for s, e in zip(starts, ends)]
139+
140+
141+
class PicklableStore:
142+
"""A picklable store supporting multiple paths, for testing pickle support.
143+
144+
Unlike obstore's MemoryStore (Rust-backed, not picklable), this pure-Python
145+
store can be pickled and unpickled, allowing tests to verify pickle support
146+
for wrappers like CachingReadableStore and reader classes.
147+
"""
148+
149+
def __init__(self, data: dict[str, bytes] | None = None):
150+
self._data = data if data is not None else {}
151+
152+
def put(self, path: str, data: bytes) -> None:
153+
self._data[path] = data
154+
155+
def head(self, path: str) -> dict:
156+
return {"size": len(self._data[path])}
157+
158+
def get(self, path: str, *, options=None):
159+
return MockGetResult(self._data[path])
160+
161+
async def get_async(self, path: str, *, options=None):
162+
return MockGetResultAsync(self._data[path])
163+
164+
def get_range(
165+
self,
166+
path: str,
167+
*,
168+
start: int,
169+
end: int | None = None,
170+
length: int | None = None,
171+
):
172+
data = self._data[path]
173+
if length is not None:
174+
return data[start : start + length]
175+
elif end is not None:
176+
return data[start:end]
177+
return data[start:]
178+
179+
async def get_range_async(
180+
self,
181+
path: str,
182+
*,
183+
start: int,
184+
end: int | None = None,
185+
length: int | None = None,
186+
):
187+
return self.get_range(path, start=start, end=end, length=length)
188+
189+
def get_ranges(self, path: str, *, starts, ends=None, lengths=None):
190+
if lengths is not None:
191+
return [
192+
self._data[path][start : start + length]
193+
for start, length in zip(starts, lengths)
194+
]
195+
elif ends is not None:
196+
return [self._data[path][s:e] for s, e in zip(starts, ends)]
197+
raise ValueError("Must provide ends or lengths")
198+
199+
async def get_ranges_async(self, path: str, *, starts, ends=None, lengths=None):
200+
return self.get_ranges(path, starts=starts, ends=ends, lengths=lengths)

tests/test_cache.py

Lines changed: 2 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
from obspec_utils.cache import CachingReadableStore
1111
from obspec_utils.registry import ObjectStoreRegistry
1212

13+
from .mocks import PicklableStore
14+
1315

1416
class TestCachingReadableStore:
1517
"""Tests for basic caching functionality."""
@@ -392,84 +394,6 @@ def test_forwards_unknown_attributes(self):
392394
assert hasattr(cached, "delete") # MemoryStore has delete
393395

394396

395-
class PicklableStore:
396-
"""A simple picklable store for testing pickle support.
397-
398-
MemoryStore from obstore is Rust-backed and not picklable.
399-
This pure-Python store allows testing CachingReadableStore's pickle support.
400-
"""
401-
402-
def __init__(self, data: dict[str, bytes] | None = None):
403-
self._data = data or {}
404-
405-
def put(self, path: str, data: bytes) -> None:
406-
self._data[path] = data
407-
408-
def get(self, path: str, *, options=None):
409-
return _PicklableGetResult(self._data[path])
410-
411-
async def get_async(self, path: str, *, options=None):
412-
return _PicklableGetResultAsync(self._data[path])
413-
414-
def get_range(
415-
self,
416-
path: str,
417-
*,
418-
start: int,
419-
end: int | None = None,
420-
length: int | None = None,
421-
):
422-
data = self._data[path]
423-
if end is not None:
424-
return data[start:end]
425-
elif length is not None:
426-
return data[start : start + length]
427-
return data[start:]
428-
429-
async def get_range_async(
430-
self,
431-
path: str,
432-
*,
433-
start: int,
434-
end: int | None = None,
435-
length: int | None = None,
436-
):
437-
return self.get_range(path, start=start, end=end, length=length)
438-
439-
def get_ranges(self, path: str, *, starts, ends=None, lengths=None):
440-
if ends is not None:
441-
return [self._data[path][s:e] for s, e in zip(starts, ends)]
442-
elif lengths is not None:
443-
return [
444-
self._data[path][start : start + length]
445-
for start, length in zip(starts, lengths)
446-
]
447-
raise ValueError("Must provide ends or lengths")
448-
449-
async def get_ranges_async(self, path: str, *, starts, ends=None, lengths=None):
450-
return self.get_ranges(path, starts=starts, ends=ends, lengths=lengths)
451-
452-
453-
class _PicklableGetResult:
454-
"""Mock GetResult for PicklableStore."""
455-
456-
def __init__(self, data: bytes):
457-
self._data = data
458-
459-
def buffer(self):
460-
return self._data
461-
462-
463-
class _PicklableGetResultAsync:
464-
"""Mock async GetResult for PicklableStore."""
465-
466-
def __init__(self, data: bytes):
467-
self._data = data
468-
469-
async def buffer_async(self):
470-
return self._data
471-
472-
473397
class TestPickling:
474398
"""Tests for pickling support (needed for multiprocessing/distributed)."""
475399

tests/test_readers.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
EagerStoreReader, and ParallelStoreReader.
44
"""
55

6+
import pickle
67
from io import BytesIO
78

89
import pytest
@@ -14,6 +15,8 @@
1415
ParallelStoreReader,
1516
)
1617

18+
from .mocks import PicklableStore
19+
1720

1821
ALL_READERS = [BufferedStoreReader, EagerStoreReader, ParallelStoreReader]
1922

@@ -386,3 +389,86 @@ def test_reader_seek_invalid_whence_raises(ReaderClass):
386389

387390
with pytest.raises(ValueError):
388391
reader.seek(0, 3)
392+
393+
394+
# =============================================================================
395+
# Pickling tests
396+
# =============================================================================
397+
398+
399+
@pytest.mark.parametrize("ReaderClass", ALL_READERS)
400+
def test_reader_pickle_roundtrip(ReaderClass):
401+
"""Reader can be pickled and unpickled."""
402+
store = PicklableStore()
403+
store.put("test.txt", b"hello world")
404+
405+
reader = ReaderClass(store, "test.txt")
406+
407+
pickled = pickle.dumps(reader)
408+
restored = pickle.loads(pickled)
409+
410+
assert isinstance(restored, ReaderClass)
411+
412+
413+
@pytest.mark.parametrize("ReaderClass", ALL_READERS)
414+
def test_reader_pickle_preserves_path(ReaderClass):
415+
"""Unpickled reader preserves the file path."""
416+
store = PicklableStore()
417+
store.put("test.txt", b"hello world")
418+
419+
reader = ReaderClass(store, "test.txt")
420+
restored = pickle.loads(pickle.dumps(reader))
421+
422+
assert restored._path == "test.txt"
423+
424+
425+
@pytest.mark.parametrize("ReaderClass", ALL_READERS)
426+
def test_reader_pickle_restored_is_functional(ReaderClass):
427+
"""Restored reader can read data."""
428+
store = PicklableStore()
429+
store.put("test.txt", b"hello world")
430+
431+
reader = ReaderClass(store, "test.txt")
432+
restored = pickle.loads(pickle.dumps(reader))
433+
434+
# Should be able to read data
435+
assert restored.read(5) == b"hello"
436+
assert restored.read(6) == b" world"
437+
438+
439+
@pytest.mark.parametrize("ReaderClass", ALL_READERS)
440+
def test_reader_pickle_preserves_position(ReaderClass):
441+
"""Unpickled reader preserves the current position."""
442+
store = PicklableStore()
443+
store.put("test.txt", b"hello world")
444+
445+
reader = ReaderClass(store, "test.txt")
446+
reader.read(5) # Move position to 5
447+
assert reader.tell() == 5
448+
449+
restored = pickle.loads(pickle.dumps(reader))
450+
451+
assert restored.tell() == 5
452+
assert restored.read(6) == b" world"
453+
454+
455+
@pytest.mark.parametrize("ReaderClass", ALL_READERS)
456+
def test_reader_pickle_multiple_protocols(ReaderClass):
457+
"""Pickling works with different pickle protocols.
458+
459+
Note: EagerStoreReader uses BytesIO which requires protocol >= 2.
460+
"""
461+
store = PicklableStore()
462+
store.put("test.txt", b"hello world")
463+
464+
reader = ReaderClass(store, "test.txt")
465+
466+
# BytesIO (used by EagerStoreReader) requires protocol >= 2
467+
min_protocol = 2 if ReaderClass == EagerStoreReader else 0
468+
469+
for protocol in range(min_protocol, pickle.HIGHEST_PROTOCOL + 1):
470+
pickled = pickle.dumps(reader, protocol=protocol)
471+
restored = pickle.loads(pickled)
472+
473+
restored.seek(0)
474+
assert restored.read() == b"hello world"

0 commit comments

Comments
 (0)