|
1 | 1 | """Tests for CachingReadableStore.""" |
2 | 2 |
|
| 3 | +import pickle |
3 | 4 | import threading |
4 | 5 | from concurrent.futures import ThreadPoolExecutor |
5 | 6 |
|
@@ -389,3 +390,189 @@ def test_forwards_unknown_attributes(self): |
389 | 390 | # This tests that __getattr__ forwards correctly |
390 | 391 | assert hasattr(cached, "put") # MemoryStore has put |
391 | 392 | assert hasattr(cached, "delete") # MemoryStore has delete |
| 393 | + |
| 394 | + |
| 395 | +class PicklableStore: |
| 396 | + """A simple picklable store for testing pickle support. |
| 397 | +
|
| 398 | + MemoryStore from obstore is Rust-backed and not picklable. |
| 399 | + This pure-Python store allows testing CachingReadableStore's pickle support. |
| 400 | + """ |
| 401 | + |
| 402 | + def __init__(self, data: dict[str, bytes] | None = None): |
| 403 | + self._data = data or {} |
| 404 | + |
| 405 | + def put(self, path: str, data: bytes) -> None: |
| 406 | + self._data[path] = data |
| 407 | + |
| 408 | + def get(self, path: str, *, options=None): |
| 409 | + return _PicklableGetResult(self._data[path]) |
| 410 | + |
| 411 | + async def get_async(self, path: str, *, options=None): |
| 412 | + return _PicklableGetResultAsync(self._data[path]) |
| 413 | + |
| 414 | + def get_range( |
| 415 | + self, |
| 416 | + path: str, |
| 417 | + *, |
| 418 | + start: int, |
| 419 | + end: int | None = None, |
| 420 | + length: int | None = None, |
| 421 | + ): |
| 422 | + data = self._data[path] |
| 423 | + if end is not None: |
| 424 | + return data[start:end] |
| 425 | + elif length is not None: |
| 426 | + return data[start : start + length] |
| 427 | + return data[start:] |
| 428 | + |
| 429 | + async def get_range_async( |
| 430 | + self, |
| 431 | + path: str, |
| 432 | + *, |
| 433 | + start: int, |
| 434 | + end: int | None = None, |
| 435 | + length: int | None = None, |
| 436 | + ): |
| 437 | + return self.get_range(path, start=start, end=end, length=length) |
| 438 | + |
| 439 | + def get_ranges(self, path: str, *, starts, ends=None, lengths=None): |
| 440 | + if ends is not None: |
| 441 | + return [self._data[path][s:e] for s, e in zip(starts, ends)] |
| 442 | + elif lengths is not None: |
| 443 | + return [ |
| 444 | + self._data[path][start : start + length] |
| 445 | + for start, length in zip(starts, lengths) |
| 446 | + ] |
| 447 | + raise ValueError("Must provide ends or lengths") |
| 448 | + |
| 449 | + async def get_ranges_async(self, path: str, *, starts, ends=None, lengths=None): |
| 450 | + return self.get_ranges(path, starts=starts, ends=ends, lengths=lengths) |
| 451 | + |
| 452 | + |
class _PicklableGetResult:
    """Minimal stand-in for obstore's synchronous GetResult.

    Wraps a bytes payload and exposes it via ``buffer()``.
    """

    def __init__(self, data: bytes):
        # Held privately; only read back through buffer().
        self._data = data

    def buffer(self):
        """Return the wrapped bytes payload."""
        return self._data
| 462 | + |
class _PicklableGetResultAsync:
    """Minimal stand-in for obstore's asynchronous GetResult.

    Wraps a bytes payload and exposes it via ``buffer_async()``.
    """

    def __init__(self, data: bytes):
        # Held privately; only read back through buffer_async().
        self._data = data

    async def buffer_async(self):
        """Return the wrapped bytes payload (awaitable for API parity)."""
        return self._data
| 472 | + |
class TestPickling:
    """Tests for pickling support (needed for multiprocessing/distributed)."""

    def test_pickle_roundtrip(self):
        """CachingReadableStore survives a pickle round trip."""
        backing = PicklableStore()
        backing.put("file.txt", b"hello world")
        store = CachingReadableStore(backing, max_size=128 * 1024 * 1024)

        clone = pickle.loads(pickle.dumps(store))

        assert isinstance(clone, CachingReadableStore)

    def test_pickle_preserves_store_and_max_size(self):
        """Unpickling keeps both the wrapped store and the max_size setting."""
        backing = PicklableStore()
        backing.put("file.txt", b"hello world")

        limit = 64 * 1024 * 1024
        clone = pickle.loads(pickle.dumps(CachingReadableStore(backing, max_size=limit)))

        # The configured cache limit survives the round trip.
        assert clone._max_size == limit
        # The wrapped store is still reachable and serves data.
        assert bytes(clone.get("file.txt").buffer()) == b"hello world"

    def test_pickle_creates_empty_cache(self):
        """Cache contents are dropped on pickle; the clone starts fresh."""
        backing = PicklableStore()
        backing.put("file.txt", b"hello world")
        backing.put("file2.txt", b"more data")
        store = CachingReadableStore(backing)

        # Warm the cache with both objects first.
        store.get("file.txt")
        store.get("file2.txt")
        assert store.cache_size > 0
        assert len(store.cached_paths) == 2

        clone = pickle.loads(pickle.dumps(store))

        # Nothing cached on the other side.
        assert clone.cache_size == 0
        assert len(clone.cached_paths) == 0

    def test_pickle_restored_store_is_functional(self):
        """An unpickled store fetches and caches data like a new one."""
        backing = PicklableStore()
        backing.put("file.txt", b"hello world")
        store = CachingReadableStore(backing, max_size=100)
        store.get("file.txt")

        clone = pickle.loads(pickle.dumps(store))

        assert bytes(clone.get("file.txt").buffer()) == b"hello world"
        assert "file.txt" in clone.cached_paths
        assert clone.cache_size == len(b"hello world")

    def test_pickle_restored_store_lru_works(self):
        """LRU eviction still functions after a pickle round trip."""
        backing = PicklableStore()
        for name, byte in (("file1.txt", b"a"), ("file2.txt", b"b"), ("file3.txt", b"c")):
            backing.put(name, byte * 100)

        clone = pickle.loads(pickle.dumps(CachingReadableStore(backing, max_size=200)))

        # Fill the cache to capacity with the first two files.
        clone.get("file1.txt")
        clone.get("file2.txt")
        assert clone.cached_paths == ["file1.txt", "file2.txt"]

        # Caching a third file must evict the least recently used one.
        clone.get("file3.txt")
        assert clone.cached_paths == ["file2.txt", "file3.txt"]

    def test_pickle_multiple_protocols(self):
        """Every supported pickle protocol yields a working, empty-cache clone."""
        backing = PicklableStore()
        backing.put("file.txt", b"hello world")
        store = CachingReadableStore(backing)
        store.get("file.txt")

        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            clone = pickle.loads(pickle.dumps(store, protocol=proto))

            assert clone.cache_size == 0  # Fresh cache
            assert bytes(clone.get("file.txt").buffer()) == b"hello world"
0 commit comments