1111 GetRangeAsync ,
1212 GetRanges ,
1313 GetRangesAsync ,
14+ Head ,
15+ HeadAsync ,
1416)
1517
1618
1719@runtime_checkable
1820class ReadableStore (
19- Get , GetAsync , GetRange , GetRangeAsync , GetRanges , GetRangesAsync , Protocol
21+ Get ,
22+ GetAsync ,
23+ GetRange ,
24+ GetRangeAsync ,
25+ GetRanges ,
26+ GetRangesAsync ,
27+ Head ,
28+ HeadAsync ,
29+ Protocol ,
2030):
2131 """
2232 Full read interface for transparent store wrappers.
@@ -31,6 +41,7 @@ class ReadableStore(
3141 - [Get][obspec.Get] / [GetAsync][obspec.GetAsync]: Download entire files
3242 - [GetRange][obspec.GetRange] / [GetRangeAsync][obspec.GetRangeAsync]: Download byte ranges
3343 - [GetRanges][obspec.GetRanges] / [GetRangesAsync][obspec.GetRangesAsync]: Download multiple ranges
44+ - [Head][obspec.Head] / [HeadAsync][obspec.HeadAsync]: Get file metadata (size, etag, etc.)
3445
3546 Note: This is a flat composition of obspec protocols, not a hierarchical tier.
3647 For parsers with specific requirements, compose your own protocols directly
@@ -166,11 +177,12 @@ class BufferedStoreReader:
166177 [ParallelStoreReader][obspec_utils.obspec.ParallelStoreReader] : Uses parallel requests with LRU caching for sparse access.
167178 """
168179
169- class Store (Get , GetRange , Protocol ):
180+ class Store (Get , GetRange , Head , Protocol ):
170181 """
171182 Store protocol required by BufferedStoreReader.
172183
173- Combines [Get][obspec.Get] and [GetRange][obspec.GetRange] from obspec.
184+ Combines [Get][obspec.Get], [GetRange][obspec.GetRange], and
185+ [Head][obspec.Head] from obspec.
174186 """
175187
176188 pass
@@ -204,10 +216,9 @@ def __init__(
204216 self ._buffer_start = 0
205217
206218 def _get_size (self ) -> int :
207- """Lazily fetch the file size via a get () call."""
219+ """Lazily fetch the file size via a head () call."""
208220 if self ._size is None :
209- result = self ._store .get (self ._path )
210- self ._size = result .meta ["size" ]
221+ self ._size = self ._store .head (self ._path )["size" ]
211222 return self ._size
212223
213224 def read (self , size : int = - 1 , / ) -> bytes :
@@ -345,8 +356,7 @@ class EagerStoreReader:
345356 By default, the file is fetched using parallel range requests via
346357 `get_ranges()`, which can significantly improve load time for large files.
347358 The defaults (12 MB request size, max 18 concurrent requests) are tuned for
348- cloud storage. If the store supports the `Head` protocol, the file size
349- will be determined automatically via a HEAD request.
359+ cloud storage. The file size is determined automatically via a HEAD request.
350360
351361 The parallel fetching strategy is based on Icechunk's approach:
352362 https://github.com/earth-mover/icechunk/blob/main/icechunk/src/storage/mod.rs
@@ -376,13 +386,12 @@ class EagerStoreReader:
376386 [ParallelStoreReader][obspec_utils.obspec.ParallelStoreReader] : Uses parallel requests with LRU caching for sparse access.
377387 """
378388
379- class Store (Get , GetRanges , Protocol ):
389+ class Store (Get , GetRanges , Head , Protocol ):
380390 """
381391 Store protocol required by EagerStoreReader.
382392
383- Combines [Get][obspec.Get] and [GetRanges][obspec.GetRanges] from obspec.
384- Optionally, the store may implement [Head][obspec.Head] for automatic
385- file size detection.
393+ Combines [Get][obspec.Get], [GetRanges][obspec.GetRanges], and
394+ [Head][obspec.Head] from obspec.
386395 """
387396
388397 pass
@@ -403,18 +412,18 @@ def __init__(
403412 Parameters
404413 ----------
405414 store
406- Any object implementing [Get][obspec.Get] and [GetRanges][obspec.GetRanges].
407- Optionally implements [Head][obspec.Head] for automatic file size detection .
415+ Any object implementing [Get][obspec.Get], [GetRanges][obspec.GetRanges],
416+ and [Head][obspec.Head].
408417 path
409418 The path to the file within the store.
410419 request_size
411420 Target size for each parallel range request in bytes. Default is 12 MB,
412421 tuned for cloud storage throughput. The file will be divided into
413422 parts of this size and fetched using `get_ranges()`.
414423 file_size
415- File size in bytes. If not provided, the reader will attempt to get
416- the size via `store.head()` if the store supports [Head][obspec.Head].
417- If the size cannot be determined, falls back to a single `get()` request .
424+ File size in bytes. If not provided, the size is determined via
425+ `store.head()`. Pass this to skip the HEAD request if you already
426+ know the file size.
418427 max_concurrent_requests
419428 Maximum number of parallel range requests. Default is 18. If the file
420429 would require more requests than this, request sizes are increased to
@@ -425,14 +434,7 @@ def __init__(
425434
426435 # Determine file size if not provided
427436 if file_size is None :
428- if hasattr (store , "head" ) and callable (store .head ):
429- file_size = store .head (path )["size" ]
430- else :
431- # Fall back to single request if we can't determine size
432- result = store .get (path )
433- data = bytes (result .buffer ())
434- self ._buffer = io .BytesIO (data )
435- return
437+ file_size = store .head (path )["size" ]
436438
437439 # Handle empty files
438440 if file_size == 0 :
@@ -538,11 +540,12 @@ class ParallelStoreReader:
538540 [EagerStoreReader][obspec_utils.obspec.EagerStoreReader] : Loads entire file into memory for fast random access.
539541 """
540542
541- class Store (Get , GetRanges , Protocol ):
543+ class Store (Get , GetRanges , Head , Protocol ):
542544 """
543545 Store protocol required by ParallelStoreReader.
544546
545- Combines [Get][obspec.Get] and [GetRanges][obspec.GetRanges] from obspec.
547+ Combines [Get][obspec.Get], [GetRanges][obspec.GetRanges], and
548+ [Head][obspec.Head] from obspec.
546549 """
547550
548551 pass
@@ -581,10 +584,9 @@ def __init__(
581584 self ._cache : OrderedDict [int , bytes ] = OrderedDict ()
582585
583586 def _get_size (self ) -> int :
584- """Lazily fetch the file size via a get () call."""
587+ """Lazily fetch the file size via a head () call."""
585588 if self ._size is None :
586- result = self ._store .get (self ._path )
587- self ._size = result .meta ["size" ]
589+ self ._size = self ._store .head (self ._path )["size" ]
588590 return self ._size
589591
590592 def _get_chunks (self , chunk_indices : list [int ]) -> dict [int , bytes ]:
0 commit comments