Skip to content

Commit e4cf262

Browse files
authored
Fix!: Always use head for file size determination (#39)
1 parent eadc322 commit e4cf262

12 files changed

Lines changed: 347 additions & 203 deletions

src/obspec_utils/aiohttp.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,5 +571,61 @@ def get_ranges(
571571
self.get_ranges_async(path, starts=starts, ends=ends, lengths=lengths)
572572
)
573573

574+
# --- Head methods ---
575+
576+
async def _do_head_async(
577+
self,
578+
session: aiohttp.ClientSession,
579+
path: str,
580+
) -> ObjectMeta:
581+
"""Internal method that performs the actual HEAD request."""
582+
url = self._build_url(path)
583+
request_headers = {} if self._session else dict(self.headers)
584+
585+
async with session.head(url, headers=request_headers) as response:
586+
response.raise_for_status()
587+
return self._parse_meta_from_headers(path, dict(response.headers))
588+
589+
async def head_async(self, path: str) -> ObjectMeta:
590+
"""
591+
Get file metadata asynchronously via HEAD request.
592+
593+
Parameters
594+
----------
595+
path
596+
Path to the file relative to base_url.
597+
598+
Returns
599+
-------
600+
ObjectMeta
601+
File metadata including size, last_modified, e_tag, etc.
602+
"""
603+
if self._session is not None:
604+
return await self._do_head_async(self._session, path)
605+
606+
# Fallback: create a temporary session for this request
607+
async with aiohttp.ClientSession(
608+
timeout=self.timeout, headers=self.headers
609+
) as session:
610+
return await self._do_head_async(session, path)
611+
612+
def head(self, path: str) -> ObjectMeta:
613+
"""
614+
Get file metadata synchronously via HEAD request.
615+
616+
This wraps the async implementation for convenience.
617+
618+
Parameters
619+
----------
620+
path
621+
Path to the file relative to base_url.
622+
623+
Returns
624+
-------
625+
ObjectMeta
626+
File metadata including size, last_modified, e_tag, etc.
627+
"""
628+
return asyncio.run(self.head_async(path))
629+
574630

575631
__all__ = ["AiohttpStore", "AiohttpGetResult", "AiohttpGetResultAsync"]

src/obspec_utils/cache.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
if TYPE_CHECKING:
1919
from collections.abc import Buffer
2020

21-
from obspec import GetOptions, GetResult, GetResultAsync
21+
from obspec import GetOptions, GetResult, GetResultAsync, ObjectMeta
2222

2323

2424
class CachingReadableStore(ReadableStore):
@@ -88,7 +88,8 @@ def __init__(self, store: ReadableStore, max_size: int = 256 * 1024 * 1024) -> N
8888
Any object implementing the full read interface: [Get][obspec.Get],
8989
[GetAsync][obspec.GetAsync], [GetRange][obspec.GetRange],
9090
[GetRangeAsync][obspec.GetRangeAsync], [GetRanges][obspec.GetRanges],
91-
and [GetRangesAsync][obspec.GetRangesAsync].
91+
[GetRangesAsync][obspec.GetRangesAsync], [Head][obspec.Head],
92+
and [HeadAsync][obspec.HeadAsync].
9293
max_size
9394
Maximum cache size in bytes. Default: 256 MB.
9495
"""
@@ -281,5 +282,13 @@ async def get_ranges_async(
281282
path, starts=starts, ends=ends, lengths=lengths
282283
)
283284

285+
def head(self, path: str) -> ObjectMeta:
286+
"""Get file metadata (delegates to underlying store)."""
287+
return self._store.head(path)
288+
289+
async def head_async(self, path: str) -> ObjectMeta:
290+
"""Get file metadata async (delegates to underlying store)."""
291+
return await self._store.head_async(path)
292+
284293

285294
__all__ = ["CachingReadableStore"]

src/obspec_utils/obspec.py

Lines changed: 32 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,22 @@
1111
GetRangeAsync,
1212
GetRanges,
1313
GetRangesAsync,
14+
Head,
15+
HeadAsync,
1416
)
1517

1618

1719
@runtime_checkable
1820
class ReadableStore(
19-
Get, GetAsync, GetRange, GetRangeAsync, GetRanges, GetRangesAsync, Protocol
21+
Get,
22+
GetAsync,
23+
GetRange,
24+
GetRangeAsync,
25+
GetRanges,
26+
GetRangesAsync,
27+
Head,
28+
HeadAsync,
29+
Protocol,
2030
):
2131
"""
2232
Full read interface for transparent store wrappers.
@@ -31,6 +41,7 @@ class ReadableStore(
3141
- [Get][obspec.Get] / [GetAsync][obspec.GetAsync]: Download entire files
3242
- [GetRange][obspec.GetRange] / [GetRangeAsync][obspec.GetRangeAsync]: Download byte ranges
3343
- [GetRanges][obspec.GetRanges] / [GetRangesAsync][obspec.GetRangesAsync]: Download multiple ranges
44+
- [Head][obspec.Head] / [HeadAsync][obspec.HeadAsync]: Get file metadata (size, etag, etc.)
3445
3546
Note: This is a flat composition of obspec protocols, not a hierarchical tier.
3647
For parsers with specific requirements, compose your own protocols directly
@@ -166,11 +177,12 @@ class BufferedStoreReader:
166177
[ParallelStoreReader][obspec_utils.obspec.ParallelStoreReader] : Uses parallel requests with LRU caching for sparse access.
167178
"""
168179

169-
class Store(Get, GetRange, Protocol):
180+
class Store(Get, GetRange, Head, Protocol):
170181
"""
171182
Store protocol required by BufferedStoreReader.
172183
173-
Combines [Get][obspec.Get] and [GetRange][obspec.GetRange] from obspec.
184+
Combines [Get][obspec.Get], [GetRange][obspec.GetRange], and
185+
[Head][obspec.Head] from obspec.
174186
"""
175187

176188
pass
@@ -204,10 +216,9 @@ def __init__(
204216
self._buffer_start = 0
205217

206218
def _get_size(self) -> int:
207-
"""Lazily fetch the file size via a get() call."""
219+
"""Lazily fetch the file size via a head() call."""
208220
if self._size is None:
209-
result = self._store.get(self._path)
210-
self._size = result.meta["size"]
221+
self._size = self._store.head(self._path)["size"]
211222
return self._size
212223

213224
def read(self, size: int = -1, /) -> bytes:
@@ -345,8 +356,7 @@ class EagerStoreReader:
345356
By default, the file is fetched using parallel range requests via
346357
`get_ranges()`, which can significantly improve load time for large files.
347358
The defaults (12 MB request size, max 18 concurrent requests) are tuned for
348-
cloud storage. If the store supports the `Head` protocol, the file size
349-
will be determined automatically via a HEAD request.
359+
cloud storage. The file size is determined automatically via a HEAD request.
350360
351361
The parallel fetching strategy is based on Icechunk's approach:
352362
https://github.com/earth-mover/icechunk/blob/main/icechunk/src/storage/mod.rs
@@ -376,13 +386,12 @@ class EagerStoreReader:
376386
[ParallelStoreReader][obspec_utils.obspec.ParallelStoreReader] : Uses parallel requests with LRU caching for sparse access.
377387
"""
378388

379-
class Store(Get, GetRanges, Protocol):
389+
class Store(Get, GetRanges, Head, Protocol):
380390
"""
381391
Store protocol required by EagerStoreReader.
382392
383-
Combines [Get][obspec.Get] and [GetRanges][obspec.GetRanges] from obspec.
384-
Optionally, the store may implement [Head][obspec.Head] for automatic
385-
file size detection.
393+
Combines [Get][obspec.Get], [GetRanges][obspec.GetRanges], and
394+
[Head][obspec.Head] from obspec.
386395
"""
387396

388397
pass
@@ -403,18 +412,18 @@ def __init__(
403412
Parameters
404413
----------
405414
store
406-
Any object implementing [Get][obspec.Get] and [GetRanges][obspec.GetRanges].
407-
Optionally implements [Head][obspec.Head] for automatic file size detection.
415+
Any object implementing [Get][obspec.Get], [GetRanges][obspec.GetRanges],
416+
and [Head][obspec.Head].
408417
path
409418
The path to the file within the store.
410419
request_size
411420
Target size for each parallel range request in bytes. Default is 12 MB,
412421
tuned for cloud storage throughput. The file will be divided into
413422
parts of this size and fetched using `get_ranges()`.
414423
file_size
415-
File size in bytes. If not provided, the reader will attempt to get
416-
the size via `store.head()` if the store supports [Head][obspec.Head].
417-
If the size cannot be determined, falls back to a single `get()` request.
424+
File size in bytes. If not provided, the size is determined via
425+
`store.head()`. Pass this to skip the HEAD request if you already
426+
know the file size.
418427
max_concurrent_requests
419428
Maximum number of parallel range requests. Default is 18. If the file
420429
would require more requests than this, request sizes are increased to
@@ -425,14 +434,7 @@ def __init__(
425434

426435
# Determine file size if not provided
427436
if file_size is None:
428-
if hasattr(store, "head") and callable(store.head):
429-
file_size = store.head(path)["size"]
430-
else:
431-
# Fall back to single request if we can't determine size
432-
result = store.get(path)
433-
data = bytes(result.buffer())
434-
self._buffer = io.BytesIO(data)
435-
return
437+
file_size = store.head(path)["size"]
436438

437439
# Handle empty files
438440
if file_size == 0:
@@ -538,11 +540,12 @@ class ParallelStoreReader:
538540
[EagerStoreReader][obspec_utils.obspec.EagerStoreReader] : Loads entire file into memory for fast random access.
539541
"""
540542

541-
class Store(Get, GetRanges, Protocol):
543+
class Store(Get, GetRanges, Head, Protocol):
542544
"""
543545
Store protocol required by ParallelStoreReader.
544546
545-
Combines [Get][obspec.Get] and [GetRanges][obspec.GetRanges] from obspec.
547+
Combines [Get][obspec.Get], [GetRanges][obspec.GetRanges], and
548+
[Head][obspec.Head] from obspec.
546549
"""
547550

548551
pass
@@ -581,10 +584,9 @@ def __init__(
581584
self._cache: OrderedDict[int, bytes] = OrderedDict()
582585

583586
def _get_size(self) -> int:
584-
"""Lazily fetch the file size via a get() call."""
587+
"""Lazily fetch the file size via a head() call."""
585588
if self._size is None:
586-
result = self._store.get(self._path)
587-
self._size = result.meta["size"]
589+
self._size = self._store.head(self._path)["size"]
588590
return self._size
589591

590592
def _get_chunks(self, chunk_indices: list[int]) -> dict[int, bytes]:

src/obspec_utils/splitting.py

Lines changed: 33 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
if TYPE_CHECKING:
1717
from collections.abc import Buffer
1818

19-
from obspec import GetOptions, GetResult, GetResultAsync
19+
from obspec import GetOptions, GetResult, GetResultAsync, ObjectMeta
2020

2121

2222
class SplittingReadableStore(ReadableStore):
@@ -62,10 +62,6 @@ class SplittingReadableStore(ReadableStore):
6262
The parallel fetching strategy is based on Icechunk's approach:
6363
https://github.com/earth-mover/icechunk/blob/main/icechunk/src/storage/mod.rs
6464
65-
**File size detection**: The wrapper attempts to determine file size via
66-
head() if the store supports it. If not available, it falls back to a
67-
single get() request (no splitting).
68-
6965
Examples
7066
--------
7167
Basic usage:
@@ -123,7 +119,8 @@ def __init__(
123119
Any object implementing the full read interface: [Get][obspec.Get],
124120
[GetAsync][obspec.GetAsync], [GetRange][obspec.GetRange],
125121
[GetRangeAsync][obspec.GetRangeAsync], [GetRanges][obspec.GetRanges],
126-
and [GetRangesAsync][obspec.GetRangesAsync].
122+
[GetRangesAsync][obspec.GetRangesAsync], [Head][obspec.Head],
123+
and [HeadAsync][obspec.HeadAsync].
127124
request_size
128125
Target size for each parallel range request. Default: 12 MB.
129126
max_concurrent_requests
@@ -151,26 +148,6 @@ def __getattr__(self, name: str) -> Any:
151148
)
152149
return getattr(self._store, name)
153150

154-
def _get_file_size(self, path: str) -> int | None:
155-
"""Try to get file size via head(), return None if not available."""
156-
if hasattr(self._store, "head") and callable(self._store.head):
157-
try:
158-
return self._store.head(path)["size"]
159-
except Exception:
160-
return None
161-
return None
162-
163-
async def _get_file_size_async(self, path: str) -> int | None:
164-
"""Async version of _get_file_size."""
165-
if hasattr(self._store, "head_async") and callable(self._store.head_async):
166-
try:
167-
result = await self._store.head_async(path)
168-
return result["size"]
169-
except Exception:
170-
return None
171-
# Fall back to sync head if available
172-
return self._get_file_size(path)
173-
174151
def _compute_ranges(self, file_size: int) -> tuple[list[int], list[int]] | None:
175152
"""Compute start positions and lengths for parallel fetching.
176153
@@ -216,40 +193,37 @@ async def _wrap_as_get_result_async(self, path: str, data: bytes) -> GetResultAs
216193
def get(self, path: str, *, options: GetOptions | None = None) -> GetResult:
217194
"""Get file, using parallel fetching if beneficial.
218195
219-
If the file size can be determined and the file is large enough to
220-
benefit from splitting, fetches via parallel get_ranges(). Otherwise
221-
falls back to a single get() request.
196+
If the file is large enough to benefit from splitting, fetches via
197+
parallel get_ranges(). Otherwise falls back to a single get() request.
222198
"""
223-
file_size = self._get_file_size(path)
199+
file_size = self.head(path)["size"]
200+
ranges = self._compute_ranges(file_size)
224201

225-
if file_size is not None:
226-
ranges = self._compute_ranges(file_size)
227-
if ranges is not None:
228-
starts, lengths = ranges
229-
results = self._store.get_ranges(path, starts=starts, lengths=lengths)
230-
data = b"".join(bytes(part) for part in results)
231-
return self._wrap_as_get_result(path, data)
202+
if ranges is not None:
203+
starts, lengths = ranges
204+
results = self._store.get_ranges(path, starts=starts, lengths=lengths)
205+
data = b"".join(bytes(part) for part in results)
206+
return self._wrap_as_get_result(path, data)
232207

233-
# Fall back to regular get
208+
# Fall back to regular get (file too small for splitting)
234209
return self._store.get(path, options=options)
235210

236211
async def get_async(
237212
self, path: str, *, options: GetOptions | None = None
238213
) -> GetResultAsync:
239214
"""Async get, using parallel fetching if beneficial."""
240-
file_size = await self._get_file_size_async(path)
241-
242-
if file_size is not None:
243-
ranges = self._compute_ranges(file_size)
244-
if ranges is not None:
245-
starts, lengths = ranges
246-
results = await self._store.get_ranges_async(
247-
path, starts=starts, lengths=lengths
248-
)
249-
data = b"".join(bytes(part) for part in results)
250-
return await self._wrap_as_get_result_async(path, data)
251-
252-
# Fall back to regular get_async
215+
file_size = (await self.head_async(path))["size"]
216+
ranges = self._compute_ranges(file_size)
217+
218+
if ranges is not None:
219+
starts, lengths = ranges
220+
results = await self._store.get_ranges_async(
221+
path, starts=starts, lengths=lengths
222+
)
223+
data = b"".join(bytes(part) for part in results)
224+
return await self._wrap_as_get_result_async(path, data)
225+
226+
# Fall back to regular get_async (file too small for splitting)
253227
return await self._store.get_async(path, options=options)
254228

255229
# Pass through range methods unchanged - caller already sized appropriately
@@ -302,5 +276,13 @@ async def get_ranges_async(
302276
path, starts=starts, ends=ends, lengths=lengths
303277
)
304278

279+
def head(self, path: str) -> ObjectMeta:
280+
"""Get file metadata (delegates to underlying store)."""
281+
return self._store.head(path)
282+
283+
async def head_async(self, path: str) -> ObjectMeta:
284+
"""Get file metadata async (delegates to underlying store)."""
285+
return await self._store.head_async(path)
286+
305287

306288
__all__ = ["SplittingReadableStore"]

0 commit comments

Comments (0)