Skip to content

Commit 9f73fb7

Browse files
.dat is no longer mapped to the CSV handler
1 parent af13b5b commit 9f73fb7

2 files changed

Lines changed: 14 additions & 12 deletions

File tree

energyml-utils/src/energyml/utils/data/datasets_io.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -600,9 +600,7 @@ def read_dataset(
600600
isinstance(source, str) and (source.lower().endswith(".parquet") or source.lower().endswith(".pqt"))
601601
):
602602
file_reader = ParquetFileReader()
603-
elif "csv" in mimetype or (
604-
isinstance(source, str) and (source.lower().endswith(".csv") or source.lower().endswith(".dat"))
605-
):
603+
elif "csv" in mimetype or (isinstance(source, str) and (source.lower().endswith(".csv"))):
606604
file_reader = CSVFileReader()
607605
else:
608606
file_reader = HDF5FileReader() # default is hdf5
@@ -756,11 +754,9 @@ def _register_default_handlers(self, max_open_files: int) -> None:
756754
"""Register all available handlers based on installed dependencies."""
757755
# HDF5 Handler
758756
if __H5PY_MODULE_EXISTS__:
759-
self.register_handler([".h5", ".hdf5", ".dat"], lambda: HDF5ArrayHandler()) # dat for Galaxy compatibility
757+
self.register_handler([".h5", ".hdf5"], lambda: HDF5ArrayHandler()) # dat for Galaxy compatibility
760758
else:
761-
self.register_handler(
762-
[".h5", ".hdf5", ".dat"], lambda: MockHDF5ArrayHandler()
763-
) # dat for Galaxy compatibility
759+
self.register_handler([".h5", ".hdf5"], lambda: MockHDF5ArrayHandler()) # dat for Galaxy compatibility
764760

765761
# Parquet Handler
766762
if __PARQUET_MODULE_EXISTS__:
@@ -770,7 +766,7 @@ def _register_default_handlers(self, max_open_files: int) -> None:
770766

771767
# CSV Handler - always available (uses Python's csv module)
772768
if __CSV_MODULE_EXISTS__:
773-
self.register_handler([".csv", ".txt", ".dat"], lambda: CSVArrayHandler())
769+
self.register_handler([".csv", ".txt"], lambda: CSVArrayHandler())
774770

775771
# LAS Handler
776772
if __LASIO_MODULE_EXISTS__:
@@ -980,7 +976,7 @@ def list_arrays(self, source: Union[BytesIO, str, Any]) -> List[str]:
980976
def can_handle_file(self, file_path: str) -> bool:
981977
"""Check if this handler can process the file."""
982978
ext = os.path.splitext(file_path)[1].lower()
983-
return ext in [".h5", ".hdf5", ".dat"] # dat for Galaxy compatibility
979+
return ext in [".h5", ".hdf5"] # dat for Galaxy compatibility
984980

985981
else:
986982

@@ -1026,7 +1022,7 @@ def list_arrays(self, source: Union[BytesIO, str, Any]) -> List[str]:
10261022
raise MissingExtraInstallation(extra_name="hdf5")
10271023

10281024
def can_handle_file(self, file_path: str) -> bool:
1029-
return os.path.splitext(file_path)[1].lower() in [".h5", ".hdf5", ".dat"] # dat for Galaxy compatibility
1025+
return os.path.splitext(file_path)[1].lower() in [".h5", ".hdf5"] # dat for Galaxy compatibility
10301026

10311027

10321028
# Parquet Handler
@@ -1223,7 +1219,7 @@ def can_handle_file(self, file_path: str) -> bool:
12231219
if __CSV_MODULE_EXISTS__:
12241220

12251221
class CSVArrayHandler(ExternalArrayHandler):
1226-
"""Handler for CSV files (.csv, .txt, .dat)."""
1222+
"""Handler for CSV files (.csv, .txt)."""
12271223

12281224
def __init__(self, max_open_files: int = 3):
12291225
super().__init__(max_open_files=max_open_files)
@@ -1307,7 +1303,7 @@ def list_arrays(self, source: Union[BytesIO, str, Any]) -> List[str]:
13071303
def can_handle_file(self, file_path: str) -> bool:
13081304
"""Check if this handler can process the file."""
13091305
ext = os.path.splitext(file_path)[1].lower()
1310-
return ext in [".csv", ".txt", ".dat"]
1306+
return ext in [".csv", ".txt"]
13111307

13121308

13131309
# LAS Handler

energyml-utils/tests/test_array_handlers.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,12 @@ def test_default_handler_from_registry_is_h5():
2727
assert isinstance(handler, HDF5ArrayHandler), "Default handler for .h5 should be HDF5ArrayHandler"
2828

2929

30+
def test_default_dat_handler_from_registry_is_h5():
31+
"""Test that the default handler for .dat is HDF5ArrayHandler."""
32+
handler = get_handler_registry().get_handler_for_file(".dat") # no extension, should return default .h5 handler
33+
assert isinstance(handler, HDF5ArrayHandler), "Default handler for .h5 should be HDF5ArrayHandler"
34+
35+
3036
def test_hdf5_array_handler_read_write():
3137
"""Test HDF5ArrayHandler read/write and file closure."""
3238
arr = np.arange(6).reshape(2, 3)

0 commit comments

Comments
 (0)