Skip to content

Commit db19916

Browse files
fix computeRels in updateOnClose for epc_stream. Now supports internal media detection in content_type
1 parent 882b515 commit db19916

4 files changed

Lines changed: 288 additions & 66 deletions

File tree

energyml-utils/example/main_stream_sample.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import shutil
23
import sys
34
import logging
45
from energyml.utils.epc_stream import EpcStreamReader, RelsUpdateMode
@@ -661,6 +662,19 @@ def recompute_rels(path: str):
661662
EpcStreamReader(epc_file_path=path, enable_parallel_rels=True, rels_update_mode=RelsUpdateMode.UPDATE_ON_CLOSE)
662663

663664

665+
def recompute_rels_change_name(path: str):
    """
    Copy an EPC file twice and open each copy with rels updating on close.

    The first copy (``<stem>_reshaped<ext>``) is opened with
    ``enable_parallel_rels=True``, the second (``<stem>_reshaped_seq<ext>``)
    with ``enable_parallel_rels=False``. The file at ``path`` is only used as
    the copy source.

    Args:
        path: Path of the source EPC file.
    """
    # Split on the real extension instead of str.replace(".epc", ...): replace
    # rewrites every ".epc" occurrence in the path (directory names included)
    # and leaves the path unchanged when the suffix differs (e.g. ".EPC"),
    # which makes shutil.copy fail because source and destination are the same.
    stem, ext = os.path.splitext(path)
    path_reshaped = f"{stem}_reshaped{ext}"
    path_reshaped_seq = f"{stem}_reshaped_seq{ext}"

    shutil.copy(path, path_reshaped)
    shutil.copy(path, path_reshaped_seq)

    # NOTE(review): the readers are created but never explicitly closed here;
    # presumably the UPDATE_ON_CLOSE work is triggered elsewhere - confirm.
    EpcStreamReader(
        epc_file_path=path_reshaped, enable_parallel_rels=True, rels_update_mode=RelsUpdateMode.UPDATE_ON_CLOSE
    )
    EpcStreamReader(
        epc_file_path=path_reshaped_seq, enable_parallel_rels=False, rels_update_mode=RelsUpdateMode.UPDATE_ON_CLOSE
    )
676+
677+
664678
if __name__ == "__main__":
665679
logging.basicConfig(level=logging.DEBUG)
666680

@@ -670,5 +684,8 @@ def recompute_rels(path: str):
670684
# test_create_epc_v2("wip/test_create.epc")
671685
# test_create_epc_v3_with_different_external_files("wip/test_create_v3.epc")
672686

673-
recompute_rels(sys.argv[1] if len(sys.argv) > 1 else "wip/failingData/fix/S-PASS-1-EARTHMODEL_ONLY.epc")
674-
recompute_rels(sys.argv[1] if len(sys.argv) > 1 else "wip/failingData/fix/S-PASS-1-GEOMODEL.epc")
687+
# recompute_rels_change_name(sys.argv[1] if len(sys.argv) > 1 else "wip/failingData/fix/S-PASS-1-EARTHMODEL_ONLY.epc")
688+
# recompute_rels_change_name(sys.argv[1] if len(sys.argv) > 1 else "wip/failingData/fix/S-PASS-1-GEOMODEL.epc")
689+
recompute_rels_change_name(
690+
sys.argv[1] if len(sys.argv) > 1 else "wip/failingData/fix/sample_mini_firp_201_norels_with_media.epc"
691+
)

energyml-utils/src/energyml/utils/constants.py

Lines changed: 129 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ class OptimizedRegex:
211211
# TODO: RELS_CONTENT_TYPE holds the core-properties content type, not the relationships one (cf. MimeType.RELS); it may be incorrect or misnamed and needs review
212212
RELS_CONTENT_TYPE = "application/vnd.openxmlformats-package.core-properties+xml"
213213
RELS_FOLDER_NAME = "_rels"
214+
CORE_PROPERTIES_FOLDER_NAME = "docProps"
214215

215216
primitives = (bool, str, int, float, type(None))
216217

@@ -225,6 +226,20 @@ class MimeType(Enum):
225226
RELS = "application/vnd.openxmlformats-package.relationships+xml"
226227
CORE_PROPERTIES = "application/vnd.openxmlformats-package.core-properties+xml"
227228
EXTENDED_CORE_PROPERTIES = "application/x-extended-core-properties+xml"
229+
JPEG = "image/jpeg"
230+
PNG = "image/png"
231+
TIFF = "image/tiff"
232+
GIF = "image/gif"
233+
SVG = "image/svg+xml"
234+
DOC = "application/msword"
235+
DOCX = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
236+
XLSX = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
237+
XML = "application/xml"
238+
JSON = "application/json"
239+
TXT = "text/plain"
240+
MARKDOWN = "text/markdown"
241+
HTML = "text/html"
242+
ZIP = "application/zip"
228243

229244
def __str__(self):
230245
return self.value
@@ -282,6 +297,120 @@ class RawFile:
282297
content: Optional[BytesIO] = field(default=None)
283298

284299

300+
# ===================================
301+
# MIME TYPE MAPPINGS
302+
# ===================================
303+
304+
# Primary mapping: MimeType enum → file extension (not one-to-one: several MIME types share the "xml" extension)
305+
MIME_TYPE_TO_EXTENSION: dict[MimeType, str] = {
306+
MimeType.CSV: "csv",
307+
MimeType.HDF5: "h5",
308+
MimeType.PARQUET: "parquet",
309+
MimeType.PDF: "pdf",
310+
MimeType.RELS: "rels",
311+
MimeType.CORE_PROPERTIES: "xml",
312+
MimeType.EXTENDED_CORE_PROPERTIES: "xml",
313+
MimeType.JPEG: "jpg",
314+
MimeType.PNG: "png",
315+
MimeType.TIFF: "tiff",
316+
MimeType.GIF: "gif",
317+
MimeType.SVG: "svg",
318+
MimeType.DOC: "doc",
319+
MimeType.DOCX: "docx",
320+
MimeType.XLSX: "xlsx",
321+
MimeType.XML: "xml",
322+
MimeType.JSON: "json",
323+
MimeType.TXT: "txt",
324+
MimeType.MARKDOWN: "md",
325+
MimeType.HTML: "html",
326+
MimeType.ZIP: "zip",
327+
}
328+
329+
# Alternative MIME type strings (aliases and variants)
330+
MIME_TYPE_ALIASES: dict[str, MimeType] = {
331+
"application/parquet": MimeType.PARQUET,
332+
"application/vnd.apache.parquet": MimeType.PARQUET,
333+
"text/xml": MimeType.XML,
334+
"image/jpg": MimeType.JPEG,
335+
}
336+
337+
# Alternative file extensions
338+
EXTENSION_ALIASES: dict[str, str] = {
339+
"hdf5": "h5",
340+
"jpeg": "jpg",
341+
"tif": "tiff",
342+
"markdown": "md",
343+
"htm": "html",
344+
}
345+
346+
347+
def mime_type_to_file_extension(mime_type: str) -> Optional[str]:
    """
    Return the file extension associated with a MIME type string.

    The lookup is case-insensitive. Canonical ``MimeType`` values are tried
    first, then the alternative spellings listed in ``MIME_TYPE_ALIASES``.

    Args:
        mime_type: MIME type string (case-insensitive)

    Returns:
        File extension without leading dot, or None if not found

    Examples:
        >>> mime_type_to_file_extension("text/csv")
        'csv'
        >>> mime_type_to_file_extension("application/parquet")
        'parquet'
    """
    if not mime_type:
        return None

    wanted = mime_type.lower()

    # Canonical values: a matching enum member settles the result, even when
    # that member has no entry in the extension table.
    canonical = next((member for member in MimeType if member.value.lower() == wanted), None)
    if canonical is not None:
        return MIME_TYPE_TO_EXTENSION.get(canonical)

    # Otherwise fall back to the known aliases and variants.
    aliased = MIME_TYPE_ALIASES.get(wanted)
    return MIME_TYPE_TO_EXTENSION.get(aliased) if aliased else None
379+
380+
381+
# Extensions shared by several MIME types, and the MimeType the reverse lookup
# must return for them. Without this, "xml" would resolve to the first table
# entry using that extension (the core-properties type) instead of generic XML.
_PREFERRED_MIME_TYPE_FOR_EXTENSION: dict[str, MimeType] = {
    "xml": MimeType.XML,
}


def file_extension_to_mime_type(extension: str) -> Optional[str]:
    """
    Convert file extension to MIME type using the MimeType enum.

    The extension is stripped of leading dots, lower-cased and normalized
    through ``EXTENSION_ALIASES`` before the lookup. Extensions used by more
    than one MIME type are resolved through an explicit preference so the
    result does not depend on the ordering of ``MIME_TYPE_TO_EXTENSION``.

    Args:
        extension: File extension with or without leading dot (case-insensitive)

    Returns:
        MIME type string, or None if not found

    Examples:
        >>> file_extension_to_mime_type("csv")
        'text/csv'
        >>> file_extension_to_mime_type(".json")
        'application/json'
        >>> file_extension_to_mime_type("xml")
        'application/xml'
    """
    if not extension:
        return None

    # Remove leading dot if present
    ext_lower = extension.lstrip(".").lower()

    # Normalize through aliases first
    ext_normalized = EXTENSION_ALIASES.get(ext_lower, ext_lower)

    # Ambiguous extensions: use the explicitly preferred MIME type
    preferred = _PREFERRED_MIME_TYPE_FOR_EXTENSION.get(ext_normalized)
    if preferred is not None:
        return preferred.value

    # Find the MimeType that matches this extension
    for mime_enum, ext in MIME_TYPE_TO_EXTENSION.items():
        if ext == ext_normalized:
            return mime_enum.value

    return None
412+
413+
285414
# ===================================
286415
# OPTIMIZED UTILITY FUNCTIONS
287416
# ===================================
@@ -499,54 +628,6 @@ def extract_uuid_from_string(s: str) -> Optional[str]:
499628
return None
500629

501630

502-
def mime_type_to_file_extension(mime_type: str) -> Optional[str]:
503-
"""Convert MIME type to file extension"""
504-
if not mime_type:
505-
return None
506-
507-
mime_type_lower = mime_type.lower()
508-
509-
# Use dict for faster lookup than if/elif chain
510-
mime_to_ext = {
511-
"application/x-parquet": "parquet",
512-
"application/parquet": "parquet",
513-
"application/vnd.apache.parquet": "parquet",
514-
"application/x-hdf5": "h5",
515-
"text/csv": "csv",
516-
"application/vnd.openxmlformats-package.relationships+xml": "rels",
517-
"application/pdf": "pdf",
518-
"application/xml": "xml",
519-
"text/xml": "xml",
520-
"application/json": "json",
521-
"application/vnd.openxmlformats-package.core-properties+xml": "xml",
522-
"application/x-extended-core-properties+xml": "xml",
523-
}
524-
525-
return mime_to_ext.get(mime_type_lower)
526-
527-
528-
def file_extension_to_mime_type(extension: str) -> Optional[str]:
529-
"""Convert file extension to MIME type"""
530-
if not extension:
531-
return None
532-
533-
ext_lower = extension.lower()
534-
535-
# Use dict for faster lookup than if/elif chain
536-
ext_to_mime = {
537-
"parquet": "application/x-parquet",
538-
"h5": "application/x-hdf5",
539-
"hdf5": "application/x-hdf5",
540-
"csv": "text/csv",
541-
"rels": "application/vnd.openxmlformats-package.relationships+xml",
542-
"pdf": "application/pdf",
543-
"xml": "application/xml",
544-
"json": "application/json",
545-
}
546-
547-
return ext_to_mime.get(ext_lower)
548-
549-
550631
# ===================================
551632
# PATH UTILITIES
552633
# ===================================

0 commit comments

Comments
 (0)