Skip to content

Commit cb8b780

Browse files
better "as_dor" function (#12)
- improve the @as_dor function and make it work with URIs and dicts - avoid NaN in JSON serialization - make @get_class_fields work for C++ proxy objects
1 parent 6113561 commit cb8b780

9 files changed

Lines changed: 232 additions & 39 deletions

File tree

energyml-utils/example/main.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
correct_dor,
2929
)
3030
from src.energyml.utils.xml import *
31-
from src.energyml.utils.data.datasets_io import HDF5FileReader
31+
from src.energyml.utils.data.datasets_io import HDF5FileReader, get_path_in_external_with_path
3232

3333
fi_cit = Citation(
3434
title="An interpretation",
@@ -378,6 +378,7 @@ def test_local_depth_crs():
378378

379379
def test_crs():
380380
from energyml.eml.v2_3.commonv2 import LocalEngineeringCompoundCrs
381+
381382
crs = random_value_from_class(LocalEngineeringCompoundCrs)
382383
print(is_z_reversed(crs))
383384

energyml-utils/example/tools.py

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,19 @@
1010
from src.energyml.utils.data.datasets_io import CSVFileReader, HDF5FileWriter, ParquetFileWriter, DATFileReader
1111
from src.energyml.utils.data.mesh import MeshFileFormat, export_multiple_data
1212
from src.energyml.utils.epc import Epc, gen_energyml_object_path
13-
from src.energyml.utils.introspection import get_class_from_simple_name, random_value_from_class, \
14-
set_attribute_from_path, get_object_attribute, get_qualified_type_from_class, get_content_type_from_class, \
15-
get_object_attribute_rgx, get_direct_dor_list, get_obj_uuid, get_class_from_qualified_type, \
16-
get_object_attribute_or_create
13+
from src.energyml.utils.introspection import (
14+
get_class_from_simple_name,
15+
random_value_from_class,
16+
set_attribute_from_path,
17+
get_object_attribute,
18+
get_qualified_type_from_class,
19+
get_content_type_from_class,
20+
get_object_attribute_rgx,
21+
get_direct_dor_list,
22+
get_obj_uuid,
23+
get_class_from_qualified_type,
24+
get_object_attribute_or_create,
25+
)
1726
from src.energyml.utils.serialization import (
1827
serialize_json,
1928
JSON_VERSION,
@@ -370,14 +379,17 @@ def xml_to_json():
370379
json_content = serialize_json(objs, JSON_VERSION.OSDU_OFFICIAL)
371380
elif args.file.lower().endswith(".epc"):
372381
epc = Epc.read_file(args.file)
382+
# print(epc.energyml_objects)
373383
json_content = (
374384
"[\n"
375385
+ ",".join(list(map(lambda o: serialize_json(o, JSON_VERSION.OSDU_OFFICIAL), epc.energyml_objects)))
376386
+ "]"
377387
)
378388

379389
with open(output_path, "w") as fout:
380-
fout.write(json_content)
390+
# print(json_content)
391+
if json_content is not None:
392+
fout.write(json_content)
381393

382394

383395
def json_to_xml():
@@ -403,6 +415,29 @@ def json_to_xml():
403415
fout.write(xml_content)
404416

405417

418+
def json_to_epc():
    """
    Command-line entry point: read an energyml JSON file and export its objects into an EPC file.

    Command-line arguments (parsed from ``sys.argv``):
        --file / -f: path of the input JSON file.
        --out / -o: path of the output EPC file (defaults to None).

    The input is first read as ``JSON_VERSION.OSDU_OFFICIAL``; if that attempt raises, it is read
    again as ``JSON_VERSION.XSDATA``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--file", "-f", type=str, help="Input File")
    parser.add_argument("--out", "-o", type=str, default=None, help="Output EPC file")

    args = parser.parse_args()

    epc = Epc(epc_file_path=args.out)
    with open(args.file, "rb") as f:
        f_content = f.read()

    try:
        objs = read_energyml_json_bytes(f_content, JSON_VERSION.OSDU_OFFICIAL)
    except Exception:
        # Fall back to the xsdata JSON flavour. ``Exception`` (rather than a bare ``except``) keeps
        # KeyboardInterrupt and SystemExit propagating instead of silently triggering the fallback.
        objs = read_energyml_json_bytes(f_content, JSON_VERSION.XSDATA)

    for obj in objs:
        epc.energyml_objects.append(obj)

    epc.export_file(args.out)
439+
440+
406441
def describe_as_csv():
407442
parser = argparse.ArgumentParser()
408443
parser.add_argument("--folder", "-f", type=str, help="Input File")

energyml-utils/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,4 +128,5 @@ csv_to_dataset = "example.tools:csv_to_dataset"
128128
generate_data = "example.tools:generate_data"
129129
xml_to_json = "example.tools:xml_to_json"
130130
json_to_xml = "example.tools:json_to_xml"
131+
json_to_epc = "example.tools:json_to_epc"
131132
describe_as_csv = "example.tools:describe_as_csv"

energyml-utils/src/energyml/utils/constants.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,23 @@ def parse_content_or_qualified_type(cqt: str) -> Optional[re.Match[str]]:
305305
return parsed
306306

307307

308+
def content_type_to_qualified_type(ct: str) -> str:
    """
    Convert a content type into the matching qualified type.

    Example: ``"application/x-resqml+xml;version=2.0;type=obj_FaultInterpretation"`` gives
    ``"resqml20.obj_FaultInterpretation"``.

    :param ct: the content type to convert
    :return: ``<domain><domainVersion without dots>.<type>``
    :raises ValueError: if :func:`parse_content_or_qualified_type` returns None for :param:`ct`
    """
    parsed = parse_content_or_qualified_type(ct)
    if parsed is None:
        # Without this guard the failure surfaces as "'NoneType' object has no attribute 'group'".
        raise ValueError(f"Unable to parse content type or qualified type: {ct!r}")

    domain_version = parsed.group("domainVersion").replace(".", "")
    return f"{parsed.group('domain')}{domain_version}.{parsed.group('type')}"
311+
312+
313+
def qualified_type_to_content_type(qt: str) -> str:
    """
    Convert a qualified type into the matching content type.

    Example: ``"resqml20.obj_FaultInterpretation"`` gives
    ``"application/x-resqml+xml;version=2.0;type=obj_FaultInterpretation"``.

    :param qt: the qualified type to convert
    :return: ``application/x-<domain>+xml;version=<dotted version>;type=<type>``
    :raises ValueError: if :func:`parse_content_or_qualified_type` returns None for :param:`qt`
    """
    parsed = parse_content_or_qualified_type(qt)
    if parsed is None:
        # Without this guard the failure surfaces as "'NoneType' object has no attribute 'group'".
        raise ValueError(f"Unable to parse content type or qualified type: {qt!r}")

    # Insert a dot between every pair of adjacent digits. The lookahead leaves the second digit
    # unconsumed, so "20" -> "2.0" as before and "201" -> "2.0.1" (a consuming second group would
    # yield "2.01"). An already dotted version such as "2.0" is left untouched.
    dotted_version = re.sub(r"(\d)(?=\d)", r"\1.", parsed.group("domainVersion"))
    return f"application/x-{parsed.group('domain')}+xml;version={dotted_version};type={parsed.group('type')}"
323+
324+
308325
def get_domain_version_from_content_or_qualified_type(cqt: str) -> Optional[str]:
309326
"""
310327
return a version number like "2.2" or "2.0"

energyml-utils/src/energyml/utils/epc.py

Lines changed: 73 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
Keywords1,
3030
TargetMode,
3131
)
32+
from .uri import parse_uri
3233
from xsdata.formats.dataclass.models.generics import DerivedElement
3334

3435
from .constants import (
@@ -39,6 +40,8 @@
3940
RawFile,
4041
EPCRelsRelationshipType,
4142
MimeType,
43+
content_type_to_qualified_type,
44+
qualified_type_to_content_type,
4245
split_identifier,
4346
get_property_kind_dict_path_as_dict,
4447
)
@@ -49,6 +52,7 @@
4952
from .introspection import (
5053
get_class_from_content_type,
5154
get_obj_type,
55+
is_dor,
5256
search_attribute_matching_type,
5357
get_obj_version,
5458
get_obj_uuid,
@@ -65,7 +69,8 @@
6569
set_attribute_from_path,
6670
set_attribute_value,
6771
get_object_attribute,
68-
get_qualified_type_from_class, get_class_fields,
72+
get_qualified_type_from_class,
73+
get_class_fields,
6974
)
7075
from .manager import get_class_pkg, get_class_pkg_version
7176
from .serialization import (
@@ -631,32 +636,74 @@ def as_dor(obj_or_identifier: Any, dor_qualified_type: str = "eml23.DataObjectRe
631636
"""
632637
dor = None
633638
if obj_or_identifier is not None:
634-
if isinstance(obj_or_identifier, str): # is an identifier
635-
cls = get_class_from_qualified_type(dor_qualified_type)
636-
dor = cls()
637-
if len(__CACHE_PROP_KIND_DICT__) == 0:
638-
# update the cache to check if it is a
639-
update_prop_kind_dict_cache()
640-
try:
641-
uuid, version = split_identifier(obj_or_identifier)
642-
if uuid in __CACHE_PROP_KIND_DICT__:
643-
return as_dor(__CACHE_PROP_KIND_DICT__[uuid])
644-
else:
645-
set_attribute_from_path(dor, "uuid", uuid)
646-
set_attribute_from_path(dor, "ObjectVersion", version)
647-
except AttributeError:
648-
logging.error(f"Failed to parse identifier {obj_or_identifier}. DOR will be empty")
639+
cls = get_class_from_qualified_type(dor_qualified_type)
640+
dor = cls()
641+
if isinstance(obj_or_identifier, str): # is an identifier or uri
642+
parsed_uri = parse_uri(obj_or_identifier)
643+
if parsed_uri is not None:
644+
if hasattr(dor, "qualified_type"):
645+
set_attribute_from_path(dor, "qualified_type", parsed_uri.get_qualified_type())
646+
if hasattr(dor, "content_type"):
647+
set_attribute_from_path(
648+
dor, "content_type", qualified_type_to_content_type(parsed_uri.get_qualified_type())
649+
)
650+
set_attribute_from_path(dor, "uuid", parsed_uri.uuid)
651+
if hasattr(dor, "object_version"):
652+
set_attribute_from_path(dor, "version_string", parsed_uri.version)
653+
if hasattr(dor, "version_string"):
654+
set_attribute_from_path(dor, "version_string", parsed_uri.version)
655+
656+
else: # identifier
657+
if len(__CACHE_PROP_KIND_DICT__) == 0:
658+
# update the cache to check whether the identifier refers to a known property kind
659+
try:
660+
update_prop_kind_dict_cache()
661+
except FileNotFoundError as e:
662+
logging.error(f"Failed to parse propertykind dict {e}")
663+
try:
664+
uuid, version = split_identifier(obj_or_identifier)
665+
if uuid in __CACHE_PROP_KIND_DICT__:
666+
return as_dor(__CACHE_PROP_KIND_DICT__[uuid])
667+
else:
668+
set_attribute_from_path(dor, "uuid", uuid)
669+
set_attribute_from_path(dor, "ObjectVersion", version)
670+
except AttributeError:
671+
logging.error(f"Failed to parse identifier {obj_or_identifier}. DOR will be empty")
649672
else:
650-
cls = get_class_from_qualified_type(dor_qualified_type)
651-
dor = cls()
652-
if hasattr(dor, "qualified_type"):
653-
set_attribute_from_path(dor, "qualified_type", get_qualified_type_from_class(obj_or_identifier))
654-
if hasattr(dor, "content_type"):
655-
set_attribute_from_path(dor, "content_type", get_content_type_from_class(obj_or_identifier))
656-
657-
set_attribute_from_path(dor, "uuid", get_object_attribute(obj_or_identifier, "uuid"))
658-
set_attribute_from_path(dor, "object_version", get_object_attribute(obj_or_identifier, "ObjectVersion"))
659-
set_attribute_from_path(dor, "title", get_object_attribute(obj_or_identifier, "Citation.Title"))
673+
if is_dor(obj_or_identifier):
674+
# If it is already a DOR, convert it to the requested DOR type
675+
if hasattr(dor, "qualified_type"):
676+
if hasattr(obj_or_identifier, "qualified_type"):
677+
dor.qualified_type = get_object_attribute(obj_or_identifier, "qualified_type")
678+
elif hasattr(obj_or_identifier, "content_type"):
679+
dor.qualified_type = content_type_to_qualified_type(
680+
get_object_attribute(obj_or_identifier, "content_type")
681+
)
682+
683+
if hasattr(dor, "content_type"):
684+
if hasattr(obj_or_identifier, "qualified_type"):
685+
dor.content_type = qualified_type_to_content_type(
686+
get_object_attribute(obj_or_identifier, "qualified_type")
687+
)
688+
elif hasattr(obj_or_identifier, "content_type"):
689+
dor.content_type = get_object_attribute(obj_or_identifier, "content_type")
690+
691+
set_attribute_from_path(dor, "title", get_object_attribute(obj_or_identifier, "Title"))
692+
693+
else:
694+
if hasattr(dor, "qualified_type"):
695+
set_attribute_from_path(dor, "qualified_type", get_qualified_type_from_class(obj_or_identifier))
696+
if hasattr(dor, "content_type"):
697+
set_attribute_from_path(dor, "content_type", get_content_type_from_class(obj_or_identifier))
698+
699+
set_attribute_from_path(dor, "title", get_object_attribute(obj_or_identifier, "Citation.Title"))
700+
701+
set_attribute_from_path(dor, "uuid", get_obj_uuid(obj_or_identifier))
702+
703+
if hasattr(dor, "object_version"):
704+
set_attribute_from_path(dor, "object_version", get_obj_version(obj_or_identifier))
705+
if hasattr(dor, "version_string"):
706+
set_attribute_from_path(dor, "version_string", get_obj_version(obj_or_identifier))
660707

661708
return dor
662709

energyml-utils/src/energyml/utils/introspection.py

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import re
88
import sys
99
import typing
10-
from dataclasses import Field
10+
from dataclasses import Field, field
1111
from enum import Enum
1212
from importlib import import_module
1313
from types import ModuleType
@@ -311,8 +311,18 @@ def get_class_fields(cls: Union[type, Any]) -> Dict[str, Field]:
311311
try:
312312
return cls.__dataclass_fields__
313313
except AttributeError:
314-
# print(list_function_parameters_with_types(cls.__new__, True))
315-
return list_function_parameters_with_types(cls.__new__, True)
314+
try:
315+
# print(list_function_parameters_with_types(cls.__new__, True))
316+
return list_function_parameters_with_types(cls.__new__, True)
317+
except AttributeError as e:
318+
# For not working types like proxy type for C++ binding
319+
res = {}
320+
for a_name, a_type in inspect.getmembers(cls):
321+
# print(f"{a_name} => {inspect.getmembers(a_type)}")
322+
if not a_name.startswith("_") and not callable(getattr(cls, a_name, None)):
323+
res[a_name] = field()
324+
325+
return res
316326

317327

318328
def get_class_attributes(cls: Union[type, Any]) -> List[str]:
@@ -529,7 +539,7 @@ def get_object_attribute_advanced(obj: Any, attr_dot_path: str) -> Any:
529539
return value
530540

531541

532-
def get_object_attribute_no_verif(obj: Any, attr_name: str) -> Any:
542+
def get_object_attribute_no_verif(obj: Any, attr_name: str, default: Optional[Any] = None) -> Any:
533543
"""
534544
Return the value of the attribute named after param :param:`attr_name` without verification (may raise an exception
535545
if it doesn't exists).
@@ -540,11 +550,19 @@ def get_object_attribute_no_verif(obj: Any, attr_name: str) -> Any:
540550
:return:
541551
"""
542552
if isinstance(obj, list):
543-
return obj[int(attr_name)]
553+
if int(attr_name) < len(obj):
554+
return obj[int(attr_name)] or default
555+
else:
556+
raise AttributeError(obj, name=attr_name)
544557
elif isinstance(obj, dict):
545-
return obj[attr_name]
558+
if attr_name in obj:
559+
return obj.get(attr_name, default)
560+
else:
561+
raise AttributeError(obj, name=attr_name)
546562
else:
547-
return getattr(obj, attr_name, None)
563+
return (
564+
getattr(obj, attr_name) or default
565+
) # we do not use the "default" argument of getattr, so that AttributeError is still raised
548566

549567

550568
def get_object_attribute_rgx(obj: Any, attr_dot_path_rgx: str) -> Any:
@@ -599,6 +617,14 @@ def class_match_rgx(
599617
return False
600618

601619

620+
def is_dor(obj: Any) -> bool:
    """
    Tell whether :param:`obj` looks like a data object reference (DOR).

    An object is considered a DOR when its type name (as returned by :func:`get_obj_type`) contains
    "dataobjectreference" (case-insensitive), or when :func:`get_object_attribute` finds a non-None
    "ContentType" or "QualifiedType" attribute on it.

    :param obj: the object to test
    :return: True if one of the conditions above holds, False otherwise
    """
    if "dataobjectreference" in get_obj_type(obj).lower():
        return True
    return (
        get_object_attribute(obj, "ContentType") is not None
        or get_object_attribute(obj, "QualifiedType") is not None
    )
626+
627+
602628
def search_attribute_matching_type_with_path(
603629
obj: Any,
604630
type_rgx: str,
@@ -1016,6 +1042,7 @@ def get_obj_version(obj: Any) -> str:
10161042
return get_object_attribute_no_verif(obj, "version_string")
10171043
except Exception:
10181044
logging.error(f"Error with {type(obj)}")
1045+
return None
10191046
# raise e
10201047

10211048

energyml-utils/src/energyml/utils/serialization.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# SPDX-License-Identifier: Apache-2.0
33
import json
44
import logging
5+
import numpy as np
56
import traceback
67
from enum import Enum
78
from io import BytesIO
@@ -448,6 +449,9 @@ def _to_json_dict_fn(
448449
"""
449450
if obj is None:
450451
return None
452+
elif isinstance(obj, float) and np.isnan(obj):
453+
print("NaN found")
454+
return None
451455
elif is_enum(obj):
452456
return obj.value
453457
# return {
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from src.energyml.utils.constants import content_type_to_qualified_type, qualified_type_to_content_type
2+
3+
4+
def test_content_type_to_qualified_type():
    """A RESQML 2.0 content type is converted into its qualified type."""
    content_type = "application/x-resqml+xml;version=2.0;type=obj_FaultInterpretation"
    expected_qualified_type = "resqml20.obj_FaultInterpretation"

    assert content_type_to_qualified_type(content_type) == expected_qualified_type
9+
10+
11+
def test_qualified_type_to_content_type():
    """A RESQML 2.0 qualified type is converted into its content type."""
    qualified_type = "resqml20.obj_FaultInterpretation"
    expected_content_type = "application/x-resqml+xml;version=2.0;type=obj_FaultInterpretation"

    assert qualified_type_to_content_type(qualified_type) == expected_content_type

0 commit comments

Comments
 (0)