Skip to content

Commit 7052e18

Browse files
global xml context
1 parent 52243d4 commit 7052e18

3 files changed

Lines changed: 107 additions & 20 deletions

File tree

energyml-utils/example/attic/compare_inmem_n_stream.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,13 @@ def time_comparison(
140140
print(f" • Overall speedup: {speedup_factor:.2f}x faster\n")
141141

142142

143+
def recompute_rels(epc_file_path: str):
    """Open the EPC at ``epc_file_path`` with a stream reader and close it right away.

    The reader is created with parallel rels enabled and
    ``RelsUpdateMode.UPDATE_ON_CLOSE``. Nothing is done while it is open:
    the open/close cycle itself is what triggers the rels computation.

    Args:
        epc_file_path: Path of the EPC file handed to ``EpcStreamReader``.
    """
    reader_options = {
        "epc_file_path": epc_file_path,
        "enable_parallel_rels": True,
        "rels_update_mode": RelsUpdateMode.UPDATE_ON_CLOSE,
    }
    with EpcStreamReader(**reader_options):
        # Intentionally empty: leaving the block closes the reader,
        # which is when the rels are computed.
        pass
148+
149+
143150
if __name__ == "__main__":
144151
logging.basicConfig(level=logging.DEBUG)
145152

@@ -150,11 +157,13 @@ def time_comparison(
150157
# output_folder="rc/performance_results",
151158
# )
152159

153-
time_comparison(
154-
filepath=sys.argv[1] if len(sys.argv) > 1 else "rc/epc/80wells_surf.epc", output_folder="rc/performance_results"
155-
)
160+
# time_comparison(
161+
# filepath=sys.argv[1] if len(sys.argv) > 1 else "rc/epc/80wells_surf.epc", output_folder="rc/performance_results"
162+
# )
156163

157164
# time_comparison(
158165
# filepath=sys.argv[1] if len(sys.argv) > 1 else "wip/failingData/fix/sample_mini_firp_201_norels_with_media.epc",
159166
# output_folder="rc/performance_results",
160167
# )
168+
169+
# Take the EPC path from the command line, like the time_comparison calls above;
# the original sample path is kept only as a fallback.
recompute_rels(
    sys.argv[1]
    if len(sys.argv) > 1
    else "C:/Users/Cryptaro/Downloads/Galaxy384-[[Output] EPC file pointset extraction].epc"
)
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
"""
2+
Test for parsing.
3+
4+
To test : edit _read_energyml_xml_bytes_as_class in serialization.py :
5+
6+
__ENV__IMPROVEMENT__ = "__ENV__IMPROVEMENT__"
7+
__ENV__IMPROVEMENT_LXML__ = "__ENV__IMPROVEMENT_LXML__"
8+
9+
if os.environ.get(__ENV__IMPROVEMENT__, "0") == "0":
10+
if os.environ.get("__ENV__IMPROVEMENT_LXML__", "0") == "1":
11+
parser = XmlParser(config=config, handler=LxmlEventHandler)
12+
else:
13+
parser = XmlParser(config=config)
14+
else:
15+
if os.environ.get("__ENV__IMPROVEMENT_LXML__", "0") == "1":
16+
parser = XmlParser(config=config, context=GLOBAL_XML_CONTEXT, handler=LxmlEventHandler)
17+
else:
18+
parser = XmlParser(config=config, context=GLOBAL_XML_CONTEXT)
19+
20+
"""
21+
22+
import logging
import os
import sys
import time
from typing import Any, Callable, Optional

from energyml.utils.epc import Epc
29+
30+
31+
def _parsing_output_path(filepath: str, suffix: str, output_folder: Optional[str] = None) -> str:
    """Return the path the re-exported copy of ``filepath`` is written to."""
    # Only the real extension is touched, so a ".epc" inside a directory name is
    # left alone and the result can never be the input path itself.
    root, ext = os.path.splitext(filepath)
    out_path = f"{root}_parsing_imp_xml_{suffix}{ext or '.epc'}"
    if output_folder:
        # os.path.basename handles the platform's separators, unlike split("/").
        out_path = os.path.join(output_folder, os.path.basename(out_path))
    return out_path


def reexport_in_memory_par_read(filepath: str, output_folder: Optional[str] = None):
    """Read an EPC file in memory (parallel read) and export it to a new file.

    The output file name is the input name with ``_parsing_imp_xml_<suffix>``
    inserted before the extension. ``<suffix>`` is ``opti`` when the
    ``__ENV__IMPROVEMENT__`` environment variable is ``"1"`` and ``std``
    otherwise, with ``_lxml`` appended when ``__ENV__IMPROVEMENT_LXML__`` is
    ``"1"``.

    Args:
        filepath: Path of the EPC file to read.
        output_folder: Optional folder for the exported file. It is created if
            missing. When omitted, the export is written next to the input.
    """
    suffix = "opti" if os.environ.get("__ENV__IMPROVEMENT__", "0") == "1" else "std"
    if os.environ.get("__ENV__IMPROVEMENT_LXML__", "0") == "1":
        suffix += "_lxml"

    if output_folder:
        os.makedirs(output_folder, exist_ok=True)
    path_in_memory = _parsing_output_path(filepath, suffix, output_folder)

    epc = Epc.read_file(epc_file_path=filepath, read_rels_from_files=False, read_parallel=True, recompute_rels=False)

    # Drop the output of a previous run before exporting.
    try:
        os.remove(path_in_memory)
    except FileNotFoundError:
        pass
    epc.export_file(path_in_memory, parallel=True)
47+
48+
49+
def time_test(f: Callable[..., Any], **kwargs):
    """Call ``f(**kwargs)`` once and report the wall-clock time it took.

    Progress is printed before and after the call. Exceptions raised by ``f``
    propagate to the caller.

    Args:
        f: The callable to time. It does not need a ``__name__`` attribute
            (e.g. ``functools.partial`` objects); its ``repr`` is printed then.
        **kwargs: Keyword arguments forwarded to ``f``.

    Returns:
        A ``(label, elapsed_seconds)`` tuple. The label is always
        ``"In-Memory (Epc)"``, kept unchanged for existing callers.
    """
    display_name = getattr(f, "__name__", None) or repr(f)
    print(f"⏳ Testing {display_name}...")
    start = time.perf_counter()
    f(**kwargs)
    elapsed = time.perf_counter() - start
    print(f" ✓ Completed in {elapsed:.3f}s\n")
    return ("In-Memory (Epc)", elapsed)
57+
58+
59+
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)

    # One timed run per (__ENV__IMPROVEMENT__, __ENV__IMPROVEMENT_LXML__) pair, in order:
    # standard parser, global XML context, global XML context + lxml handler.
    for improvement_flag, lxml_flag in (("0", "0"), ("1", "0"), ("1", "1")):
        os.environ["__ENV__IMPROVEMENT__"] = improvement_flag
        os.environ["__ENV__IMPROVEMENT_LXML__"] = lxml_flag
        time_test(
            reexport_in_memory_par_read,
            filepath=sys.argv[1] if len(sys.argv) > 1 else "rc/epc/80wells_surf.epc",
            output_folder="results",
        )

energyml-utils/src/energyml/utils/serialization.py

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,13 @@
4242
ENERGYML_NAMESPACES,
4343
)
4444

45+
from xsdata.formats.dataclass.parsers.handlers import LxmlEventHandler
46+
47+
GLOBAL_XML_CONTEXT = XmlContext(
48+
# element_name_generator=text.camel_case,
49+
# attribute_name_generator=text.kebab_case
50+
)
51+
4552

4653
class JSON_VERSION(Enum):
4754
XSDATA = "XSDATA"
@@ -65,7 +72,7 @@ def _read_energyml_xml_bytes_as_class(
6572
fail_on_unknown_attributes=fail_on_unknown_attributes,
6673
# process_xinclude=True,
6774
)
68-
parser = XmlParser(config=config)
75+
parser = XmlParser(config=config, context=GLOBAL_XML_CONTEXT, handler=LxmlEventHandler)
6976
try:
7077
return parser.from_bytes(file, obj_class)
7178
except ParserError as e:
@@ -81,11 +88,6 @@ def _read_energyml_xml_bytes_as_class(
8188

8289

8390
def read_energyml_xml_tree(file: etree, obj_type: Optional[type] = None) -> Any:
84-
# if obj_type is None:
85-
# obj_type = get_class_from_name(get_class_name_from_xml(file))
86-
# parser = XmlParser(handler=XmlEventHandler)
87-
# # parser = XmlParser(handler=LxmlEventHandler)
88-
# return parser.parse(file, obj_type)
8991
return read_energyml_xml_bytes(etree.tostring(file, encoding="utf8"))
9092

9193

@@ -155,7 +157,7 @@ def _read_energyml_json_bytes_as_class(file: bytes, json_version: JSON_VERSION,
155157
# fail_on_unknown_attributes=False,
156158
# process_xinclude=True,
157159
)
158-
parser = JsonParser(config=config)
160+
parser = JsonParser(config=config, context=GLOBAL_XML_CONTEXT)
159161
try:
160162
return parser.from_bytes(file, obj_class)
161163
except ParserError as e:
@@ -269,12 +271,8 @@ def serialize_xml(obj, check_obj_prefixed_classes: bool = True) -> str:
269271
# logging.debug(f"[1] Serializing object of type {type(obj)}")
270272
obj = as_obj_prefixed_class_if_possible(obj) if check_obj_prefixed_classes else obj
271273
# logging.debug(f"[2] Serializing object of type {type(obj)}")
272-
context = XmlContext(
273-
# element_name_generator=text.camel_case,
274-
# attribute_name_generator=text.kebab_case
275-
)
276274
serializer_config = SerializerConfig(indent=" ")
277-
serializer = XmlSerializer(context=context, config=serializer_config)
275+
serializer = XmlSerializer(context=GLOBAL_XML_CONTEXT, config=serializer_config)
278276
# res = serializer.render(obj)
279277
res = serializer.render(obj, ns_map=ENERGYML_NAMESPACES)
280278
# logging.debug(f"[3] Serialized XML with meta namespace : {obj.Meta.namespace}: {serialize_json(obj)}")
@@ -286,12 +284,8 @@ def serialize_json(
286284
) -> str:
287285
obj = as_obj_prefixed_class_if_possible(obj) if check_obj_prefixed_classes else obj
288286
if json_version == JSON_VERSION.XSDATA:
289-
context = XmlContext(
290-
# element_name_generator=text.camel_case,
291-
# attribute_name_generator=text.kebab_case
292-
)
293287
serializer_config = SerializerConfig(indent=" ")
294-
serializer = JsonSerializer(context=context, config=serializer_config)
288+
serializer = JsonSerializer(context=GLOBAL_XML_CONTEXT, config=serializer_config)
295289
return serializer.render(obj)
296290
elif json_version == JSON_VERSION.OSDU_OFFICIAL:
297291
return json.dumps(to_json_dict(obj), indent=4, sort_keys=True)

0 commit comments

Comments
 (0)