Skip to content

Commit 5f46a89

Browse files
validation improvement
1 parent 694e6dd commit 5f46a89

3 files changed

Lines changed: 187 additions & 72 deletions

File tree

energyml-utils/example/tools.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
import pathlib
77
from typing import Optional, List, Dict, Any
88

9+
from src.energyml.utils.validation import validate_epc
10+
911
from src.energyml.utils.constants import get_property_kind_dict_path_as_xml
1012
from src.energyml.utils.data.datasets_io import CSVFileReader, HDF5FileWriter, ParquetFileWriter, DATFileReader
1113
from src.energyml.utils.data.mesh import MeshFileFormat, export_multiple_data, export_obj, read_mesh_object
@@ -536,6 +538,86 @@ def describe_as_csv():
536538
print("Finished")
537539

538540

541+
def _read_energyml_file(path: str) -> List[Any]:
    """Load the EnergyML objects stored in a single json/xml/epc file.

    The format is chosen from the file extension, compared case-insensitively.
    Parsing failures are reported on stdout and produce an empty result
    instead of raising, so one bad file does not stop a whole folder from
    being validated. Files with any other extension are ignored.

    :param path: path of the file to read
    :return: the objects read from the file (possibly an empty list)
    """
    filename = os.path.basename(path)
    lowered = path.lower()

    if lowered.endswith(".json"):
        with open(path, "rb") as file:
            content = file.read()
        try:
            return list(read_energyml_json_bytes(content, JSON_VERSION.OSDU_OFFICIAL))
        except Exception as e:
            print(f"File {filename} is NOT a valid EnergyML JSON file: {e}")
    elif lowered.endswith(".xml"):
        with open(path, "rb") as file:
            content = file.read()
        try:
            return [read_energyml_xml_bytes(content)]
        except Exception as e:
            print(f"File {filename} is NOT a valid EnergyML XML file: {e}")
    elif lowered.endswith(".epc"):
        try:
            epc = Epc.read_file(path)
            if epc is not None:
                return list(epc.energyml_objects)
            print(f"File {filename} is NOT a valid EnergyML EPC file: Empty EPC")
        except Exception as e:
            print(f"File {filename} is NOT a valid EnergyML EPC file: {e}")

    return []


def validate_files():
    """Command-line entry point: validate EnergyML file(s) and print the errors.

    ``--file`` / ``-f`` accepts either a single json/xml/epc file or a folder.
    For a folder, every regular file directly inside it is considered (no
    recursion) and files with an unsupported extension are skipped. All the
    objects read are gathered into one in-memory ``Epc`` which is passed to
    ``validate_epc``; each reported error is printed using its ``toJson()``
    representation.
    """
    parser = argparse.ArgumentParser()
    # The option is mandatory: without it ``args.file`` would be None and
    # ``os.path.exists(None)`` raises a TypeError instead of a usage message.
    parser.add_argument("--file", "-f", type=str, required=True, help="Input file (json or xml or epc)")

    args = parser.parse_args()

    if not os.path.exists(args.file):
        print(f"File {args.file} does not exist.")
        return

    if os.path.isdir(args.file):
        # Sorted so that the validation report is reproducible between runs.
        candidates = [os.path.join(args.file, name) for name in sorted(os.listdir(args.file))]
        paths = [candidate for candidate in candidates if os.path.isfile(candidate)]
    elif args.file.lower().endswith((".json", ".xml", ".epc")):
        paths = [args.file]
    else:
        print(f"File {args.file} is not a valid input file (should be a folder or a json/xml/epc file).")
        return

    objects = []
    for path in paths:
        objects.extend(_read_energyml_file(path))

    epc = Epc()
    epc.energyml_objects = objects

    for err in validate_epc(epc):
        print(err.toJson())
619+
620+
539621
# def export_wavefront():
540622
# parser = argparse.ArgumentParser()
541623
# parser.add_argument("--epc", "-f", type=str, help="Epc file path")

energyml-utils/pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,4 +133,5 @@ generate_data = "example.tools:generate_data"
133133
xml_to_json = "example.tools:xml_to_json"
134134
json_to_xml = "example.tools:json_to_xml"
135135
json_to_epc = "example.tools:json_to_epc"
136-
describe_as_csv = "example.tools:describe_as_csv"
136+
describe_as_csv = "example.tools:describe_as_csv"
137+
validate = "example.tools:validate_files"

energyml-utils/src/energyml/utils/validation.py

Lines changed: 103 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
import re
44
from dataclasses import dataclass, field, Field
55
from enum import Enum
6-
from typing import Any, List
6+
import traceback
7+
from typing import Any, List, Optional
78

89
from .epc import (
910
get_obj_identifier,
@@ -12,6 +13,7 @@
1213
from .introspection import (
1314
get_class_fields,
1415
get_object_attribute,
16+
is_primitive,
1517
search_attribute_matching_type_with_path,
1618
get_object_attribute_no_verif,
1719
get_object_attribute_rgx,
@@ -20,9 +22,11 @@
2022
get_obj_version,
2123
get_content_type_from_class,
2224
get_qualified_type_from_class,
23-
is_enum, get_object_uri,
25+
is_enum,
26+
get_object_uri,
2427
)
2528

29+
2630
class ErrorType(Enum):
2731
CRITICAL = "critical"
2832
DEBUG = "debug"
@@ -52,13 +56,16 @@ class ValidationObjectError(ValidationError):
5256

5357
target_obj: Any = field(default=None)
5458

55-
attribute_dot_path: str = field(default=None)
59+
attribute_dot_path: Optional[str] = field(default=None)
5660

5761
def __str__(self):
5862
return f"{ValidationError.__str__(self)}\n\t{get_obj_identifier(self.target_obj)} : '{self.attribute_dot_path}'"
5963

6064
def toJson(self):
61-
return super().toJson() | {"target_obj": str(get_object_uri(self.target_obj)), "attribute_dot_path": self.attribute_dot_path}
65+
return super().toJson() | {
66+
"target_obj": str(get_object_uri(self.target_obj)),
67+
"attribute_dot_path": self.attribute_dot_path,
68+
}
6269

6370

6471
@dataclass
@@ -69,7 +76,7 @@ def __str__(self):
6976

7077
@dataclass
7178
class MissingEntityError(ValidationObjectError):
72-
missing_uuid: str = field(default=None)
79+
missing_uuid: Optional[str] = field(default=None)
7380

7481
def __str__(self):
7582
return f"{ValidationError.__str__(self)}\n\tMissing entity in {get_obj_identifier(self.target_obj)} at path '{self.attribute_dot_path}'. Missing entity uuid: {self.missing_uuid}"
@@ -122,8 +129,8 @@ def dor_validation(energyml_objects: List[Any]) -> List[ValidationError]:
122129
error_type=ErrorType.CRITICAL,
123130
target_obj=obj,
124131
attribute_dot_path=dor_path,
125-
missing_uuid=dor_uuid
126-
# msg=f"[DOR ERR] has wrong information. Unkown object with uuid '{dor_uuid}'",
132+
missing_uuid=dor_uuid,
133+
msg=f"[DOR ERR] has wrong information. Unknown object with uuid '{dor_uuid}'",
127134
)
128135
)
129136
else:
@@ -133,7 +140,7 @@ def dor_validation(energyml_objects: List[Any]) -> List[ValidationError]:
133140
error_type=ErrorType.CRITICAL,
134141
target_obj=obj,
135142
attribute_dot_path=dor_path,
136-
msg=f"[DOR ERR] has wrong information. Unkown object version '{dor_version}'. "
143+
msg=f"[DOR ERR] has wrong information. Unknown object version '{dor_version}'. "
137144
f"Version must be one of {accessible_version}",
138145
)
139146
)
@@ -201,6 +208,8 @@ def _patterns_validation(obj: Any, root_obj: Any, current_attribute_dot_path: st
201208
"""
202209
error_list = []
203210

211+
if is_primitive(obj):
212+
return error_list
204213
if isinstance(obj, list):
205214
cpt = 0
206215
for val in obj:
@@ -235,82 +244,105 @@ def validate_attribute(value: Any, root_obj: Any, att_field: Field, path: str) -
235244
attribute_dot_path=path,
236245
)
237246
)
247+
elif isinstance(att_field, str):
248+
errs.append(
249+
ValidationObjectError(
250+
error_type=ErrorType.WARNING,
251+
target_obj=root_obj,
252+
attribute_dot_path=path,
253+
msg=f"Attribute '{att_field}' is a string but got value '{value}'",
254+
)
255+
)
238256
elif not is_enum(value): # sometimes enums values fails the validation
239-
min_length = att_field.metadata.get("min_length", None)
240-
max_length = att_field.metadata.get("max_length", None)
241-
pattern = att_field.metadata.get("pattern", None)
242-
min_occurs = att_field.metadata.get("min_occurs", None)
243-
min_inclusive = att_field.metadata.get("min_inclusive", None)
244-
# white_space
245-
246-
if max_length is not None:
247-
length = len(value)
248-
if length > max_length:
249-
errs.append(
250-
ValidationObjectError(
251-
error_type=ErrorType.CRITICAL,
252-
target_obj=root_obj,
253-
attribute_dot_path=path,
254-
msg=f"Max length was {max_length} but found {length}",
255-
)
256-
)
257-
258-
if min_length is not None:
259-
length = len(value)
260-
if length < min_length:
261-
errs.append(
262-
ValidationObjectError(
263-
error_type=ErrorType.CRITICAL,
264-
target_obj=root_obj,
265-
attribute_dot_path=path,
266-
msg=f"Max length was {min_length} but found {length}",
257+
try:
258+
min_length = att_field.metadata.get("min_length", None)
259+
max_length = att_field.metadata.get("max_length", None)
260+
pattern = att_field.metadata.get("pattern", None)
261+
min_occurs = att_field.metadata.get("min_occurs", None)
262+
min_inclusive = att_field.metadata.get("min_inclusive", None)
263+
# white_space
264+
265+
if max_length is not None:
266+
length = len(value)
267+
if length > max_length:
268+
errs.append(
269+
ValidationObjectError(
270+
msg=f"Max length was {max_length} but found {length}",
271+
error_type=ErrorType.CRITICAL,
272+
target_obj=root_obj,
273+
attribute_dot_path=path,
274+
)
267275
)
268-
)
269276

270-
if min_occurs is not None:
271-
if isinstance(value, list) and min_occurs > len(value):
272-
errs.append(
273-
ValidationObjectError(
274-
error_type=ErrorType.CRITICAL,
275-
target_obj=root_obj,
276-
attribute_dot_path=path,
277-
msg=f"Min occurs was {min_occurs} but found {len(value)}",
277+
if min_length is not None:
278+
length = len(value)
279+
if length < min_length:
280+
errs.append(
281+
ValidationObjectError(
282+
msg=f"Max length was {min_length} but found {length}",
283+
error_type=ErrorType.CRITICAL,
284+
target_obj=root_obj,
285+
attribute_dot_path=path,
286+
)
278287
)
279-
)
280288

281-
if min_inclusive is not None:
282-
potential_err = ValidationObjectError(
283-
error_type=ErrorType.CRITICAL,
284-
target_obj=root_obj,
285-
attribute_dot_path=path,
286-
msg=f"Min occurs was {min_inclusive} but found {value}",
287-
)
288-
if isinstance(value, list):
289-
for val in value:
290-
if (isinstance(val, str) and len(val) > min_inclusive) or (
291-
(isinstance(val, int) or isinstance(val, float)) and val > min_inclusive
292-
):
293-
errs.append(potential_err)
294-
295-
if pattern is not None:
296-
if not isinstance(value, list):
297-
testing_value_list = [value]
298-
else:
299-
testing_value_list = value
300-
301-
for v in testing_value_list:
302-
if is_enum(v):
303-
v = v.value
304-
if re.match(pattern, v) is None:
289+
if min_occurs is not None:
290+
if isinstance(value, list) and min_occurs > len(value):
305291
errs.append(
306292
ValidationObjectError(
293+
msg=f"Min occurs was {min_occurs} but found {len(value)}",
307294
error_type=ErrorType.CRITICAL,
308295
target_obj=root_obj,
309296
attribute_dot_path=path,
310-
msg=f"Pattern error. Value '{v}' was supposed to respect pattern '{pattern}'",
311297
)
312298
)
313299

300+
if min_inclusive is not None:
301+
potential_err = ValidationObjectError(
302+
msg=f"Min occurs was {min_inclusive} but found {value}",
303+
error_type=ErrorType.CRITICAL,
304+
target_obj=root_obj,
305+
attribute_dot_path=path,
306+
)
307+
if isinstance(value, list):
308+
for val in value:
309+
if (isinstance(val, str) and len(val) > min_inclusive) or (
310+
(isinstance(val, int) or isinstance(val, float)) and val > min_inclusive
311+
):
312+
errs.append(potential_err)
313+
314+
if pattern is not None:
315+
if not isinstance(value, list):
316+
testing_value_list = [value]
317+
else:
318+
testing_value_list = value
319+
320+
for v in testing_value_list:
321+
if is_enum(v):
322+
v = v.value
323+
if re.match(pattern, v) is None:
324+
errs.append(
325+
ValidationObjectError(
326+
msg=f"Pattern error. Value '{v}' was supposed to respect pattern '{pattern}'",
327+
error_type=ErrorType.CRITICAL,
328+
target_obj=root_obj,
329+
attribute_dot_path=path,
330+
)
331+
)
332+
except Exception as e:
333+
print(f"Error while validating attribute '{att_field}' with value '{value}': {str(e)} for {path}")
334+
errs.append(
335+
ValidationObjectError(
336+
msg=f"Error while validating attribute '{att_field}' with value '{value}': {str(e)}",
337+
error_type=ErrorType.CRITICAL,
338+
target_obj=root_obj,
339+
attribute_dot_path=path,
340+
)
341+
)
342+
traceback.print_exc()
343+
exit(0)
344+
return errs
345+
314346
return errs + _patterns_validation(
315347
obj=value,
316348
root_obj=root_obj,

0 commit comments

Comments
 (0)