Skip to content

Commit 41284ab

Browse files
better validation result
1 parent c5f11cf commit 41284ab

7 files changed

Lines changed: 109 additions & 5 deletions

File tree

energyml-utils/README.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,26 @@ Validate a folder containing EPC/XML/JSON files:
318318
poetry run validate --file "path/to/your/folder" *> output_logs.json
319319
```
320320

321+
Ignore specific error types (e.g., INFO):
322+
```bash
323+
poetry run validate --file "path/to/file.epc" --ignore-err-type INFO *> output_logs.json
324+
```
325+
326+
Group errors by their class for better organization:
327+
```bash
328+
poetry run validate --file "path/to/file.epc" --group-by-err-class *> output_logs.json
329+
```
330+
331+
Include PRODML version errors in the validation output (they are filtered out by default; despite its name, passing `--ignore-prodml-version-errs` turns that filtering off):
332+
```bash
333+
poetry run validate --file "path/to/file.epc" --ignore-prodml-version-errs *> output_logs.json
334+
```
335+
336+
Combined example with multiple options:
337+
```bash
338+
poetry run validate --file "path/to/file.epc" -i INFO WARNING --group-by-err-class *> output_logs.json
339+
```
340+
321341
### Extract 3D Representations
322342

323343
Extract all representations from an EPC to OBJ files:

energyml-utils/example/tools.py

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import json
55
import os
66
import pathlib
7+
import traceback
78
from typing import Optional, List, Dict, Any
89
import sys
910
from pathlib import Path
@@ -12,14 +13,15 @@
1213
src_path = Path(__file__).parent.parent / "src"
1314
sys.path.insert(0, str(src_path))
1415

15-
from energyml.utils.validation import validate_epc
16+
from energyml.utils.validation import ErrorType, validate_epc
1617

1718
from energyml.utils.constants import get_property_kind_dict_path_as_xml
1819
from energyml.utils.data.datasets_io import CSVFileReader, HDF5FileWriter, ParquetFileWriter, DATFileReader
1920
from energyml.utils.data.mesh import MeshFileFormat, export_multiple_data, export_obj, read_mesh_object
2021
from energyml.utils.epc import Epc, gen_energyml_object_path
2122
from energyml.utils.introspection import (
2223
get_class_from_simple_name,
24+
get_enum_values,
2325
get_module_name_and_type_from_content_or_qualified_type,
2426
random_value_from_class,
2527
search_class_in_module_from_partial_name,
@@ -548,6 +550,26 @@ def validate_files():
548550
parser = argparse.ArgumentParser()
549551
# parser.add_argument("--folder", type=str, help="Input folder")
550552
parser.add_argument("--file", "-f", type=str, help="Input file (json or xml or epc)")
553+
parser.add_argument(
554+
"--ignore-err-type",
555+
"-i",
556+
type=str,
557+
help=f"Error types to ignore. Possible values {get_enum_values(ErrorType)}",
558+
nargs="*",
559+
)
560+
561+
parser.add_argument(
562+
"--ignore-prodml-version-errs",
563+
action="store_false",
564+
dest="ignore_prodml_version_errs",
565+
help="Disable ignoring errors related to Prodml version (by default, these errors are ignored)",
566+
)
567+
568+
parser.add_argument(
569+
"--group-by-err-class",
570+
action="store_true",
571+
help="Group errors by their class (e.g. all validation errors together, all parsing errors together, etc.)",
572+
)
551573

552574
args = parser.parse_args()
553575

@@ -615,14 +637,36 @@ def validate_files():
615637
else:
616638
print(f"File {filename} is NOT a valid EnergyML EPC file: Empty EPC")
617639
except Exception as e:
640+
traceback.print_exc()
618641
print(f"File {filename} is NOT a valid EnergyML EPC file: {e}")
619642

620643
epc = Epc()
621644
epc.energyml_objects = objects
622645

623-
err_json = [err.toJson() for err in validate_epc(epc)]
646+
err_json = [
647+
err.toJson()
648+
for err in validate_epc(epc)
649+
if str(err.error_type).lower() not in (et.lower() for et in (args.ignore_err_type or []))
650+
]
624651

625-
print(json.dumps(err_json, indent=4))
652+
err_json_sorted = sorted(
653+
err_json, key=lambda x: (x["err_class"], x["error_type"], x["object_uuid"] if "object_uuid" in x else "")
654+
)
655+
656+
if args.ignore_prodml_version_errs:
657+
err_json_sorted = [err for err in err_json_sorted if not ("prodml23" in err.get("msg", ""))]
658+
659+
if args.group_by_err_class:
660+
err_json_grouped = {}
661+
for err in err_json_sorted:
662+
err_class = err.get("err_class", "UnknownErrorClass")
663+
if err_class not in err_json_grouped:
664+
err_json_grouped[err_class] = []
665+
err_json_grouped[err_class].append(err)
666+
print(json.dumps(err_json_grouped, indent=4))
667+
else:
668+
# print(json.dumps(err_json, indent=4))
669+
print(json.dumps(err_json_sorted, indent=4))
626670

627671

628672
# def export_wavefront():
41.5 KB
Binary file not shown.

energyml-utils/src/energyml/utils/epc.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ def gen_opc_content_type(self) -> Types:
367367

368368
return ct
369369

370-
@log_timestamp
370+
# @log_timestamp
371371
def export_file(self, path: Optional[str] = None, allowZip64: bool = True) -> None:
372372
"""
373373
Export the epc file. If :param:`path` is None, the epc 'self.epc_file_path' is used
@@ -869,7 +869,7 @@ def write_array(
869869
# Class methods
870870

871871
@classmethod
872-
@log_timestamp
872+
# @log_timestamp
873873
def read_file(cls, epc_file_path: str) -> "Epc":
874874
with open(epc_file_path, "rb") as f:
875875
epc = cls.read_stream(BytesIO(f.read()))

energyml-utils/src/energyml/utils/introspection.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1199,6 +1199,24 @@ def get_obj_title(obj: Any) -> Optional[str]:
11991199
for ck in obj[k].keys():
12001200
if re.match(r"title", ck, re.IGNORECASE):
12011201
return obj[k][ck]
1202+
# search for title or name if not classical citation.title found
1203+
1204+
for k in obj.keys():
1205+
if re.match(r"title", k, re.IGNORECASE):
1206+
return obj[k]
1207+
elif re.match(r"name", k, re.IGNORECASE):
1208+
return obj[k]
1209+
1210+
else:
1211+
# DOR :
1212+
try:
1213+
return getattr(obj, "title")
1214+
except AttributeError:
1215+
# etp resource meta :
1216+
try:
1217+
return getattr(obj, "name")
1218+
except AttributeError:
1219+
pass
12021220
return None
12031221

12041222

@@ -1706,6 +1724,10 @@ def get_all_possible_instanciable_classes_for_attribute(parent_obj: Any, attribu
17061724
return []
17071725

17081726

1727+
def get_enum_values(cls: Any) -> List[str]:
    """
    Return the member names of an enum class.

    Despite the function name, the returned strings are the member *names*
    (taken from the enum's ``_member_names_``), not the member values.

    :param cls: the candidate class. Anything for which ``is_enum`` is false
        yields an empty list.
    :return: a new list containing the enum member names, or ``[]`` when
        :param:`cls` is not an enum.
    """
    if not is_enum(cls):
        return []
    # Return a copy: `_member_names_` is the enum class's own bookkeeping list,
    # so handing it out directly would let a caller's mutation (sort, append,
    # clear, ...) corrupt the enum itself.
    return list(cls._member_names_)
1729+
1730+
17091731
def _random_value_from_class(
17101732
cls: Any,
17111733
energyml_module_context: List[str],

energyml-utils/src/energyml/utils/validation.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ class ErrorType(Enum):
3737
INFO = "info"
3838
WARNING = "warning"
3939

40+
def __str__(self):
41+
return self.value
42+
4043

4144
@dataclass
4245
class ValidationError:
@@ -52,6 +55,7 @@ def toJson(self):
5255
return {
5356
"msg": self.msg,
5457
"error_type": self.error_type.value,
58+
"err_class": self.__class__.__name__,
5559
}
5660

5761
@property

energyml-utils/tests/test_introspection.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -658,6 +658,20 @@ def test_get_obj_title(triangulated_set_no_version, fault_interpretation):
658658
"""Test extracting object title."""
659659
assert get_obj_title(triangulated_set_no_version) == "Test Citation v2.3"
660660
assert get_obj_title(fault_interpretation) == "Test Citation v2.0"
661+
assert get_obj_title(as_dor(fault_interpretation)) == "Test Citation v2.0"
662+
663+
class MockObjWithTitle:
664+
name = "Mock Title"
665+
666+
assert get_obj_title(MockObjWithTitle()) == "Mock Title"
667+
668+
assert get_obj_title({"Title": "Dict Title"}) == "Dict Title"
669+
assert get_obj_title({"title": "Dict Title Lower"}) == "Dict Title Lower"
670+
assert get_obj_title({"what": 42}) is None
671+
assert get_obj_title({"name": "Dict Title Lower"}) == "Dict Title Lower"
672+
673+
# priority to citation.title
674+
assert get_obj_title({"name": "Dict Title Lower", "citation": {"title": "Citation Title"}}) == "Citation Title"
661675

662676

663677
def test_get_obj_type(triangulated_set_no_version, fault_interpretation):

0 commit comments

Comments
 (0)