|
17 | 17 | from functools import wraps |
18 | 18 | from io import BytesIO |
19 | 19 | from typing import List, Any, Union, Dict, Optional |
| 20 | +import numpy as np |
| 21 | +from xsdata.formats.dataclass.models.generics import DerivedElement |
20 | 22 |
|
21 | 23 | from energyml.opc.opc import ( |
22 | 24 | CoreProperties, |
23 | 25 | Relationships, |
24 | 26 | Types, |
25 | | - Default, |
26 | 27 | Relationship, |
27 | 28 | Override, |
28 | | - Created, |
29 | | - Creator, |
30 | | - Identifier, |
31 | | - Keywords1, |
32 | 29 | ) |
33 | 30 | from energyml.utils.epc_utils import ( |
34 | 31 | gen_core_props_path, |
|
38 | 35 | create_h5_external_relationship, |
39 | 36 | ) |
40 | 37 | from energyml.utils.storage_interface import DataArrayMetadata, EnergymlStorageInterface, ResourceMetadata |
41 | | -import numpy as np |
42 | 38 | from energyml.utils.uri import Uri, parse_uri |
43 | | -from xsdata.formats.dataclass.models.generics import DerivedElement |
44 | 39 |
|
45 | 40 | from energyml.utils.constants import ( |
46 | | - RELS_CONTENT_TYPE, |
47 | 41 | EpcExportVersion, |
48 | 42 | RawFile, |
49 | 43 | EPCRelsRelationshipType, |
|
62 | 56 | get_obj_uuid, |
63 | 57 | get_content_type_from_class, |
64 | 58 | get_direct_dor_list, |
65 | | - epoch_to_date, |
66 | | - epoch, |
67 | 59 | gen_uuid, |
68 | 60 | get_obj_identifier, |
69 | 61 | get_object_attribute, |
|
80 | 72 | from energyml.utils.xml import is_energyml_content_type |
81 | 73 |
|
82 | 74 |
|
class EnergymlObjectCollection:
    """
    Ordered container of energyml objects with dictionary-backed lookups.

    The collection exposes the subset of the ``list`` API implemented below
    (``append``, ``extend``, ``remove``, iteration, ``len``, indexing and
    truthiness) so that code written against a plain list keeps working, and
    additionally maintains three indices that are kept in sync on every
    mutation:

    * identifier (as returned by ``get_obj_identifier``) -> object
    * URI string (``str(get_obj_uri(obj))``) -> object
    * UUID (as returned by ``get_obj_uuid``) -> list of objects

    The index keys are computed when an object is added or removed.
    NOTE(review): if a stored object is mutated afterwards so that its
    identifier/URI/UUID changes, the indices are not refreshed automatically.
    """

    def __init__(self, objects: Optional[List[Any]] = None):
        """
        :param objects: optional initial objects, added one by one through
            :meth:`append` (so duplicates by identifier collapse).
        """
        self._by_identifier: Dict[str, Any] = {}
        self._by_uri: Dict[str, Any] = {}
        self._by_uuid: Dict[str, List[Any]] = {}
        self._objects_list: List[Any] = []

        if objects:
            self.extend(objects)

    def _position_of(self, stored: Any) -> int:
        """
        Return the position of the exact instance ``stored`` in the ordered list.

        The search is identity based: callers always pass the instance taken
        from ``_by_identifier``, so there is no need to rely on the objects'
        ``__eq__`` implementation.

        :raises ValueError: if the instance is not in the list.
        """
        for position, candidate in enumerate(self._objects_list):
            if candidate is stored:
                return position
        raise ValueError("object is not stored in this collection")

    def _drop_secondary_indices(self, stored: Any) -> None:
        """Remove ``stored`` from the URI and UUID indices (identifier index is left to the caller)."""
        self._by_uri.pop(str(get_obj_uri(stored)), None)

        uuid = get_obj_uuid(stored)
        bucket = self._by_uuid.get(uuid)
        if bucket is None:
            return
        for position, candidate in enumerate(bucket):
            if candidate is stored:
                del bucket[position]
                break
        # Do not keep empty buckets around.
        if not bucket:
            del self._by_uuid[uuid]

    def append(self, obj: Any) -> None:
        """
        Add an object to the collection (list-compatible method).

        If an object with the same identifier is already stored, it is replaced
        in place (the position in iteration order is preserved); otherwise the
        object is added at the end.

        :param obj: the energyml object to store.
        """
        identifier = get_obj_identifier(obj)
        uri = str(get_obj_uri(obj))
        uuid = get_obj_uuid(obj)

        if identifier in self._by_identifier:
            # Same identifier: swap the stored instance and forget its old URI/UUID entries.
            existing = self._by_identifier[identifier]
            self._objects_list[self._position_of(existing)] = obj
            self._drop_secondary_indices(existing)
        else:
            self._objects_list.append(obj)

        # (Re)register the object in every index.
        self._by_identifier[identifier] = obj
        self._by_uri[uri] = obj

        bucket = self._by_uuid.setdefault(uuid, [])
        if not any(candidate is obj for candidate in bucket):
            bucket.append(obj)

    def extend(self, objects: Any) -> None:
        """
        Add every object of an iterable, in order (list-compatible method).

        :param objects: iterable of energyml objects; each one goes through :meth:`append`.
        """
        for obj in objects:
            self.append(obj)

    def remove(self, obj: Any) -> None:
        """
        Remove an object from the collection (list-compatible method).

        The object is matched by its identifier, so the stored instance with
        the same identifier is the one removed. Unlike ``list.remove``, nothing
        happens (and nothing is raised) when the identifier is unknown.

        :param obj: the object (or another instance with the same identifier) to remove.
        """
        identifier = get_obj_identifier(obj)
        if identifier not in self._by_identifier:
            return

        stored_obj = self._by_identifier[identifier]
        del self._objects_list[self._position_of(stored_obj)]
        del self._by_identifier[identifier]
        self._drop_secondary_indices(stored_obj)

    def get_by_identifier(self, identifier: Union[str, "Uri"]) -> Optional[Any]:
        """
        Get an object by identifier or by URI (dictionary lookups).

        :param identifier: an identifier, a URI, or the string form of either.
        :return: the matching object, or ``None`` when neither index knows the key.
        """
        key = str(identifier)
        # The identifier index takes precedence; the URI index is the fallback.
        obj = self._by_identifier.get(key)
        if obj is not None:
            return obj
        return self._by_uri.get(key)

    def get_by_uuid(self, uuid: str) -> List[Any]:
        """
        Get all objects registered under this UUID (dictionary lookup).

        :param uuid: the UUID to search for.
        :return: a new list (possibly empty). A copy is returned so that callers
            mutating the result cannot corrupt the internal index.
        """
        return list(self._by_uuid.get(uuid, ()))

    def __iter__(self):
        """Iterate over objects in insertion order."""
        return iter(self._objects_list)

    def __len__(self) -> int:
        """Get number of objects."""
        return len(self._objects_list)

    def __getitem__(self, index: int) -> Any:
        """Support indexing (e.g., energyml_objects[0])."""
        return self._objects_list[index]

    def __bool__(self) -> bool:
        """Support boolean checks (e.g., if energyml_objects:)."""
        return len(self._objects_list) > 0
| 181 | + |
| 182 | + |
83 | 183 | def log_timestamp(func): |
84 | 184 | """Decorator to log timestamps for function execution.""" |
85 | 185 |
|
@@ -134,8 +234,8 @@ class Epc(EnergymlStorageInterface): |
134 | 234 | core_props: Optional[CoreProperties] = field(default=None) |
135 | 235 |
|
136 | 236 | """ xml files referred in the [Content_Types].xml """ |
137 | | - energyml_objects: List = field( |
138 | | - default_factory=list, |
| 237 | + energyml_objects: EnergymlObjectCollection = field( |
| 238 | + default_factory=EnergymlObjectCollection, |
139 | 239 | ) |
140 | 240 |
|
141 | 241 | """ Other files content like pdf etc """ |
@@ -564,20 +664,16 @@ def get_object_by_uuid(self, uuid: str) -> List[Any]: |
564 | 664 | :param uuid: |
565 | 665 | :return: |
566 | 666 | """ |
567 | | - return list(filter(lambda o: get_obj_uuid(o) == uuid, self.energyml_objects)) |
| 667 | + return self.energyml_objects.get_by_uuid(uuid) |
568 | 668 |
|
569 | 669 | def get_object_by_identifier(self, identifier: Union[str, Uri]) -> Optional[Any]: |
570 | 670 | """ |
571 | 671 | Search an object by its identifier. |
572 | 672 | :param identifier: given by the function :func:`get_obj_identifier`, or a URI (or its str representation) |
573 | 673 | :return: |
574 | 674 | """ |
575 | | - is_uri = isinstance(identifier, Uri) or parse_uri(identifier) is not None |
576 | | - id_str = str(identifier) |
577 | | - for o in self.energyml_objects: |
578 | | - if (get_obj_identifier(o) if not is_uri else str(get_obj_uri(o))) == id_str: |
579 | | - return o |
580 | | - return None |
| 675 | + # Use the O(1) dict lookup from the collection |
| 676 | + return self.energyml_objects.get_by_identifier(identifier) |
581 | 677 |
|
582 | 678 | def get_object(self, identifier: Union[str, Uri]) -> Optional[Any]: |
583 | 679 | return self.get_object_by_identifier(identifier) |
@@ -898,7 +994,7 @@ def read_stream(cls, epc_file_io: BytesIO): # returns an Epc instance |
898 | 994 | ) |
899 | 995 |
|
900 | 996 | return Epc( |
901 | | - energyml_objects=obj_list, |
| 997 | + energyml_objects=EnergymlObjectCollection(obj_list), |
902 | 998 | raw_files=raw_file_list, |
903 | 999 | core_props=core_props, |
904 | 1000 | additional_rels=additional_rels, |
|
0 commit comments