Skip to content

Commit cdc3b52

Browse files
README
1 parent db19916 commit cdc3b52

14 files changed

Lines changed: 194 additions & 27 deletions

energyml-utils/README.md

Lines changed: 194 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -86,27 +86,32 @@ The **EpcStreamReader** provides memory-efficient handling of large EPC files th
8686
- **Smart Caching**: LRU (Least Recently Used) cache with configurable size
8787
- **Automatic EPC Version Detection**: Supports both CLASSIC and EXPANDED EPC formats
8888
- **Add/Remove/Update Operations**: Full CRUD operations with automatic file structure maintenance
89+
- **Relationship Management**: Automatic or manual .rels file updates with parallel processing support
90+
- **External Data Arrays**: Read/write HDF5, Parquet, CSV arrays with intelligent file caching
8991
- **Context Management**: Automatic resource cleanup with `with` statements
9092
- **Memory Monitoring**: Track cache efficiency and memory usage statistics
9193

9294
### Basic Usage
9395

9496
```python
95-
from energyml.utils.epc_stream import EpcStreamReader
97+
from energyml.utils.epc_stream import EpcStreamReader, RelsUpdateMode
9698

9799
# Open EPC file with context manager (recommended)
98-
with EpcStreamReader('large_file.epc', cache_size=50) as reader:
100+
with EpcStreamReader('large_file.epc',
101+
cache_size=50,
102+
rels_update_mode=RelsUpdateMode.UPDATE_ON_CLOSE) as reader:
99103
# List all objects without loading them
100-
print(f"Total objects: {reader.stats.total_objects}")
104+
print(f"Total objects: {len(reader)}")
101105

102106
# Get object by identifier
103-
obj: Any = reader.get_object_by_identifier("uuid.version")
107+
obj = reader.get_object("uuid.version")
104108

105-
# Get objects by type
106-
features: List[Any] = reader.get_objects_by_type("BoundaryFeature")
109+
# List objects by type (returns metadata, not full objects)
110+
features = reader.list_objects(object_type="BoundaryFeature")
111+
print(f"Found {len(features)} features")
107112

108113
# Get all objects with same UUID
109-
versions: List[Any] = reader.get_object_by_uuid("12345678-1234-1234-1234-123456789abc")
114+
versions = reader.get_object_by_uuid("12345678-1234-1234-1234-123456789abc")
110115
```
111116

112117
### Adding Objects
@@ -135,31 +140,31 @@ with EpcStreamReader('my_file.epc') as reader:
135140

136141
```python
137142
with EpcStreamReader('my_file.epc') as reader:
138-
# Remove specific version by full identifier
139-
success = reader.remove_object("uuid.version")
143+
# Remove by full identifier
144+
success = reader.delete_object("uuid.version")
140145

141-
# Remove ALL versions by UUID only
142-
success = reader.remove_object("12345678-1234-1234-1234-123456789abc")
146+
# Or use the alias
147+
success = reader.remove_object("uuid.version")
143148

144149
if success:
145-
print("Object(s) removed successfully")
150+
print("Object removed successfully")
146151
```
147152

148153
### Updating Objects
149154

150155
```python
151-
...
156+
from energyml.utils.epc_stream import EpcStreamReader
152157
from energyml.utils.introspection import set_attribute_from_path
153158

154159
with EpcStreamReader('my_file.epc') as reader:
155160
# Get existing object
156-
obj = reader.get_object_by_identifier("uuid.version")
161+
obj = reader.get_object("uuid.version")
157162

158163
# Modify the object
159164
set_attribute_from_path(obj, "citation.title", "Updated Title")
160165

161166
# Update in EPC file
162-
new_identifier = reader.update_object(obj)
167+
new_identifier = reader.put_object(obj)
163168
print(f"Updated object: {new_identifier}")
164169
```
165170

@@ -190,23 +195,71 @@ with EpcStreamReader('my_file.epc') as reader:
190195
# Objects added will use the same format as the existing EPC file
191196
```
192197

198+
### Relationship Management
199+
200+
```python
201+
from energyml.utils.epc_stream import EpcStreamReader, RelsUpdateMode
202+
203+
# Choose relationship update strategy
204+
with EpcStreamReader('my_file.epc',
205+
rels_update_mode=RelsUpdateMode.UPDATE_ON_CLOSE,
206+
enable_parallel_rels=True) as reader:
207+
208+
# Add/modify objects - rels updated automatically based on mode
209+
reader.add_object(my_object)
210+
211+
# Manual rebuild of all relationships (e.g., after bulk operations)
212+
stats = reader.rebuild_all_rels(clean_first=True)
213+
print(f"Rebuilt {stats['rels_files_created']} .rels files")
214+
```
215+
216+
### External Data Arrays
217+
218+
```python
219+
import numpy as np
220+
221+
with EpcStreamReader('my_file.epc') as reader:
222+
# Read array from HDF5/Parquet/CSV
223+
data = reader.read_array(
224+
proxy=my_representation,
225+
path_in_external="/geometry/points"
226+
)
227+
228+
# Write array to external file
229+
new_data = np.array([[1, 2, 3], [4, 5, 6]])
230+
success = reader.write_array(
231+
proxy=my_representation,
232+
path_in_external="/geometry/points",
233+
array=new_data
234+
)
235+
236+
# Get metadata without loading full array
237+
metadata = reader.get_array_metadata(my_representation)
238+
print(f"Array shape: {metadata.dimensions}, dtype: {metadata.array_type}")
239+
```
240+
193241
### Advanced Usage
194242

195243
```python
196-
# Initialize without preloading metadata for faster startup
197-
reader = EpcStreamReader('huge_file.epc', preload_metadata=False, cache_size=200)
244+
# Initialize with persistent ZIP connection for better performance
245+
reader = EpcStreamReader('huge_file.epc',
246+
keep_open=True,
247+
cache_size=200,
248+
enable_parallel_rels=True,
249+
parallel_worker_ratio=10)
198250

199251
try:
200-
# Manual metadata loading when needed
201-
reader._load_metadata()
202-
203252
# Get object dependencies
204253
deps = reader.get_object_dependencies("uuid.version")
205254

206255
# Batch processing with memory monitoring
207256
for obj_type in ["BoundaryFeature", "PropertyKind"]:
208-
objects = reader.get_objects_by_type(obj_type)
209-
print(f"Processing {len(objects)} {obj_type} objects")
257+
obj_list = reader.list_objects(object_type=obj_type)
258+
print(f"Processing {len(obj_list)} {obj_type} objects")
259+
260+
for metadata in obj_list:
261+
obj = reader.get_object(metadata.identifier)
262+
# Process object...
210263

211264
finally:
212265
reader.close() # Manual cleanup if not using context manager
@@ -240,25 +293,139 @@ $env:PYTHONPATH="src"
240293
```
241294

242295

243-
## Validation examples :
244296

245-
An epc file:
297+
## Poetry Script Examples
298+
299+
### Validation
300+
301+
Validate an EPC file:
246302
```bash
247303
poetry run validate --file "path/to/your/energyml/object.epc" *> output_logs.json
248304
```
249305

250-
An xml file:
306+
Validate an XML file:
251307
```bash
252308
poetry run validate --file "path/to/your/energyml/object.xml" *> output_logs.json
253309
```
254310

255-
A json file:
311+
Validate a JSON file:
256312
```bash
257313
poetry run validate --file "path/to/your/energyml/object.json" *> output_logs.json
258314
```
259315

260-
A folder containing Epc/xml/json files:
316+
Validate a folder containing EPC/XML/JSON files:
261317
```bash
262318
poetry run validate --file "path/to/your/folder" *> output_logs.json
263319
```
264320

321+
### Extract 3D Representations
322+
323+
Extract all representations from an EPC to OBJ files:
324+
```bash
325+
poetry run extract_3d --epc "path/to/file.epc" --output "output_folder"
326+
```
327+
328+
Extract specific representations by UUID:
329+
```bash
330+
poetry run extract_3d --epc "path/to/file.epc" --output "output_folder" --uuid "uuid1" "uuid2"
331+
```
332+
333+
Extract to OFF format without CRS displacement:
334+
```bash
335+
poetry run extract_3d --epc "path/to/file.epc" --output "output_folder" --file-format OFF --no-crs
336+
```
337+
338+
### CSV to Dataset
339+
340+
Convert CSV to HDF5:
341+
```bash
342+
poetry run csv_to_dataset --csv "data.csv" --output "output.h5"
343+
```
344+
345+
Convert CSV to Parquet with custom delimiter:
346+
```bash
347+
poetry run csv_to_dataset --csv "data.csv" --output "output.parquet" --csv-delimiter ";"
348+
```
349+
350+
With dataset name prefix:
351+
```bash
352+
poetry run csv_to_dataset --csv "data.csv" --output "output.h5" --prefix "/my/path/"
353+
```
354+
355+
With column mapping (JSON file):
356+
```bash
357+
poetry run csv_to_dataset --csv "data.csv" --output "output.h5" --mapping "mapping.json"
358+
```
359+
360+
With inline column mapping:
361+
```bash
362+
poetry run csv_to_dataset --csv "data.csv" --output "output.h5" --mapping-line '{"DATASET_A": ["COL1", "COL2"], "DATASET_B": ["COL3"]}'
363+
```
364+
365+
### Generate Random Data
366+
367+
Generate a random RESQML object in JSON:
368+
```bash
369+
poetry run generate_data --type "energyml.resqml.v2_2.resqmlv2.TriangulatedSetRepresentation" --file-format json
370+
```
371+
372+
Generate a random object in XML:
373+
```bash
374+
poetry run generate_data --type "energyml.resqml.v2_0_1.resqmlv2.Grid2dRepresentation" --file-format xml
375+
```
376+
377+
Using qualified type:
378+
```bash
379+
poetry run generate_data --type "resqml22.WellboreFeature" --file-format json
380+
```
381+
382+
### XML to JSON Conversion
383+
384+
Convert an XML file to JSON:
385+
```bash
386+
poetry run xml_to_json --file "path/to/object.xml"
387+
```
388+
389+
Convert with custom output path:
390+
```bash
391+
poetry run xml_to_json --file "path/to/object.xml" --out "output.json"
392+
```
393+
394+
Convert entire EPC to JSON array:
395+
```bash
396+
poetry run xml_to_json --file "path/to/file.epc" --out "output.json"
397+
```
398+
399+
### JSON to XML Conversion
400+
401+
Convert a JSON file to XML:
402+
```bash
403+
poetry run json_to_xml --file "path/to/object.json"
404+
```
405+
406+
Convert with custom output directory:
407+
```bash
408+
poetry run json_to_xml --file "path/to/object.json" --out "output_folder/"
409+
```
410+
411+
### Describe as CSV
412+
413+
Generate a CSV description of all objects in a folder:
414+
```bash
415+
poetry run describe_as_csv --folder "path/to/folder"
416+
```
417+
418+
With custom columns:
419+
```bash
420+
poetry run describe_as_csv --folder "path/to/folder" \
421+
--columnsNames "Title" "Type" "UUID" \
422+
--columnsValues "citation.title" "$qualifiedType" "Uuid"
423+
```
424+
425+
Available special values for `--columnsValues`:
426+
- `$type`: Object Python type
427+
- `$qualifiedType`: EnergyML qualified type
428+
- `$contentType`: EnergyML content type
429+
- `$path`: File path
430+
- `$dor`: UUIDs of referenced objects
431+

energyml-utils/example/attic/__init__.py

Whitespace-only changes.

energyml-utils/example/epc_rels_management_example.py renamed to energyml-utils/example/attic/epc_rels_management_example.py

File renamed without changes.

energyml-utils/example/epc_stream_keep_open_example.py renamed to energyml-utils/example/attic/epc_stream_keep_open_example.py

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 commit comments

Comments (0)