@@ -86,27 +86,32 @@ The **EpcStreamReader** provides memory-efficient handling of large EPC files th
- **Smart Caching**: LRU (Least Recently Used) cache with configurable size
- **Automatic EPC Version Detection**: Supports both CLASSIC and EXPANDED EPC formats
- **Add/Remove/Update Operations**: Full CRUD operations with automatic file structure maintenance
- **Relationship Management**: Automatic or manual .rels file updates with parallel processing support
- **External Data Arrays**: Read/write HDF5, Parquet, CSV arrays with intelligent file caching
- **Context Management**: Automatic resource cleanup with `with` statements
- **Memory Monitoring**: Track cache efficiency and memory usage statistics
9193
9294### Basic Usage
9395
9496``` python
95- from energyml.utils.epc_stream import EpcStreamReader
97+ from energyml.utils.epc_stream import EpcStreamReader, RelsUpdateMode
9698
9799# Open EPC file with context manager (recommended)
98- with EpcStreamReader(' large_file.epc' , cache_size = 50 ) as reader:
100+ with EpcStreamReader(' large_file.epc' ,
101+ cache_size = 50 ,
102+ rels_update_mode = RelsUpdateMode.UPDATE_ON_CLOSE ) as reader:
99103 # List all objects without loading them
100- print (f " Total objects: { reader.stats.total_objects } " )
104+ print (f " Total objects: { len ( reader) } " )
101105
102106 # Get object by identifier
103- obj: Any = reader.get_object_by_identifier (" uuid.version" )
107+ obj = reader.get_object (" uuid.version" )
104108
105- # Get objects by type
106- features: List[Any] = reader.get_objects_by_type(" BoundaryFeature" )
109+ # List objects by type (returns metadata, not full objects)
110+ features = reader.list_objects(object_type = " BoundaryFeature" )
111+ print (f " Found { len (features)} features " )
107112
108113 # Get all objects with same UUID
109- versions: List[Any] = reader.get_object_by_uuid(" 12345678-1234-1234-1234-123456789abc" )
114+ versions = reader.get_object_by_uuid(" 12345678-1234-1234-1234-123456789abc" )
110115```
111116
112117### Adding Objects
@@ -135,31 +140,31 @@ with EpcStreamReader('my_file.epc') as reader:
135140
136141``` python
137142with EpcStreamReader(' my_file.epc' ) as reader:
138- # Remove specific version by full identifier
139- success = reader.remove_object (" uuid.version" )
143+ # Remove by full identifier
144+ success = reader.delete_object (" uuid.version" )
140145
141- # Remove ALL versions by UUID only
142- success = reader.remove_object(" 12345678-1234-1234-1234-123456789abc " )
146+ # Or use the alias
147+ success = reader.remove_object(" uuid.version " )
143148
144149 if success:
145- print (" Object(s) removed successfully" )
150+ print (" Object removed successfully" )
146151```
147152
148153### Updating Objects
149154
150155``` python
151- ...
156+ from energyml.utils.epc_stream import EpcStreamReader
152157from energyml.utils.introspection import set_attribute_from_path
153158
154159with EpcStreamReader(' my_file.epc' ) as reader:
155160 # Get existing object
156- obj = reader.get_object_by_identifier (" uuid.version" )
161+ obj = reader.get_object (" uuid.version" )
157162
158163 # Modify the object
159164 set_attribute_from_path(obj, " citation.title" , " Updated Title" )
160165
161166 # Update in EPC file
162- new_identifier = reader.update_object (obj)
167+ new_identifier = reader.put_object (obj)
163168 print (f " Updated object: { new_identifier} " )
164169```
165170
@@ -190,23 +195,71 @@ with EpcStreamReader('my_file.epc') as reader:
190195 # Objects added will use the same format as the existing EPC file
191196```
192197
### Relationship Management

```python
from energyml.utils.epc_stream import EpcStreamReader, RelsUpdateMode

# Choose relationship update strategy
with EpcStreamReader('my_file.epc',
                     rels_update_mode=RelsUpdateMode.UPDATE_ON_CLOSE,
                     enable_parallel_rels=True) as reader:

    # Add/modify objects - rels updated automatically based on mode
    reader.add_object(my_object)

    # Manual rebuild of all relationships (e.g., after bulk operations)
    stats = reader.rebuild_all_rels(clean_first=True)
    print(f"Rebuilt {stats['rels_files_created']} .rels files")
```
215+
### External Data Arrays

```python
import numpy as np

with EpcStreamReader('my_file.epc') as reader:
    # Read array from HDF5/Parquet/CSV
    data = reader.read_array(
        proxy=my_representation,
        path_in_external="/geometry/points"
    )

    # Write array to external file
    new_data = np.array([[1, 2, 3], [4, 5, 6]])
    success = reader.write_array(
        proxy=my_representation,
        path_in_external="/geometry/points",
        array=new_data
    )

    # Get metadata without loading full array
    metadata = reader.get_array_metadata(my_representation)
    print(f"Array shape: {metadata.dimensions}, dtype: {metadata.array_type}")
```
240+
193241### Advanced Usage
194242
195243``` python
196- # Initialize without preloading metadata for faster startup
197- reader = EpcStreamReader(' huge_file.epc' , preload_metadata = False , cache_size = 200 )
244+ # Initialize with persistent ZIP connection for better performance
245+ reader = EpcStreamReader(' huge_file.epc' ,
246+ keep_open = True ,
247+ cache_size = 200 ,
248+ enable_parallel_rels = True ,
249+ parallel_worker_ratio = 10 )
198250
199251try :
200- # Manual metadata loading when needed
201- reader._load_metadata()
202-
203252 # Get object dependencies
204253 deps = reader.get_object_dependencies(" uuid.version" )
205254
206255 # Batch processing with memory monitoring
207256 for obj_type in [" BoundaryFeature" , " PropertyKind" ]:
208- objects = reader.get_objects_by_type(obj_type)
209- print (f " Processing { len (objects)} { obj_type} objects " )
257+ obj_list = reader.list_objects(object_type = obj_type)
258+ print (f " Processing { len (obj_list)} { obj_type} objects " )
259+
260+ for metadata in obj_list:
261+ obj = reader.get_object(metadata.identifier)
262+ # Process object...
210263
211264finally :
212265 reader.close() # Manual cleanup if not using context manager
@@ -240,25 +293,139 @@ $env:PYTHONPATH="src"
240293```
241294
242295
243- ## Validation examples :
244296
245- An epc file:
297+ ## Poetry Script Examples :
298+
299+ ### Validation
300+
301+ Validate an EPC file:
246302``` bash
247303poetry run validate --file " path/to/your/energyml/object.epc" * > output_logs.json
248304```
249305
250- An xml file:
306+ Validate an XML file:
251307``` bash
252308poetry run validate --file " path/to/your/energyml/object.xml" * > output_logs.json
253309```
254310
255- A json file:
311+ Validate a JSON file:
256312``` bash
257313poetry run validate --file " path/to/your/energyml/object.json" * > output_logs.json
258314```
259315
260- A folder containing Epc/xml/json files:
316+ Validate a folder containing EPC/XML/JSON files:
261317``` bash
262318poetry run validate --file " path/to/your/folder" * > output_logs.json
263319```
264320
### Extract 3D Representations

Extract all representations from an EPC to OBJ files:
```bash
poetry run extract_3d --epc "path/to/file.epc" --output "output_folder"
```

Extract specific representations by UUID:
```bash
poetry run extract_3d --epc "path/to/file.epc" --output "output_folder" --uuid "uuid1" "uuid2"
```

Extract to OFF format without CRS displacement:
```bash
poetry run extract_3d --epc "path/to/file.epc" --output "output_folder" --file-format OFF --no-crs
```

### CSV to Dataset

Convert CSV to HDF5:
```bash
poetry run csv_to_dataset --csv "data.csv" --output "output.h5"
```

Convert CSV to Parquet with custom delimiter:
```bash
poetry run csv_to_dataset --csv "data.csv" --output "output.parquet" --csv-delimiter ";"
```

With dataset name prefix:
```bash
poetry run csv_to_dataset --csv "data.csv" --output "output.h5" --prefix "/my/path/"
```

With column mapping (JSON file):
```bash
poetry run csv_to_dataset --csv "data.csv" --output "output.h5" --mapping "mapping.json"
```

With inline column mapping:
```bash
poetry run csv_to_dataset --csv "data.csv" --output "output.h5" --mapping-line '{"DATASET_A": ["COL1", "COL2"], "DATASET_B": ["COL3"]}'
```

### Generate Random Data

Generate a random RESQML object in JSON:
```bash
poetry run generate_data --type "energyml.resqml.v2_2.resqmlv2.TriangulatedSetRepresentation" --file-format json
```

Generate a random object in XML:
```bash
poetry run generate_data --type "energyml.resqml.v2_0_1.resqmlv2.Grid2dRepresentation" --file-format xml
```

Using qualified type:
```bash
poetry run generate_data --type "resqml22.WellboreFeature" --file-format json
```

### XML to JSON Conversion

Convert an XML file to JSON:
```bash
poetry run xml_to_json --file "path/to/object.xml"
```

Convert with custom output path:
```bash
poetry run xml_to_json --file "path/to/object.xml" --out "output.json"
```

Convert entire EPC to JSON array:
```bash
poetry run xml_to_json --file "path/to/file.epc" --out "output.json"
```

### JSON to XML Conversion

Convert a JSON file to XML:
```bash
poetry run json_to_xml --file "path/to/object.json"
```

Convert with custom output directory:
```bash
poetry run json_to_xml --file "path/to/object.json" --out "output_folder/"
```

### Describe as CSV

Generate a CSV description of all objects in a folder:
```bash
poetry run describe_as_csv --folder "path/to/folder"
```

With custom columns:
```bash
poetry run describe_as_csv --folder "path/to/folder" \
    --columnsNames "Title" "Type" "UUID" \
    --columnsValues "citation.title" "$qualifiedType" "Uuid"
```

Available special values for columnsValues:
- `$type`: Object Python type
- `$qualifiedType`: EnergyML qualified type
- `$contentType`: EnergyML content type
- `$path`: File path
- `$dor`: UUIDs of referenced objects
0 commit comments