Skip to content

Commit e0eb195

Browse files
fix epc in memory
1 parent cdc3b52 commit e0eb195

4 files changed

Lines changed: 326 additions & 94 deletions

File tree

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
import logging
2+
import os
3+
import shutil
4+
import sys
5+
import time
6+
from typing import Optional
7+
8+
from energyml.utils.epc_stream import EpcStreamReader, RelsUpdateMode
9+
from energyml.utils.epc import Epc
10+
from energyml.utils.epc_utils import update_prop_kind_dict_cache
11+
12+
13+
def reexport_stream_seq(filepath: str, output_folder: Optional[str] = None):
14+
path_seq = filepath.replace(".epc", "_stream_seq.epc")
15+
if output_folder:
16+
os.makedirs(output_folder, exist_ok=True)
17+
path_seq = f"{output_folder}/{path_seq.split('/')[-1]}"
18+
shutil.copy(filepath, path_seq)
19+
with EpcStreamReader(
20+
epc_file_path=path_seq, enable_parallel_rels=False, rels_update_mode=RelsUpdateMode.UPDATE_ON_CLOSE
21+
) as reader:
22+
pass # Just open and close to trigger rels computation on close
23+
24+
25+
def reexport_stream_parallel(filepath: str, output_folder: Optional[str] = None):
26+
path_parallel = filepath.replace(".epc", "_stream_parallel.epc")
27+
if output_folder:
28+
os.makedirs(output_folder, exist_ok=True)
29+
path_parallel = f"{output_folder}/{path_parallel.split('/')[-1]}"
30+
shutil.copy(filepath, path_parallel)
31+
with EpcStreamReader(
32+
epc_file_path=path_parallel, enable_parallel_rels=True, rels_update_mode=RelsUpdateMode.UPDATE_ON_CLOSE
33+
) as reader:
34+
pass # Just open and close to trigger rels computation on close
35+
36+
37+
def reexport_in_memory(filepath: str, output_folder: Optional[str] = None):
38+
path_in_memory = filepath.replace(".epc", "_in_memory.epc")
39+
if output_folder:
40+
os.makedirs(output_folder, exist_ok=True)
41+
path_in_memory = f"{output_folder}/{path_in_memory.split('/')[-1]}"
42+
epc = Epc.read_file(filepath)
43+
44+
if os.path.exists(path_in_memory):
45+
os.remove(path_in_memory)
46+
epc.export_file(path_in_memory)
47+
48+
49+
def time_comparison(filepath: str, output_folder: Optional[str] = None, skip_sequential: bool = True):
50+
"""Compare performance of different EPC reexport methods."""
51+
print(f"\n{'=' * 70}")
52+
print(f"Performance Comparison: {filepath.split('/')[-1]}")
53+
print(f"{'=' * 70}\n")
54+
55+
results = []
56+
57+
# Test 1: In-Memory
58+
print("⏳ Testing In-Memory EPC processing...")
59+
start = time.perf_counter()
60+
reexport_in_memory(filepath, output_folder)
61+
elapsed_inmem = time.perf_counter() - start
62+
results.append(("In-Memory (Epc)", elapsed_inmem))
63+
print(f" ✓ Completed in {elapsed_inmem:.3f}s\n")
64+
65+
if not skip_sequential:
66+
# Test 2: Streaming Sequential
67+
print("⏳ Testing Streaming Sequential processing...")
68+
start = time.perf_counter()
69+
reexport_stream_seq(filepath, output_folder)
70+
elapsed_seq = time.perf_counter() - start
71+
results.append(("Stream Sequential", elapsed_seq))
72+
print(f" ✓ Completed in {elapsed_seq:.3f}s\n")
73+
74+
# Test 3: Streaming Parallel
75+
print("⏳ Testing Streaming Parallel processing...")
76+
start = time.perf_counter()
77+
reexport_stream_parallel(filepath, output_folder)
78+
elapsed_parallel = time.perf_counter() - start
79+
results.append(("Stream Parallel", elapsed_parallel))
80+
print(f" ✓ Completed in {elapsed_parallel:.3f}s\n")
81+
82+
# Calculate speedups
83+
results_sorted = sorted(results, key=lambda x: x[1])
84+
fastest_time = results_sorted[0][1]
85+
86+
# Print fancy table
87+
print(f"\n{'=' * 70}")
88+
print(f"{'PERFORMANCE RESULTS':^70}")
89+
print(f"{'=' * 70}")
90+
print(f"{'Method':<25} {'Time (s)':>12} {'Speedup':>12} {'Status':>15}")
91+
print(f"{'-' * 70}")
92+
93+
for method, elapsed in results_sorted:
94+
speedup = fastest_time / elapsed
95+
if speedup >= 0.95: # Fastest
96+
status = "🏆 FASTEST"
97+
elif speedup >= 0.8:
98+
status = "✓ Good"
99+
else:
100+
status = "○ Slower"
101+
102+
print(f"{method:<25} {elapsed:>12.3f} {speedup:>12.2f}x {status:>15}")
103+
104+
print(f"{'=' * 70}")
105+
106+
# Summary
107+
fastest_method = results_sorted[0][0]
108+
slowest_method = results_sorted[-1][0]
109+
speedup_factor = results_sorted[-1][1] / fastest_time
110+
111+
print(f"\n📊 Summary:")
112+
print(f" • Fastest: {fastest_method} ({fastest_time:.3f}s)")
113+
print(f" • Slowest: {slowest_method} ({results_sorted[-1][1]:.3f}s)")
114+
print(f" • Overall speedup: {speedup_factor:.2f}x faster\n")
115+
116+
117+
if __name__ == "__main__":
118+
logging.basicConfig(level=logging.DEBUG)
119+
120+
update_prop_kind_dict_cache()
121+
122+
time_comparison(
123+
filepath=sys.argv[1] if len(sys.argv) > 1 else "rc/epc/80wells_surf.epc", output_folder="rc/performance_results"
124+
)
125+
126+
# time_comparison(
127+
# filepath=sys.argv[1] if len(sys.argv) > 1 else "wip/failingData/fix/sample_mini_firp_201_norels_with_media.epc",
128+
# output_folder="rc/performance_results",
129+
# )

0 commit comments

Comments
 (0)