Skip to content

Commit 474f0d7

Browse files
committed
add inplace option to avoid unnecessary deepcopy
1 parent 9c74653 commit 474f0d7

3 files changed

Lines changed: 22 additions & 7 deletions

File tree

jdata/jdata.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ def encode(d, opt={}):
7777
'blosc2lz4hc','blosc2zlib','blosc2zstd'] for compression codec, default is None
7878
'nthread': number of compression threads if the codec is of the blosc2 class, default is 1
7979
"""
80+
81+
opt.setdefault("inplace", False)
82+
8083
if "compression" in opt:
8184
if opt["compression"] == "lzma":
8285
try:
@@ -189,7 +192,7 @@ def encode(d, opt={}):
189192
newobj.pop("_ArrayData_")
190193
return newobj
191194
else:
192-
return copy.deepcopy(d)
195+
return copy.deepcopy(d) if opt["inplace"] else d
193196

194197

195198
##====================================================================================
@@ -207,6 +210,9 @@ def decode(d, opt={}):
207210
@param[in] opt: options, can contain a dict with the following keys
208211
'nthread': number of decompression threads if the codec is of the blosc2 class, default is 1
209212
"""
213+
214+
opt.setdefault("inplace", False)
215+
210216
if (isinstance(d, str) or type(d) == "unicode") and len(d) <= 6 and len(d) > 4 and d[-1] == "_":
211217
if d == "_NaN_":
212218
return float("nan")
@@ -249,7 +255,7 @@ def decode(d, opt={}):
249255
newobj = lz4.frame.decompress(bytes(newobj))
250256
except Exception:
251257
print('Warning: you must install "lz4" module to decompress a data record in this file, ignoring')
252-
return copy.deepcopy(d)
258+
return copy.deepcopy(d) if opt["inplace"] else d
253259
elif d["_ArrayZipType_"].startswith("blosc2"):
254260
try:
255261
import blosc2
@@ -260,7 +266,7 @@ def decode(d, opt={}):
260266
newobj = blosc2.decompress2(bytes(newobj), as_bytearray=False, nthreads=blosc2nthread)
261267
except Exception:
262268
print('Warning: you must install "blosc2" module to decompress a data record in this file, ignoring')
263-
return copy.deepcopy(d)
269+
return copy.deepcopy(d) if opt["inplace"] else d
264270
newobj = np.frombuffer(newobj, dtype=np.dtype(d["_ArrayType_"])).reshape(d["_ArrayZipSize_"])
265271
if "_ArrayIsComplex_" in d and newobj.shape[0] == 2:
266272
newobj = newobj[0] + 1j * newobj[1]
@@ -300,7 +306,7 @@ def decode(d, opt={}):
300306
)
301307
return decodedict(d, opt)
302308
else:
303-
return copy.deepcopy(d)
309+
return copy.deepcopy(d) if opt["inplace"] else d
304310

305311

306312
##====================================================================================
@@ -344,7 +350,7 @@ def encodedict(d0, opt={}):
344350

345351

346352
def encodelist(d0, opt={}):
347-
d = copy.deepcopy(d0)
353+
d = copy.deepcopy(d0) if opt["inplace"] else d0
348354
for i, s in enumerate(d):
349355
d[i] = encode(s, opt)
350356
return d
@@ -367,7 +373,7 @@ def decodedict(d0, opt={}):
367373

368374

369375
def decodelist(d0, opt={}):
370-
d = copy.deepcopy(d0)
376+
d = copy.deepcopy(d0) if opt["inplace"] else d0
371377
for i, s in enumerate(d):
372378
d[i] = decode(s, opt)
373379
return d

jdata/jfile.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ def loadt(fname, opt={}, **kwargs):
8787
kwargs.setdefault("strict", False)
8888
kwargs.setdefault("object_pairs_hook", OrderedDict)
8989
opt.setdefault("decode", True)
90+
opt.setdefault("inplace", True)
9091
opt["base64"] = True
9192

9293
with open(fname, "r") as fid:
@@ -106,6 +107,7 @@ def savet(data, fname, opt={}, **kwargs):
106107
"""
107108
kwargs.setdefault("default", jd.jsonfilter)
108109
opt.setdefault("encode", True)
110+
opt.setdefault("inplace", True)
109111
opt["base64"] = True
110112

111113
if opt["encode"]:
@@ -125,6 +127,7 @@ def show(data, opt={}, **kwargs):
125127
kwargs.setdefault("default", jd.jsonfilter)
126128
opt.setdefault("string", False)
127129
opt.setdefault("encode", True)
130+
opt.setdefault("inplace", True)
128131
opt["base64"] = True
129132

130133
if opt["encode"]:
@@ -150,6 +153,7 @@ def loadb(fname, opt={}, **kwargs):
150153
@param[in] opt: options, if opt['decode']=True or 1 (default), call jdata.decode() after loading
151154
"""
152155
opt.setdefault("decode", True)
156+
opt.setdefault("inplace", True)
153157
opt["base64"] = False
154158

155159
try:
@@ -172,6 +176,7 @@ def saveb(data, fname, opt={}, **kwargs):
172176
@param[in] opt: options, if opt['encode']=True or 1 (default), call jdata.encode() before saving
173177
"""
174178
opt.setdefault("encode", True)
179+
opt.setdefault("inplace", True)
175180

176181
try:
177182
import bjdata

test/benchcodecs.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,14 @@
22
Speed benchmark for saving/loading numpy arrays using various compression codecs
33
"""
44
import jdata as jd
5+
import bjdata as bj
56
import numpy as np
67
import time
78
import os
89

910
print("jdata version:" + jd.__version__)
1011

11-
codecs = ["npy", "npz", "zlib", "lzma", "lz4", "blosc2blosclz", "blosc2lz4", "blosc2lz4hc", "blosc2zlib", "blosc2zstd"]
12+
codecs = ["npy", "npz", "bjd", "zlib", "lzma", "lz4", "blosc2blosclz", "blosc2lz4", "blosc2lz4hc", "blosc2zlib", "blosc2zstd"]
1213
nthread = 8
1314

1415

@@ -21,6 +22,9 @@ def benchmark(codec, x):
2122
elif codec == "npz":
2223
ext = "." + codec
2324
np.savez_compressed("matrix_" + codec + ext, x)
25+
elif codec == "bjd":
26+
ext = "." + codec
27+
jd.save(x, "matrix_" + codec + ext, {"encode": False})
2428
else:
2529
jd.save(x, "matrix_" + codec + ext, {"compression": codec, "nthread": nthread})
2630
dt = time.time() - t0 # saving time

0 commit comments

Comments
 (0)