add inplace option to avoid unnecessary deepcopy

fangq · fangq · commit 474f0d782725 · 2022-09-06T09:48:50.000-04:00
diff --git a/jdata/jdata.py b/jdata/jdata.py
@@ -77,6 +77,9 @@ def encode(d, opt={}):
                         'blosc2lz4hc','blosc2zlib','blosc2zstd'] for compression codec, default is None
          'nthread': number of compression thread of the codec is of the blosc2 class, default is 1
     """
+
+    opt.setdefault("inplace", False)
+
     if "compression" in opt:
         if opt["compression"] == "lzma":
             try:
@@ -189,7 +192,7 @@ def encode(d, opt={}):
             newobj.pop("_ArrayData_")
         return newobj
     else:
-        return copy.deepcopy(d)
+        return copy.deepcopy(d) if opt["inplace"] else d
 
 
 ##====================================================================================
@@ -207,6 +210,9 @@ def decode(d, opt={}):
     @param[in] opt: options, can contain a dict with the following keys
          'nthread': number of decompression thread of the codec is of the blosc2 class, default is 1
     """
+
+    opt.setdefault("inplace", False)
+
     if (isinstance(d, str) or type(d) == "unicode") and len(d) <= 6 and len(d) > 4 and d[-1] == "_":
         if d == "_NaN_":
             return float("nan")
@@ -249,7 +255,7 @@ def decode(d, opt={}):
                         newobj = lz4.frame.decompress(bytes(newobj))
                     except Exception:
                         print('Warning: you must install "lz4" module to decompress a data record in this file, ignoring')
-                        return copy.deepcopy(d)
+                        return copy.deepcopy(d) if opt["inplace"] else d
                 elif d["_ArrayZipType_"].startswith("blosc2"):
                     try:
                         import blosc2
@@ -260,7 +266,7 @@ def decode(d, opt={}):
                         newobj = blosc2.decompress2(bytes(newobj), as_bytearray=False, nthreads=blosc2nthread)
                     except Exception:
                         print('Warning: you must install "blosc2" module to decompress a data record in this file, ignoring')
-                        return copy.deepcopy(d)
+                        return copy.deepcopy(d) if opt["inplace"] else d
                 newobj = np.frombuffer(newobj, dtype=np.dtype(d["_ArrayType_"])).reshape(d["_ArrayZipSize_"])
                 if "_ArrayIsComplex_" in d and newobj.shape[0] == 2:
                     newobj = newobj[0] + 1j * newobj[1]
@@ -300,7 +306,7 @@ def decode(d, opt={}):
                 )
         return decodedict(d, opt)
     else:
-        return copy.deepcopy(d)
+        return copy.deepcopy(d) if opt["inplace"] else d
 
 
 ##====================================================================================
@@ -344,7 +350,7 @@ def encodedict(d0, opt={}):
 
 
 def encodelist(d0, opt={}):
-    d = copy.deepcopy(d0)
+    d = d0 if opt.get("inplace", False) else copy.deepcopy(d0)  # only mutate the caller's list when in-place is requested
     for i, s in enumerate(d):
         d[i] = encode(s, opt)
     return d
@@ -367,7 +373,7 @@ def decodedict(d0, opt={}):
 
 
 def decodelist(d0, opt={}):
-    d = copy.deepcopy(d0)
+    d = d0 if opt.get("inplace", False) else copy.deepcopy(d0)  # only mutate the caller's list when in-place is requested
     for i, s in enumerate(d):
         d[i] = decode(s, opt)
     return d
diff --git a/jdata/jfile.py b/jdata/jfile.py
@@ -87,6 +87,7 @@ def loadt(fname, opt={}, **kwargs):
     kwargs.setdefault("strict", False)
     kwargs.setdefault("object_pairs_hook", OrderedDict)
     opt.setdefault("decode", True)
+    opt.setdefault("inplace", True)
     opt["base64"] = True
 
     with open(fname, "r") as fid:
@@ -106,6 +107,7 @@ def savet(data, fname, opt={}, **kwargs):
     """
     kwargs.setdefault("default", jd.jsonfilter)
     opt.setdefault("encode", True)
+    opt.setdefault("inplace", True)
     opt["base64"] = True
 
     if opt["encode"]:
@@ -125,6 +127,7 @@ def show(data, opt={}, **kwargs):
     kwargs.setdefault("default", jd.jsonfilter)
     opt.setdefault("string", False)
     opt.setdefault("encode", True)
+    opt.setdefault("inplace", True)
     opt["base64"] = True
 
     if opt["encode"]:
@@ -150,6 +153,7 @@ def loadb(fname, opt={}, **kwargs):
     @param[in] opt: options, if opt['decode']=True or 1 (default), call jdata.decode() before saving
     """
     opt.setdefault("decode", True)
+    opt.setdefault("inplace", True)
     opt["base64"] = False
 
     try:
@@ -172,6 +176,7 @@ def saveb(data, fname, opt={}, **kwargs):
     @param[in] opt: options, if opt['encode']=True or 1 (default), call jdata.encode() before saving
     """
     opt.setdefault("encode", True)
+    opt.setdefault("inplace", True)
 
     try:
         import bjdata
diff --git a/test/benchcodecs.py b/test/benchcodecs.py
@@ -2,13 +2,14 @@
     Speed benchmark for saving/loading numpy arrays using various compression codecs
 """
 import jdata as jd
+import bjdata as bj
 import numpy as np
 import time
 import os
 
 print("jdata version:" + jd.__version__)
 
-codecs = ["npy", "npz", "zlib", "lzma", "lz4", "blosc2blosclz", "blosc2lz4", "blosc2lz4hc", "blosc2zlib", "blosc2zstd"]
+codecs = ["npy", "npz", "bjd", "zlib", "lzma", "lz4", "blosc2blosclz", "blosc2lz4", "blosc2lz4hc", "blosc2zlib", "blosc2zstd"]
 nthread = 8
 
 
@@ -21,6 +22,9 @@ def benchmark(codec, x):
     elif codec == "npz":
         ext = "." + codec
         np.savez_compressed("matrix_" + codec + ext, x)
+    elif codec == "bjd":
+        ext = "." + codec
+        jd.save(x, "matrix_" + codec + ext, {"encode": False})
     else:
         jd.save(x, "matrix_" + codec + ext, {"compression": codec, "nthread": nthread})
     dt = time.time() - t0  # saving time