@@ -1553,15 +1553,22 @@ def read_file(
15531553 """
15541554 with open (epc_file_path , "rb" ) as f :
15551555 if read_parallel :
1556- epc = cls .read_stream_ultra_fast (
1557- BytesIO (f .read ()), read_rels_from_files = read_rels_from_files , recompute_rels = recompute_rels
1556+ epc = (
1557+ cls .read_stream_ultra_fast (
1558+ BytesIO (f .read ()), read_rels_from_files = read_rels_from_files , recompute_rels = recompute_rels
1559+ )
1560+ if not os .environ .get ("EPC_FAST_V2" , "0" ) == "1"
1561+ else cls .read_stream_ultra_fast_v2 (
1562+ BytesIO (f .read ()), read_rels_from_files = read_rels_from_files , recompute_rels = recompute_rels
1563+ )
15581564 )
15591565 else :
15601566 epc = cls .read_stream (
15611567 BytesIO (f .read ()), read_rels_from_files = read_rels_from_files , recompute_rels = recompute_rels
15621568 )
1563- epc .epc_file_path = epc_file_path
1564- return epc
1569+ if epc is not None :
1570+ epc .epc_file_path = epc_file_path
1571+ return epc
15651572 raise IOError (f"Failed to open EPC file { epc_file_path } " )
15661573
15671574 @classmethod
@@ -1575,6 +1582,7 @@ def read_stream(
15751582 :param recompute_rels: If True, recompute all relationships after loading
15761583 :return: an :class:`EPC` instance
15771584 """
1585+ print ("Reading EPC file seq..." )
15781586 try :
15791587 _read_files = []
15801588 obj_list = []
@@ -1727,6 +1735,8 @@ def read_stream_ultra_fast(
17271735 from concurrent .futures import ProcessPoolExecutor , as_completed
17281736 import multiprocessing
17291737
1738+ print ("Reading EPC file parrallel v1..." )
1739+
17301740 obj_to_process = {}
17311741 rels_to_process = {}
17321742 raw_files = []
@@ -1813,6 +1823,87 @@ def read_stream_ultra_fast(
18131823
18141824 return epc
18151825
@classmethod
def read_stream_ultra_fast_v2(
    cls, epc_file_io: BytesIO, read_rels_from_files: bool = True, recompute_rels: bool = False
) -> Optional["Epc"]:
    """
    Read an EPC archive from a stream, parsing its XML parts in a thread pool.

    The ``[Content_Types]`` part is read first to learn which archive entries are
    energyml objects. Each object part (and, optionally, each ``.rels`` part) is
    then submitted to a :class:`~concurrent.futures.ThreadPoolExecutor` as soon
    as its bytes have been extracted from the ZIP. Every remaining entry is kept
    untouched as a raw file.

    Threads are used instead of processes so that parsed objects do not have to
    be pickled back to the caller.
    NOTE(review): this only pays off if the underlying XML parser releases the
    GIL while parsing (the original author states lxml does) -- confirm.

    :param epc_file_io: In-memory stream holding the EPC (ZIP) content
    :param read_rels_from_files: If True, parse the ``.rels`` parts and attach them to their objects
    :param recompute_rels: If True, recompute all relationships after loading
    :return: an :class:`Epc` instance
    :raises Exception: any exception raised (rather than returned) by a worker
        is re-raised here by ``Future.result()``; ZIP and content-type read
        errors propagate as well
    """
    import logging
    from concurrent.futures import ThreadPoolExecutor

    logging.debug("Reading EPC file parallel v2...")

    obj_list = []
    path_to_obj = {}
    rels_content_map = {}
    raw_files = []
    core_props = None

    with ThreadPoolExecutor() as executor:
        futures = []

        with zipfile.ZipFile(epc_file_io, "r") as epc_file:
            # 1. Read the content-type index first: it tells which parts are energyml objects.
            ct_path = get_epc_content_type_path()
            content_type_obj = read_energyml_xml_bytes(epc_file.read(ct_path))

            energyml_paths = {}
            for ov in content_type_obj.override:
                # Part names are absolute ("/folder/file.xml"); ZIP entry names are not.
                path = ov.part_name.lstrip("/\\")
                if is_energyml_content_type(ov.content_type):
                    energyml_paths[path] = ov.content_type
                elif get_class_from_content_type(ov.content_type) == CoreProperties:
                    core_props = read_energyml_xml_bytes(epc_file.read(path), CoreProperties)

            # Loop-invariant: computed once instead of once per archive entry.
            core_props_suffix = gen_core_props_path().lower()

            for info in epc_file.infolist():
                fname = info.filename
                fname_lower = fname.lower()

                if fname in energyml_paths:
                    # Streaming: parsing starts as soon as the bytes are available,
                    # while the main thread keeps extracting the next entries.
                    data = epc_file.read(fname)
                    future = executor.submit(_parallel_xml_read, data, energyml_paths[fname])
                    futures.append((future, "OBJ", fname))
                elif fname_lower.endswith(".rels"):
                    # Rels parts are never kept as raw files, even when they are not parsed.
                    if read_rels_from_files:
                        data = epc_file.read(fname)
                        future = executor.submit(_parallel_rels_read, data)
                        futures.append((future, "REL", fname))
                elif not fname_lower.endswith(core_props_suffix) and fname != ct_path:
                    raw_files.append(RawFile(path=fname, content=BytesIO(epc_file.read(fname))))

        # 2. Collect the results in submission order. The archive has been fully
        #    read at this point; only parsing may still be running.
        for future, kind, path in futures:
            res = future.result()
            if isinstance(res, Exception):
                # The worker reported a failure as a value: skip that part, but
                # leave a trace instead of dropping it silently.
                logging.warning("Skipping EPC part %s: %r", path, res)
                continue

            if kind == "OBJ":
                path_to_obj[path] = res
                obj_list.append(res)
            else:
                # "<dir>/_rels/<name>.xml.rels" describes the part "<dir>/<name>.xml".
                o_path = str(Path(path).parent.parent / Path(path).stem).replace("\\", "/")
                rels_content_map[o_path] = res

    # 3. Final assembly, once every worker has finished.
    epc = Epc(energyml_objects=EnergymlObjectCollection(obj_list), raw_files=raw_files, core_props=core_props)

    if read_rels_from_files:
        for obj_path, rels_obj in rels_content_map.items():
            if obj_path in path_to_obj:
                target_obj = path_to_obj[obj_path]
                epc._rels_cache.set_rels_from_file(target_obj, rels_obj)  # type: ignore

    if recompute_rels:
        epc._rels_cache.recompute_cache()  # type: ignore

    return epc
1906+
18161907
18171908# ______ __ ____ __ _
18181909# / ____/___ ___ _________ ___ ______ ___ / / / __/_ ______ _____/ /_(_)___ ____ _____
0 commit comments