11use bencher:: { benchmark_group, benchmark_main} ;
22
33use bencher:: Bencher ;
4+ use lazy_static:: lazy_static;
45use tempdir:: TempDir ;
6+ use tempfile:: tempfile;
57
68use std:: fs;
79use std:: path:: Path ;
10+ use std:: sync:: { Arc , Mutex } ;
811
912use zip:: result:: ZipResult ;
13+ use zip:: write:: ZipWriter ;
1014use zip:: ZipArchive ;
1115
12- #[ cfg( all( feature = "parallelism" , feature = "bzip2" , unix) ) ]
16+ #[ cfg( all( feature = "parallelism" , unix) ) ]
1317use zip:: read:: { split_extract, ExtractionParameters } ;
1418
15- #[ cfg( feature = "parallelism" ) ]
16- use num_cpus;
17-
1819/* This archive has a set of entries repeated 20x:
1920 * - 200K random data, stored uncompressed (CompressionMethod::Stored)
2021 * - 246K text data (the project gutenberg html version of king lear)
2122 * (CompressionMethod::Bzip2, compression level 1) (project gutenberg ebooks are public domain)
2223 *
2324 * The full archive file is 5.3MB.
2425 */
25- fn get_test_archive ( ) -> ZipResult < ZipArchive < fs:: File > > {
26+ fn static_test_archive ( ) -> ZipResult < ZipArchive < fs:: File > > {
27+ assert ! (
28+ cfg!( feature = "bzip2" ) ,
29+ "this test archive requires bzip2 support"
30+ ) ;
2631 let path =
2732 Path :: new ( env ! ( "CARGO_MANIFEST_DIR" ) ) . join ( "tests/data/stored-and-compressed-text.zip" ) ;
2833 let file = fs:: File :: open ( path) ?;
2934 ZipArchive :: new ( file)
3035}
3136
32- fn extract_basic ( bench : & mut Bencher ) {
33- let mut readable_archive = get_test_archive ( ) . unwrap ( ) ;
34- let total_size: u64 = readable_archive
35- . decompressed_size ( )
36- . unwrap ( )
37- . try_into ( )
38- . unwrap ( ) ;
37+ lazy_static ! {
38+ static ref STATIC_TEST_ARCHIVE : Arc <Mutex <ZipArchive <fs:: File >>> = {
39+ let archive = static_test_archive( ) . unwrap( ) ;
40+ Arc :: new( Mutex :: new( archive) )
41+ } ;
42+ }
43+
44+ /* This archive is generated dynamically, in order to scale with the number of reported CPUs.
45+ * - We want at least 768 files (4 per VCPU on EC2 *.48xlarge instances) to run in CI.
46+ * - We want to retain the interspersed random/text entries from static_test_archive().
47+ *
48+ * We will copy over entries from the static archive repeatedly until we reach the desired file
49+ * count.
50+ */
51+ fn dynamic_test_archive ( src_archive : & mut ZipArchive < fs:: File > ) -> ZipResult < ZipArchive < fs:: File > > {
52+ let desired_num_entries: usize = num_cpus:: get ( ) * 4 ;
53+ let mut output_archive = ZipWriter :: new ( tempfile ( ) ?) ;
54+
55+ for ( src_index, output_index) in ( 0 ..src_archive. len ( ) ) . cycle ( ) . zip ( 0 ..desired_num_entries) {
56+ let src_file = src_archive. by_index_raw ( src_index) ?;
57+ let output_name = if src_file. name ( ) . starts_with ( "random-" ) {
58+ format ! ( "random-{output_index}.dat" )
59+ } else {
60+ assert ! ( src_file. name( ) . starts_with( "text-" ) ) ;
61+ format ! ( "text-{output_index}.dat" )
62+ } ;
63+ output_archive. raw_copy_file_rename ( src_file, output_name) ?;
64+ }
65+
66+ output_archive. finish_into_readable ( )
67+ }
68+
69+ lazy_static ! {
70+ static ref DYNAMIC_TEST_ARCHIVE : Arc <Mutex <ZipArchive <fs:: File >>> = {
71+ let mut src = STATIC_TEST_ARCHIVE . lock( ) . unwrap( ) ;
72+ let archive = dynamic_test_archive( & mut src) . unwrap( ) ;
73+ Arc :: new( Mutex :: new( archive) )
74+ } ;
75+ }
76+
// Shared driver for the basic extraction benchmarks: extracts `archive` with
// `ZipArchive::extract` into a fresh subdirectory of a temporary parent directory.
// Any failure along the way panics via `unwrap()`.
77+ fn do_extract_basic ( bench : & mut Bencher , archive : & mut ZipArchive < fs:: File > ) {
// Total decompressed size of the archive, converted to u64; panics if it is
// unavailable or the conversion fails.
78+ let total_size: u64 = archive. decompressed_size ( ) . unwrap ( ) . try_into ( ) . unwrap ( ) ;
3979
// Parent temp directory that holds every per-iteration output directory.
4080 let parent = TempDir :: new ( "zip-extract" ) . unwrap ( ) ;
4181
// NOTE(review): the lines between here and `let outdir` are elided by the diff hunk below;
// presumably they feed `total_size` to `bench` and open the two closures that are closed
// at the end of this function — confirm against the full file.
@@ -45,19 +85,24 @@ fn extract_basic(bench: &mut Bencher) {
// Fresh output directory under `parent`, converted into its path for `extract`.
4585 let outdir = TempDir :: new_in ( parent. path ( ) , "bench-subdir" )
4686 . unwrap ( )
4787 . into_path ( ) ;
48- readable_archive . extract ( outdir) . unwrap ( ) ;
88+ archive . extract ( outdir) . unwrap ( ) ;
4989 } ) ;
5090 } ) ;
5191}
5292
53- #[ cfg( all( feature = "parallelism" , feature = "bzip2" , unix) ) ]
54- fn extract_split ( bench : & mut Bencher ) {
55- let readable_archive = get_test_archive ( ) . unwrap ( ) ;
56- let total_size: u64 = readable_archive
57- . decompressed_size ( )
58- . unwrap ( )
59- . try_into ( )
60- . unwrap ( ) ;
93+ fn extract_basic_static ( bench : & mut Bencher ) {
94+ let mut archive = STATIC_TEST_ARCHIVE . lock ( ) . unwrap ( ) ;
95+ do_extract_basic ( bench, & mut archive) ;
96+ }
97+
98+ fn extract_basic_dynamic ( bench : & mut Bencher ) {
99+ let mut archive = DYNAMIC_TEST_ARCHIVE . lock ( ) . unwrap ( ) ;
100+ do_extract_basic ( bench, & mut archive) ;
101+ }
102+
// Shared driver for the split-extraction benchmarks: extracts `archive` with `split_extract`
// into a fresh subdirectory of a temporary parent directory, passing a clone of `params`
// on each call. Only compiled with the `parallelism` feature on unix targets.
// Any failure along the way panics via `unwrap()`.
103+ #[ cfg( all( feature = "parallelism" , unix) ) ]
104+ fn do_extract_split ( bench : & mut Bencher , archive : & ZipArchive < fs:: File > ) {
// Total decompressed size of the archive, converted to u64; panics if it is
// unavailable or the conversion fails.
105+ let total_size: u64 = archive. decompressed_size ( ) . unwrap ( ) . try_into ( ) . unwrap ( ) ;
61106
62107 let params = ExtractionParameters {
// NOTE(review): integer division — this is 0 when fewer than 3 CPUs are reported;
// confirm that ExtractionParameters tolerates a zero thread count.
63108 decompression_threads : num_cpus:: get ( ) / 3 ,
// NOTE(review): the rest of the `params` initializer and the benchmark setup are elided by
// the diff hunk below; presumably they create `parent`, feed `total_size` to `bench`, and
// open the two closures that are closed at the end of this function — confirm against the
// full file.
@@ -72,15 +117,33 @@ fn extract_split(bench: &mut Bencher) {
// Fresh output directory under `parent`, converted into its path for `split_extract`.
72117 let outdir = TempDir :: new_in ( parent. path ( ) , "bench-subdir" )
73118 . unwrap ( )
74119 . into_path ( ) ;
75- split_extract ( & readable_archive , & outdir, params. clone ( ) ) . unwrap ( ) ;
120+ split_extract ( archive , & outdir, params. clone ( ) ) . unwrap ( ) ;
76121 } ) ;
77122 } ) ;
78123}
79124
80- #[ cfg( not( all( feature = "parallelism" , feature = "bzip2" , unix) ) ) ]
81- benchmark_group ! ( benches, extract_basic) ;
/// Runs the split extraction benchmark against the checked-in fixture archive.
///
/// Holds the `STATIC_TEST_ARCHIVE` lock for the duration of the benchmark; panics if the
/// mutex is poisoned.
#[cfg(all(feature = "parallelism", unix))]
fn extract_split_static(bench: &mut Bencher) {
    let guard = STATIC_TEST_ARCHIVE.lock().unwrap();
    do_extract_split(bench, &*guard);
}
130+
/// Runs the split extraction benchmark against the CPU-scaled generated archive.
///
/// Holds the `DYNAMIC_TEST_ARCHIVE` lock for the duration of the benchmark; panics if the
/// mutex is poisoned.
#[cfg(all(feature = "parallelism", unix))]
fn extract_split_dynamic(bench: &mut Bencher) {
    let guard = DYNAMIC_TEST_ARCHIVE.lock().unwrap();
    do_extract_split(bench, &*guard);
}
82136
83- #[ cfg( all( feature = "parallelism" , feature = "bzip2" , unix) ) ]
84- benchmark_group ! ( benches, extract_basic, extract_split) ;
137+ #[ cfg( not( all( feature = "parallelism" , unix) ) ) ]
138+ benchmark_group ! ( benches, extract_basic_static, extract_basic_dynamic) ;
139+
140+ #[ cfg( all( feature = "parallelism" , unix) ) ]
141+ benchmark_group ! (
142+ benches,
143+ extract_basic_static,
144+ extract_basic_dynamic,
145+ extract_split_static,
146+ extract_split_dynamic
147+ ) ;
85148
86149benchmark_main ! ( benches) ;
0 commit comments