Skip to content

Commit b605170

Browse files
committed
page size tuning experiment
1 parent 1e24c4a commit b605170

4 files changed

Lines changed: 103 additions & 4 deletions

File tree

loaders/sam_into_sqlite.cc

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,8 @@ void help() {
175175
<< " --table-prefix PREFIX prefix to the name of each table created" << '\n'
176176
<< " --no-gri skip genomic range indexing" << '\n'
177177
<< " --no-qname-index skip QNAME indexing" << '\n'
178+
<< " --inner-page-KiB N inner page size; one of {1,2,4,8,16,32,64}" << '\n'
179+
<< " --outer-page-KiB N outer page size; one of {1,2,4,8,16,32,64}" << '\n'
178180
<< " -l,--level LEVEL database compression level (-7 to 22, default 6)" << '\n'
179181
<< " -q,--quiet suppress progress information on standard error" << '\n'
180182
<< " -h,--help show this help message" << '\n'
@@ -184,18 +186,20 @@ void help() {
184186
int main(int argc, char *argv[]) {
185187
string table_prefix, infilename, outfilename;
186188
bool gri = true, qname_idx = true, progress = true;
187-
int level = 6;
189+
int level = 6, inner_page_KiB = 16, outer_page_KiB = 32;
188190

189191
static struct option long_options[] = {{"help", no_argument, 0, 'h'},
190192
{"quiet", no_argument, 0, 'q'},
191193
{"level", required_argument, 0, 'l'},
194+
{"inner-page-KiB", required_argument, 0, 'I'},
195+
{"outer-page-KiB", required_argument, 0, 'O'},
192196
{"table-prefix", required_argument, 0, 't'},
193197
{"no-gri", no_argument, 0, 'G'},
194198
{"no-qname-index", no_argument, 0, 'Q'},
195199
{0, 0, 0, 0}};
196200

197201
int c;
198-
while (-1 != (c = getopt_long(argc, argv, "hqQGl:t:", long_options, nullptr))) {
202+
while (-1 != (c = getopt_long(argc, argv, "hqQGl:t:I:O:", long_options, nullptr))) {
199203
switch (c) {
200204
case 'h':
201205
help();
@@ -216,7 +220,23 @@ int main(int argc, char *argv[]) {
216220
errno = 0;
217221
level = strtol(optarg, nullptr, 10);
218222
if (errno || level < -7 || level > 22) {
219-
cerr << "sam_into_sqlite: couldn't parse --level in [-7,22]";
223+
cerr << "sam_into_sqlite: couldn't parse --level in [-7,22]" << endl;
224+
return -1;
225+
}
226+
break;
227+
case 'I':
228+
errno = 0;
229+
inner_page_KiB = strtol(optarg, nullptr, 10);
230+
if (errno || inner_page_KiB < 1 || inner_page_KiB > 64) {
231+
cerr << "sam_into_sqlite: invalid --inner-page-KiB" << endl;
232+
return -1;
233+
}
234+
break;
235+
case 'O':
236+
errno = 0;
237+
outer_page_KiB = strtol(optarg, nullptr, 10);
238+
if (errno || outer_page_KiB < 1 || outer_page_KiB > 64) {
239+
cerr << "sam_into_sqlite: invalid --outer-page-KiB" << endl;
220240
return -1;
221241
}
222242
break;
@@ -257,9 +277,12 @@ int main(int argc, char *argv[]) {
257277
// open output database
258278
sqlite3_config(SQLITE_CONFIG_MEMSTATUS, 0);
259279
sqlite3_config(SQLITE_CONFIG_LOOKASIDE, 2048, 128);
280+
string config_json = R"({"unsafe_load": true, "zstd_level":)" + to_string(level) +
281+
R"(,"inner_page_KiB":)" + to_string(inner_page_KiB) +
282+
R"(,"outer_page_KiB":)" + to_string(outer_page_KiB) + "}";
260283
auto db = GenomicSQLiteOpen(
261284
outfilename, SQLITE_OPEN_CREATE | SQLITE_OPEN_READWRITE | SQLITE_OPEN_NOMUTEX,
262-
R"( {"unsafe_load": true, "zstd_level": )" + to_string(level) + "}");
285+
config_json);
263286
#ifndef NDEBUG
264287
db->exec("PRAGMA foreign_keys=ON");
265288
#endif

test/compression_tuning/Dockerfile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Container image used by the compression tuning trials:
# Ubuntu 20.04 with the samtools and zstd packages installed.
FROM ubuntu:20.04
2+
# Keep apt from prompting for input during the image build.
ENV DEBIAN_FRONTEND=noninteractive
3+
RUN apt-get -qq update && apt-get install -y samtools zstd

test/compression_tuning/Makefile

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Default target: build the trial Docker image from this directory, then run
# run.wdl under miniwdl. The image tag, the test BAM, and the loader binary and
# shared library taken from ../../build are passed as inputs of the `trial` task.
# miniwdl is told to create its run directory under /tmp.
all:
2+
docker build -t genomicsqlite_compression_tuning .
3+
miniwdl run run.wdl \
4+
trial.docker=genomicsqlite_compression_tuning \
5+
trial.bam=../data/NA12878.chr21:20000000-22500000.bam \
6+
trial.sam_into_sqlite=../../build/loaders/sam_into_sqlite \
7+
trial.libgenomicsqlite_so=../../build/libgenomicsqlite.so \
8+
--verbose --dir /tmp
9+
10+
.PHONY: all

test/compression_tuning/run.wdl

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
version 1.0
2+
3+
# One entry of the tuning grid: the (inner, outer) page-size pair that was
# tried, together with the two measurements reported by the trial task
# (its db_size and efficiency outputs).
struct Result {
4+
Int inner_page_KiB
5+
Int outer_page_KiB
6+
Int db_size
7+
Int efficiency
8+
}
9+
10+
# Runs the `trial` task once for every (inner, outer) combination drawn from
# page_sizes and collects the measurements into a flat list of Result records.
workflow genomicsqlite_compression_tuning {
11+
input {}
12+
# Candidate page sizes in KiB; the same list is used for both inner and outer.
Array[Int] page_sizes = [4, 8, 16, 32, 64]
13+
# Nested scatter: every inner size is paired with every outer size.
scatter (inner_page_KiB in page_sizes) {
14+
scatter(outer_page_KiB in page_sizes) {
15+
# Only the two page sizes are bound here; the task's remaining inputs
# (bam, sam_into_sqlite, libgenomicsqlite_so, docker) are left for the
# caller to supply when the workflow is launched.
call trial {
16+
input:
17+
inner_page_KiB = inner_page_KiB,
18+
outer_page_KiB = outer_page_KiB
19+
}
20+
# Pair the parameters of this trial with its measurements.
Result res = object {
21+
inner_page_KiB: inner_page_KiB,
22+
outer_page_KiB: outer_page_KiB,
23+
db_size: trial.db_size,
24+
efficiency: trial.efficiency
25+
}
26+
}
27+
}
28+
29+
output {
30+
# `res` is gathered once per scatter level, so it is nested two deep here;
# flatten() collapses it into a single array.
Array[Result] results = flatten(res)
31+
}
32+
}
33+
34+
# Loads one BAM file into a database with sam_into_sqlite using the given
# inner/outer page sizes, then compresses the resulting reads.db with zstd -6
# so the two file sizes can be compared.
#
# Inputs:
#   bam                  input file passed to sam_into_sqlite
#   sam_into_sqlite      loader executable (copied into the working directory)
#   libgenomicsqlite_so  shared library, copied next to the loader and made
#                        findable through LD_LIBRARY_PATH
#   inner_page_KiB       value passed to --inner-page-KiB
#   outer_page_KiB       value passed to --outer-page-KiB
#   docker               container image the command runs in
task trial {
35+
input {
36+
File bam
37+
File sam_into_sqlite
38+
File libgenomicsqlite_so
39+
Int inner_page_KiB
40+
Int outer_page_KiB
41+
String docker
42+
}
43+
44+
command <<<
45+
set -euxo pipefail
46+
cp ~{libgenomicsqlite_so} libgenomicsqlite.so
47+
cp ~{sam_into_sqlite} sam_into_sqlite
48+
chmod +x sam_into_sqlite
49+
50+
LD_LIBRARY_PATH=$(pwd) ./sam_into_sqlite --inner-page-KiB ~{inner_page_KiB} --outer-page-KiB ~{outer_page_KiB} ~{bam} reads.db
51+
zstd -6 -q reads.db -o reads.db.zst
52+
>>>
53+
54+
runtime {
55+
docker: docker
56+
cpu: 4
57+
}
58+
59+
output {
60+
# Size of the loaded database, rounded to an integer. size() is called
# without a unit argument (bytes by default per the WDL spec -- confirm).
Int db_size = round(size("reads.db"))
61+
# zstd-compressed size as a percentage of the uncompressed database size,
# rounded to an integer.
Int efficiency = round(100.0*size("reads.db.zst")/size("reads.db"))
62+
}
63+
}

0 commit comments

Comments
 (0)