Skip to content

Commit 51dc522

Browse files
committed
vcf_into_sqlite page size options
1 parent f4b4096 commit 51dc522

2 files changed

Lines changed: 27 additions & 4 deletions

File tree

loaders/vcf_into_sqlite.cc

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,8 @@ void help() {
552552
<< " --genotypes-without-rowid make the genotypes table WITHOUT ROWID (advantageous if the FORMAT fields aren't too large)"
553553
<< '\n'
554554
<< " --no-gri skip genomic range indexing" << '\n'
555+
<< " --inner-page-KiB N inner page size; one of {1,2,4,8,16,32,64}" << '\n'
556+
<< " --outer-page-KiB N outer page size; one of {1,2,4,8,16,32,64}" << '\n'
555557
<< " -l,--level LEVEL database compression level (-7 to 22, default 6)" << '\n'
556558
<< " -q,--quiet suppress progress information on standard error" << '\n'
557559
<< " -h,--help show this help message" << '\n'
@@ -561,10 +563,12 @@ void help() {
561563
int main(int argc, char *argv[]) {
562564
string table_prefix, infilename, outfilename;
563565
bool gri = true, progress = true, genotypes_without_rowid = false;
564-
int level = 6, ploidy = 2;
566+
int level = 6, ploidy = 2, inner_page_KiB = 16, outer_page_KiB = 32;
565567

566568
static struct option long_options[] = {{"help", no_argument, 0, 'h'},
567569
{"quiet", no_argument, 0, 'q'},
570+
{"inner-page-KiB", required_argument, 0, 'I'},
571+
{"outer-page-KiB", required_argument, 0, 'O'},
568572
{"level", required_argument, 0, 'l'},
569573
{"ploidy", required_argument, 0, 'p'},
570574
{"genotypes-without-rowid", no_argument, 0, 'R'},
@@ -598,6 +602,22 @@ int main(int argc, char *argv[]) {
598602
return -1;
599603
}
600604
break;
605+
case 'I':
606+
errno = 0;
607+
inner_page_KiB = strtol(optarg, nullptr, 10);
608+
if (errno || inner_page_KiB < 1 || inner_page_KiB > 64) {
609+
cerr << "vcf_into_sqlite: invalid --inner-page-KiB" << endl;
610+
return -1;
611+
}
612+
break;
613+
case 'O':
614+
errno = 0;
615+
outer_page_KiB = strtol(optarg, nullptr, 10);
616+
if (errno || outer_page_KiB < 1 || outer_page_KiB > 64) {
617+
cerr << "vcf_into_sqlite: invalid --outer-page-KiB" << endl;
618+
return -1;
619+
}
620+
break;
601621
case 'l':
602622
errno = 0;
603623
level = strtol(optarg, nullptr, 10);
@@ -649,9 +669,12 @@ int main(int argc, char *argv[]) {
649669
// open output database
650670
sqlite3_config(SQLITE_CONFIG_MEMSTATUS, 0);
651671
sqlite3_config(SQLITE_CONFIG_LOOKASIDE, 2048, 128);
672+
string config_json = R"({"unsafe_load": true, "zstd_level":)" + to_string(level) +
673+
R"(,"inner_page_KiB":)" + to_string(inner_page_KiB) +
674+
R"(,"outer_page_KiB":)" + to_string(outer_page_KiB) + "}";
652675
auto db = GenomicSQLiteOpen(
653676
outfilename, SQLITE_OPEN_CREATE | SQLITE_OPEN_READWRITE | SQLITE_OPEN_NOMUTEX,
654-
R"( {"unsafe_load": true, "zstd_level": )" + to_string(level) + "}");
677+
config_json);
655678
#ifndef NDEBUG
656679
db->exec("PRAGMA foreign_keys=ON");
657680
#endif

test/genomicsqlite_big_tests.wdl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ task test_sam {
109109
>&2 ls -l "$reads_file"
110110
111111
# load database
112-
time sam_into_sqlite "$reads_file" "~{dbname}"
112+
time sam_into_sqlite --inner-page-KiB 64 --outer-page-KiB 2 "$reads_file" "~{dbname}"
113113
>&2 ls -l "~{dbname}"
114114
115115
# GRI query
@@ -170,7 +170,7 @@ task test_vcf {
170170
chmod +x /usr/local/bin/vcf_into_sqlite
171171
172172
# load database
173-
time vcf_into_sqlite --genotypes-without-rowid "~{variants}" "~{dbname}"
173+
time vcf_into_sqlite --inner-page-KiB 64 --outer-page-KiB 2 --genotypes-without-rowid "~{variants}" "~{dbname}"
174174
175175
# GRI query
176176
time python3 - <<"EOF"

0 commit comments

Comments
 (0)