Skip to content

Commit 076ac7e

Browse files
committed
ld & scalability tweaks
1 parent 4cbab83 commit 076ac7e

5 files changed

Lines changed: 15 additions & 11 deletions

File tree

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,5 +49,5 @@ add_subdirectory(loaders)
4949
if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
5050
include(CTest)
5151
enable_testing()
52-
add_test(NAME pytest COMMAND env PYTHONPATH=$PYTHONPATH:${CMAKE_CURRENT_SOURCE_DIR}/bindings/python LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${CMAKE_BINARY_DIR} python3 -m pytest -xv -n 4 --tb=short ${CMAKE_CURRENT_SOURCE_DIR}/test)
52+
add_test(NAME pytest COMMAND env PYTHONPATH=$PYTHONPATH:${CMAKE_CURRENT_SOURCE_DIR}/bindings/python LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${CMAKE_BINARY_DIR} GENOMICSQLITE_SYSTEM_LIBRARY=1 python3 -m pytest -xv -n 4 --tb=short ${CMAKE_CURRENT_SOURCE_DIR}/test)
5353
endif()

bindings/python/genomicsqlite/__init__.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,18 @@
1010
from typing import Optional, NamedTuple, Dict, Any
1111
from ctypes.util import find_library
1212

13-
HERE = os.path.dirname(__file__)
13+
_HERE = os.path.dirname(__file__)
14+
_YES = ("1", "true", "t", "yes", "y")
1415

15-
# One-time global initialization -- locate shared library file; preferably the copy installed with
16-
# this package, otherwise look in the usual places.
16+
# Module initialization -- locate shared library file; preferably the copy installed with this
17+
# package, otherwise look in the usual places.
1718
_DLL = None
18-
if platform.system() == "Linux" and os.path.isfile(os.path.join(HERE, "libgenomicsqlite.so")):
19-
_DLL = os.path.join(HERE, "libgenomicsqlite.so")
19+
if os.environ.get("GENOMICSQLITE_SYSTEM_LIBRARY", "").strip().lower() not in _YES:
20+
if platform.system() == "Linux" and os.path.isfile(os.path.join(_HERE, "libgenomicsqlite.so")):
21+
_DLL = os.path.join(_HERE, "libgenomicsqlite.so")
2022
if not _DLL:
2123
_DLL = find_library("genomicsqlite")
22-
assert _DLL, "couldn't locate genomicsqlite shared-library file"
24+
assert _DLL, "Unable to locate genomicsqlite shared-library file"
2325
# open a dummy connection to :memory: just for loading the extension.
2426
_MEMCONN = sqlite3.connect(":memory:")
2527
_MEMCONN.enable_load_extension(True)

loaders/sam_into_sqlite.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ int main(int argc, char *argv[]) {
191191
hts_set_threads(sam.get(), max(2U, thread::hardware_concurrency() / 4));
192192
unique_ptr<sam_hdr_t, void (*)(sam_hdr_t *)> hdr(sam_hdr_read(sam.get()), &sam_hdr_destroy);
193193
if (!hdr) {
194-
cerr << "sam_into_sqlite: failed reading VCF header from " << infilename << '\n';
194+
cerr << "sam_into_sqlite: failed reading SAM header from " << infilename << '\n';
195195
return 1;
196196
}
197197

src/genomicsqlite.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ string GenomicSQLiteURI(const string &dbfile, const string &config_json = "") {
136136
uri << "file:" << dbfile << "?vfs=zstd";
137137
uri << "&threads=" << to_string(threads);
138138
uri << "&outer_page_size=" << to_string(outer_page_KiB * 1024);
139+
uri << "&outer_cache_size=-65536"; // enlarge to hold index b-tree pages for large db's
139140
uri << "&level=" << to_string(zstd_level);
140141
if (unsafe_load) {
141142
uri << "&outer_unsafe";
@@ -237,7 +238,8 @@ string GenomicSQLiteTuningSQL(const string &config_json, const string &schema =
237238
}
238239
map<string, string> pragmas;
239240
pragmas[schema_prefix + "cache_size"] = to_string(-1024 * page_cache_MiB);
240-
pragmas["threads"] = to_string(threads >= 0 ? threads : thread::hardware_concurrency());
241+
pragmas["threads"] =
242+
to_string(threads >= 0 ? threads : std::min(8, (int)thread::hardware_concurrency()));
241243
if (unsafe_load) {
242244
pragmas[schema_prefix + "journal_mode"] = "OFF";
243245
pragmas[schema_prefix + "synchronous"] = "OFF";

test/genomicsqlite_big_tests.wdl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,9 +130,9 @@ task test_sam {
130130
131131
# add a QNAME-sorted seqs table
132132
chmod +x /usr/lib/python3.8/genomicsqlite.py
133-
time /usr/lib/python3.8/genomicsqlite.py "~{dbname}" "PRAGMA cache_size=-4194304; CREATE TABLE reads_seqs_by_qname AS SELECT * from reads_seqs NOT INDEXED ORDER BY qname"
133+
time /usr/lib/python3.8/genomicsqlite.py "~{dbname}" "PRAGMA journal_mode=off; PRAGMA synchronous=off; PRAGMA cache_size=-4194304; CREATE TABLE reads_seqs_by_qname AS SELECT * from reads_seqs NOT INDEXED ORDER BY qname"
134134
>&2 ls -l "~{dbname}"
135-
time /usr/lib/python3.8/genomicsqlite.py "~{dbname}" "PRAGMA journal_mode=off; DROP TABLE reads_seqs_by_qname"
135+
time /usr/lib/python3.8/genomicsqlite.py "~{dbname}" "PRAGMA journal_mode=off; PRAGMA synchronous=off; DROP TABLE reads_seqs_by_qname"
136136
>&2 ls -l "~{dbname}"
137137
>>>
138138

0 commit comments

Comments
 (0)