@@ -393,7 +393,7 @@ const sqlite3_int64 GRI_BIN_OFFSETS[] = {
393393 1 + 16 + 256 + 4096 + 65536 + 1048576 + 16777216 ,
394394 1 + 16 + 256 + 4096 + 65336 + 1048576 + 16777216 + 268435456 ,
395395};
396- const sqlite_int64 GRI_POS_OFFSETS[] = {
396+ const sqlite3_int64 GRI_POS_OFFSETS[] = {
397397 0 , 134217728 , 8388608 , 524288 , 32768 , 2048 , 128 , 8 , 0 ,
398398};
399399const sqlite3_int64 GRI_BIN_COUNT = GRI_BIN_OFFSETS[GRI_MAX_LEVEL] + 4294967296LL ;
@@ -495,66 +495,88 @@ static void sqlfn_create_genomic_range_index_sql(sqlite3_context *ctx, int argc,
495495 SQL_WRAPPER (CreateGenomicRangeIndexSQL (schema_table, rid, beg, end, max_depth))
496496}
497497
498- struct gri_properties {
498+ static int gri_bin_depth (sqlite3_int64 bin) {
499+ assert (bin >= 0 && bin < GRI_BIN_COUNT);
500+ for (int lv = 0 ; lv < GRI_MAX_LEVEL; ++lv) {
501+ if (bin < GRI_BIN_OFFSETS[lv + 1 ]) {
502+ return lv;
503+ }
504+ }
505+ return GRI_MAX_LEVEL;
506+ }
507+
508+ struct gri_depth_range_t {
499509 int min_depth = 0 , max_depth = GRI_MAX_LEVEL;
500510};
501511
502- static gri_properties InspectGRI (sqlite3 *dbconn, const string &schema_table) {
503- // find the range of nonempty bin levels [min_depth..max_depth]
504- gri_properties ans;
512+ static gri_depth_range_t DetectDepthRange (sqlite3 *dbconn, const string &schema_table) {
505513 string table = split_schema_table (schema_table).second ;
506- // convoluted query ensures it "skip-scans" the index without requiring ANALYZE
507- string query = " SELECT _rowid_ FROM " + schema_table + " INDEXED BY " + table +
508- " __gri WHERE _gri_rid IN (SELECT DISTINCT _gri_rid FROM " + schema_table +
509- " INDEXED BY " + table + " __gri) AND _gri_bin >= ? LIMIT 1" ;
514+
515+ // Detect min & max bin depth (level) occupied in the table's GRI. Since bin numbers increase
516+ // with depth, we can find the min and max bin numbers & then figure their respective depths.
517+ //
518+ // We'd like to write simply SELECT MIN(_gri_bin), MAX(_gri_bin) ... and trust SQLite to plan
519+ // an efficient skip-scan of the GRI on (_gri_rid, _gri_bin, ...). Unfortunately it doesn't do
520+ // that, so instead we write some convoluted SQL that forces the efficient plan.
521+ //
522+ // This consists of --
523+ // (i) recursive CTE to find the set of relevant _gri_rid (because even
524+ // SELECT DISTINCT _gri_rid ... triggers a full index scan)
525+ // (ii) for each _gri_rid: pick out the min/max bins with ORDER BY _gri_bin [DESC] LIMIT 1
526+ // (iii) min() and max() over the per-rid answers
527+ // We do the (iii) aggregation externally to ensure SQLite only does one pass through the index
528+
529+ string tbl_gri = schema_table + " INDEXED BY " + table + " __gri" ;
530+ string query =
531+ " WITH RECURSIVE __distinct(__rid) AS\n "
532+ " (SELECT (SELECT _gri_rid FROM " +
533+ tbl_gri +
534+ " ORDER BY _gri_rid NULLS LAST LIMIT 1) AS __rid_0 WHERE __rid_0 IS NOT NULL\n "
535+ " UNION ALL\n "
536+ " SELECT (SELECT _gri_rid FROM " +
537+ tbl_gri +
538+ " WHERE _gri_rid > __rid ORDER BY _gri_rid LIMIT 1) AS __rid_i FROM __distinct WHERE __rid_i IS NOT NULL)\n "
539+ " SELECT\n "
540+ " (SELECT _gri_bin FROM " +
541+ tbl_gri +
542+ " WHERE _gri_rid = __rid AND _gri_bin >= 0 ORDER BY _gri_rid, _gri_bin LIMIT 1),\n "
543+ " (SELECT _gri_bin FROM " +
544+ tbl_gri +
545+ " WHERE _gri_rid = __rid AND _gri_bin >= 0 ORDER BY _gri_rid DESC, _gri_bin DESC LIMIT 1)\n "
546+ " FROM __distinct" ;
547+ _DBG << endl << query << endl;
510548 shared_ptr<sqlite3_stmt> stmt;
511549 {
512550 sqlite3_stmt *pStmt = nullptr ;
513551 if (sqlite3_prepare_v3 (dbconn, query.c_str (), -1 , 0 , &pStmt, nullptr ) != SQLITE_OK) {
552+ throw runtime_error (sqlite3_errmsg (dbconn));
514553 throw runtime_error (" GenomicSQLite: table has no genomic range index" );
515554 }
516555 stmt = shared_ptr<sqlite3_stmt>(pStmt, sqlite3_finalize);
517556 }
518- for (ans.max_depth = GRI_MAX_LEVEL; ans.max_depth > 0 ; --(ans.max_depth )) {
519- if (sqlite3_bind_int64 (stmt.get (), 1 , GRI_BIN_OFFSETS[ans.max_depth ]) != SQLITE_OK) {
520- throw runtime_error (" GenomicSQLite: error inspecting genomic range index" );
521- }
522- int rc = sqlite3_step (stmt.get ());
523- if (rc == SQLITE_ROW && sqlite3_column_type (stmt.get (), 0 ) == SQLITE_INTEGER) {
524- break ;
557+
558+ sqlite3_int64 min_bin = GRI_BIN_COUNT, max_bin = -1 ;
559+ int rc;
560+ while ((rc = sqlite3_step (stmt.get ())) == SQLITE_ROW) {
561+ if (sqlite3_column_type (stmt.get (), 0 ) == SQLITE_INTEGER) {
562+ min_bin = min (min_bin, sqlite3_column_int64 (stmt.get (), 0 ));
525563 }
526- if ((rc != SQLITE_ROW && rc != SQLITE_DONE) || sqlite3_reset ( stmt.get ()) != SQLITE_OK ) {
527- throw runtime_error ( " GenomicSQLite: error inspecting genomic range index " );
564+ if (sqlite3_column_type ( stmt.get (), 1 ) == SQLITE_INTEGER ) {
565+ max_bin = max (max_bin, sqlite3_column_int64 (stmt. get (), 1 ) );
528566 }
529567 }
568+ if (rc != SQLITE_DONE) {
569+ throw runtime_error (" GenomicSQLite: error inspecting genomic range index" );
570+ }
530571
531- stmt.reset ();
532- query = " SELECT _rowid_ FROM " + schema_table + " INDEXED BY " + table +
533- " __gri WHERE _gri_rid IN (SELECT DISTINCT _gri_rid FROM " + schema_table +
534- " INDEXED BY " + table + " __gri) AND _gri_bin < ? LIMIT 1" ;
535- {
536- sqlite3_stmt *pStmt = nullptr ;
537- if (sqlite3_prepare_v3 (dbconn, query.c_str (), -1 , 0 , &pStmt, nullptr ) != SQLITE_OK) {
538- throw runtime_error (" GenomicSQLite: table has no genomic range index" );
539- }
540- stmt = shared_ptr<sqlite3_stmt>(pStmt, sqlite3_finalize);
572+ // set min/max depth based on min/max bin
573+ gri_depth_range_t ans;
574+ if (min_bin < GRI_BIN_COUNT) {
575+ ans.min_depth = gri_bin_depth (min_bin);
541576 }
542- for (ans.min_depth = 0 ; ans.min_depth < ans.max_depth ; ++(ans.min_depth )) {
543- if (sqlite3_bind_int64 (stmt.get (), 1 ,
544- (ans.min_depth < GRI_MAX_LEVEL) ? GRI_BIN_OFFSETS[ans.min_depth + 1 ]
545- : GRI_BIN_COUNT) != SQLITE_OK) {
546- throw runtime_error (" GenomicSQLite: error inspecting genomic range index" );
547- }
548- int rc = sqlite3_step (stmt.get ());
549- if (rc == SQLITE_ROW && sqlite3_column_type (stmt.get (), 0 ) == SQLITE_INTEGER) {
550- break ;
551- }
552- if (rc != SQLITE_ROW && rc != SQLITE_DONE && rc != SQLITE_OK ||
553- sqlite3_reset (stmt.get ()) != SQLITE_OK) {
554- throw runtime_error (" GenomicSQLite: error inspecting genomic range index" );
555- }
577+ if (max_bin >= 0 ) {
578+ ans.max_depth = gri_bin_depth (max_bin);
556579 }
557-
558580 assert (ans.min_depth >= 0 && ans.min_depth <= ans.max_depth && ans.max_depth < GRI_LEVELS);
559581 return ans;
560582}
@@ -585,9 +607,9 @@ static string FilterTerm(const string &indexed_table, const string &qbegs, const
585607
586608string GenomicRangeRowidsSQL (const string &indexed_table, sqlite3 *dbconn, const string &qrid,
587609 const string &qbeg, const string &qend) {
588- gri_properties table_gri;
610+ gri_depth_range_t table_gri;
589611 if (dbconn) {
590- table_gri = InspectGRI (dbconn, indexed_table);
612+ table_gri = DetectDepthRange (dbconn, indexed_table);
591613 }
592614 string table = split_schema_table (indexed_table).second ;
593615
0 commit comments