@@ -1148,7 +1148,7 @@ extern "C" int nucleotides_twobit(const char *seq, size_t len, void *out) {
11481148 return -2 ;
11491149 }
11501150 assert (c_i >= 0 && c_i < 128 );
1151- const unsigned char crumb = dna_crumb_table[c_i];
1151+ const unsigned char crumb = dna_crumb_table[( unsigned char ) c_i];
11521152 if (crumb > 3 ) {
11531153 return -1 ;
11541154 }
@@ -1432,6 +1432,68 @@ static void sqlfn_twobit_rna(sqlite3_context *ctx, int argc, sqlite3_value **arg
14321432 twobit_nucleotides (ctx, argc, argv, true );
14331433}
14341434
/*
 * dna_complement_table maps every possible byte value to the complementary DNA base, preserving
 * letter case (A<->T, C<->G, a<->t, c<->g). Every byte that is not one of those eight letters
 * maps to the sentinel 0xFF. The table was generated with the following Python snippet:
 *
 *   complement = [0xFF for i in range(256)]
 *   for l,r in (
 *       ('A','T'), ('C','G'), ('G','C'), ('T','A'),
 *       ('a','t'), ('c','g'), ('g','c'), ('t','a'),
 *   ):
 *       complement[ord(l)] = r
 *
 *   print(complement)
 */
const unsigned char dna_complement_table[] = {
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 'T',  0xFF, 'G',  0xFF, 0xFF, 0xFF, 'C',  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 'A',  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 't',  0xFF, 'g',  0xFF, 0xFF, 0xFF, 'c',  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 'a',  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF};

// The table is indexed by an arbitrary byte, so it must cover all 256 values.
static_assert(sizeof(dna_complement_table) == 256,
              "dna_complement_table must have one entry per byte value");

/*
 * Write the reverse complement of dna[0..len) into out, followed by a terminating NUL byte.
 *
 * dna: input bases; only A, C, G, T in either case are accepted.
 * len: number of bytes to read from dna.
 * out: destination buffer with room for at least len + 1 bytes.
 *
 * Returns 0 on success, or -1 as soon as a byte without a complement is encountered. In the
 * failure case the contents of out are unspecified and not NUL-terminated.
 */
extern "C" int dna_revcomp(const char *dna, size_t len, char *out) {
    for (; len; --len, ++out) {
        // Hold the lookup result as unsigned char for the sentinel test: once 0xFF has been
        // stored into a (possibly signed) char it promotes to -1 and never compares equal to 0xFF.
        const unsigned char complement =
            dna_complement_table[static_cast<unsigned char>(dna[len - 1])];
        if (complement == 0xFF) {
            return -1;
        }
        *out = static_cast<char>(complement);
    }
    *out = 0;
    return 0;
}
1470+
1471+ static void sqlfn_dna_revcomp (sqlite3_context *ctx, int argc, sqlite3_value **argv) {
1472+ assert (argc == 1 );
1473+ const char *seq = nullptr ;
1474+ ARG_TEXT_OPTIONAL (seq, 0 );
1475+ if (!seq) {
1476+ return sqlite3_value_type (argv[0 ]) == SQLITE_NULL ? sqlite3_result_null (ctx)
1477+ : sqlite3_result_error_nomem (ctx);
1478+ }
1479+
1480+ auto seqlen = sqlite3_value_bytes (argv[0 ]);
1481+ assert (seqlen >= 0 );
1482+ if (seqlen <= 0 ) {
1483+ return sqlite3_result_value (ctx, argv[0 ]);
1484+ }
1485+
1486+ try {
1487+ std::unique_ptr<char []> buf (new char [seqlen + 1 ]);
1488+ if (dna_revcomp (seq, seqlen, buf.get ()) < 0 ) {
1489+ return sqlite3_result_error (ctx, " non-DNA input to dna_revcomp()" , -1 );
1490+ }
1491+ return sqlite3_result_text (ctx, buf.get (), seqlen, SQLITE_TRANSIENT);
1492+ } catch (std::bad_alloc &) {
1493+ return sqlite3_result_error_nomem (ctx);
1494+ }
1495+ }
1496+
14351497/* *************************************************************************************************
14361498 * parse_genomic_range_*()
14371499 **************************************************************************************************/
@@ -1458,7 +1520,7 @@ static uint64_t parse_genomic_range_pos(const string &txt, size_t ofs1, size_t o
14581520 return ans;
14591521}
14601522
1461- static std::tuple<string, uint64_t , uint64_t > parse_genomic_range (const string &txt) {
1523+ std::tuple<string, uint64_t , uint64_t > parse_genomic_range (const string &txt) {
14621524 auto p1 = txt.find (' :' );
14631525 auto p2 = txt.find (' -' );
14641526 if (p1 == string::npos || p2 == string::npos || p1 < 1 || p2 < p1 + 2 || p2 >= txt.size () - 1 ) {
@@ -1482,10 +1544,14 @@ static std::tuple<string, uint64_t, uint64_t> parse_genomic_range(const string &
14821544
14831545static void sqlfn_parse_genomic_range_sequence (sqlite3_context *ctx, int argc,
14841546 sqlite3_value **argv) {
1485- string txt;
1486- ARG_TEXT (txt, 0 );
1547+ const char *txt = nullptr ;
1548+ ARG_TEXT_OPTIONAL (txt, 0 );
1549+ if (!txt) {
1550+ return sqlite3_value_type (argv[0 ]) == SQLITE_NULL ? sqlite3_result_null (ctx)
1551+ : sqlite3_result_error_nomem (ctx);
1552+ }
14871553 try {
1488- auto t = parse_genomic_range (txt);
1554+ auto t = parse_genomic_range (string ( txt, sqlite3_value_bytes (argv[ 0 ])) );
14891555 auto &chrom = get<0 >(t);
14901556 return sqlite3_result_text (ctx, chrom.c_str (), chrom.size (), SQLITE_TRANSIENT);
14911557 } catch (std::exception &exn) {
@@ -1494,21 +1560,29 @@ static void sqlfn_parse_genomic_range_sequence(sqlite3_context *ctx, int argc,
14941560}
14951561
14961562static void sqlfn_parse_genomic_range_begin (sqlite3_context *ctx, int argc, sqlite3_value **argv) {
1497- string txt;
1498- ARG_TEXT (txt, 0 );
1563+ const char *txt = nullptr ;
1564+ ARG_TEXT_OPTIONAL (txt, 0 );
1565+ if (!txt) {
1566+ return sqlite3_value_type (argv[0 ]) == SQLITE_NULL ? sqlite3_result_null (ctx)
1567+ : sqlite3_result_error_nomem (ctx);
1568+ }
14991569 try {
1500- auto t = parse_genomic_range (txt);
1570+ auto t = parse_genomic_range (string ( txt, sqlite3_value_bytes (argv[ 0 ])) );
15011571 return sqlite3_result_int64 (ctx, get<1 >(t));
15021572 } catch (std::exception &exn) {
15031573 sqlite3_result_error (ctx, exn.what (), -1 );
15041574 }
15051575}
15061576
15071577static void sqlfn_parse_genomic_range_end (sqlite3_context *ctx, int argc, sqlite3_value **argv) {
1508- string txt;
1509- ARG_TEXT (txt, 0 );
1578+ const char *txt = nullptr ;
1579+ ARG_TEXT_OPTIONAL (txt, 0 );
1580+ if (!txt) {
1581+ return sqlite3_value_type (argv[0 ]) == SQLITE_NULL ? sqlite3_result_null (ctx)
1582+ : sqlite3_result_error_nomem (ctx);
1583+ }
15101584 try {
1511- auto t = parse_genomic_range (txt);
1585+ auto t = parse_genomic_range (string ( txt, sqlite3_value_bytes (argv[ 0 ])) );
15121586 return sqlite3_result_int64 (ctx, get<2 >(t));
15131587 } catch (std::exception &exn) {
15141588 sqlite3_result_error (ctx, exn.what (), -1 );
@@ -1563,6 +1637,7 @@ static int register_genomicsqlite_functions(sqlite3 *db, const char **pzErrMsg,
15631637 {FPNM (twobit_rna), 1 , SQLITE_DETERMINISTIC},
15641638 {FPNM (twobit_rna), 2 , SQLITE_DETERMINISTIC},
15651639 {FPNM (twobit_rna), 3 , SQLITE_DETERMINISTIC},
1640+ {FPNM (dna_revcomp), 1 , SQLITE_DETERMINISTIC},
15661641 {FPNM (parse_genomic_range_sequence), 1 , SQLITE_DETERMINISTIC},
15671642 {FPNM (parse_genomic_range_begin), 1 , SQLITE_DETERMINISTIC},
15681643 {FPNM (parse_genomic_range_end), 1 , SQLITE_DETERMINISTIC}};
0 commit comments