11import os
22import sqlite3
33import subprocess
4+ import random
45import pytest
56import genomicsqlite
67
@@ -51,6 +52,9 @@ def genomicsqlite_txdb(txdb):
5152
5253def test_txdbquery (genomicsqlite_txdb ):
5354 conn = genomicsqlite .connect (genomicsqlite_txdb , read_only = True )
55+ assert next (conn .execute ("PRAGMA page_size" ))[0 ] == 16384
56+
57+ # one query
5458 results = list (
5559 t [0 ]
5660 for t in conn .execute (
@@ -65,4 +69,42 @@ def test_txdbquery(genomicsqlite_txdb):
6569 ["ENST00000416293.7" , "ENST00000261733.7" , "ENST00000548536.1" , "ENST00000549106.1" ]
6670 )
6771
68- assert next (conn .execute ("PRAGMA page_size" ))[0 ] == 16384
72+ # random queries
73+ chroms = list (
74+ conn .execute (
75+ "SELECT tx_chrom, length FROM (SELECT tx_chrom, MAX(tx_end) AS length FROM transcript GROUP BY tx_chrom) WHERE length > 1000000"
76+ )
77+ )
78+
79+ random .seed (0xBADF00D )
80+ for tbl in ("transcript" , "cds" ):
81+ query = genomicsqlite .genomic_range_rowids_sql (conn , tbl )[1 :- 1 ]
82+ pfx = "tx" if tbl == "transcript" else tbl
83+ control_query = f"SELECT _rowid_ FROM { tbl } NOT INDEXED WHERE { pfx } _chrom = ? AND NOT ({ pfx } _end < ? OR { pfx } _start > ?) ORDER BY _rowid_"
84+
85+ total_results = 0
86+ for _ in range (1000 ):
87+ chrom = random .choice (chroms )
88+ beg = random .randint (0 , chrom [1 ] - 65536 )
89+ end = beg + random .randint (1 , random .choice ([16 , 256 , 4096 , 65536 ]))
90+ ids = list (row [0 ] for row in conn .execute (query , (chrom [0 ], beg , end )))
91+ control_ids = list (row [0 ] for row in conn .execute (control_query , (chrom [0 ], beg , end )))
92+ assert ids == control_ids
93+ total_results += len (control_ids )
94+ assert total_results in (3802 , 1341 )
95+
96+ # join cds to exon
97+ cds_exon_counts = (
98+ "SELECT cds._rowid_ AS cds_id, COUNT(exon._rowid_) AS containing_exons FROM cds, exon WHERE exon._rowid_ IN "
99+ + genomicsqlite .genomic_range_rowids_sql (conn , "exon" , "cds_chrom" , "cds_start" , "cds_end" )
100+ + " AND cds_start >= exon_start and cds_end <= exon_end GROUP BY cds._rowid_"
101+ )
102+ cds_exon_count_hist = list (
103+ conn .execute (
104+ f"SELECT containing_exons, count(cds_id) AS cds_count FROM ({ cds_exon_counts } ) GROUP BY containing_exons ORDER BY containing_exons"
105+ )
106+ )
107+ for elt in cds_exon_count_hist :
108+ print (elt )
109+ assert cds_exon_count_hist [0 ] == (1 , 168266 )
110+ assert cds_exon_count_hist [1 ] == (2 , 71159 )
0 commit comments