Skip to content

Commit 0018e52

Browse files
committed
TxDb integration test
1 parent 23a6206 commit 0018e52

2 files changed

Lines changed: 44 additions & 2 deletions

File tree

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,4 +37,4 @@ repos:
3737
files: \.py$
3838
verbose: true
3939
entry: env PYTHONPATH=bindings/python pylint
40-
args: [-d, "bad-continuation,global-statement,missing-docstring,missing-module-docstring,line-too-long,too-many-arguments,duplicate-code,redefined-outer-name"]
40+
args: [-d, "bad-continuation,global-statement,missing-docstring,missing-module-docstring,line-too-long,too-many-arguments,duplicate-code,redefined-outer-name,too-many-locals"]

test/test_txdb.py

Lines changed: 43 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
import os
22
import sqlite3
33
import subprocess
4+
import random
45
import pytest
56
import genomicsqlite
67

@@ -51,6 +52,9 @@ def genomicsqlite_txdb(txdb):
5152

5253
def test_txdbquery(genomicsqlite_txdb):
5354
conn = genomicsqlite.connect(genomicsqlite_txdb, read_only=True)
55+
assert next(conn.execute("PRAGMA page_size"))[0] == 16384
56+
57+
# one query
5458
results = list(
5559
t[0]
5660
for t in conn.execute(
@@ -65,4 +69,42 @@ def test_txdbquery(genomicsqlite_txdb):
6569
["ENST00000416293.7", "ENST00000261733.7", "ENST00000548536.1", "ENST00000549106.1"]
6670
)
6771

68-
assert next(conn.execute("PRAGMA page_size"))[0] == 16384
72+
# random queries
73+
chroms = list(
74+
conn.execute(
75+
"SELECT tx_chrom, length FROM (SELECT tx_chrom, MAX(tx_end) AS length FROM transcript GROUP BY tx_chrom) WHERE length > 1000000"
76+
)
77+
)
78+
79+
random.seed(0xBADF00D)
80+
for tbl in ("transcript", "cds"):
81+
query = genomicsqlite.genomic_range_rowids_sql(conn, tbl)[1:-1]
82+
pfx = "tx" if tbl == "transcript" else tbl
83+
control_query = f"SELECT _rowid_ FROM {tbl} NOT INDEXED WHERE {pfx}_chrom = ? AND NOT ({pfx}_end < ? OR {pfx}_start > ?) ORDER BY _rowid_"
84+
85+
total_results = 0
86+
for _ in range(1000):
87+
chrom = random.choice(chroms)
88+
beg = random.randint(0, chrom[1] - 65536)
89+
end = beg + random.randint(1, random.choice([16, 256, 4096, 65536]))
90+
ids = list(row[0] for row in conn.execute(query, (chrom[0], beg, end)))
91+
control_ids = list(row[0] for row in conn.execute(control_query, (chrom[0], beg, end)))
92+
assert ids == control_ids
93+
total_results += len(control_ids)
94+
assert total_results in (3802, 1341)
95+
96+
# join cds to exon
97+
cds_exon_counts = (
98+
"SELECT cds._rowid_ AS cds_id, COUNT(exon._rowid_) AS containing_exons FROM cds, exon WHERE exon._rowid_ IN "
99+
+ genomicsqlite.genomic_range_rowids_sql(conn, "exon", "cds_chrom", "cds_start", "cds_end")
100+
+ " AND cds_start >= exon_start and cds_end <= exon_end GROUP BY cds._rowid_"
101+
)
102+
cds_exon_count_hist = list(
103+
conn.execute(
104+
f"SELECT containing_exons, count(cds_id) AS cds_count FROM ({cds_exon_counts}) GROUP BY containing_exons ORDER BY containing_exons"
105+
)
106+
)
107+
for elt in cds_exon_count_hist:
108+
print(elt)
109+
assert cds_exon_count_hist[0] == (1, 168266)
110+
assert cds_exon_count_hist[1] == (2, 71159)

0 commit comments

Comments
 (0)