Skip to content

Commit 845383c

Browse files
committed
SAM background producer
1 parent 32e2e05 commit 845383c

2 files changed

Lines changed: 55 additions & 20 deletions

File tree

loaders/sam_into_sqlite.cc

Lines changed: 54 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
using namespace std;
1919

2020
// auto-destructing htslib kstring_t
21-
unique_ptr<kstring_t, void (*)(kstring_t *)> kstringXX() {
22-
return unique_ptr<kstring_t, void (*)(kstring_t *)>(new kstring_t{0, 0, 0}, [](kstring_t *p) {
21+
shared_ptr<kstring_t> kstringXX() {
22+
return shared_ptr<kstring_t>(new kstring_t{0, 0, 0}, [](kstring_t *p) {
2323
free(p->s);
2424
delete p;
2525
});
@@ -108,6 +108,50 @@ int write_tags_json(const map<string, int> &readgroups, const vector<char *> &sa
108108
return rg_id;
109109
}
110110

111+
struct SamItem {
112+
shared_ptr<kstring_t> line;
113+
vector<char *> fields;
114+
shared_ptr<bam1_t> rec;
115+
};
116+
117+
class SamReader : public BackgroundProducer<SamItem> {
118+
public:
119+
SamReader(samFile *sam, bam_hdr_t *hdr, int ringsize)
120+
: BackgroundProducer<SamItem>(ringsize), sam_(sam), hdr_(hdr) {}
121+
122+
private:
123+
samFile *sam_;
124+
bam_hdr_t *hdr_;
125+
126+
bool Produce(SamItem &it) override {
127+
if (!it.line) {
128+
it.line = kstringXX();
129+
}
130+
it.line->l = 0;
131+
it.fields.clear();
132+
if (!it.rec) {
133+
it.rec = shared_ptr<bam1_t>(bam_init1(), &bam_destroy1);
134+
}
135+
int ret = sam_read1(sam_, hdr_, it.rec.get());
136+
if (ret == -1) {
137+
return false;
138+
} else if (ret < 0) {
139+
throw runtime_error("SAM parser error: sam_read1() -> " + to_string(ret));
140+
}
141+
// Load the tab-split SAM line too, as some fields are easier to access that way
142+
ret = sam_format1(hdr_, it.rec.get(), it.line.get());
143+
if (ret < 0) {
144+
throw runtime_error("Corrupt SAM record; sam_format1() -> " + to_string(ret));
145+
}
146+
split(it.line->s, '\t', back_inserter(it.fields));
147+
if (it.fields.size() < 11) {
148+
throw runtime_error("Corrupt SAM record; fields.size() = " +
149+
to_string(it.fields.size()));
150+
}
151+
return true;
152+
}
153+
};
154+
111155
void help() {
112156
cout << "sam_into_sqlite: import SAM/BAM/CRAM into GenomicSQLite database" << '\n'
113157
<< GIT_REVISION << " " << __DATE__ << '\n'
@@ -270,19 +314,12 @@ int main(int argc, char *argv[]) {
270314

271315
// stream bam1_t records
272316
progress &&cerr << "inserting reads...";
273-
unique_ptr<bam1_t, void (*)(bam1_t *)> rec(bam_init1(), bam_destroy1);
317+
SamReader reader(sam.get(), hdr.get(), 64);
274318
OStringStream cigarstr, tagsbuf;
275-
auto sam_line = kstringXX();
276-
vector<char *> sam_fields;
277-
int rc;
278-
while ((rc = sam_read1(sam.get(), hdr.get(), rec.get())) >= 0) {
279-
// Load the tab-split SAM line too, as some fields are easier to access that way
280-
if (sam_format1(hdr.get(), rec.get(), sam_line.get()) < 0) {
281-
throw runtime_error("corrupt SAM");
282-
}
283-
sam_fields.clear();
284-
split(sam_line->s, '\t', back_inserter(sam_fields));
285-
assert(sam_fields.size() >= 11);
319+
while (reader.next()) {
320+
SamItem &it = reader.item();
321+
bam1_t *rec = it.rec.get();
322+
auto &sam_fields = it.fields;
286323

287324
insert_read.reset();
288325
insert_read.clearBindings();
@@ -292,7 +329,7 @@ int main(int argc, char *argv[]) {
292329
insert_read.bind(2, rec->core.tid); // rid
293330
if (rec->core.pos >= 0) {
294331
insert_read.bind(3, rec->core.pos); // pos
295-
auto endpos = bam_endpos(rec.get());
332+
auto endpos = bam_endpos(rec);
296333
if (endpos >= rec->core.pos) {
297334
insert_read.bind(4, endpos); // endpos
298335
}
@@ -322,7 +359,7 @@ int main(int argc, char *argv[]) {
322359
insert_seqs.reset();
323360
insert_seqs.clearBindings();
324361
insert_seqs.bind(1, rowid);
325-
insert_seqs.bindNoCopy(2, bam_get_qname(rec.get())); // qname
362+
insert_seqs.bindNoCopy(2, bam_get_qname(rec)); // qname
326363
char *seq = sam_fields[9];
327364
if (*seq && strcmp(seq, "*"))
328365
insert_seqs.bindNoCopy(3, seq); // seq
@@ -336,9 +373,7 @@ int main(int argc, char *argv[]) {
336373
insert_tags.bind(2, tagsbuf.Get()); // tags_json
337374
insert_tags.exec();
338375
}
339-
if (rc != -1) {
340-
throw std::runtime_error("error reading SAM records");
341-
}
376+
progress &&cerr << reader.log() << endl;
342377

343378
// create indices
344379
if (gri) {

loaders/vcf_into_sqlite.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -527,7 +527,7 @@ class BCFReader : public BackgroundProducer<shared_ptr<bcf1_t>> {
527527
return false;
528528
} else if (ret != 0 || it->errcode) {
529529
ostringstream msg;
530-
msg << "VCF parser failed: bcf1_read() -> " << ret
530+
msg << "VCF parser error: bcf1_read() -> " << ret
531531
<< ", bcf1_t::errcode = " << it->errcode;
532532
throw runtime_error(msg.str());
533533
}

0 commit comments

Comments
 (0)