1818using namespace std ;
1919
2020// auto-destructing htslib kstring_t
21- unique_ptr <kstring_t , void (*)( kstring_t *) > kstringXX () {
22- return unique_ptr <kstring_t , void (*)( kstring_t *) >(new kstring_t {0 , 0 , 0 }, [](kstring_t *p) {
21+ shared_ptr <kstring_t > kstringXX () {
22+ return shared_ptr <kstring_t >(new kstring_t {0 , 0 , 0 }, [](kstring_t *p) {
2323 free (p->s );
2424 delete p;
2525 });
@@ -108,6 +108,50 @@ int write_tags_json(const map<string, int> &readgroups, const vector<char *> &sa
108108 return rg_id;
109109}
110110
111+ struct SamItem {
112+ shared_ptr<kstring_t > line;
113+ vector<char *> fields;
114+ shared_ptr<bam1_t > rec;
115+ };
116+
117+ class SamReader : public BackgroundProducer <SamItem> {
118+ public:
119+ SamReader (samFile *sam, bam_hdr_t *hdr, int ringsize)
120+ : BackgroundProducer<SamItem>(ringsize), sam_(sam), hdr_(hdr) {}
121+
122+ private:
123+ samFile *sam_;
124+ bam_hdr_t *hdr_;
125+
126+ bool Produce (SamItem &it) override {
127+ if (!it.line ) {
128+ it.line = kstringXX ();
129+ }
130+ it.line ->l = 0 ;
131+ it.fields .clear ();
132+ if (!it.rec ) {
133+ it.rec = shared_ptr<bam1_t >(bam_init1 (), &bam_destroy1);
134+ }
135+ int ret = sam_read1 (sam_, hdr_, it.rec .get ());
136+ if (ret == -1 ) {
137+ return false ;
138+ } else if (ret < 0 ) {
139+ throw runtime_error (" SAM parser error: sam_read1() -> " + to_string (ret));
140+ }
141+ // Load the tab-split SAM line too, as some fields are easier to access that way
142+ ret = sam_format1 (hdr_, it.rec .get (), it.line .get ());
143+ if (ret < 0 ) {
144+ throw runtime_error (" Corrupt SAM record; sam_format1() -> " + to_string (ret));
145+ }
146+ split (it.line ->s , ' \t ' , back_inserter (it.fields ));
147+ if (it.fields .size () < 11 ) {
148+ throw runtime_error (" Corrupt SAM record; fields.size() = " +
149+ to_string (it.fields .size ()));
150+ }
151+ return true ;
152+ }
153+ };
154+
111155void help () {
112156 cout << " sam_into_sqlite: import SAM/BAM/CRAM into GenomicSQLite database" << ' \n '
113157 << GIT_REVISION << " " << __DATE__ << ' \n '
@@ -270,19 +314,12 @@ int main(int argc, char *argv[]) {
270314
271315 // stream bam1_t records
272316 progress &&cerr << " inserting reads..." ;
273- unique_ptr< bam1_t , void (*)( bam1_t *)> rec ( bam_init1 ( ), bam_destroy1 );
317+ SamReader reader (sam. get (), hdr. get ( ), 64 );
274318 OStringStream cigarstr, tagsbuf;
275- auto sam_line = kstringXX ();
276- vector<char *> sam_fields;
277- int rc;
278- while ((rc = sam_read1 (sam.get (), hdr.get (), rec.get ())) >= 0 ) {
279- // Load the tab-split SAM line too, as some fields are easier to access that way
280- if (sam_format1 (hdr.get (), rec.get (), sam_line.get ()) < 0 ) {
281- throw runtime_error (" corrupt SAM" );
282- }
283- sam_fields.clear ();
284- split (sam_line->s , ' \t ' , back_inserter (sam_fields));
285- assert (sam_fields.size () >= 11 );
319+ while (reader.next ()) {
320+ SamItem &it = reader.item ();
321+ bam1_t *rec = it.rec .get ();
322+ auto &sam_fields = it.fields ;
286323
287324 insert_read.reset ();
288325 insert_read.clearBindings ();
@@ -292,7 +329,7 @@ int main(int argc, char *argv[]) {
292329 insert_read.bind (2 , rec->core .tid ); // rid
293330 if (rec->core .pos >= 0 ) {
294331 insert_read.bind (3 , rec->core .pos ); // pos
295- auto endpos = bam_endpos (rec. get () );
332+ auto endpos = bam_endpos (rec);
296333 if (endpos >= rec->core .pos ) {
297334 insert_read.bind (4 , endpos); // endpos
298335 }
@@ -322,7 +359,7 @@ int main(int argc, char *argv[]) {
322359 insert_seqs.reset ();
323360 insert_seqs.clearBindings ();
324361 insert_seqs.bind (1 , rowid);
325- insert_seqs.bindNoCopy (2 , bam_get_qname (rec. get () )); // qname
362+ insert_seqs.bindNoCopy (2 , bam_get_qname (rec)); // qname
326363 char *seq = sam_fields[9 ];
327364 if (*seq && strcmp (seq, " *" ))
328365 insert_seqs.bindNoCopy (3 , seq); // seq
@@ -336,9 +373,7 @@ int main(int argc, char *argv[]) {
336373 insert_tags.bind (2 , tagsbuf.Get ()); // tags_json
337374 insert_tags.exec ();
338375 }
339- if (rc != -1 ) {
340- throw std::runtime_error (" error reading SAM records" );
341- }
376+ progress &&cerr << reader.log () << endl;
342377
343378 // create indices
344379 if (gri) {
0 commit comments