77 * - array values are stored as JSON
88 */
99#include " common.hpp"
10- #include < atomic>
11- #include < chrono>
1210#include < cmath>
1311#include < functional>
1412#include < getopt.h>
1513#include < htslib/vcf.h>
1614#include < map>
1715#include < set>
1816#include < sstream>
19- #include < thread>
2017#include < unistd.h>
21- #include < vector>
2218
2319// unpack each bcf_hrec_t with the key type (e.g. INFO, FORMAT) into an easier-to-use map
2420vector<map<string, string>> extract_hrecs (bcf_hdr_t *hdr, const char *key,
@@ -513,98 +509,33 @@ void insert_genotypes(bcf_hdr_t *hdr, bcf1_t *rec, vector<map<string, string>> &
513509}
514510
515511// stream BCF records using a background thread
// NOTE: this span is a rendered diff. Lines marked '-' belong to the removed implementation,
// which managed its own ring of bcf1_t records and its own worker thread. Lines marked '+'
// belong to the replacement, which derives from BackgroundProducer<shared_ptr<bcf1_t>> and
// only overrides Produce(). BackgroundProducer is declared outside this view; presumably it
// owns the ring and the worker thread -- TODO confirm against its definition.
516- class BCFReader {
512+ class BCFReader : public BackgroundProducer <shared_ptr<bcf1_t >> {
513+ public:
// vcf and hdr are stored as raw pointers and are not released by this class;
// ringsize is forwarded unchanged to the BackgroundProducer constructor.
514+ BCFReader (vcfFile *vcf, bcf_hdr_t *hdr, int ringsize)
515+ : BackgroundProducer<shared_ptr<bcf1_t >>(ringsize), vcf_(vcf), hdr_(hdr) {}
516+
517+ private:
517518 vcfFile *vcf_;
518519 bcf_hdr_t *hdr_;
519520
520- vector<unique_ptr<bcf1_t , void (*)(bcf1_t *)>> ring_;
521- unique_ptr<thread> worker_;
522- atomic<bool > stop_;
523- int err_ = 0 , errcode_ = 0 ;
524- atomic<long long > p_, // produced
525- c_; // if c_>0 then item (c_-1)%R is currently being consumed
526- chrono::duration<double > p_spin_ = chrono::duration<double >::zero(),
527- c_spin_ = chrono::duration<double >::zero();
528- chrono::time_point<chrono::high_resolution_clock> t0_;
529-
// (removed) Producer loop run on the worker thread: reads a record into slot p_ % R and
// unpacks it, incrementing p_ only when both steps succeed. Any nonzero bcf_read() result
// sets stop_; err_/errcode_ are recorded unless the result is -1 with a zero rec->errcode.
// After each record it sleeps in 1 ms steps while p_ is exactly R-1 ahead of max(c_, 1).
530- void background () {
531- t0_ = chrono::high_resolution_clock::now ();
532- auto R = ring_.size ();
533- do {
534- assert (p_ >= c_);
535- bcf1_t *rec = ring_[p_ % R].get ();
536- int ret = bcf_read (vcf_, hdr_, rec);
537- if (ret != 0 ) {
538- if (ret != -1 || rec->errcode ) {
539- err_ = ret;
540- errcode_ = rec->errcode ;
541- }
542- stop_ = true ;
543- } else {
544- ret = bcf_unpack (rec, BCF_UN_ALL);
545- if (ret != 0 ) {
546- err_ = ret;
547- stop_ = true ;
548- } else {
549- ++p_;
550- }
551- }
552- auto t_spin = chrono::high_resolution_clock::now ();
553- for (int i = 0 ; !stop_ && p_ - max (c_.load (), 1LL ) == R - 1 ; ++i) {
554- this_thread::sleep_for (chrono::milliseconds (1 ));
555- }
556- p_spin_ += chrono::high_resolution_clock::now () - t_spin;
557- } while (!stop_);
558- }
559-
560- public:
// (removed) Constructor: asserts ringsize > 1 and pre-allocates ringsize records with
// bcf_init(), each released through bcf_destroy.
561- BCFReader (vcfFile *vcf, bcf_hdr_t *hdr, int ringsize) : vcf_(vcf), hdr_(hdr) {
562- assert (ringsize > 1 );
563- stop_ = false ;
564- p_ = 0 ;
565- c_ = 0 ;
566- for (int i = 0 ; i < ringsize; i++) {
567- ring_.emplace_back (bcf_init (), &bcf_destroy);
// (added) Produce(): fills `it` with the next record. A null `it` is first given a fresh
// bcf_init() record owned by the shared_ptr with bcf_destroy as deleter; a non-null `it`
// is reused as the read target.
// Returns false when bcf_read() returns -1 (presumably end of input -- confirm against the
// htslib documentation), true after a record has been read and unpacked.
// Throws runtime_error when bcf_read() returns any other nonzero value or leaves a nonzero
// errcode on the record, and when bcf_unpack(..., BCF_UN_ALL) returns nonzero.
521+ bool Produce (shared_ptr<bcf1_t > &it) override {
522+ if (!it) {
523+ it = shared_ptr<bcf1_t >(bcf_init (), &bcf_destroy);
568524 }
569- }
570-
// (removed) Consumer side: starts the worker thread on the first call, then yields until a
// record is available or stop_ is set. Once stopped, it joins the worker and throws
// runtime_error if err_ or errcode_ is set, or joins and returns nullptr if every produced
// record has been consumed. Otherwise it returns the next ring slot and advances c_.
571- bcf1_t *read () {
572- if (!worker_) {
573- worker_.reset (new thread ([this ]() { this ->background (); }));
574- }
575- auto t_spin = chrono::high_resolution_clock::now ();
576- while (!stop_ && p_ == c_)
577- this_thread::yield ();
578- c_spin_ += chrono::high_resolution_clock::now () - t_spin;
579- if (stop_) {
580- if (err_ || errcode_) {
581- worker_->join ();
582- ostringstream msg;
583- msg << " vcf_into_sqlite: failed reading VCF; bcf_read() -> " << err_
584- << " bcf1_t::errcode = " << errcode_ << ' \n ' ;
585- throw runtime_error (msg.str ());
586- }
587- if (c_ == p_) {
588- worker_->join ();
589- return nullptr ;
590- }
525+ int ret = bcf_read (vcf_, hdr_, it.get ());
526+ if (ret == -1 ) {
527+ return false ;
528+ } else if (ret != 0 || it->errcode ) {
529+ ostringstream msg;
530+ msg << " VCF parser failed: bcf1_read() -> " << ret
531+ << " , bcf1_t::errcode = " << it->errcode ;
532+ throw runtime_error (msg.str ());
591533 }
592- assert (c_ < p_);
593- return ring_[c_++ % ring_.size ()].get ();
594- }
595-
// (removed) cancel(): sets stop_ and joins the worker thread if it was ever started.
596- void cancel () {
597- stop_ = true ;
598- if (worker_) {
599- worker_->join ();
534+ ret = bcf_unpack (it.get (), BCF_UN_ALL);
535+ if (ret != 0 ) {
536+ throw runtime_error (" Corrupt VCF/BCF record; bcf_unpack() -> " + to_string (ret));
600537 }
601- }
602-
// (removed) log(): writes to cerr the consumed-record count, the time elapsed since t0_,
// and the accumulated producer and consumer wait times.
603- void log () {
604- chrono::duration<double > elapsed = chrono::high_resolution_clock::now () - t0_;
605- cerr << c_ << " record(s) processed in " << elapsed.count () << " s"
606- << " ; producer thread spun for " << p_spin_.count () << " s"
607- << " ; consumer thread spun for " << c_spin_.count () << " s" << endl;
538+ return true ;
608539 }
609540};
610541
@@ -778,8 +709,9 @@ int main(int argc, char *argv[]) {
778709 << (format_hrecs.empty () ? " ..." : " & genotypes..." ) << endl;
779710
780711 BCFReader reader (vcf.get (), hdr.get (), 64 );
781- bcf1_t *rec;
782- while ((rec = reader.read ())) {
712+ while (reader.next ()) {
713+ bcf1_t *rec = reader.item ().get ();
714+ assert (rec);
783715 try {
784716 insert_variant (hdr.get (), rec, info_hrecs, *insert_variant_stmt);
785717 if (!format_hrecs.empty ()) {
@@ -791,11 +723,11 @@ int main(int argc, char *argv[]) {
791723 *insert_genotype_stmt);
792724 }
793725 } catch (exception &exn) {
794- reader.cancel ();
726+ reader.abort ();
795727 throw exn;
796728 }
797729 }
798- progress && ( reader.log (), true ) ;
730+ progress &&cerr << reader.log () << endl ;
799731
800732 // create GRI
801733 if (gri) {
0 commit comments