diff --git a/src/Matcher.cpp b/src/Matcher.cpp index e19d3ec..2758781 100644 --- a/src/Matcher.cpp +++ b/src/Matcher.cpp @@ -28,7 +28,10 @@ MatchGroup::MatchGroup(int _num_samples, double _cm_position) : num_samples(_num_samples), cm_position(_cm_position) { for (int i = 0; i < num_samples; i++) { + // For each sample, we have a mapping match->score assigning a + // matching score to each candidate closest cousin match_candidates_counts.push_back(std::unordered_map()); + match_candidates[i] = {}; } } @@ -44,6 +47,38 @@ MatchGroup::MatchGroup(const std::vector& target_ids, } } +MatchGroupDifference::MatchGroupDifference(const MatchGroup& prev, const MatchGroup& next, const int _site) + : site(_site) { + // added = std::unordered_map>(); + // removed = std::unordered_map>(); + if (prev.cm_position >= next.cm_position) { + throw std::runtime_error("Match group position out of order"); + } + if (prev.num_samples != next.num_samples) { + throw std::runtime_error("Incompatible match groups"); + } + for (int i = 0; i < prev.num_samples; i++) { + // "added" is "next_group - prev_group" + std::unordered_set added_i(next.match_candidates.at(i)); + for (auto s : prev.match_candidates.at(i)) { + added_i.erase(s); + } + + // "removed" is "prev_group - next_group" + std::unordered_set removed_i(prev.match_candidates.at(i)); + for (auto s : next.match_candidates.at(i)) { + removed_i.erase(s); + } + + if (added_i.size() > 0) { + added[i] = added_i; + } + if (removed_i.size() > 0) { + removed[i] = removed_i; + } + } +} + void MatchGroup::filter_matches(int min_matches) { // First set the candidates for this group // For sequences of high index, we lower the number of sequences used to save time and memory @@ -56,6 +91,8 @@ void MatchGroup::filter_matches(int min_matches) { } else if (i < 1000) { for (auto counts : match_candidates_counts.at(i)) { + // TODO: should move this min(2, min_matches) to the "i<100" clause + // and have this identical with the 10,000 one if (counts.second >= std::min(2, min_matches)) { match_candidates.at(i).insert(counts.first); } @@ -180,11 +217,20 @@ Matcher::Matcher(int _n, const std::vector& _genetic_positions, double _ std::cout << "Will use " << query_sites.size() << " query sites and " << match_group_sites.size() << " match_group_sites" << std::endl; - match_groups.reserve(match_group_sites.size()); - for (int match_group_site : match_group_sites) { - match_groups.emplace_back(num_samples, genetic_positions[match_group_site]); - } + // once "current_group" has been constructed, we can process "prev_group" + // and compute the match_group_diff + current_group = MatchGroup(num_samples, genetic_positions[0]); + prev_group = MatchGroup(num_samples, genetic_positions[0] - 1); + prevprev_group = MatchGroup(num_samples, genetic_positions[0] - 2); + match_diffs.reserve(match_group_sites.size()); + + // match_groups.reserve(match_group_sites.size()); + // for (int match_group_site : match_group_sites) { + // match_groups.emplace_back(num_samples, genetic_positions[match_group_site]); + // } + + // PBWT quantities sorting.reserve(num_samples); next_sorting.reserve(num_samples); permutation.reserve(num_samples); @@ -235,8 +281,34 @@ void Matcher::process_site(const std::vector& genotype) { // Threading-neighbor queries if (match_group_idx < (static_cast(match_group_sites.size()) - 1) && (sites_processed >= match_group_sites.at(match_group_idx + 1))) { + // std::cout << "MATCH GROUP\n"; + // std::cout << match_group_idx << "\n"; + // Process all matches for this group + // std::cout << "filtering\n"; + current_group.filter_matches(min_matches); + // int total_current = 0; + // for (int k = 0; k < num_samples; k++) { + // total_current += current_group.match_candidates.at(k).size(); + // } + // std::cout << "TOTAL CURRENT: " << total_current << "\n"; + + // Share top matches for adjacent groups + if (match_group_idx > 0) { + // std::cout << "tops\n"; + prev_group.insert_tops_from(current_group); + current_group.insert_tops_from(prev_group); + // std::cout << "matchdiff\n"; + match_diffs.emplace_back(prevprev_group, prev_group, match_group_sites[match_group_idx - 1]); + } + + // std::cout << "increment\n"; + prevprev_group = prev_group; + prev_group = current_group; match_group_idx++; - match_groups.at(match_group_idx - 1).filter_matches(min_matches); + + // std::cout << "new matchgroup\n"; + current_group = MatchGroup(num_samples, genetic_positions[match_group_sites[match_group_idx]]); + // std::cout << "done\n"; } // If we've reached a query site, query @@ -274,7 +346,7 @@ void Matcher::process_site(const std::vector& genotype) { } for (int m : matches) { std::unordered_map& mmmap = - match_groups.at(match_group_idx).match_candidates_counts.at(i); + current_group.match_candidates_counts.at(i); if (m >= i) { throw std::runtime_error("Illegal match candidate " + std::to_string(m) + ", something is very wrong"); @@ -290,7 +362,12 @@ void Matcher::process_site(const std::vector& genotype) { // Special case for last query if (next_query_site_idx == static_cast(query_sites.size())) { - match_groups.at(match_group_sites.size() - 1).filter_matches(min_matches); + // match_groups.at(match_group_sites.size() - 1).filter_matches(min_matches); + current_group.filter_matches(min_matches); + prev_group.insert_tops_from(current_group); + current_group.insert_tops_from(prev_group); + match_diffs.emplace_back(prevprev_group, prev_group, match_group_sites[match_group_idx - 1]); + match_diffs.emplace_back(prev_group, current_group, match_group_sites[match_group_idx]); } } sites_processed++; @@ -298,46 +375,77 @@ void Matcher::process_site(const std::vector& genotype) { // Propagate top 4 matches from left and right match groups void Matcher::propagate_adjacent_matches() { - for (int i = 1; i < static_cast(match_groups.size()); i++) { - MatchGroup& group = match_groups.at(i); - MatchGroup& prev = match_groups.at(i - 1); - group.insert_tops_from(prev); - prev.insert_tops_from(group); - } + // for (int i = 1; i < static_cast(match_groups.size()); i++) { + // MatchGroup& group = match_groups.at(i); + // MatchGroup& prev = match_groups.at(i - 1); + // group.insert_tops_from(prev); + // prev.insert_tops_from(group); + // } } -std::vector Matcher::get_matches() { - return match_groups; -} +// std::vector Matcher::get_matches() { +// return match_groups; +// } -// This returns a list (groups) of lists (targets) of sets (matches) -std::vector>> -Matcher::serializable_matches(std::vector& target_ids) { - std::vector>> serialized_matches(match_groups.size()); - int group_counter = 0; - for (MatchGroup& match_group : match_groups) { - std::vector> current_group_matches(target_ids.size()); - int match_counter = 0; - for (int target_id : target_ids) { - current_group_matches[match_counter] = std::move(match_group.match_candidates.at(target_id)); - match_group.match_candidates.at(target_id).clear(); - match_counter++; +// This returns a list (match-group sites) of lists (probands) of sets (matches) +// std::vector>> +// Matcher::serializable_matches(std::vector& target_ids) { +// std::vector>> serialized_matches(match_groups.size()); +// int group_counter = 0; +// for (MatchGroup& match_group : match_groups) { +// std::vector> current_group_matches(target_ids.size()); +// int match_counter = 0; +// for (int target_id : target_ids) { +// current_group_matches[match_counter] = std::move(match_group.match_candidates.at(target_id)); +// match_group.match_candidates.at(target_id).clear(); +// match_counter++; +// } +// serialized_matches[group_counter] = std::move(current_group_matches); +// group_counter++; +// } +// return serialized_matches; +// } + +// This returns a list of uint-quadruples: +// sample_id: the sample this entry refers to +// target_id: the closest cousin candidate +// added/removed: 1/0 depending on type of entry +// cm_idx: index of position of change indexed into the genetic_positions vector +std::vector> Matcher::serializable_matches(std::vector& sample_ids) { + std::vector> out; + int group_counter; + for (MatchGroupDifference& group_diff : match_diffs) { + int site = group_diff.site; + // + for (int sample_id : sample_ids) { + if (group_diff.added.find(sample_id) != group_diff.added.end()) { + for (auto target_id : group_diff.added[sample_id]) { + // int entry[4] = {sample_id, target_id, 1, site}; + std::vector entry = {sample_id, target_id, 1, site}; + out.push_back(entry); + } + } + if (group_diff.removed.find(sample_id) != group_diff.removed.end()) { + for (auto target_id : group_diff.removed[sample_id]) { + // int entry[4] = {}; + std::vector entry = {sample_id, target_id, 0, site}; + out.push_back(entry); + } + } } - serialized_matches[group_counter] = std::move(current_group_matches); - group_counter++; } - return serialized_matches; + return out; } -void Matcher::clear() { - match_groups.clear(); -} +// void Matcher::clear() { +// match_groups.clear(); +// } std::vector Matcher::cm_positions() { std::vector cms; - cms.reserve(match_groups.size()); - for (MatchGroup& match_group : match_groups) { - cms.push_back(match_group.cm_position); + cms.reserve(match_diffs.size()); + for (MatchGroupDifference& match_diff : match_diffs) { + cms.push_back(genetic_positions[match_diff.site]); } return cms; } diff --git a/src/Matcher.hpp b/src/Matcher.hpp index a7039ec..55bcc27 100644 --- a/src/Matcher.hpp +++ b/src/Matcher.hpp @@ -24,6 +24,7 @@ /// for a certain interval, store the matches for all samples class MatchGroup { public: + MatchGroup() : num_samples(0), cm_position(0.0) {}; MatchGroup(int _num_samples, double cm_position); MatchGroup(const std::vector& target_ids, const std::vector>& matches, const double _cm_position); @@ -39,6 +40,26 @@ class MatchGroup { double cm_position = 0.0; }; +class MatchGroupDifference { +public: + MatchGroupDifference(const MatchGroup& prev, const MatchGroup& next, const int _site); + +public: + std::unordered_map> added; + std::unordered_map> removed; + int site = 0; +}; + +class MatchGroupEntry { +public: + MatchGroupEntry(int _sample_id, int _target_id, int _added, int _site) + : sample_id(_sample_id), target_id(_target_id), added(_added), site(_site) {}; + const int sample_id = 0; + const int target_id = 0; + const int added = 0; + const int site = 0; +}; + class Matcher { public: Matcher(int _n, const std::vector& _genetic_positions, double _query_interval_size, @@ -47,11 +68,12 @@ class Matcher { // Do all the work void process_site(const std::vector& genotype); void propagate_adjacent_matches(); - void clear(); + // void clear(); - std::vector get_matches(); - std::vector>> - serializable_matches(std::vector& target_ids); + // std::vector get_matches(); + // std::vector>> + // serializable_matches(std::vector& target_ids); + std::vector> serializable_matches(std::vector& sample_ids); std::vector cm_positions(); std::vector get_sorting(); @@ -66,6 +88,7 @@ class Matcher { std::vector query_sites; std::vector match_group_sites; int num_sites = 0; + std::vector match_diffs; // matches in these groups are considered together in the hmm private: @@ -73,7 +96,10 @@ class Matcher { int sites_processed = 0; int next_query_site_idx = 0; int match_group_idx = 0; - std::vector match_groups; + // std::vector match_groups; + MatchGroup current_group; + MatchGroup prev_group; + MatchGroup prevprev_group; std::vector sorting; std::vector next_sorting; std::vector permutation; diff --git a/src/ThreadsLowMem.cpp b/src/ThreadsLowMem.cpp index 2ed8ca9..fd46975 100644 --- a/src/ThreadsLowMem.cpp +++ b/src/ThreadsLowMem.cpp @@ -96,15 +96,33 @@ ThreadsLowMem::ThreadsLowMem(const std::vector _target_ids, } } -// Initialize Threads-Viterbi instances using IDs and genetic positions for each match group -void ThreadsLowMem::initialize_viterbi(std::vector>>& match_ids, - const std::vector& cm_positions) { - if (match_ids.size() != cm_positions.size() || match_ids.size() < 1) { - throw std::runtime_error("Match-data is missing or does not have same shape as genetic map"); - } - match_groups.reserve(match_ids.size()); - for (std::size_t i = 0; i < match_ids.size(); i++) { - match_groups.emplace_back(target_ids, match_ids.at(i), cm_positions.at(i)); +// // Initialize Threads-Viterbi instances using IDs and genetic positions for each match group +// void ThreadsLowMem::initialize_viterbi(std::vector>>& match_ids, +// const std::vector& cm_positions) { +// if (match_ids.size() != cm_positions.size() || match_ids.size() < 1) { +// throw std::runtime_error("Match-data is missing or does not have same shape as genetic map"); +// } +// match_groups.reserve(match_ids.size()); +// for (std::size_t i = 0; i < match_ids.size(); i++) { +// match_groups.emplace_back(target_ids, match_ids.at(i), cm_positions.at(i)); +// } + +// match_group_idx = 0; +// hmm_sites_processed = 0; +// for (int target_id : target_ids) { +// if (target_id == 0) { +// continue; +// } +// // THE WHOLE NOMENCLATURE HERE IS WRONG WE'VE SWAPPED SAMPLE/TARGET WRT Matcher.cpp +// std::vector sample_ids(match_groups.at(0).match_candidates.at(target_id).begin(), +// match_groups.at(0).match_candidates.at(target_id).end()); +// hmms.emplace(target_id, ViterbiState(target_id, sample_ids)); +// } +// } + +void ThreadsLowMem::initialize_viterbi(std::vector>& match_group_entries) { + for (auto& mge : match_group_entries) { + match_group_queue.emplace(mge[0], mge[1], mge[2], mge[3]); } match_group_idx = 0; @@ -113,32 +131,51 @@ void ThreadsLowMem::initialize_viterbi(std::vector sample_ids(match_groups.at(0).match_candidates.at(target_id).begin(), - match_groups.at(0).match_candidates.at(target_id).end()); + std::vector sample_ids; hmms.emplace(target_id, ViterbiState(target_id, sample_ids)); } } // Pass genotypes for a single site through the intialized Threads-Viterbi instances void ThreadsLowMem::process_site_viterbi(const std::vector& genotype) { - bool group_change = false; + // bool group_change = false; + + // Here we pop the queue + while (!match_group_queue.empty() && hmm_sites_processed >= match_group_queue.front().site) { + MatchGroupEntry& entry = match_group_queue.front(); + int sample_id = entry.sample_id; + int target_id = entry.target_id; + if (entry.added == 1) { + hmms.at(sample_id).add_target(target_id); + } else if (entry.added == 0) { + hmms.at(sample_id).remove_target(target_id); + } + match_group_queue.pop(); + } - if (match_group_idx < (static_cast(match_groups.size()) - 1) && - (genetic_positions.at(hmm_sites_processed) >= - match_groups.at(match_group_idx + 1).cm_position)) { - match_group_idx++; - group_change = true; + if (hmm_sites_processed == 0) { + for (auto& it : hmms) { + it.second.initialize(); + } } + + // if (match_group_idx < (static_cast(match_groups.size()) - 1) && + // (genetic_positions.at(hmm_sites_processed) >= + // match_groups.at(match_group_idx + 1).cm_position)) { + // match_group_idx++; + // group_change = true; + // } double k = 2. * 0.01 * cm_sizes.at(hmm_sites_processed); double l = 2. * mutation_rate * bp_sizes.at(hmm_sites_processed); + for (int target_id : target_ids) { if (target_id == 0) { continue; } - if (group_change) { - hmms.at(target_id).set_samples( - match_groups.at(match_group_idx).match_candidates.at(target_id)); - } + // if (group_change) { + // hmms.at(target_id).set_samples( + // match_groups.at(match_group_idx).match_candidates.at(target_id)); + // } double t = expected_branch_lengths.at(target_id); double rho_c = k * t; @@ -146,9 +183,11 @@ void ThreadsLowMem::process_site_viterbi(const std::vector& genotype) { : -(std::log1p(-std::exp(-(k * t))) - std::log(target_id)); double mu_c = l * t; double mu = -std::log1p(-std::exp(-(l * t))); + hmms.at(target_id).process_site(genotype, rho, rho_c, mu, mu_c); } hmm_sites_processed++; + return; } diff --git a/src/ThreadsLowMem.hpp b/src/ThreadsLowMem.hpp index 77ad06f..c8154c5 100644 --- a/src/ThreadsLowMem.hpp +++ b/src/ThreadsLowMem.hpp @@ -22,6 +22,7 @@ #include "ThreadsFastLS.hpp" #include "ViterbiLowMem.hpp" #include +#include #include #include #include @@ -37,8 +38,10 @@ class ThreadsLowMem { // 1. process all sites for the PBWT (done by the Matcher) // 2a. initialize hmms - void initialize_viterbi(std::vector>>& match_ids, - const std::vector& cm_positions); + // void initialize_viterbi(std::vector>>& match_ids, + // const std::vector& cm_positions); + void initialize_viterbi(std::vector>& match_group_entries); + // 2b. process all sites for the hmms void process_site_viterbi(const std::vector& genotype); // 2c. prune branches at regular intervals (i.e. when there's a lot of them, figure this out soon) @@ -85,7 +88,8 @@ class ThreadsLowMem { int hmm_sites_processed = 0; std::unordered_map hmms; int match_group_idx = 0; - std::vector match_groups; + // std::vector match_groups; + std::queue match_group_queue; // 3. Path segment and path dating quantites HMM psmc; diff --git a/src/ViterbiLowMem.cpp b/src/ViterbiLowMem.cpp index 306cde1..abc540f 100644 --- a/src/ViterbiLowMem.cpp +++ b/src/ViterbiLowMem.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include namespace { @@ -121,8 +122,15 @@ ViterbiPath::dump_data_in_range(int start, int end) { out_starts, out_ids, out_heights); } -ViterbiState::ViterbiState(int _target_id, std::vector _sample_ids) - : target_id(_target_id), sample_ids(_sample_ids) { +ViterbiState::ViterbiState(int _target_id, std::vector& _sample_ids) + : target_id(_target_id) { + + for (const auto s : _sample_ids) { + sample_ids.insert(s); + } +} + +void ViterbiState::initialize() { // init current_tracebacks if (sample_ids.size() == 0) { throw std::runtime_error("found no samples for ViterbiState object for sample " + @@ -134,7 +142,8 @@ ViterbiState::ViterbiState(int _target_id, std::vector _sample_ids) current_tracebacks[sample_id] = &traceback_states.at(key); } best_score = 0; - best_match = sample_ids.at(0); + // best_match = sample_ids.at(0); + best_match = *(sample_ids.begin()); } void ViterbiState::process_site(const std::vector& genotype, double rho, double rho_c, @@ -144,6 +153,7 @@ void ViterbiState::process_site(const std::vector& genotype, double rho, do int best_new_match = best_match; double new_score; int observed_allele = genotype.at(target_id); + TracebackNode* prev_best = current_tracebacks.at(best_match); for (int sample_id : sample_ids) { int allele = genotype.at(sample_id); @@ -188,18 +198,41 @@ void ViterbiState::process_site(const std::vector& genotype, double rho, do sites_processed++; } -void ViterbiState::set_samples(std::unordered_set new_sample_ids) { - std::vector new_samples_vec(new_sample_ids.begin(), new_sample_ids.end()); - if (!new_sample_ids.count(best_match)) { - new_samples_vec.push_back(best_match); +// void ViterbiState::set_samples(std::unordered_set new_sample_ids) { +// std::vector new_samples_vec(new_sample_ids.begin(), new_sample_ids.end()); +// if (!new_sample_ids.count(best_match)) { +// new_samples_vec.push_back(best_match); +// } +// for (int sample_id : sample_ids) { +// // clean up branches we definitely won't use +// if (!new_sample_ids.count(sample_id) && sample_id != best_match) { +// current_tracebacks.erase(sample_id); +// } +// } +// sample_ids = new_samples_vec; +// } + +void ViterbiState::add_target(const int sample_id) { + if (sample_ids.find(sample_id) != sample_ids.end()) { + return; + // throw std::runtime_error("Attempting to insert illegal sample into Viterbi state."); + } + sample_ids.insert(sample_id); +} + +void ViterbiState::remove_target(const int sample_id) { + if (sample_ids.find(sample_id) == sample_ids.end()) { + throw std::runtime_error("Attempting to remove illegal sample into Viterbi state."); } - for (int sample_id : sample_ids) { - // clean up branches we definitely won't use - if (!new_sample_ids.count(sample_id) && sample_id != best_match) { - current_tracebacks.erase(sample_id); - } + if (sample_id == best_match) { + // this may result in unexpected behaviour, consider renaming function + // remove_if_not_best + return; + } + sample_ids.erase(sample_id); + if (sample_id != best_match) { + current_tracebacks.erase(sample_id); } - sample_ids = new_samples_vec; } void ViterbiState::prune() { diff --git a/src/ViterbiLowMem.hpp b/src/ViterbiLowMem.hpp index 4a33dfd..e5f2501 100644 --- a/src/ViterbiLowMem.hpp +++ b/src/ViterbiLowMem.hpp @@ -21,6 +21,7 @@ #include #include #include +#include class TracebackNode { public: @@ -59,11 +60,13 @@ class ViterbiPath { class ViterbiState { public: - ViterbiState(int _target_id, std::vector _sample_ids); - + ViterbiState(int _target_id, std::vector& _sample_ids); + void initialize(); void process_site(const std::vector& genotype, double rho, double rho_c, double _mu, double _mu_c); - void set_samples(std::unordered_set new_sample_ids); + // void set_samples(std::unordered_set new_sample_ids); + void add_target(const int sample_id); + void remove_target(const int sample_id); int count_branches() const; void prune(); ViterbiPath traceback(); @@ -79,7 +82,7 @@ class ViterbiState { double best_score = 0.0; int sites_processed = 0; double mutation_penalty = 0.0; - std::vector sample_ids; + std::set sample_ids; std::vector sample_scores; std::unordered_map current_tracebacks; }; diff --git a/src/threads_arg/infer.py b/src/threads_arg/infer.py index 88faeac..5dbee06 100644 --- a/src/threads_arg/infer.py +++ b/src/threads_arg/infer.py @@ -100,9 +100,9 @@ def partial_viterbi(pgen, mode, num_samples_hap, physical_positions, genetic_pos # Warning: this creates big copies of data if num_subsets == 1: - TLM.initialize_viterbi(s_match_group, match_cm_positions) + TLM.initialize_viterbi(s_match_group)#, match_cm_positions) else: - TLM.initialize_viterbi([[s[k] for k in sample_index_subset] for s in s_match_group], match_cm_positions) + TLM.initialize_viterbi([[s[k] for k in sample_index_subset] for s in s_match_group])#, match_cm_positions) M = reader.get_variant_ct() BATCH_SIZE = int(4e7 // num_samples_hap) @@ -244,7 +244,7 @@ def matcher_callback(i, g, mask, matcher): iterate_pgen(pgen, matcher_callback, mask=ac_mask, matcher=matcher) # Add top matches from adjacent sites to each match-chunk - matcher.propagate_adjacent_matches() + # matcher.propagate_adjacent_matches() # From here we parallelise if we can actual_num_threads = min(default_process_count(), num_threads) @@ -284,7 +284,8 @@ def matcher_callback(i, g, mask, matcher): sample_batch = list(range(2 * num_samples)) s_match_group = matcher.serializable_matches(sample_batch) match_cm_positions = matcher.cm_positions() - matcher.clear() + # matcher.clear() + # breakpoint() del matcher gc.collect() thread_id = 1 diff --git a/src/threads_arg_pybind.cpp b/src/threads_arg_pybind.cpp index c829a20..8aab096 100644 --- a/src/threads_arg_pybind.cpp +++ b/src/threads_arg_pybind.cpp @@ -77,6 +77,11 @@ PYBIND11_MODULE(threads_arg_python_bindings, m) { .def_readonly("match_candidates_counts", &MatchGroup::match_candidates_counts) .def_readonly("top_four_maps", &MatchGroup::top_four_maps) .def_readonly("cm_position", &MatchGroup::cm_position); + + py::class_(m, "MatchGroupDifference") + .def_readonly("added", &MatchGroupDifference::added) + .def_readonly("removed", &MatchGroupDifference::removed) + .def_readonly("site", &MatchGroupDifference::site); py::class_(m, "Matcher") .def(py::init, double, double, int, int>(), "Initialize", @@ -88,14 +93,15 @@ PYBIND11_MODULE(threads_arg_python_bindings, m) { .def_readonly("query_interval_size", &Matcher::query_interval_size) .def_readonly("num_samples", &Matcher::num_samples) .def_readonly("num_sites", &Matcher::num_sites) + .def_readonly("match_diffs", &Matcher::match_diffs) .def("process_site", &Matcher::process_site) - .def("propagate_adjacent_matches", &Matcher::propagate_adjacent_matches) - .def("get_matches", &Matcher::get_matches) + // .def("propagate_adjacent_matches", &Matcher::propagate_adjacent_matches) + // .def("get_matches", &Matcher::get_matches) .def("serializable_matches", &Matcher::serializable_matches) .def("cm_positions", &Matcher::cm_positions) .def("get_sorting", &Matcher::get_sorting) - .def("get_permutation", &Matcher::get_permutation) - .def("clear", &Matcher::clear); + .def("get_permutation", &Matcher::get_permutation); + // .def("clear", &Matcher::clear); py::class_(m, "ImputationMatcher") .def(py::init&, double, int>(), "Initialize",