Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
f31e348
Allow showing Giraffe work without position-indexing all haplotypes
adamnovak Mar 25, 2026
e805195
Quiet chaining debugging because it is too loud to run
adamnovak Mar 25, 2026
c2c534d
Stop extra chain viz lines showing up on hover for the selected seed …
adamnovak Mar 25, 2026
f249723
Hide tracebacks from previously selected seed before trying to draw n…
adamnovak Mar 25, 2026
b32e325
Add test for seed anchor scoring
adamnovak Mar 25, 2026
eee4b94
Make test fail successfully
adamnovak Mar 25, 2026
1751719
Score left margins of minimizer anchors
adamnovak Mar 25, 2026
5e69e50
Avoid logging long sequences and add seed names to normal work-showin…
adamnovak Apr 6, 2026
88c49ec
Add seed dump lines for seeds with no linear positions
adamnovak Apr 6, 2026
63440c2
Detect off-reference seeds better
adamnovak Apr 6, 2026
300a526
Add almost entirely synthetic and non-functional code to let the chai…
adamnovak Apr 6, 2026
a0ceb96
Fix JS traceback so it can start at any seed, not just an on-linear-r…
adamnovak Apr 6, 2026
b289817
Stop logging all the tracebacks again
adamnovak Apr 6, 2026
cd87101
Add synthetic code for showing when a chain starts/ends off-reference
adamnovak Apr 6, 2026
e0f1ae7
When indexing haplotypes for position, actually get positions on them
adamnovak Apr 6, 2026
247439a
Stop reporting seeds like the numbers match the log anchor numbers
adamnovak Apr 7, 2026
ddee335
Actually hand off information about off-reference-ness to plot and al…
adamnovak Apr 7, 2026
db0ec56
Stop showing rejected transitions form off-reference seeds lining off…
adamnovak Apr 7, 2026
d2c56e6
Merge remote-tracking branch 'mine/score-seeds-correctly' into score-…
adamnovak Apr 7, 2026
1bfa590
Merge remote-tracking branch 'origin/master' into score-seeds-correctly
adamnovak Apr 15, 2026
508afe0
Apply re-tuned HiFi parameters for seed scoring bugfix
adamnovak Apr 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
348 changes: 293 additions & 55 deletions scripts/build-chain-viz.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions src/algorithms/alignment_path_offsets.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ alignment_path_offsets(const PathPositionHandleGraph& graph,
// Find the position of this end of this mapping
pos_t mapping_pos = make_pos_t(mapping.position());
// Find the positions for this end of this Mapping
auto pos_offs = algorithms::nearest_offsets_in_paths(&graph, mapping_pos, nearby ? search_limit : -1, path_filter);
auto pos_offs = algorithms::nearest_offsets_in_paths(&graph, mapping_pos, nearby ? search_limit : -1, {PathSense::REFERENCE, PathSense::GENERIC}, path_filter);
for (auto look_at_end : end) {
// For the start and the end of the Mapping, as needed
for (auto& p : pos_offs) {
Expand Down Expand Up @@ -84,7 +84,7 @@ multipath_alignment_path_offsets(const PathPositionHandleGraph& graph,
for (size_t j = 0; j < subpath.path().mapping_size(); ++j) {
// get the positions on paths that this mapping touches
pos_t mapping_pos = make_pos_t(subpath.path().mapping(j).position());
subpath_search_results[j] = nearest_offsets_in_paths(&graph, mapping_pos, 0, path_filter);
subpath_search_results[j] = nearest_offsets_in_paths(&graph, mapping_pos, 0, {PathSense::REFERENCE, PathSense::GENERIC}, path_filter);
// make sure that offsets are stored in increasing order
for (pair<const path_handle_t, vector<pair<size_t, bool>>>& search_record : subpath_search_results[j]) {
sort(search_record.second.begin(), search_record.second.end());
Expand Down
13 changes: 13 additions & 0 deletions src/algorithms/chain_items.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

//#define debug_chaining
//#define debug_transition
//#define debug_dp

namespace vg {
namespace algorithms {
Expand Down Expand Up @@ -564,8 +565,10 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
// We will run this over every transition in a good DP order.
auto iteratee = [&](const transition_info& transition) {
if (show_work) {
#ifdef debug_dp
cerr << "DP: " << transition.from_anchor << "->" << transition.to_anchor
<< " rd " << transition.read_distance << " gd " << transition.graph_distance << endl;
#endif
}

crash_unless(chain_scores.size() > transition.to_anchor);
Expand All @@ -590,8 +593,10 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
auto& source = to_chain[transition.from_anchor];

if (show_work) {
#ifdef debug_dp
cerr << "\t\tCome from score " << chain_scores[transition.from_anchor]
<< " across " << source << " to " << here << endl;
#endif
}

// How much does it pay (+) or cost (-) to make the jump from there
Expand All @@ -608,9 +613,11 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
size_t possible_match_length = std::min(transition.read_distance, transition.graph_distance);

if (show_work) {
#ifdef debug_dp
cerr << "\t\t\tFor read distance " << transition.read_distance << " and graph distance " << transition.graph_distance
<< " an indel of length " << indel_length
<< ((transition.read_distance > transition.graph_distance) ? " seems plausible" : " would be required") << endl;
#endif
}

if (indel_length > max_indel_bases) {
Expand Down Expand Up @@ -661,8 +668,10 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
chain_scores[transition.to_anchor] = std::max(chain_scores[transition.to_anchor], from_source_score);

if (show_work) {
#ifdef debug_dp
cerr << "\t\tWe can reach #" << transition.to_anchor << " with " << source_score << " + " << jump_points
<< " from transition + " << item_points << " from item = " << from_source_score << endl;
#endif
}

if (diagram) {
Expand Down Expand Up @@ -692,7 +701,9 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
}
} else {
if (show_work) {
#ifdef debug_dp
cerr << "\t\tTransition is impossible." << endl;
#endif
}
}
};
Expand Down Expand Up @@ -751,7 +762,9 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
best_score.max_in(chain_scores, to_anchor);

if (show_work) {
#ifdef debug_dp
cerr << "\tBest chain end so far: " << best_score << endl;
#endif
}

}
Expand Down
3 changes: 2 additions & 1 deletion src/algorithms/nearest_offsets_in_paths.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ using namespace std;
path_offset_collection_t nearest_offsets_in_paths(const PathPositionHandleGraph* graph,
const pos_t& pos,
int64_t max_search,
const std::unordered_set<PathSense>& desired_senses,
const std::function<bool(const path_handle_t&)>* path_filter) {

// init the return value
Expand Down Expand Up @@ -48,7 +49,7 @@ path_offset_collection_t nearest_offsets_in_paths(const PathPositionHandleGraph*
<< " in " << (search_left ? "leftward" : "rightward") << " direction at distance " << dist << endl;
#endif

graph->for_each_step_of_sense(here, {PathSense::REFERENCE, PathSense::GENERIC}, [&](const step_handle_t& step) {
graph->for_each_step_of_sense(here, desired_senses, [&](const step_handle_t& step) {
// For each path visit that occurs on this node
#ifdef debug
cerr << "handle is on step at path offset " << graph->get_position_of_step(step) << endl;
Expand Down
3 changes: 2 additions & 1 deletion src/algorithms/nearest_offsets_in_paths.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,10 @@ using path_offset_collection_t = unordered_map<path_handle_t, vector<pair<size_t
///
/// If path_filter is set, ignores paths for which it returns false.
///
/// Doesn't consider haplotype paths.
/// Doesn't consider haplotype paths. by default.
path_offset_collection_t nearest_offsets_in_paths(const PathPositionHandleGraph* graph,
const pos_t& pos, int64_t max_search,
const std::unordered_set<PathSense>& desired_senses = {PathSense::REFERENCE, PathSense::GENERIC},
const std::function<bool(const path_handle_t&)>* path_filter = nullptr);

/// Wrapper for the above to support some earlier code. Only looks for paths
Expand Down
7 changes: 6 additions & 1 deletion src/minimizer_mapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,10 @@ class MinimizerMapper : public AlignerClient {
/// Track linear reference position for placements in log output.
static constexpr bool default_track_position = false;
bool track_position = default_track_position;

/// Track positions along haplotypes
static constexpr bool default_haplotype_positions = false;
bool haplotype_positions = default_haplotype_positions;

/// If set, log what the mapper is thinking in its mapping of each read.
static constexpr bool default_show_work = false;
Expand Down Expand Up @@ -1467,7 +1471,8 @@ class MinimizerMapper : public AlignerClient {
const std::vector<std::vector<size_t>>& chains,
const std::vector<std::vector<bool>>& chain_rec_flags,
const std::vector<size_t>& chain_source_tree,
const PathPositionHandleGraph* path_graph);
const PathPositionHandleGraph* path_graph,
bool haplotype_positions);

/// Dump a graph
static void dump_debug_graph(const HandleGraph& graph);
Expand Down
42 changes: 34 additions & 8 deletions src/minimizer_mapper_from_chains.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,8 @@ void MinimizerMapper::dump_debug_chains(const ZipCodeForest& zip_code_forest,
const std::vector<std::vector<size_t>>& chains,
const std::vector<std::vector<bool>>& chain_rec_flags,
const std::vector<size_t>& chain_source_tree,
const PathPositionHandleGraph* path_graph) {
const PathPositionHandleGraph* path_graph,
bool haplotype_positions) {
if (!path_graph) {
// We don't have a path positional graph for this
return;
Expand Down Expand Up @@ -242,6 +243,10 @@ void MinimizerMapper::dump_debug_chains(const ZipCodeForest& zip_code_forest,

// Determine the positions of all the involved seeds.
std::unordered_map<size_t, algorithms::path_offset_collection_t> seed_positions;
std::unordered_set<PathSense> wanted_senses {PathSense::REFERENCE, PathSense::GENERIC};
if (haplotype_positions) {
wanted_senses.insert(PathSense::HAPLOTYPE);
}
for (auto& kv : seed_sets) {
for (const std::vector<size_t> included_seeds : kv.second) {
for (auto& seed_num : included_seeds) {
Expand All @@ -251,7 +256,7 @@ void MinimizerMapper::dump_debug_chains(const ZipCodeForest& zip_code_forest,
auto found = seed_positions.find(seed_num);
if (found == seed_positions.end()) {
// If we don't know the seed's positions yet, get them
found = seed_positions.emplace_hint(found, seed_num, algorithms::nearest_offsets_in_paths(path_graph, seed.pos, 100));
found = seed_positions.emplace_hint(found, seed_num, algorithms::nearest_offsets_in_paths(path_graph, seed.pos, 100, wanted_senses));
for (auto& handle_and_positions : found->second) {
std::string path_name = path_graph->get_path_name(handle_and_positions.first);
for (auto& position : handle_and_positions.second) {
Expand All @@ -268,6 +273,19 @@ void MinimizerMapper::dump_debug_chains(const ZipCodeForest& zip_code_forest,
seedpos.field(ss.str());
}
}
if (found->second.empty()) {
// The seed doesn't have any linear positions, but might still participate in the winning chain traceback.
// Report it.
seedpos.line();
seedpos.field(seed_anchors.at(seed_num).read_start());
seedpos.field("");
seedpos.field("");
seedpos.field("");
seedpos.field(seed_num);
std::stringstream ss;
ss << seed_anchors.at(seed_num);
seedpos.field(ss.str());
}
}
}
}
Expand Down Expand Up @@ -735,7 +753,7 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
}
#endif

// Turn all the seeds into anchors. Either we'll fragment them directly or
// Turn all the seeds into anchors. Either we'll chain them directly or
// use them to make gapless extension anchors over them.
// TODO: Can we only use the seeds that are in trees we keep?
vector<algorithms::Anchor> seed_anchors = this->to_anchors(aln, minimizers, seeds);
Expand Down Expand Up @@ -781,7 +799,7 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {

// Dump all chains if requested (do this before alignments, while chains still exist)
if (show_work && !chains.empty() && this->path_graph != nullptr) {
dump_debug_chains(zip_code_forest, seeds, minimizers, seed_anchors, chains, chain_rec_flags, chain_source_tree, this->path_graph);
dump_debug_chains(zip_code_forest, seeds, minimizers, seed_anchors, chains, chain_rec_flags, chain_source_tree, this->path_graph, this->haplotype_positions);
}

if (alignments.size() == 0) {
Expand Down Expand Up @@ -1630,9 +1648,13 @@ void MinimizerMapper::do_chaining_on_trees(Alignment& aln, const ZipCodeForest&
// Add position annotations for the good-looking chains.
// Should be much faster than full correctness tracking from every seed.
crash_unless(this->path_graph);
std::unordered_set<PathSense> wanted_senses {PathSense::REFERENCE, PathSense::GENERIC};
if (haplotype_positions) {
wanted_senses.insert(PathSense::HAPLOTYPE);
}
for (auto& boundary : {anchor_view[scored_chain.second.front()].graph_start(), anchor_view[scored_chain.second.back()].graph_end()}) {
// For each end of the chain
auto offsets = algorithms::nearest_offsets_in_paths(this->path_graph, boundary, 100);
auto offsets = algorithms::nearest_offsets_in_paths(this->path_graph, boundary, 100, wanted_senses);
for (auto& handle_and_positions : offsets) {
for (auto& position : handle_and_positions.second) {
// Tell the funnel all the effective positions, ignoring orientation
Expand Down Expand Up @@ -2706,7 +2728,11 @@ Alignment MinimizerMapper::find_chain_alignment(
#pragma omp critical (cerr)
{
cerr << log_name() << "Need to align graph from " << (*here).graph_end() << " to " << (*next).graph_start()
<< " separated by ~" << graph_dist << " bp and sequence \"" << linking_bases << "\"" << endl;
<< " separated by ~" << graph_dist << " bp";
if (linking_bases.size() < 200) {
cerr << " and sequence \"" << linking_bases << "\"";
}
cerr << endl;
}
}
#endif
Expand Down Expand Up @@ -2894,7 +2920,7 @@ Alignment MinimizerMapper::find_chain_alignment(
if (show_work) {
#pragma omp critical (cerr)
{
cerr << log_name() << "Aligned and added link of " << link_length << " bp read and " << link_graph_path_length << " bp graph via " << link_alignment_source << " with score of " << link_score << std::endl;
cerr << log_name() << "Aligned and added link to " << *next << " of " << link_length << " bp read and " << link_graph_path_length << " bp graph via " << link_alignment_source << " with score of " << link_score << std::endl;
}
}

Expand Down Expand Up @@ -3828,7 +3854,7 @@ algorithms::Anchor MinimizerMapper::to_anchor(const Alignment& aln, const Vector
// Work out how many points the anchor is.
// TODO: Always make sequence and quality available for scoring!
// We're going to score the anchor as the full minimizer, and rely on the margins to stop us from taking overlapping anchors.
int score = aligner->score_exact_match(aln, read_start - margin_left, length + margin_right);
int score = aligner->score_exact_match(aln, read_start - margin_left, margin_left + length + margin_right);
return algorithms::Anchor(read_start, graph_start, length, margin_left, margin_right, score, seed_number, &(seed.zipcode), hint_start, source.is_repetitive, paths);
}

Expand Down
Loading
Loading