@@ -457,6 +457,9 @@ def binary_search(config: SearchConfig,
457457
458458 iteration_count = 1
459459
460+ # Number of sequences being searched on
461+ num_sequences = len (sequence_segments )
462+
460463 # While there are still kmers to search
461464 while not np .all (finished_search ):
462465 config .log ("Iteration {}" .format (iteration_count ))
@@ -482,9 +485,9 @@ def binary_search(config: SearchConfig,
482485 "Number of counted positions ({}) and number of counts ({}) " \
483486 "do not match" .format (len (kmer_indices ), len (count_list ))
484487
485- # Where we have counts of 1
488+ # Where we have counts of 1 per sequence searched over
486489 unique_lengths [kmer_indices ] = np .where (
487- (count_list == 1 ) &
490+ (count_list == num_sequences ) &
488491 # And if there is no current unique length recorded
489492 ((unique_lengths [kmer_indices ] == 0 ) |
490493 # Or there is a smaller length found than the current min length
@@ -495,20 +498,20 @@ def binary_search(config: SearchConfig,
495498 # Otherwise keep the current unique length
496499 unique_lengths [kmer_indices ])
497500
498- # If we have a k-mer count of 1 and the current length queried is the
499- # same as the lower bound (i.e. can't get smaller for a unique count)
500- # This position has finished searching
501+ # If we have a k-mer count of 1 per sequence and the current length
502+ # queried is the same as the lower bound (i.e. can't get smaller for a
503+ # unique count) This position has finished searching
501504 finished_search [kmer_indices ] = np .where (
502- count_list == 1 ,
505+ count_list == num_sequences ,
503506 current_length_query [kmer_indices ] ==
504507 lower_length_bound [kmer_indices ],
505508 finished_search [kmer_indices ])
506509
507- # If we have a k-mer count > 1 and the current length queried is the
508- # same as the uppper bound (i.e. can't find a unique length or larger)
509- # This position has finished searching
510+ # If we have a k-mer count > number of sequences and the current length
511+ # queried is the same as the uppper bound (i.e. can't find a unique
512+ # length or larger) This position has finished searching
510513 finished_search [kmer_indices ] = np .where (
511- count_list > 1 ,
514+ count_list > num_sequences ,
512515 current_length_query [kmer_indices ] ==
513516 upper_length_bound [kmer_indices ],
514517 finished_search [kmer_indices ])
@@ -519,15 +522,15 @@ def binary_search(config: SearchConfig,
519522 # we need to decrease our k-mer length (i.e. counts == 1)
520523 # Set the new upper (inclusive) bound to the current query length - 1
521524 upper_length_bound [kmer_indices ] = np .where (
522- count_list == 1 ,
525+ count_list == num_sequences ,
523526 current_length_query [kmer_indices ] - 1 ,
524527 upper_length_bound [kmer_indices ])
525528
526529 # Raise the lower bounds of our search range on positions where
527530 # we need to increase our k-mer length (i.e. counts > 1)
528531 # Set the new lower (inclusive) bound to the current query length + 1
529532 lower_length_bound [kmer_indices ] = np .where (
530- count_list > 1 ,
533+ count_list > num_sequences ,
531534 current_length_query [kmer_indices ] + 1 ,
532535 lower_length_bound [kmer_indices ])
533536
@@ -569,6 +572,8 @@ def linear_search(config: SearchConfig,
569572 # List of minimum lengths (where 0 is nothing was found)
570573 unique_lengths = np .zeros (num_kmers , dtype = np .uint32 )
571574
575+ num_sequences = len (sequence_segments )
576+
572577 # For each kmer length
573578 for kmer_length in config .kmer_lengths :
574579 config .log ("{} k-mers remaining" .format ((~ finished_search ).sum ()))
@@ -620,8 +625,9 @@ def linear_search(config: SearchConfig,
620625 "do not match" .format (len (kmer_indices ), len (count_list ))
621626
622627 unique_lengths [kmer_indices ] = np .where (
623- # Where the count is 1
624- (count_list == 1 ) &
628+ # Where the count is equal to the number of sequences to search on
629+ # (i.e. a unique count per sequence specified)
630+ (count_list == num_sequences ) &
625631 # And if there is no current unique length recorded
626632 (unique_lengths [kmer_indices ] == 0 ),
627633 # Record the minimum kmer length found if it less than the current
@@ -644,20 +650,22 @@ def get_kmer_counts(config: SearchConfig,
644650 kmer_lengths : list [int ]) -> npt .NDArray [np .uint32 ]:
645651
646652 count_list = np .zeros (len (sequence_indices ), dtype = np .uint32 )
647- # For each index (usually just one)
648- # NB: We assume a guarantee that we have > 0 indexes at this point
649- for fmindex_filepath in config .fmindex_filepaths :
650653
651- if len (config .fmindex_filepaths ) > 1 :
652- config .log (f"Counting kmers from index: { fmindex_filepath } " )
653654
654655 # For every sequence segment in the list of sequence segments for this
655656 # index (usually just one)
657+ # For every sequence segment in the list of sequence segments for this
658+ # index (usually just one)
659+ for fmindex_filepath in config .fmindex_filepaths :
660+ # For each index (usually just one)
661+ # NB: We assume a guarantee that we have > 0 indexes at this point
656662 for i , sequence in enumerate (sequences ):
657663
658- if len (sequences ) > 1 :
659- # TODO: Print out sequence ID?
660- config .log (f"Counting sequence #{ i + 1 } " )
664+ if (len (sequences ) or
665+ len (config .fmindex_filepaths )) > 1 :
666+ config .log (f"Counting k-mers from "
667+ f"{ config .fmindex_filepaths [i ]} in index "
668+ f"{ fmindex_filepath } " )
661669
662670 # Count the occurences of kmers on the forward strand
663671 count_list += np .array (count_kmers_from_sequence (
0 commit comments