@@ -25,6 +25,7 @@ def write_unique_counts(fasta_filename: Path,
2525 initial_search_length : int ,
2626 include_sequence_ids : list [bytes ],
2727 exclude_sequence_ids : list [bytes ],
28+ skip_reverse_complement : bool ,
2829 num_threads : int ,
2930 use_binary_search = False ,
3031 output_directory : Path = Path ("." ),
@@ -136,6 +137,7 @@ def write_unique_counts(fasta_filename: Path,
136137 min_kmer_length ,
137138 max_kmer_length ,
138139 initial_search_length ,
140+ skip_reverse_complement ,
139141 num_threads ,
140142 data_type , # type: ignore
141143 verbose )
@@ -145,6 +147,7 @@ def write_unique_counts(fasta_filename: Path,
145147 sequence_segment ,
146148 kmer_lengths ,
147149 num_kmers ,
150+ skip_reverse_complement ,
148151 num_threads ,
149152 data_type , # type: ignore
150153 verbose )
@@ -207,6 +210,7 @@ def binary_search(index_filename: Path,
207210 min_kmer_length : int ,
208211 max_kmer_length : int ,
209212 initial_search_length : int ,
213+ skip_reverse_complement : bool ,
210214 num_threads : int ,
211215 data_type : Union [np .uint8 , np .uint16 , np .uint32 ],
212216 verbose : bool ) -> tuple [npt .NDArray [np .uint ], int ]:
@@ -292,6 +296,7 @@ def binary_search(index_filename: Path,
292296 kmer_indices .tolist (),
293297 current_length_query [
294298 kmer_indices ].tolist (),
299+ skip_reverse_complement ,
295300 num_threads )
296301
297302 # Assert that the number of indices to count and the number of counts
@@ -368,6 +373,7 @@ def linear_search(index_filename: Path,
368373 sequence_segment : SequenceSegment ,
369374 kmer_lengths : list [int ],
370375 num_kmers : int ,
376+ skip_reverse_complement ,
371377 num_threads : int ,
372378 data_type : Union [np .uint8 , np .uint16 , np .uint32 ],
373379 verbose : bool ) -> tuple [npt .NDArray [np .uint ], int ]:
@@ -429,6 +435,7 @@ def linear_search(index_filename: Path,
429435 sequence_segment .data ,
430436 kmer_indices .tolist (),
431437 max_kmer_query_lengths ,
438+ skip_reverse_complement ,
432439 num_threads )
433440
434441 # Assert that the number of indices to count and the number of counts
@@ -460,6 +467,7 @@ def get_kmer_counts(index_filename: Path,
460467 sequence_data : bytes ,
461468 index_list : list [int ],
462469 kmer_lengths : list [int ],
470+ skip_reverse_complement : bool ,
463471 num_threads : int ) -> npt .NDArray [np .uint32 ]:
464472
465473 # Count the occurences of kmers on the forward strand
@@ -471,24 +479,25 @@ def get_kmer_counts(index_filename: Path,
471479 num_threads ),
472480 dtype = np .uint32 )
473481
474- # Count the occurrences of kmers on the reverse complement strand
475- # TODO: Add no reverse complement option to skip this to
476- # support bisulfite treated kmer counting
477- # TODO: Add option for a complement table
478- reverse_complement_sequence = \
479- sequence_data .translate (COMPLEMENT_TRANSLATE_TABLE )[::- 1 ]
480-
481- sequence_data_length = len (sequence_data )
482- reverse_index_list = [sequence_data_length - i - kmer_length
483- for i , kmer_length in zip (index_list , kmer_lengths )]
484-
485- count_list += np .array (count_kmers_from_sequence (
486- str (index_filename ),
487- reverse_complement_sequence ,
488- reverse_index_list ,
489- kmer_lengths ,
490- num_threads ),
491- dtype = np .uint32 )
482+ # If we are not skipping the reverse complement strand (default)
483+ if not skip_reverse_complement :
484+ # Count the occurrences of kmers on the reverse complement strand
485+ # TODO: Add option for a complement table
486+ reverse_complement_sequence = \
487+ sequence_data .translate (COMPLEMENT_TRANSLATE_TABLE )[::- 1 ]
488+
489+ sequence_data_length = len (sequence_data )
490+ reverse_index_list = [sequence_data_length - i - kmer_length
491+ for i , kmer_length in zip (index_list ,
492+ kmer_lengths )]
493+
494+ count_list += np .array (count_kmers_from_sequence (
495+ str (index_filename ),
496+ reverse_complement_sequence ,
497+ reverse_index_list ,
498+ kmer_lengths ,
499+ num_threads ),
500+ dtype = np .uint32 )
492501
493502 # If any element in the count list is 0
494503 if np .any (count_list == 0 ):
@@ -698,6 +707,7 @@ def main(args):
698707 initial_search_length = args .initial_search_length
699708 include_sequences_arg = args .include_sequences
700709 exclude_sequences_arg = args .exclude_sequences
710+ skip_reverse_complement = args .norc
701711 num_threads = args .num_threads
702712 verbose = args .verbose
703713
@@ -773,6 +783,7 @@ def main(args):
773783 initial_search_length ,
774784 include_sequence_ids ,
775785 exclude_sequence_ids ,
786+ skip_reverse_complement ,
776787 num_threads ,
777788 use_binary_search ,
778789 output_directory ,
0 commit comments