Skip to content

Commit 9ebf882

Browse files
committed
Update tests with new search configuration
1 parent 7a77272 commit 9ebf882

5 files changed

Lines changed: 47 additions & 40 deletions

File tree

.github/workflows/continuous-integration-workflow.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ jobs:
7171
# Make sure to be running newmap from installed site-packages
7272
cd tests
7373
python -m unittest discover
74-
./run_all.sh
74+
./test_end_to_end.sh
7575
- uses: actions/upload-artifact@v4
7676
if: ${{ failure() }} # Only on failure attempt to upload core dump
7777
with:

newmap/search.py

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from contextlib import ExitStack
22
from collections.abc import Callable
3-
from dataclasses import dataclass
3+
from dataclasses import dataclass, field
44
from functools import partial
55
from math import ceil, log2
66
from pathlib import Path
@@ -31,6 +31,9 @@ def nil_search_log(*_: str):
3131

3232
@dataclass(frozen=True)
3333
class SearchConfig:
34+
# NB: The defaults here are not guaranteed to match the command line
35+
# interface defaults; they are for Python interface convenience only
36+
3437
# Position args
3538
fasta_filepaths: list[Path]
3639
fmindex_filepaths: list[Path]
@@ -39,22 +42,25 @@ class SearchConfig:
3942
kmer_lengths: list[int]
4043
is_binary_search: bool
4144

42-
use_reverse_complement: bool
43-
output_directory: Path
45+
use_reverse_complement: bool = True
46+
output_directory: Path = Path.cwd()
4447

45-
include_sequence_ids: list[bytes]
46-
exclude_sequence_ids: list[bytes]
48+
# NB: Cannot assign mutable default arguments since they would be shared
49+
# between instances so we must use default_factory to create a new list
50+
# for each instance (this applies to any dataclass, frozen or not)
51+
include_sequence_ids: list[bytes] = field(default_factory=list)
52+
exclude_sequence_ids: list[bytes] = field(default_factory=list)
4753

4854
# Performance arguments
49-
num_threads: int
50-
kmer_batch_size: int
51-
initial_search_length: int
55+
num_threads: int = 1
56+
kmer_batch_size: int = 1000000
57+
initial_search_length: int = 0 # NB: default of none
5258

5359
# Verbosity
5460
# NB: Only used when additional calculations are needed for logging
55-
verbose: bool
56-
# Logging function
57-
log: Callable[[str], None]
61+
verbose: bool = False
62+
# Logging function (initialized based on verbosity)
63+
log: Callable[[str], None] = field(init=False)
5864

5965
@classmethod
6066
def from_args(cls, args):
@@ -154,11 +160,6 @@ def from_args(cls, args):
154160
[s.encode() for s in
155161
exclude_sequences_arg.split(SEQUENCE_ID_SEPARATOR)]
156162

157-
if args.verbose:
158-
logging_function = partial(verbose_print, True)
159-
else:
160-
logging_function = nil_search_log
161-
162163
return cls(
163164
# Position args
164165
fasta_filepaths=fasta_filenames,
@@ -174,7 +175,6 @@ def from_args(cls, args):
174175
include_sequence_ids=include_sequence_ids,
175176
exclude_sequence_ids=exclude_sequence_ids,
176177

177-
log=logging_function,
178178
verbose=args.verbose,
179179

180180
# Performance arguments
@@ -183,6 +183,16 @@ def from_args(cls, args):
183183
initial_search_length=initial_search_length
184184
)
185185

186+
def __post_init__(self):
187+
# Setup logging based on verbosity
188+
if self.verbose:
189+
logging_function = partial(verbose_print, True)
190+
else:
191+
logging_function = nil_search_log
192+
193+
# Set attribute on the object since the dataclass is frozen
194+
object.__setattr__(self, "log", logging_function)
195+
186196

187197
def write_unique_counts(config: SearchConfig):
188198

tests/test_unique_counts.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from newmap.main import (DEFAULT_COMPRESSION_RATIO,
1010
DEFAULT_SEED_LENGTH)
1111
from newmap.index import generate_fm_index
12-
from newmap.search import write_unique_counts
12+
from newmap.search import SearchConfig, write_unique_counts
1313

1414
# Expected minimum unique lengths at each position
1515
# NB: In order to manually count correctly, it is important to remember to
@@ -46,16 +46,15 @@ def test_linear_search(self):
4646
self.search(use_binary_search=False)
4747

4848
def search(self, use_binary_search):
49-
write_unique_counts(Path(self.fasta_filename),
50-
Path(self.genome_index_filename),
51-
15, # Batch size
52-
list(range(4, 11)), # Kmer lengths 4 to 10
53-
0, # Initial search length
54-
[], # Include chr ids
55-
[], # Exclude chr ids
56-
False, # no reverse complement
57-
self.num_threads,
58-
use_binary_search)
49+
50+
write_unique_counts(SearchConfig(
51+
fasta_filepaths=[Path(self.fasta_filename)],
52+
fmindex_filepaths=[Path(self.genome_index_filename)],
53+
kmer_lengths=list(range(4, 11)),
54+
kmer_batch_size=15,
55+
is_binary_search=use_binary_search,
56+
num_threads=self.num_threads,
57+
))
5958

6059
# Check the results in chr1.unique.uint8 and chr2.unique.uint8
6160
chr1_results = np.fromfile('chr1.unique.uint8', dtype=np.uint8)

tests/test_unique_to_mappability.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from newmap.main import (DEFAULT_COMPRESSION_RATIO,
88
DEFAULT_SEED_LENGTH)
99
from newmap.index import generate_fm_index
10-
from newmap.search import write_unique_counts
10+
from newmap.search import SearchConfig, write_unique_counts
1111
from newmap.track import write_mappability_files
1212

1313

@@ -24,16 +24,14 @@ def setUpClass(cls):
2424
DEFAULT_COMPRESSION_RATIO,
2525
DEFAULT_SEED_LENGTH)
2626

27-
write_unique_counts(Path(cls.fasta_filename),
28-
Path(cls.genome_index_filename),
29-
15, # Batch size
30-
list(range(4, 11)), # Kmer lengths 4 to 10
31-
0, # Initial search length
32-
[], # Include chr ids
33-
[], # Exclude chr ids
34-
False, # no reverse complement
35-
cls.num_threads,
36-
use_binary_search=True)
27+
write_unique_counts(SearchConfig(
28+
fasta_filepaths=[Path(cls.fasta_filename)],
29+
fmindex_filepaths=[Path(cls.genome_index_filename)],
30+
kmer_lengths=list(range(4, 11)),
31+
kmer_batch_size=15,
32+
is_binary_search=True,
33+
num_threads=cls.num_threads,
34+
))
3735

3836
@classmethod
3937
def tearDownClass(cls):

0 commit comments

Comments
 (0)