@@ -42,6 +42,23 @@ module SequenceServer
4242 'Sinvicta2-2-3.prot.subset.fasta' )
4343 end
4444
# Writes a protein FASTA fixture to
# <root_database_dir>/with_unknown_bases/with_unknown_bases.fasta and returns
# its path. The first record opens with a long run of 'N' residues followed by
# one line of real protein sequence; the second record is protein only.
let 'fasta_file_prot_with_unknown_bases_seq' do
  dir_path = File.join(root_database_dir, 'with_unknown_bases')
  FileUtils.mkdir_p(dir_path)
  file_path = File.join(dir_path, 'with_unknown_bases.fasta')

  line_width = 70
  unknown_line = 'N' * line_width
  protein_line = 'VSDTAKVLVTEVLEKVSVNRVATFTIEADASLGTPVVEVLSPTRESLSVHVKQNSQGTYTV'

  # Number of whole 70-column 'N' lines that fit in GUESS_SAMPLE_SIZE. The
  # division rounds down, so the run of 'N's may be slightly shorter than
  # GUESS_SAMPLE_SIZE when that constant is not a multiple of 70.
  unknown_line_count =
    (SequenceServer::MAKEBLASTDB::GUESS_SAMPLE_SIZE / line_width).to_i

  File.open(file_path, 'w') do |f|
    f.puts '>seq1'
    # One puts per line: avoids the ambiguous `puts (...) * n` form and keeps
    # stray whitespace out of the sequence lines.
    unknown_line_count.times { f.puts unknown_line }
    f.puts protein_line
    f.puts '>seq2'
    2.times { f.puts protein_line }
  end

  file_path
end
61+
4562 let 'fasta_file_nucl_seqs' do
4663 File . join ( database_dir_v5 , 'transcripts' , 'Solenopsis_invicta' ,
4764 'Sinvicta2-2-3.cdna.subset.fasta' )
@@ -72,6 +89,10 @@ module SequenceServer
7289 expect ( makeblastdb . send ( :guess_sequence_type_in_fasta , fasta_file_nucl_seqs ) ) . to eq :nucleotide
7390 end
7491
# A FASTA file whose first record starts with a long run of 'N' residues must
# still be classified as protein.
it 'can ignore unknown bases when detecting the sequence type' do
  # guess_sequence_type_in_fasta is invoked through `send`, as in the
  # neighbouring examples.
  guessed_type = makeblastdb.send(:guess_sequence_type_in_fasta,
                                  fasta_file_prot_with_unknown_bases_seq)

  expect(guessed_type).to eq(:protein)
end
95+
7596 it 'can tell FASTA files that are yet to be made into a BLAST+ database' do
7697 makeblastdb = SequenceServer ::MAKEBLASTDB . new ( database_dir_unformatted )
7798 expect ( makeblastdb . any_to_format_or_reformat? ) . to be_truthy
0 commit comments