@@ -24,20 +24,11 @@ module BLAST
2424 class Report < Report
2525 def initialize ( job )
2626 super do
27- @queries = [ ]
2827 @querydb = job . databases
2928 end
3029 end
3130
32- # Attributes parsed out from BLAST output.
33- attr_reader :program , :program_version , :stats , :queries
34-
35- # Attributes parsed from job metadata and BLAST output.
36- attr_reader :querydb , :dbtype , :params
37-
3831 def to_json ( *_args )
39- generate
40-
4132 %i[ querydb program program_version params stats
4233 queries ] . inject ( { } ) do |h , k |
4334 h [ k ] = send ( k )
@@ -53,83 +44,89 @@ def to_json(*_args)
5344 def xml_file_size
5445 return File . size ( job . imported_xml_file ) if job . imported_xml_file
5546
56- generate
57-
5847 xml_formatter . size
5948 end
6049
61- # Generate report.
62- def generate
63- return self if @_generated
64-
65- job . raise!
66- xml_ir = nil
67- tsv_ir = nil
68- if job . imported_xml_file
69- xml_ir = parse_xml File . read ( job . imported_xml_file )
70- tsv_ir = Hash . new do | h1 , k1 |
71- h1 [ k1 ] = Hash . new do | h2 , k2 |
72- h2 [ k2 ] = [ '' , '' , [ ] ]
73- end
74- end
75- else
76- xml_ir = parse_xml ( xml_formatter . read_file )
77- tsv_ir = parse_tsv ( tsv_formatter . read_file )
50+ def done?
51+ return true if job . imported_xml_file
52+
53+ File . exist? ( xml_formatter . filepath ) && File . exist? ( tsv_formatter . filepath )
54+ end
55+
56+ def program
57+ @program ||= xml_ir [ 0 ]
58+ end
59+
60+ def program_version
61+ @program_version ||= xml_ir [ 1 ]
62+ end
63+
64+ def querydb
65+ @querydb ||= xml_ir [ 3 ] . split . map do | path |
66+ { title : File . basename ( path ) }
7867 end
79- extract_program_info xml_ir
80- extract_db_info xml_ir
81- extract_params xml_ir
82- extract_stats xml_ir
83- extract_queries xml_ir , tsv_ir
68+ end
69+
70+ def dbtype
71+ @dbtype ||= querydb &. first &. type || dbtype_from_program
72+ end
8473
85- @_generated = true
74+ def params
75+ @params ||= extract_params
76+ end
8677
87- self
78+ def stats
79+ @stats ||= extract_stats
8880 end
8981
90- def done?
91- return true if job . imported_xml_file
82+ def queries
83+ @queries ||= xml_ir [ 8 ] . map do |n |
84+ query = Query . new ( self , n [ 0 ] , n [ 2 ] , n [ 3 ] , [ ] )
85+ query . hits = query_hits ( n [ 4 ] , tsv_ir [ query . id ] , query )
9286
93- File . exist? ( xml_formatter . filepath ) && File . exist? ( tsv_formatter . filepath )
87+ query
88+ end
9489 end
9590
9691 private
9792
98- def xml_formatter
99- @xml_formatter ||= Formatter . run ( job , 'xml' )
93+ def xml_ir
94+ @xml_ir ||=
95+ if job . imported_xml_file
96+ parse_xml File . read ( job . imported_xml_file )
97+ else
98+ job . raise!
99+ parse_xml ( xml_formatter . read_file )
100+ end
100101 end
101102
102- def tsv_formatter
103- @tsv_formatter ||= Formatter . run ( job , 'custom_tsv' )
103+ def tsv_ir
104+ @tsv_ir ||=
105+ if job . imported_xml_file
106+ Hash . new do |h1 , k1 |
107+ h1 [ k1 ] = Hash . new do |h2 , k2 |
108+ h2 [ k2 ] = [ '' , '' , [ ] ]
109+ end
110+ end
111+ else
112+ job . raise!
113+ parse_tsv ( tsv_formatter . read_file )
114+ end
104115 end
105116
106- # Make program name and program name + version available via `program`
107- # and `program_version` attributes.
108- def extract_program_info ( ir )
109- @program = ir [ 0 ]
110- @program_version = ir [ 1 ]
117+ def xml_formatter
118+ @xml_formatter ||= Formatter . run ( job , 'xml' )
111119 end
112120
113- # Get database information (title and type) from job yaml or from XML.
114- # Sets `querydb` and `dbtype` attributes.
115- def extract_db_info ( ir )
116- if @querydb . empty?
117- @querydb = ir [ 3 ] . split . map do |path |
118- { title : File . basename ( path ) }
119- end
120- @dbtype = dbtype_from_program
121- else
122- @dbtype = @querydb . first . type
123- end
121+ def tsv_formatter
122+ @tsv_formatter ||= Formatter . run ( job , 'custom_tsv' )
124123 end
125124
126- # Make search params available via `params` attribute.
127- #
128125 # Search params tweak the results. Like evalue cutoff or penalty to open
129126 # a gap. BLAST+ doesn't list all input params in the XML output. Only
130127 # matrix, evalue, gapopen, gapextend, and filters are available from XML
131128 # output.
132- def extract_params ( ir )
129+ def extract_params
133130 # Parse/get params from the job first.
134131 job_params = parse_advanced ( job . advanced )
135132 # Old jobs from beta releases may not have the advanced key but they
@@ -138,21 +135,19 @@ def extract_params(ir)
138135
139136 # Parse params from BLAST XML.
140137 @params = Hash [
141- *ir [ 7 ] . first . map { |k , v | [ k . gsub ( 'Parameters_' , '' ) , v ] } . flatten
138+ *xml_ir [ 7 ] . first . map { |k , v | [ k . gsub ( 'Parameters_' , '' ) , v ] } . flatten
142139 ]
143140 @params [ 'evalue' ] = @params . delete ( 'expect' )
144141
145142 # Merge into job_params.
146143 @params = job_params . merge ( @params )
147144 end
148145
149- # Make search stats available via `stats` attribute.
150- #
151146 # Search stats are computed metrics. Like total number of sequences or
152147 # effective search space.
153- def extract_stats ( ir )
154- stats = ir [ 8 ] . first [ 5 ] [ 0 ]
155- @stats = {
148+ def extract_stats
149+ stats = xml_ir [ 8 ] . first [ 5 ] [ 0 ]
150+ {
156151 nsequences : stats [ 0 ] ,
157152 ncharacters : stats [ 1 ] ,
158153 hsp_length : stats [ 2 ] ,
@@ -163,20 +158,11 @@ def extract_stats(ir)
163158 }
164159 end
165160
166- # Create query objects for the given report from the given ir.
167- def extract_queries ( xml_ir , tsv_ir )
168- xml_ir [ 8 ] . each do |n |
169- query = Query . new ( self , n [ 0 ] , n [ 2 ] , n [ 3 ] , [ ] )
170- extract_hits ( n [ 4 ] , tsv_ir [ query . id ] , query )
171- queries << query
172- end
173- end
174-
175161 # Create Hit objects for the given query from the given ir.
176- def extract_hits ( xml_ir , tsv_ir , query )
177- return if xml_ir == [ "\n " ] # => No hits.
162+ def query_hits ( xml_ir , tsv_ir , query )
163+ return [ ] if xml_ir == [ "\n " ] # => No hits.
178164
179- xml_ir . each do |n |
165+ xml_ir . map do |n |
180166 # If hit comes from a non -parse_seqids database, then id (n[1]) is a
181167 # BLAST assigned internal id of the format 'gnl|BL_ORD_ID|serial'. We
182168 # assign the id to accession (because we use accession for sequence
@@ -190,19 +176,21 @@ def extract_hits(xml_ir, tsv_ir, query)
190176 n [ 1 ] = defline . shift
191177 n [ 2 ] = defline . join ( ' ' )
192178 end
179+
193180 hit = Hit . new ( query , n [ 0 ] , n [ 1 ] , n [ 3 ] , n [ 2 ] , n [ 4 ] ,
194181 tsv_ir [ n [ 1 ] ] [ 0 ] , tsv_ir [ n [ 1 ] ] [ 1 ] , [ ] )
195- extract_hsps ( n [ 5 ] , tsv_ir [ n [ 1 ] ] [ 2 ] , hit )
196- query . hits << hit
182+
183+ hit . hsps = hsps ( n [ 5 ] , tsv_ir [ n [ 1 ] ] [ 2 ] , hit )
184+
185+ hit
197186 end
198187 end
199188
200- # Create HSP objects for the given hit from the given ir.
201- def extract_hsps ( xml_ir , tsv_ir , hit )
202- xml_ir . each_with_index do |n , i |
189+ def hsps ( xml_ir , tsv_ir , hit )
190+ xml_ir . map . with_index do |n , i |
203191 n . insert ( 14 , tsv_ir [ i ] )
204- hsp = HSP . new ( hit , * n )
205- hit . hsps << hsp
192+
193+ HSP . new ( hit , * n )
206194 end
207195 end
208196
0 commit comments