Skip to content

Commit 0f9acaa

Browse files
authored
Merge pull request #734 from tadast/tt/lazy-report
Refactor BLAST::Report to lazily generate reports
2 parents 4e9721a + 389ae3a commit 0f9acaa

10 files changed

Lines changed: 17047 additions & 87 deletions

File tree

lib/sequenceserver/blast/report.rb

Lines changed: 74 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -24,20 +24,11 @@ module BLAST
2424
class Report < Report
2525
# Builds a BLAST report for +job+.
#
# The block handed to +super+ seeds @querydb from job.databases. Everything
# else is read on demand by the accessor methods of this class.
def initialize(job)
2626
super do
27-
@queries = []
2827
@querydb = job.databases
2928
end
3029
end
3130

32-
# Attributes parsed out from BLAST output.
33-
attr_reader :program, :program_version, :stats, :queries
34-
35-
# Attributes parsed from job metadata and BLAST output.
36-
attr_reader :querydb, :dbtype, :params
37-
3831
def to_json(*_args)
39-
generate
40-
4132
%i[querydb program program_version params stats
4233
queries].inject({}) do |h, k|
4334
h[k] = send(k)
@@ -53,83 +44,89 @@ def to_json(*_args)
5344
# Size of the XML output for this report.
#
# When the job has an imported XML file, this is File.size of that file.
# Otherwise it is whatever xml_formatter.size reports.
def xml_file_size
5445
return File.size(job.imported_xml_file) if job.imported_xml_file
5546

56-
generate
57-
5847
xml_formatter.size
5948
end
6049

61-
# Generate report.
62-
def generate
63-
return self if @_generated
64-
65-
job.raise!
66-
xml_ir = nil
67-
tsv_ir = nil
68-
if job.imported_xml_file
69-
xml_ir = parse_xml File.read(job.imported_xml_file)
70-
tsv_ir = Hash.new do |h1, k1|
71-
h1[k1] = Hash.new do |h2, k2|
72-
h2[k2] = ['', '', []]
73-
end
74-
end
75-
else
76-
xml_ir = parse_xml(xml_formatter.read_file)
77-
tsv_ir = parse_tsv(tsv_formatter.read_file)
50+
# Whether the data backing this report is available.
#
# Returns true straight away when the job has an imported XML file.
# Otherwise returns true only if the files at both xml_formatter.filepath
# and tsv_formatter.filepath exist.
def done?
51+
return true if job.imported_xml_file
52+
53+
File.exist?(xml_formatter.filepath) && File.exist?(tsv_formatter.filepath)
54+
end
55+
56+
# Program name: element 0 of the parsed XML (xml_ir), memoized in @program.
def program
57+
@program ||= xml_ir[0]
58+
end
59+
60+
# Program name plus version: element 1 of the parsed XML (xml_ir), memoized
# in @program_version.
def program_version
61+
@program_version ||= xml_ir[1]
62+
end
63+
64+
# Databases that were searched.
#
# Returns @querydb when it is already set (initialize assigns it from
# job.databases). Otherwise splits element 3 of the parsed XML on whitespace
# and builds one `{ title: <basename of path> }` hash per path.
#
# NOTE(review): `||=` only falls back to the XML when @querydb is nil or
# false. The removed extract_db_info fell back when @querydb was *empty*.
# If job.databases can be an empty array, this now returns [] instead of the
# titles from the XML -- confirm which behaviour is intended.
def querydb
65+
@querydb ||= xml_ir[3].split.map do |path|
66+
{ title: File.basename(path) }
7867
end
79-
extract_program_info xml_ir
80-
extract_db_info xml_ir
81-
extract_params xml_ir
82-
extract_stats xml_ir
83-
extract_queries xml_ir, tsv_ir
68+
end
69+
70+
# Database type for this report, memoized in @dbtype.
#
# Uses the `type` of the first entry of `querydb` when that entry exposes
# one; otherwise falls back to `dbtype_from_program`.
#
# `querydb` may be nil or empty, and when it is derived from the XML its
# entries are plain `{ title: ... }` hashes that do not respond to `type`.
# All of those cases take the fallback instead of raising NoMethodError.
def dbtype
  @dbtype ||= begin
    first_db = querydb&.first
    (first_db.type if first_db.respond_to?(:type)) || dbtype_from_program
  end
end
8473

85-
@_generated = true
74+
# Search parameters, memoized in @params. Computed on first access by
# extract_params (which also assigns @params itself while building the hash).
def params
75+
@params ||= extract_params
76+
end
8677

87-
self
78+
# Search statistics, memoized in @stats. Computed on first access by
# extract_stats.
def stats
79+
@stats ||= extract_stats
8880
end
8981

90-
def done?
91-
return true if job.imported_xml_file
82+
# Query objects for this report, memoized in @queries.
#
# Builds one Query per entry of element 8 of the parsed XML. Each Query is
# created with an empty hits array, then its hits are filled in by
# query_hits using that entry's element 4 and the TSV data keyed by the
# query's id.
def queries
83+
@queries ||= xml_ir[8].map do |n|
84+
query = Query.new(self, n[0], n[2], n[3], [])
85+
query.hits = query_hits(n[4], tsv_ir[query.id], query)
9286

93-
File.exist?(xml_formatter.filepath) && File.exist?(tsv_formatter.filepath)
87+
query
88+
end
9489
end
9590

9691
private
9792

98-
def xml_formatter
99-
@xml_formatter ||= Formatter.run(job, 'xml')
93+
# Parsed XML output, memoized in @xml_ir.
#
# With an imported XML file, that file is read and passed to parse_xml.
# Otherwise job.raise! is called first (presumably raising if the job did
# not succeed -- confirm against Job), and then the XML formatter's file is
# read and parsed.
def xml_ir
94+
@xml_ir ||=
95+
if job.imported_xml_file
96+
parse_xml File.read(job.imported_xml_file)
97+
else
98+
job.raise!
99+
parse_xml(xml_formatter.read_file)
100+
end
100101
end
101102

102-
def tsv_formatter
103-
@tsv_formatter ||= Formatter.run(job, 'custom_tsv')
103+
# Parsed TSV output, memoized in @tsv_ir.
#
# With an imported XML file nothing is parsed. Instead this returns a
# two-level hash whose missing keys are created on lookup, with each
# second-level value defaulting to ['', '', []]. Callers index it as
# tsv_ir[query_id][hit_id][0..2], so the defaults keep those lookups valid.
#
# Otherwise job.raise! is called first, and then the TSV formatter's file is
# read and passed to parse_tsv.
def tsv_ir
104+
@tsv_ir ||=
105+
if job.imported_xml_file
106+
Hash.new do |h1, k1|
107+
h1[k1] = Hash.new do |h2, k2|
108+
h2[k2] = ['', '', []]
109+
end
110+
end
111+
else
112+
job.raise!
113+
parse_tsv(tsv_formatter.read_file)
114+
end
104115
end
105116

106-
# Make program name and program name + version available via `program`
107-
# and `program_version` attributes.
108-
def extract_program_info(ir)
109-
@program = ir[0]
110-
@program_version = ir[1]
117+
# Result of Formatter.run for this job in 'xml' format, memoized so the
# formatter is run at most once per report.
def xml_formatter
118+
@xml_formatter ||= Formatter.run(job, 'xml')
111119
end
112120

113-
# Get database information (title and type) from job yaml or from XML.
114-
# Sets `querydb` and `dbtype` attributes.
115-
def extract_db_info(ir)
116-
if @querydb.empty?
117-
@querydb = ir[3].split.map do |path|
118-
{ title: File.basename(path) }
119-
end
120-
@dbtype = dbtype_from_program
121-
else
122-
@dbtype = @querydb.first.type
123-
end
121+
# Result of Formatter.run for this job in 'custom_tsv' format, memoized so
# the formatter is run at most once per report.
def tsv_formatter
122+
@tsv_formatter ||= Formatter.run(job, 'custom_tsv')
124123
end
125124

126-
# Make search params available via `params` attribute.
127-
#
128125
# Search params tweak the results. Like evalue cutoff or penalty to open
129126
# a gap. BLAST+ doesn't list all input params in the XML output. Only
130127
# matrix, evalue, gapopen, gapextend, and filters are available from XML
131128
# output.
132-
def extract_params(ir)
129+
def extract_params
133130
# Parse/get params from the job first.
134131
job_params = parse_advanced(job.advanced)
135132
# Old jobs from beta releases may not have the advanced key but they
@@ -138,21 +135,19 @@ def extract_params(ir)
138135

139136
# Parse params from BLAST XML.
140137
@params = Hash[
141-
*ir[7].first.map { |k, v| [k.gsub('Parameters_', ''), v] }.flatten
138+
*xml_ir[7].first.map { |k, v| [k.gsub('Parameters_', ''), v] }.flatten
142139
]
143140
@params['evalue'] = @params.delete('expect')
144141

145142
# Merge into job_params.
146143
@params = job_params.merge(@params)
147144
end
148145

149-
# Make search stats available via `stats` attribute.
150-
#
151146
# Search stats are computed metrics. Like total number of sequences or
152147
# effective search space.
153-
def extract_stats(ir)
154-
stats = ir[8].first[5][0]
155-
@stats = {
148+
def extract_stats
149+
stats = xml_ir[8].first[5][0]
150+
{
156151
nsequences: stats[0],
157152
ncharacters: stats[1],
158153
hsp_length: stats[2],
@@ -163,20 +158,11 @@ def extract_stats(ir)
163158
}
164159
end
165160

166-
# Create query objects for the given report from the given ir.
167-
def extract_queries(xml_ir, tsv_ir)
168-
xml_ir[8].each do |n|
169-
query = Query.new(self, n[0], n[2], n[3], [])
170-
extract_hits(n[4], tsv_ir[query.id], query)
171-
queries << query
172-
end
173-
end
174-
175161
# Create Hit objects for the given query from the given ir.
176-
def extract_hits(xml_ir, tsv_ir, query)
177-
return if xml_ir == ["\n"] # => No hits.
162+
def query_hits(xml_ir, tsv_ir, query)
163+
return [] if xml_ir == ["\n"] # => No hits.
178164

179-
xml_ir.each do |n|
165+
xml_ir.map do |n|
180166
# If hit comes from a non -parse_seqids database, then id (n[1]) is a
181167
# BLAST assigned internal id of the format 'gnl|BL_ORD_ID|serial'. We
182168
# assign the id to accession (because we use accession for sequence
@@ -190,19 +176,21 @@ def extract_hits(xml_ir, tsv_ir, query)
190176
n[1] = defline.shift
191177
n[2] = defline.join(' ')
192178
end
179+
193180
hit = Hit.new(query, n[0], n[1], n[3], n[2], n[4],
194181
tsv_ir[n[1]][0], tsv_ir[n[1]][1], [])
195-
extract_hsps(n[5], tsv_ir[n[1]][2], hit)
196-
query.hits << hit
182+
183+
hit.hsps = hsps(n[5], tsv_ir[n[1]][2], hit)
184+
185+
hit
197186
end
198187
end
199188

200-
# Create HSP objects for the given hit from the given ir.
201-
def extract_hsps(xml_ir, tsv_ir, hit)
202-
xml_ir.each_with_index do |n, i|
189+
# Builds the HSP objects for +hit+ and returns them as an array.
#
# The xml_ir and tsv_ir parameters shadow the report-level readers of the
# same name: here they are the per-hit slices passed in by the caller.
# For each XML row, the TSV entry at the same index is inserted at position
# 14 of the row (the row itself is modified) before the row is splatted
# into HSP.new.
def hsps(xml_ir, tsv_ir, hit)
190+
xml_ir.map.with_index do |n, i|
203191
n.insert(14, tsv_ir[i])
204-
hsp = HSP.new(hit, *n)
205-
hit.hsps << hsp
192+
193+
HSP.new(hit, *n)
206194
end
207195
end
208196

lib/sequenceserver/report.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ module SequenceServer
88
class Report
99
class << self
1010
# Returns a BLAST::Report for +job+. In the added version of this method the
# report is only constructed here; no `generate` call is made on it.
def generate(job)
11-
BLAST::Report.new(job).generate
11+
BLAST::Report.new(job)
1212
end
1313
end
1414

spec/blast/report_spec.rb

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
require 'spec_helper'
2+
require 'sequenceserver/report'
3+
require 'sequenceserver/blast/report'
4+
5+
module SequenceServer
6+
RSpec.describe BLAST::Job do
7+
before do
8+
FileUtils.mkdir_p(DOTDIR)
9+
FileUtils.rm_r(File.join(DOTDIR, job_id)) if File.exist?(File.join(DOTDIR, job_id))
10+
FileUtils.cp_r(File.join(__dir__, '..', 'fixtures', job_id), DOTDIR)
11+
12+
# For all files in DOTDIR/job_id, replace $PATH_PREFIX with the root dir of the project.
13+
job_dir = File.join(DOTDIR, job_id)
14+
root_dir = File.expand_path(File.join(__dir__, '..', '..', '..'))
15+
Dir[File.join(job_dir, '**', '*')].each do |f|
16+
File.write(f, File.read(f).gsub('$PATH_PREFIX', root_dir)) if File.file?(f)
17+
end
18+
19+
SequenceServer.init
20+
end
21+
22+
let(:job_id) { '38334a72-e8e7-4732-872b-24d3f8723563' }
23+
let(:job) { SequenceServer::Job.fetch(job_id) }
24+
let(:report) { BLAST::Report.new(job) }
25+
# Keys excluded from the comparison. JSON.parse returns String keys, so the
# list must hold strings: a list of symbols would never match any key.
let(:keys_to_ignore) { %w[querydb submitted_at imported_xml seqserv_version cloud_sharing_enabled] }
26+
27+
describe "#to_json" do
28+
it "returns a JSON representation of the job" do
29+
actual_report = JSON.parse(report.to_json).reject { |k, _| keys_to_ignore.include?(k) }
30+
expected_report = JSON.parse(File.read(File.join(job.dir, 'expected_outputs/frontend.json'))).reject { |k, _| keys_to_ignore.include?(k) }
31+
32+
actual_report.each do |k, v|
33+
expect(v).to eq(expected_report[k])
34+
end
35+
end
36+
end
37+
end
38+
end

spec/fixtures/38334a72-e8e7-4732-872b-24d3f8723563/expected_outputs/frontend.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)