Skip to content

Commit f1c2cca

Browse files
authored
Add recursive copy for structures (#56)
* Add recursive copy for structures. Certain methods, like `applytransform!`, exist only in mutating form. Rather than adding a non-mutating version of every potential mutating function, we can simply copy the structure, apply the transformation to the copy, and return the new structure.
* Fix parenting
* Disordered copy (smoke test)
* Fix
1 parent 53d9c1a commit f1c2cca

2 files changed

Lines changed: 94 additions & 2 deletions

File tree

src/model.jl

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,12 +113,14 @@ struct Atom <: AbstractAtom
113113
charge::String
114114
residue::StructuralElement
115115
end
116+
"""
    Atom(a::Atom, r::StructuralElement)

Build a new `Atom` with the same field values as `a` but attached to `r`.
The `coords` field is duplicated with `copy`, so the new atom gets its own array.
"""
function Atom(a::Atom, r::StructuralElement)
    return Atom(
        a.serial, a.name, a.alt_loc_id, copy(a.coords),
        a.occupancy, a.temp_factor, a.element, a.charge, r,
    )
end
116117

117118
"""
A container to hold different locations of the same atom.

`alt_loc_ids` maps a `Char` key (the alternate location ID) to the `Atom` at
that location. `default` is presumably the key of the location used when none
is specified - confirm against callers.
"""
118119
struct DisorderedAtom <: AbstractAtom
119120
alt_loc_ids::Dict{Char, Atom}
120121
default::Char
121122
end
123+
"""
    DisorderedAtom(da::DisorderedAtom, r::StructuralElement)

Build a new `DisorderedAtom` whose alternate locations are copies of those in
`da`, each attached to `r`. The default key is carried over unchanged.
"""
function DisorderedAtom(da::DisorderedAtom, r::StructuralElement)
    alt_locs = Dict(loc_id => Atom(atom, r) for (loc_id, atom) in da.alt_loc_ids)
    return DisorderedAtom(alt_locs, da.default)
end
122124

123125
"""
124126
A residue (amino acid) or other molecule - either a `Residue` or a
@@ -137,6 +139,14 @@ mutable struct Residue <: AbstractResidue
137139
chain::StructuralElement
138140
ss_code::Char
139141
end
142+
"""
    Residue(r::Residue, chain::StructuralElement)

Build a copy of `r` attached to `chain`. Every atom of `r` is copied and
re-parented to the new residue.
"""
function Residue(r::Residue, chain::StructuralElement)
    copied_atoms = Dict{String, AbstractAtom}()
    # The new residue has to exist before its atoms so that they can point
    # back to it; the (initially empty) dictionary is filled in afterwards.
    rnew = Residue(
        r.name, r.number, r.ins_code, r.het_res,
        copy(r.atom_list), copied_atoms, chain, r.ss_code,
    )
    for (atom_name, at) in pairs(r.atoms)
        copied_atoms[atom_name] = at isa Atom ? Atom(at, rnew) : DisorderedAtom(at, rnew)
    end
    return rnew
end
140150

141151
"""
142152
A container to hold different versions of the same residue (point
@@ -146,6 +156,7 @@ struct DisorderedResidue <: AbstractResidue
146156
names::Dict{String, Residue}
147157
default::String
148158
end
159+
"""
    DisorderedResidue(dr::DisorderedResidue, chain::StructuralElement)

Build a new `DisorderedResidue` whose residue variants are copies of those in
`dr`, each attached to `chain`. The default name is carried over unchanged.
"""
function DisorderedResidue(dr::DisorderedResidue, chain::StructuralElement)
    variants = Dict(res_name => Residue(res, chain) for (res_name, res) in dr.names)
    return DisorderedResidue(variants, dr.default)
end
149160

150161
"A chain (molecule) from a macromolecular structure."
151162
mutable struct Chain <: StructuralElement
@@ -154,13 +165,29 @@ mutable struct Chain <: StructuralElement
154165
residues::Dict{String, AbstractResidue}
155166
model::StructuralElement
156167
end
168+
"""
    Chain(c::Chain, model::StructuralElement)

Build a copy of `c` attached to `model`. Every residue of `c` is copied and
re-parented to the new chain.
"""
function Chain(c::Chain, model::StructuralElement)
    copied_residues = Dict{String, AbstractResidue}()
    # Create the chain first so the copied residues can reference it, then
    # populate the dictionary the chain already holds.
    cnew = Chain(c.id, copy(c.res_list), copied_residues, model)
    for (res_id, r) in pairs(c.residues)
        copied_residues[res_id] = r isa Residue ? Residue(r, cnew) : DisorderedResidue(r, cnew)
    end
    return cnew
end
157176

158177
"""
A conformation of a macromolecular structure.

Holds the model `number`, its `chains` keyed by a `String` (presumably the
chain ID - confirm against callers), and a reference to the containing
`structure`.
"""
159178
struct Model <: StructuralElement
160179
number::Int
161180
chains::Dict{String, Chain}
162181
structure::StructuralElement
163182
end
183+
"""
    Model(m::Model, structure::StructuralElement)

Build a copy of `m` attached to `structure`. Every chain of `m` is copied and
re-parented to the new model.
"""
function Model(m::Model, structure::StructuralElement)
    copied_chains = Dict{String, Chain}()
    # The model is built around the empty dictionary, which is filled once the
    # model exists and can serve as the parent of the copied chains.
    mnew = Model(m.number, copied_chains, structure)
    for (chain_id, chain) in pairs(m.chains)
        copied_chains[chain_id] = Chain(chain, mnew)
    end
    return mnew
end
164191

165192
"""
166193
A container for multiple `Model`s that represents a Protein Data Bank (PDB)
@@ -170,6 +197,14 @@ struct MolecularStructure <: StructuralElement
170197
name::String
171198
models::Dict{Int, Model}
172199
end
200+
"""
    MolecularStructure(s::MolecularStructure)

Build a copy of `s`. Every model of `s` is copied and re-parented to the new
structure.
"""
function MolecularStructure(s::MolecularStructure)
    copied_models = Dict{Int, Model}()
    # The structure is created first so each copied model can point back to it.
    snew = MolecularStructure(s.name, copied_models)
    for (model_number, model) in pairs(s.models)
        copied_models[model_number] = Model(model, snew)
    end
    return snew
end
173208

174209
"""
175210
A record for a single atom, e.g. as represented in a Protein Data Bank
@@ -338,6 +373,15 @@ end
338373
Base.firstindex(struc::MolecularStructure) = first(modelnumbers(struc))
339374
Base.lastindex(struc::MolecularStructure) = last(modelnumbers(struc))
340375

376+
# Recursive copy methods. Children are copied recursively; when a sub-element (anything below MolecularStructure) is copied, the copy still refers to the original parent element.
377+
function Base.copy(a::Atom)
    return Atom(a, a.residue)
end

function Base.copy(da::DisorderedAtom)
    # A DisorderedAtom stores no parent itself, so take it from its atoms.
    # `only` throws unless all alternate locations share a single residue.
    representative = only(unique(a -> a.residue, values(da.alt_loc_ids)))
    return DisorderedAtom(da, representative.residue)
end

function Base.copy(r::Residue)
    return Residue(r, r.chain)
end

function Base.copy(dr::DisorderedResidue)
    # Same approach as for DisorderedAtom: the chain is recovered from the
    # contained residues, which must all share a single chain.
    representative = only(unique(r -> r.chain, values(dr.names)))
    return DisorderedResidue(dr, representative.chain)
end

function Base.copy(c::Chain)
    return Chain(c, c.model)
end

function Base.copy(m::Model)
    return Model(m, m.structure)
end

function Base.copy(s::MolecularStructure)
    return MolecularStructure(s)
end
384+
341385
# Check if an atom name exists in a residue as a whitespace-padded version
342386
function findatombyname(res::Residue, atom_name::AbstractString)
343387
# Look for atom name directly

test/runtests.jl

Lines changed: 50 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,33 @@ function countlines_gzip(filename::AbstractString; gzip=false)
8585
end
8686
end
8787

88+
# Test helpers for walking the structural hierarchy.
# `getparent` returns the element one level up.
getparent(atom::Atom) = atom.residue
getparent(res::Residue) = res.chain
getparent(chain::Chain) = chain.model
getparent(model::Model) = model.structure

# `getchildren` returns an iterable of the elements one level down;
# atoms are leaves and have none.
getchildren(::Atom) = ()
getchildren(res::Residue) = values(res.atoms)
getchildren(chain::Chain) = values(chain.residues)
getchildren(model::Model) = values(model.chains)
getchildren(struc::MolecularStructure) = values(struc.models)
97+
function testparent(children, parent)
98+
for child in children
99+
if isa(child, DisorderedAtom)
100+
for a in values(child.alt_loc_ids)
101+
@test getparent(a) === parent
102+
end
103+
elseif isa(child, DisorderedResidue)
104+
for r in values(child.names)
105+
@test getparent(r) === parent
106+
testparent(getchildren(r), r)
107+
end
108+
else
109+
@test getparent(child) === parent
110+
testparent(getchildren(child), child)
111+
end
112+
end
113+
end
114+
88115
Aqua.test_all(BioStructures; ambiguities=(recursive=false))
89116

90117
# This is the only test set that requires an internet connection
@@ -198,8 +225,8 @@ end
198225
), " VA")
199226
dis_res = struc['A']["H_20A"]
200227
@test isa(dis_res, DisorderedResidue)
201-
struc['A'][10][" CA "] = Atom(
202-
100, " CA ", ' ', [1.0, 2.0, 3.0], 1.0, 10.0, " C", " ", res)
228+
a = Atom(100, " CA ", ' ', [1.0, 2.0, 3.0], 1.0, 10.0, " C", " ", res)
229+
struc['A'][10][" CA "] = a
203230
at = struc['A'][10]["CA"]
204231
@test isa(at, Atom)
205232
struc['A'][10][" CB "] = DisorderedAtom(Dict(
@@ -214,6 +241,27 @@ end
214241
400, " O ", ' ', [1.0, 2.0, 3.0], 1.0, 10.0, " O", " ", disorderedres(dis_res, "ILE"))
215242
fixlists!(struc)
216243

244+
# a copy does not share memory with the original
245+
testparent(getchildren(struc), struc)
246+
struc_copy = copy(struc)
247+
testparent(getchildren(struc_copy), struc_copy)
248+
struc_copy['A'][10]["CA"].coords[2] = 100
249+
@test struc_copy['A'][10]["CA"].coords[2] == 100
250+
@test a.coords[2] == 2
251+
@test struc['A'][10]["CA"].coords[2] == 2
252+
# copies of intermediate elements keep consistent parent links from the copied node downwards
253+
testparent(getchildren(mo), mo)
254+
mo_copy = copy(mo)
255+
testparent(getchildren(mo_copy), mo_copy)
256+
testparent(getchildren(ch), ch)
257+
ch_copy = copy(ch)
258+
testparent(getchildren(ch_copy), ch_copy)
259+
testparent(getchildren(res), res)
260+
res_copy = copy(res)
261+
testparent(getchildren(res_copy), res_copy)
262+
@test copy(dis_res) isa DisorderedResidue
263+
@test copy(dis_at) isa DisorderedAtom
264+
217265
# Test alternate constructors
218266
MolecularStructure("struc", Dict(1 => Model()))
219267
MolecularStructure()

0 commit comments

Comments
 (0)