Skip to content

Commit 3b4e96e

Browse files
authored
Specialize residue names (#80)
If you've added hydrogens to a structure, this supports the full information present in the `residuedata` tables imported from ff19SB: - renames HIS to one of HID/HIE/HIP (using the protonation state from the added hydrogens) - renames the amino- and carboxyl-terminal residues by prepending `N`/`C`
1 parent 98bfd75 commit 3b4e96e

6 files changed

Lines changed: 2440 additions & 2 deletions

File tree

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "BioStructures"
22
uuid = "de9282ab-8554-53be-b2d6-f6c222edabfc"
33
authors = ["Joe G Greener <jgreener@hotmail.co.uk>"]
4-
version = "4.6.1"
4+
version = "4.7.0"
55

66
[deps]
77
BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea"

docs/src/documentation.md

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -568,7 +568,34 @@ Similar to [`ContactMap`](@ref), contacts are found between any element type pas
568568
So if you wanted the graph of chain contacts in a protein complex you could give a [`Model`](@ref) as the first argument.
569569

570570
Calling `MetaGraph` on a [`Chain`](@ref) without a contact distance does something different: it constructs a graph of atoms where edges are determined by the known bonds of standard amino acids in the chain.
571-
Hydrogens should be present and atom names should match those in OpenMM.
571+
Hydrogens should be present and atom names should match those in OpenMM. See [`specialize_resnames!`](@ref) to
572+
ensure that residue names have been specialized to their actual configuration in the protein:
573+
574+
```julia-repl
575+
julia> using BioStructures, MetaGraphs
576+
577+
julia> struc_M3YHX5 = read(joinpath(pkgdir(BioStructures), "test", "data", "AF-M3YHX5-F1-model_v4_hydrogens.cif"), MMCIFFormat)
578+
MolecularStructure AF-M3YHX5-F1-model_v4_hydrogens.cif with 1 models, 1 chains (A), 112 residues, 1833 atoms
579+
580+
julia> MetaGraph(struc_M3YHX5["A"])
581+
r = Residue 1:A with name MET, 19 atoms
582+
ERROR: KeyError: key "H" not found
583+
[...]
584+
585+
julia> mg = MetaGraph(struc_M3YHX5["A"]; strict=false)
586+
{1833, 1850} undirected Int64 metagraph with Float64 weights defined by :weight (default weight 1.0)
587+
```
588+
589+
but if you really want to get all the bonds, you'd better use the default `strict=true`.
590+
Fortunately, this example only requires residue renaming:
591+
592+
```julia-repl
593+
julia> specialize_resnames!(struc_M3YHX5)
594+
MolecularStructure AF-M3YHX5-F1-model_v4_hydrogens.cif with 1 models, 1 chains (A), 112 residues, 1833 atoms
595+
596+
julia> mg = MetaGraph(struc_M3YHX5["A"])
597+
{1833, 1853} undirected Int64 metagraph with Float64 weights defined by :weight (default weight 1.0)
598+
```
572599

573600
## Assigning secondary structure
574601

src/model.jl

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ export
3939
defaultresname,
4040
defaultresidue,
4141
resnames,
42+
specialize_resnames!,
4243
sscode,
4344
sscode!,
4445
chain,
@@ -1097,6 +1098,54 @@ This is the `Model` with the lowest model number.
10971098
"""
10981099
defaultmodel(struc::MolecularStructure) = first(sort(collect(values(models(struc)))))
10991100

1101+
"""
1102+
specialize_resnames!(r)
1103+
1104+
If `r` is a histidine residue, rename it to reflect a specific protonation configuration (HID,
1105+
HIE, or HIP) based on the presence of HD1 and HE2 atoms. For any other residue,
1106+
`r` will be unchanged.
1107+
1108+
Hydrogens must have been assigned prior to calling this function.
1109+
"""
1110+
function specialize_resnames!(r::Residue)
1111+
rname = resname(r)
1112+
if rname == "HIS"
1113+
hd1 = findatombyname(r, "HD1"; strict=false)
1114+
he2 = findatombyname(r, "HE2"; strict=false)
1115+
if !isnothing(hd1) && !isnothing(he2)
1116+
r.name = "HIP"
1117+
elseif !isnothing(hd1)
1118+
r.name = "HID"
1119+
elseif !isnothing(he2)
1120+
r.name = "HIE"
1121+
else
1122+
error("at least one of HD1 or HE2 must be present")
1123+
end
1124+
end
1125+
# N and C termini
1126+
if haskey(r.atoms, "H3") && !haskey(residuedata[rname].atoms, "H3") && haskey(residuedata, "N" * rname)
1127+
r.name = "N" * rname
1128+
elseif haskey(r.atoms, "OXT") && !haskey(residuedata[rname].atoms, "OXT") && haskey(residuedata, "C" * rname)
1129+
r.name = "C" * rname
1130+
end
1131+
return r
1132+
end
1133+
specialize_resnames!(r::DisorderedResidue) = (foreach(specialize_resnames!, collectresidues(r; expand_disordered=true)); r)
1134+
function specialize_resnames!(c::Chain)
1135+
for (key, res) in c.residues
1136+
if isa(res, DisorderedResidue)
1137+
# Create a new `names` dictionary with specialized residue names
1138+
names = Dict((specialize_resnames!(r); resname(r, strip=false) => r) for r in values(res.names))
1139+
c.residues[key] = DisorderedResidue(names, resname(res.names[defaultresname(res)]))
1140+
else
1141+
specialize_resnames!(res)
1142+
end
1143+
end
1144+
return c
1145+
end
1146+
specialize_resnames!(m::Model) = (foreach(specialize_resnames!, m); m)
1147+
specialize_resnames!(s::MolecularStructure) = (foreach(specialize_resnames!, s); s)
1148+
11001149
# Sort lists of elements
11011150

11021151
# Sort atoms by serial

0 commit comments

Comments
 (0)