Skip to content

Commit 8d4d112

Browse files
committed
move DSSP and STRIDE to extensions
1 parent 9a94650 commit 8d4d112

11 files changed

Lines changed: 221 additions & 181 deletions

File tree

Project.toml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,29 +7,31 @@ version = "3.1.0"
77
BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea"
88
BioSymbols = "3c28c6f8-a34d-59c4-9654-267d177fcfa9"
99
CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
10-
DSSP_jll = "74334e00-59ce-546d-b517-81f3b7e1d491"
1110
Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
1211
Format = "1fa38f19-a742-5d3f-a2b9-30dd87b9d5f8"
1312
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
1413
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
1514
RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
16-
STRIDE_jll = "850473c1-9ef0-5df9-a957-757f4cde8b8b"
1715
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
1816

1917
[weakdeps]
2018
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
2119
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
20+
DSSP_jll = "74334e00-59ce-546d-b517-81f3b7e1d491"
2221
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
2322
Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
2423
MMTF = "259c3a9c-12c3-507f-b21f-68ecc40fcda4"
2524
MetaGraphs = "626554b9-1ddb-594c-aa3c-2596fe9399a5"
25+
STRIDE_jll = "850473c1-9ef0-5df9-a957-757f4cde8b8b"
2626

2727
[extensions]
2828
BioStructuresBioAlignmentsExt = ["BioSequences", "BioAlignments"]
2929
BioStructuresBioSequencesExt = "BioSequences"
30+
BioStructuresDSSPExt = "DSSP_jll"
3031
BioStructuresDataFramesExt = "DataFrames"
3132
BioStructuresGraphsExt = ["Graphs", "MetaGraphs"]
3233
BioStructuresMMTFExt = "MMTF"
34+
BioStructuresSTRIDEExt = "STRIDE_jll"
3335

3436
[compat]
3537
BioAlignments = "3"

docs/src/api.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ Package extensions are used in order to reduce the number of dependencies:
1010
- To use `DataFrame`, call `using DataFrames`.
1111
- To use `MetaGraph`, call `using Graphs, MetaGraphs`.
1212
- To use [`MMTFDict`](@ref) or [`writemmtf`](@ref), call `import MMTF as MMTFPkg` (to avoid clashing with [`BioStructures.MMTF`](@ref)).
13+
- To use [`rundssp!`](@ref), [`rundssp`](@ref) or the `run_dssp` option for `read`/[`retrievepdb`](@ref), call `using DSSP_jll`.
14+
- To use [`runstride!`](@ref), [`runstride`](@ref) or the `run_stride` option for `read`/[`retrievepdb`](@ref), call `using STRIDE_jll`.
1315

1416
Exported names:
1517
```@index

ext/BioStructuresDSSPExt.jl

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
module BioStructuresDSSPExt

using BioStructures
using DSSP_jll

# Base command for the mkdssp binary provided by DSSP_jll, pointed at the mmCIF dictionary
# shipped with it and asked for the `dssp` output format that `rundssp!` parses by column.
const dssp_executable = `$(DSSP_jll.mkdssp()) --mmcif-dictionary $(DSSP_jll.mmcif_pdbx_dic) --output-format dssp`

# Run DSSP on a single model and store the resulting secondary structure code on each
# residue with `sscode!`. Returns the modified model.
# Throws if the model cannot be written as PDB, if the DSSP process fails, or if a residue
# reported by DSSP cannot be found in the model.
function BioStructures.rundssp!(mo::Model)
    # Write the structure to a temporary PDB file
    # Our mmCIF writer does not write out enough of the required entries for DSSP to read it
    tmp_pdb_path = tempname() * ".pdb"
    dssp_output_lines = try
        open(tmp_pdb_path, "w") do io
            # A bare HEADER record is written first (presumably required by mkdssp to
            # accept the file - TODO confirm)
            println(io, "HEADER")
            writepdb(io, mo)
        end
        # Run DSSP on the temporary PDB file
        readlines(pipeline(`$dssp_executable $tmp_pdb_path`))
    finally
        # Always clean up, including when writing the file or running DSSP throws
        rm(tmp_pdb_path; force=true)
    end

    data_begin = false
    for line in dssp_output_lines
        # Everything up to and including the column header line is skipped
        # NOTE(review): confirm the spacing of this prefix against real DSSP output
        if startswith(line, " # RESIDUE AA STRUCTURE BP1")
            data_begin = true
            continue
        end
        data_begin || continue
        # Records are fixed-width, ignore lines too short to hold the columns read below
        length(line) >= 17 || continue
        # Lines with '!' in column 14 carry no residue to assign
        # (presumably DSSP break markers - TODO confirm)
        line[14] == '!' && continue
        res_id = strip(line[6:11]) # Insertion code is in column 11
        ch_id = line[12]
        ss_code = line[17]
        ch = mo[ch_id]
        # DSSP does not mark hetero atoms
        if haskey(ch.residues, res_id)
            sscode!(ch[res_id], ss_code)
        elseif haskey(ch.residues, "H_$res_id")
            sscode!(ch["H_$res_id"], ss_code)
        else
            error("Could not assign secondary structure to residue ID $res_id in chain $ch_id")
        end
    end
    return mo
end

# Run DSSP on every model of a structure in turn. Returns the modified structure.
function BioStructures.rundssp!(struc::MolecularStructure)
    for mo in values(models(struc))
        rundssp!(mo)
    end
    return struc
end

# Non-mutating variant: DSSP is run on a deep copy, leaving the argument untouched
BioStructures.rundssp(el::Union{MolecularStructure, Model}) = rundssp!(deepcopy(el))

# Run DSSP directly on an input file, writing the DSSP output to `dssp_filepath_out`
function BioStructures.rundssp(filepath_in, dssp_filepath_out)
    run(`$dssp_executable $filepath_in $dssp_filepath_out`)
end

end # BioStructuresDSSPExt

ext/BioStructuresSTRIDEExt.jl

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
module BioStructuresSTRIDEExt

using BioStructures
using STRIDE_jll

# Base command for the STRIDE binary provided by STRIDE_jll
const stride_executable = `$(STRIDE_jll.stride_exe())`

# Run STRIDE on a single model and store the resulting secondary structure code on each
# residue with `sscode!`. Returns the modified model.
# Throws if the model cannot be written as PDB, if the STRIDE process fails, or if a
# residue reported by STRIDE cannot be found in the model.
function BioStructures.runstride!(mo::Model)
    # Write the structure to a temporary PDB file
    # STRIDE does not work with mmCIF files
    tmp_pdb_path = tempname() * ".pdb"
    stride_output_lines = try
        open(tmp_pdb_path, "w") do io
            writepdb(io, mo)
        end
        # Run STRIDE on the temporary PDB file
        readlines(pipeline(`$stride_executable $tmp_pdb_path`))
    finally
        # Always clean up, including when writing the file or running STRIDE throws
        rm(tmp_pdb_path; force=true)
    end

    for line in stride_output_lines
        # Only the per-residue assignment records are used
        if startswith(line, "ASG")
            ch_id = line[10]
            res_id = strip(line[11:15]) # Insertion code is directly after residue number
            ss_code = uppercase(line[25]) # STRIDE sometimes returns 'b' instead of 'B'
            sscode!(mo[ch_id][res_id], ss_code)
        end
    end
    return mo
end

# Run STRIDE on every model of a structure in turn. Returns the modified structure.
function BioStructures.runstride!(struc::MolecularStructure)
    for mo in values(models(struc))
        runstride!(mo)
    end
    return struc
end

# Non-mutating variant: STRIDE is run on a deep copy, leaving the argument untouched
BioStructures.runstride(el::Union{MolecularStructure, Model}) = runstride!(deepcopy(el))

# Run STRIDE directly on an input file, writing the output to `stride_filepath_out`
function BioStructures.runstride(filepath_in, stride_filepath_out)
    run(`$stride_executable $filepath_in -f$stride_filepath_out`)
end

end # BioStructuresSTRIDEExt

src/BioStructures.jl

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,11 @@ using Downloads
1616
using Format
1717
using PrecompileTools
1818
using RecipesBase
19-
using STRIDE_jll
20-
using DSSP_jll
2119

2220
using LinearAlgebra
2321
using Statistics
2422

2523
include("model.jl")
26-
include("secondary.jl")
2724
include("select.jl")
2825
include("pdb.jl")
2926
include("mmcif.jl")

src/download.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,7 +373,9 @@ Requires an internet connection.
373373
- `read_std_atoms::Bool=true`: whether to read standard ATOM records.
374374
- `read_het_atoms::Bool=true`: whether to read HETATM records.
375375
- `run_dssp::Bool=false`: whether to run DSSP to assign secondary structure.
376+
Requires the DSSP_jll.jl package to be imported if set to `true`.
376377
- `run_stride::Bool=false`: whether to run STRIDE to assign secondary structure.
378+
Requires the STRIDE_jll.jl package to be imported if set to `true`.
377379
"""
378380
function retrievepdb(pdbid::AbstractString;
379381
dir::AbstractString=pwd(),

src/model.jl

Lines changed: 105 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ export
3939
defaultresname,
4040
defaultresidue,
4141
resnames,
42+
sscode,
43+
sscode!,
4244
chain,
4345
chainid,
4446
chainid!,
@@ -72,7 +74,14 @@ export
7274
pdbextension,
7375
generatechainid,
7476
MMTFDict,
75-
writemmtf
77+
writemmtf,
78+
helixsscodes,
79+
sheetsscodes,
80+
coilsscodes,
81+
rundssp!,
82+
rundssp,
83+
runstride!,
84+
runstride
7685

"""
    StructuralElement

A macromolecular structural element.
"""
abstract type StructuralElement end
@@ -806,6 +815,39 @@ function DisorderedResidue(dis_res::DisorderedResidue, default::AbstractString)
806815
return DisorderedResidue(dis_res.names, default)
807816
end
808817

# Code representing unassigned secondary structure
const ss_code_unassigned = '-'

"""
    sscode(res)
    sscode(at)

Return the secondary structure code of an `AbstractResidue` or `AbstractAtom` as a `Char`.

Unassigned secondary structure is represented by `'$ss_code_unassigned'`.
Use `rundssp!` or `runstride!` to assign secondary structure.
"""
function sscode(res::Residue)
    return res.ss_code
end

# A disordered residue reports the code of its default residue
function sscode(dis_res::DisorderedResidue)
    return sscode(defaultresidue(dis_res))
end

# An atom reports the code of the residue it belongs to
function sscode(at::Atom)
    return sscode(residue(at))
end

# A disordered atom reports the code of its default atom
function sscode(dis_at::DisorderedAtom)
    return sscode(defaultatom(dis_at))
end
833+
"""
    sscode!(res, ss_code)

Assign the `Char` `ss_code` as the secondary structure code of an `AbstractResidue`
and return the residue.

For a `DisorderedResidue` the code is set on every residue it contains.
"""
function sscode!(res::Residue, ss_code)
    res.ss_code = ss_code
    return res
end

function sscode!(dis_res::DisorderedResidue, ss_code)
    # Apply the same code to each alternative residue, looked up by residue name
    foreach(resnames(dis_res)) do name
        sscode!(disorderedres(dis_res, name), ss_code)
    end
    return dis_res
end
850+
809851
"""
810852
chain(at)
811853
chain(res)
@@ -1643,6 +1685,8 @@ function generatechainid(entity_id::Integer)
16431685
return out_string
16441686
end
16451687

1688+
# MMTF functions
1689+
16461690
"""
16471691
MMTFDict(filepath; gzip=false)
16481692
MMTFDict(io; gzip=false)
@@ -1680,6 +1724,66 @@ gzipped.
16801724
"""
16811725
function writemmtf end
16821726

1727+
# Secondary structure
1728+
"Secondary structure codes corresponding to an α-helix, as a `Set` of `Char`."
const helixsscodes = Set{Char}(('G', 'H', 'I', 'P'))

"Secondary structure codes corresponding to a β-sheet, as a `Set` of `Char`."
const sheetsscodes = Set{Char}(('E', 'B'))

"Secondary structure codes corresponding to a coil, as a `Set` of `Char`."
const coilsscodes = Set{Char}(('C', 'T', 'S', ' '))
1737+
"""
    rundssp!(struc)
    rundssp!(model)

Assign secondary structure to the given structural element in place by running DSSP
(Define Secondary Structure of Proteins).

The DSSP_jll.jl package must be imported for methods to be available.
The structural element is written to a temporary PDB file, so this fails whenever it
cannot be written to a PDB file, for example if there are two-letter chain IDs.
"""
function rundssp! end

"""
    rundssp(struc)
    rundssp(model)
    rundssp(filepath_in, dssp_filepath_out)

Run DSSP (Define Secondary Structure of Proteins) and return a copy of the structural
element with secondary structure assigned, or run DSSP directly on a PDB or mmCIF file.

The DSSP_jll.jl package must be imported for methods to be available.
"""
function rundssp end

"""
    runstride!(struc)
    runstride!(model)

Assign secondary structure to the given structural element in place by running STRIDE.

The STRIDE_jll.jl package must be imported for methods to be available.
The structural element is written to a temporary PDB file, so this fails whenever it
cannot be written to a PDB file, for example if there are two-letter chain IDs.
"""
function runstride! end

"""
    runstride(struc)
    runstride(model)
    runstride(filepath_in, stride_filepath_out)

Run STRIDE and return a copy of the structural element with secondary structure
assigned, or run STRIDE directly on a PDB file.

The STRIDE_jll.jl package must be imported for methods to be available.
"""
function runstride end
1786+
16831787
# Descriptive showing of elements on a single line
16841788

16851789
Base.show(io::IO, struc::MolecularStructure) = print(io,

src/pdb.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,9 @@ Read a Protein Data Bank (PDB) file and return a `MolecularStructure`.
3838
- `read_std_atoms::Bool=true`: whether to read standard ATOM records.
3939
- `read_het_atoms::Bool=true`: whether to read HETATM records.
4040
- `run_dssp::Bool=false`: whether to run DSSP to assign secondary structure.
41+
Requires the DSSP_jll.jl package to be imported if set to `true`.
4142
- `run_stride::Bool=false`: whether to run STRIDE to assign secondary structure.
43+
Requires the STRIDE_jll.jl package to be imported if set to `true`.
4244
- `gzip::Bool=false`: whether the input is gzipped, not available for PDB
4345
format.
4446
"""

0 commit comments

Comments
 (0)