Skip to content

Commit e5c8534

Browse files
committed
retrievepdb format option and use mmCIF by default
1 parent 971a51a commit e5c8534

3 files changed

Lines changed: 15 additions & 9 deletions

File tree

docs/src/documentation.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -644,6 +644,8 @@ INFO: Downloading PDB: 1ALW
644644
MolecularStructure 1ALW.pdb with 1 models, 2 chains (A,B), 346 residues, 2928 atoms
645645
```
646646

647+
By default the mmCIF file is downloaded, as this format is available for all PDB entries; a different format can be selected with the `format` keyword argument.
648+
647649
If you prefer to work with data frames rather than the data structures in BioStructures, the `DataFrame` constructor from [DataFrames.jl](https://github.com/JuliaData/DataFrames.jl) has been extended to construct relevant data frames from lists of atoms or residues:
648650

649651
```julia-repl

src/download.jl

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ Requires an internet connection.
150150
# Arguments
151151
- `dir::AbstractString=pwd()`: the directory to which the PDB file is
152152
downloaded; defaults to the current working directory.
153-
- `format::Type=PDB`: the format of the PDB file; options are PDBFormat,
153+
- `format::Type=PDBFormat`: the format of the PDB file; options are PDBFormat,
154154
PDBXMLFormat and MMCIFFormat. MMTF files are no longer available to download.
155155
- `obsolete::Bool=false`: if set `true`, the PDB file is downloaded in the
156156
auto-generated "obsolete" directory inside the specified `dir`.
@@ -283,7 +283,7 @@ Requires an internet connection.
283283
# Arguments
284284
- `dir::AbstractString=pwd()`: the directory to which the PDB files are
285285
downloaded; defaults to the current working directory.
286-
- `format::Type=PDB`: the format of the PDB file; options are PDBFormat,
286+
- `format::Type=PDBFormat`: the format of the PDB file; options are PDBFormat,
287287
PDBXMLFormat and MMCIFFormat. MMTF files are no longer available to download.
288288
- `overwrite::Bool=false`: if set `true`, overwrites the PDB file if it exists
289289
in `dir`; by default skips downloading the PDB file if it exists.
@@ -344,7 +344,7 @@ Requires an internet connection.
344344
# Arguments
345345
- `obsolete_dir::AbstractString=pwd()`: the directory where the PDB files are
346346
downloaded; defaults to the current working directory.
347-
- `format::Type=PDB`: the format of the PDB file; options are PDBFormat,
347+
- `format::Type=PDBFormat`: the format of the PDB file; options are PDBFormat,
348348
PDBXMLFormat and MMCIFFormat. MMTF files are no longer available to download.
349349
- `overwrite::Bool=false`: if set `true`, overwrites the PDB file if it exists
350350
in `obsolete_dir`; by default skips downloading the PDB file if it exists.
@@ -368,6 +368,8 @@ Requires an internet connection.
368368
- `pdbid::AbstractString`: the PDB ID to be downloaded and read.
369369
- `dir::AbstractString=pwd()`: the directory to which the PDB file is
370370
downloaded; defaults to the current working directory.
371+
- `format::Type=MMCIFFormat`: the format of the PDB file; options are PDBFormat,
372+
PDBXMLFormat and MMCIFFormat. MMTF files are no longer available to download.
371373
- `obsolete::Bool=false`: if set `true`, the PDB file is downloaded in the
372374
auto-generated "obsolete" directory inside the specified `dir`.
373375
- `overwrite::Bool=false`: if set `true`, overwrites the PDB file if it exists
@@ -387,21 +389,23 @@ Requires an internet connection.
387389
"""
388390
function retrievepdb(pdbid::AbstractString;
389391
dir::AbstractString=pwd(),
392+
format::Type{<:Union{PDBFormat, PDBXMLFormat, MMCIFFormat}}=MMCIFFormat,
390393
obsolete::Bool=false,
391394
overwrite::Bool=false,
392395
ba_number::Integer=0,
393396
structure_name::AbstractString="$(uppercase(pdbid)).pdb",
394397
kwargs...)
395-
downloadpdb(pdbid, dir=dir, obsolete=obsolete, overwrite=overwrite, ba_number=ba_number)
398+
downloadpdb(pdbid, dir=dir, format=format, obsolete=obsolete,
399+
overwrite=overwrite, ba_number=ba_number)
396400
if obsolete
397401
# If obsolete is set true, the PDB file is present in the obsolete directory inside dir
398402
dir = joinpath(dir, "obsolete")
399403
end
400-
pdbid = uppercase(pdbid)
404+
pdbid_upper = uppercase(pdbid)
401405
if ba_number == 0
402-
pdbpath = joinpath(dir, "$pdbid.pdb")
406+
pdbpath = joinpath(dir, "$pdbid_upper.$(pdbextension[format])")
403407
else
404-
pdbpath = joinpath(dir, "$(pdbid)_ba$ba_number.pdb")
408+
pdbpath = joinpath(dir, "$(pdbid_upper)_ba$ba_number.$(pdbextension[format])")
405409
end
406-
read(pdbpath, PDBFormat; structure_name=structure_name, kwargs...)
410+
read(pdbpath, format; structure_name=structure_name, kwargs...)
407411
end

test/runtests.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ Aqua.test_all(BioStructures; ambiguities=(recursive=false))
169169

170170
struc = retrievepdb("1AKE", dir=temp_dir, obsolete=true, read_het_atoms=false)
171171
@test countatoms(struc) == 3312
172-
@test serial(collectatoms(struc)[2000]) == 2006
172+
@test serial(collectatoms(struc)[2000]) == 2005
173173
@test sum(ishetero, collectatoms(struc)) == 0
174174

175175
struc = retrievepdb("1AKE", dir=temp_dir, ba_number=1, read_het_atoms=false, read_std_atoms=false)

0 commit comments

Comments
 (0)