Skip to content

Commit 48d8146

Browse files
authored
CIF: handle all optional _atom_site fields (#79)
This is a follow-up to #78, ensuring that all optional fields in the `_atom_site` category are handled correctly. This also adds tests for files written by two external tools.
1 parent bbb8354 commit 48d8146

3 files changed

Lines changed: 114 additions & 14 deletions

File tree

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "BioStructures"
22
uuid = "de9282ab-8554-53be-b2d6-f6c222edabfc"
33
authors = ["Joe G Greener <jgreener@hotmail.co.uk>"]
4-
version = "4.6.0"
4+
version = "4.6.1"
55

66
[deps]
77
BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea"

src/mmcif.jl

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -383,25 +383,43 @@ function MolecularStructure(mmcif_dict::MMCIFDict;
383383
return struc
384384
end
385385

386+
# Sentinel type used as the `get` default to detect a field that is not in the dictionary
struct NoField end

# Return the value stored under the first of `fields` for which `get` does not
# fall back to the sentinel default.
# Throws a KeyError when none of the given field names yields a value.
function firstfield(d::MMCIFDict, fields...)
    sentinel = NoField()
    for name in fields
        value = get(d, name, sentinel)
        # NoField is a singleton, so identity comparison is sufficient
        value === sentinel || return value
    end
    throw(KeyError("None of the fields $(join(fields, ", ")) found in MMCIFDict"))
end
396+
397+
# Return the `i`th entry of `field` in `d`, or `default` when `get` reports the
# field as absent from the dictionary.
# Throws a BoundsError carrying the field's values and the offending index when
# the field is present but `i` lies outside `1:length(f)`.
function get_ith(d::MMCIFDict, field::AbstractString, i::Integer, default)
    f = get(d, field, NoField())
    f === NoField() && return default
    # BoundsError takes the accessed collection and the index, not a message
    # string; also reject indices below 1 rather than only those past the end
    1 <= i <= length(f) || throw(BoundsError(f, i))
    return f[i]
end
403+
386404
# Constructor from mmCIF ATOM/HETATM line
387405
# Build an AtomRecord from row `i` of the `_atom_site` columns in `d`.
# Columns read via `get_ith` may be absent, in which case the given default is
# used; columns read via `firstfield` fall back from the `auth_` name to the
# `label_` name. Values found in `missingvals` are replaced by the same
# defaults that apply when the column is absent.
AtomRecord(d::MMCIFDict, i::Integer) = AtomRecord(
    get_ith(d, "_atom_site.group_PDB", i, "ATOM") == "HETATM",
    parse(Int, d["_atom_site.id"][i]),
    firstfield(d, "_atom_site.auth_atom_id", "_atom_site.label_atom_id")[i],
    (altid = get_ith(d, "_atom_site.label_alt_id", i, " "); altid in missingvals ? ' ' : altid[1]),
    firstfield(d, "_atom_site.auth_comp_id", "_atom_site.label_comp_id")[i],
    d["_atom_site.auth_asym_id"][i],
    parse(Int, firstfield(d, "_atom_site.auth_seq_id", "_atom_site.label_seq_id")[i]),
    (inscode = get_ith(d, "_atom_site.pdbx_PDB_ins_code", i, " "); inscode in missingvals ? ' ' : inscode[1]),
    [
        parse(Float64, get_ith(d, "_atom_site.Cartn_x", i, "NaN")),
        parse(Float64, get_ith(d, "_atom_site.Cartn_y", i, "NaN")),
        parse(Float64, get_ith(d, "_atom_site.Cartn_z", i, "NaN"))
    ],
    # A present column can still hold a missing marker for an individual atom;
    # parsing that marker as Float64 would throw, so map it to the default
    (occ = get_ith(d, "_atom_site.occupancy", i, "1.0"); occ in missingvals ? 1.0 : parse(Float64, occ)),
    (bfac = get_ith(d, "_atom_site.B_iso_or_equiv", i, "0.0"); bfac in missingvals ? 0.0 : parse(Float64, bfac)),
    (elem = get_ith(d, "_atom_site.type_symbol", i, " "); elem in missingvals ? " " : elem),
    (charge = get_ith(d, "_atom_site.pdbx_formal_charge", i, " "); charge in missingvals ? " " : charge),
)
406424

407425
# Format a mmCIF data value by enclosing with quotes or semicolon lines where

test/runtests.jl

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2217,6 +2217,88 @@ end
22172217
@test coords(struc['A'][1]["CA"]) == [26.091, 52.849, 39.889]
22182218
@test serial(struc['A'][1]["O"]) == 4
22192219

2220+
# As written out by ChimeraX after adding hydrogens
2221+
multlinestruc_str = """
2222+
data_test
2223+
loop_
2224+
_atom_site.group_PDB
2225+
_atom_site.id
2226+
_atom_site.type_symbol
2227+
_atom_site.label_atom_id
2228+
_atom_site.label_alt_id
2229+
_atom_site.label_comp_id
2230+
_atom_site.label_asym_id
2231+
_atom_site.label_entity_id
2232+
_atom_site.label_seq_id
2233+
_atom_site.Cartn_x
2234+
_atom_site.Cartn_y
2235+
_atom_site.Cartn_z
2236+
_atom_site.auth_asym_id
2237+
_atom_site.auth_seq_id
2238+
_atom_site.pdbx_PDB_ins_code
2239+
_atom_site.occupancy
2240+
_atom_site.B_iso_or_equiv
2241+
_atom_site.pdbx_PDB_model_num
2242+
ATOM 1 N N . MET A 1 1 33.586 -1.645 -17.107 A 1 ? 1.00 52.29 1
2243+
ATOM 2 C CA . MET A 1 1 32.274 -0.966 -17.129 A 1 ? 1.00 52.29 1
2244+
ATOM 3 C C . MET A 1 1 32.541 0.520 -17.257 A 1 ? 1.00 52.29 1
2245+
ATOM 4 C CB . MET A 1 1 31.483 -1.295 -15.852 A 1 ? 1.00 52.29 1
2246+
ATOM 5 O O . MET A 1 1 33.172 1.074 -16.369 A 1 ? 1.00 52.29 1
2247+
ATOM 6 C CG . MET A 1 1 30.781 -2.650 -15.985 A 1 ? 1.00 52.29 1
2248+
ATOM 7 S SD . MET A 1 1 30.056 -3.237 -14.436 A 1 ? 1.00 52.29 1
2249+
ATOM 8 C CE . MET A 1 1 28.335 -3.503 -14.949 A 1 ? 1.00 52.29 1
2250+
ATOM 9 H HA . MET A 1 1 31.709 -1.305 -17.998 A 1 ? 1.00 52.29 1
2251+
ATOM 10 H HB2 . MET A 1 1 30.735 -0.520 -15.684 A 1 ? 1.00 52.29 1
2252+
ATOM 11 H HB3 . MET A 1 1 32.166 -1.326 -15.003 A 1 ? 1.00 52.29 1
2253+
ATOM 12 H HG2 . MET A 1 1 29.986 -2.556 -16.725 A 1 ? 1.00 52.29 1
2254+
ATOM 13 H HG3 . MET A 1 1 31.503 -3.387 -16.337 A 1 ? 1.00 52.29 1
2255+
ATOM 14 H H1 . MET A 1 1 34.096 -1.421 -17.950 A 1 ? 1.00 52.29 1
2256+
ATOM 15 H H2 . MET A 1 1 34.112 -1.334 -16.303 A 1 ? 1.00 52.29 1
2257+
ATOM 16 H H3 . MET A 1 1 33.447 -2.644 -17.052 A 1 ? 1.00 52.29 1
2258+
ATOM 17 H HE1 . MET A 1 1 27.755 -3.867 -14.101 A 1 ? 1.00 52.29 1
2259+
ATOM 18 H HE2 . MET A 1 1 27.912 -2.562 -15.301 A 1 ? 1.00 52.29 1
2260+
ATOM 19 H HE3 . MET A 1 1 28.305 -4.239 -15.753 A 1 ? 1.00 52.29 1
2261+
"""
2262+
struc = read(IOBuffer(multlinestruc_str), MMCIFFormat)
2263+
@test coords(struc['A'][1]["CA"]) == [32.274, -0.966, -17.129]
2264+
@test serial(struc['A'][1]["O"]) == 5
2265+
2266+
# As written out by Boltz-2
2267+
multlinestruc_str = """
2268+
data_test
2269+
loop_
2270+
_atom_site.group_PDB
2271+
_atom_site.id
2272+
_atom_site.type_symbol
2273+
_atom_site.label_atom_id
2274+
_atom_site.label_alt_id
2275+
_atom_site.label_comp_id
2276+
_atom_site.label_seq_id
2277+
_atom_site.auth_seq_id
2278+
_atom_site.pdbx_PDB_ins_code
2279+
_atom_site.label_asym_id
2280+
_atom_site.Cartn_x
2281+
_atom_site.Cartn_y
2282+
_atom_site.Cartn_z
2283+
_atom_site.occupancy
2284+
_atom_site.label_entity_id
2285+
_atom_site.auth_asym_id
2286+
_atom_site.auth_comp_id
2287+
_atom_site.B_iso_or_equiv
2288+
_atom_site.pdbx_PDB_model_num
2289+
ATOM 1 N N . MET 1 1 ? A 6.92711 -31.05979 1.53642 1 1 A MET 49.872 1
2290+
ATOM 2 C CA . MET 1 1 ? A 6.00617 -31.17880 0.39791 1 1 A MET 49.872 1
2291+
ATOM 3 C C . MET 1 1 ? A 6.77288 -31.45638 -0.88915 1 1 A MET 49.872 1
2292+
ATOM 4 O O . MET 1 1 ? A 7.75015 -30.77929 -1.19468 1 1 A MET 49.872 1
2293+
ATOM 5 C CB . MET 1 1 ? A 5.17828 -29.90640 0.21397 1 1 A MET 49.872 1
2294+
ATOM 6 C CG . MET 1 1 ? A 4.15304 -29.99347 -0.90422 1 1 A MET 49.872 1
2295+
ATOM 7 S SD . MET 1 1 ? A 3.34741 -28.41183 -1.23501 1 1 A MET 49.872 1
2296+
ATOM 8 C CE . MET 1 1 ? A 4.64046 -27.59273 -2.18119 1 1 A MET 49.872 1
2297+
"""
2298+
struc = read(IOBuffer(multlinestruc_str), MMCIFFormat)
2299+
@test coords(struc['A'][1]["CA"]) == [6.00617, -31.17880, 0.39791]
2300+
@test serial(struc['A'][1]["O"]) == 4
2301+
22202302
# Test files that should not parse
22212303
@test_throws Exception read(testfilepath("mmCIF", "1AKE_err.cif"), MMCIFFormat)
22222304
@test_throws ErrorException read(testfilepath("mmCIF", "1EN2_err.cif"), MMCIFFormat)

0 commit comments

Comments
 (0)