Skip to content

Commit 4ef7671

Browse files
add crystallographic data to fetch metadata
1 parent 912b2e8 commit 4ef7671

3 files changed

Lines changed: 155 additions & 53 deletions

File tree

mp_api/mcp/_schemas.py

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33

44
from typing import Any
55

6+
from emmet.core.summary import SummaryDoc
67
from pydantic import BaseModel, Field, model_validator
8+
from typing_extensions import Self
79

810
from mp_api.client.core.utils import validate_ids
911

@@ -264,6 +266,120 @@ class MaterialMetadata(BaseModel):
264266
),
265267
)
266268

269+
# Crystallographic fields. `from_summary_data` fills these from the
# `structure` entry of the summary data; all default to None.
cell_vectors: list[list[float]] | None = Field(
    None,
    description=(
        # Adjacent literals are concatenated verbatim, so each fragment
        # that is followed by another sentence must end with a space.
        "The 3x3 array of (lattice) cell vectors, all values in Å. "
        "The first row is the a vector, the second the b vector, "
        "and the third the c vector."
    ),
)

atoms: list[str] | None = Field(
    None,
    description=(
        "A list of atom symbols on each site. Should have length `nsites`."
    ),
)

cartesian_coordinates: list[list[float]] | None = Field(
    None,
    description=(
        "A `nsites` x 3 array of floats, all values in Å, "
        "representing the Cartesian coordinates of the atoms. "
        "The order is the same as in `atoms`."
    ),
)

magnetic_moments: list[float] | None = Field(
    None,
    description=(
        "A `nsites` array of floats, all values in μB, representing "
        "the on-site magnetic moments found by integrating the "
        "electronic spin density in a sphere surrounding each site "
        "in the structure."
    ),
)
302+
303+
@staticmethod
def _summary_fields() -> list[str]:
    """List the SummaryDoc fields that must be requested to build this document.

    The result is every field name shared by `MaterialMetadata` and
    `SummaryDoc`, followed by the SummaryDoc fields that only appear in
    `MaterialMetadata` in renamed / flattened form.
    """
    # Field names that exist under the same name in both models.
    shared_names = set(MaterialMetadata.model_fields) & set(SummaryDoc.model_fields)

    # The following fields get renamed and flattened in `MaterialMetadata`
    flattened_sources = [
        "structure",
        "bulk_modulus",
        "shear_modulus",
        "database_IDs",
        "symmetry",
    ]
    return [*shared_names, *flattened_sources]
315+
316+
@classmethod
317+
def from_summary_data(cls, summary_data: dict[str, Any], **kwargs) -> Self:
318+
"""Create a MaterialMetadata document from materials summary data.
319+
320+
Args:
321+
summary_data : dict of str to Any
322+
The dict representation of an `emmet.core.summary.SummaryDoc`
323+
document (i.e., its `model_dump_json`)
324+
**kwargs : to pass to `MaterialMetadata`
325+
"""
326+
metadata = {
327+
**kwargs,
328+
**{
329+
k: summary_data[k]
330+
for k in MaterialMetadata.model_fields
331+
if summary_data.get(k) is not None
332+
},
333+
}
334+
for k in {"bulk", "shear"}:
335+
if summary_data.get(f"{k}_modulus"):
336+
metadata.update(
337+
{
338+
f"{k}_modulus_{v}": summary_data[f"{k}_modulus"].get(v)
339+
for v in ("voigt", "reuss", "hill")
340+
}
341+
)
342+
343+
# Augment with experimental database id information
344+
if summary_data.get("database_IDs"):
345+
metadata.update(
346+
{
347+
f"linked_{database}_ids": ", ".join(matched_ids)
348+
for database, matched_ids in summary_data["database_IDs"].items()
349+
}
350+
)
351+
352+
if (symm_meta := summary_data.get("symmetry")) is not None:
353+
metadata.update(
354+
{
355+
k: symm_meta.get(v)
356+
for k, v in {
357+
"space_group_number": "number",
358+
"space_group_symbol": "symbol",
359+
"crystal_system": "crystal_system",
360+
"point_group": "point_group",
361+
}.items()
362+
}
363+
)
364+
365+
# flatten structure data
366+
if struct_dict := summary_data.get("structure"):
367+
magnetic_moments = [
368+
site["properties"].get("magmom") for site in struct_dict["sites"]
369+
]
370+
metadata.update(
371+
cell_vectors=struct_dict["lattice"]["matrix"],
372+
atoms=[site["species"][0]["element"] for site in struct_dict["sites"]],
373+
cartesian_coordinates=[site["xyz"] for site in struct_dict["sites"]],
374+
magnetic_moments=(
375+
None
376+
if any(magmom is None for magmom in magnetic_moments)
377+
else magnetic_moments
378+
),
379+
)
380+
381+
return cls(**metadata)
382+
267383

268384
class FetchResult(BaseModel):
269385
"""Schematize result of the `fetch` MCP tool.

mp_api/mcp/tools.py

Lines changed: 20 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -167,57 +167,7 @@ def fetch(self, idx: str) -> FetchResult:
167167
if not robo_desc:
168168
return FetchResult(id=idx)
169169

170-
metadata: dict[str, str] | None = None
171-
if (
172-
len(
173-
summary_docs := self.client.materials.summary.search(
174-
material_ids=[idx],
175-
fields=list(
176-
set(MaterialMetadata.model_fields)
177-
& set(self.client.materials.summary.document_model.model_fields)
178-
),
179-
)
180-
)
181-
> 0
182-
):
183-
# Try to avoid more nested fields, just provide things with
184-
# simple str or numeric type
185-
summary_doc = summary_docs[0]
186-
metadata = {
187-
k: summary_doc[k]
188-
for k in MaterialMetadata.model_fields
189-
if summary_doc.get(k) is not None
190-
}
191-
for k in {"bulk", "shear"}:
192-
if summary_doc.get(f"{k}_modulus"):
193-
metadata.update(
194-
{
195-
f"{k}_modulus_{v}": summary_doc[f"{k}_modulus"].get(v)
196-
for v in ("voigt", "reuss", "hill")
197-
}
198-
)
199-
200-
# Augment with experimental database id information
201-
if summary_doc.get("database_IDs"):
202-
metadata.update(
203-
{
204-
f"linked_{database}_ids": ", ".join(matched_ids)
205-
for database, matched_ids in summary_doc["database_IDs"].items()
206-
}
207-
)
208-
209-
if (symm_meta := summary_doc.get("symmetry")) is not None:
210-
metadata.update(
211-
{
212-
k: symm_meta.get(v)
213-
for k, v in {
214-
"space_group_number": "number",
215-
"space_group_symbol": "symbol",
216-
"crystal_system": "crystal_system",
217-
"point_group": "point_group",
218-
}.items()
219-
}
220-
)
170+
metadata: dict[str, str] = {}
221171

222172
if len(sim_docs := self.client.materials.similarity.find_similar(idx, top=10)):
223173
if not isinstance(sim_docs[0], dict):
@@ -231,7 +181,25 @@ def fetch(self, idx: str) -> FetchResult:
231181
)
232182
)
233183

234-
return FetchResult(id=idx, text=robo_desc, metadata=metadata)
184+
summary_doc = {}
185+
if (
186+
len(
187+
summary_docs := self.client.materials.summary.search(
188+
material_ids=[idx],
189+
fields=MaterialMetadata._summary_fields(),
190+
)
191+
)
192+
> 0
193+
):
194+
# Try to avoid more nested fields, just provide things with
195+
# simple str or numeric type
196+
summary_doc = summary_docs[0]
197+
198+
return FetchResult(
199+
id=idx,
200+
text=robo_desc,
201+
metadata=MaterialMetadata.from_summary_data(summary_doc, **metadata),
202+
)
235203

236204
def get_phase_diagram_from_elements(
237205
self,

tests/mcp/test_tools.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
from re import search
1+
import numpy as np
22
from pymatgen.core import Composition
3+
import pytest
34

45
from mp_api.mcp._schemas import SearchOutput, FetchResult, MaterialMetadata
56
from mp_api.mcp.tools import MPCoreMCP
@@ -23,6 +24,7 @@ def test_core_tools():
2324
robo_desc_docs = mcp_tools.client.materials.robocrys.search_docs(
2425
material_ids=[*[doc.id for doc in search_results.results], fetch_results.id]
2526
)
27+
ref_struct = mcp_tools.client.get_structure_by_material_id(fetch_results.id)
2628

2729
robo_descs = {doc["material_id"]: doc["description"] for doc in robo_desc_docs}
2830

@@ -41,3 +43,19 @@ def test_core_tools():
4143
assert isinstance(fetch_results.metadata, MaterialMetadata)
4244
assert isinstance(fetch_results.metadata.structurally_similar_materials, str)
4345
assert fetch_results.text == robo_descs[fetch_results.id]
46+
47+
assert np.allclose(
48+
ref_struct.lattice.matrix,
49+
fetch_results.metadata.cell_vectors,
50+
)
51+
assert np.allclose(
52+
ref_struct.cart_coords,
53+
fetch_results.metadata.cartesian_coordinates,
54+
)
55+
assert fetch_results.metadata.atoms == [
56+
str(site.species.elements[0]) for site in ref_struct
57+
]
58+
if magmoms := ref_struct.site_properties.get("magmom"):
59+
assert fetch_results.metadata.magnetic_moments == pytest.approx(magmoms)
60+
else:
61+
assert fetch_results.metadata.magnetic_moments is None

0 commit comments

Comments
 (0)