Skip to content

Commit 0303331

Browse files
housekeeping
1 parent eaca434 commit 0303331

9 files changed

Lines changed: 102 additions & 151 deletions

File tree

mp_api/client/core/client.py

Lines changed: 29 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from __future__ import annotations
77

8+
import gzip
89
import inspect
910
import itertools
1011
import os
@@ -16,17 +17,21 @@
1617
from functools import cache
1718
from importlib import import_module
1819
from importlib.metadata import PackageNotFoundError, version
20+
from io import BytesIO
1921
from json import JSONDecodeError
2022
from math import ceil
2123
from typing import TYPE_CHECKING, ForwardRef, Optional, get_args
2224
from urllib.parse import quote
2325

26+
import boto3
2427
import requests
28+
from botocore import UNSIGNED
29+
from botocore.config import Config
30+
from botocore.exceptions import ClientError
2531
from emmet.core.utils import jsanitize
2632
from pydantic import BaseModel, create_model
2733
from requests.adapters import HTTPAdapter
2834
from requests.exceptions import RequestException
29-
from smart_open import open
3035
from tqdm.auto import tqdm
3136
from urllib3.util.retry import Retry
3237

@@ -39,13 +44,6 @@
3944
validate_ids,
4045
)
4146

42-
try:
43-
import boto3
44-
from botocore import UNSIGNED
45-
from botocore.config import Config
46-
except ImportError:
47-
boto3 = None
48-
4947
try:
5048
import flask
5149
except ImportError:
@@ -165,13 +163,6 @@ def session(self) -> requests.Session:
165163

166164
@property
167165
def s3_client(self):
168-
if boto3 is None:
169-
raise MPRestError(
170-
"boto3 not installed. To query charge density, "
171-
"band structure, or density of states data first "
172-
"install with: 'pip install boto3'"
173-
)
174-
175166
if not self._s3_client:
176167
self._s3_client = boto3.client(
177168
"s3",
@@ -379,20 +370,31 @@ def _query_open_data(
379370
Returns:
380371
dict: MontyDecoded data
381372
"""
382-
decoder = decoder or load_json
373+
try:
374+
byio = BytesIO()
375+
self.s3_client.download_fileobj(bucket, key, byio)
376+
byio.seek(0)
377+
if (file_data := byio.read()).startswith(b"\x1f\x8b"):
378+
file_data = gzip.decompress(file_data)
379+
byio.close()
383380

384-
file = open(
385-
f"s3://{bucket}/{key}",
386-
encoding="utf-8",
387-
transport_params={"client": self.s3_client},
388-
)
381+
decoder = decoder or load_json
389382

390-
if "jsonl" in key:
391-
decoded_data = [decoder(jline) for jline in file.read().splitlines()]
392-
else:
393-
decoded_data = decoder(file.read())
394-
if not isinstance(decoded_data, list):
395-
decoded_data = [decoded_data]
383+
if "jsonl" in key:
384+
decoded_data = [decoder(jline) for jline in file_data.splitlines()]
385+
else:
386+
decoded_data = decoder(file_data)
387+
if not isinstance(decoded_data, list):
388+
decoded_data = [decoded_data]
389+
390+
raise_error = not decoded_data or len(decoded_data) == 0
391+
392+
except ClientError:
393+
# No such object exists
394+
raise_error = True
395+
396+
if raise_error:
397+
raise MPRestError(f"No object found: s3://{bucket}/{key}")
396398

397399
return decoded_data, len(decoded_data) # type: ignore
398400

mp_api/client/core/settings.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import os
2-
from multiprocessing import cpu_count
3-
from typing import List
42

3+
from emmet.core.settings import EmmetSettings
54
from pydantic import Field, field_validator
65
from pydantic_settings import BaseSettings, SettingsConfigDict
76
from pymatgen.core import _load_pmg_settings
@@ -14,12 +13,9 @@
1413
_MUTE_PROGRESS_BAR = PMG_SETTINGS.get("MPRESTER_MUTE_PROGRESS_BARS", False)
1514
_MAX_HTTP_URL_LENGTH = PMG_SETTINGS.get("MPRESTER_MAX_HTTP_URL_LENGTH", 2000)
1615
_MAX_LIST_LENGTH = min(PMG_SETTINGS.get("MPRESTER_MAX_LIST_LENGTH", 10000), 10000)
17-
_DEFAULT_ENDPOINT = "https://api.materialsproject.org/"
1816

19-
try:
20-
CPU_COUNT = cpu_count()
21-
except NotImplementedError:
22-
pass
17+
_EMMET_SETTINGS = EmmetSettings()
18+
_DEFAULT_ENDPOINT = "https://api.materialsproject.org/"
2319

2420

2521
class MAPIClientSettings(BaseSettings):
@@ -32,7 +28,7 @@ class MAPIClientSettings(BaseSettings):
3228
description="Directory with test files",
3329
)
3430

35-
QUERY_NO_PARALLEL: List[str] = Field(
31+
QUERY_NO_PARALLEL: list[str] = Field(
3632
[
3733
"elements",
3834
"exclude_elements",
@@ -93,6 +89,20 @@ class MAPIClientSettings(BaseSettings):
9389
_DEFAULT_ENDPOINT, description="The default API endpoint to use."
9490
)
9591

92+
LTOL: float = Field(
93+
_EMMET_SETTINGS.LTOL,
94+
description="Fractional length tolerance for structure matching",
95+
)
96+
97+
STOL: float = Field(
98+
_EMMET_SETTINGS.STOL, description="Site tolerance for structure matching."
99+
)
100+
101+
ANGLE_TOL: float = Field(
102+
_EMMET_SETTINGS.ANGLE_TOL,
103+
description="Angle tolerance for structure matching in degrees.",
104+
)
105+
96106
model_config = SettingsConfigDict(env_prefix="MPRESTER_")
97107

98108
@field_validator("ENDPOINT", mode="before")

mp_api/client/mprester.py

Lines changed: 20 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
from emmet.core.electronic_structure import BSPathType
1010
from emmet.core.mpid import MPID, AlphaID
11-
from emmet.core.settings import EmmetSettings
1211
from emmet.core.tasks import TaskDoc
1312
from emmet.core.types.enums import ThermoType
1413
from emmet.core.vasp.calc_types import CalcType
@@ -42,7 +41,6 @@
4241
from pymatgen.analysis.phase_diagram import PDEntry
4342
from pymatgen.entries.computed_entries import ComputedEntry
4443

45-
_EMMET_SETTINGS = EmmetSettings()
4644
DEFAULT_THERMOTYPE_CRITERIA = {"thermo_types": ["GGA_GGA+U"]}
4745

4846
RESTER_LAYOUT = {
@@ -119,9 +117,7 @@ def __init__(
119117
"""
120118
self.api_key = validate_api_key(api_key)
121119

122-
self.endpoint = validate_endpoint(endpoint) or MAPI_CLIENT_SETTINGS.ENDPOINT
123-
if not self.endpoint.endswith("/"):
124-
self.endpoint += "/"
120+
self.endpoint = validate_endpoint(endpoint)
125121

126122
self.headers = headers or {}
127123
self.session = session or BaseRester._create_session(
@@ -464,9 +460,9 @@ def get_structures(
464460
def find_structure(
465461
self,
466462
filename_or_structure: str | Structure,
467-
ltol: float = _EMMET_SETTINGS.LTOL,
468-
stol: float = _EMMET_SETTINGS.STOL,
469-
angle_tol: float = _EMMET_SETTINGS.ANGLE_TOL,
463+
ltol: float = MAPI_CLIENT_SETTINGS.LTOL,
464+
stol: float = MAPI_CLIENT_SETTINGS.STOL,
465+
angle_tol: float = MAPI_CLIENT_SETTINGS.ANGLE_TOL,
470466
allow_multiple_results: bool = False,
471467
) -> list[str] | str:
472468
"""Finds matching structures from the Materials Project database.
@@ -501,13 +497,14 @@ def get_entries(
501497
self,
502498
chemsys_formula_mpids: str | list[str],
503499
compatible_only: bool = True,
504-
inc_structure: bool | None = None,
505500
property_data: list[str] | None = None,
506501
conventional_unit_cell: bool = False,
507502
additional_criteria: dict | None = None,
503+
**kwargs,
508504
) -> list[ComputedStructureEntry]:
509-
"""Get a list of ComputedEntries or ComputedStructureEntries corresponding
510-
to a chemical system or formula. This returns entries for all thermo types
505+
"""Get a list of ComputedStructureEntry from a chemical system, or formula, or MPID.
506+
507+
This returns ComputedStructureEntries with final structures for all thermo types
511508
represented in the database. Each type corresponds to a different mixing scheme
512509
(i.e. GGA/GGA+U, GGA/GGA+U/R2SCAN, R2SCAN). By default the thermo_type of the
513510
entry is also returned.
@@ -523,12 +520,6 @@ def get_entries(
523520
which performs adjustments to allow mixing of GGA and GGA+U
524521
calculations for more accurate phase diagrams and reaction
525522
energies. This data is obtained from the core "thermo" API endpoint.
526-
inc_structure (str): *This is a deprecated argument*. Previously, if None, entries
527-
returned were ComputedEntries. If inc_structure="initial",
528-
ComputedStructureEntries with initial structures were returned.
529-
Otherwise, ComputedStructureEntries with final structures
530-
were returned. This is no longer needed as all entries will contain the
531-
final structure data by default.
532523
property_data (list): Specify additional properties to include in
533524
entry.data. If None, only default data is included. Should be a subset of
534525
input parameters in the 'MPRester.thermo.available_fields' list.
@@ -538,14 +529,15 @@ def get_entries(
538529
correspond to proper function inputs to `MPRester.thermo.search`. For instance,
539530
if you are only interested in entries on the convex hull, you could pass
540531
{"energy_above_hull": (0.0, 0.0)} or {"is_stable": True}.
532+
kwargs: Used here only to gracefully handle deprecated arguments. All kwargs are ignored.
541533
542534
Returns:
543535
List ComputedStructureEntry objects.
544536
"""
545-
if inc_structure is not None:
537+
if kwargs.pop("inc_structure", None) is not None:
546538
warnings.warn(
547-
"The 'inc_structure' argument is deprecated as structure "
548-
"data is now always included in all returned entry objects."
539+
"The `inc_structure` argument is deprecated as final structures "
540+
"are always included in all returned ComputedStructureEntry objects."
549541
)
550542

551543
if isinstance(chemsys_formula_mpids, str):
@@ -935,9 +927,9 @@ def get_entry_by_material_id(
935927
self,
936928
material_id: str,
937929
compatible_only: bool = True,
938-
inc_structure: bool | None = None,
939930
property_data: list[str] | None = None,
940931
conventional_unit_cell: bool = False,
932+
**kwargs,
941933
):
942934
"""Get all ComputedEntry objects corresponding to a material_id.
943935
@@ -950,37 +942,32 @@ def get_entry_by_material_id(
950942
which performs adjustments to allow mixing of GGA and GGA+U
951943
calculations for more accurate phase diagrams and reaction
952944
energies. This data is obtained from the core "thermo" API endpoint.
953-
inc_structure (str): *This is a deprecated argument*. Previously, if None, entries
954-
returned were ComputedEntries. If inc_structure="initial",
955-
ComputedStructureEntries with initial structures were returned.
956-
Otherwise, ComputedStructureEntries with final structures
957-
were returned. This is no longer needed as all entries will contain
958-
structure data by default.
959945
property_data (list): Specify additional properties to include in
960946
entry.data. If None, only default data is included. Should be a subset of
961947
input parameters in the 'MPRester.thermo.available_fields' list.
962948
conventional_unit_cell (bool): Whether to get the standard
963949
conventional unit cell
950+
kwargs: Other keyword arguments to pass to `get_entries`.
964951
Returns:
965952
List of ComputedEntry or ComputedStructureEntry objects.
966953
"""
967954
return self.get_entries(
968955
material_id,
969956
compatible_only=compatible_only,
970-
inc_structure=inc_structure,
971957
property_data=property_data,
972958
conventional_unit_cell=conventional_unit_cell,
959+
**kwargs,
973960
)
974961

975962
def get_entries_in_chemsys(
976963
self,
977964
elements: str | list[str],
978965
use_gibbs: int | None = None,
979966
compatible_only: bool = True,
980-
inc_structure: bool | None = None,
981967
property_data: list[str] | None = None,
982968
conventional_unit_cell: bool = False,
983969
additional_criteria: dict = DEFAULT_THERMOTYPE_CRITERIA,
970+
**kwargs,
984971
):
985972
"""Helper method to get a list of ComputedEntries in a chemical system.
986973
For example, elements = ["Li", "Fe", "O"] will return a list of all
@@ -1006,12 +993,6 @@ def get_entries_in_chemsys(
1006993
which performs adjustments to allow mixing of GGA and GGA+U
1007994
calculations for more accurate phase diagrams and reaction
1008995
energies. This data is obtained from the core "thermo" API endpoint.
1009-
inc_structure (str): *This is a deprecated argument*. Previously, if None, entries
1010-
returned were ComputedEntries. If inc_structure="initial",
1011-
ComputedStructureEntries with initial structures were returned.
1012-
Otherwise, ComputedStructureEntries with final structures
1013-
were returned. This is no longer needed as all entries will contain
1014-
structure data by default.
1015996
property_data (list): Specify additional properties to include in
1016997
entry.data. If None, only default data is included. Should be a subset of
1017998
input parameters in the 'MPRester.thermo.available_fields' list.
@@ -1022,6 +1003,7 @@ def get_entries_in_chemsys(
10221003
if you are only interested in entries on the convex hull, you could pass
10231004
{"energy_above_hull": (0.0, 0.0)} or {"is_stable": True}, or if you are only interested
10241005
in entry data
1006+
kwargs: Other keyword arguments to pass to `get_entries`.
10251007
Returns:
10261008
List of ComputedStructureEntries.
10271009
"""
@@ -1042,10 +1024,10 @@ def get_entries_in_chemsys(
10421024
self.get_entries(
10431025
all_chemsyses,
10441026
compatible_only=compatible_only,
1045-
inc_structure=inc_structure,
10461027
property_data=property_data,
10471028
conventional_unit_cell=conventional_unit_cell,
10481029
additional_criteria=additional_criteria or DEFAULT_THERMOTYPE_CRITERIA,
1030+
**kwargs,
10491031
)
10501032
)
10511033

@@ -1167,11 +1149,7 @@ def get_charge_density_from_task_id(
11671149
key=f"chgcars/{validate_ids([task_id])[0]}.json.gz",
11681150
decoder=lambda x: load_json(x, deser=True),
11691151
)
1170-
chgcar = self.materials.tasks._query_open_data(**kwargs)[0]
1171-
if not chgcar:
1172-
raise MPRestError(f"No charge density fetched for task_id {task_id}.")
1173-
1174-
chgcar = chgcar[0]["data"] # type: ignore
1152+
chgcar = self.materials.tasks._query_open_data(**kwargs)[0][0]["data"]
11751153

11761154
if inc_task_doc:
11771155
task_doc = self.materials.tasks.search(task_ids=task_id)[0]
@@ -1341,7 +1319,6 @@ def get_cohesive_energy(
13411319
entries = self.get_entries(
13421320
material_ids,
13431321
compatible_only=False,
1344-
inc_structure=True,
13451322
property_data=None,
13461323
conventional_unit_cell=False,
13471324
)
@@ -1481,7 +1458,7 @@ def get_stability(
14811458
pd = self.materials.thermo.get_phase_diagram_from_chemsys(
14821459
chemsys_str, thermo_type=thermo_type
14831460
)
1484-
except OSError:
1461+
except MPRestError:
14851462
pd = None
14861463

14871464
if not pd:

0 commit comments

Comments
 (0)