|
28 | 28 | from pymatgen.electronic_structure.core import OrbitalType, Spin |
29 | 29 | from pymatgen.entries.computed_entries import ComputedEntry |
30 | 30 |
|
| 31 | +from mp_api.client.core import MPRestError |
31 | 32 | from mp_api.mcp.utils import _NeedsMPClient |
| 33 | +from mp_api.mcp._schemas import OpenAISearchOutput, OpenAIResult |
| 34 | + |
class MPOpenAIMcpTools(_NeedsMPClient):
    """Define OpenAI-specific MCP for the Materials Project API.

    Exposes the two tools used by OpenAI-style MCP connectors:
    `search` (keyword lookup over robocrystallographer descriptions)
    and `fetch` (retrieve one material by its Materials Project ID).
    Both rely on `self.client`, which is not defined in this class
    (presumably supplied by `_NeedsMPClient` -- confirm there).
    """

    def search(self, query: str) -> OpenAISearchOutput:
        """Define OpenAI compatible search.

        Search through the autogenerated robocrystallographer
        descriptions of materials to return lists of likely
        matching materials.

        Args:
            query (str) : A natural language query of material keywords.
                It is assumed that the query contains comma-delimited
                keywords. Whitespace around each keyword is ignored and
                empty keywords are dropped.

        Returns:
            OpenAISearchOutput, a dict of `results` each with structure
                mp_api.mcp._schemas.OpenAIResult. `results` is empty when
                the query contains no keywords.
        """
        # "a, b" must search for "b", not " b"; also drop blanks left by
        # stray or trailing commas.
        keywords = [
            keyword
            for keyword in (fragment.strip() for fragment in query.split(","))
            if keyword
        ]
        if not keywords:
            # Nothing to match on, so skip the API round trip.
            return OpenAISearchOutput(results=[])

        return OpenAISearchOutput(
            results=[
                OpenAIResult(id=doc["material_id"], text=doc["description"])
                for doc in self.client.robocrys.search(keywords)
            ]
        )

    def fetch(self, idx: str) -> OpenAIResult:
        """Retrieve material information by Materials Project ID.

        Args:
            idx (str) : A Materials Project ID.
                Should be an integer prefixed by `mp-`, ex: "mp-149", "mp-13"

        Returns:
            OpenAIResult : Document with the id, the robocrys
                autogenerated description as `text`, and `metadata`
                derived from the materials summary collection
                (`None` when no summary document is found).

                If no description for the particular id is available,
                returns an OpenAIResult with only the id field populated.

        Raises:
            MPRestError: If `idx` is not a string.
        """
        if not isinstance(idx, str):
            raise MPRestError(
                f"Unknown {idx=}. Should be an integer prefixed by `mp-`, ex: "
                "'mp-1', 'mp-1010101'"
            )

        robo_docs = self.client.robocrys.search_docs(material_ids=[idx])
        robo_desc: str | None = robo_docs[0]["description"] if robo_docs else None

        # Without a description there is nothing useful to return, so the
        # summary lookup is skipped as well.
        if not robo_desc:
            return OpenAIResult(id=idx)

        metadata: dict[str, str] | None = None
        summary_docs = self.client.summary.search(material_ids=[idx])
        if summary_docs:
            summary = summary_docs[0]

            # Try to avoid more nested fields, just provide things with
            # simple str or numeric type
            metadata = {}
            for field in self.client.summary.document_model.model_fields:
                value = summary[field]
                if isinstance(value, (str, int, float)):
                    metadata[field] = str(value)

            # Augment with experimental database id information
            database_ids = summary["database_IDs"]
            if database_ids:
                for database, matched_ids in database_ids.items():
                    # str() guards against non-string identifiers, which
                    # would make a bare join raise TypeError.
                    metadata[f"linked_{database}_ids"] = ", ".join(
                        str(matched_id) for matched_id in matched_ids
                    )

        return OpenAIResult(id=idx, text=robo_desc, metadata=metadata)
32 | 126 |
|
33 | 127 |
|
34 | 128 | class MPMcpTools(_NeedsMPClient): |
|
0 commit comments