11from __future__ import annotations
22
3- import zlib
43from typing import TYPE_CHECKING
54
6- import numpy as np
75from emmet .core .mpid import MPID , AlphaID
8- from emmet .core .similarity import CrystalNNSimilarity , SimilarityDoc , SimilarityEntry
6+ from emmet .core .similarity import (
7+ CrystalNNSimilarity ,
8+ SimilarityDoc ,
9+ SimilarityEntry ,
10+ _vector_to_hex_and_norm ,
11+ )
912from pymatgen .core import Composition , Structure
1013
1114from mp_api .client .core import BaseRester , MPRestError
1215from mp_api .client .core .utils import validate_ids
1316
1417if TYPE_CHECKING :
18+ import numpy as np
1519 from emmet .core .similarity import SimilarityScorer
1620
1721# This limit seems to be associated with MongoDB vector search
@@ -31,10 +35,6 @@ def fingerprint_structure(self, structure: Structure) -> np.ndarray:
3135 self ._fingerprinter = CrystalNNSimilarity ()
3236 return self ._fingerprinter ._featurize_structure (structure )
3337
34- def _get_hex_fingerprint (self , feature_vetor : np .ndarray ) -> str :
35- """Convert feature vector fingerprint to compressed hex str."""
36- return zlib .compress (feature_vetor .tobytes ()).hex ()
37-
3838 def search (
3939 self ,
4040 material_ids : str | list [str ] | None = None ,
@@ -126,9 +126,11 @@ def find_similar(
126126 "Please specify a positive integer or `None` to return all results."
127127 )
128128
129+ vector_hex , vector_norm = _vector_to_hex_and_norm (feature_vector )
129130 result = self ._query_resource (
130131 criteria = {
131- "feature_vector_hex" : self ._get_hex_fingerprint (feature_vector ),
132+ "feature_vector_hex" : vector_hex ,
133+ "feature_vector_norm" : vector_norm ,
132134 "_limit" : top ,
133135 },
134136 suburl = "match" ,
0 commit comments