Skip to content

Commit f0f52ae

Browse files
committed
Specify in the server not to preprocess input data for spaCy models
1 parent fd814cf commit f0f52ae

4 files changed

Lines changed: 34 additions & 61 deletions

File tree

vetiver/handlers/spacy.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,7 @@ def handler_predict(self, input_data, check_prototype):
6060

6161
response_body = []
6262

63-
for dic in input_data:
64-
doc = self.model(dic.text)
63+
for doc in self.model.pipe(input_data.text):
6564
response_body.append(doc.to_json())
6665

6766
return pd.Series(response_body)
68-
69-
70-
# def get_data(doc):
71-
# ents = [
72-
# doc.to_dict()
73-
# ]
74-
# return {"text": doc.text, "ents": ents}

vetiver/server.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
from .utils import _jupyter_nb
1414
from .vetiver_model import VetiverModel
15+
from .handlers.spacy import SpacyHandler
1516
from .meta import VetiverMeta
1617

1718

@@ -173,6 +174,8 @@ async def custom_endpoint(
173174

174175
if isinstance(input_data, List):
175176
served_data = _batch_data(input_data)
177+
elif isinstance(self.model.translator, SpacyHandler):
178+
served_data = input_data
176179
else:
177180
served_data = _prepare_data(input_data)
178181

vetiver/tests/test_spacy.py

Lines changed: 26 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,8 @@ def animal_component_function(doc):
2626
matcher = spacy.matcher.PhraseMatcher(nlp.vocab)
2727
matcher.add("ANIMAL", animals)
2828
nlp.add_pipe("animals")
29-
df = pd.DataFrame({"text": ["i have a dog", "my turtle is smarter than my dog"]})
3029

31-
return vetiver.VetiverModel(nlp, "animals", prototype_data=df)
30+
return vetiver.VetiverModel(nlp, "animals")
3231

3332

3433
@pytest.fixture
@@ -42,23 +41,13 @@ def vetiver_client(spacy_model): # With check_prototype=True
4241

4342
@pytest.fixture
4443
def vetiver_client_check_ptype_false(spacy_model): # With check_prototype=False
45-
app = vetiver.VetiverAPI(spacy_model, check_prototype=False)
44+
app = vetiver.VetiverAPI(spacy_model, check_prototype=True)
4645
app.app.root_path = "/predict"
4746
client = TestClient(app.app)
4847

4948
return client
5049

5150

52-
def test_vetiver_build(spacy_model):
53-
54-
df = pd.DataFrame({"text": ["i have a dog", "my turtle is smarter than my dog"]})
55-
56-
response = spacy_model.handler_predict(df, True)
57-
58-
assert isinstance(response, pd.Series)
59-
assert response.iloc[0].ents == ("dog",)
60-
61-
6251
def test_vetiver_post(vetiver_client):
6352
df = pd.DataFrame({"text": ["one", "my turtle is smarter than my dog"]})
6453

@@ -68,53 +57,38 @@ def test_vetiver_post(vetiver_client):
6857
assert response.to_dict() == {
6958
"predict": {
7059
0: {
71-
"text": "i have a dog",
72-
"ents": [{"label": "ANIMAL", "start": 9, "end": 12}],
60+
"text": "one",
61+
"ents": [],
62+
"sents": [{"start": 0, "end": 3}],
63+
"tokens": [{"id": 0, "start": 0, "end": 3}],
7364
},
7465
1: {
7566
"text": "my turtle is smarter than my dog",
7667
"ents": [
77-
{"label": "ANIMAL", "start": 3, "end": 9},
78-
{"label": "ANIMAL", "start": 29, "end": 32},
68+
{"start": 3, "end": 9, "label": "ANIMAL"},
69+
{"start": 29, "end": 32, "label": "ANIMAL"},
70+
],
71+
"tokens": [
72+
{"id": 0, "start": 0, "end": 2},
73+
{"id": 1, "start": 3, "end": 9},
74+
{"id": 2, "start": 10, "end": 12},
75+
{"id": 3, "start": 13, "end": 20},
76+
{"id": 4, "start": 21, "end": 25},
77+
{"id": 5, "start": 26, "end": 28},
78+
{"id": 6, "start": 29, "end": 32},
7979
],
8080
},
8181
}
8282
}
8383

8484

85-
# def test_batch(vetiver_client):
86-
# nlp = spacy.blank("en")
87-
# words1 = "This is a new"
88-
# doc1 = spacy.tokens.Doc(nlp.vocab, words=words1)
89-
# words2 = ["Another", "one", "."]
90-
# doc2 = spacy.tokens.Doc(nlp.vocab, words=words2)
91-
92-
# response = vetiver.predict(endpoint=vetiver_client, data=[doc1, doc2])
93-
94-
# assert response == [[True, False, False, False, False, False], [True, False, False]]
95-
96-
97-
# def test_no_ptype(vetiver_client_check_ptype_false):
98-
# nlp = spacy.blank("en")
99-
# words1 = ["This", "is", "a", "new", "Sentence", "."]
100-
# doc1 = spacy.tokens.Doc(nlp.vocab, words=words1)
101-
# words2 = ["Another", "one", "."]
102-
# doc2 = spacy.tokens.Doc(nlp.vocab, words=words2)
103-
104-
# response = vetiver.predict(
105-
# endpoint=vetiver_client_check_ptype_false, data=[doc1, doc2]
106-
# )
107-
108-
# assert response == [[True, False, False, False, False, False], [True, False, False]]
109-
110-
111-
# def test_serialize(spacy_model):
112-
# import pins
85+
def test_serialize(spacy_model):
86+
import pins
11387

114-
# board = pins.board_temp(allow_pickle_read=True)
115-
# vetiver.vetiver_pin_write(board=board, model=spacy_model)
116-
# assert isinstance(
117-
# board.pin_read("sentencizer"),
118-
# spacy.pipeline.sentencizer.Sentencizer,
119-
# )
120-
# board.pin_delete("sentencizer")
88+
board = pins.board_temp(allow_pickle_read=True)
89+
vetiver.vetiver_pin_write(board=board, model=spacy_model)
90+
assert isinstance(
91+
board.pin_read("animals"),
92+
spacy.Language,
93+
)
94+
board.pin_delete("animals")

vetiver/vetiver_model.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ def __init__(
8686
translator = create_handler(model, prototype_data)
8787

8888
self.model = translator.model
89+
self.translator = type(translator)
8990
self.prototype = translator.construct_prototype()
9091
self.model_name = model_name
9192
self.description = description if description else translator.describe()
@@ -102,6 +103,7 @@ def from_pin(cls, board, name: str, version: str = None):
102103
if "vetiver_meta" in meta.user:
103104
get_prototype = meta.user.get("vetiver_meta").get("prototype", None)
104105
required_pkgs = meta.user.get("vetiver_meta").get("required_pkgs", None)
106+
python_version = meta.user.get("vetiver_meta").get("python_version", None)
105107
meta.user.pop("vetiver_meta")
106108
else:
107109
# ptype = meta.user.get("ptype", None)
@@ -113,6 +115,7 @@ def from_pin(cls, board, name: str, version: str = None):
113115
# get_prototype = None
114116

115117
required_pkgs = meta.user.get("required_pkgs")
118+
python_version = meta.user.get("python_version")
116119

117120
return cls(
118121
model=model,
@@ -123,6 +126,7 @@ def from_pin(cls, board, name: str, version: str = None):
123126
"version": meta.version.version,
124127
"url": meta.local.get("url"), # None all the time, besides Connect,
125128
"required_pkgs": required_pkgs,
129+
"python_version": python_version,
126130
},
127131
prototype_data=json.loads(get_prototype) if get_prototype else None,
128132
versioned=True,

0 commit comments

Comments
 (0)