Skip to content

Commit 9141aba

Browse files
committed
add appropriate helper for spacy
1 parent 6143546 commit 9141aba

3 files changed

Lines changed: 93 additions & 64 deletions

File tree

vetiver/handlers/spacy.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,17 @@ def construct_prototype(self):
3737
text_column_name = "text"
3838

3939
else:
40-
if len(self.prototype_data.columns) != 1:
40+
if (
41+
isinstance(self.prototype_data, pd.DataFrame)
42+
and len(self.prototype_data.columns) != 1
43+
):
4144
raise TypeError("Expected 1 column of text data")
4245

43-
text_column_name = self.prototype_data.columns[0]
46+
text_column_name = (
47+
self.prototype_data.columns[0]
48+
if isinstance(self.prototype_data, pd.DataFrame)
49+
else list(self.prototype_data.keys())[0]
50+
)
4451

4552
prototype = vetiver_create_prototype(pd.DataFrame({text_column_name: ["text"]}))
4653

vetiver/helpers.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,12 @@ def _(pred_data):
2828
return pd.DataFrame([dict(s) for s in pred_data])
2929

3030

@api_data_to_frame.register(pd.DataFrame)
def _pd_frame(pred_data):
    """Pass a DataFrame through unchanged.

    `api_data_to_frame` normalizes incoming prediction payloads to a
    DataFrame; when the caller already supplies one, no conversion is
    needed, so this registration is the identity.
    """
    return pred_data
36+
3137
@api_data_to_frame.register(dict)
3238
def _dict(pred_data):
3339
return api_data_to_frame([pred_data])

vetiver/tests/test_spacy.py

Lines changed: 78 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -5,28 +5,30 @@
55
import numpy as np # noqa
66
import pandas as pd # noqa
77
from fastapi.testclient import TestClient # noqa
8-
8+
from numpy import nan # noqa
99
import vetiver # noqa
1010

1111

@spacy.language.Language.component("animals")
def animal_component_function(doc):
    """Mark every phrase-matcher hit on *doc* as an ANIMAL entity."""
    matches = matcher(doc)  # noqa
    doc.ents = [
        spacy.tokens.Span(doc, start, end, label="ANIMAL")
        for match_id, start, end in matches
    ]
    return doc


# Build the pipeline once at module scope: Language.component registration
# is global, so doing this inside the fixture would re-register "animals"
# on every fixture instantiation.
nlp = spacy.blank("en")
animals = list(nlp.pipe(["dog", "cat", "turtle"]))
matcher = spacy.matcher.PhraseMatcher(nlp.vocab)
matcher.add("ANIMAL", animals)
nlp.add_pipe("animals")
1230
@pytest.fixture
1331
def spacy_model():
14-
@spacy.language.Language.component("animals")
15-
def animal_component_function(doc):
16-
matches = matcher(doc) # noqa
17-
spans = [
18-
spacy.tokens.Span(doc, start, end, label="ANIMAL")
19-
for match_id, start, end in matches
20-
]
21-
doc.ents = spans
22-
return doc
23-
24-
nlp = spacy.blank("en")
25-
animals = list(nlp.pipe(["dog", "cat", "turtle"]))
26-
matcher = spacy.matcher.PhraseMatcher(nlp.vocab)
27-
matcher.add("ANIMAL", animals)
28-
nlp.add_pipe("animals")
29-
3032
return nlp
3133

3234

@@ -58,24 +60,23 @@ def test_vetiver_predict_with_prototype(vetiver_client_with_prototype):
5860

5961
assert isinstance(response, pd.DataFrame), response
6062
assert response.to_dict() == {
61-
"predict": {
62-
"0": {
63-
"text": "turtles",
64-
"ents": [],
65-
"sents": [{"start": 0, "end": 7}],
66-
"tokens": [{"id": 0, "start": 0, "end": 7}],
67-
},
68-
"1": {
69-
"text": "i have a dog",
70-
"ents": [{"start": 9, "end": 12, "label": "ANIMAL"}],
71-
"tokens": [
72-
{"id": 0, "start": 0, "end": 1},
73-
{"id": 1, "start": 2, "end": 6},
74-
{"id": 2, "start": 7, "end": 8},
75-
{"id": 3, "start": 9, "end": 12},
76-
],
77-
},
78-
}
63+
"0": {
64+
"text": "turtles",
65+
"ents": [],
66+
"sents": [{"start": 0, "end": 7}],
67+
"tokens": [{"id": 0, "start": 0, "end": 7}],
68+
},
69+
"1": {
70+
"text": "i have a dog",
71+
"ents": [{"start": 9, "end": 12, "label": "ANIMAL"}],
72+
"sents": nan,
73+
"tokens": [
74+
{"id": 0, "start": 0, "end": 1},
75+
{"id": 1, "start": 2, "end": 6},
76+
{"id": 2, "start": 7, "end": 8},
77+
{"id": 3, "start": 9, "end": 12},
78+
],
79+
},
7980
}
8081

8182

@@ -86,34 +87,49 @@ def test_vetiver_predict_no_prototype(vetiver_client_no_prototype):
8687

8788
assert isinstance(response, pd.DataFrame), response
8889
assert response.to_dict() == {
89-
"predict": {
90-
"0": {
91-
"text": "turtles",
92-
"ents": [],
93-
"sents": [{"start": 0, "end": 7}],
94-
"tokens": [{"id": 0, "start": 0, "end": 7}],
95-
},
96-
"1": {
97-
"text": "i have a dog",
98-
"ents": [{"start": 9, "end": 12, "label": "ANIMAL"}],
99-
"tokens": [
100-
{"id": 0, "start": 0, "end": 1},
101-
{"id": 1, "start": 2, "end": 6},
102-
{"id": 2, "start": 7, "end": 8},
103-
{"id": 3, "start": 9, "end": 12},
104-
],
105-
},
106-
}
90+
"0": {
91+
"text": "turtles",
92+
"ents": [],
93+
"sents": [{"start": 0, "end": 7}],
94+
"tokens": [{"id": 0, "start": 0, "end": 7}],
95+
},
96+
"1": {
97+
"text": "i have a dog",
98+
"ents": [{"start": 9, "end": 12, "label": "ANIMAL"}],
99+
"sents": nan,
100+
"tokens": [
101+
{"id": 0, "start": 0, "end": 1},
102+
{"id": 1, "start": 2, "end": 6},
103+
{"id": 2, "start": 7, "end": 8},
104+
{"id": 3, "start": 9, "end": 12},
105+
],
106+
},
107107
}
108108

109109

def test_serialize_no_prototype(spacy_model):
    """Round-trip a prototype-less spaCy VetiverModel through a temp pin board."""
    import pins

    board = pins.board_temp(allow_pickle_read=True)
    v = vetiver.VetiverModel(spacy_model, "animals")
    vetiver.vetiver_pin_write(board=board, model=v)
    v2 = vetiver.VetiverModel.from_pin(board, "animals")
    # Deserialization should hand back a usable English pipeline object.
    assert isinstance(v2.model, spacy.lang.en.English)
def test_serialize_prototype(spacy_model):
    """Round-trip a spaCy VetiverModel with prototype data through a temp pin board."""
    import pins

    board = pins.board_temp(allow_pickle_read=True)
    proto = pd.DataFrame({"text": ["text"]})
    v = vetiver.VetiverModel(spacy_model, "animals", prototype_data=proto)
    vetiver.vetiver_pin_write(board=board, model=v)
    v2 = vetiver.VetiverModel.from_pin(board, "animals")
    # Deserialization should hand back a usable English pipeline object.
    assert isinstance(v2.model, spacy.lang.en.English)

0 commit comments

Comments
 (0)