Skip to content

Commit dcb0de4

Browse files
has2k1 authored and isabelizimm committed
Fix ptype for dataframe and array
1. The row slice assumed 1-indexing. 2. Could not create a ptype for an array of dtype=object. 3. Added doctests to show the equivalence between the ptype (dynamically generated) and the more common (statically created) usage of pydantic BaseModels.
1 parent b8902b6 commit dcb0de4

1 file changed

Lines changed: 101 additions & 11 deletions

File tree

vetiver/ptype.py

Lines changed: 101 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,12 @@ def vetiver_create_ptype(data):
5959
6060
Parameters
6161
----------
62-
data :
63-
Data that represents what
62+
data : object
63+
An object with information (data) whose layout is to be determined.
6464
6565
Returns
6666
-------
67-
ptype
67+
ptype : pydantic.main.BaseModel
6868
Data prototype
6969
7070
"""
@@ -74,31 +74,121 @@ def vetiver_create_ptype(data):
7474

7575

7676
@vetiver_create_ptype.register
77-
def _vetiver_create_ptype(data: pd.DataFrame):
78-
dict_data = data.iloc[1, :].to_dict()
77+
def _(data: pd.DataFrame):
78+
"""
79+
Create ptype for a pandas dataframe
80+
81+
Parameters
82+
----------
83+
data : DataFrame
84+
Pandas dataframe
85+
86+
Examples
87+
--------
88+
>>> from pydantic import BaseModel
89+
>>> df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
90+
>>> prototype = vetiver_create_ptype(df)
91+
>>> issubclass(prototype, BaseModel)
92+
True
93+
>>> prototype()
94+
ptype(x=1, y=4)
95+
96+
The data prototype created for the dataframe is equivalent to:
97+
98+
>>> class another_prototype(BaseModel):
99+
... class Config:
100+
... title = 'ptype'
101+
... x: int = 1
102+
... y: int = 4
103+
104+
>>> another_prototype()
105+
another_prototype(x=1, y=4)
106+
>>> another_prototype() == prototype()
107+
True
108+
109+
Changing the title using `class Config` ensures that the
110+
json/schemas also match.
111+
112+
>>> another_prototype.schema() == prototype.schema()
113+
True
114+
"""
115+
dict_data = data.iloc[0, :].to_dict()
79116
ptype = create_model("ptype", **dict_data)
80117
return ptype
81118

82119

83120
@vetiver_create_ptype.register
84-
def _vetiver_create_ptype(data: np.ndarray):
85-
dict_data = dict(enumerate(data[1], 0))
121+
def _(data: np.ndarray):
122+
"""
123+
Create ptype for a numpy array
124+
125+
Parameters
126+
----------
127+
data : ndarray
128+
2-Dimensional numpy array
129+
130+
Examples
131+
--------
132+
>>> arr = np.array([[1, 4], [2, 5], [3, 6]])
133+
>>> prototype = vetiver_create_ptype(arr)
134+
>>> prototype()
135+
ptype(0=1, 1=4)
136+
137+
>>> arr2 = np.array([[1, 'a'], [2, 'b'], [3, 'c']], dtype=object)
138+
>>> prototype2 = vetiver_create_ptype(arr2)
139+
>>> prototype2()
140+
ptype(0=1, 1='a')
141+
"""
142+
def _item(value):
143+
# pydantic needs python objects. .item() converts a numpy
144+
# scalar type to its python equivalent; if the ndarray is
145+
# dtype=object, it may already hold python objects without .item()
146+
try:
147+
return value.item()
148+
except AttributeError:
149+
return value
150+
151+
dict_data = dict(enumerate(data[0], 0))
86152
# pydantic requires strings as indices
87-
dict_data = {f"{key}": value.item() for key, value in dict_data.items()}
153+
dict_data = {f"{key}": _item(value) for key, value in dict_data.items()}
88154
ptype = create_model("ptype", **dict_data)
89155
return ptype
90156

91157

92158
@vetiver_create_ptype.register
93-
def _vetiver_create_ptype(data: dict):
159+
def _(data: dict):
160+
"""
161+
Create ptype for a dict
162+
163+
Parameters
164+
----------
165+
data : dict
166+
Dictionary
167+
"""
94168
return create_model("ptype", **data)
95169

96170

97171
@vetiver_create_ptype.register
98-
def _vetiver_create_ptype(data: BaseModel):
172+
def _(data: BaseModel):
173+
"""
174+
Create ptype for a pydantic BaseModel object
175+
176+
Parameters
177+
----------
178+
data : pydantic.BaseModel
179+
Pydantic BaseModel
180+
"""
99181
return data
100182

101183

102184
@vetiver_create_ptype.register
103-
def _vetiver_create_ptype(data: NoneType):
185+
def _(data: NoneType):
186+
"""
187+
Create ptype for None
188+
189+
Parameters
190+
----------
191+
data : None
192+
None
193+
"""
104194
return None

0 commit comments

Comments
 (0)