1+ from functools import singledispatch
2+ try :
3+ from types import NoneType
4+ except ImportError :
5+ # python < 3.10
6+ NoneType = type (None )
7+
18import pandas as pd
29import numpy as np
310from pydantic import BaseModel , create_model
411
12+
513class NoAvailablePTypeError (Exception ):
614 """
715 Throw an error if we cannot create
@@ -18,72 +26,173 @@ def __init__(
1826
class InvalidPTypeError(Exception):
    """
    Throw an error if ptype cannot be recognised

    Parameters
    ----------
    message : str
        Human readable description of the problem. It is stored on
        ``self.message`` and also passed on to ``Exception``.
    """

    def __init__(
        self,
        message="`ptype_data` must be a pd.DataFrame, a pydantic BaseModel, np.ndarray or dict",
    ):
        # Keep the text on the instance so callers can read it without
        # going through ``args``.
        self.message = message
        super().__init__(self.message)
3138
3239
33- def vetiver_create_ptype (ptype_data , save_ptype : bool ):
# Error-message template for data whose type has no ptype handler.
# The only placeholder is ``{_data_type}``, filled in with ``str.format``;
# it appears both in the prose and in the annotation of the sample function.
CREATE_PTYPE_TPL = """\
Failed to create a data prototype (ptype) from data of \
type {_data_type}. If your datatype is not one of \
(pd.DataFrame, pydantic.BaseModel, np.ndarray, dict), \
you should write a function to create the ptype. Here is \
a template for such a function: \

    from pydantic import create_model
    from vetiver.ptype import vetiver_create_ptype

    @vetiver_create_ptype.register
    def _(data: {_data_type}):
        data_dict = ... # convert data to a dictionary
        ptype = create_model("ptype", **data_dict)
        return ptype

If your datatype is a common type, please consider submitting \
a pull request.
"""
59+
@singledispatch
def vetiver_create_ptype(data):
    """Create zero row structure to save data types

    This is the generic entry point of a ``functools.singledispatch``
    function. Implementations registered with
    ``vetiver_create_ptype.register`` handle specific data types; this
    fallback runs only when no registered implementation matches and
    always raises.

    Parameters
    ----------
    data : object
        An object with information (data) whose layout is to be determined.

    Returns
    -------
    ptype : pydantic.main.BaseModel
        Data prototype

    Raises
    ------
    InvalidPTypeError
        If no implementation is registered for ``type(data)``. The message
        contains a template for writing such an implementation.
    """
    data_type = type(data)
    # Use a plain, importable name rather than the ``<class '...'>`` repr:
    # the template places this text inside a type annotation, where the
    # repr would not be valid Python.
    if data_type.__module__ == "builtins":
        type_name = data_type.__qualname__
    else:
        type_name = f"{data_type.__module__}.{data_type.__qualname__}"

    raise InvalidPTypeError(
        message=CREATE_PTYPE_TPL.format(_data_type=type_name)
    )
4978
50- if save_ptype == False :
51- pass
52- elif save_ptype == True :
53- try :
54- if isinstance (ptype_data , np .ndarray ):
55- ptype = _array_to_ptype (ptype_data [1 ])
56- elif isinstance (ptype_data , dict ):
57- ptype = _dict_to_ptype (ptype_data )
58- elif isinstance (ptype_data .construct (), BaseModel ):
59- ptype = ptype_data
60- except AttributeError : # cannot construct basemodel
61- if isinstance (ptype_data , pd .DataFrame ):
62- ptype = _df_to_ptype (ptype_data .iloc [1 , :])
63- else :
64- raise InvalidPTypeError
6579
@vetiver_create_ptype.register
def _(data: pd.DataFrame):
    """
    Create ptype for a pandas dataframe

    The first row of the dataframe supplies the field names (column
    labels) and the default values of the prototype.

    Parameters
    ----------
    data : DataFrame
        Pandas dataframe

    Raises
    ------
    IndexError
        If the dataframe has no rows, since there is no first row to
        take the defaults from.

    Examples
    --------
    >>> from pydantic import BaseModel
    >>> df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
    >>> prototype = vetiver_create_ptype(df)
    >>> issubclass(prototype, BaseModel)
    True
    >>> prototype()
    ptype(x=1, y=4)

    The data prototype created for the dataframe is equivalent to:

    >>> class another_prototype(BaseModel):
    ...     class Config:
    ...         title = 'ptype'
    ...     x: int = 1
    ...     y: int = 4

    >>> another_prototype()
    another_prototype(x=1, y=4)
    >>> another_prototype() == prototype()
    True

    Changing the title using `class Config` ensures that the
    JSON schemas also match.

    >>> another_prototype.schema() == prototype.schema()
    True
    """
    if len(data.index) == 0:
        # Same exception type ``iloc`` would raise, with an actionable message.
        raise IndexError(
            "cannot create a ptype from a DataFrame without rows; "
            "at least one row is required"
        )

    first_row = data.iloc[0, :].to_dict()
    # Keyword arguments must be strings, so non-string column labels
    # (e.g. the default integer labels) are converted to their text form.
    fields = {str(column): value for column, value in first_row.items()}
    ptype = create_model("ptype", **fields)
    return ptype
67122
68123
69- def _df_to_ptype (train_data ):
@vetiver_create_ptype.register
def _(data: np.ndarray):
    """
    Create ptype for a numpy array

    The first row of the array supplies the default values; the field
    names are the column positions written as strings.

    Parameters
    ----------
    data : ndarray
        2-Dimensional numpy array

    Examples
    --------
    >>> arr = np.array([[1, 4], [2, 5], [3, 6]])
    >>> prototype = vetiver_create_ptype(arr)
    >>> prototype()
    ptype(0=1, 1=4)

    >>> arr2 = np.array([[1, 'a'], [2, 'b'], [3, 'c']], dtype=object)
    >>> prototype2 = vetiver_create_ptype(arr2)
    >>> prototype2()
    ptype(0=1, 1='a')
    """
    def _to_python(element):
        # pydantic needs python objects. Numpy scalars are converted with
        # .item(); elements of a dtype=object array may already be python
        # objects without that method and are passed through untouched.
        try:
            return element.item()
        except AttributeError:
            return element

    # pydantic requires strings as field names, hence str(position)
    fields = {
        str(position): _to_python(element)
        for position, element in enumerate(data[0])
    }
    return create_model("ptype", **fields)
75160
76161
77- def _array_to_ptype (train_data ):
78- dict_data = dict (enumerate (train_data , 0 ))
@vetiver_create_ptype.register
def _(data: dict):
    """
    Create ptype for a dict

    Every key/value pair of the dictionary is forwarded to
    ``create_model`` as a keyword argument of a model named "ptype".

    Parameters
    ----------
    data : dict
        Dictionary
    """
    ptype = create_model("ptype", **data)
    return ptype
83173
84- return ptype
85174
@vetiver_create_ptype.register
def _(data: BaseModel):
    """
    Create ptype for a pydantic BaseModel object

    The object already is a prototype, so it is handed back unchanged.

    Parameters
    ----------
    data : pydantic.BaseModel
        Pydantic BaseModel
    """
    # NOTE(review): singledispatch selects on type(data), so this matches
    # BaseModel instances; a model *class* would presumably fall through to
    # the generic implementation - confirm that this is intended.
    ptype = data
    return ptype
88186
89- return create_model ("ptype" ,** train_data )
187+
@vetiver_create_ptype.register
def _(data: NoneType):
    """
    Create ptype for None

    No data means no prototype: the result is always None.

    Parameters
    ----------
    data : None
        None
    """
    ptype = None
    return ptype
0 commit comments