44__all__ = ['load' , 'prop_dataset' ]
55
66# %% ../nbs/API/load.ipynb 4
7- def load (data , idx = None , x = None , y = None , paired = None , id_col = None ,
8- ci = 95 , resamples = 5000 , random_seed = 12345 , proportional = False ,
9- delta2 = False , experiment = None , experiment_label = None ,
10- x1_level = None , mini_meta = False ):
11- '''
7+ def load (
8+ data ,
9+ idx = None ,
10+ x = None ,
11+ y = None ,
12+ paired = None ,
13+ id_col = None ,
14+ ci = 95 ,
15+ resamples = 5000 ,
16+ random_seed = 12345 ,
17+ proportional = False ,
18+ delta2 = False ,
19+ experiment = None ,
20+ experiment_label = None ,
21+ x1_level = None ,
22+ mini_meta = False ,
23+ ):
24+ """
1225 Loads data in preparation for estimation statistics.
1326
1427 This is designed to work with pandas DataFrames.
@@ -22,15 +35,15 @@ def load(data, idx=None, x=None, y=None, paired=None, id_col=None,
2235 with each individual tuple producing its own contrast plot
2336 x : string or list, default None
2437 Column name(s) of the independent variable. This can be expressed as
25- a list of 2 elements if and only if 'delta2' is True; otherwise it
38+ a list of 2 elements if and only if 'delta2' is True; otherwise it
2639 can only be a string.
2740 y : string, default None
2841 Column name for the data to be plotted on the y-axis.
2942 paired : string, default None
30- The type of the experiment under which the data are obtained. If 'paired'
43+ The type of the experiment under which the data are obtained. If 'paired'
3144 is None then the data will not be treated as paired data in the subsequent
32- calculations. If 'paired' is 'baseline', then in each tuple of x, other
33- groups will be paired up with the first group (as control). If 'paired' is
45+ calculations. If 'paired' is 'baseline', then in each tuple of x, other
46+ groups will be paired up with the first group (as control). If 'paired' is
3447 'sequential', then in each tuple of x, each group will be paired up with
3548 its previous group (as control).
3649 id_col : default None.
@@ -45,7 +58,7 @@ def load(data, idx=None, x=None, y=None, paired=None, id_col=None,
4558 This integer is used to seed the random number generator during
4659 bootstrap resampling, ensuring that the confidence intervals
4760 reported are replicable.
48- proportional : boolean, default False.
61+ proportional : boolean, default False.
4962 An indicator of whether the data is binary or not. When set to True, it
5063 specifies that the data consists of binary data, where the values are
5164 limited to 0 and 1. The code is not suitable for analyzing proportion
@@ -55,76 +68,112 @@ def load(data, idx=None, x=None, y=None, paired=None, id_col=None,
5568 delta2 : boolean, default False
5669 Indicator of delta-delta experiment
5770 experiment : String, default None
58- The name of the column of the dataframe which contains the label of
71+ The name of the column of the dataframe which contains the label of
5972 experiments
6073 experiment_label : list, default None
6174 A list of String to specify the order of subplots for delta-delta plots.
62- This can be expressed as a list of 2 elements if and only if 'delta2'
63- is True; otherwise it can only be a string.
75+ This can be expressed as a list of 2 elements if and only if 'delta2'
76+ is True; otherwise it can only be a string.
6477 x1_level : list, default None
6578 A list of String to specify the order of subplots for delta-delta plots.
66- This can be expressed as a list of 2 elements if and only if 'delta2'
67- is True; otherwise it can only be a string.
79+ This can be expressed as a list of 2 elements if and only if 'delta2'
80+ is True; otherwise it can only be a string.
6881 mini_meta : boolean, default False
6982 Indicator of weighted delta calculation.
7083
7184 Returns
7285 -------
7386 A `Dabest` object.
74- '''
75- from ._classes import Dabest
76-
77- return Dabest (data , idx , x , y , paired , id_col , ci , resamples , random_seed , proportional , delta2 , experiment , experiment_label , x1_level , mini_meta )
78-
79-
87+ """
88+ from dabest import Dabest
89+
90+ return Dabest (
91+ data ,
92+ idx ,
93+ x ,
94+ y ,
95+ paired ,
96+ id_col ,
97+ ci ,
98+ resamples ,
99+ random_seed ,
100+ proportional ,
101+ delta2 ,
102+ experiment ,
103+ experiment_label ,
104+ x1_level ,
105+ mini_meta ,
106+ )
80107
81108# %% ../nbs/API/load.ipynb 5
82109import numpy as np
83110from typing import Union , Optional
111+ import pandas as pd
84112
85- def prop_dataset (group :Union [list , tuple , np .ndarray , dict ], #Accepts lists, tuples, or numpy ndarrays of numeric types.
86- group_names : Optional [list ] = None ):
87- '''
113+
114+ def prop_dataset (
115+ group : Union [
116+ list , tuple , np .ndarray , dict
117+ ], # Accepts lists, tuples, or numpy ndarrays of numeric types.
118+ group_names : Optional [list ] = None ,
119+ ):
120+ """
88121 Convenient function to generate a dataframe of binary data.
89- '''
90- import pandas as pd
122+ """
91123
92124 if isinstance (group , dict ):
93125 # If group_names is not provided, use the keys of the dict as group_names
94126 if group_names is None :
95127 group_names = list (group .keys ())
96128 elif not set (group_names ) == set (group .keys ()):
97129 # Check if the group_names provided is the same as the keys of the dict
98- raise ValueError (' group_names must be the same as the keys of the dict.' )
130+ raise ValueError (" group_names must be the same as the keys of the dict." )
99131 # Check if the values in the dict are lists, tuples, or numpy ndarrays
100- if not all ([isinstance (group [name ], (list , tuple , np .ndarray )) for name in group_names ]):
101- raise ValueError ('group must be a dict of lists, tuples, or numpy ndarrays of numeric types.' )
132+ if not all (
133+ [isinstance (group [name ], (list , tuple , np .ndarray )) for name in group_names ]
134+ ):
135+ raise ValueError (
136+ "group must be a dict of lists, tuples, or numpy ndarrays of numeric types."
137+ )
102138 # Check if the values in the dict only have two elements under each parent key
103139 if not all ([len (group [name ]) == 2 for name in group_names ]):
104- raise ValueError (' Each parent key should have only two elements.' )
140+ raise ValueError (" Each parent key should have only two elements." )
105141 group_val = group
106142
107143 else :
108144 if group_names is None :
109- raise ValueError (' group_names must be provided if group is not a dict.' )
145+ raise ValueError (" group_names must be provided if group is not a dict." )
110146 # Check if the length of group is two times of the length of group_names
111147 if not len (group ) == 2 * len (group_names ):
112- raise ValueError ('The length of group must be two times of the length of group_names.' )
113- group_val = {group_names [i ]: [group [i * 2 ], group [i * 2 + 1 ]] for i in range (len (group_names ))}
148+ raise ValueError (
149+ "The length of group must be two times of the length of group_names."
150+ )
151+ group_val = {
152+ group_names [i ]: [group [i * 2 ], group [i * 2 + 1 ]]
153+ for i in range (len (group_names ))
154+ }
114155
115156 # Check if the sum of values in group_val under each key are the same
116- if not all ([sum (group_val [name ]) == sum (group_val [group_names [0 ]]) for name in group_val .keys ()]):
117- raise ValueError ('The sum of values under each key must be the same.' )
118-
119- id_col = pd .Series (range (1 , sum (group_val [group_names [0 ]])+ 1 ))
120-
157+ if not all (
158+ [
159+ sum (group_val [name ]) == sum (group_val [group_names [0 ]])
160+ for name in group_val .keys ()
161+ ]
162+ ):
163+ raise ValueError ("The sum of values under each key must be the same." )
164+
165+ id_col = pd .Series (range (1 , sum (group_val [group_names [0 ]]) + 1 ))
166+
121167 final_df = pd .DataFrame ()
122168
123169 for name in group_val .keys ():
124- col = np .repeat (0 , group_val [name ][0 ]).tolist () + np .repeat (1 , group_val [name ][1 ]).tolist ()
125- df = pd .DataFrame ({name :col })
170+ col = (
171+ np .repeat (0 , group_val [name ][0 ]).tolist ()
172+ + np .repeat (1 , group_val [name ][1 ]).tolist ()
173+ )
174+ df = pd .DataFrame ({name : col })
126175 final_df = pd .concat ([final_df , df ], axis = 1 )
127176
128- final_df ['ID' ] = id_col
177+ final_df ["ID" ] = id_col
129178
130179 return final_df
0 commit comments