@@ -59,112 +59,13 @@ def __init__(
5959
6060 # after this call the attributes self.__experiment_label and self.__x1_level are updated
6161 self ._check_errors (x , y , idx , experiment , experiment_label , x1_level )
62-
63-
64- # Check if there is NaN under any of the paired settings
65- if self .__is_paired and self .__output_data .isnull ().values .any ():
66- warn1 = f"NaN values detected under paired setting and removed,"
67- warn2 = f" please check your data."
68- warnings .warn (warn1 + warn2 )
69- if x is not None and y is not None :
70- rmname = self .__output_data [self .__output_data [y ].isnull ()][self .__id_col ].tolist ()
71- self .__output_data = self .__output_data [~ self .__output_data [self .__id_col ].isin (rmname )]
72- elif x is None and y is None :
73- self .__output_data .dropna (inplace = True )
7462
7563 # create new x & idx and record the second variable if this is a valid 2x2 ANOVA case
76- if idx is None and x is not None and y is not None :
77- # Add a length check for unique values in the first element in list x,
78- # if the length is greater than 2, force delta2 to be False
79- # Should be removed if delta2 for situations other than 2x2 is supported
80- if len (self .__output_data [x [0 ]].unique ()) > 2 and self .__x1_level is None :
81- self .__delta2 = False
82- # stop the loop if delta2 is False
83-
84- # add a new column which is a combination of experiment and the first variable
85- new_col_name = experiment + x [0 ]
86- while new_col_name in self .__output_data .columns :
87- new_col_name += "_"
88-
89- self .__output_data [new_col_name ] = (
90- self .__output_data [x [0 ]].astype (str )
91- + " "
92- + self .__output_data [experiment ].astype (str )
93- )
94-
95- # create idx and record the first and second x variable
96- idx = []
97- for i in list (map (lambda x : str (x ), self .__experiment_label )):
98- temp = []
99- for j in list (map (lambda x : str (x ), self .__x1_level )):
100- temp .append (j + " " + i )
101- idx .append (temp )
102-
103- self .__idx = idx
104- self .__x1 = x [0 ]
105- self .__x2 = x [1 ]
106- x = new_col_name
107- else :
108- self .__idx = idx
109- self .__x1 = None
110- self .__x2 = None
111-
112- # Determine the kind of estimation plot we need to produce.
113- if all ([isinstance (i , (str , int , float )) for i in idx ]):
114- # flatten out idx.
115- all_plot_groups = pd .Series ([t for t in idx ]).unique ().tolist ()
116- if len (idx ) > len (all_plot_groups ):
117- err0 = "`idx` contains duplicated groups. Please remove any duplicates and try again."
118- raise ValueError (err0 )
119-
120- # We need to re-wrap this idx inside another tuple so as to
121- # easily loop thru each pairwise group later on.
122- self .__idx = (idx ,)
123-
124- elif all ([isinstance (i , (tuple , list )) for i in idx ]):
125- all_plot_groups = pd .Series ([tt for t in idx for tt in t ]).unique ().tolist ()
126-
127- actual_groups_given = sum ([len (i ) for i in idx ])
128-
129- if actual_groups_given > len (all_plot_groups ):
130- err0 = "Groups are repeated across tuples,"
131- err1 = " or a tuple has repeated groups in it."
132- err2 = " Please remove any duplicates and try again."
133- raise ValueError (err0 + err1 + err2 )
134-
135- else : # mix of string and tuple?
136- err = "There seems to be a problem with the idx you " "entered--{}." .format (
137- idx
138- )
139- raise ValueError (err )
140-
141- # Check if there is a typo on paired
142- if self .__is_paired and self .__is_paired not in ("baseline" , "sequential" ):
143- err = "{} assigned for `paired` is not valid." .format (self .__is_paired )
144- raise ValueError (err )
145-
146- # Determine the type of data: wide or long.
147- if x is None and y is not None :
148- err = "You have only specified `y`. Please also specify `x`."
149- raise ValueError (err )
150-
151- if x is not None and y is None :
152- err = "You have only specified `x`. Please also specify `y`."
153- raise ValueError (err )
64+ idx , x , all_plot_groups = self ._prep_idx (idx , x , y , experiment )
15465
15566 self .__plot_data = self ._get_plot_data (x , y , all_plot_groups )
15667 self .__all_plot_groups = all_plot_groups
15768
158- # Check if `id_col` is valid
159- if self .__is_paired :
160- if id_col is None :
161- err = "`id_col` must be specified if `paired` is assigned with a not NoneType value."
162- raise IndexError (err )
163-
164- if id_col not in self .__plot_data .columns :
165- err = "{} is not a column in `data`. " .format (id_col )
166- raise IndexError (err )
167-
16869 self ._compute_effectsize_dfs ()
16970
17071 def __repr__ (self ):
@@ -225,6 +126,74 @@ def __repr__(self):
225126
226127 return "\n " .join (out )
227128
129+
def _prep_idx(self, idx, x, y, experiment):
    """
    Prepare `idx` and collect the plot groups it names.

    When `idx` is None and both `x` and `y` are given, a combined column
    ("<x[0]> <experiment>") is added to the output data, `idx` is built from
    the stored experiment labels and x1 levels, and `x` is replaced by the
    name of the new column. Otherwise `idx` is stored as supplied and the
    x1/x2 attributes are reset to None.

    Parameters
    ----------
    idx : flat sequence of group names, sequence of tuples/lists, or None
    x : column name, or a two-element sequence of column names when `idx`
        is None
    y : column name or None
    experiment : name of the experiment column (only used when `idx` is
        None and `x`/`y` are given)

    Returns
    -------
    tuple
        `(idx, x, all_plot_groups)`. `idx` is returned unwrapped even when
        the stored attribute is wrapped in a one-element tuple.

    Raises
    ------
    ValueError
        If `idx` contains duplicated groups, or mixes scalar entries with
        tuple/list entries.
    """
    derive_from_factors = idx is None and x is not None and y is not None

    if not derive_from_factors:
        self.__idx = idx
        self.__x1 = None
        self.__x2 = None
    else:
        # Delta-delta is only supported for the 2x2 layout: more than two
        # unique values in the first x variable switches delta2 off.
        # Should be removed once delta2 supports other layouts.
        n_levels = len(self.__output_data[x[0]].unique())
        if n_levels > 2:
            self.__delta2 = False

        # Pick a column name that does not collide with an existing one.
        combined_col = experiment + x[0]
        while combined_col in self.__output_data.columns:
            combined_col += "_"

        # New column: first x variable and experiment joined by a space.
        first_var = self.__output_data[x[0]].astype(str)
        experiment_var = self.__output_data[experiment].astype(str)
        self.__output_data[combined_col] = first_var + " " + experiment_var

        # One inner list per experiment label, one entry per x1 level.
        labels = [str(label) for label in self.__experiment_label]
        levels = [str(level) for level in self.__x1_level]
        idx = [[level + " " + label for level in levels] for label in labels]

        self.__idx = idx
        self.__x1 = x[0]
        self.__x2 = x[1]
        x = combined_col

    # Determine the kind of estimation plot we need to produce.
    scalar_kinds = (str, int, float)
    nested_kinds = (tuple, list)

    if all(isinstance(entry, scalar_kinds) for entry in self.__idx):
        # Flat idx: every entry is itself a group.
        all_plot_groups = pd.Series(list(self.__idx)).unique().tolist()
        if len(self.__idx) > len(all_plot_groups):
            raise ValueError(
                "`idx` contains duplicated groups. Please remove any duplicates and try again."
            )

        # Re-wrap idx inside another tuple so that each pairwise group can
        # be looped over uniformly later on.
        self.__idx = (idx,)

    elif all(isinstance(entry, nested_kinds) for entry in self.__idx):
        flattened = [group for entry in self.__idx for group in entry]
        all_plot_groups = pd.Series(flattened).unique().tolist()
        actual_groups_given = sum(len(entry) for entry in self.__idx)

        if actual_groups_given > len(all_plot_groups):
            message = (
                "Groups are repeated across tuples,"
                + " or a tuple has repeated groups in it."
                + " Please remove any duplicates and try again."
            )
            raise ValueError(message)

    else:  # mix of string and tuple?
        message = "There seems to be a problem with the idx you " "entered--{}.".format(self.__idx)
        raise ValueError(message)

    return idx, x, all_plot_groups
196+
228197 @property
229198 def mean_diff (self ):
230199 """
@@ -278,7 +247,11 @@ def delta_g(self):
278247 """
279248 Returns an :py:class:`EffectSizeDataFrame` for deltas' g, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`.
280249 """
281- return self .__delta_g
250+ if self .__delta2 :
251+ return self .__delta_g
252+ else :
253+ raise TypeError ("Delta-g is only available for delta-delta situations." )
254+ # return self.__delta_g
282255
283256 @property
284257 def input_data (self ):
@@ -445,6 +418,13 @@ def _check_errors(self, x, y, idx, experiment, experiment_label, x1_level):
445418 At the end of this function these two class attributes are updated
446419 self.__experiment_label and self.__x1_level
447420 '''
421+
422+ # Check if idx is present (if not a 2x2 Anova case)
423+ if idx is None :
424+ if not self .__delta2 :
425+ err0 = "Please specify `idx`."
426+ raise ValueError (err0 )
427+
448428 # Check if it is a valid mini_meta case
449429 if self .__mini_meta :
450430 # Only mini_meta calculation but not proportional and delta-delta function
@@ -565,7 +545,6 @@ def _check_errors(self, x, y, idx, experiment, experiment_label, x1_level):
565545 i , experiment
566546 )
567547 raise IndexError (err )
568-
569548 else :
570549 x1_level = self .__output_data [x [0 ]].unique ()
571550
@@ -575,34 +554,65 @@ def _check_errors(self, x, y, idx, experiment, experiment_label, x1_level):
575554 self .__experiment_label = experiment_label
576555 self .__x1_level = x1_level
577556
578- def _get_plot_data (self , x , y , all_plot_groups ):
579- """
580- Function to prepare some attributes for plotting
581- """
582- # Check if there is NaN under any of the paired settings
583- if self .__is_paired is not None and self .__output_data .isnull ().values .any ():
557+ if self .__is_paired and self .__output_data .isnull ().values .any ():
584558 warn1 = f"NaN values detected under paired setting and removed,"
585559 warn2 = f" please check your data."
586560 warnings .warn (warn1 + warn2 )
587- rmname = self .__output_data [self .__output_data [y ].isnull ()][self .__id_col ].tolist ()
588- self .__output_data = self .__output_data [~ self .__output_data [self .__id_col ].isin (rmname )]
589-
590- # Identify the type of data that was passed in.
591- if x is not None and y is not None :
592- # Assume we have a long dataset.
593- # check both x and y are column names in data.
594- if x not in self .__output_data .columns :
595- err = "{0} is not a column in `data`. Please check." .format (x )
561+ if x is not None and y is not None :
562+ rmname = self .__output_data [self .__output_data [y ].isnull ()][self .__id_col ].tolist ()
563+ self .__output_data = self .__output_data [~ self .__output_data [self .__id_col ].isin (rmname )]
564+ elif x is None and y is None :
565+ self .__output_data .dropna (inplace = True )
566+
567+ # Check if there is a typo on paired
568+ if self .__is_paired and self .__is_paired not in ("baseline" , "sequential" ):
569+ err = "'{}' assigned for `paired` is not valid. Please use either 'baseline' or 'sequential'." .format (self .__is_paired )
570+ raise ValueError (err )
571+
572+ # Check if `id_col` is valid
573+ if self .__is_paired :
574+ if self .__id_col is None :
575+ err = "`id_col` must be specified if `paired` is assigned with a not NoneType value."
596576 raise IndexError (err )
597- if y not in self .__output_data .columns :
598- err = "{0} is not a column in `data`. Please check." .format (y )
577+
578+ if self .__id_col not in self .__output_data .columns :
579+ err = "`id_col` was given as '{}'; however, '{}' is not a column in `data`." .format (self .__id_col , self .__id_col )
599580 raise IndexError (err )
581+
582+ # Check if x and y are supplied (relevant to long format data)
583+ if x is None and y is not None :
584+ err = "You have only specified `y`. Please also specify `x` (for long format data)."
585+ raise ValueError (err )
600586
601- # check y is numeric.
587+ if x is not None and y is None :
588+ err = "You have only specified `x`. Please also specify `y` (for long format data)."
589+ raise ValueError (err )
590+
591+ if x is not None and y is not None :
592+ # Assume we have a long dataset.
593+ # check both x and y are column names in data.
594+ if not self .__delta2 :
595+ if x not in self .__output_data .columns :
596+ err = "'{0}' is not a column in `data`. Please check." .format (x )
597+ raise IndexError (err )
598+ if y not in self .__output_data .columns :
599+ err = "'{0}' is not a column in `data`. Please check." .format (y )
600+ raise IndexError (err )
601+ # Check that the `y` column is numeric.
602602 if not issubdtype (self .__output_data [y ].dtype , number ):
603- err = "{0} is a column in `data`, but it is not numeric." . format ( y )
603+ err = "The `y` column in `data` is not numeric. Please check."
604604 raise ValueError (err )
605605
606+
607+ def _get_plot_data (self , x , y , all_plot_groups ):
608+ # def _get_plot_data(self, x, y):
609+ """
610+ Function to prepare some attributes for plotting
611+ """
612+ # all_plot_groups = self.__all_plot_groups
613+ # Identify the type of data that was passed in.
614+ if x is not None and y is not None :
615+ # Assume we have a long dataset.
606616 # check all the idx can be found in self.__output_data[x]
607617 for g in all_plot_groups :
608618 if g not in self .__output_data [x ].unique ():
@@ -630,12 +640,6 @@ def _get_plot_data(self, x, y, all_plot_groups):
630640 self .__xvar = "group"
631641 self .__yvar = "value"
632642
633- # Check if there is NaN under any of the paired settings
634- if self .__is_paired is not None and self .__output_data .isnull ().values .any ():
635- warn1 = f"NaN values detected under paired setting and removed,"
636- warn2 = f" please check your data."
637- warnings .warn (warn1 + warn2 )
638-
639643 # First, check we have all columns in the dataset.
640644 for g in all_plot_groups :
641645 if g not in self .__output_data .columns :
0 commit comments