1010from scipy .stats import norm
1111from scipy .stats import randint
1212
13+
1314# %% ../nbs/API/dabest_object.ipynb 6
1415class Dabest (object ):
1516
@@ -57,18 +58,6 @@ def __init__(
5758 self ._check_errors (x , y , idx , experiment , experiment_label , x1_level )
5859
5960
60- # Check if there is NaN under any of the paired settings
61- if self .__is_paired and self .__output_data .isnull ().values .any ():
62- import warnings
63- warn1 = f"NaN values detected under paired setting and removed,"
64- warn2 = f" please check your data."
65- warnings .warn (warn1 + warn2 )
66- if x is not None and y is not None :
67- rmname = self .__output_data [self .__output_data [y ].isnull ()][self .__id_col ].tolist ()
68- self .__output_data = self .__output_data [~ self .__output_data [self .__id_col ].isin (rmname )]
69- elif x is None and y is None :
70- self .__output_data .dropna (inplace = True )
71-
7261 # create new x & idx and record the second variable if this is a valid 2x2 ANOVA case
7362 if idx is None and x is not None and y is not None :
7463 # Add a length check for unique values in the first element in list x,
@@ -453,47 +442,26 @@ def _check_errors(self, x, y, idx, experiment, experiment_label, x1_level):
453442 raise ValueError (err0 )
454443
455444 # Check if the columns stated are valid
456- # Initialize a flag to track if any element in idx is neither str nor (tuple, list)
457- valid_types = True
458-
459- # Initialize variables to track the conditions for str and (tuple, list)
460- is_str_condition_met , is_tuple_list_condition_met = False , False
461-
462- # Single traversal for optimization
463- for item in idx :
464- if isinstance (item , str ):
465- is_str_condition_met = True
466- elif isinstance (item , (tuple , list )) and len (item ) == 2 :
467- is_tuple_list_condition_met = True
468- else :
469- valid_types = False
470- break # Exit the loop if an invalid type is found
471-
472- # Check if all types are valid
473- if not valid_types :
474- err0 = "`mini_meta` is True, but `idx` ({})" .format (idx )
475- err1 = "does not contain exactly 2 unique columns."
476- raise ValueError (err0 + err1 )
477-
478- # Handling str type condition
479- if is_str_condition_met :
480- if len (pd .unique (idx ).tolist ()) != 2 :
445+ # TODO: instead of traversing idx twice, traverse it only once
446+ # and break out of the loop as soon as a condition is not satisfied.
447+ # TODO: what if an element is neither a str nor a tuple/list? The error raise for that case is missing.
448+ if all ([isinstance (i , str ) for i in idx ]):
449+ if len (pd .unique ([t for t in idx ]).tolist ()) != 2 :
481450 err0 = "`mini_meta` is True, but `idx` ({})" .format (idx )
482- err1 = "does not contain exactly 2 unique columns."
451+ err1 = "does not contain exactly 2 columns."
483452 raise ValueError (err0 + err1 )
484453
485- # Handling (tuple, list) type condition
486- if is_tuple_list_condition_met :
454+ if all ([isinstance (i , (tuple , list )) for i in idx ]):
487455 all_idx_lengths = [len (t ) for t in idx ]
488456 if (array (all_idx_lengths ) != 2 ).any ():
489- err1 = "`mini_meta` is True, but some elements in idx "
490- err2 = "in {} do not consist only of two groups." .format (idx )
457+ err1 = "`mini_meta` is True, but some idx "
458+ err2 = "in {} does not consist only of two groups." .format (idx )
491459 raise ValueError (err1 + err2 )
492460
493-
461+ # TODO: can mini_meta and delta2 both be True at the same time?
494462 # Check if this is a 2x2 ANOVA case and x & y are valid columns
495463 # Create experiment_label and x1_level
496- elif self .__delta2 :
464+ if self .__delta2 :
497465 if x is None :
498466 error_msg = "If `delta2` is True. `x` parameter cannot be None. String or list expected"
499467 raise ValueError (error_msg )
@@ -566,6 +534,7 @@ def _check_errors(self, x, y, idx, experiment, experiment_label, x1_level):
566534 else :
567535 x1_level = self .__output_data [x [0 ]].unique ()
568536
537+ # TODO what if experiment is None?
569538 elif experiment :
570539 experiment_label = self .__output_data [experiment ].unique ()
571540 x1_level = self .__output_data [x [0 ]].unique ()
@@ -576,16 +545,7 @@ def _get_plot_data(self, x, y, all_plot_groups):
576545 """
577546 Function to prepare some attributes for plotting
578547 """
579- # Check if there is NaN under any of the paired settings
580- if self .__is_paired is not None and self .__output_data .isnull ().values .any ():
581- print ("Nan" )
582- import warnings
583- warn1 = f"NaN values detected under paired setting and removed,"
584- warn2 = f" please check your data."
585- warnings .warn (warn1 + warn2 )
586- rmname = self .__output_data [self .__output_data [y ].isnull ()][self .__id_col ].tolist ()
587- self .__output_data = self .__output_data [~ self .__output_data [self .__id_col ].isin (rmname )]
588-
548+
589549 # Identify the type of data that was passed in.
590550 if x is not None and y is not None :
591551 # Assume we have a long dataset.
@@ -629,13 +589,6 @@ def _get_plot_data(self, x, y, all_plot_groups):
629589 self .__xvar = "group"
630590 self .__yvar = "value"
631591
632- # Check if there is NaN under any of the paired settings
633- if self .__is_paired is not None and self .__output_data .isnull ().values .any ():
634- import warnings
635- warn1 = f"NaN values detected under paired setting and removed,"
636- warn2 = f" please check your data."
637- warnings .warn (warn1 + warn2 )
638-
639592 # First, check we have all columns in the dataset.
640593 for g in all_plot_groups :
641594 if g not in self .__output_data .columns :
@@ -658,7 +611,10 @@ def _get_plot_data(self, x, y, all_plot_groups):
658611 # Added in v0.2.7.
659612 plot_data .dropna (axis = 0 , how = "any" , subset = [self .__yvar ], inplace = True )
660613
661-
614+ # TODO: these comments belong in the release notes for the relevant package version, not in the code.
615+ # Lines 131 to 140 added in v0.2.3.
616+ # Fixes a bug that jammed up when the xvar column was already
617+ # a pandas Categorical. Now we check for this and act appropriately.
662618 if isinstance (plot_data [self .__xvar ].dtype , pd .CategoricalDtype ):
663619 plot_data [self .__xvar ].cat .remove_unused_categories (inplace = True )
664620 plot_data [self .__xvar ].cat .reorder_categories (
0 commit comments