1010from scipy .stats import norm
1111from scipy .stats import randint
1212
13-
1413# %% ../nbs/API/dabest_object.ipynb 6
1514class Dabest (object ):
1615
@@ -58,6 +57,18 @@ def __init__(
5857 self ._check_errors (x , y , idx , experiment , experiment_label , x1_level )
5958
6059
60+ # Check if there is NaN under any of the paired settings
61+ if self .__is_paired and self .__output_data .isnull ().values .any ():
62+ import warnings
63+ warn1 = f"NaN values detected under paired setting and removed,"
64+ warn2 = f" please check your data."
65+ warnings .warn (warn1 + warn2 )
66+ if x is not None and y is not None :
67+ rmname = self .__output_data [self .__output_data [y ].isnull ()][self .__id_col ].tolist ()
68+ self .__output_data = self .__output_data [~ self .__output_data [self .__id_col ].isin (rmname )]
69+ elif x is None and y is None :
70+ self .__output_data .dropna (inplace = True )
71+
6172 # create new x & idx and record the second variable if this is a valid 2x2 ANOVA case
6273 if idx is None and x is not None and y is not None :
6374 # Add a length check for unique values in the first element in list x,
@@ -442,26 +453,47 @@ def _check_errors(self, x, y, idx, experiment, experiment_label, x1_level):
442453 raise ValueError (err0 )
443454
444455 # Check if the columns stated are valid
445- # TODO instead of traversing twice idx you can traverse only once
446- # and break the loop if the condition is not satisfied?
447- # TODO What if the type is not str and not tuple,list? missing raise Error
448- if all ([isinstance (i , str ) for i in idx ]):
449- if len (pd .unique ([t for t in idx ]).tolist ()) != 2 :
456+ # Initialize a flag to track if any element in idx is neither str nor (tuple, list)
457+ valid_types = True
458+
459+ # Initialize variables to track the conditions for str and (tuple, list)
460+ is_str_condition_met , is_tuple_list_condition_met = False , False
461+
462+ # Single traversal for optimization
463+ for item in idx :
464+ if isinstance (item , str ):
465+ is_str_condition_met = True
466+ elif isinstance (item , (tuple , list )) and len (item ) == 2 :
467+ is_tuple_list_condition_met = True
468+ else :
469+ valid_types = False
470+ break # Exit the loop if an invalid type is found
471+
472+ # Check if all types are valid
473+ if not valid_types :
474+ err0 = "`mini_meta` is True, but `idx` ({})" .format (idx )
475+ err1 = "does not contain exactly 2 unique columns."
476+ raise ValueError (err0 + err1 )
477+
478+ # Handling str type condition
479+ if is_str_condition_met :
480+ if len (pd .unique (idx ).tolist ()) != 2 :
450481 err0 = "`mini_meta` is True, but `idx` ({})" .format (idx )
451- err1 = "does not contain exactly 2 columns."
482+ err1 = "does not contain exactly 2 unique columns."
452483 raise ValueError (err0 + err1 )
453484
454- if all ([isinstance (i , (tuple , list )) for i in idx ]):
485+ # Handling (tuple, list) type condition
486+ if is_tuple_list_condition_met :
455487 all_idx_lengths = [len (t ) for t in idx ]
456488 if (array (all_idx_lengths ) != 2 ).any ():
457- err1 = "`mini_meta` is True, but some idx "
458- err2 = "in {} does not consist only of two groups." .format (idx )
489+ err1 = "`mini_meta` is True, but some elements in idx "
490+ err2 = "in {} do not consist only of two groups." .format (idx )
459491 raise ValueError (err1 + err2 )
460492
461- # TODO can you have True mini_meta and delta2 at the same time?
493+
462494 # Check if this is a 2x2 ANOVA case and x & y are valid columns
463495 # Create experiment_label and x1_level
464- if self .__delta2 :
496+ elif self .__delta2 :
465497 if x is None :
466498 error_msg = "If `delta2` is True. `x` parameter cannot be None. String or list expected"
467499 raise ValueError (error_msg )
@@ -534,7 +566,6 @@ def _check_errors(self, x, y, idx, experiment, experiment_label, x1_level):
534566 else :
535567 x1_level = self .__output_data [x [0 ]].unique ()
536568
537- # TODO what if experiment is None?
538569 elif experiment :
539570 experiment_label = self .__output_data [experiment ].unique ()
540571 x1_level = self .__output_data [x [0 ]].unique ()
@@ -545,7 +576,16 @@ def _get_plot_data(self, x, y, all_plot_groups):
545576 """
546577 Function to prepare some attributes for plotting
547578 """
548-
579+ # Check if there is NaN under any of the paired settings
580+ if self .__is_paired is not None and self .__output_data .isnull ().values .any ():
581+ print ("Nan" )
582+ import warnings
583+ warn1 = f"NaN values detected under paired setting and removed,"
584+ warn2 = f" please check your data."
585+ warnings .warn (warn1 + warn2 )
586+ rmname = self .__output_data [self .__output_data [y ].isnull ()][self .__id_col ].tolist ()
587+ self .__output_data = self .__output_data [~ self .__output_data [self .__id_col ].isin (rmname )]
588+
549589 # Identify the type of data that was passed in.
550590 if x is not None and y is not None :
551591 # Assume we have a long dataset.
@@ -589,6 +629,13 @@ def _get_plot_data(self, x, y, all_plot_groups):
589629 self .__xvar = "group"
590630 self .__yvar = "value"
591631
632+ # Check if there is NaN under any of the paired settings
633+ if self .__is_paired is not None and self .__output_data .isnull ().values .any ():
634+ import warnings
635+ warn1 = f"NaN values detected under paired setting and removed,"
636+ warn2 = f" please check your data."
637+ warnings .warn (warn1 + warn2 )
638+
592639 # First, check we have all columns in the dataset.
593640 for g in all_plot_groups :
594641 if g not in self .__output_data .columns :
@@ -611,10 +658,7 @@ def _get_plot_data(self, x, y, all_plot_groups):
611658 # Added in v0.2.7.
612659 plot_data .dropna (axis = 0 , how = "any" , subset = [self .__yvar ], inplace = True )
613660
614- # TODO these comments should not be in the code but on the release notes of the package version
615- # Lines 131 to 140 added in v0.2.3.
616- # Fixes a bug that jammed up when the xvar column was already
617- # a pandas Categorical. Now we check for this and act appropriately.
661+
618662 if isinstance (plot_data [self .__xvar ].dtype , pd .CategoricalDtype ):
619663 plot_data [self .__xvar ].cat .remove_unused_categories (inplace = True )
620664 plot_data [self .__xvar ].cat .reorder_categories (
0 commit comments