Skip to content

Commit e6dfe38

Browse files
committed
Added an informative error message for a missing `idx`, and trimmed down the dabest_object file by removing duplicate code
1 parent 1e98326 commit e6dfe38

3 files changed

Lines changed: 264 additions & 256 deletions

File tree

dabest/_dabest_object.py

Lines changed: 131 additions & 127 deletions
Original file line numberDiff line numberDiff line change
@@ -59,112 +59,13 @@ def __init__(
5959

6060
# after this call the attributes self.__experiment_label and self.__x1_level are updated
6161
self._check_errors(x, y, idx, experiment, experiment_label, x1_level)
62-
63-
64-
# Check if there is NaN under any of the paired settings
65-
if self.__is_paired and self.__output_data.isnull().values.any():
66-
warn1 = f"NaN values detected under paired setting and removed,"
67-
warn2 = f" please check your data."
68-
warnings.warn(warn1 + warn2)
69-
if x is not None and y is not None:
70-
rmname = self.__output_data[self.__output_data[y].isnull()][self.__id_col].tolist()
71-
self.__output_data = self.__output_data[~self.__output_data[self.__id_col].isin(rmname)]
72-
elif x is None and y is None:
73-
self.__output_data.dropna(inplace=True)
7462

7563
# create new x & idx and record the second variable if this is a valid 2x2 ANOVA case
76-
if idx is None and x is not None and y is not None:
77-
# Add a length check for unique values in the first element in list x,
78-
# if the length is greater than 2, force delta2 to be False
79-
# Should be removed if delta2 for situations other than 2x2 is supported
80-
if len(self.__output_data[x[0]].unique()) > 2 and self.__x1_level is None:
81-
self.__delta2 = False
82-
# stop the loop if delta2 is False
83-
84-
# add a new column which is a combination of experiment and the first variable
85-
new_col_name = experiment + x[0]
86-
while new_col_name in self.__output_data.columns:
87-
new_col_name += "_"
88-
89-
self.__output_data[new_col_name] = (
90-
self.__output_data[x[0]].astype(str)
91-
+ " "
92-
+ self.__output_data[experiment].astype(str)
93-
)
94-
95-
# create idx and record the first and second x variable
96-
idx = []
97-
for i in list(map(lambda x: str(x), self.__experiment_label)):
98-
temp = []
99-
for j in list(map(lambda x: str(x), self.__x1_level)):
100-
temp.append(j + " " + i)
101-
idx.append(temp)
102-
103-
self.__idx = idx
104-
self.__x1 = x[0]
105-
self.__x2 = x[1]
106-
x = new_col_name
107-
else:
108-
self.__idx = idx
109-
self.__x1 = None
110-
self.__x2 = None
111-
112-
# Determine the kind of estimation plot we need to produce.
113-
if all([isinstance(i, (str, int, float)) for i in idx]):
114-
# flatten out idx.
115-
all_plot_groups = pd.Series([t for t in idx]).unique().tolist()
116-
if len(idx) > len(all_plot_groups):
117-
err0 = "`idx` contains duplicated groups. Please remove any duplicates and try again."
118-
raise ValueError(err0)
119-
120-
# We need to re-wrap this idx inside another tuple so as to
121-
# easily loop thru each pairwise group later on.
122-
self.__idx = (idx,)
123-
124-
elif all([isinstance(i, (tuple, list)) for i in idx]):
125-
all_plot_groups = pd.Series([tt for t in idx for tt in t]).unique().tolist()
126-
127-
actual_groups_given = sum([len(i) for i in idx])
128-
129-
if actual_groups_given > len(all_plot_groups):
130-
err0 = "Groups are repeated across tuples,"
131-
err1 = " or a tuple has repeated groups in it."
132-
err2 = " Please remove any duplicates and try again."
133-
raise ValueError(err0 + err1 + err2)
134-
135-
else: # mix of string and tuple?
136-
err = "There seems to be a problem with the idx you " "entered--{}.".format(
137-
idx
138-
)
139-
raise ValueError(err)
140-
141-
# Check if there is a typo on paired
142-
if self.__is_paired and self.__is_paired not in ("baseline", "sequential"):
143-
err = "{} assigned for `paired` is not valid.".format(self.__is_paired)
144-
raise ValueError(err)
145-
146-
# Determine the type of data: wide or long.
147-
if x is None and y is not None:
148-
err = "You have only specified `y`. Please also specify `x`."
149-
raise ValueError(err)
150-
151-
if x is not None and y is None:
152-
err = "You have only specified `x`. Please also specify `y`."
153-
raise ValueError(err)
64+
idx, x, all_plot_groups = self._prep_idx(idx, x, y, experiment)
15465

15566
self.__plot_data = self._get_plot_data(x, y, all_plot_groups)
15667
self.__all_plot_groups = all_plot_groups
15768

158-
# Check if `id_col` is valid
159-
if self.__is_paired:
160-
if id_col is None:
161-
err = "`id_col` must be specified if `paired` is assigned with a not NoneType value."
162-
raise IndexError(err)
163-
164-
if id_col not in self.__plot_data.columns:
165-
err = "{} is not a column in `data`. ".format(id_col)
166-
raise IndexError(err)
167-
16869
self._compute_effectsize_dfs()
16970

17071
def __repr__(self):
@@ -225,6 +126,74 @@ def __repr__(self):
225126

226127
return "\n".join(out)
227128

129+
130+
def _prep_idx(self, idx, x, y, experiment):
131+
"""
132+
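Prepare `idx`: when `idx` is None and both `x` and `y` are given, build it from the experiment labels and x1 levels; raise ValueError on duplicated groups; return `(idx, x, all_plot_groups)`.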
133+
"""
134+
if idx is None and x is not None and y is not None:
135+
# Add a length check for unique values in the first element in list x,
136+
# if the length is greater than 2, force delta2 to be False
137+
# Should be removed if delta2 for situations other than 2x2 is supported
138+
if len(self.__output_data[x[0]].unique()) > 2:
139+
self.__delta2 = False
140+
141+
# add a new column which is a combination of experiment and the first variable
142+
new_col_name = experiment + x[0]
143+
while new_col_name in self.__output_data.columns:
144+
new_col_name += "_"
145+
146+
self.__output_data[new_col_name] = (
147+
self.__output_data[x[0]].astype(str)
148+
+ " "
149+
+ self.__output_data[experiment].astype(str)
150+
)
151+
152+
# create idx and record the first and second x variable
153+
idx = []
154+
for i in list(map(lambda x: str(x), self.__experiment_label)):
155+
temp = []
156+
for j in list(map(lambda x: str(x), self.__x1_level)):
157+
temp.append(j + " " + i)
158+
idx.append(temp)
159+
160+
self.__idx = idx
161+
self.__x1 = x[0]
162+
self.__x2 = x[1]
163+
x = new_col_name
164+
else:
165+
self.__idx = idx
166+
self.__x1 = None
167+
self.__x2 = None
168+
169+
# Determine the kind of estimation plot we need to produce.
170+
if all([isinstance(i, (str, int, float)) for i in self.__idx]):
171+
# flatten out idx.
172+
all_plot_groups = pd.Series([t for t in self.__idx]).unique().tolist()
173+
if len(self.__idx) > len(all_plot_groups):
174+
err0 = "`idx` contains duplicated groups. Please remove any duplicates and try again."
175+
raise ValueError(err0)
176+
177+
# We need to re-wrap this idx inside another tuple so as to
178+
# easily loop thru each pairwise group later on.
179+
self.__idx = (idx,)
180+
181+
elif all([isinstance(i, (tuple, list)) for i in self.__idx]):
182+
all_plot_groups = pd.Series([tt for t in self.__idx for tt in t]).unique().tolist()
183+
actual_groups_given = sum([len(i) for i in self.__idx])
184+
185+
if actual_groups_given > len(all_plot_groups):
186+
err0 = "Groups are repeated across tuples,"
187+
err1 = " or a tuple has repeated groups in it."
188+
err2 = " Please remove any duplicates and try again."
189+
raise ValueError(err0 + err1 + err2)
190+
191+
else: # mix of string and tuple?
192+
err = "There seems to be a problem with the idx you " "entered--{}.".format(self.__idx)
193+
raise ValueError(err)
194+
195+
return idx, x, all_plot_groups
196+
228197
@property
229198
def mean_diff(self):
230199
"""
@@ -278,7 +247,11 @@ def delta_g(self):
278247
"""
279248
Returns an :py:class:`EffectSizeDataFrame` for deltas' g, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`.
280249
"""
281-
return self.__delta_g
250+
if self.__delta2:
251+
return self.__delta_g
252+
else:
253+
raise TypeError("Delta-g is only available for delta-delta situations.")
254+
# return self.__delta_g
282255

283256
@property
284257
def input_data(self):
@@ -445,6 +418,13 @@ def _check_errors(self, x, y, idx, experiment, experiment_label, x1_level):
445418
At the end of this function these two class attributes are updated
446419
self.__experiment_label and self.__x1_level
447420
'''
421+
422+
# Check that `idx` is provided (it may only be omitted in the 2x2 ANOVA / delta-delta case)
423+
if idx is None:
424+
if not self.__delta2:
425+
err0 = "Please specify `idx`."
426+
raise ValueError(err0)
427+
448428
# Check if it is a valid mini_meta case
449429
if self.__mini_meta:
450430
# Only mini_meta calculation but not proportional and delta-delta function
@@ -565,7 +545,6 @@ def _check_errors(self, x, y, idx, experiment, experiment_label, x1_level):
565545
i, experiment
566546
)
567547
raise IndexError(err)
568-
569548
else:
570549
x1_level = self.__output_data[x[0]].unique()
571550

@@ -575,34 +554,65 @@ def _check_errors(self, x, y, idx, experiment, experiment_label, x1_level):
575554
self.__experiment_label = experiment_label
576555
self.__x1_level = x1_level
577556

578-
def _get_plot_data(self, x, y, all_plot_groups):
579-
"""
580-
Function to prepare some attributes for plotting
581-
"""
582-
# Check if there is NaN under any of the paired settings
583-
if self.__is_paired is not None and self.__output_data.isnull().values.any():
557+
if self.__is_paired and self.__output_data.isnull().values.any():
584558
warn1 = f"NaN values detected under paired setting and removed,"
585559
warn2 = f" please check your data."
586560
warnings.warn(warn1 + warn2)
587-
rmname = self.__output_data[self.__output_data[y].isnull()][self.__id_col].tolist()
588-
self.__output_data = self.__output_data[~self.__output_data[self.__id_col].isin(rmname)]
589-
590-
# Identify the type of data that was passed in.
591-
if x is not None and y is not None:
592-
# Assume we have a long dataset.
593-
# check both x and y are column names in data.
594-
if x not in self.__output_data.columns:
595-
err = "{0} is not a column in `data`. Please check.".format(x)
561+
if x is not None and y is not None:
562+
rmname = self.__output_data[self.__output_data[y].isnull()][self.__id_col].tolist()
563+
self.__output_data = self.__output_data[~self.__output_data[self.__id_col].isin(rmname)]
564+
elif x is None and y is None:
565+
self.__output_data.dropna(inplace=True)
566+
567+
# Check if there is a typo on paired
568+
if self.__is_paired and self.__is_paired not in ("baseline", "sequential"):
569+
err = "'{}' assigned for `paired` is not valid. Please use either 'baseline' or 'sequential'.".format(self.__is_paired)
570+
raise ValueError(err)
571+
572+
# Check if `id_col` is valid
573+
if self.__is_paired:
574+
if self.__id_col is None:
575+
err = "`id_col` must be specified if `paired` is assigned with a not NoneType value."
596576
raise IndexError(err)
597-
if y not in self.__output_data.columns:
598-
err = "{0} is not a column in `data`. Please check.".format(y)
577+
578+
if self.__id_col not in self.__output_data.columns:
579+
err = "`id_col` was given as '{}'; however, '{}' is not a column in `data`.".format(self.__id_col, self.__id_col)
599580
raise IndexError(err)
581+
582+
# Check if x and y are supplied (relevant to long format data)
583+
if x is None and y is not None:
584+
err = "You have only specified `y`. Please also specify `x` (for long format data)."
585+
raise ValueError(err)
600586

601-
# check y is numeric.
587+
if x is not None and y is None:
588+
err = "You have only specified `x`. Please also specify `y` (for long format data)."
589+
raise ValueError(err)
590+
591+
if x is not None and y is not None:
592+
# Assume we have a long dataset.
593+
# check both x and y are column names in data.
594+
if not self.__delta2:
595+
if x not in self.__output_data.columns:
596+
err = "'{0}' is not a column in `data`. Please check.".format(x)
597+
raise IndexError(err)
598+
if y not in self.__output_data.columns:
599+
err = "'{0}' is not a column in `data`. Please check.".format(y)
600+
raise IndexError(err)
601+
# Check that the `y` column is numeric.
602602
if not issubdtype(self.__output_data[y].dtype, number):
603-
err = "{0} is a column in `data`, but it is not numeric.".format(y)
603+
err = "The `y` column in `data` is not numeric. Please check."
604604
raise ValueError(err)
605605

606+
607+
def _get_plot_data(self, x, y, all_plot_groups):
608+
# def _get_plot_data(self, x, y):
609+
"""
610+
Function to prepare some attributes for plotting
611+
"""
612+
# all_plot_groups = self.__all_plot_groups
613+
# Identify the type of data that was passed in.
614+
if x is not None and y is not None:
615+
# Assume we have a long dataset.
606616
# check all the idx can be found in self.__output_data[x]
607617
for g in all_plot_groups:
608618
if g not in self.__output_data[x].unique():
@@ -630,12 +640,6 @@ def _get_plot_data(self, x, y, all_plot_groups):
630640
self.__xvar = "group"
631641
self.__yvar = "value"
632642

633-
# Check if there is NaN under any of the paired settings
634-
if self.__is_paired is not None and self.__output_data.isnull().values.any():
635-
warn1 = f"NaN values detected under paired setting and removed,"
636-
warn2 = f" please check your data."
637-
warnings.warn(warn1 + warn2)
638-
639643
# First, check we have all columns in the dataset.
640644
for g in all_plot_groups:
641645
if g not in self.__output_data.columns:

0 commit comments

Comments
 (0)