Skip to content

Commit 9727f17

Browse files
authored
Revert "Checks out all the todos left in the refactoring stages and add error testings"
1 parent b5dbb86 commit 9727f17

26 files changed

Lines changed: 173 additions & 711 deletions

CHANGELOG.md

Lines changed: 0 additions & 14 deletions
This file was deleted.

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ contributing](CONTRIBUTING.md), create a new issue using Feature request
166166
template or create a new post in [our Google
167167
Group](https://groups.google.com/g/estimationstats).
168168

169+
169170
## Acknowledgements
170171

171172
We would like to thank alpha testers from the [Claridge-Chang
@@ -178,6 +179,7 @@ Stanislav Ott.
178179
## Testing
179180

180181
To test DABEST, you need to install
182+
181183
[pytest](https://docs.pytest.org/en/latest) and
182184
[nbdev](https://nbdev.fast.ai/).
183185

dabest/_bootstrap_tools.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -108,15 +108,7 @@ def __init__(
108108
ttest_single = "NIL"
109109
ttest_2_ind = "NIL"
110110
ttest_2_paired = ttest_rel(x1, x2)[1]
111-
112-
try:
113-
wilcoxonresult = wilcoxon(x1, x2)[1]
114-
except ValueError as e:
115-
warnings.warn("Wilcoxon test could not be performed. This might be due "
116-
"to no variability in the difference of the paired groups. \n"
117-
"Error: {}\n"
118-
"For detailed information, please refer to https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.wilcoxon.html "
119-
.format(e))
111+
wilcoxonresult = wilcoxon(x1, x2)[1]
120112
mannwhitneyresult = "NIL"
121113

122114
# Turns data into array, then tuple.

dabest/_dabest_object.py

Lines changed: 18 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from scipy.stats import norm
1111
from scipy.stats import randint
1212

13+
1314
# %% ../nbs/API/dabest_object.ipynb 6
1415
class Dabest(object):
1516

@@ -57,18 +58,6 @@ def __init__(
5758
self._check_errors(x, y, idx, experiment, experiment_label, x1_level)
5859

5960

60-
# Check if there is NaN under any of the paired settings
61-
if self.__is_paired and self.__output_data.isnull().values.any():
62-
import warnings
63-
warn1 = f"NaN values detected under paired setting and removed,"
64-
warn2 = f" please check your data."
65-
warnings.warn(warn1 + warn2)
66-
if x is not None and y is not None:
67-
rmname = self.__output_data[self.__output_data[y].isnull()][self.__id_col].tolist()
68-
self.__output_data = self.__output_data[~self.__output_data[self.__id_col].isin(rmname)]
69-
elif x is None and y is None:
70-
self.__output_data.dropna(inplace=True)
71-
7261
# create new x & idx and record the second variable if this is a valid 2x2 ANOVA case
7362
if idx is None and x is not None and y is not None:
7463
# Add a length check for unique values in the first element in list x,
@@ -453,47 +442,26 @@ def _check_errors(self, x, y, idx, experiment, experiment_label, x1_level):
453442
raise ValueError(err0)
454443

455444
# Check if the columns stated are valid
456-
# Initialize a flag to track if any element in idx is neither str nor (tuple, list)
457-
valid_types = True
458-
459-
# Initialize variables to track the conditions for str and (tuple, list)
460-
is_str_condition_met, is_tuple_list_condition_met = False, False
461-
462-
# Single traversal for optimization
463-
for item in idx:
464-
if isinstance(item, str):
465-
is_str_condition_met = True
466-
elif isinstance(item, (tuple, list)) and len(item) == 2:
467-
is_tuple_list_condition_met = True
468-
else:
469-
valid_types = False
470-
break # Exit the loop if an invalid type is found
471-
472-
# Check if all types are valid
473-
if not valid_types:
474-
err0 = "`mini_meta` is True, but `idx` ({})".format(idx)
475-
err1 = "does not contain exactly 2 unique columns."
476-
raise ValueError(err0 + err1)
477-
478-
# Handling str type condition
479-
if is_str_condition_met:
480-
if len(pd.unique(idx).tolist()) != 2:
445+
# TODO instead of traversing idx twice, traverse it only once
446+
# and break the loop if the condition is not satisfied?
447+
# TODO What if an element is neither a str nor a tuple/list? A raise is missing for that case
448+
if all([isinstance(i, str) for i in idx]):
449+
if len(pd.unique([t for t in idx]).tolist()) != 2:
481450
err0 = "`mini_meta` is True, but `idx` ({})".format(idx)
482-
err1 = "does not contain exactly 2 unique columns."
451+
err1 = "does not contain exactly 2 columns."
483452
raise ValueError(err0 + err1)
484453

485-
# Handling (tuple, list) type condition
486-
if is_tuple_list_condition_met:
454+
if all([isinstance(i, (tuple, list)) for i in idx]):
487455
all_idx_lengths = [len(t) for t in idx]
488456
if (array(all_idx_lengths) != 2).any():
489-
err1 = "`mini_meta` is True, but some elements in idx "
490-
err2 = "in {} do not consist only of two groups.".format(idx)
457+
err1 = "`mini_meta` is True, but some idx "
458+
err2 = "in {} does not consist only of two groups.".format(idx)
491459
raise ValueError(err1 + err2)
492460

493-
461+
# TODO can you have True mini_meta and delta2 at the same time?
494462
# Check if this is a 2x2 ANOVA case and x & y are valid columns
495463
# Create experiment_label and x1_level
496-
elif self.__delta2:
464+
if self.__delta2:
497465
if x is None:
498466
error_msg = "If `delta2` is True. `x` parameter cannot be None. String or list expected"
499467
raise ValueError(error_msg)
@@ -566,6 +534,7 @@ def _check_errors(self, x, y, idx, experiment, experiment_label, x1_level):
566534
else:
567535
x1_level = self.__output_data[x[0]].unique()
568536

537+
# TODO what if experiment is None?
569538
elif experiment:
570539
experiment_label = self.__output_data[experiment].unique()
571540
x1_level = self.__output_data[x[0]].unique()
@@ -576,16 +545,7 @@ def _get_plot_data(self, x, y, all_plot_groups):
576545
"""
577546
Function to prepare some attributes for plotting
578547
"""
579-
# Check if there is NaN under any of the paired settings
580-
if self.__is_paired is not None and self.__output_data.isnull().values.any():
581-
print("Nan")
582-
import warnings
583-
warn1 = f"NaN values detected under paired setting and removed,"
584-
warn2 = f" please check your data."
585-
warnings.warn(warn1 + warn2)
586-
rmname = self.__output_data[self.__output_data[y].isnull()][self.__id_col].tolist()
587-
self.__output_data = self.__output_data[~self.__output_data[self.__id_col].isin(rmname)]
588-
548+
589549
# Identify the type of data that was passed in.
590550
if x is not None and y is not None:
591551
# Assume we have a long dataset.
@@ -629,13 +589,6 @@ def _get_plot_data(self, x, y, all_plot_groups):
629589
self.__xvar = "group"
630590
self.__yvar = "value"
631591

632-
# Check if there is NaN under any of the paired settings
633-
if self.__is_paired is not None and self.__output_data.isnull().values.any():
634-
import warnings
635-
warn1 = f"NaN values detected under paired setting and removed,"
636-
warn2 = f" please check your data."
637-
warnings.warn(warn1 + warn2)
638-
639592
# First, check we have all columns in the dataset.
640593
for g in all_plot_groups:
641594
if g not in self.__output_data.columns:
@@ -658,7 +611,10 @@ def _get_plot_data(self, x, y, all_plot_groups):
658611
# Added in v0.2.7.
659612
plot_data.dropna(axis=0, how="any", subset=[self.__yvar], inplace=True)
660613

661-
614+
# TODO these comments should not be in the code but in the release notes of the package version
615+
# Lines 131 to 140 added in v0.2.3.
616+
# Fixes a bug that jammed up when the xvar column was already
617+
# a pandas Categorical. Now we check for this and act appropriately.
662618
if isinstance(plot_data[self.__xvar].dtype, pd.CategoricalDtype):
663619
plot_data[self.__xvar].cat.remove_unused_categories(inplace=True)
664620
plot_data[self.__xvar].cat.reorder_categories(

dabest/_delta_objects.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -555,7 +555,7 @@ def __repr__(self, header=True, sigfig=3):
555555
bs = bs1 + bs2
556556

557557
pval_def1 = "Any p-value reported is the probability of observing the" + \
558-
"effect size (or greater),\nassuming the null hypothesis of " + \
558+
"effect size (or greater),\nassuming the null hypothesis of" + \
559559
"zero difference is true."
560560
pval_def2 = "\nFor each p-value, 5000 reshuffles of the " + \
561561
"control and test labels were performed."

dabest/_effsize_objects.py

Lines changed: 32 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ def __repr__(self, show_resample_count=True, define_pval=True, sigfig=3):
219219

220220
pval_def1 = (
221221
"Any p-value reported is the probability of observing the"
222-
+ "effect size (or greater),\nassuming the null hypothesis of "
222+
+ "effect size (or greater),\nassuming the null hypothesis of"
223223
+ "zero difference is true."
224224
)
225225
pval_def2 = (
@@ -299,6 +299,7 @@ def _compute_bca_intervals(self, sorted_bootstraps):
299299
)
300300

301301
else:
302+
# TODO improve error handling, separate file with error messages?
302303
err1 = "The $lim_type limit of the BCa interval cannot be computed."
303304
err2 = "It is set to the effect size itself."
304305
err3 = "All bootstrap values were likely all the same."
@@ -329,16 +330,9 @@ def _perform_statistical_test(self):
329330

330331
if self.__is_paired and not self.__proportional:
331332
# Wilcoxon, a non-parametric version of the paired T-test.
332-
try:
333-
wilcoxon = spstats.wilcoxon(self.__control, self.__test)
334-
self.__pvalue_wilcoxon = wilcoxon.pvalue
335-
self.__statistic_wilcoxon = wilcoxon.statistic
336-
except ValueError as e:
337-
warnings.warn("Wilcoxon test could not be performed. This might be due "
338-
"to no variability in the difference of the paired groups. \n"
339-
"Error: {}\n"
340-
"For detailed information, please refer to https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.wilcoxon.html "
341-
.format(e))
333+
wilcoxon = spstats.wilcoxon(self.__control, self.__test)
334+
self.__pvalue_wilcoxon = wilcoxon.pvalue
335+
self.__statistic_wilcoxon = wilcoxon.statistic
342336

343337
if self.__effect_size != "median_diff":
344338
# Paired Student's t-test.
@@ -363,16 +357,6 @@ def _perform_statistical_test(self):
363357
self.__pvalue_mcnemar = _mcnemar.pvalue
364358
self.__statistic_mcnemar = _mcnemar.statistic
365359

366-
elif self.__proportional:
367-
# The Cohen's h calculation is for binary categorical data
368-
try:
369-
self.__proportional_difference = es.cohens_h(
370-
self.__control, self.__test
371-
)
372-
except ValueError as e:
373-
warnings.warn(f"Calculation of Cohen's h failed. This method is applicable "
374-
f"only for binary data (0's and 1's). Details: {e}")
375-
376360
elif self.__effect_size == "cliffs_delta":
377361
# Let's go with Brunner-Munzel!
378362
brunner_munzel = spstats.brunnermunzel(
@@ -414,13 +398,23 @@ def _perform_statistical_test(self):
414398
)
415399
self.__pvalue_mann_whitney = mann_whitney.pvalue
416400
self.__statistic_mann_whitney = mann_whitney.statistic
417-
except ValueError as e:
418-
warnings.warn("Mann-Whitney test could not be performed. This might be due "
419-
"to identical rank values in both control and test groups. "
420-
"Details: {}".format(e))
401+
except ValueError:
402+
# TODO At least print some warning?
403+
# Occurs when the control and test are exactly identical
404+
# in terms of rank (eg. all zeros.)
405+
pass
421406

422407
standardized_es = es.cohens_d(self.__control, self.__test, is_paired=None)
423408

409+
# The Cohen's h calculation is for binary categorical data
410+
try:
411+
self.__proportional_difference = es.cohens_h(
412+
self.__control, self.__test
413+
)
414+
except ValueError:
415+
# TODO At least print some warning?
416+
# Occurs only when the data do not consist solely of 0's and 1's.
417+
pass
424418

425419
def to_dict(self):
426420
"""
@@ -573,79 +567,87 @@ def statistic_mcnemar(self):
573567

574568
@property
575569
def pvalue_paired_students_t(self):
570+
# TODO Missing docstring
576571
try:
577572
return self.__pvalue_paired_students_t
578573
except AttributeError:
579574
return npnan
580575

581576
@property
582577
def statistic_paired_students_t(self):
578+
# TODO Missing docstring
583579
try:
584580
return self.__statistic_paired_students_t
585581
except AttributeError:
586582
return npnan
587583

588584
@property
589585
def pvalue_kruskal(self):
586+
# TODO Missing docstring
590587
try:
591588
return self.__pvalue_kruskal
592589
except AttributeError:
593590
return npnan
594591

595592
@property
596593
def statistic_kruskal(self):
594+
# TODO Missing docstring
597595
try:
598596
return self.__statistic_kruskal
599597
except AttributeError:
600598
return npnan
601599

602600
@property
603601
def pvalue_welch(self):
602+
# TODO Missing docstring
604603
try:
605604
return self.__pvalue_welch
606605
except AttributeError:
607606
return npnan
608607

609608
@property
610609
def statistic_welch(self):
610+
# TODO Missing docstring
611611
try:
612612
return self.__statistic_welch
613613
except AttributeError:
614614
return npnan
615615

616616
@property
617617
def pvalue_students_t(self):
618+
# TODO Missing docstring
618619
try:
619620
return self.__pvalue_students_t
620621
except AttributeError:
621622
return npnan
622623

623624
@property
624625
def statistic_students_t(self):
626+
# TODO Missing docstring
625627
try:
626628
return self.__statistic_students_t
627629
except AttributeError:
628630
return npnan
629631

630632
@property
631633
def pvalue_mann_whitney(self):
634+
# TODO Missing docstring
632635
try:
633636
return self.__pvalue_mann_whitney
634637
except AttributeError:
635638
return npnan
636639

637640
@property
638641
def statistic_mann_whitney(self):
642+
# TODO Missing docstring
639643
try:
640644
return self.__statistic_mann_whitney
641645
except AttributeError:
642646
return npnan
643647

644648
@property
645649
def pvalue_permutation(self):
646-
"""
647-
p value of permutation test
648-
"""
650+
# TODO Missing docstring
649651
return self.__PermutationTest_result.pvalue
650652

651653
@property
@@ -661,10 +663,12 @@ def permutations(self):
661663

662664
@property
663665
def permutations_var(self):
666+
# TODO Missing docstring
664667
return self.__PermutationTest_result.permutations_var
665668

666669
@property
667670
def proportional_difference(self):
671+
# TODO Missing docstring
668672
try:
669673
return self.__proportional_difference
670674
except AttributeError:

dabest/_stats_tools/confint_1group.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,7 @@ def compute_1group_jackknife(x, func, *args, **kwargs):
3838

3939

4040
def compute_1group_acceleration(jack_dist):
41-
"""
42-
Returns the acceleration value based on the jackknife distribution.
43-
"""
41+
# TODO is a separate function needed just to wrap a one-line call?
4442
from . import confint_2group_diff as ci_2g
4543

4644
return ci_2g._calc_accel(jack_dist)

0 commit comments

Comments
 (0)