Skip to content

Commit d5b2884

Browse files
authored
Merge pull request #170 from ACCLAB/revert-169-revert-163-todos
Revert "Revert #163"
2 parents 24ca6b0 + c0e694d commit d5b2884

26 files changed

Lines changed: 711 additions & 173 deletions

CHANGELOG.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Release notes
2+
3+
<!-- do not remove -->
4+
5+
## 2023.03.29
6+
7+
### New Features
8+
- Add a new form of paired proportion plot for better support of repeated measures
9+
10+
11+
## 0.2.3
12+
13+
### Bug Fixes
14+
- Fixes a bug that caused a failure when the xvar column was already a pandas Categorical. We now check for this case and handle it appropriately.

README.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,6 @@ contributing](CONTRIBUTING.md), create a new issue using Feature request
166166
template or create a new post in [our Google
167167
Group](https://groups.google.com/g/estimationstats).
168168

169-
170169
## Acknowledgements
171170

172171
We would like to thank alpha testers from the [Claridge-Chang
@@ -179,7 +178,6 @@ Stanislav Ott.
179178
## Testing
180179

181180
To test DABEST, you need to install
182-
183181
[pytest](https://docs.pytest.org/en/latest) and
184182
[nbdev](https://nbdev.fast.ai/).
185183

dabest/_bootstrap_tools.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,15 @@ def __init__(
108108
ttest_single = "NIL"
109109
ttest_2_ind = "NIL"
110110
ttest_2_paired = ttest_rel(x1, x2)[1]
111-
wilcoxonresult = wilcoxon(x1, x2)[1]
111+
112+
try:
113+
wilcoxonresult = wilcoxon(x1, x2)[1]
114+
except ValueError as e:
115+
warnings.warn("Wilcoxon test could not be performed. This might be due "
116+
"to no variability in the difference of the paired groups. \n"
117+
"Error: {}\n"
118+
"For detailed information, please refer to https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.wilcoxon.html "
119+
.format(e))
112120
mannwhitneyresult = "NIL"
113121

114122
# Turns data into array, then tuple.

dabest/_dabest_object.py

Lines changed: 62 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from scipy.stats import norm
1111
from scipy.stats import randint
1212

13-
1413
# %% ../nbs/API/dabest_object.ipynb 6
1514
class Dabest(object):
1615

@@ -58,6 +57,18 @@ def __init__(
5857
self._check_errors(x, y, idx, experiment, experiment_label, x1_level)
5958

6059

60+
# Check if there is NaN under any of the paired settings
61+
if self.__is_paired and self.__output_data.isnull().values.any():
62+
import warnings
63+
warn1 = f"NaN values detected under paired setting and removed,"
64+
warn2 = f" please check your data."
65+
warnings.warn(warn1 + warn2)
66+
if x is not None and y is not None:
67+
rmname = self.__output_data[self.__output_data[y].isnull()][self.__id_col].tolist()
68+
self.__output_data = self.__output_data[~self.__output_data[self.__id_col].isin(rmname)]
69+
elif x is None and y is None:
70+
self.__output_data.dropna(inplace=True)
71+
6172
# create new x & idx and record the second variable if this is a valid 2x2 ANOVA case
6273
if idx is None and x is not None and y is not None:
6374
# Add a length check for unique values in the first element in list x,
@@ -442,26 +453,47 @@ def _check_errors(self, x, y, idx, experiment, experiment_label, x1_level):
442453
raise ValueError(err0)
443454

444455
# Check if the columns stated are valid
445-
# TODO instead of traversing twice idx you can traverse only once
446-
# and break the loop if the condition is not satisfied?
447-
# TODO What if the type is not str and not tuple,list? missing raise Error
448-
if all([isinstance(i, str) for i in idx]):
449-
if len(pd.unique([t for t in idx]).tolist()) != 2:
456+
# Initialize a flag to track if any element in idx is neither str nor (tuple, list)
457+
valid_types = True
458+
459+
# Initialize variables to track the conditions for str and (tuple, list)
460+
is_str_condition_met, is_tuple_list_condition_met = False, False
461+
462+
# Single traversal for optimization
463+
for item in idx:
464+
if isinstance(item, str):
465+
is_str_condition_met = True
466+
elif isinstance(item, (tuple, list)) and len(item) == 2:
467+
is_tuple_list_condition_met = True
468+
else:
469+
valid_types = False
470+
break # Exit the loop if an invalid type is found
471+
472+
# Check if all types are valid
473+
if not valid_types:
474+
err0 = "`mini_meta` is True, but `idx` ({})".format(idx)
475+
err1 = "does not contain exactly 2 unique columns."
476+
raise ValueError(err0 + err1)
477+
478+
# Handling str type condition
479+
if is_str_condition_met:
480+
if len(pd.unique(idx).tolist()) != 2:
450481
err0 = "`mini_meta` is True, but `idx` ({})".format(idx)
451-
err1 = "does not contain exactly 2 columns."
482+
err1 = "does not contain exactly 2 unique columns."
452483
raise ValueError(err0 + err1)
453484

454-
if all([isinstance(i, (tuple, list)) for i in idx]):
485+
# Handling (tuple, list) type condition
486+
if is_tuple_list_condition_met:
455487
all_idx_lengths = [len(t) for t in idx]
456488
if (array(all_idx_lengths) != 2).any():
457-
err1 = "`mini_meta` is True, but some idx "
458-
err2 = "in {} does not consist only of two groups.".format(idx)
489+
err1 = "`mini_meta` is True, but some elements in idx "
490+
err2 = "in {} do not consist only of two groups.".format(idx)
459491
raise ValueError(err1 + err2)
460492

461-
# TODO can you have True mini_meta and delta2 at the same time?
493+
462494
# Check if this is a 2x2 ANOVA case and x & y are valid columns
463495
# Create experiment_label and x1_level
464-
if self.__delta2:
496+
elif self.__delta2:
465497
if x is None:
466498
error_msg = "If `delta2` is True. `x` parameter cannot be None. String or list expected"
467499
raise ValueError(error_msg)
@@ -534,7 +566,6 @@ def _check_errors(self, x, y, idx, experiment, experiment_label, x1_level):
534566
else:
535567
x1_level = self.__output_data[x[0]].unique()
536568

537-
# TODO what if experiment is None?
538569
elif experiment:
539570
experiment_label = self.__output_data[experiment].unique()
540571
x1_level = self.__output_data[x[0]].unique()
@@ -545,7 +576,16 @@ def _get_plot_data(self, x, y, all_plot_groups):
545576
"""
546577
Function to prepare some attributes for plotting
547578
"""
548-
579+
# Check if there is NaN under any of the paired settings
580+
if self.__is_paired is not None and self.__output_data.isnull().values.any():
581+
print("Nan")
582+
import warnings
583+
warn1 = f"NaN values detected under paired setting and removed,"
584+
warn2 = f" please check your data."
585+
warnings.warn(warn1 + warn2)
586+
rmname = self.__output_data[self.__output_data[y].isnull()][self.__id_col].tolist()
587+
self.__output_data = self.__output_data[~self.__output_data[self.__id_col].isin(rmname)]
588+
549589
# Identify the type of data that was passed in.
550590
if x is not None and y is not None:
551591
# Assume we have a long dataset.
@@ -589,6 +629,13 @@ def _get_plot_data(self, x, y, all_plot_groups):
589629
self.__xvar = "group"
590630
self.__yvar = "value"
591631

632+
# Check if there is NaN under any of the paired settings
633+
if self.__is_paired is not None and self.__output_data.isnull().values.any():
634+
import warnings
635+
warn1 = f"NaN values detected under paired setting and removed,"
636+
warn2 = f" please check your data."
637+
warnings.warn(warn1 + warn2)
638+
592639
# First, check we have all columns in the dataset.
593640
for g in all_plot_groups:
594641
if g not in self.__output_data.columns:
@@ -611,10 +658,7 @@ def _get_plot_data(self, x, y, all_plot_groups):
611658
# Added in v0.2.7.
612659
plot_data.dropna(axis=0, how="any", subset=[self.__yvar], inplace=True)
613660

614-
# TODO these comments should not be in the code but on the release notes of the package version
615-
# Lines 131 to 140 added in v0.2.3.
616-
# Fixes a bug that jammed up when the xvar column was already
617-
# a pandas Categorical. Now we check for this and act appropriately.
661+
618662
if isinstance(plot_data[self.__xvar].dtype, pd.CategoricalDtype):
619663
plot_data[self.__xvar].cat.remove_unused_categories(inplace=True)
620664
plot_data[self.__xvar].cat.reorder_categories(

dabest/_delta_objects.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -555,7 +555,7 @@ def __repr__(self, header=True, sigfig=3):
555555
bs = bs1 + bs2
556556

557557
pval_def1 = "Any p-value reported is the probability of observing the" + \
558-
"effect size (or greater),\nassuming the null hypothesis of" + \
558+
"effect size (or greater),\nassuming the null hypothesis of " + \
559559
"zero difference is true."
560560
pval_def2 = "\nFor each p-value, 5000 reshuffles of the " + \
561561
"control and test labels were performed."

dabest/_effsize_objects.py

Lines changed: 28 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ def __repr__(self, show_resample_count=True, define_pval=True, sigfig=3):
219219

220220
pval_def1 = (
221221
"Any p-value reported is the probability of observing the"
222-
+ "effect size (or greater),\nassuming the null hypothesis of"
222+
+ "effect size (or greater),\nassuming the null hypothesis of "
223223
+ "zero difference is true."
224224
)
225225
pval_def2 = (
@@ -299,7 +299,6 @@ def _compute_bca_intervals(self, sorted_bootstraps):
299299
)
300300

301301
else:
302-
# TODO improve error handling, separate file with error messages?
303302
err1 = "The $lim_type limit of the BCa interval cannot be computed."
304303
err2 = "It is set to the effect size itself."
305304
err3 = "All bootstrap values were likely all the same."
@@ -330,9 +329,16 @@ def _perform_statistical_test(self):
330329

331330
if self.__is_paired and not self.__proportional:
332331
# Wilcoxon, a non-parametric version of the paired T-test.
333-
wilcoxon = spstats.wilcoxon(self.__control, self.__test)
334-
self.__pvalue_wilcoxon = wilcoxon.pvalue
335-
self.__statistic_wilcoxon = wilcoxon.statistic
332+
try:
333+
wilcoxon = spstats.wilcoxon(self.__control, self.__test)
334+
self.__pvalue_wilcoxon = wilcoxon.pvalue
335+
self.__statistic_wilcoxon = wilcoxon.statistic
336+
except ValueError as e:
337+
warnings.warn("Wilcoxon test could not be performed. This might be due "
338+
"to no variability in the difference of the paired groups. \n"
339+
"Error: {}\n"
340+
"For detailed information, please refer to https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.wilcoxon.html "
341+
.format(e))
336342

337343
if self.__effect_size != "median_diff":
338344
# Paired Student's t-test.
@@ -357,6 +363,16 @@ def _perform_statistical_test(self):
357363
self.__pvalue_mcnemar = _mcnemar.pvalue
358364
self.__statistic_mcnemar = _mcnemar.statistic
359365

366+
elif self.__proportional:
367+
# The Cohen's h calculation is for binary categorical data
368+
try:
369+
self.__proportional_difference = es.cohens_h(
370+
self.__control, self.__test
371+
)
372+
except ValueError as e:
373+
warnings.warn(f"Calculation of Cohen's h failed. This method is applicable "
374+
f"only for binary data (0's and 1's). Details: {e}")
375+
360376
elif self.__effect_size == "cliffs_delta":
361377
# Let's go with Brunner-Munzel!
362378
brunner_munzel = spstats.brunnermunzel(
@@ -398,23 +414,13 @@ def _perform_statistical_test(self):
398414
)
399415
self.__pvalue_mann_whitney = mann_whitney.pvalue
400416
self.__statistic_mann_whitney = mann_whitney.statistic
401-
except ValueError:
402-
# TODO At least print some warning?
403-
# Occurs when the control and test are exactly identical
404-
# in terms of rank (eg. all zeros.)
405-
pass
417+
except ValueError as e:
418+
warnings.warn("Mann-Whitney test could not be performed. This might be due "
419+
"to identical rank values in both control and test groups. "
420+
"Details: {}".format(e))
406421

407422
standardized_es = es.cohens_d(self.__control, self.__test, is_paired=None)
408423

409-
# The Cohen's h calculation is for binary categorical data
410-
try:
411-
self.__proportional_difference = es.cohens_h(
412-
self.__control, self.__test
413-
)
414-
except ValueError:
415-
# TODO At least print some warning?
416-
# Occur only when the data consists not only 0's and 1's.
417-
pass
418424

419425
def to_dict(self):
420426
"""
@@ -567,87 +573,79 @@ def statistic_mcnemar(self):
567573

568574
@property
569575
def pvalue_paired_students_t(self):
570-
# TODO Missing docstring
571576
try:
572577
return self.__pvalue_paired_students_t
573578
except AttributeError:
574579
return npnan
575580

576581
@property
577582
def statistic_paired_students_t(self):
578-
# TODO Missing docstring
579583
try:
580584
return self.__statistic_paired_students_t
581585
except AttributeError:
582586
return npnan
583587

584588
@property
585589
def pvalue_kruskal(self):
586-
# TODO Missing docstring
587590
try:
588591
return self.__pvalue_kruskal
589592
except AttributeError:
590593
return npnan
591594

592595
@property
593596
def statistic_kruskal(self):
594-
# TODO Missing docstring
595597
try:
596598
return self.__statistic_kruskal
597599
except AttributeError:
598600
return npnan
599601

600602
@property
601603
def pvalue_welch(self):
602-
# TODO Missing docstring
603604
try:
604605
return self.__pvalue_welch
605606
except AttributeError:
606607
return npnan
607608

608609
@property
609610
def statistic_welch(self):
610-
# TODO Missing docstring
611611
try:
612612
return self.__statistic_welch
613613
except AttributeError:
614614
return npnan
615615

616616
@property
617617
def pvalue_students_t(self):
618-
# TODO Missing docstring
619618
try:
620619
return self.__pvalue_students_t
621620
except AttributeError:
622621
return npnan
623622

624623
@property
625624
def statistic_students_t(self):
626-
# TODO Missing docstring
627625
try:
628626
return self.__statistic_students_t
629627
except AttributeError:
630628
return npnan
631629

632630
@property
633631
def pvalue_mann_whitney(self):
634-
# TODO Missing docstring
635632
try:
636633
return self.__pvalue_mann_whitney
637634
except AttributeError:
638635
return npnan
639636

640637
@property
641638
def statistic_mann_whitney(self):
642-
# TODO Missing docstring
643639
try:
644640
return self.__statistic_mann_whitney
645641
except AttributeError:
646642
return npnan
647643

648644
@property
649645
def pvalue_permutation(self):
650-
# TODO Missing docstring
646+
"""
647+
p value of permutation test
648+
"""
651649
return self.__PermutationTest_result.pvalue
652650

653651
@property
@@ -663,12 +661,10 @@ def permutations(self):
663661

664662
@property
665663
def permutations_var(self):
666-
# TODO Missing docstring
667664
return self.__PermutationTest_result.permutations_var
668665

669666
@property
670667
def proportional_difference(self):
671-
# TODO Missing docstring
672668
try:
673669
return self.__proportional_difference
674670
except AttributeError:

dabest/_stats_tools/confint_1group.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,9 @@ def compute_1group_jackknife(x, func, *args, **kwargs):
3838

3939

4040
def compute_1group_acceleration(jack_dist):
41-
# TODO is it needed a function to just call one line?
41+
"""
42+
Returns the acceleration value based on the jackknife distribution.
43+
"""
4244
from . import confint_2group_diff as ci_2g
4345

4446
return ci_2g._calc_accel(jack_dist)

0 commit comments

Comments
 (0)