Skip to content

Commit f3ffe58

Browse files
authored
Merge pull request #137 from Jacobluke-/0.4dev
Documentation, bug fixes and code readability
2 parents e386f95 + dd7691e commit f3ffe58

17 files changed

Lines changed: 222 additions & 488 deletions

dabest/_api.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,12 @@ def load(data, idx=None, x=None, y=None, paired=None, id_col=None,
4646
bootstrap resampling, ensuring that the confidence intervals
4747
reported are replicable.
4848
proportional : boolean, default False.
49-
TO INCLUDE MORE DESCRIPTION ABOUT DATA FORMAT
49+
An indicator of whether the data is binary or not. When set to True, it
50+
specifies that the data consists of binary data, where the values are
51+
limited to 0 and 1. The code is not suitable for analyzing proportion
52+
data that contains non-numeric values, such as the strings 'yes' and 'no'.
53+
When False or not provided, the algorithm assumes that
54+
the data is continuous and uses a non-proportional representation.
5055
delta2 : boolean, default False
5156
Indicator of delta-delta experiment
5257
experiment : String, default None

dabest/_classes.py

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,7 @@ def mean_diff(self):
451451
\\text{Mean difference} = \\overline{x}_{Test} - \\overline{x}_{Control}
452452
453453
where :math:`\\overline{x}` is the mean for the group :math:`x`.
454+
454455
"""
455456
return self.__mean_diff
456457

@@ -459,7 +460,8 @@ def mean_diff(self):
459460
def median_diff(self):
460461
"""
461462
Returns an :py:class:`EffectSizeDataFrame` for the median difference, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`.
462-
463+
464+
463465
Example
464466
-------
465467
>>> from scipy.stats import norm
@@ -471,7 +473,8 @@ def median_diff(self):
471473
"test": test})
472474
>>> my_dabest_object = dabest.load(my_df, idx=("control", "test"))
473475
>>> my_dabest_object.median_diff
474-
476+
477+
475478
Notes
476479
-----
477480
This is the median difference between the control group and the test group.
@@ -487,6 +490,15 @@ def median_diff(self):
487490
488491
.. math::
489492
\\text{Median difference} = \\widetilde{x}_{Test - Control}
493+
494+
495+
Things to note
496+
--------------
497+
Using median difference as the statistic in bootstrapping may result in a biased estimate and cause problems with BCa confidence intervals. Consider using mean difference instead.
498+
499+
When plotting, consider using percentile confidence intervals instead of BCa confidence intervals by specifying `ci_type = 'percentile'` in .plot().
500+
501+
For detailed information, please refer to `Issue 129 <https://github.com/ACCLAB/DABEST-python/issues/129>`_.
490502
491503
"""
492504
return self.__median_diff
@@ -549,6 +561,7 @@ def cohens_d(self):
549561
https://en.wikipedia.org/wiki/Effect_size#Cohen's_d
550562
https://en.wikipedia.org/wiki/Bessel%27s_correction
551563
https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation
564+
552565
"""
553566
return self.__cohens_d
554567

@@ -588,6 +601,7 @@ def cohens_h(self):
588601
589602
References:
590603
https://en.wikipedia.org/wiki/Cohen%27s_h
604+
591605
"""
592606
return self.__cohens_h
593607

@@ -630,6 +644,7 @@ def hedges_g(self):
630644
References:
631645
https://en.wikipedia.org/wiki/Effect_size#Hedges'_g
632646
https://journals.sagepub.com/doi/10.3102/10769986006002107
647+
633648
"""
634649
return self.__hedges_g
635650

@@ -669,6 +684,7 @@ def cliffs_delta(self):
669684
References:
670685
https://en.wikipedia.org/wiki/Effect_size#Effect_size_for_ordinal_data
671686
https://psycnet.apa.org/record/1994-08169-001
687+
672688
"""
673689
return self.__cliffs_delta
674690

@@ -863,15 +879,16 @@ class DeltaDelta(object):
863879
864880
\\Delta_{1} = \\overline{X}_{A_{2}, B_{1}} - \\overline{X}_{A_{1}, B_{1}}
865881
866-
\\Delta_{2} = \\overline{X}_{A_{2}, B_{2}} - \\overline{X}_{A_{1}, B_{2}}
882+
\\Delta_{2} = \\overline{X}_{A_{2}, B_{2}} - \\overline{X}_{A_{1}, B_{2}}
883+
867884
868885
where :math:`\\overline{X}_{A_{i}, B_{j}}` is the mean of the sample with A = i and B = j, :math:`\\Delta` is the mean difference between two samples.
869886
870887
A delta-delta value is then calculated as the mean difference between the two primary deltas:
871888
872889
.. math::
873890
874-
\\Delta_{\\Delta} = \\Delta_{B_{2}} - \\Delta_{B_{1}}
891+
\\Delta_{\\Delta} = \\Delta_{2} - \\Delta_{1}
875892
876893
and:
877894
@@ -887,6 +904,7 @@ class DeltaDelta(object):
887904
-------
888905
>>> import numpy as np
889906
>>> import pandas as pd
907+
>>> import dabest
890908
>>> from scipy.stats import norm # Used in generation of populations.
891909
>>> np.random.seed(9999) # Fix the seed so the results are replicable.
892910
>>> from scipy.stats import norm # Used in generation of populations.
@@ -1298,17 +1316,17 @@ class MiniMetaDelta(object):
12981316
>>> from scipy.stats import norm
12991317
>>> import pandas as pd
13001318
>>> import dabest
1319+
>>> Ns = 20
13011320
>>> c1 = norm.rvs(loc=3, scale=0.4, size=Ns)
13021321
>>> c2 = norm.rvs(loc=3.5, scale=0.75, size=Ns)
13031322
>>> c3 = norm.rvs(loc=3.25, scale=0.4, size=Ns)
1304-
13051323
>>> t1 = norm.rvs(loc=3.5, scale=0.5, size=Ns)
13061324
>>> t2 = norm.rvs(loc=2.5, scale=0.6, size=Ns)
13071325
>>> t3 = norm.rvs(loc=3, scale=0.75, size=Ns)
13081326
>>> my_df = pd.DataFrame({'Control 1' : c1, 'Test 1' : t1,
13091327
'Control 2' : c2, 'Test 2' : t2,
13101328
'Control 3' : c3, 'Test 3' : t3})
1311-
>>> my_dabest_object = dabest.load(df, idx=(("Control 1", "Test 1"), ("Control 2", "Test 2"), ("Control 3", "Test 3")), mini_meta=True)
1329+
>>> my_dabest_object = dabest.load(my_df, idx=(("Control 1", "Test 1"), ("Control 2", "Test 2"), ("Control 3", "Test 3")), mini_meta=True)
13121330
>>> my_dabest_object.mean_diff.mini_meta_delta
13131331
13141332
Notes
@@ -2717,12 +2735,6 @@ def __pre_calc(self):
27172735

27182736
reprs.append(text_repr)
27192737

2720-
varname = get_varname(self.__dabest_obj)
2721-
lastline = "To get the results of all valid statistical tests, " +\
2722-
"use `{}.{}.statistical_tests`".format(varname, self.__effect_size)
2723-
reprs.append(lastline)
2724-
2725-
reprs.insert(0, print_greeting())
27262738

27272739
self.__for_print = "\n\n".join(reprs)
27282740

@@ -3455,4 +3467,4 @@ def permutations_var(self):
34553467
"""
34563468
The experiment group variance of all the permutations in a list.
34573469
"""
3458-
return self.__permutations_var
3470+
return self.__permutations_var

dabest/_stats_tools/effsize.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -78,12 +78,12 @@ def two_group_difference(control, test, is_paired=None,
7878
return func_difference(control, test, np.mean, is_paired)
7979

8080
elif effect_size == "median_diff":
81-
mes1 = "Using median as the statistic in bootstrapping may \
82-
result in a biased estimate and cause problems with \
83-
BCa confidence intervals. Consider using a different statistic, such as the mean.\n"
84-
mes2 = "When plotting, please consider using percetile confidence intervals\
85-
by specifying `ci_type='percentile'`. For detailed information, \
86-
refer to https://github.com/ACCLAB/DABEST-python/issues/129"
81+
mes1 = "Using median as the statistic in bootstrapping may " + \
82+
"result in a biased estimate and cause problems with " + \
83+
"BCa confidence intervals. Consider using a different statistic, such as the mean.\n"
84+
mes2 = "When plotting, please consider using percetile confidence intervals " + \
85+
"by specifying `ci_type='percentile'`. For detailed information, " + \
86+
"refer to https://github.com/ACCLAB/DABEST-python/issues/129 \n"
8787
warnings.warn(message=mes1+mes2, category=UserWarning)
8888
return func_difference(control, test, np.median, is_paired)
8989

0 commit comments

Comments
 (0)