Skip to content

Commit 011d017

Browse files
committed
First time trying nbdev, modify _classes.py
1 parent 0e21f5f commit 011d017

2 files changed

Lines changed: 430 additions & 10 deletions

File tree

dabest/_classes.py

Lines changed: 215 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -134,18 +134,21 @@ def __init__(self, data, idx, x, y, paired, id_col, ci,
134134

135135
else:
136136
x1_level = data_in[x[0]].unique()
137+
elif experiment is not None:
138+
experiment_label = data_in[experiment].unique()
139+
x1_level = data_in[x[0]].unique()
137140
self.__experiment_label = experiment_label
138141
self.__x1_level = x1_level
139142

140143

141-
# Check if idx is specified
142-
if delta2 is False and not idx:
143-
err = '`idx` is not a column in `data`. Please check.'
144-
raise IndexError(err)
144+
# # Check if idx is specified
145+
# if delta2 is False and not idx:
146+
# err = '`idx` is not a column in `data`. Please check.'
147+
# raise IndexError(err)
145148

146149

147150
# create new x & idx and record the second variable if this is a valid 2x2 ANOVA case
148-
if delta2 is True:
151+
if idx is None and x is not None and y is not None:
149152
# add a new column which is a combination of experiment and the first variable
150153
new_col_name = experiment+x[0]
151154
while new_col_name in data_in.columns:
@@ -436,6 +439,28 @@ def __repr__(self):
436439
def mean_diff(self):
437440
"""
438441
Returns an :py:class:`EffectSizeDataFrame` for the mean difference, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`.
442+
443+
Example
444+
-------
445+
>>> from scipy.stats import norm
446+
>>> import pandas as pd
447+
>>> import dabest
448+
>>> control = norm.rvs(loc=0, size=30, random_state=12345)
449+
>>> test = norm.rvs(loc=0.5, size=30, random_state=12345)
450+
>>> my_df = pd.DataFrame({"control": control,
451+
"test": test})
452+
>>> my_dabest_object = dabest.load(my_df, idx=("control", "test"))
453+
>>> my_dabest_object.mean_diff
454+
455+
Notes
456+
-----
457+
This is simply the mean of the control group subtracted from
458+
the mean of the test group.
459+
460+
.. math::
461+
\\text{Mean difference} = \\overline{x}_{Test} - \\overline{x}_{Control}
462+
463+
where :math:`\\overline{x}` is the mean for the group :math:`x`.
439464
440465
"""
441466
return self.__mean_diff
@@ -447,6 +472,44 @@ def median_diff(self):
447472
Returns an :py:class:`EffectSizeDataFrame` for the median difference, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`.
448473
449474
475+
Example
476+
-------
477+
>>> from scipy.stats import norm
478+
>>> import pandas as pd
479+
>>> import dabest
480+
>>> control = norm.rvs(loc=0, size=30, random_state=12345)
481+
>>> test = norm.rvs(loc=0.5, size=30, random_state=12345)
482+
>>> my_df = pd.DataFrame({"control": control,
483+
"test": test})
484+
>>> my_dabest_object = dabest.load(my_df, idx=("control", "test"))
485+
>>> my_dabest_object.median_diff
486+
487+
488+
Notes
489+
-----
490+
This is the median difference between the control group and the test group.
491+
492+
If the comparison(s) are unpaired, median_diff is computed with the following equation:
493+
494+
.. math::
495+
\\text{Median difference} = \\widetilde{x}_{Test} - \\widetilde{x}_{Control}
496+
497+
where :math:`\\widetilde{x}` is the median for the group :math:`x`.
498+
499+
If the comparison(s) are paired, median_diff is computed with the following equation:
500+
501+
.. math::
502+
\\text{Median difference} = \\widetilde{x}_{Test - Control}
503+
504+
505+
Things to note
506+
--------------
507+
Using median difference as the statistic in bootstrapping may result in a biased estimate and cause problems with BCa confidence intervals. Consider using mean difference instead.
508+
509+
When plotting, consider using percentile confidence intervals instead of BCa confidence intervals by specifying `ci_type = 'percentile'` in .plot().
510+
511+
For detailed information, please refer to `Issue 129 <https://github.com/ACCLAB/DABEST-python/issues/129>`_.
512+
450513
"""
451514
return self.__median_diff
452515

@@ -456,6 +519,59 @@ def cohens_d(self):
456519
"""
457520
Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Cohen's `d`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`.
458521
522+
Example
523+
-------
524+
>>> from scipy.stats import norm
525+
>>> import pandas as pd
526+
>>> import dabest
527+
>>> control = norm.rvs(loc=0, size=30, random_state=12345)
528+
>>> test = norm.rvs(loc=0.5, size=30, random_state=12345)
529+
>>> my_df = pd.DataFrame({"control": control,
530+
"test": test})
531+
>>> my_dabest_object = dabest.load(my_df, idx=("control", "test"))
532+
>>> my_dabest_object.cohens_d
533+
534+
Notes
535+
-----
536+
Cohen's `d` is the mean of the control group subtracted from
537+
the mean of the test group, standardized by a measure of the standard deviation (pooled for unpaired comparisons, averaged for paired ones).
538+
539+
If `paired` is None, then the comparison(s) are unpaired;
540+
otherwise the comparison(s) are paired.
541+
542+
If the comparison(s) are unpaired, Cohen's `d` is computed with the following equation:
543+
544+
.. math::
545+
546+
d = \\frac{\\overline{x}_{Test} - \\overline{x}_{Control}} {\\text{pooled standard deviation}}
547+
548+
549+
For paired comparisons, Cohen's d is given by
550+
551+
.. math::
552+
d = \\frac{\\overline{x}_{Test} - \\overline{x}_{Control}} {\\text{average standard deviation}}
553+
554+
where :math:`\\overline{x}` is the mean of the respective group of observations, :math:`{Var}_{x}` denotes the variance of that group,
555+
556+
.. math::
557+
558+
\\text{pooled standard deviation} = \\sqrt{ \\frac{(n_{control} - 1) * {Var}_{control} + (n_{test} - 1) * {Var}_{test} } {n_{control} + n_{test} - 2} }
559+
560+
and
561+
562+
.. math::
563+
564+
\\text{average standard deviation} = \\sqrt{ \\frac{{Var}_{control} + {Var}_{test}} {2}}
565+
566+
The sample variance (and standard deviation) uses N-1 degrees of freedoms.
567+
This is an application of `Bessel's correction <https://en.wikipedia.org/wiki/Bessel%27s_correction>`_, and yields the unbiased
568+
sample variance.
569+
570+
References:
571+
https://en.wikipedia.org/wiki/Effect_size#Cohen's_d
572+
https://en.wikipedia.org/wiki/Bessel%27s_correction
573+
https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation
574+
459575
"""
460576
return self.__cohens_d
461577

@@ -465,6 +581,36 @@ def cohens_h(self):
465581
"""
466582
Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Cohen's `h`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `directional` argument in `dabest.load()`.
467583
584+
Example
585+
-------
586+
>>> from scipy.stats import randint
587+
>>> import pandas as pd
588+
>>> import dabest
589+
>>> control = randint.rvs(0, 2, size=30, random_state=12345)
590+
>>> test = randint.rvs(0, 2, size=30, random_state=12345)
591+
>>> my_df = pd.DataFrame({"control": control,
592+
"test": test})
593+
>>> my_dabest_object = dabest.load(my_df, idx=("control", "test"))
594+
>>> my_dabest_object.cohens_h
595+
596+
Notes
597+
-----
598+
Cohen's *h* uses the information of proportion in the control and test groups to calculate the distance between two proportions.
599+
It can be used to describe the difference between two proportions as "small", "medium", or "large".
600+
It can be used to determine if the difference between two proportions is "meaningful".
601+
602+
A directional Cohen's *h* is computed with the following equation:
603+
604+
.. math::
605+
h = 2 * \\arcsin{\\sqrt{proportion_{Test}}} - 2 * \\arcsin{\\sqrt{proportion_{Control}}}
606+
607+
For a non-directional Cohen's *h*, the equation is:
608+
609+
.. math::
610+
h = |2 * \\arcsin{\\sqrt{proportion_{Test}}} - 2 * \\arcsin{\\sqrt{proportion_{Control}}}|
611+
612+
References:
613+
https://en.wikipedia.org/wiki/Cohen%27s_h
468614
469615
"""
470616
return self.__cohens_h
@@ -475,6 +621,39 @@ def hedges_g(self):
475621
"""
476622
Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Hedges' `g`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`.
477623
624+
625+
Example
626+
-------
627+
>>> from scipy.stats import norm
628+
>>> import pandas as pd
629+
>>> import dabest
630+
>>> control = norm.rvs(loc=0, size=30, random_state=12345)
631+
>>> test = norm.rvs(loc=0.5, size=30, random_state=12345)
632+
>>> my_df = pd.DataFrame({"control": control,
633+
"test": test})
634+
>>> my_dabest_object = dabest.load(my_df, idx=("control", "test"))
635+
>>> my_dabest_object.hedges_g
636+
637+
Notes
638+
-----
639+
640+
Hedges' `g` is :py:attr:`cohens_d` corrected for bias via multiplication with the following correction factor:
641+
642+
.. math::
643+
\\frac{ \\Gamma( \\frac{a} {2} )} {\\sqrt{ \\frac{a} {2} } \\times \\Gamma( \\frac{a - 1} {2} )}
644+
645+
where
646+
647+
.. math::
648+
a = {n}_{control} + {n}_{test} - 2
649+
650+
and :math:`\\Gamma(x)` is the `Gamma function <https://en.wikipedia.org/wiki/Gamma_function>`_.
651+
652+
653+
654+
References:
655+
https://en.wikipedia.org/wiki/Effect_size#Hedges'_g
656+
https://journals.sagepub.com/doi/10.3102/10769986006002107
478657
479658
"""
480659
return self.__hedges_g
@@ -484,6 +663,37 @@ def hedges_g(self):
484663
def cliffs_delta(self):
485664
"""
486665
Returns an :py:class:`EffectSizeDataFrame` for Cliff's delta, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`.
666+
667+
668+
Example
669+
-------
670+
>>> from scipy.stats import norm
671+
>>> import pandas as pd
672+
>>> import dabest
673+
>>> control = norm.rvs(loc=0, size=30, random_state=12345)
674+
>>> test = norm.rvs(loc=0.5, size=30, random_state=12345)
675+
>>> my_df = pd.DataFrame({"control": control,
676+
"test": test})
677+
>>> my_dabest_object = dabest.load(my_df, idx=("control", "test"))
678+
>>> my_dabest_object.cliffs_delta
679+
680+
681+
Notes
682+
-----
683+
684+
Cliff's delta is a measure of ordinal dominance, i.e., how often the values from the test sample are larger than values from the control sample.
685+
686+
.. math::
687+
\\text{Cliff's delta} = \\frac{\\#({x}_{test} > {x}_{control}) - \\#({x}_{test} < {x}_{control})} {{n}_{Test} \\times {n}_{Control}}
688+
689+
690+
where :math:`\\#` denotes the number of times a value from the test sample exceeds (or is lesser than) values in the control sample.
691+
692+
Cliff's delta ranges from -1 to 1; it can also be thought of as a measure of the degree of overlap between the two samples. An attractive aspect of this effect size is that it does not make any assumptions about the underlying distributions that the samples were drawn from.
693+
694+
References:
695+
https://en.wikipedia.org/wiki/Effect_size#Effect_size_for_ordinal_data
696+
https://psycnet.apa.org/record/1994-08169-001
487697
488698
"""
489699
return self.__cliffs_delta

0 commit comments

Comments
 (0)