Skip to content

Commit 9f94dee

Browse files
committed
update docstrings
1 parent 765b3ca commit 9f94dee

2 files changed

Lines changed: 232 additions & 15 deletions

File tree

dabest/_classes.py

Lines changed: 231 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, resamples,
1414

1515
"""
1616
Parses and stores pandas DataFrames in preparation for estimation
17-
statistics.
17+
statistics. You should not be calling this class directly; instead,
18+
use `dabest.load()` to parse your DataFrame prior to analysis.
1819
"""
1920

2021
# Import standard data science libraries.
@@ -181,26 +182,26 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, resamples,
181182
raise IndexError(err)
182183

183184
EffectSizeDataFrame_kwargs = dict(ci=ci, is_paired=paired,
184-
random_seed=random_seed,
185-
resamples=resamples)
185+
random_seed=random_seed,
186+
resamples=resamples)
186187

187-
self.mean_diff = EffectSizeDataFrame(self, "mean_diff",
188+
self.__mean_diff = EffectSizeDataFrame(self, "mean_diff",
188189
**EffectSizeDataFrame_kwargs)
189190

190-
self.median_diff = EffectSizeDataFrame(self, "median_diff",
191+
self.__median_diff = EffectSizeDataFrame(self, "median_diff",
191192
**EffectSizeDataFrame_kwargs)
192193

193-
self.cohens_d = EffectSizeDataFrame(self, "cohens_d",
194+
self.__cohens_d = EffectSizeDataFrame(self, "cohens_d",
194195
**EffectSizeDataFrame_kwargs)
195196

196-
self.hedges_g = EffectSizeDataFrame(self, "hedges_g",
197+
self.__hedges_g = EffectSizeDataFrame(self, "hedges_g",
197198
**EffectSizeDataFrame_kwargs)
198199

199200
if paired is False:
200-
self.cliffs_delta = EffectSizeDataFrame(self, "cliffs_delta",
201+
self.__cliffs_delta = EffectSizeDataFrame(self, "cliffs_delta",
201202
**EffectSizeDataFrame_kwargs)
202203
else:
203-
self.cliffs_delta = "The data is paired; Cliff's delta is therefore undefined."
204+
self.__cliffs_delta = "The data is paired; Cliff's delta is therefore undefined."
204205

205206

206207
def __repr__(self):
@@ -247,6 +248,205 @@ def __repr__(self):
247248
# @property
248249
# def variable_name(self):
249250
# return self.__variable_name()
251+
252+
@property
253+
def mean_diff(self):
254+
"""
255+
Returns an :py:class:`EffectSizeDataFrame` for the mean difference, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` arguments in `dabest.load()`.
256+
257+
Example
258+
-------
259+
>>> from scipy.stats import norm
260+
>>> import pandas as pd
261+
>>> import dabest
262+
>>> control = norm.rvs(loc=0, size=30, random_state=12345)
263+
>>> test = norm.rvs(loc=0.5, size=30, random_state=12345)
264+
>>> my_df = pd.DataFrame({"control": control,
265+
"test": test})
266+
>>> my_dabest_object = dabest.load(my_df, idx=("control", "test"))
267+
>>> my_dabest_object.mean_diff
268+
269+
Notes
270+
-----
271+
This is simply the mean of the control group subtracted from
272+
the mean of the test group.
273+
274+
.. math::
275+
\\text{Mean difference} = \\overline{x}_{Test} - \\overline{x}_{Control}
276+
277+
where :math:`\\overline{x}` is the mean for the group :math:`x`.
278+
"""
279+
return self.__mean_diff
280+
281+
282+
@property
283+
def median_diff(self):
284+
"""
285+
Returns an :py:class:`EffectSizeDataFrame` for the median difference, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` arguments in `dabest.load()`.
286+
287+
Example
288+
-------
289+
>>> from scipy.stats import norm
290+
>>> import pandas as pd
291+
>>> import dabest
292+
>>> control = norm.rvs(loc=0, size=30, random_state=12345)
293+
>>> test = norm.rvs(loc=0.5, size=30, random_state=12345)
294+
>>> my_df = pd.DataFrame({"control": control,
295+
"test": test})
296+
>>> my_dabest_object = dabest.load(my_df, idx=("control", "test"))
297+
>>> my_dabest_object.median_diff
298+
299+
Notes
300+
-----
301+
This is simply the median of the control group subtracted from
302+
the median of the test group.
303+
304+
.. math::
305+
\\text{Median difference} = \\widetilde{x}_{Test} - \\widetilde{x}_{Control}
306+
307+
where :math:`\\widetilde{x}` is the median for the group :math:`x`.
308+
"""
309+
return self.__median_diff
310+
311+
312+
@property
313+
def cohens_d(self):
314+
"""
315+
Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Cohen's `d`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` arguments in `dabest.load()`.
316+
317+
Example
318+
-------
319+
>>> from scipy.stats import norm
320+
>>> import pandas as pd
321+
>>> import dabest
322+
>>> control = norm.rvs(loc=0, size=30, random_state=12345)
323+
>>> test = norm.rvs(loc=0.5, size=30, random_state=12345)
324+
>>> my_df = pd.DataFrame({"control": control,
325+
"test": test})
326+
>>> my_dabest_object = dabest.load(my_df, idx=("control", "test"))
327+
>>> my_dabest_object.cohens_d
328+
329+
Notes
330+
-----
331+
Cohen's `d` is the mean of the control group subtracted from
332+
the mean of the test group, divided by a standard deviation (defined below).
333+
334+
If the comparison(s) are unpaired, Cohen's `d` is computed with the following equation:
335+
336+
.. math::
337+
338+
d = \\frac{\\overline{x}_{Test} - \\overline{x}_{Control}} {\\text{pooled standard deviation}}
339+
340+
341+
For paired comparisons, Cohen's d is given by
342+
343+
.. math::
344+
d = \\frac{\\overline{x}_{Test} - \\overline{x}_{Control}} {\\text{average standard deviation}}
345+
346+
where :math:`\\overline{x}` is the mean of the respective group of observations, :math:`{Var}_{x}` denotes the variance of that group,
347+
348+
.. math::
349+
350+
\\text{pooled standard deviation} = \\sqrt{ \\frac{(n_{control} - 1) * {Var}_{control} + (n_{test} - 1) * {Var}_{test} } {n_{control} + n_{test} - 2} }
351+
352+
and
353+
354+
.. math::
355+
356+
\\text{average standard deviation} = \\sqrt{ \\frac{{Var}_{control} + {Var}_{test}} {2}}
357+
358+
The sample variance (and standard deviation) uses N-1 degrees of freedom.
359+
This is an application of `Bessel's correction <https://en.wikipedia.org/wiki/Bessel%27s_correction>`_, and yields the unbiased
360+
sample variance.
361+
362+
References:
363+
https://en.wikipedia.org/wiki/Effect_size#Cohen's_d
364+
https://en.wikipedia.org/wiki/Bessel%27s_correction
365+
https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation
366+
"""
367+
return self.__cohens_d
368+
369+
370+
@property
371+
def hedges_g(self):
372+
"""
373+
Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Hedges' `g`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` arguments in `dabest.load()`.
374+
375+
376+
Example
377+
-------
378+
>>> from scipy.stats import norm
379+
>>> import pandas as pd
380+
>>> import dabest
381+
>>> control = norm.rvs(loc=0, size=30, random_state=12345)
382+
>>> test = norm.rvs(loc=0.5, size=30, random_state=12345)
383+
>>> my_df = pd.DataFrame({"control": control,
384+
"test": test})
385+
>>> my_dabest_object = dabest.load(my_df, idx=("control", "test"))
386+
>>> my_dabest_object.hedges_g
387+
388+
Notes
389+
-----
390+
391+
Hedges' `g` is :py:attr:`cohens_d` corrected for bias via multiplication with the following correction factor:
392+
393+
.. math::
394+
\\frac{ \\Gamma( \\frac{a} {2} )} {\\sqrt{ \\frac{a} {2} } \\times \\Gamma( \\frac{a - 1} {2} )}
395+
396+
where
397+
398+
.. math::
399+
a = {n}_{control} + {n}_{test} - 2
400+
401+
and :math:`\\Gamma(x)` is the `Gamma function <https://en.wikipedia.org/wiki/Gamma_function>`_.
402+
403+
404+
405+
References:
406+
https://en.wikipedia.org/wiki/Effect_size#Hedges'_g
407+
https://journals.sagepub.com/doi/10.3102/10769986006002107
408+
"""
409+
return self.__hedges_g
410+
411+
412+
@property
413+
def cliffs_delta(self):
414+
"""
415+
Returns an :py:class:`EffectSizeDataFrame` for Cliff's delta, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` arguments in `dabest.load()`.
416+
417+
418+
Example
419+
-------
420+
>>> from scipy.stats import norm
421+
>>> import pandas as pd
422+
>>> import dabest
423+
>>> control = norm.rvs(loc=0, size=30, random_state=12345)
424+
>>> test = norm.rvs(loc=0.5, size=30, random_state=12345)
425+
>>> my_df = pd.DataFrame({"control": control,
426+
"test": test})
427+
>>> my_dabest_object = dabest.load(my_df, idx=("control", "test"))
428+
>>> my_dabest_object.cliffs_delta
429+
430+
431+
Notes
432+
-----
433+
434+
Cliff's delta is a measure of ordinal dominance, i.e., how often the values from the test sample are larger than values from the control sample.
435+
436+
.. math::
437+
\\text{Cliff's delta} = \\frac{\\#({x}_{test} > {x}_{control}) - \\#({x}_{test} < {x}_{control})} {{n}_{Test} \\times {n}_{Control}}
438+
439+
440+
where :math:`\\#` denotes the number of times a value from the test sample exceeds (or is lesser than) values in the control sample.
441+
442+
Cliff's delta ranges from -1 to 1; it can also be thought of as a measure of the degree of overlap between the two samples. An attractive aspect of this effect size is that it does not make any assumptions about the underlying distributions that the samples were drawn from.
443+
444+
References:
445+
https://en.wikipedia.org/wiki/Effect_size#Effect_size_for_ordinal_data
446+
https://psycnet.apa.org/record/1994-08169-001
447+
"""
448+
return self.__cliffs_delta
449+
250450

251451

252452
@property
@@ -273,7 +473,7 @@ def is_paired(self):
273473
@property
274474
def id_col(self):
275475
"""
276-
Returns the ic column declared to `dabest.load()`.
476+
Returns the id column declared to `dabest.load()`.
277477
"""
278478
return self.__id_col
279479

@@ -1294,7 +1494,6 @@ def __calc_lqrt(self):
12941494
self.__lqrt_results = pd.DataFrame(out)
12951495

12961496

1297-
12981497
def plot(self, color_col=None,
12991498

13001499
raw_marker_size=6, es_marker_size=9,
@@ -1582,7 +1781,11 @@ def dabest_obj(self):
15821781

15831782
@property
15841783
def lqrt(self):
1585-
"""Returns all pairwise Lq-Likelihood Ratio Type test results nicely."""
1784+
"""Returns all pairwise Lq-Likelihood Ratio Type test results
1785+
as a pandas DataFrame.
1786+
1787+
For more information on LqRT tests, see https://arxiv.org/abs/1911.11922
1788+
"""
15861789
try:
15871790
return self.__lqrt_results
15881791
except AttributeError:
@@ -1622,6 +1825,21 @@ class PermutationTest:
16221825
16231826
effect_size : string
16241827
The type of effect size reported.
1828+
1829+
1830+
Notes
1831+
-----
1832+
The basic concept of permutation tests is the same as that behind bootstrapping.
1833+
In an "exact" permutation test, all possible reshuffles of the control and test
1834+
labels are performed, and the proportion of effect sizes that equal or exceed
1835+
the observed effect size is computed. This is the probability, under the null
1836+
hypothesis of zero difference between test and control groups, of observing the
1837+
effect size (or one more extreme); it is analogous to the p-value of Student's t-test.
1838+
1839+
Exact permutation tests are impractical: computing the effect sizes for all reshuffles quickly exceeds trivial computational loads. A control group and a test group both with 10 observations each would have a total of :math:`20!` or :math:`2.43 \\times {10}^{18}` reshuffles.
1840+
Therefore, in practice, "approximate" permutation tests are performed, where a sufficient number of reshuffles are performed (5,000 or 10,000), from which the p-value is computed.
1841+
1842+
More information can be found `here <https://en.wikipedia.org/wiki/Resampling_(statistics)#Permutation_tests>`_.
16251843
16261844
16271845
Example
@@ -1635,7 +1853,6 @@ class PermutationTest:
16351853
... effect_size="mean_diff",
16361854
... is_paired=False)
16371855
>>> perm_test
1638-
16391856
5000 permutations were taken. The pvalue is 0.0758.
16401857
"""
16411858

@@ -1707,7 +1924,7 @@ def __init__(self, control, test,
17071924

17081925

17091926
def __repr__(self):
1710-
return("{} permutations were taken. The pvalue is {}.".format(self.permutation_count,
1927+
return("{} permutations were taken. The p-value is {}.".format(self.permutation_count,
17111928
self.pvalue))
17121929

17131930

dabest/_stats_tools/effsize.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def two_group_difference(control, test, is_paired=False,
6060
J(n) = ------------------------------
6161
sqrt(n/2) * gamma((n - 1) / 2)
6262
63-
where n = (n1 + n2 -2).
63+
where n = (n1 + n2 - 2).
6464
6565
median_diff: This is the median of `control` subtracted from the
6666
median of `test`.

0 commit comments

Comments
 (0)