Skip to content

Commit 1484206

Browse files
committed
Amendments on paired and baseline plot
- Change all `repeated_measures` to `paired`/`is_paired` - Change the baseline plot to the multi-paired version - Update the demo of median_diff
1 parent 53446b7 commit 1484206

8 files changed

Lines changed: 213 additions & 201 deletions

File tree

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,3 +123,5 @@ fontList-v300.json
123123
tex.cache/
124124
.Rproj.user
125125
testtt.py
126+
real.py
127+
0to2_beforeduringafter.csv

dabest/_api.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# Email : joseshowh@gmail.com
55

66

7-
def load(data, idx, x=None, y=None, repeated_measures = None, id_col=None,
7+
def load(data, idx, x=None, y=None, paired = None, id_col=None,
88
ci=95, resamples=5000, random_seed=12345):
99
'''
1010
Loads data in preparation for estimation statistics.
@@ -21,7 +21,7 @@ def load(data, idx, x=None, y=None, repeated_measures = None, id_col=None,
2121
x : string, default None
2222
y : string, default None
2323
Column names for data to be plotted on the x-axis and y-axis.
24-
repeated_measures : string, default None
24+
paired : string, default None
2525
The type of the experiment under which the data are obtained
2626
id_col : default None.
2727
Required if `paired` is True.
@@ -63,4 +63,4 @@ def load(data, idx, x=None, y=None, repeated_measures = None, id_col=None,
6363
'''
6464
from ._classes import Dabest
6565

66-
return Dabest(data, idx, x, y, repeated_measures, id_col, ci, resamples, random_seed)
66+
return Dabest(data, idx, x, y, paired, id_col, ci, resamples, random_seed)

dabest/_bootstrap_tools.py

Lines changed: 17 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -9,111 +9,87 @@
99

1010
class bootstrap:
1111
'''Computes the summary statistic and a bootstrapped confidence interval.
12-
1312
Keywords:
1413
x1, x2: array-like
1514
The data in a one-dimensional array form. Only x1 is required.
1615
If x2 is given, the bootstrapped summary difference between
1716
the two groups (x2-x1) is computed.
1817
NaNs are automatically discarded.
19-
20-
repeated_measures: string, default None
21-
The type of the experiment design.
22-
18+
paired: boolean, default False
19+
Whether or not x1 and x2 are paired samples.
2320
statfunction: callable, default np.mean
2421
The summary statistic called on data.
25-
2622
smoothboot: boolean, default False
2723
Taken from seaborn.algorithms.bootstrap.
2824
If True, performs a smoothed bootstrap (draws samples from a kernel
2925
density estimate).
30-
3126
alpha: float, default 0.05
3227
Denotes the likelihood that the confidence interval produced
3328
does not include the true summary statistic. When alpha = 0.05,
3429
a 95% confidence interval is produced.
35-
3630
reps: int, default 5000
3731
Number of bootstrap iterations to perform.
38-
3932
Returns:
4033
A `bootstrap` object reporting the summary statistics, percentile CIs,
4134
bias-corrected and accelerated (BCa) CIs, and the settings used.
42-
4335
summary: float
4436
The summary statistic.
45-
4637
is_difference: boolean
4738
Whether or not the summary is the difference between two groups.
4839
If False, only x1 was supplied.
49-
50-
repeated_measures: string
51-
The type of experiment design.
52-
40+
is_paired: boolean
41+
Whether or not the difference reported is between 2 paired groups.
5342
statistic: callable
5443
The function used to compute the summary.
55-
5644
reps: int
5745
The number of bootstrap iterations performed.
58-
5946
stat_array: array.
6047
A sorted array of values obtained by bootstrapping the input arrays.
61-
6248
ci: float
6349
The size of the confidence interval reported (in percentage).
64-
6550
pct_ci_low, pct_ci_high: floats
6651
The lower and upper bounds of the confidence interval as computed
6752
by taking the percentage bounds.
68-
6953
pct_low_high_indices: array
7054
An array with the indices in `stat_array` corresponding to the
7155
percentage confidence interval bounds.
72-
7356
bca_ci_low, bca_ci_high: floats
7457
The lower and upper bounds of the bias-corrected and accelerated
7558
(BCa) confidence interval. See Efron 1987.
76-
7759
bca_low_high_indices: array
7860
An array with the indices in `stat_array` corresponding to the BCa
7961
confidence interval bounds.
80-
8162
pvalue_1samp_ttest: float
8263
P-value obtained from scipy.stats.ttest_1samp. If 2 arrays were
8364
passed (x1 and x2), returns 'NIL'.
8465
See https://docs.scipy.org/doc/scipy-1.0.0/reference/generated/scipy.stats.ttest_1samp.html
85-
8666
pvalue_2samp_ind_ttest: float
8767
P-value obtained from scipy.stats.ttest_ind.
88-
If a single array was given (x1 only), or if `repeated_measures` is not None,
68+
If a single array was given (x1 only), or if `paired` is True,
8969
returns 'NIL'.
9070
See https://docs.scipy.org/doc/scipy-1.0.0/reference/generated/scipy.stats.ttest_ind.html
91-
9271
pvalue_2samp_related_ttest: float
9372
P-value obtained from scipy.stats.ttest_rel.
94-
If a single array was given (x1 only), or if `repeated_measures` is None,
73+
If a single array was given (x1 only), or if `paired` is False,
9574
returns 'NIL'.
9675
See https://docs.scipy.org/doc/scipy-1.0.0/reference/generated/scipy.stats.ttest_rel.html
97-
9876
pvalue_wilcoxon: float
9977
P-value obtained from scipy.stats.wilcoxon.
100-
If a single array was given (x1 only), or if `repeated_measures` is None,
78+
If a single array was given (x1 only), or if `paired` is False,
10179
returns 'NIL'.
10280
The Wilcoxon signed-rank test is a nonparametric paired test of
10381
the null hypothesis that the related samples x1 and x2 are from
10482
the same distribution.
10583
See https://docs.scipy.org/doc/scipy-1.0.0/reference/scipy.stats.wilcoxon.html
106-
10784
pvalue_mann_whitney: float
10885
Two-sided p-value obtained from scipy.stats.mannwhitneyu.
10986
If a single array was given (x1 only), returns 'NIL'.
11087
The Mann-Whitney U-test is a nonparametric unpaired test of the null
11188
hypothesis that x1 and x2 are from the same distribution.
11289
See https://docs.scipy.org/doc/scipy-1.0.0/reference/generated/scipy.stats.mannwhitneyu.html
113-
11490
'''
11591
def __init__(self, x1, x2=None,
116-
repeated_measures=None,
92+
paired=False,
11793
statfunction=None,
11894
smoothboot=False,
11995
alpha_level=0.05,
@@ -146,7 +122,7 @@ def __init__(self, x1, x2=None,
146122
'n_boot': reps,
147123
'smooth': smoothboot}
148124

149-
if repeated_measures:
125+
if paired:
150126
# check x2 is not None:
151127
if x2 is None:
152128
raise ValueError('Please specify x2.')
@@ -155,17 +131,17 @@ def __init__(self, x1, x2=None,
155131
if len(x1) != len(x2):
156132
raise ValueError('x1 and x2 are not the same length.')
157133

158-
if (x2 is None) or (repeated_measures) :
134+
if (x2 is None) or (paired is True) :
159135

160136
if x2 is None:
161137
tx = x1
162-
repeated_measures = None
138+
paired = False
163139
ttest_single = ttest_1samp(x1, 0)[1]
164140
ttest_2_ind = 'NIL'
165141
ttest_2_paired = 'NIL'
166142
wilcoxonresult = 'NIL'
167143

168-
elif repeated_measures:
144+
elif paired is True:
169145
diff = True
170146
tx = x2 - x1
171147
ttest_single = 'NIL'
@@ -188,7 +164,7 @@ def __init__(self, x1, x2=None,
188164
pct_low_high = np.nan_to_num(pct_low_high).astype('int')
189165

190166

191-
elif x2 is not None and repeated_measures is None:
167+
elif x2 is not None and paired is False:
192168
diff = True
193169
x2 = pd.Series(x2).dropna()
194170
# Generate statarrays for both arrays.
@@ -228,7 +204,7 @@ def __init__(self, x1, x2=None,
228204
" results may be unstable.")
229205

230206
self.summary = summ_stat
231-
self.repeated_measures = repeated_measures
207+
self.is_paired = paired
232208
self.is_difference = diff
233209
self.statistic = str(statfunction)
234210
self.n_reps = reps
@@ -252,7 +228,7 @@ def __init__(self, x1, x2=None,
252228

253229
self.results = {'stat_summary': self.summary,
254230
'is_difference': diff,
255-
'repeated_measures': repeated_measures,
231+
'is_paired': paired,
256232
'bca_ci_low': self.bca_ci_low,
257233
'bca_ci_high': self.bca_ci_high,
258234
'ci': self.ci
@@ -270,7 +246,7 @@ def __repr__(self):
270246

271247
diff_types = {True: 'paired', False: 'unpaired'}
272248
if self.is_difference:
273-
a = 'The {} {} difference is {}.'.format(diff_types[self.repeated_measures is not None],
249+
a = 'The {} {} difference is {}.'.format(diff_types[self.is_paired],
274250
stat, self.summary)
275251
else:
276252
a = 'The {} is {}.'.format(stat, self.summary)
@@ -284,7 +260,6 @@ def jackknife_indexes(data):
284260
From the scikits.bootstrap package.
285261
Given an array, returns a list of arrays where each array is a set of
286262
jackknife indexes.
287-
288263
For a given set of data Y, the jackknife sample J[i] is defined as the
289264
data set Y with the ith data point deleted.
290265
"""
@@ -332,4 +307,4 @@ def bca(data, alphas, statarray, statfunction, ostat, reps):
332307
nvals = np.round((reps-1)*avals)
333308
nvals = np.nan_to_num(nvals).astype('int')
334309

335-
return nvals
310+
return nvals

0 commit comments

Comments
 (0)