Skip to content

Commit ef3bdfd

Browse files
authored
Refactoring phase 1 (#152)
* delta objects notebook * Organizing imports in class. Reduced load of numpy library * More fixing and ordering of imports. Cleaning and small changes * effsize objects in separate notebook. Cleaning and small changes * Renaming and refactoring in classes. Modularity principle * Refactoring Modularity and black formatting * Using self attributes * More small fixes from phase 1 refactoring * Fixing name of pre-commit file * Added missing tolerance parameter for ui tests in test_10 file
1 parent 50ddcf6 commit ef3bdfd

33 files changed

Lines changed: 10348 additions & 9905 deletions

README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
DABEST-Python
2-
================
1+
# DABEST-Python
32

43
<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
54

dabest/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from ._api import load, prop_dataset
22
from ._stats_tools import effsize as effsize
3-
from ._classes import TwoGroupsEffectSize, PermutationTest
3+
from ._effsize_objects import TwoGroupsEffectSize, PermutationTest
4+
from ._dabest_object import Dabest
45

56
__version__ = "2023.03.29"

dabest/_api.py

Lines changed: 90 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,24 @@
44
__all__ = ['load', 'prop_dataset']
55

66
# %% ../nbs/API/load.ipynb 4
7-
def load(data, idx=None, x=None, y=None, paired=None, id_col=None,
8-
ci=95, resamples=5000, random_seed=12345, proportional=False,
9-
delta2 = False, experiment = None, experiment_label = None,
10-
x1_level = None, mini_meta=False):
11-
'''
7+
def load(
8+
data,
9+
idx=None,
10+
x=None,
11+
y=None,
12+
paired=None,
13+
id_col=None,
14+
ci=95,
15+
resamples=5000,
16+
random_seed=12345,
17+
proportional=False,
18+
delta2=False,
19+
experiment=None,
20+
experiment_label=None,
21+
x1_level=None,
22+
mini_meta=False,
23+
):
24+
"""
1225
Loads data in preparation for estimation statistics.
1326
1427
This is designed to work with pandas DataFrames.
@@ -22,15 +35,15 @@ def load(data, idx=None, x=None, y=None, paired=None, id_col=None,
2235
with each individual tuple producing its own contrast plot
2336
x : string or list, default None
2437
Column name(s) of the independent variable. This can be expressed as
25-
a list of 2 elements if and only if 'delta2' is True; otherwise it
38+
a list of 2 elements if and only if 'delta2' is True; otherwise it
2639
can only be a string.
2740
y : string, default None
2841
Column names for data to be plotted on the x-axis and y-axis.
2942
paired : string, default None
30-
The type of the experiment under which the data are obtained. If 'paired'
43+
The type of the experiment under which the data are obtained. If 'paired'
3144
is None then the data will not be treated as paired data in the subsequent
32-
calculations. If 'paired' is 'baseline', then in each tuple of x, other
33-
groups will be paired up with the first group (as control). If 'paired' is
45+
calculations. If 'paired' is 'baseline', then in each tuple of x, other
46+
groups will be paired up with the first group (as control). If 'paired' is
3447
'sequential', then in each tuple of x, each group will be paired up with
3548
its previous group (as control).
3649
id_col : default None.
@@ -45,7 +58,7 @@ def load(data, idx=None, x=None, y=None, paired=None, id_col=None,
4558
This integer is used to seed the random number generator during
4659
bootstrap resampling, ensuring that the confidence intervals
4760
reported are replicable.
48-
proportional : boolean, default False.
61+
proportional : boolean, default False.
4962
An indicator of whether the data is binary or not. When set to True, it
5063
specifies that the data consists of binary data, where the values are
5164
limited to 0 and 1. The code is not suitable for analyzing proportion
@@ -55,76 +68,112 @@ def load(data, idx=None, x=None, y=None, paired=None, id_col=None,
5568
delta2 : boolean, default False
5669
Indicator of delta-delta experiment
5770
experiment : String, default None
58-
The name of the column of the dataframe which contains the label of
71+
The name of the column of the dataframe which contains the label of
5972
experiments
6073
experiment_lab : list, default None
6174
A list of String to specify the order of subplots for delta-delta plots.
62-
This can be expressed as a list of 2 elements if and only if 'delta2'
63-
is True; otherwise it can only be a string.
75+
This can be expressed as a list of 2 elements if and only if 'delta2'
76+
is True; otherwise it can only be a string.
6477
x1_level : list, default None
6578
A list of String to specify the order of subplots for delta-delta plots.
66-
This can be expressed as a list of 2 elements if and only if 'delta2'
67-
is True; otherwise it can only be a string.
79+
This can be expressed as a list of 2 elements if and only if 'delta2'
80+
is True; otherwise it can only be a string.
6881
mini_meta : boolean, default False
6982
Indicator of weighted delta calculation.
7083
7184
Returns
7285
-------
7386
A `Dabest` object.
74-
'''
75-
from ._classes import Dabest
76-
77-
return Dabest(data, idx, x, y, paired, id_col, ci, resamples, random_seed, proportional, delta2, experiment, experiment_label, x1_level, mini_meta)
78-
79-
87+
"""
88+
from dabest import Dabest
89+
90+
return Dabest(
91+
data,
92+
idx,
93+
x,
94+
y,
95+
paired,
96+
id_col,
97+
ci,
98+
resamples,
99+
random_seed,
100+
proportional,
101+
delta2,
102+
experiment,
103+
experiment_label,
104+
x1_level,
105+
mini_meta,
106+
)
80107

81108
# %% ../nbs/API/load.ipynb 5
82109
import numpy as np
83110
from typing import Union, Optional
111+
import pandas as pd
84112

85-
def prop_dataset(group:Union[list, tuple, np.ndarray, dict], #Accepts lists, tuples, or numpy ndarrays of numeric types.
86-
group_names: Optional[list] = None):
87-
'''
113+
114+
def prop_dataset(
115+
group: Union[
116+
list, tuple, np.ndarray, dict
117+
], # Accepts lists, tuples, or numpy ndarrays of numeric types.
118+
group_names: Optional[list] = None,
119+
):
120+
"""
88121
Convenient function to generate a dataframe of binary data.
89-
'''
90-
import pandas as pd
122+
"""
91123

92124
if isinstance(group, dict):
93125
# If group_names is not provided, use the keys of the dict as group_names
94126
if group_names is None:
95127
group_names = list(group.keys())
96128
elif not set(group_names) == set(group.keys()):
97129
# Check if the group_names provided is the same as the keys of the dict
98-
raise ValueError('group_names must be the same as the keys of the dict.')
130+
raise ValueError("group_names must be the same as the keys of the dict.")
99131
# Check if the values in the dict are numeric
100-
if not all([isinstance(group[name], (list, tuple, np.ndarray)) for name in group_names]):
101-
raise ValueError('group must be a dict of lists, tuples, or numpy ndarrays of numeric types.')
132+
if not all(
133+
[isinstance(group[name], (list, tuple, np.ndarray)) for name in group_names]
134+
):
135+
raise ValueError(
136+
"group must be a dict of lists, tuples, or numpy ndarrays of numeric types."
137+
)
102138
# Check if the values in the dict only have two elements under each parent key
103139
if not all([len(group[name]) == 2 for name in group_names]):
104-
raise ValueError('Each parent key should have only two elements.')
140+
raise ValueError("Each parent key should have only two elements.")
105141
group_val = group
106142

107143
else:
108144
if group_names is None:
109-
raise ValueError('group_names must be provided if group is not a dict.')
145+
raise ValueError("group_names must be provided if group is not a dict.")
110146
# Check if the length of group is two times of the length of group_names
111147
if not len(group) == 2 * len(group_names):
112-
raise ValueError('The length of group must be two times of the length of group_names.')
113-
group_val = {group_names[i]: [group[i*2], group[i*2+1]] for i in range(len(group_names))}
148+
raise ValueError(
149+
"The length of group must be two times of the length of group_names."
150+
)
151+
group_val = {
152+
group_names[i]: [group[i * 2], group[i * 2 + 1]]
153+
for i in range(len(group_names))
154+
}
114155

115156
# Check if the sum of values in group_val under each key are the same
116-
if not all([sum(group_val[name]) == sum(group_val[group_names[0]]) for name in group_val.keys()]):
117-
raise ValueError('The sum of values under each key must be the same.')
118-
119-
id_col = pd.Series(range(1, sum(group_val[group_names[0]])+1))
120-
157+
if not all(
158+
[
159+
sum(group_val[name]) == sum(group_val[group_names[0]])
160+
for name in group_val.keys()
161+
]
162+
):
163+
raise ValueError("The sum of values under each key must be the same.")
164+
165+
id_col = pd.Series(range(1, sum(group_val[group_names[0]]) + 1))
166+
121167
final_df = pd.DataFrame()
122168

123169
for name in group_val.keys():
124-
col = np.repeat(0, group_val[name][0]).tolist() + np.repeat(1, group_val[name][1]).tolist()
125-
df = pd.DataFrame({name:col})
170+
col = (
171+
np.repeat(0, group_val[name][0]).tolist()
172+
+ np.repeat(1, group_val[name][1]).tolist()
173+
)
174+
df = pd.DataFrame({name: col})
126175
final_df = pd.concat([final_df, df], axis=1)
127176

128-
final_df['ID'] = id_col
177+
final_df["ID"] = id_col
129178

130179
return final_df

0 commit comments

Comments
 (0)