Skip to content

Commit 746dc2b

Browse files
committed
Cleared all the TODOs
1 parent b73a800 commit 746dc2b

17 files changed

Lines changed: 317 additions & 107 deletions

CHANGELOG.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Release notes
2+
3+
<!-- do not remove -->
4+
5+
## 2023.03.29
6+
7+
### New Features
8+
- Add a new form of paired proportion plots for better support of repeated measures.
9+
10+
11+
## 0.2.3
12+
13+
### Bug Fixes
14+
- Fix a bug that caused a failure when the xvar column was already a pandas Categorical. The column's dtype is now checked and handled appropriately.

dabest/_bootstrap_tools.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,13 @@ def __init__(
108108
ttest_single = "NIL"
109109
ttest_2_ind = "NIL"
110110
ttest_2_paired = ttest_rel(x1, x2)[1]
111-
wilcoxonresult = wilcoxon(x1, x2)[1]
111+
112+
try:
113+
wilcoxonresult = wilcoxon(x1, x2)[1]
114+
except ValueError as e:
115+
warnings.warn("Wilcoxon test could not be performed. This might be due "
116+
"to identical values under the same group. "
117+
"Details: {}".format(e))
112118
mannwhitneyresult = "NIL"
113119

114120
# Turns data into array, then tuple.

dabest/_dabest_object.py

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -442,26 +442,45 @@ def _check_errors(self, x, y, idx, experiment, experiment_label, x1_level):
442442
raise ValueError(err0)
443443

444444
# Check if the columns stated are valid
445-
# TODO instead of traversing twice idx you can traverse only once
446-
# and break the loop if the condition is not satisfied?
447-
# TODO What if the type is not str and not tuple,list? missing raise Error
448-
if all([isinstance(i, str) for i in idx]):
449-
if len(pd.unique([t for t in idx]).tolist()) != 2:
445+
# Initialize a flag to track if any element in idx is neither str nor (tuple, list)
446+
valid_types = True
447+
448+
# Initialize variables to track the conditions for str and (tuple, list)
449+
is_str_condition_met, is_tuple_list_condition_met = False, False
450+
451+
# Single traversal for optimization
452+
for item in idx:
453+
if isinstance(item, str):
454+
is_str_condition_met = True
455+
elif isinstance(item, (tuple, list)) and len(item) == 2:
456+
is_tuple_list_condition_met = True
457+
else:
458+
valid_types = False
459+
break # Exit the loop if an invalid type is found
460+
461+
# Check if all types are valid
462+
if not valid_types:
463+
raise TypeError("Invalid type found in idx. Expected str, tuple, or list.")
464+
465+
# Handling str type condition
466+
if is_str_condition_met:
467+
if len(pd.unique(idx).tolist()) != 2:
450468
err0 = "`mini_meta` is True, but `idx` ({})".format(idx)
451-
err1 = "does not contain exactly 2 columns."
469+
err1 = "does not contain exactly 2 unique columns."
452470
raise ValueError(err0 + err1)
453471

454-
if all([isinstance(i, (tuple, list)) for i in idx]):
472+
# Handling (tuple, list) type condition
473+
if is_tuple_list_condition_met:
455474
all_idx_lengths = [len(t) for t in idx]
456475
if (array(all_idx_lengths) != 2).any():
457-
err1 = "`mini_meta` is True, but some idx "
458-
err2 = "in {} does not consist only of two groups.".format(idx)
476+
err1 = "`mini_meta` is True, but some elements in idx "
477+
err2 = "in {} do not consist only of two groups.".format(idx)
459478
raise ValueError(err1 + err2)
460479

461-
# TODO can you have True mini_meta and delta2 at the same time?
480+
462481
# Check if this is a 2x2 ANOVA case and x & y are valid columns
463482
# Create experiment_label and x1_level
464-
if self.__delta2:
483+
elif self.__delta2:
465484
if x is None:
466485
error_msg = "If `delta2` is True. `x` parameter cannot be None. String or list expected"
467486
raise ValueError(error_msg)
@@ -534,7 +553,6 @@ def _check_errors(self, x, y, idx, experiment, experiment_label, x1_level):
534553
else:
535554
x1_level = self.__output_data[x[0]].unique()
536555

537-
# TODO what if experiment is None?
538556
elif experiment:
539557
experiment_label = self.__output_data[experiment].unique()
540558
x1_level = self.__output_data[x[0]].unique()
@@ -611,10 +629,7 @@ def _get_plot_data(self, x, y, all_plot_groups):
611629
# Added in v0.2.7.
612630
plot_data.dropna(axis=0, how="any", subset=[self.__yvar], inplace=True)
613631

614-
# TODO these comments should not be in the code but on the release notes of the package version
615-
# Lines 131 to 140 added in v0.2.3.
616-
# Fixes a bug that jammed up when the xvar column was already
617-
# a pandas Categorical. Now we check for this and act appropriately.
632+
618633
if isinstance(plot_data[self.__xvar].dtype, pd.CategoricalDtype):
619634
plot_data[self.__xvar].cat.remove_unused_categories(inplace=True)
620635
plot_data[self.__xvar].cat.reorder_categories(

dabest/_effsize_objects.py

Lines changed: 18 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,6 @@ def _compute_bca_intervals(self, sorted_bootstraps):
299299
)
300300

301301
else:
302-
# TODO improve error handling, separate file with error messages?
303302
err1 = "The $lim_type limit of the BCa interval cannot be computed."
304303
err2 = "It is set to the effect size itself."
305304
err3 = "All bootstrap values were likely all the same."
@@ -330,9 +329,14 @@ def _perform_statistical_test(self):
330329

331330
if self.__is_paired and not self.__proportional:
332331
# Wilcoxon, a non-parametric version of the paired T-test.
333-
wilcoxon = spstats.wilcoxon(self.__control, self.__test)
334-
self.__pvalue_wilcoxon = wilcoxon.pvalue
335-
self.__statistic_wilcoxon = wilcoxon.statistic
332+
try:
333+
wilcoxon = spstats.wilcoxon(self.__control, self.__test)
334+
self.__pvalue_wilcoxon = wilcoxon.pvalue
335+
self.__statistic_wilcoxon = wilcoxon.statistic
336+
except ValueError as e:
337+
warnings.warn("Wilcoxon test could not be performed. This might be due "
338+
"to identical values under the same group. "
339+
"Details: {}".format(e))
336340

337341
if self.__effect_size != "median_diff":
338342
# Paired Student's t-test.
@@ -398,11 +402,10 @@ def _perform_statistical_test(self):
398402
)
399403
self.__pvalue_mann_whitney = mann_whitney.pvalue
400404
self.__statistic_mann_whitney = mann_whitney.statistic
401-
except ValueError:
402-
# TODO At least print some warning?
403-
# Occurs when the control and test are exactly identical
404-
# in terms of rank (eg. all zeros.)
405-
pass
405+
except ValueError as e:
406+
warnings.warn("Mann-Whitney test could not be performed. This might be due "
407+
"to identical rank values in both control and test groups. "
408+
"Details: {}".format(e))
406409

407410
standardized_es = es.cohens_d(self.__control, self.__test, is_paired=None)
408411

@@ -411,10 +414,9 @@ def _perform_statistical_test(self):
411414
self.__proportional_difference = es.cohens_h(
412415
self.__control, self.__test
413416
)
414-
except ValueError:
415-
# TODO At least print some warning?
416-
# Occur only when the data consists not only 0's and 1's.
417-
pass
417+
except ValueError as e:
418+
warnings.warn(f"Calculation of Cohen's h failed. This method is applicable "
419+
f"only for binary data (0's and 1's). Details: {e}")
418420

419421
def to_dict(self):
420422
"""
@@ -567,87 +569,79 @@ def statistic_mcnemar(self):
567569

568570
@property
569571
def pvalue_paired_students_t(self):
570-
# TODO Missing docstring
571572
try:
572573
return self.__pvalue_paired_students_t
573574
except AttributeError:
574575
return npnan
575576

576577
@property
577578
def statistic_paired_students_t(self):
578-
# TODO Missing docstring
579579
try:
580580
return self.__statistic_paired_students_t
581581
except AttributeError:
582582
return npnan
583583

584584
@property
585585
def pvalue_kruskal(self):
586-
# TODO Missing docstring
587586
try:
588587
return self.__pvalue_kruskal
589588
except AttributeError:
590589
return npnan
591590

592591
@property
593592
def statistic_kruskal(self):
594-
# TODO Missing docstring
595593
try:
596594
return self.__statistic_kruskal
597595
except AttributeError:
598596
return npnan
599597

600598
@property
601599
def pvalue_welch(self):
602-
# TODO Missing docstring
603600
try:
604601
return self.__pvalue_welch
605602
except AttributeError:
606603
return npnan
607604

608605
@property
609606
def statistic_welch(self):
610-
# TODO Missing docstring
611607
try:
612608
return self.__statistic_welch
613609
except AttributeError:
614610
return npnan
615611

616612
@property
617613
def pvalue_students_t(self):
618-
# TODO Missing docstring
619614
try:
620615
return self.__pvalue_students_t
621616
except AttributeError:
622617
return npnan
623618

624619
@property
625620
def statistic_students_t(self):
626-
# TODO Missing docstring
627621
try:
628622
return self.__statistic_students_t
629623
except AttributeError:
630624
return npnan
631625

632626
@property
633627
def pvalue_mann_whitney(self):
634-
# TODO Missing docstring
635628
try:
636629
return self.__pvalue_mann_whitney
637630
except AttributeError:
638631
return npnan
639632

640633
@property
641634
def statistic_mann_whitney(self):
642-
# TODO Missing docstring
643635
try:
644636
return self.__statistic_mann_whitney
645637
except AttributeError:
646638
return npnan
647639

648640
@property
649641
def pvalue_permutation(self):
650-
# TODO Missing docstring
642+
"""
643+
The p-value of the permutation test.
644+
"""
651645
return self.__PermutationTest_result.pvalue
652646

653647
@property
@@ -663,12 +657,10 @@ def permutations(self):
663657

664658
@property
665659
def permutations_var(self):
666-
# TODO Missing docstring
667660
return self.__PermutationTest_result.permutations_var
668661

669662
@property
670663
def proportional_difference(self):
671-
# TODO Missing docstring
672664
try:
673665
return self.__proportional_difference
674666
except AttributeError:

dabest/_stats_tools/confint_1group.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,9 @@ def compute_1group_jackknife(x, func, *args, **kwargs):
3838

3939

4040
def compute_1group_acceleration(jack_dist):
41-
# TODO is it needed a function to just call one line?
41+
"""
42+
Returns the acceleration value based on the jackknife distribution.
43+
"""
4244
from . import confint_2group_diff as ci_2g
4345

4446
return ci_2g._calc_accel(jack_dist)

dabest/_stats_tools/confint_2group_diff.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,9 @@ def compute_meandiff_jackknife(x0, x1, is_paired, effect_size):
9898

9999

100100
def _calc_accel(jack_dist):
101-
# TODO Missing docstring
101+
"""
102+
Given the Jackknife distribution, calculates the acceleration factor.
103+
"""
102104
jack_mean = npmean(jack_dist)
103105

104106
numer = npsum((jack_mean - jack_dist) ** 3)

dabest/_stats_tools/effsize.py

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,9 @@ def cohens_d(control:list|tuple|np.ndarray,
210210
M = np.mean(test) - np.mean(control)
211211
divisor = pooled_sd
212212

213-
# TODO what if divisor = 0?
213+
if divisor == 0:
214+
raise ValueError("The divisor is zero, indicating no variability in the data.")
215+
214216
return M / divisor
215217

216218
# %% ../../nbs/API/effsize.ipynb 8
@@ -312,8 +314,33 @@ def cliffs_delta(control:list|tuple|np.ndarray,
312314

313315
# %% ../../nbs/API/effsize.ipynb 11
314316
def _compute_standardizers(control, test):
315-
# TODO missing docstring
316-
# For calculation of correlation; not currently used.
317+
"""
318+
Computes the pooled and average standard deviations for two datasets.
319+
320+
This function is useful in the context of statistical analysis, particularly
321+
when calculating standardized mean differences between two groups. It supports
322+
both unpaired and paired data scenarios.
323+
324+
Parameters:
325+
control (array-like): A numeric array representing the control group data.
326+
test (array-like): A numeric array representing the test group data.
327+
328+
Returns:
329+
tuple: A tuple containing two elements:
330+
- pooled (float): The pooled standard deviation, calculated for unpaired two-group
331+
scenarios. It is computed using the sample variances of the
332+
control and test groups, weighted by their sample sizes.
333+
- average (float): The average standard deviation, calculated for paired data
334+
scenarios. It is the average of the sample standard deviations
335+
of the control and test groups.
336+
337+
Note:
338+
The function assumes that the input arrays are independent samples and calculates
339+
the sample variances using N-1 degrees of freedom.
340+
341+
For calculation of correlation; not currently used.
342+
343+
"""
317344
# from scipy.stats import pearsonr
318345

319346
control_n = len(control)

dabest/misc_tools.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,17 @@ def unpack_and_add(l, c):
3333

3434

3535
def print_greeting():
36-
# TODO missing docstring
36+
"""
37+
Generates a greeting message based on the current time, along with the version information of DABEST.
38+
39+
This function dynamically generates a greeting ('Good morning', 'Good afternoon', 'Good evening')
40+
based on the current system time. It also retrieves and displays the version of DABEST (Data Analysis
41+
using Bootstrap-Coupled ESTimation). The message includes a header with the DABEST version and the
42+
current time formatted in a user-friendly manner.
43+
44+
Returns:
45+
str: A formatted string containing the greeting message, DABEST version, and current time.
46+
"""
3747
from .__init__ import __version__
3848

3949
line1 = "DABEST v{}".format(__version__)

0 commit comments

Comments
 (0)