fix test_load_errors and renaming

cyberosa · cyberosa · commit 9dc9fa9735e9 · 2023-12-20T23:28:02.000+01:00
diff --git a/nbs/tests/test_99_confidence_intervals.ipynb b/nbs/tests/test_99_confidence_intervals.ipynb
@@ -75,33 +75,35 @@
    "outputs": [],
    "source": [
     "# Dropped to 30 reps to save time. v0.2.5.\n",
-    "reps = 30\n",
-    "ci = 95\n",
+    "reps=30\n",
+    "ci=95\n",
     "POPULATION_N = 10000\n",
     "SAMPLE_N = 10\n",
     "\n",
     "# Create data for hedges g and cohens d.\n",
     "CONTROL_MEAN = np.random.randint(1, 1000)\n",
-    "POP_SD = np.random.randint(1, 15)\n",
-    "POP_D = np.round(np.random.uniform(-2, 2, 1)[0], 2)\n",
+    "POP_SD       = np.random.randint(1, 15)\n",
+    "POP_D        = np.round(np.random.uniform(-2, 2, 1)[0], 2)\n",
     "\n",
     "TRUE_STD_DIFFERENCE = CONTROL_MEAN + (POP_D * POP_SD)\n",
     "norm_sample_kwargs = dict(scale=POP_SD, size=SAMPLE_N)\n",
     "c1 = norm.rvs(loc=CONTROL_MEAN, **norm_sample_kwargs)\n",
-    "t1 = norm.rvs(loc=CONTROL_MEAN + TRUE_STD_DIFFERENCE, **norm_sample_kwargs)\n",
+    "t1 = norm.rvs(loc=TRUE_STD_DIFFERENCE, **norm_sample_kwargs)  # TRUE_STD_DIFFERENCE is already CONTROL_MEAN + POP_D * POP_SD\n",
+    "\n",
+    "std_diff_df = pd.DataFrame({'Control' : c1, 'Test': t1})\n",
     "\n",
-    "std_diff_df = pd.DataFrame({\"Control\": c1, \"Test\": t1})\n",
     "\n",
     "\n",
     "# Create mean_diff data\n",
     "CONTROL_MEAN = np.random.randint(1, 1000)\n",
-    "POP_SD = np.random.randint(1, 15)\n",
-    "TRUE_DIFFERENCE = np.random.randint(-POP_SD * 5, POP_SD * 5)\n",
+    "POP_SD       = np.random.randint(1, 15)\n",
+    "TRUE_DIFFERENCE = np.random.randint(-POP_SD*5, POP_SD*5)\n",
     "\n",
     "c1 = norm.rvs(loc=CONTROL_MEAN, **norm_sample_kwargs)\n",
-    "t1 = norm.rvs(loc=CONTROL_MEAN + TRUE_DIFFERENCE, **norm_sample_kwargs)\n",
+    "t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_DIFFERENCE, **norm_sample_kwargs)\n",
+    "\n",
+    "mean_df = pd.DataFrame({'Control' : c1, 'Test': t1})\n",
     "\n",
-    "mean_df = pd.DataFrame({\"Control\": c1, \"Test\": t1})\n",
     "\n",
     "\n",
     "# Create median_diff data\n",
@@ -110,13 +112,14 @@
     "\n",
     "skew_kwargs = dict(a=A, scale=5, size=POPULATION_N)\n",
     "skewpop1 = skewnorm.rvs(**skew_kwargs, loc=100)\n",
-    "skewpop2 = skewnorm.rvs(**skew_kwargs, loc=100 + MEDIAN_DIFFERENCE)\n",
+    "skewpop2 = skewnorm.rvs(**skew_kwargs, loc=100+MEDIAN_DIFFERENCE)\n",
     "\n",
     "sample_kwargs = dict(replace=False, size=SAMPLE_N)\n",
     "skewsample1 = np.random.choice(skewpop1, **sample_kwargs)\n",
     "skewsample2 = np.random.choice(skewpop2, **sample_kwargs)\n",
     "\n",
-    "median_df = pd.DataFrame({\"Control\": skewsample1, \"Test\": skewsample2})\n",
+    "median_df = pd.DataFrame({'Control' : skewsample1, 'Test': skewsample2})\n",
+    "\n",
     "\n",
     "\n",
     "# Create two populations with a 50% overlap.\n",
@@ -125,24 +128,25 @@
     "\n",
     "pop_kwargs = dict(scale=SD, size=POPULATION_N)\n",
     "pop1 = norm.rvs(loc=100, **pop_kwargs)\n",
-    "pop2 = norm.rvs(loc=100 + CD_DIFFERENCE, **pop_kwargs)\n",
+    "pop2 = norm.rvs(loc=100+CD_DIFFERENCE, **pop_kwargs)\n",
     "\n",
     "sample_kwargs = dict(replace=False, size=SAMPLE_N)\n",
     "sample1 = np.random.choice(pop1, **sample_kwargs)\n",
     "sample2 = np.random.choice(pop2, **sample_kwargs)\n",
     "\n",
-    "cd_df = pd.DataFrame({\"Control\": sample1, \"Test\": sample2})\n",
+    "cd_df = pd.DataFrame({'Control' : sample1, 'Test': sample2})\n",
+    "\n",
     "\n",
     "\n",
     "# Create several CIs and see if the true population difference lies within.\n",
-    "error_count_cohens_d = 0\n",
-    "error_count_hedges_g = 0\n",
-    "error_count_mean_diff = 0\n",
-    "error_count_median_diff = 0\n",
+    "error_count_cohens_d     = 0\n",
+    "error_count_hedges_g     = 0\n",
+    "error_count_mean_diff    = 0\n",
+    "error_count_median_diff  = 0\n",
     "error_count_cliffs_delta = 0\n",
     "\n",
     "for i in range(0, reps):\n",
-    "    # print(i) # for debug.\n",
+    "    # print(i) # for debug.\n",
     "    # pick a random seed\n",
     "    rnd_sd = np.random.randint(0, 999999)\n",
     "    load_kwargs = dict(ci=ci, random_seed=rnd_sd)\n",
@@ -151,44 +155,48 @@
     "    cd = std_diff_data.cohens_d.results\n",
     "    # print(\"cohen's d\")  # for debug.\n",
     "    cd_low, cd_high = float(cd.bca_low), float(cd.bca_high)\n",
-    "    if not cd_low < POP_D < cd_high:\n",
+    "    if not (cd_low < POP_D < cd_high):  # miss: POP_D lies outside the CI\n",
     "        error_count_cohens_d += 1\n",
     "\n",
     "    hg = std_diff_data.hedges_g.results\n",
     "    # print(\"hedges' g\") # for debug.\n",
     "    hg_low, hg_high = float(hg.bca_low), float(hg.bca_high)\n",
-    "    if not hg_low < POP_D < hg_high:\n",
+    "    if not (hg_low < POP_D < hg_high):  # miss: POP_D lies outside the CI\n",
     "        error_count_hedges_g += 1\n",
     "\n",
+    "\n",
     "    mean_diff_data = load(data=mean_df, idx=(\"Control\", \"Test\"), **load_kwargs)\n",
     "    mean_d = mean_diff_data.mean_diff.results\n",
     "    # print(\"mean diff\") # for debug.\n",
     "    mean_d_low, mean_d_high = float(mean_d.bca_low), float(mean_d.bca_high)\n",
-    "    if not mean_d_low < TRUE_DIFFERENCE < mean_d_high:\n",
+    "    if not (mean_d_low < TRUE_DIFFERENCE < mean_d_high):  # miss: true difference lies outside the CI\n",
     "        error_count_mean_diff += 1\n",
     "\n",
-    "    median_diff_data = load(data=median_df, idx=(\"Control\", \"Test\"), **load_kwargs)\n",
+    "\n",
+    "    median_diff_data = load(data=median_df, idx=(\"Control\", \"Test\"),\n",
+    "                         **load_kwargs)\n",
     "    median_d = median_diff_data.median_diff.results\n",
     "    # print(\"median diff\") # for debug.\n",
     "    median_d_low, median_d_high = float(median_d.bca_low), float(median_d.bca_high)\n",
-    "    if not median_d_low < MEDIAN_DIFFERENCE < median_d_high:\n",
+    "    if not (median_d_low < MEDIAN_DIFFERENCE < median_d_high):  # miss: true difference lies outside the CI\n",
     "        error_count_median_diff += 1\n",
     "\n",
+    "\n",
     "    cd_data = load(data=cd_df, idx=(\"Control\", \"Test\"), **load_kwargs)\n",
     "    cliffs = cd_data.cliffs_delta.results\n",
     "    # print(\"cliff's delta\") # for debug.\n",
     "    low, high = float(cliffs.bca_low), float(cliffs.bca_high)\n",
-    "    if not low < 0.5 < high:\n",
+    "    if not (low < 0.5 < high):  # miss: 0.5 lies outside the CI\n",
     "        error_count_cliffs_delta += 1\n",
     "\n",
     "\n",
     "max_errors = int(np.ceil(reps * (100 - ci) / 100))\n",
     "\n",
-    "assert error_count_cohens_d <= max_errors\n",
-    "assert error_count_hedges_g <= max_errors\n",
-    "assert error_count_mean_diff <= max_errors\n",
-    "assert error_count_median_diff <= max_errors\n",
-    "assert error_count_cliffs_delta <= max_errors"
+    "assert error_count_cohens_d     <= max_errors\n",
+    "assert error_count_hedges_g     <= max_errors\n",
+    "assert error_count_mean_diff    <= max_errors\n",
+    "assert error_count_median_diff  <= max_errors\n",
+    "assert error_count_cliffs_delta <= max_errors\n"
    ]
   },
   {
@@ -205,6 +213,10 @@
    "display_name": "python3",
    "language": "python",
    "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.10.12"
   }
  },
  "nbformat": 4,
diff --git a/nbs/tests/test_load_errors.py b/nbs/tests/test_load_errors.py
@@ -35,7 +35,7 @@ def test_wrong_params_combinations():
 
     assert error_msg in str(excinfo.value)
 
-    error_msg = "`proportional` and `delta` cannot be True at the same time."
+    error_msg = "`proportional` and `delta2` cannot be True at the same time."
     with pytest.raises(ValueError) as excinfo:
         my_data = load(
             dummy_df,