Merge pull request #2784 from andrjohns/issue-2783-bernoulli-cdf-stable

andrjohns · web-flow · commit 9f2689e920f4 · 2023-09-19T16:23:15.000+03:00
Improve Numerical Stability of Bernoulli CDF functions
diff --git a/stan/math/prim/prob/bernoulli_cdf.hpp b/stan/math/prim/prob/bernoulli_cdf.hpp
@@ -3,12 +3,10 @@
 
 #include <stan/math/prim/meta.hpp>
 #include <stan/math/prim/err.hpp>
-#include <stan/math/prim/fun/constants.hpp>
-#include <stan/math/prim/fun/max_size.hpp>
-#include <stan/math/prim/fun/scalar_seq_view.hpp>
+#include <stan/math/prim/fun/any.hpp>
+#include <stan/math/prim/fun/select.hpp>
 #include <stan/math/prim/fun/size.hpp>
 #include <stan/math/prim/fun/size_zero.hpp>
-#include <stan/math/prim/fun/value_of.hpp>
 #include <stan/math/prim/functor/partials_propagator.hpp>
 
 namespace stan {
@@ -36,50 +34,30 @@ return_type_t<T_prob> bernoulli_cdf(const T_n& n, const T_prob& theta) {
   check_consistent_sizes(function, "Random variable", n,
                          "Probability parameter", theta);
   T_theta_ref theta_ref = theta;
-  check_bounded(function, "Probability parameter", value_of(theta_ref), 0.0,
-                1.0);
+  const auto& n_arr = as_array_or_scalar(n);
+  const auto& theta_arr = as_value_column_array_or_scalar(theta_ref);
+  check_bounded(function, "Probability parameter", theta_arr, 0.0, 1.0);
 
   if (size_zero(n, theta)) {
     return 1.0;
   }
 
-  T_partials_return P(1.0);
   auto ops_partials = make_partials_propagator(theta_ref);
 
-  scalar_seq_view<T_n> n_vec(n);
-  scalar_seq_view<T_theta_ref> theta_vec(theta_ref);
-  size_t max_size_seq_view = max_size(n, theta);
-
   // Explicit return for extreme values
   // The gradients are technically ill-defined, but treated as zero
-  for (size_t i = 0; i < stan::math::size(n); i++) {
-    if (n_vec.val(i) < 0) {
-      return ops_partials.build(0.0);
-    }
+  if (any(n_arr < 0)) {
+    return ops_partials.build(0.0);
   }
+  const auto& log1m_theta = select(theta_arr == 1, 0.0, log1m(theta_arr));
+  const auto& P1 = select(n_arr == 0, log1m_theta, 0.0);
 
-  for (size_t i = 0; i < max_size_seq_view; i++) {
-    // Explicit results for extreme values
-    // The gradients are technically ill-defined, but treated as zero
-    if (n_vec.val(i) >= 1) {
-      continue;
-    }
-
-    const T_partials_return Pi = 1 - theta_vec.val(i);
-
-    P *= Pi;
-
-    if (!is_constant_all<T_prob>::value) {
-      partials<0>(ops_partials)[i] += -1 / Pi;
-    }
-  }
+  T_partials_return P = sum(P1);
 
   if (!is_constant_all<T_prob>::value) {
-    for (size_t i = 0; i < stan::math::size(theta); ++i) {
-      partials<0>(ops_partials)[i] *= P;
-    }
+    partials<0>(ops_partials) = select(n_arr == 0, -exp(P - P1), 0.0);
   }
-  return ops_partials.build(P);
+  return ops_partials.build(exp(P));
 }
 
 }  // namespace math
diff --git a/stan/math/prim/prob/bernoulli_lccdf.hpp b/stan/math/prim/prob/bernoulli_lccdf.hpp
@@ -3,16 +3,13 @@
 
 #include <stan/math/prim/meta.hpp>
 #include <stan/math/prim/err.hpp>
+#include <stan/math/prim/fun/any.hpp>
 #include <stan/math/prim/fun/constants.hpp>
 #include <stan/math/prim/fun/inv.hpp>
 #include <stan/math/prim/fun/log.hpp>
-#include <stan/math/prim/fun/max_size.hpp>
-#include <stan/math/prim/fun/scalar_seq_view.hpp>
-#include <stan/math/prim/fun/size.hpp>
+#include <stan/math/prim/fun/select.hpp>
 #include <stan/math/prim/fun/size_zero.hpp>
-#include <stan/math/prim/fun/value_of.hpp>
 #include <stan/math/prim/functor/partials_propagator.hpp>
-#include <cmath>
 
 namespace stan {
 namespace math {
@@ -33,50 +30,38 @@ template <typename T_n, typename T_prob,
           require_all_not_nonscalar_prim_or_rev_kernel_expression_t<
               T_n, T_prob>* = nullptr>
 return_type_t<T_prob> bernoulli_lccdf(const T_n& n, const T_prob& theta) {
-  using T_partials_return = partials_return_t<T_n, T_prob>;
   using T_theta_ref = ref_type_t<T_prob>;
-  using std::log;
   static const char* function = "bernoulli_lccdf";
   check_consistent_sizes(function, "Random variable", n,
                          "Probability parameter", theta);
   T_theta_ref theta_ref = theta;
-  check_bounded(function, "Probability parameter", value_of(theta_ref), 0.0,
-                1.0);
+  const auto& n_arr = as_array_or_scalar(n);
+  const auto& theta_arr = as_value_column_array_or_scalar(theta_ref);
+  check_bounded(function, "Probability parameter", theta_arr, 0.0, 1.0);
 
   if (size_zero(n, theta)) {
     return 0.0;
   }
 
-  T_partials_return P(0.0);
   auto ops_partials = make_partials_propagator(theta_ref);
 
-  scalar_seq_view<T_n> n_vec(n);
-  scalar_seq_view<T_theta_ref> theta_vec(theta_ref);
-  size_t max_size_seq_view = max_size(n, theta);
-
   // Explicit return for extreme values
   // The gradients are technically ill-defined, but treated as zero
-  for (size_t i = 0; i < stan::math::size(n); i++) {
-    const double n_dbl = n_vec.val(i);
-    if (n_dbl < 0) {
-      return ops_partials.build(0.0);
-    }
-    if (n_dbl >= 1) {
-      return ops_partials.build(NEGATIVE_INFTY);
-    }
+  if (any(n_arr < 0)) {
+    return ops_partials.build(0.0);
+  } else if (any(n_arr >= 1)) {
+    return ops_partials.build(NEGATIVE_INFTY);
   }
 
-  for (size_t i = 0; i < max_size_seq_view; i++) {
-    const T_partials_return Pi = theta_vec.val(i);
-
-    P += log(Pi);
+  size_t theta_size = math::size(theta_arr);
+  size_t n_size = math::size(n_arr);
+  double broadcast_n = theta_size == n_size ? 1 : n_size;
 
-    if (!is_constant_all<T_prob>::value) {
-      partials<0>(ops_partials)[i] += inv(Pi);
-    }
+  if (!is_constant_all<T_prob>::value) {
+    partials<0>(ops_partials) = inv(theta_arr) * broadcast_n;
   }
 
-  return ops_partials.build(P);
+  return ops_partials.build(sum(log(theta_arr)) * broadcast_n);
 }
 
 }  // namespace math
diff --git a/stan/math/prim/prob/bernoulli_lcdf.hpp b/stan/math/prim/prob/bernoulli_lcdf.hpp
@@ -3,16 +3,11 @@
 
 #include <stan/math/prim/meta.hpp>
 #include <stan/math/prim/err.hpp>
+#include <stan/math/prim/fun/any.hpp>
 #include <stan/math/prim/fun/constants.hpp>
-#include <stan/math/prim/fun/inv.hpp>
-#include <stan/math/prim/fun/log.hpp>
-#include <stan/math/prim/fun/max_size.hpp>
-#include <stan/math/prim/fun/scalar_seq_view.hpp>
-#include <stan/math/prim/fun/size.hpp>
+#include <stan/math/prim/fun/select.hpp>
 #include <stan/math/prim/fun/size_zero.hpp>
-#include <stan/math/prim/fun/value_of.hpp>
 #include <stan/math/prim/functor/partials_propagator.hpp>
-#include <cmath>
 
 namespace stan {
 namespace math {
@@ -33,52 +28,34 @@ template <typename T_n, typename T_prob,
           require_all_not_nonscalar_prim_or_rev_kernel_expression_t<
               T_n, T_prob>* = nullptr>
 return_type_t<T_prob> bernoulli_lcdf(const T_n& n, const T_prob& theta) {
-  using T_partials_return = partials_return_t<T_n, T_prob>;
   using T_theta_ref = ref_type_t<T_prob>;
-  using std::log;
   static const char* function = "bernoulli_lcdf";
   check_consistent_sizes(function, "Random variable", n,
                          "Probability parameter", theta);
   T_theta_ref theta_ref = theta;
-  check_bounded(function, "Probability parameter", value_of(theta_ref), 0.0,
-                1.0);
+  const auto& n_arr = as_array_or_scalar(n);
+  const auto& theta_arr = as_value_column_array_or_scalar(theta_ref);
+  check_bounded(function, "Probability parameter", theta_arr, 0.0, 1.0);
 
   if (size_zero(n, theta)) {
     return 0.0;
   }
 
-  T_partials_return P(0.0);
   auto ops_partials = make_partials_propagator(theta_ref);
 
-  scalar_seq_view<T_n> n_vec(n);
-  scalar_seq_view<T_theta_ref> theta_vec(theta_ref);
-  size_t max_size_seq_view = max_size(n, theta);
-
   // Explicit return for extreme values
   // The gradients are technically ill-defined, but treated as zero
-  for (size_t i = 0; i < stan::math::size(n); i++) {
-    if (n_vec.val(i) < 0) {
-      return ops_partials.build(NEGATIVE_INFTY);
-    }
+  if (any(n_arr < 0)) {
+    return ops_partials.build(NEGATIVE_INFTY);
   }
 
-  for (size_t i = 0; i < max_size_seq_view; i++) {
-    // Explicit results for extreme values
-    // The gradients are technically ill-defined, but treated as zero
-    if (n_vec.val(i) >= 1) {
-      continue;
-    }
-
-    const T_partials_return Pi = 1 - theta_vec.val(i);
-
-    P += log(Pi);
+  const auto& log1m_theta = select(theta_arr == 1, 0.0, log1m(theta_arr));
 
-    if (!is_constant_all<T_prob>::value) {
-      partials<0>(ops_partials)[i] -= inv(Pi);
-    }
+  if (!is_constant_all<T_prob>::value) {
+    partials<0>(ops_partials) = select(n_arr == 0, -exp(-log1m_theta), 0.0);
   }
 
-  return ops_partials.build(P);
+  return ops_partials.build(sum(select(n_arr == 0, log1m_theta, 0.0)));
 }
 
 }  // namespace math
diff --git a/test/unit/math/mix/prob/bernoulli_cdf_test.cpp b/test/unit/math/mix/prob/bernoulli_cdf_test.cpp
@@ -0,0 +1,24 @@
+#include <test/unit/math/test_ad.hpp>
+#include <limits>
+
+TEST(mathMixScalFun, bernoulliCDF) {
+  // bind the integer argument; autodiff cannot differentiate through it
+  auto f = [](const auto& x1) {
+    return [=](const auto& x2) { return stan::math::bernoulli_cdf(x1, x2); };
+  };
+  stan::test::expect_ad(f(0), 0.1);
+  stan::test::expect_ad(f(0), std::numeric_limits<double>::quiet_NaN());
+  stan::test::expect_ad(f(1), 0.5);
+  stan::test::expect_ad(f(1), std::numeric_limits<double>::quiet_NaN());
+  stan::test::expect_ad(f(1), 0.2);
+
+  std::vector<int> std_in1{0, 1};
+  Eigen::VectorXd in2(2);
+  in2 << 0.5, 0.9;
+
+  stan::test::expect_ad(f(std_in1), 0.2);
+  stan::test::expect_ad(f(std_in1), std::numeric_limits<double>::quiet_NaN());
+  stan::test::expect_ad(f(1), in2);
+  stan::test::expect_ad(f(std_in1), in2);
+  stan::test::expect_ad(f(std_in1), std::numeric_limits<double>::quiet_NaN());
+}
diff --git a/test/unit/math/mix/prob/bernoulli_lccdf_test.cpp b/test/unit/math/mix/prob/bernoulli_lccdf_test.cpp
@@ -0,0 +1,24 @@
+#include <test/unit/math/test_ad.hpp>
+#include <limits>
+
+TEST(mathMixScalFun, bernoulliLCCDF) {
+  // bind the integer argument; autodiff cannot differentiate through it
+  auto f = [](const auto& x1) {
+    return [=](const auto& x2) { return stan::math::bernoulli_lccdf(x1, x2); };
+  };
+  stan::test::expect_ad(f(0), 0.1);
+  stan::test::expect_ad(f(0), std::numeric_limits<double>::quiet_NaN());
+  stan::test::expect_ad(f(1), 0.5);
+  stan::test::expect_ad(f(1), std::numeric_limits<double>::quiet_NaN());
+  stan::test::expect_ad(f(1), 0.2);
+
+  std::vector<int> std_in1{0, 1};
+  Eigen::VectorXd in2(2);
+  in2 << 0.5, 0.9;
+
+  stan::test::expect_ad(f(std_in1), 0.2);
+  stan::test::expect_ad(f(std_in1), std::numeric_limits<double>::quiet_NaN());
+  stan::test::expect_ad(f(1), in2);
+  stan::test::expect_ad(f(std_in1), in2);
+  stan::test::expect_ad(f(std_in1), std::numeric_limits<double>::quiet_NaN());
+}
diff --git a/test/unit/math/mix/prob/bernoulli_lcdf_test.cpp b/test/unit/math/mix/prob/bernoulli_lcdf_test.cpp
@@ -0,0 +1,24 @@
+#include <test/unit/math/test_ad.hpp>
+#include <limits>
+
+TEST(mathMixScalFun, bernoulliLCDF) {
+  // bind the integer argument; autodiff cannot differentiate through it
+  auto f = [](const auto& x1) {
+    return [=](const auto& x2) { return stan::math::bernoulli_lcdf(x1, x2); };
+  };
+  stan::test::expect_ad(f(0), 0.1);
+  stan::test::expect_ad(f(0), std::numeric_limits<double>::quiet_NaN());
+  stan::test::expect_ad(f(1), 0.5);
+  stan::test::expect_ad(f(1), std::numeric_limits<double>::quiet_NaN());
+  stan::test::expect_ad(f(1), 0.2);
+
+  std::vector<int> std_in1{0, 1};
+  Eigen::VectorXd in2(2);
+  in2 << 0.5, 0.9;
+
+  stan::test::expect_ad(f(std_in1), 0.2);
+  stan::test::expect_ad(f(std_in1), std::numeric_limits<double>::quiet_NaN());
+  stan::test::expect_ad(f(1), in2);
+  stan::test::expect_ad(f(std_in1), in2);
+  stan::test::expect_ad(f(std_in1), std::numeric_limits<double>::quiet_NaN());
+}