Merge pull request #2914 from stan-dev/feature/2913-finite-diff-Hvp

andrjohns · web-flow · commit 89119024fb32 · 2023-06-29T09:53:09.000+03:00
Add rev functor to compute hessian-times-vector by finite differences
diff --git a/stan/math/rev/functor.hpp b/stan/math/rev/functor.hpp
@@ -29,5 +29,6 @@
 #include <stan/math/rev/functor/partials_propagator.hpp>
 #include <stan/math/rev/functor/reduce_sum.hpp>
 #include <stan/math/rev/functor/finite_diff_hessian_auto.hpp>
+#include <stan/math/rev/functor/finite_diff_hessian_times_vector_auto.hpp>
 
 #endif
diff --git a/stan/math/rev/functor/finite_diff_hessian_times_vector_auto.hpp b/stan/math/rev/functor/finite_diff_hessian_times_vector_auto.hpp
@@ -0,0 +1,67 @@
+#ifndef STAN_MATH_REV_FUNCTOR_HESSIAN_TIMES_VECTOR_HPP
+#define STAN_MATH_REV_FUNCTOR_HESSIAN_TIMES_VECTOR_HPP
+
+#include <stan/math/prim/fun/constants.hpp>
+#include <stan/math/rev/meta.hpp>
+#include <stan/math/rev/core.hpp>
+#include <stan/math/prim/fun/Eigen.hpp>
+#include <stan/math/rev/functor.hpp>
+#include <cmath>
+
+namespace stan {
+namespace math {
+namespace internal {
+
+/**
+ * Calculate the value and the product of the Hessian and the specified
+ * vector of the specified function at the specified argument using
+ * central finite difference of gradients, automatically setting
+ * the stepsize between the function evaluations along a dimension.
+ *
+ * <p>The functor is evaluated on a vector of `double` (to compute `fx`)
+ * and is also handed to `gradient`, so it should implement
+ *
+ * <code>
+ * template <typename T> T operator()(const Eigen::Matrix<T, -1, 1>&) const
+ * </code>
+ *
+ * <p>For details of the algorithm, see
+ * https://justindomke.wordpress.com/2009/01/17/hessian-vector-products/
+ *
+ * <p>Step size is `sqrt(epsilon) * (1 + ||x||) / ||v||`, as suggested in
+ * https://doi.org/10.1016/j.cam.2008.12.024
+ *
+ * <p>2 gradient calls are needed; a zero `v` gives a zero `hvp` directly.
+ *
+ * @tparam F Type of function
+ * @param[in] f Function
+ * @param[in] x Argument to function
+ * @param[in] v Vector to multiply Hessian with
+ * @param[out] fx Function applied to argument
+ * @param[out] hvp Product of Hessian and vector at argument
+ */
+template <typename F>
+void finite_diff_hessian_times_vector_auto(const F& f, const Eigen::VectorXd& x,
+                                           const Eigen::VectorXd& v, double& fx,
+                                           Eigen::VectorXd& hvp) {
+  fx = f(x);
+
+  const double v_norm = v.norm();
+  if (v_norm == 0) {
+    // H * 0 = 0; the step size below would divide by zero and yield NaN.
+    hvp = Eigen::VectorXd::Zero(x.size());
+    return;
+  }
+  const double epsilon = std::sqrt(EPSILON) * (1 + x.norm()) / v_norm;
+  const Eigen::VectorXd v_eps = epsilon * v;
+  double tmp;
+  Eigen::VectorXd grad_forward(x.size());
+  gradient(f, x + v_eps, tmp, grad_forward);
+  Eigen::VectorXd grad_backward(x.size());
+  gradient(f, x - v_eps, tmp, grad_backward);
+  hvp = (grad_forward - grad_backward) / (2 * epsilon);
+}
+}  // namespace internal
+}  // namespace math
+}  // namespace stan
+#endif
diff --git a/test/unit/math/rev/functor/finite_diff_hessian_times_vector_test.cpp b/test/unit/math/rev/functor/finite_diff_hessian_times_vector_test.cpp
@@ -0,0 +1,81 @@
+#include <stan/math/rev.hpp>
+#include <gtest/gtest.h>
+#include <test/unit/math/rev/fun/util.hpp>
+#include <iostream>
+#include <stdexcept>
+#include <vector>
+
+using Eigen::Dynamic;
+using Eigen::Matrix;
+
+namespace finite_diff_hessian_times_vector_test {
+// Test functor: fun1(x, y) = (x^2 * y) + (3 * y^2)
+struct fun1 {
+  template <typename Scalar>
+  inline Scalar operator()(const Matrix<Scalar, Dynamic, 1>& args) const {
+    return args[0] * args[0] * args[1] + 3.0 * args[1] * args[1];
+  }
+};
+
+struct fun2 {
+  // fun2(x, y) = (x^2 * y) + (3 * y^2) + (5 * x * y) + sin(x)
+  // Gradient: d/dx = (2 * x * y) + (5 * y) + cos(x)
+  //           d/dy = (x^2) + (6 * y) + (5 * x)
+  // Hessian:  d^2/dx^2 = (2 * y) - sin(x)
+  //           d^2/dxdy = d^2/dydx = (2 * x) + 5
+  //           d^2/dy^2 = 6
+  template <typename Scalar>
+  inline Scalar operator()(const Matrix<Scalar, Dynamic, 1>& args) const {
+    using std::sin;  // unqualified call below lets ADL pick other overloads
+    const Scalar& a = args[0];
+    const Scalar& b = args[1];
+    return a * a * b + 3.0 * b * b + 5.0 * a * b + sin(a);
+  }
+};
+
+TEST(RevFunctor, finiteDiffHessianTimesVector) {
+  using stan::math::internal::finite_diff_hessian_times_vector_auto;
+
+  // Evaluation point and direction for fun1(x, y) = x^2 * y + 3 * y^2.
+  Matrix<double, Dynamic, 1> pt(2);
+  pt << 2, -3;
+  Matrix<double, Dynamic, 1> dir(2);
+  dir << 8, 5;
+
+  double value;
+  Matrix<double, Dynamic, 1> prod;
+  finite_diff_hessian_times_vector_auto(fun1(), pt, dir, value, prod);
+
+  // The function value at (2, -3).
+  EXPECT_FLOAT_EQ(2 * 2 * -3 + 3.0 * -3 * -3, value);
+
+  // Analytic Hessian of fun1 is [[2y, 2x], [2x, 6]]; compare H * dir.
+  EXPECT_EQ(2, prod.size());
+  EXPECT_FLOAT_EQ(2 * pt(1) * dir(0) + 2 * pt(0) * dir(1), prod(0));
+  EXPECT_FLOAT_EQ(2 * pt(0) * dir(0) + 6 * dir(1), prod(1));
+}
+
+TEST(RevFunctor, finiteDiffHessianTimesVector2) {
+  using stan::math::internal::finite_diff_hessian_times_vector_auto;
+
+  Matrix<double, Dynamic, 1> pt(2);
+  pt << 13, -4;
+  Matrix<double, Dynamic, 1> dir(2);
+  dir << 10, 0.2;
+
+  double value;
+  Matrix<double, Dynamic, 1> prod;
+  finite_diff_hessian_times_vector_auto(fun2(), pt, dir, value, prod);
+
+  EXPECT_FLOAT_EQ(13 * 13 * -4 + 3 * -4 * -4 + 5 * 13 * -4 + std::sin(13),
+                  value);
+
+  // Analytic Hessian: [[2y - sin(x), 2x + 5], [2x + 5, 6]].
+  const double cross = 2 * pt(0) + 5;  // mixed partial d^2/dxdy
+  EXPECT_EQ(2, prod.size());
+  EXPECT_FLOAT_EQ((2 * pt(1) - std::sin(pt(0))) * dir(0) + cross * dir(1),
+                  prod(0));
+  EXPECT_FLOAT_EQ(cross * dir(0) + 6 * dir(1), prod(1));
+}
+
+}  // namespace finite_diff_hessian_times_vector_test