fix assignment for nullptr var_value<matrix> and for assigning expressions

SteveBronder · SteveBronder · commit 0426e8a3755e · 2023-11-27T17:49:51.000-05:00
diff --git a/stan/math/rev/core/arena_matrix.hpp b/stan/math/rev/core/arena_matrix.hpp
@@ -128,6 +128,10 @@ class arena_matrix : public Eigen::Map<MatrixType> {
     Base::operator=(a);
     return *this;
   }
+  template <typename T>
+  void hard_copy(const T& x) {
+    Base::operator=(x);
+  }
 };
 
 }  // namespace math
diff --git a/stan/math/rev/core/var.hpp b/stan/math/rev/core/var.hpp
@@ -390,6 +390,8 @@ class var_value<T, internal::require_matrix_var_value<T>> {
     reverse_pass_callback(
         [this_vi = this->vi_, other_vi = other.vi_]() mutable {
           other_vi->adj_ += this_vi->adj_;
+          // this adjoint has been added to other_vi above; reset it to zero
+          this_vi->adj_.setZero();
         });
   }
 
@@ -1020,9 +1022,9 @@ class var_value<T, internal::require_matrix_var_value<T>> {
    * @param other the value to assign
    * @return this
    */
-  template <typename S, require_assignable_t<value_type, S>* = nullptr,
-            require_all_plain_type_t<T, S>* = nullptr,
-            require_not_same_t<plain_type_t<T>, plain_type_t<S>>* = nullptr>
+  template <typename S, typename T_ = T, require_assignable_t<value_type, S>* = nullptr,
+            require_all_plain_type_t<T_, S>* = nullptr,
+            require_not_same_t<plain_type_t<T_>, plain_type_t<S>>* = nullptr>
   inline var_value<T>& operator=(const var_value<S>& other) {
     static_assert(
         EIGEN_PREDICATE_SAME_MATRIX_SIZE(T, S),
@@ -1032,16 +1034,63 @@ class var_value<T, internal::require_matrix_var_value<T>> {
   }
 
   /**
-   * Assignment of another var value, when either this or the other one does not
+   * Assignment of another var value, when `this` is plain but `other` does not
    * contain a plain type.
-   * @tparam S type of the value in the `var_value` to assing
+   * @tparam S type of the value in the `var_value` to assign
+   * @param other the value to assign
+   * @return this
+   */
+  template <typename S, typename T_ = T,
+            require_assignable_t<value_type, S>* = nullptr,
+            require_not_plain_type_t<S>* = nullptr,
+            require_plain_type_t<T_>* = nullptr>
+  inline var_value<T>& operator=(const var_value<S>& other) {
+    // If vi_ is nullptr, initialize this var by constructing it from `other`
+    if (!(this->vi_)) {
+      *this = var_value<T>(other);
+      return *this;
+    } 
+    arena_t<plain_type_t<T>> prev_val(vi_->val_.rows(), vi_->val_.cols());
+    prev_val.hard_copy(vi_->val_);
+    vi_->val_.hard_copy(other.val());
+    // no need to change any adjoints - these are just zeros before the reverse
+    // pass
+
+    reverse_pass_callback(
+        [this_vi = this->vi_, other_vi = other.vi_, prev_val]() mutable {
+          this_vi->val_.hard_copy(prev_val);
+
+          // we have no way of detecting aliasing between this->vi_->adj_ and
+          // other.vi_->adj_, so we must copy adjoint before resetting to zero
+
+          // we can reuse prev_val instead of allocating a new matrix
+          prev_val.hard_copy(this_vi->adj_);
+          this_vi->adj_.setZero();
+          other_vi->adj_ += prev_val;
+        });
+    return *this;
+  }
+  /**
+   * Assignment of another var value, when `this` does not
+   * contain a plain type.
+   * @tparam S type of the value in the `var_value` to assign
    * @param other the value to assign
    * @return this
    */
   template <typename S, typename T_ = T,
             require_assignable_t<value_type, S>* = nullptr,
-            require_any_not_plain_type_t<T_, S>* = nullptr>
+            require_any_not_plain_type_t<T_, S>* = nullptr,
+            require_not_plain_type_t<T_>* = nullptr>
   inline var_value<T>& operator=(const var_value<S>& other) {
+    // A nullptr vi_ cannot be assigned through here; throw as an internal bug
+    if (!(this->vi_)) {
+      []() STAN_COLD_PATH {
+        throw std::domain_error(
+          "var_value<matrix>::operator=(var_value<expression>):"
+          " Internal Bug! Please report this with an example"
+          " of your model to the Stan math github repository.");
+      }();
+    } 
     arena_t<plain_type_t<T>> prev_val = vi_->val_;
     vi_->val_ = other.val();
     // no need to change any adjoints - these are just zeros before the reverse
@@ -1055,13 +1104,14 @@ class var_value<T, internal::require_matrix_var_value<T>> {
           // other.vi_->adj_, so we must copy adjoint before reseting to zero
 
           // we can reuse prev_val instead of allocating a new matrix
-          prev_val = this_vi->adj_;
+          prev_val.hard_copy(this_vi->adj_);
           this_vi->adj_.setZero();
           other_vi->adj_ += prev_val;
         });
     return *this;
   }
 
+
   /**
    * No-op to match with Eigen methods which call eval
    */
diff --git a/test/unit/math/rev/core/var_test.cpp b/test/unit/math/rev/core/var_test.cpp
@@ -910,3 +910,49 @@ TEST_F(AgradRev, matrix_compile_time_conversions) {
   EXPECT_MATRIX_FLOAT_EQ(colvec.val(), rowvec.val());
   EXPECT_MATRIX_FLOAT_EQ(x11.val(), rowvec.val());
 }
+
+TEST_F(AgradRev, assign_nan) {
+    using stan::math::var_value;
+    using var_vector = var_value<Eigen::Matrix<double,-1,1>>;
+    using stan::math::var;
+    Eigen::VectorXd x_val(10);
+    for (int i = 0; i < 10; ++i) {
+        x_val(i) = i + 0.1;
+    }
+    var_vector x(x_val);
+    var_vector y = var_vector(Eigen::Matrix<double,-1,1>::Constant(10, std::numeric_limits<double>::quiet_NaN()));
+    y = stan::math::head(x, 10);
+    var sigma = 1.0;
+    var lp = stan::math::normal_lpdf<false>(y, 0, sigma);
+    lp.grad();
+    Eigen::VectorXd x_ans_adj(10);
+    for (int i = 0; i < 10; ++i) {
+        x_ans_adj(i) = -(i + 0.1);
+    }
+    EXPECT_MATRIX_EQ(x.adj(), x_ans_adj);
+    Eigen::VectorXd y_ans_adj = Eigen::VectorXd::Zero(10);
+    EXPECT_MATRIX_EQ(y_ans_adj, y.adj());
+}
+
+TEST_F(AgradRev, assign_nullptr_vari) {
+    using stan::math::var_value;
+    using var_vector = var_value<Eigen::Matrix<double,-1,1>>;
+    using stan::math::var;
+    Eigen::VectorXd x_val(10);
+    for (int i = 0; i < 10; ++i) {
+        x_val(i) = i + 0.1;
+    }
+    var_vector x(x_val);
+    var_vector y;
+    y = stan::math::head(x, 10);
+    var sigma = 1.0;
+    var lp = stan::math::normal_lpdf<false>(y, 0, sigma);
+    lp.grad();
+    Eigen::VectorXd x_ans_adj(10);
+    for (int i = 0; i < 10; ++i) {
+        x_ans_adj(i) = -(i + 0.1);
+    }
+    EXPECT_MATRIX_EQ(x.adj(), x_ans_adj);
+    Eigen::VectorXd y_ans_adj = Eigen::VectorXd::Zero(10);
+    EXPECT_MATRIX_EQ(y_ans_adj, y.adj());
+}