Skip to content

Commit df7abfb

Browse files
committed
Fix a few more bugs
1 parent 32595af commit df7abfb

1 file changed

Lines changed: 20 additions & 11 deletions

File tree

include/xtensor/xassign.hpp

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1155,7 +1155,7 @@ namespace xt
11551155
step_dim = cut;
11561156
}
11571157
#if defined(XTENSOR_USE_OPENMP) && defined(strided_parallel_assign)
1158-
if (outer_loop_size >= XTENSOR_OPENMP_THRESHOLD / inner_loop_size)
1158+
if (outer_loop_size >= XTENSOR_OPENMP_TRESHOLD / inner_loop_size)
11591159
{
11601160
std::size_t first_step = true;
11611161
#pragma omp parallel for schedule(static) firstprivate(first_step, fct_stepper, res_stepper, idx)
@@ -1241,9 +1241,7 @@ namespace xt
12411241

12421242
for (std::size_t i = 0; i < simd_size; ++i)
12431243
{
1244-
res_stepper.template store_simd(
1245-
fct_stepper.template step_simd<value_type>()
1246-
);
1244+
res_stepper.template store_simd(fct_stepper.template step_simd<value_type>());
12471245
}
12481246
for (std::size_t i = 0; i < simd_rest; ++i)
12491247
{
@@ -1257,13 +1255,24 @@ namespace xt
12571255
? strided_assign_detail::idx_tools<layout_type::row_major>::next_idx(idx, max_shape)
12581256
: strided_assign_detail::idx_tools<layout_type::column_major>::next_idx(idx, max_shape);
12591257

1260-
// need to step E1 as well if not contiguous assign (e.g. view)
12611258
fct_stepper.to_begin();
1262-
res_stepper.to_begin();
1263-
for (std::size_t i = 0; i < idx.size(); ++i)
1259+
1260+
// need to step E1 as well if not contiguous assign (e.g. view)
1261+
if (!E1::contiguous_layout)
12641262
{
1265-
fct_stepper.step(i + step_dim, idx[i]);
1266-
res_stepper.step(i + step_dim, idx[i]);
1263+
res_stepper.to_begin();
1264+
for (std::size_t i = 0; i < idx.size(); ++i)
1265+
{
1266+
fct_stepper.step(i + step_dim, idx[i]);
1267+
res_stepper.step(i + step_dim, idx[i]);
1268+
}
1269+
}
1270+
else
1271+
{
1272+
for (std::size_t i = 0; i < idx.size(); ++i)
1273+
{
1274+
fct_stepper.step(i + step_dim, idx[i]);
1275+
}
12671276
}
12681277
}
12691278
},
@@ -1315,12 +1324,12 @@ namespace xt
13151324
}
13161325
#endif
13171326
}
1327+
13181328
template <>
13191329
template <class E1, class E2>
13201330
inline void strided_loop_assigner<true>::run(E1& e1, const E2& e2)
13211331
{
1322-
strided_assign_detail::loop_sizes_t
1323-
loop_sizes = strided_loop_assigner<true>::get_loop_sizes(e1, e2);
1332+
strided_assign_detail::loop_sizes_t loop_sizes = strided_loop_assigner<true>::get_loop_sizes(e1, e2);
13241333
if (loop_sizes.can_do_strided_assign)
13251334
{
13261335
run(e1, e2, loop_sizes);

0 commit comments

Comments
 (0)