We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 7d22eac, commit 8809be2 (Copy full SHA for 8809be2)
1 file changed
dpctl/tensor/libtensor/include/kernels/copy_as_contiguous.hpp
@@ -31,6 +31,7 @@
31
#include "dpctl_tensor_types.hpp"
32
#include "kernels/alignment.hpp"
33
#include "utils/offset_utils.hpp"
34
+#include "utils/sycl_utils.hpp"
35
#include "utils/type_utils.hpp"
36
37
namespace dpctl
@@ -42,6 +43,8 @@ namespace kernels
42
43
namespace copy_as_contig
44
{
45
46
+using dpctl::tensor::sycl_utils::sub_group_store;
47
+
48
template <typename T,
49
typename IndexerT,
50
std::uint8_t vec_sz = 4u,
@@ -113,7 +116,7 @@ class CopyAsCContigFunctor
113
116
const ssize_t src_offset = src_indexer(elem_id);
114
117
dst_vec[k] = src_p[src_offset];
115
118
}
- sg.store<vec_sz>(dst_multi_ptr, dst_vec);
119
+ sub_group_store<vec_sz>(sg, dst_vec, dst_multi_ptr);
120
121
122
else {
0 commit comments