2323/* *********************************************************************************/
2424
2525#include < cuda/cuda_vector.hpp>
26+ #include < cuda/cuda_matrix.hpp>
27+ #include < cuda/kernels/bin_search.cuh>
2628#include < core/error.hpp>
2729#include < utils/data_utils.hpp>
2830#include < limits>
2931
3032namespace cubool {
3133
3234 CudaVector::CudaVector (size_t nrows, CudaInstance &instance)
33- : mVectorImpl (nrows), mInstance (instance) {
35+ : mVectorImpl (nrows), mInstance (instance) {
3436
3537 }
3638
@@ -71,7 +73,7 @@ namespace cubool {
7173 }
7274
7375 void CudaVector::extractSubVector (const VectorBase &otherBase, index i, index nrows, bool checkTime) {
74- const auto * v = dynamic_cast <const CudaVector*>(&otherBase);
76+ const auto * v = dynamic_cast <const CudaVector *>(&otherBase);
7577
7678 CHECK_RAISE_ERROR (v != nullptr , InvalidArgument, " Passed vector does not belong to cuda vector class" );
7779
@@ -92,23 +94,23 @@ namespace cubool {
9294 return ;
9395 }
9496
95- auto & vec = v->mVectorImpl ;
97+ auto & vec = v->mVectorImpl ;
9698
9799 thrust::device_vector<index, DeviceAlloc<index>> region (2 );
98100 thrust::fill_n (region.begin (), 1 , std::numeric_limits<index>::max ());
99101 thrust::fill_n (region.begin () + 1 , 1 , 0 );
100102
101103 thrust::for_each (thrust::counting_iterator<index>(0 ), thrust::counting_iterator<index>(vec.m_vals ),
102104 [first = region.data (), size = region.data () + 1 ,
103- i = i, last = i + nrows, rowIndex = vec.m_rows_index .data ()]
104- __device__ (index id) {
105- auto rowId = rowIndex[id];
105+ i = i, last = i + nrows, rowIndex = vec.m_rows_index .data ()]
106+ __device__ (index id) {
107+ auto rowId = rowIndex[id];
106108
107- if (i <= rowId && rowId < last) {
108- atomicAdd (size.get (), 1 );
109- atomicMin (first.get (), id);
110- }
111- });
109+ if (i <= rowId && rowId < last) {
110+ atomicAdd (size.get (), 1 );
111+ atomicMin (first.get (), id);
112+ }
113+ });
112114
113115 index resultSize = region.back ();
114116
@@ -122,14 +124,91 @@ namespace cubool {
122124 index firstToCopy = region.front ();
123125
124126 VectorImplType::container_type result (resultSize);
125- thrust::copy (vec.m_rows_index .begin () + firstToCopy, vec.m_rows_index .begin () + firstToCopy + resultSize, result.begin ());
127+ thrust::copy (vec.m_rows_index .begin () + firstToCopy, vec.m_rows_index .begin () + firstToCopy + resultSize,
128+ result.begin ());
126129
127130 // Update this impl data
128131 mVectorImpl = std::move (VectorImplType (std::move (result), nrows, resultSize));
129132 }
130133
134+ void CudaVector::extractRow (const class MatrixBase &matrixBase, index i) {
135+ auto matrix = dynamic_cast <const CudaMatrix *>(&matrixBase);
136+
137+ CHECK_RAISE_ERROR (matrix != nullptr , InvalidArgument, " Provided matrix does not belongs to cuda matrix class" );
138+
139+ assert (getNrows () == matrix->getNcols ());
140+ assert (i <= matrix->getNrows ());
141+
142+ auto &m = matrix->mMatrixImpl ;
143+
144+ index beginOffset = m.m_row_index [i];
145+ index endOffset = m.m_row_index [i + 1 ];
146+
147+ auto size = endOffset - beginOffset;
148+ auto being = m.m_col_index .begin () + beginOffset;
149+ auto end = m.m_col_index .begin () + endOffset;
150+
151+ VectorImplType::container_type result (size);
152+ thrust::copy (being, end, result.begin ());
153+
154+ mVectorImpl = std::move (VectorImplType (std::move (result), m.m_cols , size));
155+ }
156+
157+ void CudaVector::extractCol (const class MatrixBase &matrixBase, index j) {
158+ auto matrix = dynamic_cast <const CudaMatrix *>(&matrixBase);
159+
160+ CHECK_RAISE_ERROR (matrix != nullptr , InvalidArgument, " Provided matrix does not belongs to cuda matrix class" );
161+
162+ assert (getNrows () == matrix->getNrows ());
163+ assert (j <= matrix->getNcols ());
164+
165+ auto &m = matrix->mMatrixImpl ;
166+
167+ VectorImplType::container_type nnz (1 );
168+ thrust::fill (nnz.begin (), nnz.end (), (index) 0 );
169+
170+ thrust::for_each (thrust::counting_iterator<index>(0 ), thrust::counting_iterator<index>(m.m_rows ),
171+ [rowOffset = m.m_row_index .data (), colIndex = m.m_col_index .data (),
172+ j, nnz = nnz.data ()]__device__ (index i) {
173+ auto size = rowOffset[i + 1 ] - rowOffset[i];
174+ auto begin = colIndex + rowOffset[i];
175+ auto end = colIndex + rowOffset[i + 1 ];
176+
177+ auto r = kernels::find<index>(begin, end, j);
178+
179+ if (r != end && *r == j)
180+ atomicAdd (nnz.get (), 1 );
181+ }
182+ );
183+
184+ index size = nnz.back ();
185+ VectorImplType::container_type result (size);
186+
187+ thrust::fill (nnz.begin (), nnz.end (), (index) 0 );
188+
189+ thrust::for_each (thrust::counting_iterator<index>(0 ), thrust::counting_iterator<index>(m.m_rows ),
190+ [rowOffset = m.m_row_index .data (), colIndex = m.m_col_index .data (),
191+ j, nnz = nnz.data (), result = result.data ()]__device__ (index i) {
192+ auto size = rowOffset[i + 1 ] - rowOffset[i];
193+ auto begin = colIndex + rowOffset[i];
194+ auto end = colIndex + rowOffset[i + 1 ];
195+
196+ auto r = kernels::find<index>(begin, end, j);
197+
198+ if (r != end && *r == j) {
199+ auto order = atomicAdd (nnz.get (), 1 );
200+ result[order] = i;
201+ }
202+ }
203+ );
204+
205+ thrust::sort (result.begin (), result.end ());
206+
207+ mVectorImpl = std::move (VectorImplType (std::move (result), m.m_rows , size));
208+ }
209+
131210 void CudaVector::clone (const VectorBase &otherBase) {
132- auto other = dynamic_cast <const CudaVector*>(&otherBase);
211+ auto other = dynamic_cast <const CudaVector *>(&otherBase);
133212
134213 CHECK_RAISE_ERROR (other != nullptr , InvalidArgument, " Passed vector does not belong to vector class" );
135214 CHECK_RAISE_ERROR (other != this , InvalidArgument, " Vectors must differ" );
0 commit comments