2525#include < cuda/matrix_csr.hpp>
2626#include < core/error.hpp>
2727#include < utils/exclusive_scan.hpp>
28+ #include < utils/timer.hpp>
2829#include < algorithm>
2930
3031namespace cubool {
@@ -38,131 +39,6 @@ namespace cubool {
3839 RAISE_ERROR (NotImplemented, " This function is not supported for this matrix class" );
3940 }
4041
41- void MatrixCsr::build (const index *rows, const index *cols, size_t nvals, bool isSorted, bool noDuplicates) {
42- if (nvals == 0 ) {
43- mMatrixImpl .zero_dim (); // no content, empty matrix
44- return ;
45- }
46-
47- thrust::host_vector<index, HostAlloc<index>> rowOffsets;
48- rowOffsets.resize (getNrows () + 1 , 0 );
49-
50- thrust::host_vector<index, HostAlloc<index>> colIndices;
51- colIndices.resize (nvals);
52-
53- // Compute nnz per row
54- for (size_t idx = 0 ; idx < nvals; idx++) {
55- index i = rows[idx];
56- index j = cols[idx];
57-
58- CHECK_RAISE_ERROR (i < getNrows () && j < getNcols (), InvalidArgument, " Out of matrix bounds value" );
59-
60- rowOffsets[i] += 1 ;
61- }
62-
63- // Exclusive scan to eval rows offsets
64- ::cubool::exclusive_scan (rowOffsets.begin(), rowOffsets.end(), 0);
65-
66- // Write offsets for cols
67- std::vector<size_t > writeOffsets (getNrows (), 0 );
68-
69- for (size_t idx = 0 ; idx < nvals; idx++) {
70- index i = rows[idx];
71- index j = cols[idx];
72-
73- colIndices[rowOffsets[i] + writeOffsets[i]] = j;
74- writeOffsets[i] += 1 ;
75- }
76-
77- if (!isSorted) {
78- for (size_t i = 0 ; i < getNrows (); i++) {
79- auto begin = rowOffsets[i];
80- auto end = rowOffsets[i + 1 ];
81-
82- // Sort col values within row
83- thrust::sort (colIndices.begin () + begin, colIndices.begin () + end, [](const index& a, const index& b) {
84- return a < b;
85- });
86- }
87- }
88-
89- // Reduce duplicated values
90- if (!noDuplicates) {
91- size_t unique = 0 ;
92- for (size_t i = 0 ; i < getNrows (); i++) {
93- index prev = std::numeric_limits<index>::max ();
94-
95- for (size_t k = rowOffsets[i]; k < rowOffsets[i + 1 ]; k++) {
96- if (prev != colIndices[k]) {
97- unique += 1 ;
98- }
99-
100- prev = colIndices[k];
101- }
102- }
103-
104- thrust::host_vector<index, HostAlloc<index>> rowOffsetsReduced;
105- rowOffsetsReduced.resize (getNrows () + 1 , 0 );
106-
107- thrust::host_vector<index, HostAlloc<index>> colIndicesReduced;
108- colIndicesReduced.reserve (unique);
109-
110- for (size_t i = 0 ; i < getNrows (); i++) {
111- index prev = std::numeric_limits<index>::max ();
112-
113- for (size_t k = rowOffsets[i]; k < rowOffsets[i + 1 ]; k++) {
114- if (prev != colIndices[k]) {
115- rowOffsetsReduced[i] += 1 ;
116- colIndicesReduced.push_back (colIndices[k]);
117- }
118-
119- prev = colIndices[k];
120- }
121- }
122-
123- // Exclusive scan to eval rows offsets
124- ::cubool::exclusive_scan (rowOffsetsReduced.begin(), rowOffsetsReduced.end(), 0);
125-
126- // Now result in respective place
127- std::swap (rowOffsets, rowOffsetsReduced);
128- std::swap (colIndices, colIndicesReduced);
129- }
130-
131- // Create device buffers and copy data from the cpu side
132- thrust::device_vector<index, DeviceAlloc<index>> rowsDeviceVec = rowOffsets;
133- thrust::device_vector<index, DeviceAlloc<index>> colsDeviceVec = colIndices;
134-
135- // Move actual data to the matrix implementation
136- mMatrixImpl = std::move (MatrixImplType (std::move (colsDeviceVec), std::move (rowsDeviceVec), getNrows (), getNcols (), colIndices.size ()));
137- }
138-
139- void MatrixCsr::extract (index *rows, index *cols, size_t &nvals) {
140- assert (nvals >= getNvals ());
141-
142- // Set nvals to the exact number of nnz values
143- nvals = getNvals ();
144-
145- if (nvals > 0 ) {
146- auto & rowsDeviceVec = mMatrixImpl .m_row_index ;
147- auto & colsDeviceVec = mMatrixImpl .m_col_index ;
148-
149- // Copy data to the host
150- thrust::host_vector<index, HostAlloc<index>> rowsVec = rowsDeviceVec;
151- thrust::host_vector<index, HostAlloc<index>> colsVec = colsDeviceVec;
152-
153- // Iterate over csr formatted data
154- size_t idx = 0 ;
155- for (index i = 0 ; i < getNrows (); i++) {
156- for (index j = rowsVec[i]; j < rowsVec[i + 1 ]; j++) {
157- rows[idx] = i;
158- cols[idx] = colsVec[j];
159-
160- idx += 1 ;
161- }
162- }
163- }
164- }
165-
16642 void MatrixCsr::clone (const MatrixBase &otherBase) {
16743 auto other = dynamic_cast <const MatrixCsr*>(&otherBase);
16844
@@ -190,6 +66,16 @@ namespace cubool {
19066 }
19167 }
19268
69+ void MatrixCsr::clearAndResizeStorageToDim () const {
70+ if (mMatrixImpl .m_vals > 0 ) {
71+ // Release only if have some nnz values
72+ mMatrixImpl .zero_dim ();
73+ }
74+
75+ // Normally resize if no storage is actually allocated
76+ this ->resizeStorageToDim ();
77+ }
78+
19379 index MatrixCsr::getNrows () const {
19480 return mNrows ;
19581 }
@@ -210,4 +96,24 @@ namespace cubool {
21096 return mMatrixImpl .m_vals == 0 ;
21197 }
21298
99+ void MatrixCsr::transferToDevice (const std::vector<index> &rowOffsets, const std::vector<index> &colIndices) {
100+ // Create device buffers and copy data from the cpu side
101+ thrust::device_vector<index, DeviceAlloc<index>> rowsDeviceVec (rowOffsets.size ());
102+ thrust::device_vector<index, DeviceAlloc<index>> colsDeviceVec (colIndices.size ());
103+
104+ thrust::copy (rowOffsets.begin (), rowOffsets.end (), rowsDeviceVec.begin ());
105+ thrust::copy (colIndices.begin (), colIndices.end (), colsDeviceVec.begin ());
106+
107+ // Move actual data to the matrix implementation
108+ mMatrixImpl = std::move (MatrixImplType (std::move (colsDeviceVec), std::move (rowsDeviceVec), getNrows (), getNcols (), colIndices.size ()));
109+ }
110+
111+ void MatrixCsr::transferFromDevice (std::vector<index> &rowOffsets, std::vector<index> &colIndices) const {
112+ rowOffsets.resize (mMatrixImpl .m_row_index .size ());
113+ colIndices.resize (mMatrixImpl .m_col_index .size ());
114+
115+ thrust::copy (mMatrixImpl .m_row_index .begin (), mMatrixImpl .m_row_index .end (), rowOffsets.begin ());
116+ thrust::copy (mMatrixImpl .m_col_index .begin (), mMatrixImpl .m_col_index .end (), colIndices.begin ());
117+ }
118+
213119}
0 commit comments