Skip to content

Commit 7cafa74

Browse files
committed
use reference instead of copy for several command queue calls for OpenCL
1 parent 48d79e7 commit 7cafa74

3 files changed

Lines changed: 16 additions & 16 deletions

File tree

stan/math/opencl/copy.hpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ inline auto from_matrix_cl(const T& src) {
9797
} else {
9898
try {
9999
cl::Event copy_event;
100-
const cl::CommandQueue queue = opencl_context.queue();
100+
const cl::CommandQueue& queue = opencl_context.queue();
101101
std::vector<cl::Event> copy_write_events(src.write_events().begin(),
102102
src.write_events().end());
103103
queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0,
@@ -152,7 +152,7 @@ inline T_dst from_matrix_cl(const matrix_cl<T>& src) {
152152
"dst.cols()", 1);
153153
try {
154154
cl::Event copy_event;
155-
const cl::CommandQueue queue = opencl_context.queue();
155+
const cl::CommandQueue& queue = opencl_context.queue();
156156
std::vector<cl::Event> copy_write_events(src.write_events().begin(),
157157
src.write_events().end());
158158
queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0,
@@ -186,7 +186,7 @@ inline T_dst from_matrix_cl(const matrix_cl<T>& src) {
186186
}
187187
try {
188188
cl::Event copy_event;
189-
const cl::CommandQueue queue = opencl_context.queue();
189+
const cl::CommandQueue& queue = opencl_context.queue();
190190
std::vector<cl::Event> copy_write_events(src.write_events().begin(),
191191
src.write_events().end());
192192
queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0,
@@ -257,7 +257,7 @@ inline auto packed_copy(const T& src) {
257257
return dst;
258258
}
259259
try {
260-
const cl::CommandQueue queue = opencl_context.queue();
260+
const cl::CommandQueue& queue = opencl_context.queue();
261261
matrix_cl<T_val> packed(packed_size, 1);
262262
stan::math::opencl_kernels::pack(cl::NDRange(src.rows(), src.rows()),
263263
packed, src, src.rows(), src.rows(),
@@ -310,7 +310,7 @@ inline matrix_cl<Vec_scalar> packed_copy(Vec&& src, int rows) {
310310
try {
311311
matrix_cl<Vec_scalar> packed(packed_size, 1);
312312
cl::Event packed_event;
313-
const cl::CommandQueue queue = opencl_context.queue();
313+
const cl::CommandQueue& queue = opencl_context.queue();
314314
queue.enqueueWriteBuffer(
315315
packed.buffer(),
316316
opencl_context.in_order() || std::is_rvalue_reference<Vec&&>::value, 0,

stan/math/opencl/opencl_context.hpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ class opencl_context_base {
161161
// the device
162162
bool in_order_; // Whether to use out of order execution.
163163
// Holds Default parameter values for each Kernel.
164-
using map_base_opts = std::map<std::string, int>;
164+
using map_base_opts = std::unordered_map<std::string, int>;
165165
map_base_opts base_opts_
166166
= {{"LOWER", static_cast<int>(matrix_cl_view::Lower)},
167167
{"UPPER", static_cast<int>(matrix_cl_view::Upper)},
@@ -194,7 +194,7 @@ class opencl_context_base {
194194
} tuning_opts_;
195195

196196
protected:
197-
static opencl_context_base& getInstance() {
197+
static opencl_context_base& getInstance() noexcept {
198198
static opencl_context_base instance_;
199199
return instance_;
200200
}
@@ -352,21 +352,21 @@ class opencl_context {
352352
* objects. For stan, there should only be one context, queue, device, and
353353
* program with multiple kernels.
354354
*/
355-
inline cl::Context& context() {
355+
inline cl::Context& context() noexcept {
356356
return opencl_context_base::getInstance().context_;
357357
}
358358
/** \ingroup opencl_context_group
359359
* Returns the reference to the active OpenCL command queue for the device.
360360
* One command queue will exist per device where
361361
* kernels are placed on the command queue and by default executed in order.
362362
*/
363-
inline cl::CommandQueue& queue() {
363+
inline cl::CommandQueue& queue() noexcept {
364364
return opencl_context_base::getInstance().command_queue_;
365365
}
366366
/** \ingroup opencl_context_group
367367
* Returns a reference to the map of kernel defines
368368
*/
369-
inline opencl_context_base::map_base_opts& base_opts() {
369+
inline opencl_context_base::map_base_opts& base_opts() noexcept {
370370
return opencl_context_base::getInstance().base_opts_;
371371
}
372372
/** \ingroup opencl_context_group
@@ -376,35 +376,35 @@ class opencl_context {
376376
* max workgroup of 256 would allow thread blocks of sizes (16,16), (128,2),
377377
* (8, 32), etc.
378378
*/
379-
inline int max_thread_block_size() {
379+
inline int max_thread_block_size() noexcept {
380380
return opencl_context_base::getInstance().max_thread_block_size_;
381381
}
382382

383383
/** \ingroup opencl_context_group
384384
* Returns a reference to the tuning options struct (e.g. the thread block size for the Cholesky decomposition's L_11).
385385
*/
386-
inline opencl_context_base::tuning_struct& tuning_opts() {
386+
inline opencl_context_base::tuning_struct& tuning_opts() noexcept {
387387
return opencl_context_base::getInstance().tuning_opts_;
388388
}
389389

390390
/** \ingroup opencl_context_group
391391
* Returns a vector containing the OpenCL device used to create the context
392392
*/
393-
inline std::vector<cl::Device>& device() {
393+
inline std::vector<cl::Device>& device() noexcept {
394394
return opencl_context_base::getInstance().device_;
395395
}
396396

397397
/** \ingroup opencl_context_group
398398
* Returns a vector containing the OpenCL platform used to create the context
399399
*/
400-
inline std::vector<cl::Platform>& platform() {
400+
inline std::vector<cl::Platform>& platform() noexcept {
401401
return opencl_context_base::getInstance().platform_;
402402
}
403403
/** \ingroup opencl_context_group
404404
* Returns a reference to the bool indicating whether writes to the OpenCL device are
405405
* blocking
406406
*/
407-
inline bool& in_order() {
407+
inline bool& in_order() noexcept {
408408
return opencl_context_base::getInstance().in_order_;
409409
}
410410

stan/math/opencl/zeros_strict_tri.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ inline void matrix_cl<T>::zeros_strict_tri() try {
4444
return;
4545
}
4646
this->view_ = both(this->view_, invert(matrix_view));
47-
cl::CommandQueue cmdQueue = opencl_context.queue();
47+
cl::CommandQueue& cmdQueue = opencl_context.queue();
4848
opencl_kernels::fill_strict_tri(cl::NDRange(this->rows(), this->cols()),
4949
*this, 0.0, this->rows(), this->cols(),
5050
matrix_view);

0 commit comments

Comments
 (0)