//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_COMMAND_QUEUE_HPP #define BOOST_COMPUTE_COMMAND_QUEUE_HPP #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { inline void BOOST_COMPUTE_CL_CALLBACK nullary_native_kernel_trampoline(void *user_func_ptr) { void (*user_func)(); std::memcpy(&user_func, user_func_ptr, sizeof(user_func)); user_func(); } } // end detail namespace /// \class command_queue /// \brief A command queue. /// /// Command queues provide the interface for interacting with compute /// devices. The command_queue class provides methods to copy data to /// and from a compute device as well as execute compute kernels. /// /// Command queues are created for a compute device within a compute /// context. /// /// For example, to create a context and command queue for the default device /// on the system (this is the normal set up code used by almost all OpenCL /// programs): /// \code /// #include /// /// // get the default compute device /// boost::compute::device device = boost::compute::system::default_device(); /// /// // set up a compute context and command queue /// boost::compute::context context(device); /// boost::compute::command_queue queue(context, device); /// \endcode /// /// The default command queue for the system can be obtained with the /// system::default_queue() method. /// /// \see buffer, context, kernel class command_queue { public: enum properties { enable_profiling = CL_QUEUE_PROFILING_ENABLE, enable_out_of_order_execution = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE #ifdef BOOST_COMPUTE_CL_VERSION_2_0 , on_device = CL_QUEUE_ON_DEVICE, on_device_default = CL_QUEUE_ON_DEVICE_DEFAULT #endif }; enum map_flags { map_read = CL_MAP_READ, map_write = CL_MAP_WRITE #ifdef BOOST_COMPUTE_CL_VERSION_1_2 , map_write_invalidate_region = CL_MAP_WRITE_INVALIDATE_REGION #endif }; #ifdef BOOST_COMPUTE_CL_VERSION_1_2 enum mem_migration_flags { migrate_to_host = CL_MIGRATE_MEM_OBJECT_HOST, migrate_content_undefined = CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED }; #endif // BOOST_COMPUTE_CL_VERSION_1_2 /// Creates a null command queue. command_queue() : m_queue(0) { } explicit command_queue(cl_command_queue queue, bool retain = true) : m_queue(queue) { if(m_queue && retain){ clRetainCommandQueue(m_queue); } } /// Creates a command queue in \p context for \p device with /// \p properties. /// /// \see_opencl_ref{clCreateCommandQueue} command_queue(const context &context, const device &device, cl_command_queue_properties properties = 0) { BOOST_ASSERT(device.id() != 0); cl_int error = 0; #ifdef BOOST_COMPUTE_CL_VERSION_2_0 if (device.check_version(2, 0)){ std::vector queue_properties; if(properties){ queue_properties.push_back(CL_QUEUE_PROPERTIES); queue_properties.push_back(cl_queue_properties(properties)); queue_properties.push_back(cl_queue_properties(0)); } const cl_queue_properties *queue_properties_ptr = queue_properties.empty() ? 0 : &queue_properties[0]; m_queue = clCreateCommandQueueWithProperties( context, device.id(), queue_properties_ptr, &error ); } else #endif { // Suppress deprecated declarations warning BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS(); m_queue = clCreateCommandQueue( context, device.id(), properties, &error ); BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS(); } if(!m_queue){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new command queue object as a copy of \p other. command_queue(const command_queue &other) : m_queue(other.m_queue) { if(m_queue){ clRetainCommandQueue(m_queue); } } /// Copies the command queue object from \p other to \c *this. command_queue& operator=(const command_queue &other) { if(this != &other){ if(m_queue){ clReleaseCommandQueue(m_queue); } m_queue = other.m_queue; if(m_queue){ clRetainCommandQueue(m_queue); } } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new command queue object from \p other. command_queue(command_queue&& other) BOOST_NOEXCEPT : m_queue(other.m_queue) { other.m_queue = 0; } /// Move-assigns the command queue from \p other to \c *this. command_queue& operator=(command_queue&& other) BOOST_NOEXCEPT { if(m_queue){ clReleaseCommandQueue(m_queue); } m_queue = other.m_queue; other.m_queue = 0; return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the command queue. /// /// \see_opencl_ref{clReleaseCommandQueue} ~command_queue() { if(m_queue){ BOOST_COMPUTE_ASSERT_CL_SUCCESS( clReleaseCommandQueue(m_queue) ); } } /// Returns the underlying OpenCL command queue. cl_command_queue& get() const { return const_cast(m_queue); } /// Returns the device that the command queue issues commands to. device get_device() const { return device(get_info(CL_QUEUE_DEVICE)); } /// Returns the context for the command queue. context get_context() const { return context(get_info(CL_QUEUE_CONTEXT)); } /// Returns information about the command queue. /// /// \see_opencl_ref{clGetCommandQueueInfo} template T get_info(cl_command_queue_info info) const { return detail::get_object_info(clGetCommandQueueInfo, m_queue, info); } /// \overload template typename detail::get_object_info_type::type get_info() const; /// Returns the properties for the command queue. cl_command_queue_properties get_properties() const { return get_info(CL_QUEUE_PROPERTIES); } #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Returns the current default device command queue for the underlying device. /// /// \opencl_version_warning{2,1} command_queue get_default_device_queue() const { return command_queue(get_info(CL_QUEUE_DEVICE_DEFAULT)); } /// Replaces the default device command queue for the underlying device /// with this command queue. Command queue must have been created /// with CL_QUEUE_ON_DEVICE flag. /// /// \see_opencl21_ref{clSetDefaultDeviceCommandQueue} /// /// \opencl_version_warning{2,1} void set_as_default_device_queue() const { cl_int ret = clSetDefaultDeviceCommandQueue( this->get_context().get(), this->get_device().get(), m_queue ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } } #endif // BOOST_COMPUTE_CL_VERSION_2_1 /// Enqueues a command to read data from \p buffer to host memory. /// /// \see_opencl_ref{clEnqueueReadBuffer} /// /// \see copy() event enqueue_read_buffer(const buffer &buffer, size_t offset, size_t size, void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueReadBuffer( m_queue, buffer.get(), CL_TRUE, offset, size, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to read data from \p buffer to host memory. The /// copy will be performed asynchronously. /// /// \see_opencl_ref{clEnqueueReadBuffer} /// /// \see copy_async() event enqueue_read_buffer_async(const buffer &buffer, size_t offset, size_t size, void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueReadBuffer( m_queue, buffer.get(), CL_FALSE, offset, size, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to read a rectangular region from \p buffer to /// host memory. /// /// \see_opencl_ref{clEnqueueReadBufferRect} /// /// \opencl_version_warning{1,1} event enqueue_read_buffer_rect(const buffer &buffer, const size_t buffer_origin[3], const size_t host_origin[3], const size_t region[3], size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueReadBufferRect( m_queue, buffer.get(), CL_TRUE, buffer_origin, host_origin, region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to read a rectangular region from \p buffer to /// host memory. The copy will be performed asynchronously. /// /// \see_opencl_ref{clEnqueueReadBufferRect} /// /// \opencl_version_warning{1,1} event enqueue_read_buffer_rect_async(const buffer &buffer, const size_t buffer_origin[3], const size_t host_origin[3], const size_t region[3], size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueReadBufferRect( m_queue, buffer.get(), CL_FALSE, buffer_origin, host_origin, region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // BOOST_COMPUTE_CL_VERSION_1_1 /// Enqueues a command to write data from host memory to \p buffer. /// /// \see_opencl_ref{clEnqueueWriteBuffer} /// /// \see copy() event enqueue_write_buffer(const buffer &buffer, size_t offset, size_t size, const void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueWriteBuffer( m_queue, buffer.get(), CL_TRUE, offset, size, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to write data from host memory to \p buffer. /// The copy is performed asynchronously. /// /// \see_opencl_ref{clEnqueueWriteBuffer} /// /// \see copy_async() event enqueue_write_buffer_async(const buffer &buffer, size_t offset, size_t size, const void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueWriteBuffer( m_queue, buffer.get(), CL_FALSE, offset, size, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to write a rectangular region from host memory /// to \p buffer. /// /// \see_opencl_ref{clEnqueueWriteBufferRect} /// /// \opencl_version_warning{1,1} event enqueue_write_buffer_rect(const buffer &buffer, const size_t buffer_origin[3], const size_t host_origin[3], const size_t region[3], size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueWriteBufferRect( m_queue, buffer.get(), CL_TRUE, buffer_origin, host_origin, region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to write a rectangular region from host memory /// to \p buffer. The copy is performed asynchronously. /// /// \see_opencl_ref{clEnqueueWriteBufferRect} /// /// \opencl_version_warning{1,1} event enqueue_write_buffer_rect_async(const buffer &buffer, const size_t buffer_origin[3], const size_t host_origin[3], const size_t region[3], size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueWriteBufferRect( m_queue, buffer.get(), CL_FALSE, buffer_origin, host_origin, region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // BOOST_COMPUTE_CL_VERSION_1_1 /// Enqueues a command to copy data from \p src_buffer to /// \p dst_buffer. /// /// \see_opencl_ref{clEnqueueCopyBuffer} /// /// \see copy() event enqueue_copy_buffer(const buffer &src_buffer, const buffer &dst_buffer, size_t src_offset, size_t dst_offset, size_t size, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(src_offset + size <= src_buffer.size()); BOOST_ASSERT(dst_offset + size <= dst_buffer.size()); BOOST_ASSERT(src_buffer.get_context() == this->get_context()); BOOST_ASSERT(dst_buffer.get_context() == this->get_context()); event event_; cl_int ret = clEnqueueCopyBuffer( m_queue, src_buffer.get(), dst_buffer.get(), src_offset, dst_offset, size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to copy a rectangular region from /// \p src_buffer to \p dst_buffer. /// /// \see_opencl_ref{clEnqueueCopyBufferRect} /// /// \opencl_version_warning{1,1} event enqueue_copy_buffer_rect(const buffer &src_buffer, const buffer &dst_buffer, const size_t src_origin[3], const size_t dst_origin[3], const size_t region[3], size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(src_buffer.get_context() == this->get_context()); BOOST_ASSERT(dst_buffer.get_context() == this->get_context()); event event_; cl_int ret = clEnqueueCopyBufferRect( m_queue, src_buffer.get(), dst_buffer.get(), src_origin, dst_origin, region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // BOOST_COMPUTE_CL_VERSION_1_1 #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to fill \p buffer with \p pattern. /// /// \see_opencl_ref{clEnqueueFillBuffer} /// /// \opencl_version_warning{1,2} /// /// \see fill() event enqueue_fill_buffer(const buffer &buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(offset + size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); event event_; cl_int ret = clEnqueueFillBuffer( m_queue, buffer.get(), pattern, pattern_size, offset, size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // BOOST_COMPUTE_CL_VERSION_1_2 /// Enqueues a command to map \p buffer into the host address space. /// Event associated with map operation is returned through /// \p map_buffer_event parameter. /// /// \see_opencl_ref{clEnqueueMapBuffer} void* enqueue_map_buffer(const buffer &buffer, cl_map_flags flags, size_t offset, size_t size, event &map_buffer_event, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(offset + size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); cl_int ret = 0; void *pointer = clEnqueueMapBuffer( m_queue, buffer.get(), CL_TRUE, flags, offset, size, events.size(), events.get_event_ptr(), &map_buffer_event.get(), &ret ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return pointer; } /// \overload void* enqueue_map_buffer(const buffer &buffer, cl_map_flags flags, size_t offset, size_t size, const wait_list &events = wait_list()) { event event_; return enqueue_map_buffer(buffer, flags, offset, size, event_, events); } /// Enqueues a command to map \p buffer into the host address space. /// Map operation is performed asynchronously. The pointer to the mapped /// region cannot be used until the map operation has completed. /// /// Event associated with map operation is returned through /// \p map_buffer_event parameter. /// /// \see_opencl_ref{clEnqueueMapBuffer} void* enqueue_map_buffer_async(const buffer &buffer, cl_map_flags flags, size_t offset, size_t size, event &map_buffer_event, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(offset + size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); cl_int ret = 0; void *pointer = clEnqueueMapBuffer( m_queue, buffer.get(), CL_FALSE, flags, offset, size, events.size(), events.get_event_ptr(), &map_buffer_event.get(), &ret ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return pointer; } /// Enqueues a command to unmap \p buffer from the host memory space. /// /// \see_opencl_ref{clEnqueueUnmapMemObject} event enqueue_unmap_buffer(const buffer &buffer, void *mapped_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(buffer.get_context() == this->get_context()); return enqueue_unmap_mem_object(buffer.get(), mapped_ptr, events); } /// Enqueues a command to unmap \p mem from the host memory space. /// /// \see_opencl_ref{clEnqueueUnmapMemObject} event enqueue_unmap_mem_object(cl_mem mem, void *mapped_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueUnmapMemObject( m_queue, mem, mapped_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to read data from \p image to host memory. /// /// \see_opencl_ref{clEnqueueReadImage} event enqueue_read_image(const image_object& image, const size_t *origin, const size_t *region, size_t row_pitch, size_t slice_pitch, void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueReadImage( m_queue, image.get(), CL_TRUE, origin, region, row_pitch, slice_pitch, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// \overload template event enqueue_read_image(const image_object& image, const extents origin, const extents region, void *host_ptr, size_t row_pitch = 0, size_t slice_pitch = 0, const wait_list &events = wait_list()) { BOOST_ASSERT(image.get_context() == this->get_context()); size_t origin3[3] = { 0, 0, 0 }; size_t region3[3] = { 1, 1, 1 }; std::copy(origin.data(), origin.data() + N, origin3); std::copy(region.data(), region.data() + N, region3); return enqueue_read_image( image, origin3, region3, row_pitch, slice_pitch, host_ptr, events ); } /// Enqueues a command to write data from host memory to \p image. /// /// \see_opencl_ref{clEnqueueWriteImage} event enqueue_write_image(image_object& image, const size_t *origin, const size_t *region, const void *host_ptr, size_t input_row_pitch = 0, size_t input_slice_pitch = 0, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueWriteImage( m_queue, image.get(), CL_TRUE, origin, region, input_row_pitch, input_slice_pitch, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// \overload template event enqueue_write_image(image_object& image, const extents origin, const extents region, const void *host_ptr, const size_t input_row_pitch = 0, const size_t input_slice_pitch = 0, const wait_list &events = wait_list()) { BOOST_ASSERT(image.get_context() == this->get_context()); size_t origin3[3] = { 0, 0, 0 }; size_t region3[3] = { 1, 1, 1 }; std::copy(origin.data(), origin.data() + N, origin3); std::copy(region.data(), region.data() + N, region3); return enqueue_write_image( image, origin3, region3, host_ptr, input_row_pitch, input_slice_pitch, events ); } /// Enqueues a command to map \p image into the host address space. /// /// Event associated with map operation is returned through /// \p map_image_event parameter. /// /// \see_opencl_ref{clEnqueueMapImage} void* enqueue_map_image(const image_object &image, cl_map_flags flags, const size_t *origin, const size_t *region, size_t &output_row_pitch, size_t &output_slice_pitch, event &map_image_event, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(image.get_context() == this->get_context()); cl_int ret = 0; void *pointer = clEnqueueMapImage( m_queue, image.get(), CL_TRUE, flags, origin, region, &output_row_pitch, &output_slice_pitch, events.size(), events.get_event_ptr(), &map_image_event.get(), &ret ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return pointer; } /// \overload void* enqueue_map_image(const image_object &image, cl_map_flags flags, const size_t *origin, const size_t *region, size_t &output_row_pitch, size_t &output_slice_pitch, const wait_list &events = wait_list()) { event event_; return enqueue_map_image( image, flags, origin, region, output_row_pitch, output_slice_pitch, event_, events ); } /// \overload template void* enqueue_map_image(image_object& image, cl_map_flags flags, const extents origin, const extents region, size_t &output_row_pitch, size_t &output_slice_pitch, event &map_image_event, const wait_list &events = wait_list()) { BOOST_ASSERT(image.get_context() == this->get_context()); size_t origin3[3] = { 0, 0, 0 }; size_t region3[3] = { 1, 1, 1 }; std::copy(origin.data(), origin.data() + N, origin3); std::copy(region.data(), region.data() + N, region3); return enqueue_map_image( image, flags, origin3, region3, output_row_pitch, output_slice_pitch, map_image_event, events ); } /// \overload template void* enqueue_map_image(image_object& image, cl_map_flags flags, const extents origin, const extents region, size_t &output_row_pitch, size_t &output_slice_pitch, const wait_list &events = wait_list()) { event event_; return enqueue_map_image( image, flags, origin, region, output_row_pitch, output_slice_pitch, event_, events ); } /// Enqueues a command to map \p image into the host address space. /// Map operation is performed asynchronously. The pointer to the mapped /// region cannot be used until the map operation has completed. /// /// Event associated with map operation is returned through /// \p map_image_event parameter. /// /// \see_opencl_ref{clEnqueueMapImage} void* enqueue_map_image_async(const image_object &image, cl_map_flags flags, const size_t *origin, const size_t *region, size_t &output_row_pitch, size_t &output_slice_pitch, event &map_image_event, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(image.get_context() == this->get_context()); cl_int ret = 0; void *pointer = clEnqueueMapImage( m_queue, image.get(), CL_FALSE, flags, origin, region, &output_row_pitch, &output_slice_pitch, events.size(), events.get_event_ptr(), &map_image_event.get(), &ret ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return pointer; } /// \overload template void* enqueue_map_image_async(image_object& image, cl_map_flags flags, const extents origin, const extents region, size_t &output_row_pitch, size_t &output_slice_pitch, event &map_image_event, const wait_list &events = wait_list()) { BOOST_ASSERT(image.get_context() == this->get_context()); size_t origin3[3] = { 0, 0, 0 }; size_t region3[3] = { 1, 1, 1 }; std::copy(origin.data(), origin.data() + N, origin3); std::copy(region.data(), region.data() + N, region3); return enqueue_map_image_async( image, flags, origin3, region3, output_row_pitch, output_slice_pitch, map_image_event, events ); } /// Enqueues a command to unmap \p image from the host memory space. /// /// \see_opencl_ref{clEnqueueUnmapMemObject} event enqueue_unmap_image(const image_object &image, void *mapped_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(image.get_context() == this->get_context()); return enqueue_unmap_mem_object(image.get(), mapped_ptr, events); } /// Enqueues a command to copy data from \p src_image to \p dst_image. /// /// \see_opencl_ref{clEnqueueCopyImage} event enqueue_copy_image(const image_object& src_image, image_object& dst_image, const size_t *src_origin, const size_t *dst_origin, const size_t *region, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueCopyImage( m_queue, src_image.get(), dst_image.get(), src_origin, dst_origin, region, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// \overload template event enqueue_copy_image(const image_object& src_image, image_object& dst_image, const extents src_origin, const extents dst_origin, const extents region, const wait_list &events = wait_list()) { BOOST_ASSERT(src_image.get_context() == this->get_context()); BOOST_ASSERT(dst_image.get_context() == this->get_context()); BOOST_ASSERT_MSG(src_image.format() == dst_image.format(), "Source and destination image formats must match."); size_t src_origin3[3] = { 0, 0, 0 }; size_t dst_origin3[3] = { 0, 0, 0 }; size_t region3[3] = { 1, 1, 1 }; std::copy(src_origin.data(), src_origin.data() + N, src_origin3); std::copy(dst_origin.data(), dst_origin.data() + N, dst_origin3); std::copy(region.data(), region.data() + N, region3); return enqueue_copy_image( src_image, dst_image, src_origin3, dst_origin3, region3, events ); } /// Enqueues a command to copy data from \p src_image to \p dst_buffer. /// /// \see_opencl_ref{clEnqueueCopyImageToBuffer} event enqueue_copy_image_to_buffer(const image_object& src_image, memory_object& dst_buffer, const size_t *src_origin, const size_t *region, size_t dst_offset, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueCopyImageToBuffer( m_queue, src_image.get(), dst_buffer.get(), src_origin, region, dst_offset, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to copy data from \p src_buffer to \p dst_image. /// /// \see_opencl_ref{clEnqueueCopyBufferToImage} event enqueue_copy_buffer_to_image(const memory_object& src_buffer, image_object& dst_image, size_t src_offset, const size_t *dst_origin, const size_t *region, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueCopyBufferToImage( m_queue, src_buffer.get(), dst_image.get(), src_offset, dst_origin, region, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to fill \p image with \p fill_color. /// /// \see_opencl_ref{clEnqueueFillImage} /// /// \opencl_version_warning{1,2} event enqueue_fill_image(image_object& image, const void *fill_color, const size_t *origin, const size_t *region, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueFillImage( m_queue, image.get(), fill_color, origin, region, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// \overload template event enqueue_fill_image(image_object& image, const void *fill_color, const extents origin, const extents region, const wait_list &events = wait_list()) { BOOST_ASSERT(image.get_context() == this->get_context()); size_t origin3[3] = { 0, 0, 0 }; size_t region3[3] = { 1, 1, 1 }; std::copy(origin.data(), origin.data() + N, origin3); std::copy(region.data(), region.data() + N, region3); return enqueue_fill_image( image, fill_color, origin3, region3, events ); } /// Enqueues a command to migrate \p mem_objects. /// /// \see_opencl_ref{clEnqueueMigrateMemObjects} /// /// \opencl_version_warning{1,2} event enqueue_migrate_memory_objects(uint_ num_mem_objects, const cl_mem *mem_objects, cl_mem_migration_flags flags, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueMigrateMemObjects( m_queue, num_mem_objects, mem_objects, flags, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // BOOST_COMPUTE_CL_VERSION_1_2 /// Enqueues a kernel for execution. /// /// \see_opencl_ref{clEnqueueNDRangeKernel} event enqueue_nd_range_kernel(const kernel &kernel, size_t work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(kernel.get_context() == this->get_context()); event event_; cl_int ret = clEnqueueNDRangeKernel( m_queue, kernel, static_cast(work_dim), global_work_offset, global_work_size, local_work_size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// \overload template event enqueue_nd_range_kernel(const kernel &kernel, const extents &global_work_offset, const extents &global_work_size, const extents &local_work_size, const wait_list &events = wait_list()) { return enqueue_nd_range_kernel( kernel, N, global_work_offset.data(), global_work_size.data(), local_work_size.data(), events ); } /// Convenience method which calls enqueue_nd_range_kernel() with a /// one-dimensional range. event enqueue_1d_range_kernel(const kernel &kernel, size_t global_work_offset, size_t global_work_size, size_t local_work_size, const wait_list &events = wait_list()) { return enqueue_nd_range_kernel( kernel, 1, &global_work_offset, &global_work_size, local_work_size ? &local_work_size : 0, events ); } /// Enqueues a kernel to execute using a single work-item. /// /// \see_opencl_ref{clEnqueueTask} event enqueue_task(const kernel &kernel, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(kernel.get_context() == this->get_context()); event event_; // clEnqueueTask() was deprecated in OpenCL 2.0. In that case we // just forward to the equivalent clEnqueueNDRangeKernel() call. #ifdef BOOST_COMPUTE_CL_VERSION_2_0 size_t one = 1; cl_int ret = clEnqueueNDRangeKernel( m_queue, kernel, 1, 0, &one, &one, events.size(), events.get_event_ptr(), &event_.get() ); #else cl_int ret = clEnqueueTask( m_queue, kernel, events.size(), events.get_event_ptr(), &event_.get() ); #endif if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a function to execute on the host. event enqueue_native_kernel(void (BOOST_COMPUTE_CL_CALLBACK *user_func)(void *), void *args, size_t cb_args, uint_ num_mem_objects, const cl_mem *mem_list, const void **args_mem_loc, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueNativeKernel( m_queue, user_func, args, cb_args, num_mem_objects, mem_list, args_mem_loc, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Convenience overload for enqueue_native_kernel() which enqueues a /// native kernel on the host with a nullary function. event enqueue_native_kernel(void (BOOST_COMPUTE_CL_CALLBACK *user_func)(void), const wait_list &events = wait_list()) { return enqueue_native_kernel( detail::nullary_native_kernel_trampoline, reinterpret_cast(&user_func), sizeof(user_func), 0, 0, 0, events ); } /// Flushes the command queue. /// /// \see_opencl_ref{clFlush} void flush() { BOOST_ASSERT(m_queue != 0); cl_int ret = clFlush(m_queue); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } } /// Blocks until all outstanding commands in the queue have finished. /// /// \see_opencl_ref{clFinish} void finish() { BOOST_ASSERT(m_queue != 0); cl_int ret = clFinish(m_queue); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } } /// Enqueues a barrier in the queue. void enqueue_barrier() { BOOST_ASSERT(m_queue != 0); cl_int ret = CL_SUCCESS; #ifdef BOOST_COMPUTE_CL_VERSION_1_2 if(get_device().check_version(1, 2)){ ret = clEnqueueBarrierWithWaitList(m_queue, 0, 0, 0); } else #endif // BOOST_COMPUTE_CL_VERSION_1_2 { // Suppress deprecated declarations warning BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS(); ret = clEnqueueBarrier(m_queue); BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS(); } if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } } #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a barrier in the queue after \p events. /// /// \opencl_version_warning{1,2} event enqueue_barrier(const wait_list &events) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = CL_SUCCESS; ret = clEnqueueBarrierWithWaitList( m_queue, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // BOOST_COMPUTE_CL_VERSION_1_2 /// Enqueues a marker in the queue and returns an event that can be /// used to track its progress. event enqueue_marker() { event event_; cl_int ret = CL_SUCCESS; #ifdef BOOST_COMPUTE_CL_VERSION_1_2 if(get_device().check_version(1, 2)){ ret = clEnqueueMarkerWithWaitList(m_queue, 0, 0, &event_.get()); } else #endif { // Suppress deprecated declarations warning BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS(); ret = clEnqueueMarker(m_queue, &event_.get()); BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS(); } if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a marker after \p events in the queue and returns an /// event that can be used to track its progress. /// /// \opencl_version_warning{1,2} event enqueue_marker(const wait_list &events) { event event_; cl_int ret = clEnqueueMarkerWithWaitList( m_queue, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // BOOST_COMPUTE_CL_VERSION_1_2 #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to copy \p size bytes of data from \p src_ptr to /// \p dst_ptr. /// /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clEnqueueSVMMemcpy} event enqueue_svm_memcpy(void *dst_ptr, const void *src_ptr, size_t size, const wait_list &events = wait_list()) { event event_; cl_int ret = clEnqueueSVMMemcpy( m_queue, CL_TRUE, dst_ptr, src_ptr, size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to copy \p size bytes of data from \p src_ptr to /// \p dst_ptr. The operation is performed asynchronously. /// /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clEnqueueSVMMemcpy} event enqueue_svm_memcpy_async(void *dst_ptr, const void *src_ptr, size_t size, const wait_list &events = wait_list()) { event event_; cl_int ret = clEnqueueSVMMemcpy( m_queue, CL_FALSE, dst_ptr, src_ptr, size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to fill \p size bytes of data at \p svm_ptr with /// \p pattern. /// /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clEnqueueSVMMemFill} event enqueue_svm_fill(void *svm_ptr, const void *pattern, size_t pattern_size, size_t size, const wait_list &events = wait_list()) { event event_; cl_int ret = clEnqueueSVMMemFill( m_queue, svm_ptr, pattern, pattern_size, size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to free \p svm_ptr. /// /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clEnqueueSVMFree} /// /// \see svm_free() event enqueue_svm_free(void *svm_ptr, const wait_list &events = wait_list()) { event event_; cl_int ret = clEnqueueSVMFree( m_queue, 1, &svm_ptr, 0, 0, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to map \p svm_ptr to the host memory space. /// /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clEnqueueSVMMap} event enqueue_svm_map(void *svm_ptr, size_t size, cl_map_flags flags, const wait_list &events = wait_list()) { event event_; cl_int ret = clEnqueueSVMMap( m_queue, CL_TRUE, flags, svm_ptr, size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to unmap \p svm_ptr from the host memory space. /// /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clEnqueueSVMUnmap} event enqueue_svm_unmap(void *svm_ptr, const wait_list &events = wait_list()) { event event_; cl_int ret = clEnqueueSVMUnmap( m_queue, svm_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // BOOST_COMPUTE_CL_VERSION_2_0 #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to indicate which device a set of ranges of SVM allocations /// should be associated with. The pair \p svm_ptrs[i] and \p sizes[i] together define /// the starting address and number of bytes in a range to be migrated. /// /// If \p sizes is empty, then that means every allocation containing any \p svm_ptrs[i] /// is to be migrated. Also, if \p sizes[i] is zero, then the entire allocation containing /// \p svm_ptrs[i] is migrated. /// /// \opencl_version_warning{2,1} /// /// \see_opencl21_ref{clEnqueueSVMMigrateMem} event enqueue_svm_migrate_memory(const std::vector &svm_ptrs, const std::vector &sizes, const cl_mem_migration_flags flags = 0, const wait_list &events = wait_list()) { BOOST_ASSERT(svm_ptrs.size() == sizes.size() || sizes.size() == 0); event event_; cl_int ret = clEnqueueSVMMigrateMem( m_queue, static_cast(svm_ptrs.size()), const_cast(&svm_ptrs[0]), sizes.size() > 0 ? &sizes[0] : NULL, flags, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to indicate which device a range of SVM allocation /// should be associated with. The pair \p svm_ptr and \p size together define /// the starting address and number of bytes in a range to be migrated. /// /// If \p size is 0, then the entire allocation containing \p svm_ptr is /// migrated. The default value for \p size is 0. /// /// \opencl_version_warning{2,1} /// /// \see_opencl21_ref{clEnqueueSVMMigrateMem} event enqueue_svm_migrate_memory(const void* svm_ptr, const size_t size = 0, const cl_mem_migration_flags flags = 0, const wait_list &events = wait_list()) { event event_; cl_int ret = clEnqueueSVMMigrateMem( m_queue, cl_uint(1), &svm_ptr, &size, flags, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // BOOST_COMPUTE_CL_VERSION_2_1 /// Returns \c true if the command queue is the same at \p other. bool operator==(const command_queue &other) const { return m_queue == other.m_queue; } /// Returns \c true if the command queue is different from \p other. bool operator!=(const command_queue &other) const { return m_queue != other.m_queue; } /// \internal_ operator cl_command_queue() const { return m_queue; } /// \internal_ bool check_device_version(int major, int minor) const { return get_device().check_version(major, minor); } private: cl_command_queue m_queue; }; inline buffer buffer::clone(command_queue &queue) const { buffer copy(get_context(), size(), get_memory_flags()); queue.enqueue_copy_buffer(*this, copy, 0, 0, size()); return copy; } inline image1d image1d::clone(command_queue &queue) const { image1d copy( get_context(), width(), format(), get_memory_flags() ); queue.enqueue_copy_image(*this, copy, origin(), copy.origin(), size()); return copy; } inline image2d image2d::clone(command_queue &queue) const { image2d copy( get_context(), width(), height(), format(), get_memory_flags() ); queue.enqueue_copy_image(*this, copy, origin(), copy.origin(), size()); return copy; } inline image3d image3d::clone(command_queue &queue) const { image3d copy( get_context(), width(), height(), depth(), format(), get_memory_flags() ); queue.enqueue_copy_image(*this, copy, origin(), copy.origin(), size()); return copy; } /// \internal_ define get_info() specializations for command_queue BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(command_queue, ((cl_context, CL_QUEUE_CONTEXT)) ((cl_device_id, CL_QUEUE_DEVICE)) ((uint_, CL_QUEUE_REFERENCE_COUNT)) ((cl_command_queue_properties, CL_QUEUE_PROPERTIES)) ) #ifdef BOOST_COMPUTE_CL_VERSION_2_1 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(command_queue, ((cl_command_queue, CL_QUEUE_DEVICE_DEFAULT)) ) #endif // BOOST_COMPUTE_CL_VERSION_2_1 } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_COMMAND_QUEUE_HPP