123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538 |
- //---------------------------------------------------------------------------//
- // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
- //
- // Distributed under the Boost Software License, Version 1.0
- // See accompanying file LICENSE_1_0.txt or copy at
- // http://www.boost.org/LICENSE_1_0.txt
- //
- // See http://boostorg.github.com/compute for more information.
- //---------------------------------------------------------------------------//
- #ifndef BOOST_COMPUTE_KERNEL_HPP
- #define BOOST_COMPUTE_KERNEL_HPP
- #include <string>
- #include <boost/assert.hpp>
- #include <boost/utility/enable_if.hpp>
- #include <boost/optional.hpp>
- #include <boost/compute/cl_ext.hpp> // cl_khr_subgroups
- #include <boost/compute/config.hpp>
- #include <boost/compute/exception.hpp>
- #include <boost/compute/program.hpp>
- #include <boost/compute/platform.hpp>
- #include <boost/compute/type_traits/is_fundamental.hpp>
- #include <boost/compute/detail/diagnostic.hpp>
- #include <boost/compute/detail/get_object_info.hpp>
- #include <boost/compute/detail/assert_cl_success.hpp>
- namespace boost {
- namespace compute {
namespace detail {

// Forward declaration of the functor that kernel::set_arg(index, value)
// dispatches to; the primary template and its specializations are defined
// after the kernel class.
template<typename T>
struct set_kernel_arg;

} // end detail namespace
- /// \class kernel
- /// \brief A compute kernel.
- ///
- /// \see command_queue, program
- class kernel
- {
- public:
- /// Creates a null kernel object.
- kernel()
- : m_kernel(0)
- {
- }
- /// Creates a new kernel object for \p kernel. If \p retain is
- /// \c true, the reference count for \p kernel will be incremented.
- explicit kernel(cl_kernel kernel, bool retain = true)
- : m_kernel(kernel)
- {
- if(m_kernel && retain){
- clRetainKernel(m_kernel);
- }
- }
- /// Creates a new kernel object with \p name from \p program.
- kernel(const program &program, const std::string &name)
- {
- cl_int error = 0;
- m_kernel = clCreateKernel(program.get(), name.c_str(), &error);
- if(!m_kernel){
- BOOST_THROW_EXCEPTION(opencl_error(error));
- }
- }
- /// Creates a new kernel object as a copy of \p other.
- kernel(const kernel &other)
- : m_kernel(other.m_kernel)
- {
- if(m_kernel){
- clRetainKernel(m_kernel);
- }
- }
- /// Copies the kernel object from \p other to \c *this.
- kernel& operator=(const kernel &other)
- {
- if(this != &other){
- if(m_kernel){
- clReleaseKernel(m_kernel);
- }
- m_kernel = other.m_kernel;
- if(m_kernel){
- clRetainKernel(m_kernel);
- }
- }
- return *this;
- }
- #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
- /// Move-constructs a new kernel object from \p other.
- kernel(kernel&& other) BOOST_NOEXCEPT
- : m_kernel(other.m_kernel)
- {
- other.m_kernel = 0;
- }
- /// Move-assigns the kernel from \p other to \c *this.
- kernel& operator=(kernel&& other) BOOST_NOEXCEPT
- {
- if(m_kernel){
- clReleaseKernel(m_kernel);
- }
- m_kernel = other.m_kernel;
- other.m_kernel = 0;
- return *this;
- }
- #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
- /// Destroys the kernel object.
- ~kernel()
- {
- if(m_kernel){
- BOOST_COMPUTE_ASSERT_CL_SUCCESS(
- clReleaseKernel(m_kernel)
- );
- }
- }
- #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
- /// Creates a new kernel object based on a shallow copy of
- /// the undelying OpenCL kernel object.
- ///
- /// \opencl_version_warning{2,1}
- ///
- /// \see_opencl21_ref{clCloneKernel}
- kernel clone()
- {
- cl_int ret = 0;
- cl_kernel k = clCloneKernel(m_kernel, &ret);
- return kernel(k, false);
- }
- #endif // BOOST_COMPUTE_CL_VERSION_2_1
- /// Returns a reference to the underlying OpenCL kernel object.
- cl_kernel& get() const
- {
- return const_cast<cl_kernel &>(m_kernel);
- }
- /// Returns the function name for the kernel.
- std::string name() const
- {
- return get_info<std::string>(CL_KERNEL_FUNCTION_NAME);
- }
- /// Returns the number of arguments for the kernel.
- size_t arity() const
- {
- return get_info<cl_uint>(CL_KERNEL_NUM_ARGS);
- }
- /// Returns the program for the kernel.
- program get_program() const
- {
- return program(get_info<cl_program>(CL_KERNEL_PROGRAM));
- }
- /// Returns the context for the kernel.
- context get_context() const
- {
- return context(get_info<cl_context>(CL_KERNEL_CONTEXT));
- }
- /// Returns information about the kernel.
- ///
- /// \see_opencl_ref{clGetKernelInfo}
- template<class T>
- T get_info(cl_kernel_info info) const
- {
- return detail::get_object_info<T>(clGetKernelInfo, m_kernel, info);
- }
- /// \overload
- template<int Enum>
- typename detail::get_object_info_type<kernel, Enum>::type
- get_info() const;
- #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
- /// Returns information about the argument at \p index.
- ///
- /// For example, to get the name of the first argument:
- /// \code
- /// std::string arg = kernel.get_arg_info<std::string>(0, CL_KERNEL_ARG_NAME);
- /// \endcode
- ///
- /// Note, this function requires that the program be compiled with the
- /// \c "-cl-kernel-arg-info" flag. For example:
- /// \code
- /// program.build("-cl-kernel-arg-info");
- /// \endcode
- ///
- /// \opencl_version_warning{1,2}
- ///
- /// \see_opencl_ref{clGetKernelArgInfo}
- template<class T>
- T get_arg_info(size_t index, cl_kernel_arg_info info) const
- {
- return detail::get_object_info<T>(
- clGetKernelArgInfo, m_kernel, info, static_cast<cl_uint>(index)
- );
- }
- /// \overload
- template<int Enum>
- typename detail::get_object_info_type<kernel, Enum>::type
- get_arg_info(size_t index) const;
- #endif // BOOST_COMPUTE_CL_VERSION_1_2
- /// Returns work-group information for the kernel with \p device.
- ///
- /// \see_opencl_ref{clGetKernelWorkGroupInfo}
- template<class T>
- T get_work_group_info(const device &device, cl_kernel_work_group_info info) const
- {
- return detail::get_object_info<T>(clGetKernelWorkGroupInfo, m_kernel, info, device.id());
- }
- #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
- /// Returns sub-group information for the kernel with \p device. Returns a null
- /// optional if \p device is not 2.1 device, or is not 2.0 device with support
- /// for cl_khr_subgroups extension.
- ///
- /// \opencl_version_warning{2,1}
- /// \see_opencl21_ref{clGetKernelSubGroupInfo}
- /// \see_opencl2_ref{clGetKernelSubGroupInfoKHR}
- template<class T>
- boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
- const size_t input_size, const void * input) const
- {
- if(device.check_version(2, 1))
- {
- return detail::get_object_info<T>(
- clGetKernelSubGroupInfo, m_kernel, info, device.id(), input_size, input
- );
- }
- else if(!device.check_version(2, 0) || !device.supports_extension("cl_khr_subgroups"))
- {
- return boost::optional<T>();
- }
- // Only CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE and CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE
- // are supported in cl_khr_subgroups extension for 2.0 devices.
- else if(info != CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE && info != CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE)
- {
- return boost::optional<T>();
- }
- BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS();
- clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_fptr =
- reinterpret_cast<clGetKernelSubGroupInfoKHR_fn>(
- reinterpret_cast<size_t>(
- device.platform().get_extension_function_address("clGetKernelSubGroupInfoKHR")
- )
- );
- BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS();
- return detail::get_object_info<T>(
- clGetKernelSubGroupInfoKHR_fptr, m_kernel, info, device.id(), input_size, input
- );
- }
- /// \overload
- template<class T>
- boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info) const
- {
- return get_sub_group_info<T>(device, info, 0, 0);
- }
- /// \overload
- template<class T>
- boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
- const size_t input) const
- {
- return get_sub_group_info<T>(device, info, sizeof(size_t), &input);
- }
- #endif // BOOST_COMPUTE_CL_VERSION_2_1
- #if defined(BOOST_COMPUTE_CL_VERSION_2_0) && !defined(BOOST_COMPUTE_CL_VERSION_2_1)
- /// Returns sub-group information for the kernel with \p device. Returns a null
- /// optional if cl_khr_subgroups extension is not supported by \p device.
- ///
- /// \opencl_version_warning{2,0}
- /// \see_opencl2_ref{clGetKernelSubGroupInfoKHR}
- template<class T>
- boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
- const size_t input_size, const void * input) const
- {
- if(!device.check_version(2, 0) || !device.supports_extension("cl_khr_subgroups"))
- {
- return boost::optional<T>();
- }
- BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS();
- clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_fptr =
- reinterpret_cast<clGetKernelSubGroupInfoKHR_fn>(
- reinterpret_cast<size_t>(
- device.platform().get_extension_function_address("clGetKernelSubGroupInfoKHR")
- )
- );
- BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS();
- return detail::get_object_info<T>(
- clGetKernelSubGroupInfoKHR_fptr, m_kernel, info, device.id(), input_size, input
- );
- }
- #endif // defined(BOOST_COMPUTE_CL_VERSION_2_0) && !defined(BOOST_COMPUTE_CL_VERSION_2_1)
- #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
- /// \overload
- template<class T>
- boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
- const std::vector<size_t> input) const
- {
- BOOST_ASSERT(input.size() > 0);
- return get_sub_group_info<T>(device, info, input.size() * sizeof(size_t), &input[0]);
- }
- #endif // BOOST_COMPUTE_CL_VERSION_2_0
- /// Sets the argument at \p index to \p value with \p size.
- ///
- /// \see_opencl_ref{clSetKernelArg}
- void set_arg(size_t index, size_t size, const void *value)
- {
- BOOST_ASSERT(index < arity());
- cl_int ret = clSetKernelArg(m_kernel,
- static_cast<cl_uint>(index),
- size,
- value);
- if(ret != CL_SUCCESS){
- BOOST_THROW_EXCEPTION(opencl_error(ret));
- }
- }
- /// Sets the argument at \p index to \p value.
- ///
- /// For built-in types (e.g. \c float, \c int4_), this is equivalent to
- /// calling set_arg(index, sizeof(type), &value).
- ///
- /// Additionally, this method is specialized for device memory objects
- /// such as buffer and image2d. This allows for them to be passed directly
- /// without having to extract their underlying cl_mem object.
- ///
- /// This method is also specialized for device container types such as
- /// vector<T> and array<T, N>. This allows for them to be passed directly
- /// as kernel arguments without having to extract their underlying buffer.
- ///
- /// For setting local memory arguments (e.g. "__local float *buf"), the
- /// local_buffer<T> class may be used:
- /// \code
- /// // set argument to a local buffer with storage for 32 float's
- /// kernel.set_arg(0, local_buffer<float>(32));
- /// \endcode
- template<class T>
- void set_arg(size_t index, const T &value)
- {
- // if you get a compilation error pointing here it means you
- // attempted to set a kernel argument from an invalid type.
- detail::set_kernel_arg<T>()(*this, index, value);
- }
- /// \internal_
- void set_arg(size_t index, const cl_mem mem)
- {
- set_arg(index, sizeof(cl_mem), static_cast<const void *>(&mem));
- }
- /// \internal_
- void set_arg(size_t index, const cl_sampler sampler)
- {
- set_arg(index, sizeof(cl_sampler), static_cast<const void *>(&sampler));
- }
- /// \internal_
- void set_arg_svm_ptr(size_t index, void* ptr)
- {
- #ifdef BOOST_COMPUTE_CL_VERSION_2_0
- cl_int ret = clSetKernelArgSVMPointer(m_kernel, static_cast<cl_uint>(index), ptr);
- if(ret != CL_SUCCESS){
- BOOST_THROW_EXCEPTION(opencl_error(ret));
- }
- #else
- (void) index;
- (void) ptr;
- BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_ARG_VALUE));
- #endif
- }
- #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
- /// Sets the arguments for the kernel to \p args.
- template<class... T>
- void set_args(T&&... args)
- {
- BOOST_ASSERT(sizeof...(T) <= arity());
- _set_args<0>(args...);
- }
- #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
- #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
- /// Sets additional execution information for the kernel.
- ///
- /// \opencl_version_warning{2,0}
- ///
- /// \see_opencl2_ref{clSetKernelExecInfo}
- void set_exec_info(cl_kernel_exec_info info, size_t size, const void *value)
- {
- cl_int ret = clSetKernelExecInfo(m_kernel, info, size, value);
- if(ret != CL_SUCCESS){
- BOOST_THROW_EXCEPTION(opencl_error(ret));
- }
- }
- #endif // BOOST_COMPUTE_CL_VERSION_2_0
- /// Returns \c true if the kernel is the same at \p other.
- bool operator==(const kernel &other) const
- {
- return m_kernel == other.m_kernel;
- }
- /// Returns \c true if the kernel is different from \p other.
- bool operator!=(const kernel &other) const
- {
- return m_kernel != other.m_kernel;
- }
- /// \internal_
- operator cl_kernel() const
- {
- return m_kernel;
- }
- /// \internal_
- static kernel create_with_source(const std::string &source,
- const std::string &name,
- const context &context)
- {
- return program::build_with_source(source, context).create_kernel(name);
- }
- private:
- #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
- /// \internal_
- template<size_t N>
- void _set_args()
- {
- }
- /// \internal_
- template<size_t N, class T, class... Args>
- void _set_args(T&& arg, Args&&... rest)
- {
- set_arg(N, arg);
- _set_args<N+1>(rest...);
- }
- #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
- private:
- cl_kernel m_kernel;
- };
- inline kernel program::create_kernel(const std::string &name) const
- {
- return kernel(*this, name);
- }
- /// \internal_ define get_info() specializations for kernel
- BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel,
- ((std::string, CL_KERNEL_FUNCTION_NAME))
- ((cl_uint, CL_KERNEL_NUM_ARGS))
- ((cl_uint, CL_KERNEL_REFERENCE_COUNT))
- ((cl_context, CL_KERNEL_CONTEXT))
- ((cl_program, CL_KERNEL_PROGRAM))
- )
- #ifdef BOOST_COMPUTE_CL_VERSION_1_2
- BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel,
- ((std::string, CL_KERNEL_ATTRIBUTES))
- )
- #endif // BOOST_COMPUTE_CL_VERSION_1_2
/// \internal_ define get_arg_info() specializations for kernel
#if defined(BOOST_COMPUTE_CL_VERSION_1_2)

// For one (result_type, value) pair this macro emits:
//   * a get_object_info_type<kernel, value> trait naming result_type, and
//   * the kernel::get_arg_info<value>(index) specialization, which forwards
//     to the run-time overload get_arg_info<result_type>(index, value).
#define BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(result_type, value) \
    namespace detail { \
        template<> struct get_object_info_type<kernel, value> { typedef result_type type; }; \
    } \
    template<> inline result_type kernel::get_arg_info<value>(size_t index) const { \
        return get_arg_info<result_type>(index, value); \
    }

BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(cl_kernel_arg_address_qualifier, CL_KERNEL_ARG_ADDRESS_QUALIFIER)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(cl_kernel_arg_access_qualifier, CL_KERNEL_ARG_ACCESS_QUALIFIER)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(std::string, CL_KERNEL_ARG_TYPE_NAME)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(cl_kernel_arg_type_qualifier, CL_KERNEL_ARG_TYPE_QUALIFIER)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(std::string, CL_KERNEL_ARG_NAME)

#endif // BOOST_COMPUTE_CL_VERSION_1_2
- namespace detail {
- // set_kernel_arg implementation for built-in types
- template<class T>
- struct set_kernel_arg
- {
- typename boost::enable_if<is_fundamental<T> >::type
- operator()(kernel &kernel_, size_t index, const T &value)
- {
- kernel_.set_arg(index, sizeof(T), &value);
- }
- };
- // set_kernel_arg specialization for char (different from built-in cl_char)
- template<>
- struct set_kernel_arg<char>
- {
- void operator()(kernel &kernel_, size_t index, const char c)
- {
- kernel_.set_arg(index, sizeof(char), &c);
- }
- };
- } // end detail namespace
- } // end namespace compute
- } // end namespace boost
- #endif // BOOST_COMPUTE_KERNEL_HPP
|