count_if_with_ballot.hpp 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. //---------------------------------------------------------------------------//
  2. // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
  3. //
  4. // Distributed under the Boost Software License, Version 1.0
  5. // See accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt
  7. //
  8. // See http://boostorg.github.com/compute for more information.
  9. //---------------------------------------------------------------------------//
  10. #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP
  11. #define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP
  12. #include <boost/compute/context.hpp>
  13. #include <boost/compute/command_queue.hpp>
  14. #include <boost/compute/container/vector.hpp>
  15. #include <boost/compute/algorithm/reduce.hpp>
  16. #include <boost/compute/functional/detail/nvidia_ballot.hpp>
  17. #include <boost/compute/functional/detail/nvidia_popcount.hpp>
  18. #include <boost/compute/detail/meta_kernel.hpp>
  19. namespace boost {
  20. namespace compute {
  21. namespace detail {
  22. template<class InputIterator, class Predicate>
  23. inline size_t count_if_with_ballot(InputIterator first,
  24. InputIterator last,
  25. Predicate predicate,
  26. command_queue &queue)
  27. {
  28. size_t count = iterator_range_size(first, last);
  29. size_t block_size = 32;
  30. size_t block_count = count / block_size;
  31. if(block_count * block_size != count){
  32. block_count++;
  33. }
  34. const ::boost::compute::context &context = queue.get_context();
  35. ::boost::compute::vector<uint_> counts(block_count, context);
  36. ::boost::compute::detail::nvidia_popcount<uint_> popc;
  37. ::boost::compute::detail::nvidia_ballot<uint_> ballot;
  38. meta_kernel k("count_if_with_ballot");
  39. k <<
  40. "const uint gid = get_global_id(0);\n" <<
  41. "bool value = false;\n" <<
  42. "if(gid < count)\n" <<
  43. " value = " << predicate(first[k.var<const uint_>("gid")]) << ";\n" <<
  44. "uint bits = " << ballot(k.var<const uint_>("value")) << ";\n" <<
  45. "if(get_local_id(0) == 0)\n" <<
  46. counts.begin()[k.var<uint_>("get_group_id(0)") ]
  47. << " = " << popc(k.var<uint_>("bits")) << ";\n";
  48. k.add_set_arg<const uint_>("count", count);
  49. k.exec_1d(queue, 0, block_size * block_count, block_size);
  50. uint_ result;
  51. ::boost::compute::reduce(
  52. counts.begin(),
  53. counts.end(),
  54. &result,
  55. queue
  56. );
  57. return result;
  58. }
  59. } // end detail namespace
  60. } // end compute namespace
  61. } // end boost namespace
  62. #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP