simple_moving_average.cpp 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. //---------------------------------------------------------------------------//
  2. // Copyright (c) 2014 Benoit Dequidt <benoit.dequidt@gmail.com>
  3. //
  4. // Distributed under the Boost Software License, Version 1.0
  5. // See accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt
  7. //
  8. // See http://boostorg.github.com/compute for more information.
  9. //---------------------------------------------------------------------------//
  10. #include <iostream>
  11. #include <cstdlib>
  12. #include <boost/compute/core.hpp>
  13. #include <boost/compute/algorithm/copy.hpp>
  14. #include <boost/compute/algorithm/inclusive_scan.hpp>
  15. #include <boost/compute/container/vector.hpp>
  16. #include <boost/compute/type_traits/type_name.hpp>
  17. #include <boost/compute/utility/source.hpp>
  18. namespace compute = boost::compute;
  19. /// warning precision is not precise due
  20. /// to the float error accumulation when size is large enough
  21. /// for more precision use double
  22. /// or a kahan sum else results can diverge
  23. /// from the CPU implementation
  24. compute::program make_sma_program(const compute::context& context)
  25. {
  26. const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE(
  27. __kernel void SMA(__global const float *scannedValues, int size, __global float *output, int wSize)
  28. {
  29. const int gid = get_global_id(0);
  30. float cumValues = 0.f;
  31. int endIdx = gid + wSize/2;
  32. int startIdx = gid -1 - wSize/2;
  33. if(endIdx > size -1)
  34. endIdx = size -1;
  35. cumValues += scannedValues[endIdx];
  36. if(startIdx < 0)
  37. startIdx = -1;
  38. else
  39. cumValues -= scannedValues[startIdx];
  40. output[gid] =(float)( cumValues / ( float )(endIdx - startIdx));
  41. }
  42. );
  43. // create sma program
  44. return compute::program::build_with_source(source,context);
  45. }
  46. bool check_results(const std::vector<float>& values, const std::vector<float>& smoothValues, unsigned int wSize)
  47. {
  48. int size = values.size();
  49. if(size != (int)smoothValues.size()) return false;
  50. int semiWidth = wSize/2;
  51. bool ret = true;
  52. for(int idx = 0 ; idx < size ; ++idx)
  53. {
  54. int start = (std::max)(idx - semiWidth,0);
  55. int end = (std::min)(idx + semiWidth,size-1);
  56. float res = 0;
  57. for(int j = start ; j <= end ; ++j)
  58. {
  59. res+= values[j];
  60. }
  61. res /= float(end - start +1);
  62. if(std::abs(res-smoothValues[idx]) > 1e-3)
  63. {
  64. std::cout << "idx = " << idx << " -- expected = " << res << " -- result = " << smoothValues[idx] << std::endl;
  65. ret = false;
  66. }
  67. }
  68. return ret;
  69. }
  70. // generate a uniform law over [0,10]
  71. float myRand()
  72. {
  73. static const double divisor = double(RAND_MAX)+1.;
  74. return double(rand())/divisor * 10.;
  75. }
  76. int main()
  77. {
  78. unsigned int size = 1024;
  79. // wSize must be odd
  80. unsigned int wSize = 21;
  81. // get the default device
  82. compute::device device = compute::system::default_device();
  83. // create a context for the device
  84. compute::context context(device);
  85. // get the program
  86. compute::program program = make_sma_program(context);
  87. // create vector of random numbers on the host
  88. std::vector<float> host_vector(size);
  89. std::vector<float> host_result(size);
  90. std::generate(host_vector.begin(), host_vector.end(), myRand);
  91. compute::vector<float> a(size,context);
  92. compute::vector<float> b(size,context);
  93. compute::vector<float> c(size,context);
  94. compute::command_queue queue(context, device);
  95. compute::copy(host_vector.begin(),host_vector.end(),a.begin(),queue);
  96. // scan values
  97. compute::inclusive_scan(a.begin(),a.end(),b.begin(),queue);
  98. // sma kernel
  99. compute::kernel kernel(program, "SMA");
  100. kernel.set_arg(0,b.get_buffer());
  101. kernel.set_arg(1,(int)b.size());
  102. kernel.set_arg(2,c.get_buffer());
  103. kernel.set_arg(3,(int)wSize);
  104. using compute::uint_;
  105. uint_ tpb = 128;
  106. uint_ workSize = size;
  107. queue.enqueue_1d_range_kernel(kernel,0,workSize,tpb);
  108. compute::copy(c.begin(),c.end(),host_result.begin(),queue);
  109. bool res = check_results(host_vector,host_result,wSize);
  110. std::string status = res ? "results are equivalent" : "GPU results differs from CPU one's";
  111. std::cout << status << std::endl;
  112. return 0;
  113. }