123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139 |
- //---------------------------------------------------------------------------//
- // Copyright (c) 2014 Benoit Dequidt <benoit.dequidt@gmail.com>
- //
- // Distributed under the Boost Software License, Version 1.0
- // See accompanying file LICENSE_1_0.txt or copy at
- // http://www.boost.org/LICENSE_1_0.txt
- //
- // See http://boostorg.github.com/compute for more information.
- //---------------------------------------------------------------------------//
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>

#include <boost/compute/core.hpp>
#include <boost/compute/algorithm/copy.hpp>
#include <boost/compute/algorithm/inclusive_scan.hpp>
#include <boost/compute/container/vector.hpp>
#include <boost/compute/type_traits/type_name.hpp>
#include <boost/compute/utility/source.hpp>
- namespace compute = boost::compute;
- /// warning precision is not precise due
- /// to the float error accumulation when size is large enough
- /// for more precision use double
- /// or a kahan sum else results can diverge
- /// from the CPU implementation
- compute::program make_sma_program(const compute::context& context)
- {
- const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE(
- __kernel void SMA(__global const float *scannedValues, int size, __global float *output, int wSize)
- {
- const int gid = get_global_id(0);
- float cumValues = 0.f;
- int endIdx = gid + wSize/2;
- int startIdx = gid -1 - wSize/2;
- if(endIdx > size -1)
- endIdx = size -1;
- cumValues += scannedValues[endIdx];
- if(startIdx < 0)
- startIdx = -1;
- else
- cumValues -= scannedValues[startIdx];
- output[gid] =(float)( cumValues / ( float )(endIdx - startIdx));
- }
- );
- // create sma program
- return compute::program::build_with_source(source,context);
- }
/// Checks a smoothed series against a brute-force moving average computed on
/// the host.
///
/// For every index the reference value is the mean of the elements of
/// `values` lying within wSize/2 positions of that index, the window being
/// clipped at both ends of the series. Every mismatch is reported on
/// std::cout.
///
/// \param values       the original series
/// \param smoothValues the smoothed series to verify
/// \param wSize        window width
/// \return true when both series have the same length and every element of
///         smoothValues is within 1e-3 of its reference value; false
///         otherwise. A NaN element counts as a mismatch.
bool check_results(const std::vector<float>& values, const std::vector<float>& smoothValues, unsigned int wSize)
{
    const std::size_t count = values.size();
    if(smoothValues.size() != count) return false;

    const std::size_t semiWidth = wSize / 2;
    bool ret = true;

    for(std::size_t idx = 0 ; idx < count ; ++idx)
    {
        // window bounds, clipped to the series without going through
        // negative intermediate values
        const std::size_t start = idx > semiWidth ? idx - semiWidth : 0;
        const std::size_t end = (std::min)(idx + semiWidth, count - 1);

        float res = 0;
        for(std::size_t j = start ; j <= end ; ++j)
        {
            res += values[j];
        }
        res /= float(end - start + 1);

        // written as !(error <= tolerance) so that a NaN result, for which
        // every ordered comparison is false, is reported as a mismatch
        const float error = std::abs(res - smoothValues[idx]);
        if(!(error <= 1e-3))
        {
            std::cout << "idx = " << idx << " -- expected = " << res << " -- result = " << smoothValues[idx] << std::endl;
            ret = false;
        }
    }

    return ret;
}
// draw a pseudo-random value following a uniform law over [0,10];
// rand() is scaled by RAND_MAX + 1 in double precision and the product
// is narrowed to float on return
float myRand()
{
    static const double divisor = static_cast<double>(RAND_MAX) + 1.0;
    const double unitSample = static_cast<double>(std::rand()) / divisor;
    return static_cast<float>(unitSample * 10.0);
}
- int main()
- {
- unsigned int size = 1024;
- // wSize must be odd
- unsigned int wSize = 21;
- // get the default device
- compute::device device = compute::system::default_device();
- // create a context for the device
- compute::context context(device);
- // get the program
- compute::program program = make_sma_program(context);
- // create vector of random numbers on the host
- std::vector<float> host_vector(size);
- std::vector<float> host_result(size);
- std::generate(host_vector.begin(), host_vector.end(), myRand);
- compute::vector<float> a(size,context);
- compute::vector<float> b(size,context);
- compute::vector<float> c(size,context);
- compute::command_queue queue(context, device);
- compute::copy(host_vector.begin(),host_vector.end(),a.begin(),queue);
- // scan values
- compute::inclusive_scan(a.begin(),a.end(),b.begin(),queue);
- // sma kernel
- compute::kernel kernel(program, "SMA");
- kernel.set_arg(0,b.get_buffer());
- kernel.set_arg(1,(int)b.size());
- kernel.set_arg(2,c.get_buffer());
- kernel.set_arg(3,(int)wSize);
- using compute::uint_;
- uint_ tpb = 128;
- uint_ workSize = size;
- queue.enqueue_1d_range_kernel(kernel,0,workSize,tpb);
- compute::copy(c.begin(),c.end(),host_result.begin(),queue);
- bool res = check_results(host_vector,host_result,wSize);
- std::string status = res ? "results are equivalent" : "GPU results differs from CPU one's";
- std::cout << status << std::endl;
- return 0;
- }
|