// devn00b / EQ2EMu
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Benoit Dequidt <benoit.dequidt@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>

#include <boost/compute/core.hpp>
#include <boost/compute/algorithm/copy.hpp>
#include <boost/compute/algorithm/inclusive_scan.hpp>
#include <boost/compute/container/vector.hpp>
#include <boost/compute/type_traits/type_name.hpp>
#include <boost/compute/utility/source.hpp>

namespace compute = boost::compute;

/// Builds the OpenCL program that contains the `SMA` (simple moving average)
/// kernel for the given context.
///
/// Kernel contract: `scannedValues` holds the inclusive prefix sum of the
/// input signal (`size` elements). Work-item `gid` takes the window
/// [gid - wSize/2, gid + wSize/2], clamped to [0, size - 1], obtains its sum
/// as scanned[end] - scanned[start - 1] and stores the window mean in
/// `output[gid]`. Work-items with gid >= size do nothing.
///
/// \warning The result loses precision when `size` is large, because the
/// prefix sum accumulates float rounding error. Use double or a Kahan
/// (compensated) summation if the output must stay close to a CPU
/// implementation.
///
/// \param context context the program is built for
/// \return the program built from the kernel source
compute::program make_sma_program(const compute::context& context)
{
    const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE(
        __kernel void SMA(__global const float *scannedValues, int size, __global float *output, int wSize)
        {
            const int gid = get_global_id(0);

            // a launch may use more work-items than there are samples
            // (global size rounded up to the work-group size): the extra
            // ones must not touch the buffers
            if(gid >= size)
                return;

            float cumValues = 0.f;
            int endIdx = gid + wSize/2;
            // index just before the first sample of the window
            int startIdx = gid -1 - wSize/2;

            // clamp the window to the last sample
            if(endIdx > size -1)
                endIdx = size -1;

            cumValues += scannedValues[endIdx];
            // window starting at sample 0: nothing to subtract, and -1 keeps
            // (endIdx - startIdx) equal to the number of samples averaged
            if(startIdx < 0)
                startIdx = -1;
            else
                cumValues -= scannedValues[startIdx];

            output[gid] =(float)( cumValues / ( float )(endIdx - startIdx));
        }
    );

    // create sma program
    return compute::program::build_with_source(source,context);
}

/// Validates a smoothed signal against a direct CPU moving average.
///
/// For every index the reference mean is computed over the window
/// [idx - wSize/2, idx + wSize/2], clamped to the bounds of `values`, and
/// compared with `smoothValues[idx]` using an absolute tolerance of 1e-3.
/// Each mismatch is reported on std::cout.
///
/// \param values       original input samples
/// \param smoothValues moving-average output to validate
/// \param wSize        window width in samples
/// \return true when both vectors have the same length and every element is
///         within tolerance; false otherwise (a NaN result counts as a
///         mismatch)
bool check_results(const std::vector<float>& values, const std::vector<float>& smoothValues, unsigned int wSize)
{
    const std::size_t size = values.size();
    if(size != smoothValues.size()) return false;

    const std::size_t semiWidth = wSize/2;
    const double tolerance = 1e-3;

    bool ret = true;
    for(std::size_t idx = 0 ; idx < size ; ++idx)
    {
        // clamp the window to [0, size - 1]; written so that neither bound
        // can wrap around in unsigned arithmetic
        const std::size_t start = (idx > semiWidth) ? idx - semiWidth : 0;
        const std::size_t end = (size - 1 - idx > semiWidth) ? idx + semiWidth : size - 1;

        float res = 0;
        for(std::size_t j = start ; j <= end ; ++j)
        {
            res += values[j];
        }

        res /= float(end - start + 1);

        // negated "<=" instead of ">" so that a NaN difference is reported
        // as a mismatch rather than silently accepted
        const double diff = std::fabs(res - smoothValues[idx]);
        if(!(diff <= tolerance))
        {
            std::cout << "idx = " << idx << " -- expected = " << res << " -- result = " << smoothValues[idx] << '\n';
            ret = false;
        }
    }

    return ret;
}

// Draw one pseudo-random sample from rand() and scale it to [0,10).
// The scaling is done in double precision; the value is then narrowed to
// float, which may round samples very close to 10 up to exactly 10.
float myRand()
{
    // number of distinct values rand() can return; the "+ 1" keeps the
    // ratio below strictly under 1
    static const double outcomes = static_cast<double>(RAND_MAX) + 1.0;

    const double unit = static_cast<double>(rand()) / outcomes;
    return static_cast<float>(unit * 10.0);
}

int main()
{
    unsigned int size = 1024;
    // wSize must be odd
    unsigned int wSize = 21;
    // get the default device
    compute::device device = compute::system::default_device();
    // create a context for the device
    compute::context context(device);
    // get the program
    compute::program program = make_sma_program(context);

    // create vector of random numbers on the host
    std::vector<float> host_vector(size);
    std::vector<float> host_result(size);
    std::generate(host_vector.begin(), host_vector.end(), myRand);

    compute::vector<float> a(size,context);
    compute::vector<float> b(size,context);
    compute::vector<float> c(size,context);
    compute::command_queue queue(context, device);

    compute::copy(host_vector.begin(),host_vector.end(),a.begin(),queue);

    // scan values
    compute::inclusive_scan(a.begin(),a.end(),b.begin(),queue);
    // sma kernel
    compute::kernel kernel(program, "SMA");
    kernel.set_arg(0,b.get_buffer());
    kernel.set_arg(1,(int)b.size());
    kernel.set_arg(2,c.get_buffer());
    kernel.set_arg(3,(int)wSize);

    using compute::uint_;
    uint_ tpb = 128;
    uint_ workSize = size;
    queue.enqueue_1d_range_kernel(kernel,0,workSize,tpb);

    compute::copy(c.begin(),c.end(),host_result.begin(),queue);

    bool res = check_results(host_vector,host_result,wSize);
    std::string status = res ? "results are equivalent" : "GPU results differs from CPU one's";
    std::cout << status << std::endl;

    return 0;
}