amd_cpp_kernel.cpp 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. //---------------------------------------------------------------------------//
  2. // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
  3. //
  4. // Distributed under the Boost Software License, Version 1.0
  5. // See accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt
  7. //
  8. // See http://boostorg.github.com/compute for more information.
  9. //---------------------------------------------------------------------------//
  #include <cstddef>
  #include <iostream>
  #include <iterator>

  #include <boost/compute/command_queue.hpp>
  #include <boost/compute/kernel.hpp>
  #include <boost/compute/program.hpp>
  #include <boost/compute/system.hpp>
  #include <boost/compute/algorithm/copy.hpp>
  #include <boost/compute/container/vector.hpp>
  #include <boost/compute/utility/source.hpp>

  namespace compute = boost::compute;
  // This example shows how to use the static C++ kernel language
  // extension (currently only supported by AMD) to compile and
  // execute a templated C++ kernel.
  // The platform vendor string is used to decide whether this is an AMD platform.
  23. int main()
  24. {
  25. // get default device and setup context
  26. compute::device device = compute::system::default_device();
  27. compute::context context(device);
  28. compute::command_queue queue(context, device);
  29. // check the platform vendor string
  30. if(device.platform().vendor() != "Advanced Micro Devices, Inc."){
  31. std::cerr << "error: static C++ kernel language is only "
  32. << "supported on AMD devices."
  33. << std::endl;
  34. return 0;
  35. }
  36. // create input int values and copy them to the device
  37. int int_data[] = { 1, 2, 3, 4};
  38. compute::vector<int> int_vector(int_data, int_data + 4, queue);
  39. // create input float values and copy them to the device
  40. float float_data[] = { 2.0f, 4.0f, 6.0f, 8.0f };
  41. compute::vector<float> float_vector(float_data, float_data + 4, queue);
  42. // create kernel source with a templated function and templated kernel
  43. const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE(
  44. // define our templated function which returns the square of its input
  45. template<typename T>
  46. inline T square(const T x)
  47. {
  48. return x * x;
  49. }
  50. // define our templated kernel which calls square on each value in data
  51. template<typename T>
  52. __kernel void square_kernel(__global T *data)
  53. {
  54. const uint i = get_global_id(0);
  55. data[i] = square(data[i]);
  56. }
  57. // explicitly instantiate the square kernel for int's. this allows
  58. // for it to be called from the host with the given mangled name.
  59. template __attribute__((mangled_name(square_kernel_int)))
  60. __kernel void square_kernel(__global int *data);
  61. // also instantiate the square kernel for float's.
  62. template __attribute__((mangled_name(square_kernel_float)))
  63. __kernel void square_kernel(__global float *data);
  64. );
  65. // build the program. must enable the c++ static kernel language
  66. // by passing the "-x clc++" compile option.
  67. compute::program square_program =
  68. compute::program::build_with_source(source, context, "-x clc++");
  69. // create the square kernel for int's by using its mangled name declared
  70. // in the explicit template instantiation.
  71. compute::kernel square_int_kernel(square_program, "square_kernel_int");
  72. square_int_kernel.set_arg(0, int_vector);
  73. // execute the square int kernel
  74. queue.enqueue_1d_range_kernel(square_int_kernel, 0, int_vector.size(), 4);
  75. // print out the squared int values
  76. std::cout << "int's: ";
  77. compute::copy(
  78. int_vector.begin(), int_vector.end(),
  79. std::ostream_iterator<int>(std::cout, " "),
  80. queue
  81. );
  82. std::cout << std::endl;
  83. // now create the square kernel for float's
  84. compute::kernel square_float_kernel(square_program, "square_kernel_float");
  85. square_float_kernel.set_arg(0, float_vector);
  86. // execute the square int kernel
  87. queue.enqueue_1d_range_kernel(square_float_kernel, 0, float_vector.size(), 4);
  88. // print out the squared float values
  89. std::cout << "float's: ";
  90. compute::copy(
  91. float_vector.begin(), float_vector.end(),
  92. std::ostream_iterator<float>(std::cout, " "),
  93. queue
  94. );
  95. std::cout << std::endl;
  96. return 0;
  97. }