hip.qbk 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. [/
  2. Copyright Oliver Kowalke 2017.
  3. Distributed under the Boost Software License, Version 1.0.
  4. (See accompanying file LICENSE_1_0.txt or copy at
  5. http://www.boost.org/LICENSE_1_0.txt
  6. ]
  7. [#hip]
  8. [section:hip ROCm/HIP]
  9. [@http://github.com/ROCm-Developer-Tools/HIP/tree/roc-1.6.0/ HIP] is part of the
  10. [@http://rocm.github.io/ ROC (Radeon Open Compute)] platform for parallel computing
  11. on AMD and NVIDIA GPUs. The application programming interface of HIP gives access to
  12. GPU's instruction set and computation resources (Execution of compute kernels).
  13. [heading Synchronization with ROCm/HIP streams]
  14. HIP operation such as compute kernels or memory transfer (between host and
  15. device) can be grouped/queued by HIP streams. are executed on the GPUs.
  16. Boost.Fiber enables a fiber to sleep (suspend) till a HIP stream has completed
  17. its operations. This enables applications to run other fibers on the CPU without
  18. the need to spawn an additional OS-threads. And resume the fiber when the HIP
  19. streams has finished.
  20. __global__
  21. void kernel( int size, int * a, int * b, int * c) {
  22. int idx = threadIdx.x + blockIdx.x * blockDim.x;
  23. if ( idx < size) {
  24. int idx1 = (idx + 1) % 256;
  25. int idx2 = (idx + 2) % 256;
  26. float as = (a[idx] + a[idx1] + a[idx2]) / 3.0f;
  27. float bs = (b[idx] + b[idx1] + b[idx2]) / 3.0f;
  28. c[idx] = (as + bs) / 2;
  29. }
  30. }
  31. boost::fibers::fiber f([&done]{
  32. hipStream_t stream;
  33. hipStreamCreate( & stream);
  34. int size = 1024 * 1024;
  35. int full_size = 20 * size;
  36. int * host_a, * host_b, * host_c;
  37. hipHostMalloc( & host_a, full_size * sizeof( int), hipHostMallocDefault);
  38. hipHostMalloc( & host_b, full_size * sizeof( int), hipHostMallocDefault);
  39. hipHostMalloc( & host_c, full_size * sizeof( int), hipHostMallocDefault);
  40. int * dev_a, * dev_b, * dev_c;
  41. hipMalloc( & dev_a, size * sizeof( int) );
  42. hipMalloc( & dev_b, size * sizeof( int) );
  43. hipMalloc( & dev_c, size * sizeof( int) );
  44. std::minstd_rand generator;
  45. std::uniform_int_distribution<> distribution(1, 6);
  46. for ( int i = 0; i < full_size; ++i) {
  47. host_a[i] = distribution( generator);
  48. host_b[i] = distribution( generator);
  49. }
  50. for ( int i = 0; i < full_size; i += size) {
  51. hipMemcpyAsync( dev_a, host_a + i, size * sizeof( int), hipMemcpyHostToDevice, stream);
  52. hipMemcpyAsync( dev_b, host_b + i, size * sizeof( int), hipMemcpyHostToDevice, stream);
  53. hipLaunchKernel(kernel, dim3(size / 256), dim3(256), 0, stream, size, dev_a, dev_b, dev_c);
  54. hipMemcpyAsync( host_c + i, dev_c, size * sizeof( int), hipMemcpyDeviceToHost, stream);
  55. }
  56. auto result = boost::fibers::hip::waitfor_all( stream); // suspend fiber till HIP stream has finished
  57. BOOST_ASSERT( stream == std::get< 0 >( result) );
  58. BOOST_ASSERT( hipSuccess == std::get< 1 >( result) );
  59. std::cout << "f1: GPU computation finished" << std::endl;
  60. hipHostFree( host_a);
  61. hipHostFree( host_b);
  62. hipHostFree( host_c);
  63. hipFree( dev_a);
  64. hipFree( dev_b);
  65. hipFree( dev_c);
  66. hipStreamDestroy( stream);
  67. });
  68. f.join();
  69. [heading Synopsis]
  70. #include <boost/fiber/hip/waitfor.hpp>
  71. namespace boost {
  72. namespace fibers {
  73. namespace hip {
  74. std::tuple< hipStream_t, hipError_t > waitfor_all( hipStream_t st);
  75. std::vector< std::tuple< hipStream_t, hipError_t > > waitfor_all( hipStream_t ... st);
  76. }}}
  77. [ns_function_heading hip..waitfor]
  78. #include <boost/fiber/hip/waitfor.hpp>
  79. namespace boost {
  80. namespace fibers {
  81. namespace hip {
  82. std::tuple< hipStream_t, hipError_t > waitfor_all( hipStream_t st);
  83. std::vector< std::tuple< hipStream_t, hipError_t > > waitfor_all( hipStream_t ... st);
  84. }}}
  85. [variablelist
  86. [[Effects:] [Suspends active fiber till HIP stream has finished its operations.]]
  87. [[Returns:] [tuple of stream reference and the HIP stream status]]
  88. ]
  89. [endsect]