fill_n.hpp

// Copyright 2019 Hans Dembinski
//
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt
// or copy at http://www.boost.org/LICENSE_1_0.txt)

#ifndef BOOST_HISTOGRAM_DETAIL_FILL_N_HPP
#define BOOST_HISTOGRAM_DETAIL_FILL_N_HPP

#include <algorithm>
#include <boost/assert.hpp>
#include <boost/histogram/axis/option.hpp>
#include <boost/histogram/axis/traits.hpp>
#include <boost/histogram/detail/axes.hpp>
#include <boost/histogram/detail/detect.hpp>
#include <boost/histogram/detail/fill.hpp>
#include <boost/histogram/detail/linearize.hpp>
#include <boost/histogram/detail/non_member_container_access.hpp>
#include <boost/histogram/detail/optional_index.hpp>
#include <boost/histogram/detail/span.hpp>
#include <boost/histogram/detail/static_if.hpp>
#include <boost/histogram/fwd.hpp>
#include <boost/mp11/algorithm.hpp>
#include <boost/mp11/bind.hpp>
#include <boost/mp11/utility.hpp>
#include <boost/throw_exception.hpp>
#include <boost/variant2/variant.hpp>
#include <stdexcept>
#include <type_traits>
#include <utility>

namespace boost {
namespace histogram {
namespace detail {

namespace dtl = boost::histogram::detail;
template <class Axes, class T>
using is_convertible_to_any_value_type =
    mp11::mp_any_of_q<value_types<Axes>, mp11::mp_bind_front<std::is_convertible, T>>;

template <class... Ts>
void fold(Ts&&...) noexcept {} // helper to enable operator folding

template <class T>
auto to_ptr_size(const T& x) {
  return static_if<std::is_scalar<T>>(
      [](const auto& x) { return std::make_pair(&x, static_cast<std::size_t>(0)); },
      [](const auto& x) { return std::make_pair(dtl::data(x), dtl::size(x)); }, x);
}
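
// Note the (pointer, length) convention established here: a scalar argument
// becomes a pair with length 0, which the fill routines below treat as
// "broadcast this single value", while a container contributes its data
// pointer and actual length.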

template <class F, class V>
decltype(auto) maybe_visit(F&& f, V&& v) {
  return static_if<is_variant<std::decay_t<V>>>(
      [](auto&& f, auto&& v) {
        return variant2::visit(std::forward<F>(f), std::forward<V>(v));
      },
      [](auto&& f, auto&& v) { return std::forward<F>(f)(std::forward<V>(v)); },
      std::forward<F>(f), std::forward<V>(v));
}

template <class Index, class Axis, class IsGrowing>
struct index_visitor {
  using index_type = Index;
  using pointer = index_type*;
  using value_type = axis::traits::value_type<Axis>;
  using Opt = axis::traits::static_options<Axis>;

  Axis& axis_;
  const std::size_t stride_, start_, size_; // start and size of value collection
  const pointer begin_;
  axis::index_type* shift_;

  index_visitor(Axis& a, std::size_t& str, const std::size_t& sta, const std::size_t& si,
                const pointer it, axis::index_type* shift)
      : axis_(a), stride_(str), start_(sta), size_(si), begin_(it), shift_(shift) {}

  template <class T>
  void call_2(std::true_type, pointer it, const T& x) const {
    // must use this code for all axes if one of them is growing
    axis::index_type shift;
    linearize_growth(*it, shift, stride_, axis_,
                     try_cast<value_type, std::invalid_argument>(x));
    if (shift > 0) { // shift previous indices, because axis zero-point has changed
      while (it != begin_) *--it += static_cast<std::size_t>(shift) * stride_;
      *shift_ += shift;
    }
  }
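
  // Worked example for the shift handling above: if this is a growing axis and
  // a value below the current range grows it by two bins at the lower end
  // (shift == 2), every index already written for earlier values must be
  // increased by 2 * stride_ so that it keeps pointing at the same cell.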

  template <class T>
  void call_2(std::false_type, pointer it, const T& x) const {
    // no axis is growing
    linearize(*it, stride_, axis_, try_cast<value_type, std::invalid_argument>(x));
  }

  template <class T>
  void call_1(std::false_type, const T& iterable) const {
    // T is iterable; fill N values
    const auto* tp = dtl::data(iterable) + start_;
    for (auto it = begin_; it != begin_ + size_; ++it) call_2(IsGrowing{}, it, *tp++);
  }

  template <class T>
  void call_1(std::true_type, const T& value) const {
    // T is compatible value; fill single value N times
    index_type idx{*begin_};
    call_2(IsGrowing{}, &idx, value);
    if (is_valid(idx)) {
      const auto delta =
          static_cast<std::intptr_t>(idx) - static_cast<std::intptr_t>(*begin_);
      for (auto&& i : make_span(begin_, size_)) i += delta;
    } else
      std::fill(begin_, begin_ + size_, invalid_index);
  }

  template <class T>
  void operator()(const T& iterable_or_value) const {
    call_1(mp11::mp_bool<(std::is_convertible<T, value_type>::value ||
                          !is_iterable<T>::value)>{},
           iterable_or_value);
  }
};

template <class Index, class S, class Axes, class T>
void fill_n_indices(Index* indices, const std::size_t start, const std::size_t size,
                    const std::size_t offset, S& storage, Axes& axes, const T* viter) {
  axis::index_type extents[buffer_size<Axes>::value];
  axis::index_type shifts[buffer_size<Axes>::value];
  for_each_axis(axes, [eit = extents, sit = shifts](const auto& a) mutable {
    *sit++ = 0;
    *eit++ = axis::traits::extent(a);
  }); // LCOV_EXCL_LINE: gcc-8 is missing this line for no reason

  // offset must be zero for growing axes
  using IsGrowing = has_growing_axis<Axes>;
  std::fill(indices, indices + size, IsGrowing::value ? 0 : offset);
  for_each_axis(axes, [&, stride = static_cast<std::size_t>(1),
                       pshift = shifts](auto& axis) mutable {
    using Axis = std::decay_t<decltype(axis)>;
    maybe_visit(
        index_visitor<Index, Axis, IsGrowing>{axis, stride, start, size, indices, pshift},
        *viter++);
    stride *= static_cast<std::size_t>(axis::traits::extent(axis));
    ++pshift;
  });

  bool update_needed = false;
  for_each_axis(axes, [&update_needed, eit = extents](const auto& a) mutable {
    update_needed |= *eit++ != axis::traits::extent(a);
  });
  if (update_needed) {
    storage_grower<Axes> g(axes);
    g.from_extents(extents);
    g.apply(storage, shifts);
  }
}

template <class S, class Index, class... Ts>
void fill_n_storage(S& s, const Index idx, Ts&&... p) noexcept {
  if (is_valid(idx)) {
    BOOST_ASSERT(idx < s.size());
    fill_storage_3(s[idx], *p.first...);
  }
  // advance only pointers that belong to sequences (length != 0);
  // scalars (length == 0) are broadcast and their pointer stays put
  fold((p.second ? ++p.first : 0)...);
}

template <class S, class Index, class T, class... Ts>
void fill_n_storage(S& s, const Index idx, weight_type<T>&& w, Ts&&... ps) noexcept {
  if (is_valid(idx)) {
    BOOST_ASSERT(idx < s.size());
    fill_storage_3(s[idx], weight_type<decltype(*w.value.first)>{*w.value.first},
                   *ps.first...);
  }
  if (w.value.second) ++w.value.first;
  fold((ps.second ? ++ps.first : 0)...);
}

// general Nd treatment
template <class Index, class S, class A, class T, class... Ts>
void fill_n_nd(const std::size_t offset, S& storage, A& axes, const std::size_t vsize,
               const T* values, Ts&&... ts) {
  constexpr std::size_t buffer_size = 1ul << 14;
  Index indices[buffer_size];

  /*
    Parallelization options.

    A) Run the whole fill2 method in parallel. Each thread fills its own buffer
    of indices, and synchronization (atomics) is needed to increment the
    storage cells. This leads to a lot of congestion for small histograms.

    B) Run only fill_n_indices in parallel; subsections of the indices buffer
    can be filled by different threads. The final loop that fills the storage
    runs in the main thread, which requires no synchronization for the storage,
    and cells do not need to support atomic operations.

    C) Like B), then sort the indices in the main thread and fill the storage
    in parallel, where each thread uses a disjoint set of indices. This should
    create less congestion and requires no synchronization for the storage.

    Note on C): Let's say we have an axis with 5 bins (with *flow to simplify).
    Then, after filling 10 values, converting to indices, and sorting, the
    index buffer may look like this: 0 0 0 1 2 2 2 4 4 5. Let's use two threads
    to fill the storage. Still in the main thread, we compute an iterator to
    the middle of the index buffer and move it to the right until the pointee
    changes (see the sketch after this function). Now we have two ranges which
    contain disjoint sets of indices. We pass these ranges to the threads,
    which then fill the storage. Since the threads by construction do not
    compete to increment the same cell, no further synchronization is required.

    In all cases, growing axes cannot be parallelized.
  */

  for (std::size_t start = 0; start < vsize; start += buffer_size) {
    const std::size_t n = std::min(buffer_size, vsize - start);
    // fill buffer of indices...
    fill_n_indices(indices, start, n, offset, storage, axes, values);
    // ...and fill corresponding storage cells
    for (auto&& idx : make_span(indices, n))
      fill_n_storage(storage, idx, std::forward<Ts>(ts)...);
  }
}
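
// Illustrative sketch only, not used by this header: one way to compute the
// disjoint split described in option C above for two worker threads, assuming
// the index buffer has already been sorted. The name split_sorted is
// hypothetical.
template <class It>
It split_sorted(It begin, It end) {
  It mid = begin + (end - begin) / 2;
  // step past all elements equal to *mid so that equal indices never end up
  // in both halves
  return std::upper_bound(mid, end, *mid);
}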

template <class S, class... As, class T, class... Us>
void fill_n_1(const std::size_t offset, S& storage, std::tuple<As...>& axes,
              const std::size_t vsize, const T* values, Us&&... us) {
  using index_type =
      mp11::mp_if<has_non_inclusive_axis<std::tuple<As...>>, optional_index, std::size_t>;
  fill_n_nd<index_type>(offset, storage, axes, vsize, values, std::forward<Us>(us)...);
}

template <class S, class A, class T, class... Us>
void fill_n_1(const std::size_t offset, S& storage, A& axes, const std::size_t vsize,
              const T* values, Us&&... us) {
  bool all_inclusive = true;
  for_each_axis(axes,
                [&](const auto& ax) { all_inclusive &= axis::traits::inclusive(ax); });
  if (axes_rank(axes) == 1) {
    axis::visit(
        [&](auto& ax) {
          std::tuple<decltype(ax)> axes{ax};
          fill_n_1(offset, storage, axes, vsize, values, std::forward<Us>(us)...);
        },
        axes[0]);
  } else {
    if (all_inclusive)
      fill_n_nd<std::size_t>(offset, storage, axes, vsize, values,
                             std::forward<Us>(us)...);
    else
      fill_n_nd<optional_index>(offset, storage, axes, vsize, values,
                                std::forward<Us>(us)...);
  }
}

template <class A, class T, std::size_t N>
std::size_t get_total_size(const A& axes, const dtl::span<const T, N>& values) {
  // supported cases (T = value type; CT = container of T; V<T, CT, ...> = variant):
  // - span<CT, N>: for any histogram, N == rank
  // - span<V<T, CT>, N>: for any histogram, N == rank
  // (see the example after this function)
  BOOST_ASSERT(axes_rank(axes) == values.size());
  constexpr auto unset = static_cast<std::size_t>(-1);
  std::size_t size = unset;
  for_each_axis(axes, [&size, vit = values.begin()](const auto& ax) mutable {
    using AV = axis::traits::value_type<std::decay_t<decltype(ax)>>;
    maybe_visit(
        [&size](const auto& v) {
          // v is either convertible to a value or a sequence of values
          using V = std::remove_const_t<std::remove_reference_t<decltype(v)>>;
          static_if_c<(std::is_convertible<decltype(v), AV>::value ||
                       !is_iterable<V>::value)>(
              [](const auto&) {},
              [&size](const auto& v) {
                const auto n = dtl::size(v);
                // must repeat this here for msvc :(
                constexpr auto unset = static_cast<std::size_t>(-1);
                if (size == unset)
                  size = dtl::size(v);
                else if (size != n)
                  BOOST_THROW_EXCEPTION(
                      std::invalid_argument("spans must have compatible lengths"));
              },
              v);
        },
        *vit++);
  });
  // if none of the arguments is iterable, return a size of 1
  return size == unset ? 1 : size;
}
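
// Example of the length rule above: for a histogram of rank 2, values may hold
// {std::vector<double>{1., 2., 3.}, std::vector<double>{10., 20., 30.}}, which
// yields a total size of 3. An entry that is a single value (e.g. via the
// variant form) does not constrain the size; it is broadcast by the fill loop.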

inline void fill_n_check_extra_args(std::size_t) noexcept {}

template <class T, class... Ts>
void fill_n_check_extra_args(std::size_t size, T&& x, Ts&&... ts) {
  // sequences must have the same length, but sequences of length 0 are
  // broadcast (example below)
  if (x.second != 0 && x.second != size)
    BOOST_THROW_EXCEPTION(std::invalid_argument("spans must have compatible lengths"));
  fill_n_check_extra_args(size, std::forward<Ts>(ts)...);
}
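
// For instance, a scalar extra argument (a single weight or sample value)
// arrives as a pair with length 0 (see to_ptr_size above); it passes the check
// and is then broadcast by fill_n_storage, which never advances a length-0
// pointer.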

template <class T, class... Ts>
void fill_n_check_extra_args(std::size_t size, weight_type<T>&& w, Ts&&... ts) {
  fill_n_check_extra_args(size, w.value, std::forward<Ts>(ts)...);
}

template <class S, class A, class T, std::size_t N, class... Us>
void fill_n(std::true_type, const std::size_t offset, S& storage, A& axes,
            const dtl::span<const T, N> values, Us&&... us) {
  // supported cases (T = value type; CT = container of T; V<T, CT, ...> = variant):
  // - span<T, N>: only valid for 1D histogram, N > 1 allowed
  // - span<CT, N>: for any histogram, N == rank
  // - span<V<T, CT>, N>: for any histogram, N == rank
  // (see the illustration after this function)
  static_if<is_convertible_to_any_value_type<A, T>>(
      [&](const auto& values, auto&&... us) {
        // T matches one of the axis value types, must be the 1D special case
        if (axes_rank(axes) != 1)
          BOOST_THROW_EXCEPTION(
              std::invalid_argument("number of arguments must match histogram rank"));
        fill_n_check_extra_args(values.size(), std::forward<Us>(us)...);
        fill_n_1(offset, storage, axes, values.size(), &values, std::forward<Us>(us)...);
      },
      [&](const auto& values, auto&&... us) {
        // generic ND case
        if (axes_rank(axes) != values.size())
          BOOST_THROW_EXCEPTION(
              std::invalid_argument("number of arguments must match histogram rank"));
        const auto vsize = get_total_size(axes, values);
        fill_n_check_extra_args(vsize, std::forward<Us>(us)...);
        fill_n_1(offset, storage, axes, vsize, values.data(), std::forward<Us>(us)...);
      },
      values, std::forward<Us>(us)...);
}
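
// Illustration of the supported span forms (hypothetical values, assuming
// double-valued axes):
//   rank 1: span<const double, N>               -- N raw values for the single axis
//   rank 2: span<const std::vector<double>, 2>  -- one container per axis
//   rank 2: span<const variant2::variant<double, std::vector<double>>, 2>
//           -- per axis either a single (broadcast) value or a container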

// empty implementation for bad arguments to stop compiler from showing internals
template <class... Ts>
void fill_n(std::false_type, Ts...) {}

} // namespace detail
} // namespace histogram
} // namespace boost

#endif // BOOST_HISTOGRAM_DETAIL_FILL_N_HPP