123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984 |
- ////
- Copyright 2015-2017 Peter Dimov
- Distributed under the Boost Software License, Version 1.0.
- See accompanying file LICENSE_1_0.txt or copy at
- http://www.boost.org/LICENSE_1_0.txt
- ////
- # Simple {cpp}11 metaprogramming, part 2
- Peter Dimov
- 2015-06-20
- :toc: left
- :idprefix:
- :docinfo: shared-footer
- [.lead]
- __Efficient algorithms for membership testing, random access, and retrieval by
- key__
- NOTE: Being late to the metaprogramming party, I make no claim of having
- invented the techniques in this article. A quick look at the implementations
- of, for example, Louis Dionne's https://github.com/ldionne/mpl11[mpl11] and
- Eric Niebler's https://github.com/ericniebler/meta[meta], shows that most of
- these tricks are already known. Dave Abrahams
- https://github.com/dabrahams/mpl11[has experimented] along these lines in 2012.
- The original inventor of the multiple inheritance trick and the `void*`
- arguments trick is probably Richard Smith, who has posted
- https://llvm.org/bugs/attachment.cgi?id=8825[two]
- https://llvm.org/bugs/attachment.cgi?id=8838[examples] in response to
- https://llvm.org/bugs/show_bug.cgi?id=13263[a Clang bug report].
- ## Vectors, sets, and maps
- <<simple_cxx11_metaprogramming.adoc#,Last time>>, I outlined a style of
- metaprogramming that operated on type lists -- variadic class templates:
- ```
- template<class... T> struct mp_list {};
- ```
- Classic Lisp uses lists as its only data structure, but operating on a list is
- usually linear in the number of its elements.
- In addition to `list`, the STL has `vector`, `set`, and `map`. `vector`
- supports random access by index; `set` has efficient test for membership; `map`
- associates keys with values and has efficient lookup based on key.
- Instead of introducing separate data structure such as `mp_vector`, `mp_set`,
- `mp_map`, we'll keep our data in a list form, and attempt to provide efficient
- algorithms for random access, membership testing, and lookup.
- ## mp_contains
- Let's starts with sets. A set is just a list with unique elements. To obtain a
- set from an arbitrary list, we'll need an algorithm that removes the
- duplicates. Let's call it `mp_unique<L>`:
- [subs=+quotes]
- ```
- // mp_if
- template<bool C, class T, class E> struct mp_if_c_impl;
- template<class T, class E> struct mp_if_c_impl<true, T, E>
- {
- using type = T;
- };
- template<class T, class E> struct mp_if_c_impl<false, T, E>
- {
- using type = E;
- };
- template<bool C, class T, class E>
- using mp_if_c = typename mp_if_c_impl<C, T, E>::type;
- template<class C, class T, class E>
- using mp_if = typename mp_if_c_impl<C::value != 0, T, E>::type;
- // mp_unique
- template<class L> struct mp_unique_impl;
- template<class L> using mp_unique = typename mp_unique_impl<L>::type;
- template<template<class...> class L> struct mp_unique_impl<L<>>
- {
- using type = L<>;
- };
- template<template<class...> class L, class T1, class... T>
- struct mp_unique_impl<L<T1, T...>>
- {
- using _rest = mp_unique<L<T...>>;
- using type = mp_if<**mp_contains**<_rest, T1>, _rest, mp_push_front<_rest, T1>>;
- };
- ```
- For membership testing, we've introduced an algorithm `mp_contains<L, V>` that
- returns `true` when `L` contains `V`. The straightforward recursive
- implementation of `mp_contains` is:
- ```
- template<class L, class V> struct mp_contains_impl;
- template<class L, class V> using mp_contains = typename mp_contains_impl<L, V>::type;
- template<template<class...> class L, class V> struct mp_contains_impl<L<>, V>
- {
- using type = std::false_type;
- };
- template<template<class...> class L, class... T, class V>
- struct mp_contains_impl<L<V, T...>, V>
- {
- using type = std::true_type;
- };
- template<template<class...> class L, class T1, class... T, class V>
- struct mp_contains_impl<L<T1, T...>, V>: mp_contains_impl<L<T...>, V>
- {
- };
- ```
- Note that `mp_unique<L>` makes `N` calls to `mp_contains`, where `N` is the
- length of the list `L`. This means that `mp_contains` needs to be as fast as
- possible, which the above implementation, well, isn't.
- Here are the compile times in seconds for invoking `mp_unique` on a list with
- `N` (distinct) elements:
- |===
- ||N=100 |N=200 |N=300 |N=400 |N=500 |N=600 |N=700 |N=800
- |VC$$++$$ 2013, recursive |2.1 |DNF ||||||
- |clang$$++$$ 3.5.1, recursive |0.9 |4.5 |13.2 |30.2 |DNF |||
- |g$$++$$ 4.9.2, recursive |0.7 |3.6 |10.4 |23.2 |DNF |||
- |===
- (Tests done under Windows/Cygwin. All compilers are 32 bit. No optimizations.
- DNF stands for "did not finish", which usually means that the compiler ran out
- of heap space or crashed.)
- We clearly need a better alternative.
- I ended the previous article with an implementation of `mp_contains` that
- relied on `mp_count`, which in turn relied on `mp_plus`. Let's see how it
- fares:
- |===
- ||N=100 |N=200 |N=300 |N=400 |N=500 |N=600 |N=700 |N=800
- |VC$$++$$ 2013, mp_count/mp_plus |1.1 |9.8 |50.5 |DNF ||||
- |clang$$++$$ 3.5.1, mp_count/mp_plus |0.5 |1.4 |3.1 |6.1 |DNF |||
- |g$$++$$ 4.9.2, mp_count/mp_plus |0.5 |1.3 |2.9 |5.8 |9.7 |15.6 |22.4 |32.3
- |===
- Not _that_ bad, at least if your compiler happens to be `g$$++$$`. Still, there
- ought to be room for improvement here.
- To do better, we have to somehow leverage the language features, such as pack
- expansion, to do more of the work for us. For inspiration, let's turn to
- section 14.5.3 paragraph 4 of the {cpp}11 standard, which explains that pack
- expansions can occur in the following contexts:
- * **In a function parameter pack (8.3.5); the pattern is the
- __parameter-declaration__ without the ellipsis.**
- * In a template parameter pack that is a pack expansion (14.1):
- * **In an __initializer-list__ (8.5); the pattern is an
- __initializer-clause__.**
- * **In a __base-specifier-list__ (Clause 10); the pattern is a
- __base-specifier__.**
- * In a __mem-initializer-list__ (12.6.2); the pattern is a
- __mem-initializer__.
- * In a __template-argument-list__ (14.3); the pattern is a
- __template-argument__.
- * In a __dynamic-exception-specification__ (15.4); the pattern is a
- __type-id__.
- * In an __attribute-list__ (7.6.1); the pattern is an __attribute__.
- * In an __alignment-specifier__ (7.6.2); the pattern is the
- __alignment-specifier__ without the ellipsis.
- * In a __capture-list__ (5.1.2); the pattern is a __capture__.
- * In a `sizeof$$...$$` expression (5.3.3); the pattern is an __identifier__.
- The **emphasis** is mine and indicates possible leads.
- Our first option is to expand the parameter pack into arguments for a function
- call. Since we're interested in operations that occur at compile time, calling
- a function may not appear useful; but {cpp}11 functions can be `constexpr`, and
- `constexpr` function "calls" do occur at compile time.
- Recall our `mp_count`:
- ```
- template<class L, class V> struct mp_count_impl;
- template<template<class...> class L, class... T, class V>
- struct mp_count_impl<L<T...>, V>
- {
- using type = mp_plus<std::is_same<T, V>...>;
- };
- template<class L, class V> using mp_count = typename mp_count_impl<L, V>::type;
- ```
- Instead of using the template alias `mp_plus` to sum the `is_same` expressions,
- we can use a `constexpr` function:
- ```
- constexpr std::size_t cx_plus()
- {
- return 0;
- }
- template<class T1, class... T> constexpr std::size_t cx_plus(T1 t1, T... t)
- {
- return t1 + cx_plus(t...);
- }
- // mp_size_t
- template<std::size_t N> using mp_size_t = std::integral_constant<std::size_t, N>;
- // mp_count
- template<class L, class V> struct mp_count_impl;
- template<template<class...> class L, class... T, class V>
- struct mp_count_impl<L<T...>, V>
- {
- using type = mp_size_t<cx_plus(std::is_same<T, V>::value...)>;
- };
- template<class L, class V> using mp_count = typename mp_count_impl<L, V>::type;
- ```
- with the following results:
- |===
- ||N=100 |N=200 |N=300 |N=400 |N=500 |N=600 |N=700 |N=800
- |clang$$++$$ 3.5.1, mp_count/cx_plus |0.4 |1.1 |2.5 |5.0 |DNF |||
- |g$$++$$ 4.9.2, mp_count/cx_plus |0.4 |0.9 |1.7 |2.9 |4.7 |6.7 |9.2 |11.8
- |===
- We've improved the times, but lost VC$$++$$ 2013 due to its not implementing
- `constexpr`.
- Let's try pack expansion into an __initializer-list__. Instead of passing the
- `is_same` expressions to a function, we can build a constant array out of them,
- then sum the array with a `constexpr` function:
- ```
- constexpr std::size_t cx_plus2(bool const * first, bool const * last)
- {
- return first == last? 0: *first + cx_plus2(first + 1, last);
- }
- // mp_count
- template<class L, class V> struct mp_count_impl;
- template<template<class...> class L, class... T, class V>
- struct mp_count_impl<L<T...>, V>
- {
- static constexpr bool _v[] = { std::is_same<T, V>::value... };
- using type = mp_size_t<cx_plus2(_v, _v + sizeof...(T))>;
- };
- template<class L, class V> using mp_count = typename mp_count_impl<L, V>::type;
- ```
- This is a neat trick, but is it fast?
- |===
- ||N=100 |N=200 |N=300 |N=400 |N=500 |N=600 |N=700 |N=800
- |clang$$++$$ 3.5.1, mp_count/cx_plus2 |0.4 |0.9 |1.8 |DNF ||||
- |g$$++$$ 4.9.2, mp_count/cx_plus2 |0.4 |0.9 |1.9 |3.4 |5.4 |7.8 |11.0 |14.7
- |===
- That's a bit disappointing. Let's see what can we do with expanding a parameter
- pack into a base-specifier-list. We would be able to define a class that
- derives from every element of the pack:
- ```
- struct U: T... {};
- ```
- We can then use `std::is_base_of<V, U>` to test whether a type `V` is a base of
- `U`, that is, whether it's one of the elements of the parameter pack. Which is
- exactly what we need.
- Arbitrary types such as `void`, `int`, or `void(int)` can't be used as base
- classes, but we'll wrap the types in an empty class template, which we'll call
- `mp_identity`.
- ```
- template<class T> struct mp_identity
- {
- using type = T;
- };
- template<class L, class V> struct mp_contains_impl;
- template<class L, class V> using mp_contains = typename mp_contains_impl<L, V>::type;
- template<template<class...> class L, class... T, class V>
- struct mp_contains_impl<L<T...>, V>
- {
- struct U: mp_identity<T>... {};
- using type = std::is_base_of<mp_identity<V>, U>;
- };
- ```
- Performance?
- |===
- ||N=100 |N=200 |N=300 |N=400 |N=500 |N=600 |N=700 |N=800
- |VC$$++$$ 2013, is_base_of |0.3 |0.6 |1.3 |2.5 |DNF |||
- |clang$$++$$ 3.5.1, is_base_of |0.3 |0.4 |0.6 |0.8 |DNF |||
- |g$$++$$ 4.9.2, is_base_of |0.3 |0.4 |0.6 |0.9 |1.3 |1.7 |2.3 |3.0
- |===
- This implementation is a clear winner.
- In fairness, we ought to note that the first four implementations of
- `mp_contains` do not rely on the list elements being unique. This makes
- `mp_contains` an algorithm that supports arbitrary lists, not just sets.
- The `is_base_of` implementation, however, does not support lists that contain
- duplicates, because it's not possible to inherit directly from the same type
- twice. So it does not implement the general `mp_contains`, but something that
- should probably be named `mp_set_contains`.
- We can avoid the "no duplicates" requirement by modifying the implementation to
- inherit from `mp_identity<T>` indirectly, via an intermediate base class:
- [subs=+macros]
- ```
- // indirect_inherit
- template<std::size_t I, class T> struct inherit_second: T {};
- template<class L, class S> struct indirect_inherit_impl;
- template<template<class...> class L, class... T, std::size_t... J>
- struct indirect_inherit_impl<L<T...>, http://en.cppreference.com/w/cpp/utility/integer_sequence[integer_sequence]<std::size_t, J...>>:
- inherit_second<J, mp_identity<T>>... {};
- template<class L> using indirect_inherit =
- indirect_inherit_impl<L, http://en.cppreference.com/w/cpp/utility/integer_sequence[make_index_sequence]<mp_size<L>::value>>;
- // mp_contains
- template<class L, class V> struct mp_contains_impl
- {
- using U = indirect_inherit<L>;
- using type = std::is_base_of<mp_identity<V>, U>;
- };
- template<class L, class V> using mp_contains = typename mp_contains_impl<L, V>::type;
- ```
- This, however, pretty much nullifies the spectacular performance gains we've
- observed with the original `is_base_of`-based implementation:
- |===
- ||N=100 |N=200 |N=300 |N=400 |N=500 |N=600 |N=700 |N=800
- |VC$$++$$ 2013, recursive |2.1 |DNF ||||||
- |VC$$++$$ 2013, mp_count/mp_plus |1.1 |9.8 |50.5 |DNF ||||
- |VC$$++$$ 2013, is_base_of |0.3 |0.6 |1.3 |2.5 |DNF |||
- |VC$$++$$ 2013, is_base_of/indirect |1.0 |9.3 |49.5 |153.8 |DNF |||
- |===
- |===
- ||N=100 |N=200 |N=300 |N=400 |N=500 |N=600 |N=700 |N=800
- |clang$$++$$ 3.5.1, recursive |0.9 |4.5 |13.2 |30.2 |DNF |||
- |clang$$++$$ 3.5.1, mp_count/mp_plus |0.5 |1.4 |3.1 |6.1 |DNF |||
- |clang$$++$$ 3.5.1, mp_count/cx_plus |0.4 |1.1 |2.5 |5.0 |DNF |||
- |clang$$++$$ 3.5.1, mp_count/cx_plus2 |0.4 |0.9 |1.8 |DNF ||||
- |clang$$++$$ 3.5.1, is_base_of |0.3 |0.4 |0.6 |0.8 |DNF |||
- |clang$$++$$ 3.5.1, is_base_of/indirect |0.4 |0.9 |1.6 |2.5 |DNF |||
- |===
- |===
- ||N=100 |N=200 |N=300 |N=400 |N=500 |N=600 |N=700 |N=800
- |g$$++$$ 4.9.2, recursive |0.7 |3.6 |10.4 |23.2 |DNF |||
- |g$$++$$ 4.9.2, mp_count/mp_plus |0.5 |1.3 |2.9 |5.8 |9.7 |15.6 |22.4 |32.3
- |g$$++$$ 4.9.2, mp_count/cx_plus |0.4 |0.9 |1.7 |2.9 |4.7 |6.7 |9.2 |11.8
- |g$$++$$ 4.9.2, mp_count/cx_plus2 |0.4 |0.9 |1.9 |3.4 |5.4 |7.8 |11.0 |14.7
- |g$$++$$ 4.9.2, is_base_of |0.3 |0.4 |0.6 |0.9 |1.3 |1.7 |2.3 |3.0
- |g$$++$$ 4.9.2, is_base_of/indirect |0.5 |1.1 |2.3 |4.0 |6.6 |9.8 |13.6 |18.2
- |===
- ## mp_map_find
- A map, in the STL sense, is a data structure that associates keys with values
- and can efficiently retrieve, given a key, its associated value. For our
- purposes, a map will be any list of lists for which the inner lists have at
- least one element, the key; the rest of the elements we'll consider to be the
- associated value. For example, the list
- ```
- [[A, B], [C, D, E], [F], [G, H]]
- ```
- is a map with keys `A`, `C`, `F`, and `G`, with associated values `[B]`,
- `[D, E]`, `[]`, and `[H]`, respectively. We'll require unique keys, for reasons
- that'll become evident later.
- I'll show two other examples of maps, this time using real {cpp} code:
- ```
- using Map = mp_list<mp_list<int, int*>, mp_list<void, void*>, mp_list<char, char*>>;
- ```
- ```
- using Map2 = std::tuple<std::pair<int, int[2]>, std::pair<char, char[2]>>;
- ```
- The Lisp name of the algorithm that performs retrieval based on key is `ASSOC`,
- but I'll call it `mp_map_find`. `mp_map_find<M, K>` returns the element of `M`
- whose first element is `K`. For example, `mp_map_find<Map2, int>` would return
- `std::pair<int, int[2]>`. If there's no such key, it returns `void`.
- There's almost no need to implement and benchmark the recursive version of
- `mp_map_find` -- we can be pretty sure it will perform horribly. Still,
- ```
- template<class M, class K> struct mp_map_find_impl;
- template<class M, class K> using mp_map_find = typename mp_map_find_impl<M, K>::type;
- template<template<class...> class M, class K> struct mp_map_find_impl<M<>, K>
- {
- using type = void;
- };
- template<template<class...> class M, class T1, class... T, class K>
- struct mp_map_find_impl<M<T1, T...>, K>
- {
- using type = mp_if<std::is_same<mp_front<T1>, K>, T1, mp_map_find<M<T...>, K>>;
- };
- ```
- The compile time, in seconds, for `N` lookups into a map of size `N`, is as
- follows:
- |===
- ||N=100 |N=200 |N=300 |N=400 |N=500 |N=600 |N=700 |N=800
- |VC$$++$$ 2013, recursive |38.2 |DNF ||||||
- |clang$$++$$ 3.5.1, recursive |2.5 |13.7 |DNF |||||
- |g$$++$$ 4.9.2, recursive |1.9 |10.2 |28.8 |DNF ||||
- |===
- I told you there was no point.
- But, I hear some of you say, you're evaluating the else branch even if the
- condition is true, and that's horribly inefficient!
- Well, this would only improve the performance by a factor of approximately two
- on average, and only if the element is present, but fine, let's try it. The
- element happens to be present in the benchmark, so let's see.
- ```
- // mp_eval_if
- template<bool C, class T, template<class...> class E, class... A>
- struct mp_eval_if_c_impl;
- template<class T, template<class...> class E, class... A>
- struct mp_eval_if_c_impl<true, T, E, A...>
- {
- using type = T;
- };
- template<class T, template<class...> class E, class... A>
- struct mp_eval_if_c_impl<false, T, E, A...>
- {
- using type = E<A...>;
- };
- template<bool C, class T, template<class...> class E, class... A>
- using mp_eval_if_c = typename mp_eval_if_c_impl<C, T, E, A...>::type;
- template<class C, class T, template<class...> class E, class... A>
- using mp_eval_if = typename mp_eval_if_c_impl<C::value != 0, T, E, A...>::type;
- // mp_map_find
- template<class M, class K> struct mp_map_find_impl;
- template<class M, class K> using mp_map_find = typename mp_map_find_impl<M, K>::type;
- template<template<class...> class M, class K> struct mp_map_find_impl<M<>, K>
- {
- using type = void;
- };
- template<template<class...> class M, class T1, class... T, class K>
- struct mp_map_find_impl<M<T1, T...>, K>
- {
- using type = mp_eval_if<std::is_same<mp_front<T1>, K>, T1, mp_map_find, M<T...>, K>;
- };
- ```
- There you go:
- |===
- ||N=100 |N=200 |N=300 |N=400 |N=500 |N=600 |N=700 |N=800
- |VC$$++$$ 2013, recursive |15.6 |DNF ||||||
- |clang$$++$$ 3.5.1, recursive |1.8 |9.5 |DNF |||||
- |g$$++$$ 4.9.2, recursive |1.4 |7.0 |19.7 |DNF ||||
- |===
- I told you there was no point.
- Point or no, to establish that the recursive implementation is inefficient is
- not the same as to come up with an efficient one. There are two things that
- make the `mp_contains` techniques inapplicable to our present case: first,
- `mp_contains` only had to return true or false, whereas `mp_map_find` returns a
- type, and second, in `mp_contains` we knew the exact type of the element for
- which we were looking, whereas here, we only know its `mp_front`.
- Fortunately, there does exist a language feature that can solve both: {cpp} can
- deduce the template parameters of base classes when passed a derived class. In
- this example,
- ```
- struct K1 {};
- struct V1 {};
- struct X: std::pair<K1, V1> {};
- template<class A, class B> void f(std::pair<A, B> const & p);
- int main()
- {
- f(X());
- }
- ```
- the call to `f(X())` deduces `A` as `K1` and `B` as `V1`. If we have more than
- one `std::pair` base class, we can fix `A` to be `K1`:
- ```
- struct K1 {};
- struct V1 {};
- struct K2 {};
- struct V2 {};
- struct X: std::pair<K1, V1>, std::pair<K2, V2> {};
- template<class B> void f(std::pair<K1, B> const & p);
- int main()
- {
- f(X());
- }
- ```
- and `B` will be deduced as `V1`.
- We can retrieve the results of the deduction by returning the type we want:
- ```
- template<class B> std::pair<K1, B> f(std::pair<K1, B> const & p);
- ```
- and then using `decltype(f(X()))` to obtain this return type.
- What if `X` doesn't have a base of type `std::pair<K1, B>`? The deduction will
- fail and we'll get an error that `f(X())` cannot be called. To avoid it, we can
- add an overload of `f` that takes anything and returns `void`. But in this
- case, what will happen if `X` has two bases of the form that match the first
- `f` overload, such as for example `std::pair<K1, Y>` and `std::pair<K1, Z>`?
- The deduction will fail, the second overload will again be chosen and we'll get
- `void`. This is why we require maps to have unique keys.
- Here's an implementation of `mp_map_find` based on this technique:
- ```
- template<class M, class K> struct mp_map_find_impl;
- template<class M, class K>
- using mp_map_find = typename mp_map_find_impl<M, K>::type;
- template<template<class...> class M, class... T, class K>
- struct mp_map_find_impl<M<T...>, K>
- {
- struct U: mp_identity<T>... {};
- template<template<class...> class L, class... U>
- static mp_identity<L<K, U...>>
- f( mp_identity<L<K, U...>>* );
- static mp_identity<void> f( ... );
- using V = decltype( f((U*)0) );
- using type = typename V::type;
- };
- ```
- and its corresponding compile times:
- |===
- ||N=100 |N=200 |N=300 |N=400 |N=500 |N=600 |N=700 |N=800
- |VC$$++$$ 2013, deduction |0.3 |0.7 |1.8 |3.6 |6.4 |10.4 |16.2 |DNF
- |clang$$++$$ 3.5.1, deduction |0.3 |0.4 |0.6 |0.9 |1.2 |1.6 |2.2 |2.7
- |g$$++$$ 4.9.2, deduction |0.3 |0.5 |0.9 |1.6 |2.3 |3.4 |4.7 |6.3
- |===
- This looks ready to ship.
- The implementation contains one inefficiency though. If we evaluate
- `mp_map_find<M, K1>`, then `mp_map_find<M, K2>`, the two nested `U` types are
- the same as they only depend on `M`, but the compiler doesn't know that and
- will instantiate each one separately. We should move this type outside
- `mp_map_find_impl` so that it can be reused:
- [subs=+quotes]
- ```
- template<class... T> struct **mp_inherit**: T... {};
- template<class M, class K> struct mp_map_find_impl;
- template<class M, class K>
- using mp_map_find = typename mp_map_find_impl<M, K>::type;
- template<template<class...> class M, class... T, class K>
- struct mp_map_find_impl<M<T...>, K>
- {
- **using U = mp_inherit<mp_identity<T>...>;**
- template<template<class...> class L, class... U>
- static mp_identity<L<K, U...>>
- f( mp_identity<L<K, U...>>* );
- static mp_identity<void> f( ... );
- using V = decltype( f((U*)0) );
- using type = typename V::type;
- };
- ```
- (This same optimization, by the way, applies to our `is_base_of` implementation
- of `mp_contains`.)
- The improvement in compile times on our benchmark is measurable:
- |===
- ||N=100 |N=200 |N=300 |N=400 |N=500 |N=600 |N=700 |N=800
- |VC$$++$$ 2013, deduction+mp_inherit |0.3 |0.6 |1.4 |2.6 |4.5 |7.1 |10.7 |DNF
- |clang$$++$$ 3.5.1, deduction+mp_inherit |0.3 |0.4 |0.6 |0.8 |1.0 |1.4 |1.8 |2.2
- |g$$++$$ 4.9.2, deduction+mp_inherit |0.3 |0.4 |0.6 |0.9 |1.3 |1.8 |2.3 |2.9
- |===
- ## mp_at
- With sets and maps covered, it's time to tackle vectors. Vectors for us are
- just lists, to which we'll need to add the ability to efficiently access an
- element based on its index. The customary name for this accessor is
- `mp_at<L, I>`, where `L` is a list and `I` is an `integral_constant` that
- represents the index. We'll also follow the Boost.MPL convention and add
- `mp_at_c<L, I>`, where `I` is the index of type `size_t`.
- The recursive implementation of `mp_at` is:
- ```
- template<class L, std::size_t I> struct mp_at_c_impl;
- template<class L, std::size_t I> using mp_at_c = typename mp_at_c_impl<L, I>::type;
- template<class L, class I> using mp_at = typename mp_at_c_impl<L, I::value>::type;
- template<template<class...> class L, class T1, class... T>
- struct mp_at_c_impl<L<T1, T...>, 0>
- {
- using type = T1;
- };
- template<template<class...> class L, class T1, class... T, std::size_t I>
- struct mp_at_c_impl<L<T1, T...>, I>
- {
- using type = mp_at_c<L<T...>, I-1>;
- };
- ```
- and the compile times for making `N` calls to `mp_at` with a list of size `N`
- as the first argument are:
- |===
- ||N=100 |N=200 |N=300 |N=400 |N=500 |N=600 |N=700 |N=800
- |VC$$++$$ 2013, recursive |3.6 |DNF ||||||
- |clang$$++$$ 3.5.1, recursive |1.0 |5.1 |15.3 |DNF ||||
- |g$$++$$ 4.9.2, recursive |0.9 |4.7 |14.2 |32.4 |DNF |||
- |===
- To improve upon this appalling result, we'll again exploit pack expansion into a
- function call, but in a novel way. Let's suppose that we need to access the
- fourth element (`I = 3`). We'll generate the function signature
- ```
- template<class W> W f( void*, void*, void*, W*, ... );
- ```
- and then, given a list `L<T1, T2, T3, T4, T5, T6, T7>`, we'll evaluate the
- expression
- ```
- decltype( f( (T1*)0, (T2*)0, (T3*)0, (T4*)0, (T5*)0, (T6*)0, (T7*)0 ) )
- ```
- The three `void*` parameters will eat the first three elements, `W` will be
- deduced as the fourth, and the ellipsis will take care of the rest.
- A working implementation based on this technique is shown below:
- ```
- // mp_repeat_c
- template<std::size_t N, class... T> struct mp_repeat_c_impl
- {
- using _l1 = typename mp_repeat_c_impl<N/2, T...>::type;
- using _l2 = typename mp_repeat_c_impl<N%2, T...>::type;
- using type = mp_append<_l1, _l1, _l2>;
- };
- template<class... T> struct mp_repeat_c_impl<0, T...>
- {
- using type = mp_list<>;
- };
- template<class... T> struct mp_repeat_c_impl<1, T...>
- {
- using type = mp_list<T...>;
- };
- template<std::size_t N, class... T> using mp_repeat_c =
- typename mp_repeat_c_impl<N, T...>::type;
- // mp_at
- template<class L, class L2> struct mp_at_c_impl;
- template<template<class...> class L, class... T,
- template<class...> class L2, class... U>
- struct mp_at_c_impl<L<T...>, L2<U...>>
- {
- template<class W> static W f( U*..., W*, ... );
- using R = decltype( f( (mp_identity<T>*)0 ... ) );
- using type = typename R::type;
- };
- template<class L, std::size_t I> using mp_at_c =
- typename mp_at_c_impl<L, mp_repeat_c<I, void>>::type;
- template<class L, class I> using mp_at = mp_at_c<L, I::value>;
- ```
- and the compile times in the following table show it to be good enough for most
- practical purposes.
- |===
- ||N=100 |N=200 |N=300 |N=400 |N=500 |N=600 |N=700 |N=800
- |VC$$++$$ 2013, void* |0.4 |1.1 |2.4 |4.7 |DNF |||
- |clang$$++$$ 3.5.1, void* |0.4 |0.7 |1.2 |1.9 |2.7 |3.8 |5.0 |6.6
- |g$$++$$ 4.9.2, void* |0.3 |0.5 |0.9 |1.3 |2.1 |3.0 |4.2 |5.5
- |===
- Are we done with `mp_at`, then?
- Let's try something else -- transform the input list `[T1, T2, T3]` into a map
- `[[0, T1], [1, T2], [2, T3]]`, then use `mp_map_find` for the lookup:
- [subs=+macros]
- ```
- // mp_map_from_list
- template<class L, class S> struct mp_map_from_list_impl;
- template<template<class...> class L, class... T, std::size_t... J>
- struct mp_map_from_list_impl<L<T...>, http://en.cppreference.com/w/cpp/utility/integer_sequence[integer_sequence]<std::size_t, J...>>
- {
- using type = mp_list<mp_list<mp_size_t<J>, T>...>;
- };
- template<class L> using mp_map_from_list = typename mp_map_from_list_impl<L,
- http://en.cppreference.com/w/cpp/utility/integer_sequence[make_index_sequence]<mp_size<L>::value>>::type;
- // mp_at
- template<class L, std::size_t I> struct mp_at_c_impl
- {
- using map = mp_map_from_list<L>;
- using type = mp_second<mp_map_find<map, mp_size_t<I>>>;
- };
- template<class L, std::size_t I> using mp_at_c = typename mp_at_c_impl<L, I>::type;
- template<class L, class I> using mp_at = typename mp_at_c_impl<L, I::value>::type;
- ```
- At first sight, this looks ridiculous, but metaprogramming has its own rules.
- Let's measure:
- |===
- ||N=100 |N=200 |N=300 |N=400 |N=500 |N=600 |N=700 |N=800
- |VC$$++$$ 2013, map |0.3 |0.7 |1.5 |2.9 |5.0 |7.8 |11.9 |DNF
- |clang$$++$$ 3.5.1, map |0.3 |0.4 |0.6 |0.8 |1.1 |1.5 |1.8 |2.3
- |g$$++$$ 4.9.2, map |0.3 |0.4 |0.7 |1.0 |1.4 |1.9 |2.5 |3.2
- |===
- Surprise, this is the best implementation.
- ## mp_drop
- It turned out that we didn't need the `void*` trick for `mp_at`, but I'll show
- an example where we do: `mp_drop`. `mp_drop<L, N>` returns the list `L` without
- its first `N` elements; or, in other words, it drops the first `N` elements
- (presumably on the cutting room floor) and returns what's left.
- To implement `mp_drop`, we just need to change
- ```
- template<class W> W f( void*, void*, void*, W*, ... );
- ```
- from above to return the rest of the elements, rather than just one:
- ```
- template<class... W> mp_list<W> f( void*, void*, void*, W*... );
- ```
- Adding the necessary `mp_identity` seasoning produces the following working
- implementation:
- ```
- template<class L, class L2> struct mp_drop_c_impl;
- template<template<class...> class L, class... T,
- template<class...> class L2, class... U>
- struct mp_drop_c_impl<L<T...>, L2<U...>>
- {
- template<class... W> static mp_identity<L<W...>> f( U*..., mp_identity<W>*... );
- using R = decltype( f( (mp_identity<T>*)0 ... ) );
- using type = typename R::type;
- };
- template<class L, std::size_t N> using mp_drop_c =
- typename mp_drop_c_impl<L, mp_repeat_c<N, void>>::type;
- template<class L, class N> using mp_drop = mp_drop_c<L, N::value>;
- ```
- I'll skip the recursive implementation and the performance comparison for this
- one. We can pretty much tell who's going to win, and by how much.
- ## mp_find_index
- The final algorithm that I'll bring to your attention is `mp_find_index`.
- `mp_find_index<L, V>` returns an integral constant of type `size_t` with a
- value that is the index of the first occurrence of `V` in `L`. If `V` is not in
- `L`, the return value is the size of `L`.
- We'll start with the recursive implementation, as usual:
- ```
- template<class L, class V> struct mp_find_index_impl;
- template<class L, class V> using mp_find_index = typename mp_find_index_impl<L, V>::type;
- template<template<class...> class L, class V> struct mp_find_index_impl<L<>, V>
- {
- using type = mp_size_t<0>;
- };
- template<template<class...> class L, class... T, class V>
- struct mp_find_index_impl<L<V, T...>, V>
- {
- using type = mp_size_t<0>;
- };
- template<template<class...> class L, class T1, class... T, class V>
- struct mp_find_index_impl<L<T1, T...>, V>
- {
- using type = mp_size_t<1 + mp_find_index<L<T...>, V>::value>;
- };
- ```
- and will continue with the compile times for `N` calls to `mp_find_index` on a
- list with `N` elements, as usual:
- |===
- ||N=100 |N=200 |N=300 |N=400 |N=500 |N=600 |N=700 |N=800
- |VC$$++$$ 2013, recursive |3.5 |DNF ||||||
- |clang$$++$$ 3.5.1, recursive |1.1 |5.5 |DNF |||||
- |g$$++$$ 4.9.2, recursive |0.8 |4.6 |13.6 |DNF ||||
- |===
- What can we do here?
- Let's go back to `mp_contains` and look at the "mp_count/cx_plus2"
- implementation which we rejected in favor of the inheritance-based one. It
- built a `constexpr` array of booleans and summed them in a `constexpr`
- function. We can do the same here, except instead of summing the array, we can
- find the index of the first true value:
- ```
- template<class L, class V> struct mp_find_index_impl;
- template<class L, class V> using mp_find_index = typename mp_find_index_impl<L, V>::type;
- template<template<class...> class L, class V> struct mp_find_index_impl<L<>, V>
- {
- using type = mp_size_t<0>;
- };
- constexpr std::size_t cx_find_index( bool const * first, bool const * last )
- {
- return first == last || *first? 0: 1 + cx_find_index( first + 1, last );
- }
- template<template<class...> class L, class... T, class V>
- struct mp_find_index_impl<L<T...>, V>
- {
- static constexpr bool _v[] = { std::is_same<T, V>::value... };
- using type = mp_size_t< cx_find_index( _v, _v + sizeof...(T) ) >;
- };
- ```
- The performance of this version is:
- |===
- ||N=100 |N=200 |N=300 |N=400 |N=500 |N=600 |N=700 |N=800
- |clang$$++$$ 3.5.1, constexpr |0.5 |1.3 |2.9 |DNF ||||
- |g$$++$$ 4.9.2, constexpr |0.5 |1.4 |3.1 |5.5 |8.7 |13.0 |18.0 |DNF
- |===
- which, while not ideal, is significantly better than our recursive punching
- bag. But if our compiler of choice is VC$$++$$ 2013, we can't use `constexpr`.
- We may attempt an implementation along the same lines, but with the `constexpr`
- function replaced with ordinary metaprogramming:
- ```
- template<class L, class V> struct mp_find_index_impl;
- template<class L, class V> using mp_find_index = typename mp_find_index_impl<L, V>::type;
- template<template<class...> class L, class V> struct mp_find_index_impl<L<>, V>
- {
- using type = mp_size_t<0>;
- };
- template<bool...> struct find_index_impl_;
- template<> struct find_index_impl_<>
- {
- static const std::size_t value = 0;
- };
- template<bool B1, bool... R> struct find_index_impl_<B1, R...>
- {
- static const std::size_t value = B1? 0: 1 + find_index_impl_<R...>::value;
- };
- template<bool B1, bool B2, bool B3, bool B4, bool B5,
- bool B6, bool B7, bool B8, bool B9, bool B10, bool... R>
- struct find_index_impl_<B1, B2, B3, B4, B5, B6, B7, B8, B9, B10, R...>
- {
- static const std::size_t value = B1? 0: B2? 1: B3? 2: B4? 3: B5? 4:
- B6? 5: B7? 6: B8? 7: B9? 8: B10? 9: 10 + find_index_impl_<R...>::value;
- };
- template<template<class...> class L, class... T, class V>
- struct mp_find_index_impl<L<T...>, V>
- {
- using type = mp_size_t<find_index_impl_<std::is_same<T, V>::value...>::value>;
- };
- ```
- This is still recursive, so we don't expect miracles, but it wouldn't hurt to
- measure:
- |===
- ||N=100 |N=200 |N=300 |N=400 |N=500 |N=600 |N=700 |N=800
- |VC$$++$$ 2013, bool... |4.7 |94.5 |488.3 |XFA ||||
- |clang$$++$$ 3.5.1, bool... |0.6 |2.2 |5.8 |12.0 |21.7 |35.2 |DNF |
- |g$$++$$ 4.9.2, bool... |0.6 |2.4 |6.5 |13.2 |23.8 |39.1 |59.0 |DNF
- |===
- (where XFA stands for "experimenter fell asleep".)
- This is an interesting tradeoff for VC$$++$$ 2013 and Clang. On the one hand,
- this implementation is slower; on the other, it doesn't crash the compiler as
- easily. Which to prefer is a matter of taste and of stern evaluation of one's
- needs to manipulate type lists of length 300.
- Note that once we have `mp_drop` and `mp_find_index`, we can derive the
- `mp_find<L, V>` algorithm, which returns the suffix of `L` starting with the
- first occurrence of `V`, if any, and an empty list otherwise, by using
- `mp_drop<L, mp_find_index<L, V>>`.
- ## Conclusion
- In this article, I have shown efficient algorithms that allow us to treat type
- lists as sets, maps and vectors, demonstrating various {cpp}11 implementation
- techniques in the process.
|