performance.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395
  1. // Boost.Units - A C++ library for zero-overhead dimensional analysis and
  2. // unit/quantity manipulation and conversion
  3. //
  4. // Copyright (C) 2003-2008 Matthias Christian Schabel
  5. // Copyright (C) 2008 Steven Watanabe
  6. //
  7. // Distributed under the Boost Software License, Version 1.0. (See
  8. // accompanying file LICENSE_1_0.txt or copy at
  9. // http://www.boost.org/LICENSE_1_0.txt)
  10. /**
  11. \file
  12. \brief performance.cpp
  13. \details
  14. Test runtime performance.
  15. Output:
  16. @verbatim
  17. multiplying ublas::matrix<double>(1000, 1000) : 25.03 seconds
  18. multiplying ublas::matrix<quantity>(1000, 1000) : 24.49 seconds
  19. tiled_matrix_multiply<double>(1000, 1000) : 1.12 seconds
  20. tiled_matrix_multiply<quantity>(1000, 1000) : 1.16 seconds
  21. solving y' = 1 - x + 4 * y with double: 1.97 seconds
  22. solving y' = 1 - x + 4 * y with quantity: 1.84 seconds
  23. @endverbatim
  24. **/
  25. #define _SCL_SECURE_NO_WARNINGS
  26. #include <cstdlib>
  27. #include <ctime>
  28. #include <algorithm>
  29. #include <iostream>
  30. #include <iomanip>
  31. #include <boost/config.hpp>
  32. #include <boost/timer.hpp>
  33. #include <boost/utility/result_of.hpp>
  34. #ifdef BOOST_MSVC
  35. #pragma warning(push)
  36. #pragma warning(disable:4267; disable:4127; disable:4244; disable:4100)
  37. #endif
  38. #include <boost/numeric/ublas/matrix.hpp>
  39. #ifdef BOOST_MSVC
  40. #pragma warning(pop)
  41. #endif
  42. #include <boost/units/quantity.hpp>
  43. #include <boost/units/systems/si.hpp>
  44. #include <boost/units/cmath.hpp>
  45. #include <boost/units/io.hpp>
  46. enum {
  47. tile_block_size = 24
  48. };
  49. template<class T0, class T1, class Out>
  50. void tiled_multiply_carray_inner(T0* first,
  51. T1* second,
  52. Out* out,
  53. int totalwidth,
  54. int width2,
  55. int height1,
  56. int common) {
  57. for(int j = 0; j < height1; ++j) {
  58. for(int i = 0; i < width2; ++i) {
  59. Out value = out[j * totalwidth + i];
  60. for(int k = 0; k < common; ++k) {
  61. value += first[k + totalwidth * j] * second[k * totalwidth + i];
  62. }
  63. out[j * totalwidth + i] = value;
  64. }
  65. }
  66. }
  67. template<class T0, class T1, class Out>
  68. void tiled_multiply_carray_outer(T0* first,
  69. T1* second,
  70. Out* out,
  71. int width2,
  72. int height1,
  73. int common) {
  74. std::fill_n(out, width2 * height1, Out());
  75. int j = 0;
  76. for(; j < height1 - tile_block_size; j += tile_block_size) {
  77. int i = 0;
  78. for(; i < width2 - tile_block_size; i += tile_block_size) {
  79. int k = 0;
  80. for(; k < common - tile_block_size; k += tile_block_size) {
  81. tiled_multiply_carray_inner(
  82. &first[k + width2 * j],
  83. &second[k * width2 + i],
  84. &out[j * width2 + i],
  85. width2,
  86. tile_block_size,
  87. tile_block_size,
  88. tile_block_size);
  89. }
  90. tiled_multiply_carray_inner(
  91. &first[k + width2 * j],
  92. &second[k * width2 + i],
  93. &out[j * width2 + i],
  94. width2,
  95. tile_block_size,
  96. tile_block_size,
  97. common - k);
  98. }
  99. int k = 0;
  100. for(; k < common - tile_block_size; k += tile_block_size) {
  101. tiled_multiply_carray_inner(
  102. &first[k + width2 * j],
  103. &second[k * width2 + i],
  104. &out[j * width2 + i],
  105. width2, width2 - i,
  106. tile_block_size,
  107. tile_block_size);
  108. }
  109. tiled_multiply_carray_inner(
  110. &first[k + width2 * j],
  111. &second[k * width2 + i],
  112. &out[j * width2 + i],
  113. width2, width2 - i,
  114. tile_block_size,
  115. common - k);
  116. }
  117. int i = 0;
  118. for(; i < width2 - tile_block_size; i += tile_block_size) {
  119. int k = 0;
  120. for(; k < common - tile_block_size; k += tile_block_size) {
  121. tiled_multiply_carray_inner(
  122. &first[k + width2 * j],
  123. &second[k * width2 + i],
  124. &out[j * width2 + i],
  125. width2,
  126. tile_block_size,
  127. height1 - j,
  128. tile_block_size);
  129. }
  130. tiled_multiply_carray_inner(
  131. &first[k + width2 * j],
  132. &second[k * width2 + i],
  133. &out[j * width2 + i],
  134. width2,
  135. tile_block_size,
  136. height1 - j,
  137. common - k);
  138. }
  139. int k = 0;
  140. for(; k < common - tile_block_size; k += tile_block_size) {
  141. tiled_multiply_carray_inner(
  142. &first[k + width2 * j],
  143. &second[k * width2 + i],
  144. &out[j * width2 + i],
  145. width2,
  146. width2 - i,
  147. height1 - j,
  148. tile_block_size);
  149. }
  150. tiled_multiply_carray_inner(
  151. &first[k + width2 * j],
  152. &second[k * width2 + i],
  153. &out[j * width2 + i],
  154. width2,
  155. width2 - i,
  156. height1 - j,
  157. common - k);
  158. }
  159. enum { max_value = 1000};
  160. template<class F, class T, class N, class R>
  161. BOOST_CXX14_CONSTEXPR
  162. R solve_differential_equation(F f, T lower, T upper, N steps, R start) {
  163. typedef typename F::template result<T, R>::type f_result;
  164. T h = (upper - lower) / (1.0*steps);
  165. for(N i = N(); i < steps; ++i) {
  166. R y = start;
  167. T x = lower + h * (1.0*i);
  168. f_result k1 = f(x, y);
  169. f_result k2 = f(x + h / 2.0, y + h * k1 / 2.0);
  170. f_result k3 = f(x + h / 2.0, y + h * k2 / 2.0);
  171. f_result k4 = f(x + h, y + h * k3);
  172. start = y + h * (k1 + 2.0 * k2 + 2.0 * k3 + k4) / 6.0;
  173. }
  174. return(start);
  175. }
  176. using namespace boost::units;
  177. //y' = 1 - x + 4 * y
  178. struct f {
  179. template<class Arg1, class Arg2> struct result;
  180. BOOST_CONSTEXPR double operator()(const double& x, const double& y) const {
  181. return(1.0 - x + 4.0 * y);
  182. }
  183. boost::units::quantity<boost::units::si::velocity>
  184. BOOST_CONSTEXPR operator()(const quantity<si::time>& x,
  185. const quantity<si::length>& y) const {
  186. using namespace boost::units;
  187. using namespace si;
  188. return(1.0 * meters / second -
  189. x * meters / pow<2>(seconds) +
  190. 4.0 * y / seconds );
  191. }
  192. };
  193. template<>
  194. struct f::result<double,double> {
  195. typedef double type;
  196. };
  197. template<>
  198. struct f::result<quantity<si::time>, quantity<si::length> > {
  199. typedef quantity<si::velocity> type;
  200. };
  201. //y' = 1 - x + 4 * y
  202. //y' - 4 * y = 1 - x
  203. //e^(-4 * x) * (dy - 4 * y * dx) = e^(-4 * x) * (1 - x) * dx
  204. //d/dx(y * e ^ (-4 * x)) = e ^ (-4 * x) (1 - x) * dx
  205. //d/dx(y * e ^ (-4 * x)) = e ^ (-4 * x) * dx - x * e ^ (-4 * x) * dx
  206. //d/dx(y * e ^ (-4 * x)) = d/dx((-3/16 + 1/4 * x) * e ^ (-4 * x))
  207. //y * e ^ (-4 * x) = (-3/16 + 1/4 * x) * e ^ (-4 * x) + C
  208. //y = (-3/16 + 1/4 * x) + C/e ^ (-4 * x)
  209. //y = 1/4 * x - 3/16 + C * e ^ (4 * x)
  210. //y(0) = 1
  211. //1 = - 3/16 + C
  212. //C = 19/16
  213. //y(x) = 1/4 * x - 3/16 + 19/16 * e ^ (4 * x)
  214. int main() {
  215. boost::numeric::ublas::matrix<double> ublas_result;
  216. {
  217. boost::numeric::ublas::matrix<double> m1(max_value, max_value);
  218. boost::numeric::ublas::matrix<double> m2(max_value, max_value);
  219. std::srand(1492);
  220. for(int i = 0; i < max_value; ++i) {
  221. for(int j = 0; j < max_value; ++j) {
  222. m1(i,j) = std::rand();
  223. m2(i,j) = std::rand();
  224. }
  225. }
  226. std::cout << "multiplying ublas::matrix<double>("
  227. << max_value << ", " << max_value << ") : ";
  228. boost::timer timer;
  229. ublas_result = (prod(m1, m2));
  230. std::cout << timer.elapsed() << " seconds" << std::endl;
  231. }
  232. typedef boost::numeric::ublas::matrix<
  233. boost::units::quantity<boost::units::si::dimensionless>
  234. > matrix_type;
  235. matrix_type ublas_resultq;
  236. {
  237. matrix_type m1(max_value, max_value);
  238. matrix_type m2(max_value, max_value);
  239. std::srand(1492);
  240. for(int i = 0; i < max_value; ++i) {
  241. for(int j = 0; j < max_value; ++j) {
  242. m1(i,j) = std::rand();
  243. m2(i,j) = std::rand();
  244. }
  245. }
  246. std::cout << "multiplying ublas::matrix<quantity>("
  247. << max_value << ", " << max_value << ") : ";
  248. boost::timer timer;
  249. ublas_resultq = (prod(m1, m2));
  250. std::cout << timer.elapsed() << " seconds" << std::endl;
  251. }
  252. std::vector<double> cresult(max_value * max_value);
  253. {
  254. std::vector<double> m1(max_value * max_value);
  255. std::vector<double> m2(max_value * max_value);
  256. std::srand(1492);
  257. for(int i = 0; i < max_value * max_value; ++i) {
  258. m1[i] = std::rand();
  259. m2[i] = std::rand();
  260. }
  261. std::cout << "tiled_matrix_multiply<double>("
  262. << max_value << ", " << max_value << ") : ";
  263. boost::timer timer;
  264. tiled_multiply_carray_outer(
  265. &m1[0],
  266. &m2[0],
  267. &cresult[0],
  268. max_value,
  269. max_value,
  270. max_value);
  271. std::cout << timer.elapsed() << " seconds" << std::endl;
  272. }
  273. std::vector<
  274. boost::units::quantity<boost::units::si::energy>
  275. > cresultq(max_value * max_value);
  276. {
  277. std::vector<
  278. boost::units::quantity<boost::units::si::force>
  279. > m1(max_value * max_value);
  280. std::vector<
  281. boost::units::quantity<boost::units::si::length>
  282. > m2(max_value * max_value);
  283. std::srand(1492);
  284. for(int i = 0; i < max_value * max_value; ++i) {
  285. m1[i] = std::rand() * boost::units::si::newtons;
  286. m2[i] = std::rand() * boost::units::si::meters;
  287. }
  288. std::cout << "tiled_matrix_multiply<quantity>("
  289. << max_value << ", " << max_value << ") : ";
  290. boost::timer timer;
  291. tiled_multiply_carray_outer(
  292. &m1[0],
  293. &m2[0],
  294. &cresultq[0],
  295. max_value,
  296. max_value,
  297. max_value);
  298. std::cout << timer.elapsed() << " seconds" << std::endl;
  299. }
  300. for(int i = 0; i < max_value; ++i) {
  301. for(int j = 0; j < max_value; ++j) {
  302. const double diff =
  303. std::abs(ublas_result(i,j) - cresult[i * max_value + j]);
  304. if(diff > ublas_result(i,j) /1e14) {
  305. std::cout << std::setprecision(15) << "Uh Oh. ublas_result("
  306. << i << "," << j << ") = " << ublas_result(i,j)
  307. << std::endl
  308. << "cresult[" << i << " * " << max_value << " + "
  309. << j << "] = " << cresult[i * max_value + j]
  310. << std::endl;
  311. return(EXIT_FAILURE);
  312. }
  313. }
  314. }
  315. {
  316. std::vector<double> values(1000);
  317. std::cout << "solving y' = 1 - x + 4 * y with double: ";
  318. boost::timer timer;
  319. for(int i = 0; i < 1000; ++i) {
  320. const double x = .1 * i;
  321. values[i] = solve_differential_equation(f(), 0.0, x, i * 100, 1.0);
  322. }
  323. std::cout << timer.elapsed() << " seconds" << std::endl;
  324. for(int i = 0; i < 1000; ++i) {
  325. const double x = .1 * i;
  326. const double value = 1.0/4.0 * x - 3.0/16.0 + 19.0/16.0 * std::exp(4 * x);
  327. if(std::abs(values[i] - value) > value / 1e9) {
  328. std::cout << std::setprecision(15) << "i = : " << i
  329. << ", value = " << value << " approx = " << values[i]
  330. << std::endl;
  331. return(EXIT_FAILURE);
  332. }
  333. }
  334. }
  335. {
  336. using namespace boost::units;
  337. using namespace si;
  338. std::vector<quantity<length> > values(1000);
  339. std::cout << "solving y' = 1 - x + 4 * y with quantity: ";
  340. boost::timer timer;
  341. for(int i = 0; i < 1000; ++i) {
  342. const quantity<si::time> x = .1 * i * seconds;
  343. values[i] = solve_differential_equation(
  344. f(),
  345. 0.0 * seconds,
  346. x,
  347. i * 100,
  348. 1.0 * meters);
  349. }
  350. std::cout << timer.elapsed() << " seconds" << std::endl;
  351. for(int i = 0; i < 1000; ++i) {
  352. const double x = .1 * i;
  353. const quantity<si::length> value =
  354. (1.0/4.0 * x - 3.0/16.0 + 19.0/16.0 * std::exp(4 * x)) * meters;
  355. if(abs(values[i] - value) > value / 1e9) {
  356. std::cout << std::setprecision(15) << "i = : " << i
  357. << ", value = " << value << " approx = "
  358. << values[i] << std::endl;
  359. return(EXIT_FAILURE);
  360. }
  361. }
  362. }
  363. }