  // generalizedstruct.cpp
  // This example shows how to sort structs with complex multi-part keys using
  // string_sort.
  //
  // Copyright Steven Ross 2009-2014.
  //
  // Distributed under the Boost Software License, Version 1.0.
  // (See accompanying file LICENSE_1_0.txt or copy at
  // http://www.boost.org/LICENSE_1_0.txt)
  // See http://www.boost.org/libs/sort for library home page.
  10. #include <boost/sort/spreadsort/string_sort.hpp>
  11. #include <boost/sort/spreadsort/float_sort.hpp>
  12. #include <time.h>
  13. #include <stdio.h>
  14. #include <stdlib.h>
  15. #include <algorithm>
  16. #include <vector>
  17. #include <iostream>
  18. #include <fstream>
  19. #include <string>
  20. using std::string;
  21. using namespace boost::sort::spreadsort;
  22. //[generalized_functors
  // Record type sorted by this example. The members are declared in the same
  // order in which lessthan compares them: birth, net_worth, first_name,
  // last_name.
  struct DATA_TYPE {
    // Primary sort key.
    time_t birth;
    // Secondary sort key.
    float net_worth;
    // Third sort key.
    std::string first_name;
    // Final tie-breaker.
    std::string last_name;
  };
  29. static const int birth_size = sizeof(time_t);
  30. static const int first_name_offset = birth_size + sizeof(float);
  31. static const boost::uint64_t base_mask = 0xff;
  32. struct lessthan {
  33. inline bool operator()(const DATA_TYPE &x, const DATA_TYPE &y) const {
  34. if (x.birth != y.birth) {
  35. return x.birth < y.birth;
  36. }
  37. if (x.net_worth != y.net_worth) {
  38. return x.net_worth < y.net_worth;
  39. }
  40. if (x.first_name != y.first_name) {
  41. return x.first_name < y.first_name;
  42. }
  43. return x.last_name < y.last_name;
  44. }
  45. };
  46. struct bracket {
  47. inline unsigned char operator()(const DATA_TYPE &x, size_t offset) const {
  48. // Sort date as a signed int, returning the appropriate byte.
  49. if (offset < birth_size) {
  50. const int bit_shift = 8 * (birth_size - offset - 1);
  51. unsigned char result = (x.birth & (base_mask << bit_shift)) >> bit_shift;
  52. // Handling the sign bit. Unnecessary if the data is always positive.
  53. if (offset == 0) {
  54. return result ^ 128;
  55. }
  56. return result;
  57. }
  58. // Sort a signed float. This requires reversing the order of negatives
  59. // because of the way floats are represented in bits.
  60. if (offset < first_name_offset) {
  61. const int bit_shift = 8 * (first_name_offset - offset - 1);
  62. unsigned key = float_mem_cast<float, unsigned>(x.net_worth);
  63. unsigned char result = (key & (base_mask << bit_shift)) >> bit_shift;
  64. // Handling the sign.
  65. if (x.net_worth < 0) {
  66. return 255 - result;
  67. }
  68. // Increasing positives so they are higher than negatives.
  69. if (offset == birth_size) {
  70. return 128 + result;
  71. }
  72. return result;
  73. }
  74. // Sort a string that is before the end. This approach supports embedded
  75. // nulls. If embedded nulls are not required, then just delete the "* 2"
  76. // and the inside of the following if just becomes:
  77. // return x.first_name[offset - first_name_offset];
  78. const unsigned first_name_end_offset =
  79. first_name_offset + x.first_name.size() * 2;
  80. if (offset < first_name_end_offset) {
  81. int char_offset = offset - first_name_offset;
  82. // This signals that the string continues.
  83. if (!(char_offset & 1)) {
  84. return 1;
  85. }
  86. return x.first_name[char_offset >> 1];
  87. }
  88. // This signals that the string has ended, so that shorter strings come
  89. // before longer ones.
  90. if (offset == first_name_end_offset) {
  91. return 0;
  92. }
  93. // The final string needs no special consideration.
  94. return x.last_name[offset - first_name_end_offset - 1];
  95. }
  96. };
  97. struct getsize {
  98. inline size_t operator()(const DATA_TYPE &x) const {
  99. return first_name_offset + x.first_name.size() * 2 + 1 +
  100. x.last_name.size();
  101. }
  102. };
  103. //] [/generalized_functors]
  // Pass "-std" to time std::sort instead of string_sort; any other argument
  // is parsed as the number of timing loops to run.
  105. int main(int argc, const char ** argv) {
  106. std::ifstream indata;
  107. std::ofstream outfile;
  108. bool stdSort = false;
  109. unsigned loopCount = 1;
  110. for (int u = 1; u < argc; ++u) {
  111. if (std::string(argv[u]) == "-std")
  112. stdSort = true;
  113. else
  114. loopCount = atoi(argv[u]);
  115. }
  116. double total = 0.0;
  117. //Run multiple loops, if requested
  118. std::vector<DATA_TYPE> array;
  119. for (unsigned u = 0; u < loopCount; ++u) {
  120. indata.open("input.txt", std::ios_base::in | std::ios_base::binary);
  121. if (indata.bad()) {
  122. printf("input.txt could not be opened\n");
  123. return 1;
  124. }
  125. // Read in the data.
  126. DATA_TYPE inval;
  127. while (!indata.eof() ) {
  128. indata >> inval.first_name;
  129. indata >> inval.last_name;
  130. indata.read(reinterpret_cast<char *>(&(inval.birth)), birth_size);
  131. indata.read(reinterpret_cast<char *>(&(inval.net_worth)), sizeof(float));
  132. // Handling nan.
  133. if (inval.net_worth != inval.net_worth) {
  134. inval.net_worth = 0;
  135. }
  136. if (indata.eof())
  137. break;
  138. array.push_back(inval);
  139. }
  140. indata.close();
  141. // Sort the data.
  142. clock_t start, end;
  143. double elapsed;
  144. start = clock();
  145. if (stdSort) {
  146. std::sort(array.begin(), array.end(), lessthan());
  147. } else {
  148. //[generalized_functors_call
  149. string_sort(array.begin(), array.end(), bracket(), getsize(), lessthan());
  150. //] [/generalized_functors_call]
  151. }
  152. end = clock();
  153. elapsed = static_cast<double>(end - start);
  154. if (stdSort) {
  155. outfile.open("standard_sort_out.txt", std::ios_base::out |
  156. std::ios_base::binary | std::ios_base::trunc);
  157. } else {
  158. outfile.open("boost_sort_out.txt", std::ios_base::out |
  159. std::ios_base::binary | std::ios_base::trunc);
  160. }
  161. if (outfile.good()) {
  162. for (unsigned u = 0; u < array.size(); ++u)
  163. outfile << array[u].birth << " " << array[u].net_worth << " "
  164. << array[u].first_name << " " << array[u].last_name << "\n";
  165. outfile.close();
  166. }
  167. total += elapsed;
  168. array.clear();
  169. }
  170. if (stdSort) {
  171. printf("std::sort elapsed time %f\n", total / CLOCKS_PER_SEC);
  172. } else {
  173. printf("spreadsort elapsed time %f\n", total / CLOCKS_PER_SEC);
  174. }
  175. return 0;
  176. }