// This example shows how to sort structs using complex multiple part keys using // string_sort. // // Copyright Steven Ross 2009-2014. // // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // See http://www.boost.org/libs/sort for library home page. #include #include #include #include #include #include #include #include #include #include using std::string; using namespace boost::sort::spreadsort; //[generalized_functors struct DATA_TYPE { time_t birth; float net_worth; string first_name; string last_name; }; static const int birth_size = sizeof(time_t); static const int first_name_offset = birth_size + sizeof(float); static const boost::uint64_t base_mask = 0xff; struct lessthan { inline bool operator()(const DATA_TYPE &x, const DATA_TYPE &y) const { if (x.birth != y.birth) { return x.birth < y.birth; } if (x.net_worth != y.net_worth) { return x.net_worth < y.net_worth; } if (x.first_name != y.first_name) { return x.first_name < y.first_name; } return x.last_name < y.last_name; } }; struct bracket { inline unsigned char operator()(const DATA_TYPE &x, size_t offset) const { // Sort date as a signed int, returning the appropriate byte. if (offset < birth_size) { const int bit_shift = 8 * (birth_size - offset - 1); unsigned char result = (x.birth & (base_mask << bit_shift)) >> bit_shift; // Handling the sign bit. Unnecessary if the data is always positive. if (offset == 0) { return result ^ 128; } return result; } // Sort a signed float. This requires reversing the order of negatives // because of the way floats are represented in bits. if (offset < first_name_offset) { const int bit_shift = 8 * (first_name_offset - offset - 1); unsigned key = float_mem_cast(x.net_worth); unsigned char result = (key & (base_mask << bit_shift)) >> bit_shift; // Handling the sign. if (x.net_worth < 0) { return 255 - result; } // Increasing positives so they are higher than negatives. if (offset == birth_size) { return 128 + result; } return result; } // Sort a string that is before the end. This approach supports embedded // nulls. If embedded nulls are not required, then just delete the "* 2" // and the inside of the following if just becomes: // return x.first_name[offset - first_name_offset]; const unsigned first_name_end_offset = first_name_offset + x.first_name.size() * 2; if (offset < first_name_end_offset) { int char_offset = offset - first_name_offset; // This signals that the string continues. if (!(char_offset & 1)) { return 1; } return x.first_name[char_offset >> 1]; } // This signals that the string has ended, so that shorter strings come // before longer ones. if (offset == first_name_end_offset) { return 0; } // The final string needs no special consideration. return x.last_name[offset - first_name_end_offset - 1]; } }; struct getsize { inline size_t operator()(const DATA_TYPE &x) const { return first_name_offset + x.first_name.size() * 2 + 1 + x.last_name.size(); } }; //] [/generalized_functors] //Pass in an argument to test std::sort int main(int argc, const char ** argv) { std::ifstream indata; std::ofstream outfile; bool stdSort = false; unsigned loopCount = 1; for (int u = 1; u < argc; ++u) { if (std::string(argv[u]) == "-std") stdSort = true; else loopCount = atoi(argv[u]); } double total = 0.0; //Run multiple loops, if requested std::vector array; for (unsigned u = 0; u < loopCount; ++u) { indata.open("input.txt", std::ios_base::in | std::ios_base::binary); if (indata.bad()) { printf("input.txt could not be opened\n"); return 1; } // Read in the data. DATA_TYPE inval; while (!indata.eof() ) { indata >> inval.first_name; indata >> inval.last_name; indata.read(reinterpret_cast(&(inval.birth)), birth_size); indata.read(reinterpret_cast(&(inval.net_worth)), sizeof(float)); // Handling nan. if (inval.net_worth != inval.net_worth) { inval.net_worth = 0; } if (indata.eof()) break; array.push_back(inval); } indata.close(); // Sort the data. clock_t start, end; double elapsed; start = clock(); if (stdSort) { std::sort(array.begin(), array.end(), lessthan()); } else { //[generalized_functors_call string_sort(array.begin(), array.end(), bracket(), getsize(), lessthan()); //] [/generalized_functors_call] } end = clock(); elapsed = static_cast(end - start); if (stdSort) { outfile.open("standard_sort_out.txt", std::ios_base::out | std::ios_base::binary | std::ios_base::trunc); } else { outfile.open("boost_sort_out.txt", std::ios_base::out | std::ios_base::binary | std::ios_base::trunc); } if (outfile.good()) { for (unsigned u = 0; u < array.size(); ++u) outfile << array[u].birth << " " << array[u].net_worth << " " << array[u].first_name << " " << array[u].last_name << "\n"; outfile.close(); } total += elapsed; array.clear(); } if (stdSort) { printf("std::sort elapsed time %f\n", total / CLOCKS_PER_SEC); } else { printf("spreadsort elapsed time %f\n", total / CLOCKS_PER_SEC); } return 0; }