// Copyright Vladimir Prus 2002-2004. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt // or copy at http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #include #include #include #include #include "minitest.hpp" using namespace std; string file_content(const string& filename) { ifstream ifs(filename.c_str()); assert(ifs); stringstream ss; ss << ifs.rdbuf(); return ss.str(); } // A version of from_8_bit which does not use functional object, for // performance comparison. std::wstring from_8_bit_2(const std::string& s, const codecvt& cvt) { std::wstring result; std::mbstate_t state = std::mbstate_t(); const char* from = s.data(); const char* from_end = s.data() + s.size(); // The interace of cvt is not really iterator-like, and it's // not possible the tell the required output size without the conversion. // All we can is convert data by pieces. while(from != from_end) { // std::basic_string does not provide non-const pointers to the data, // so converting directly into string is not possible. wchar_t buffer[32]; wchar_t* to_next = buffer; // Try to convert remaining input. std::codecvt_base::result r = cvt.in(state, from, from_end, from, buffer, buffer + 32, to_next); if (r == std::codecvt_base::error) throw logic_error("character conversion failed"); // 'partial' is not an error, it just means not all source characters // we converted. However, we need to check that at least one new target // character was produced. If not, it means the source data is // incomplete, and since we don't have extra data to add to source, it's // error. if (to_next == buffer) throw logic_error("character conversion failed"); // Add converted characters result.append(buffer, to_next); } return result; } void test_convert(const std::string& input, const std::string& expected_output) { boost::program_options::detail::utf8_codecvt_facet facet; std::wstring output; { boost::progress_timer t; for (int i = 0; i < 10000; ++i) output = boost::from_8_bit( input, facet); } { boost::progress_timer t; for (int i = 0; i < 10000; ++i) output = from_8_bit_2( input, facet); } BOOST_CHECK(output.size()*2 == expected_output.size()); for(unsigned i = 0; i < output.size(); ++i) { { unsigned low = output[i]; low &= 0xFF; unsigned low2 = expected_output[2*i]; low2 &= 0xFF; BOOST_CHECK(low == low2); } { unsigned high = output[i]; high >>= 8; high &= 0xFF; unsigned high2 = expected_output[2*i+1]; BOOST_CHECK(high == high2); } } string ref = boost::to_8_bit(output, facet); BOOST_CHECK(ref == input); } int main(int ac, char* av[]) { std::string input = file_content("utf8.txt"); std::string expected = file_content("ucs2.txt"); test_convert(input, expected); if (ac > 1) { cout << "Trying to convert the command line argument\n"; locale::global(locale("")); std::wstring w = boost::from_local_8_bit(av[1]); cout << "Got something, printing decimal code point values\n"; for (unsigned i = 0; i < w.size(); ++i) { cout << (unsigned)w[i] << "\n"; } } return 0; }