123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146 |
- // Copyright Vladimir Prus 2002-2004.
- // Distributed under the Boost Software License, Version 1.0.
- // (See accompanying file LICENSE_1_0.txt
- // or copy at http://www.boost.org/LICENSE_1_0.txt)
#include <cassert>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>

#include <boost/bind.hpp>
#include <boost/program_options/detail/convert.hpp>
#include <boost/program_options/detail/utf8_codecvt_facet.hpp>
#include <boost/progress.hpp>
#include <boost/ref.hpp>

#include "minitest.hpp"
- using namespace std;
// Reads the whole file named 'filename' and returns its content as a
// single string.
//
// Throws std::runtime_error if the file cannot be opened. An exception is
// used instead of assert() so that the check is still performed when the
// test is built with NDEBUG; otherwise a missing data file would silently
// produce an empty string.
std::string file_content(const std::string& filename)
{
    std::ifstream ifs(filename.c_str());
    if (!ifs)
        throw std::runtime_error("cannot open file '" + filename + "'");

    // Copy the entire stream buffer into the string stream in one go.
    std::stringstream ss;
    ss << ifs.rdbuf();

    return ss.str();
}
// Variant of from_8_bit that calls the codecvt facet directly instead of
// going through a function object. It exists for performance comparison.
//
// Returns the wide-character conversion of 's' as produced by 'cvt'.
// Throws std::logic_error if the facet reports an error, or if a conversion
// step produces no output characters.
std::wstring from_8_bit_2(const std::string& s,
                          const std::codecvt<wchar_t, char, std::mbstate_t>& cvt)
{
    // The codecvt interface is not iterator-like and cannot tell the
    // required output size ahead of the conversion, and the result string
    // is not written to directly, so the input is converted piece by piece
    // through a small scratch buffer.
    const std::size_t chunk_size = 32;

    std::wstring converted;
    std::mbstate_t shift_state = std::mbstate_t();

    const char* const input_end = s.data() + s.size();
    for (const char* cursor = s.data(); cursor != input_end; ) {
        wchar_t chunk[chunk_size];
        wchar_t* chunk_end = chunk;

        // Convert as much of the remaining input as fits into the chunk;
        // 'cursor' is moved past the source characters that were consumed.
        const std::codecvt_base::result status =
            cvt.in(shift_state, cursor, input_end, cursor,
                   chunk, chunk + chunk_size, chunk_end);

        // 'partial' is not an error by itself: it only means that not all
        // source characters were converted. However, at least one target
        // character must have been produced. If none was, the source data
        // is incomplete, and as there is no more data to append to it,
        // that is a failure as well.
        if (status == std::codecvt_base::error || chunk_end == chunk)
            throw std::logic_error("character conversion failed");

        // Append the characters converted in this step.
        converted.append(chunk, chunk_end);
    }
    return converted;
}
- void test_convert(const std::string& input,
- const std::string& expected_output)
- {
- boost::program_options::detail::utf8_codecvt_facet facet;
-
- std::wstring output;
- {
- boost::progress_timer t;
- for (int i = 0; i < 10000; ++i)
- output = boost::from_8_bit(
- input,
- facet);
- }
- {
- boost::progress_timer t;
- for (int i = 0; i < 10000; ++i)
- output = from_8_bit_2(
- input,
- facet);
- }
- BOOST_CHECK(output.size()*2 == expected_output.size());
- for(unsigned i = 0; i < output.size(); ++i) {
- {
- unsigned low = output[i];
- low &= 0xFF;
- unsigned low2 = expected_output[2*i];
- low2 &= 0xFF;
- BOOST_CHECK(low == low2);
- }
- {
- unsigned high = output[i];
- high >>= 8;
- high &= 0xFF;
- unsigned high2 = expected_output[2*i+1];
- BOOST_CHECK(high == high2);
- }
- }
- string ref = boost::to_8_bit(output, facet);
- BOOST_CHECK(ref == input);
- }
- int main(int ac, char* av[])
- {
- std::string input = file_content("utf8.txt");
- std::string expected = file_content("ucs2.txt");
- test_convert(input, expected);
-
- if (ac > 1) {
- cout << "Trying to convert the command line argument\n";
-
- locale::global(locale(""));
- std::wstring w = boost::from_local_8_bit(av[1]);
-
- cout << "Got something, printing decimal code point values\n";
- for (unsigned i = 0; i < w.size(); ++i) {
- cout << (unsigned)w[i] << "\n";
- }
-
- }
-
- return 0;
- }
|