test_convert.cpp 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
  1. // Copyright Vladimir Prus 2002-2004.
  2. // Distributed under the Boost Software License, Version 1.0.
  3. // (See accompanying file LICENSE_1_0.txt
  4. // or copy at http://www.boost.org/LICENSE_1_0.txt)
#include <cassert>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>

#include <boost/progress.hpp>
#include <boost/bind.hpp>
#include <boost/ref.hpp>
#include <boost/program_options/detail/convert.hpp>
#include <boost/program_options/detail/utf8_codecvt_facet.hpp>

#include "minitest.hpp"
  17. using namespace std;
  18. string file_content(const string& filename)
  19. {
  20. ifstream ifs(filename.c_str());
  21. assert(ifs);
  22. stringstream ss;
  23. ss << ifs.rdbuf();
  24. return ss.str();
  25. }
  26. // A version of from_8_bit which does not use functional object, for
  27. // performance comparison.
  28. std::wstring from_8_bit_2(const std::string& s,
  29. const codecvt<wchar_t, char, mbstate_t>& cvt)
  30. {
  31. std::wstring result;
  32. std::mbstate_t state = std::mbstate_t();
  33. const char* from = s.data();
  34. const char* from_end = s.data() + s.size();
  35. // The interace of cvt is not really iterator-like, and it's
  36. // not possible the tell the required output size without the conversion.
  37. // All we can is convert data by pieces.
  38. while(from != from_end) {
  39. // std::basic_string does not provide non-const pointers to the data,
  40. // so converting directly into string is not possible.
  41. wchar_t buffer[32];
  42. wchar_t* to_next = buffer;
  43. // Try to convert remaining input.
  44. std::codecvt_base::result r =
  45. cvt.in(state, from, from_end, from, buffer, buffer + 32, to_next);
  46. if (r == std::codecvt_base::error)
  47. throw logic_error("character conversion failed");
  48. // 'partial' is not an error, it just means not all source characters
  49. // we converted. However, we need to check that at least one new target
  50. // character was produced. If not, it means the source data is
  51. // incomplete, and since we don't have extra data to add to source, it's
  52. // error.
  53. if (to_next == buffer)
  54. throw logic_error("character conversion failed");
  55. // Add converted characters
  56. result.append(buffer, to_next);
  57. }
  58. return result;
  59. }
  60. void test_convert(const std::string& input,
  61. const std::string& expected_output)
  62. {
  63. boost::program_options::detail::utf8_codecvt_facet facet;
  64. std::wstring output;
  65. {
  66. boost::progress_timer t;
  67. for (int i = 0; i < 10000; ++i)
  68. output = boost::from_8_bit(
  69. input,
  70. facet);
  71. }
  72. {
  73. boost::progress_timer t;
  74. for (int i = 0; i < 10000; ++i)
  75. output = from_8_bit_2(
  76. input,
  77. facet);
  78. }
  79. BOOST_CHECK(output.size()*2 == expected_output.size());
  80. for(unsigned i = 0; i < output.size(); ++i) {
  81. {
  82. unsigned low = output[i];
  83. low &= 0xFF;
  84. unsigned low2 = expected_output[2*i];
  85. low2 &= 0xFF;
  86. BOOST_CHECK(low == low2);
  87. }
  88. {
  89. unsigned high = output[i];
  90. high >>= 8;
  91. high &= 0xFF;
  92. unsigned high2 = expected_output[2*i+1];
  93. BOOST_CHECK(high == high2);
  94. }
  95. }
  96. string ref = boost::to_8_bit(output, facet);
  97. BOOST_CHECK(ref == input);
  98. }
  99. int main(int ac, char* av[])
  100. {
  101. std::string input = file_content("utf8.txt");
  102. std::string expected = file_content("ucs2.txt");
  103. test_convert(input, expected);
  104. if (ac > 1) {
  105. cout << "Trying to convert the command line argument\n";
  106. locale::global(locale(""));
  107. std::wstring w = boost::from_local_8_bit(av[1]);
  108. cout << "Got something, printing decimal code point values\n";
  109. for (unsigned i = 0; i < w.size(); ++i) {
  110. cout << (unsigned)w[i] << "\n";
  111. }
  112. }
  113. return 0;
  114. }