utf8.hpp 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. /*=============================================================================
  2. Copyright (c) 2001-2014 Joel de Guzman
  3. Distributed under the Boost Software License, Version 1.0. (See accompanying
  4. file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  5. ==============================================================================*/
  6. #if !defined(BOOST_SPIRIT_X3_UC_TYPES_NOVEMBER_23_2008_0840PM)
  7. #define BOOST_SPIRIT_X3_UC_TYPES_NOVEMBER_23_2008_0840PM
  8. #include <boost/cstdint.hpp>
  9. #include <boost/regex/pending/unicode_iterator.hpp>
  10. #include <boost/type_traits/make_unsigned.hpp>
  11. #include <string>
  12. namespace boost { namespace spirit { namespace x3
  13. {
  14. typedef ::boost::uint32_t ucs4_char;
  15. typedef char utf8_char;
  16. typedef std::basic_string<ucs4_char> ucs4_string;
  17. typedef std::basic_string<utf8_char> utf8_string;
  18. template <typename Char>
  19. inline utf8_string to_utf8(Char value)
  20. {
  21. // always store as UTF8
  22. utf8_string result;
  23. typedef std::back_insert_iterator<utf8_string> insert_iter;
  24. insert_iter out_iter(result);
  25. utf8_output_iterator<insert_iter> utf8_iter(out_iter);
  26. typedef typename make_unsigned<Char>::type UChar;
  27. *utf8_iter = (UChar)value;
  28. return result;
  29. }
  30. template <typename Char>
  31. inline utf8_string to_utf8(Char const* str)
  32. {
  33. // always store as UTF8
  34. utf8_string result;
  35. typedef std::back_insert_iterator<utf8_string> insert_iter;
  36. insert_iter out_iter(result);
  37. utf8_output_iterator<insert_iter> utf8_iter(out_iter);
  38. typedef typename make_unsigned<Char>::type UChar;
  39. while (*str)
  40. *utf8_iter++ = (UChar)*str++;
  41. return result;
  42. }
  43. template <typename Char, typename Traits, typename Allocator>
  44. inline utf8_string
  45. to_utf8(std::basic_string<Char, Traits, Allocator> const& str)
  46. {
  47. // always store as UTF8
  48. utf8_string result;
  49. typedef std::back_insert_iterator<utf8_string> insert_iter;
  50. insert_iter out_iter(result);
  51. utf8_output_iterator<insert_iter> utf8_iter(out_iter);
  52. typedef typename make_unsigned<Char>::type UChar;
  53. for (Char ch : str)
  54. {
  55. *utf8_iter++ = (UChar)ch;
  56. }
  57. return result;
  58. }
  59. // Assume wchar_t content is UTF-16 on Windows and UCS-4 on Unix
  60. #if defined(_WIN32) || defined(__CYGWIN__)
  61. inline utf8_string to_utf8(wchar_t value)
  62. {
  63. utf8_string result;
  64. typedef std::back_insert_iterator<utf8_string> insert_iter;
  65. insert_iter out_iter(result);
  66. utf8_output_iterator<insert_iter> utf8_iter(out_iter);
  67. u16_to_u32_iterator<wchar_t const*, ucs4_char> ucs4_iter(&value);
  68. *utf8_iter++ = *ucs4_iter;
  69. return result;
  70. }
  71. inline utf8_string to_utf8(wchar_t const* str)
  72. {
  73. utf8_string result;
  74. typedef std::back_insert_iterator<utf8_string> insert_iter;
  75. insert_iter out_iter(result);
  76. utf8_output_iterator<insert_iter> utf8_iter(out_iter);
  77. u16_to_u32_iterator<wchar_t const*, ucs4_char> ucs4_iter(str);
  78. for (ucs4_char c; (c = *ucs4_iter) != ucs4_char(); ++ucs4_iter) {
  79. *utf8_iter++ = c;
  80. }
  81. return result;
  82. }
  83. template <typename Traits, typename Allocator>
  84. inline utf8_string
  85. to_utf8(std::basic_string<wchar_t, Traits, Allocator> const& str)
  86. {
  87. return to_utf8(str.c_str());
  88. }
  89. #endif
  90. }}}
  91. #endif