wide_encoding.hpp 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. #ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_WIDE_ENCODING_HPP
  2. #define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_WIDE_ENCODING_HPP
  3. #include <boost/assert.hpp>
  4. #include <boost/range/iterator_range_core.hpp>
  5. #include <utility>
  6. namespace boost { namespace property_tree {
  7. namespace json_parser { namespace detail
  8. {
  // Character classification for JSON input whose code units are wchar_t.
  //
  // Every member is a stateless test of a single code unit against wide
  // character literals, so all of them are const and can be used through a
  // const encoding object.
  struct external_wide_encoding
  {
      typedef wchar_t external_char;

      // Line feed only.
      bool is_nl(wchar_t c) const { return c == L'\n'; }
      // Space, horizontal tab, line feed or carriage return.
      bool is_ws(wchar_t c) const {
          return c == L' ' || c == L'\t' || c == L'\n' || c == L'\r';
      }

      // Characters that make up numbers.
      bool is_minus(wchar_t c) const { return c == L'-'; }
      bool is_plusminus(wchar_t c) const { return c == L'+' || c == L'-'; }
      bool is_dot(wchar_t c) const { return c == L'.'; }
      bool is_eE(wchar_t c) const { return c == L'e' || c == L'E'; }
      bool is_0(wchar_t c) const { return c == L'0'; }
      // Any decimal digit, '0' through '9'.
      bool is_digit(wchar_t c) const { return c >= L'0' && c <= L'9'; }
      // Despite the name, this accepts the non-zero digits '1' through '9'.
      bool is_digit0(wchar_t c) const { return c >= L'1' && c <= L'9'; }

      // Punctuation.
      bool is_quote(wchar_t c) const { return c == L'"'; }
      bool is_backslash(wchar_t c) const { return c == L'\\'; }
      bool is_slash(wchar_t c) const { return c == L'/'; }
      bool is_comma(wchar_t c) const { return c == L','; }
      bool is_open_bracket(wchar_t c) const { return c == L'['; }
      bool is_close_bracket(wchar_t c) const { return c == L']'; }
      bool is_colon(wchar_t c) const { return c == L':'; }
      bool is_open_brace(wchar_t c) const { return c == L'{'; }
      bool is_close_brace(wchar_t c) const { return c == L'}'; }

      // Individual lower-case letters.
      bool is_a(wchar_t c) const { return c == L'a'; }
      bool is_b(wchar_t c) const { return c == L'b'; }
      bool is_e(wchar_t c) const { return c == L'e'; }
      bool is_f(wchar_t c) const { return c == L'f'; }
      bool is_l(wchar_t c) const { return c == L'l'; }
      bool is_n(wchar_t c) const { return c == L'n'; }
      bool is_r(wchar_t c) const { return c == L'r'; }
      bool is_s(wchar_t c) const { return c == L's'; }
      bool is_t(wchar_t c) const { return c == L't'; }
      bool is_u(wchar_t c) const { return c == L'u'; }

      // Returns the value (0-15) of a hexadecimal digit in either letter
      // case, or -1 when c is not a hexadecimal digit.
      // Declared const like the predicates above: it reads no object state.
      int decode_hexdigit(wchar_t c) const {
          if (c >= L'0' && c <= L'9') return c - L'0';
          if (c >= L'A' && c <= L'F') return c - L'A' + 10;
          if (c >= L'a' && c <= L'f') return c - L'a' + 10;
          return -1;
      }
  };
  // Empty tag type parameterised on a compile-time flag; is_utf16<true> and
  // is_utf16<false> are distinct types, which lets overloads be selected by
  // passing a default-constructed instance.
  template <bool IsUtf16>
  struct is_utf16
  {
  };
  50. class wide_wide_encoding : public external_wide_encoding
  51. {
  52. typedef is_utf16<sizeof(wchar_t) == 2> test_utf16;
  53. public:
  54. typedef wchar_t internal_char;
  55. template <typename Iterator>
  56. boost::iterator_range<Iterator>
  57. to_internal(Iterator first, Iterator last) const {
  58. return boost::make_iterator_range(first, last);
  59. }
  60. wchar_t to_internal_trivial(wchar_t c) const {
  61. BOOST_ASSERT(!is_surrogate_high(c) && !is_surrogate_low(c));
  62. return c;
  63. }
  64. template <typename Iterator, typename Sentinel,
  65. typename EncodingErrorFn>
  66. void skip_codepoint(Iterator& cur, Sentinel end,
  67. EncodingErrorFn error_fn) const {
  68. transcode_codepoint(cur, end, DoNothing(), error_fn);
  69. }
  70. template <typename Iterator, typename Sentinel, typename TranscodedFn,
  71. typename EncodingErrorFn>
  72. void transcode_codepoint(Iterator& cur, Sentinel end,
  73. TranscodedFn transcoded_fn, EncodingErrorFn error_fn) const {
  74. return transcode_codepoint(cur, end, transcoded_fn, error_fn,
  75. test_utf16());
  76. }
  77. template <typename TranscodedFn>
  78. void feed_codepoint(unsigned codepoint,
  79. TranscodedFn transcoded_fn) const {
  80. feed_codepoint(codepoint, transcoded_fn, test_utf16());
  81. }
  82. template <typename Iterator, typename Sentinel>
  83. void skip_introduction(Iterator& cur, Sentinel end) const {
  84. // Endianness is already decoded at this level.
  85. if (cur != end && *cur == 0xfeff) {
  86. ++cur;
  87. }
  88. }
  89. private:
  90. struct DoNothing {
  91. void operator ()(wchar_t) const {}
  92. };
  93. template <typename Iterator, typename Sentinel, typename TranscodedFn,
  94. typename EncodingErrorFn>
  95. void transcode_codepoint(Iterator& cur, Sentinel,
  96. TranscodedFn transcoded_fn,
  97. EncodingErrorFn error_fn,
  98. is_utf16<false>) const {
  99. wchar_t c = *cur;
  100. if (c < 0x20) {
  101. error_fn();
  102. }
  103. transcoded_fn(c);
  104. ++cur;
  105. }
  106. template <typename Iterator, typename Sentinel, typename TranscodedFn,
  107. typename EncodingErrorFn>
  108. void transcode_codepoint(Iterator& cur, Sentinel end,
  109. TranscodedFn transcoded_fn,
  110. EncodingErrorFn error_fn,
  111. is_utf16<true>) const {
  112. wchar_t c = *cur;
  113. if (c < 0x20) {
  114. error_fn();
  115. }
  116. if (is_surrogate_low(c)) {
  117. error_fn();
  118. }
  119. transcoded_fn(c);
  120. ++cur;
  121. if (is_surrogate_high(c)) {
  122. if (cur == end) {
  123. error_fn();
  124. }
  125. c = *cur;
  126. if (!is_surrogate_low(c)) {
  127. error_fn();
  128. }
  129. transcoded_fn(c);
  130. ++cur;
  131. }
  132. }
  133. template <typename TranscodedFn>
  134. void feed_codepoint(unsigned codepoint, TranscodedFn transcoded_fn,
  135. is_utf16<false>) const {
  136. transcoded_fn(static_cast<wchar_t>(codepoint));
  137. }
  138. template <typename TranscodedFn>
  139. void feed_codepoint(unsigned codepoint, TranscodedFn transcoded_fn,
  140. is_utf16<true>) const {
  141. if (codepoint < 0x10000) {
  142. transcoded_fn(static_cast<wchar_t>(codepoint));
  143. } else {
  144. codepoint -= 0x10000;
  145. transcoded_fn(static_cast<wchar_t>((codepoint >> 10) | 0xd800));
  146. transcoded_fn(static_cast<wchar_t>(
  147. (codepoint & 0x3ff) | 0xdc00));
  148. }
  149. }
  150. static bool is_surrogate_high(unsigned codepoint) {
  151. return (codepoint & 0xfc00) == 0xd800;
  152. }
  153. static bool is_surrogate_low(unsigned codepoint) {
  154. return (codepoint & 0xfc00) == 0xdc00;
  155. }
  156. };
  157. }}}}
  158. #endif