narrow_encoding.hpp 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  1. #ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_NARROW_ENCODING_HPP
  2. #define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_NARROW_ENCODING_HPP
  3. #include <boost/assert.hpp>
  4. #include <boost/range/iterator_range_core.hpp>
  5. #include <utility>
  6. namespace boost { namespace property_tree {
  7. namespace json_parser { namespace detail
  8. {
  /// Character-classification predicates for an input encoding whose code
  /// unit is a plain `char`.
  ///
  /// Every predicate compares a single code unit against narrow character
  /// literals, so the struct carries no state and all members are `const`.
  struct external_ascii_superset_encoding
  {
      /// Code unit type of the external (input) text.
      typedef char external_char;

      // Whitespace.
      bool is_nl(char c) const { return c == '\n'; }
      bool is_ws(char c) const {
          return c == ' ' || c == '\t' || c == '\n' || c == '\r';
      }

      // Pieces of a number.
      bool is_minus(char c) const { return c == '-'; }
      bool is_plusminus(char c) const { return c == '+' || c == '-'; }
      bool is_dot(char c) const { return c == '.'; }
      bool is_eE(char c) const { return c == 'e' || c == 'E'; }
      bool is_0(char c) const { return c == '0'; }
      /// True for any decimal digit '0'..'9'.
      bool is_digit(char c) const { return c >= '0' && c <= '9'; }
      /// True for the non-zero decimal digits '1'..'9' only.
      bool is_digit0(char c) const { return c >= '1' && c <= '9'; }

      // Punctuation.
      bool is_quote(char c) const { return c == '"'; }
      bool is_backslash(char c) const { return c == '\\'; }
      bool is_slash(char c) const { return c == '/'; }
      bool is_comma(char c) const { return c == ','; }
      bool is_open_bracket(char c) const { return c == '['; }
      bool is_close_bracket(char c) const { return c == ']'; }
      bool is_colon(char c) const { return c == ':'; }
      bool is_open_brace(char c) const { return c == '{'; }
      bool is_close_brace(char c) const { return c == '}'; }

      // Individual lowercase letters.
      bool is_a(char c) const { return c == 'a'; }
      bool is_b(char c) const { return c == 'b'; }
      bool is_e(char c) const { return c == 'e'; }
      bool is_f(char c) const { return c == 'f'; }
      bool is_l(char c) const { return c == 'l'; }
      bool is_n(char c) const { return c == 'n'; }
      bool is_r(char c) const { return c == 'r'; }
      bool is_s(char c) const { return c == 's'; }
      bool is_t(char c) const { return c == 't'; }
      bool is_u(char c) const { return c == 'u'; }

      /// Converts one hexadecimal digit to its numeric value.
      ///
      /// @param c  The code unit to decode.
      /// @return   0..15 for '0'-'9', 'A'-'F' and 'a'-'f'; -1 for anything
      ///           else.
      ///
      /// The function is `const` like every other member of this struct, so
      /// it can be called through a const encoding object. The range checks
      /// rely on 'A'..'F' and 'a'..'f' being contiguous in the execution
      /// character set.
      int decode_hexdigit(char c) const {
          if (c >= '0' && c <= '9') return c - '0';
          if (c >= 'A' && c <= 'F') return c - 'A' + 10;
          if (c >= 'a' && c <= 'f') return c - 'a' + 10;
          return -1;
      }
  };
  49. struct utf8_utf8_encoding : external_ascii_superset_encoding
  50. {
  51. typedef char internal_char;
  52. template <typename Iterator>
  53. boost::iterator_range<Iterator>
  54. to_internal(Iterator first, Iterator last) const {
  55. return boost::make_iterator_range(first, last);
  56. }
  57. char to_internal_trivial(char c) const {
  58. BOOST_ASSERT(static_cast<unsigned char>(c) <= 0x7f);
  59. return c;
  60. }
  61. template <typename Iterator, typename Sentinel,
  62. typename EncodingErrorFn>
  63. void skip_codepoint(Iterator& cur, Sentinel end,
  64. EncodingErrorFn error_fn) const {
  65. transcode_codepoint(cur, end, DoNothing(), error_fn);
  66. }
  67. template <typename Iterator, typename Sentinel, typename TranscodedFn,
  68. typename EncodingErrorFn>
  69. void transcode_codepoint(Iterator& cur, Sentinel end,
  70. TranscodedFn transcoded_fn, EncodingErrorFn error_fn) const {
  71. unsigned char c = *cur;
  72. ++cur;
  73. if (c <= 0x7f) {
  74. // Solo byte, filter out disallowed codepoints.
  75. if (c < 0x20) {
  76. error_fn();
  77. }
  78. transcoded_fn(c);
  79. return;
  80. }
  81. int trailing = trail_table(c);
  82. if (trailing == -1) {
  83. // Standalone trailing byte or overly long sequence.
  84. error_fn();
  85. }
  86. transcoded_fn(c);
  87. for (int i = 0; i < trailing; ++i) {
  88. if (cur == end || !is_trail(*cur)) {
  89. error_fn();
  90. }
  91. transcoded_fn(*cur);
  92. ++cur;
  93. }
  94. }
  95. template <typename TranscodedFn>
  96. void feed_codepoint(unsigned codepoint,
  97. TranscodedFn transcoded_fn) const {
  98. if (codepoint <= 0x7f) {
  99. transcoded_fn(static_cast<char>(codepoint));
  100. } else if (codepoint <= 0x7ff) {
  101. transcoded_fn(static_cast<char>(0xc0 | (codepoint >> 6)));
  102. transcoded_fn(trail(codepoint));
  103. } else if (codepoint <= 0xffff) {
  104. transcoded_fn(static_cast<char>(0xe0 | (codepoint >> 12)));
  105. transcoded_fn(trail(codepoint >> 6));
  106. transcoded_fn(trail(codepoint));
  107. } else if (codepoint <= 0x10ffff) {
  108. transcoded_fn(static_cast<char>(0xf0 | (codepoint >> 18)));
  109. transcoded_fn(trail(codepoint >> 12));
  110. transcoded_fn(trail(codepoint >> 6));
  111. transcoded_fn(trail(codepoint));
  112. }
  113. }
  114. template <typename Iterator, typename Sentinel>
  115. void skip_introduction(Iterator& cur, Sentinel end) const {
  116. if (cur != end && static_cast<unsigned char>(*cur) == 0xef) {
  117. if (++cur == end) return;
  118. if (++cur == end) return;
  119. if (++cur == end) return;
  120. }
  121. }
  122. private:
  123. struct DoNothing {
  124. void operator ()(char) const {}
  125. };
  126. bool is_trail(unsigned char c) const {
  127. return (c & 0xc0) == 0x80;
  128. }
  129. int trail_table(unsigned char c) const {
  130. static const signed char table[] = {
  131. /* not a lead byte */
  132. /* 0x10???sss */ -1, -1, -1, -1, -1, -1, -1, -1,
  133. /* 0x110??sss */ 1, 1, 1, 1, /* 1 trailing byte */
  134. /* 0x1110?sss */ 2, 2, /* 2 trailing bytes */
  135. /* 0x11110sss */ 3, /* 3 trailing bytes */
  136. /* 0x11111sss */ -1 /* 4 or 5 trailing bytes, disallowed */
  137. };
  138. return table[(c & 0x7f) >> 3];
  139. }
  140. char trail(unsigned unmasked) const {
  141. return static_cast<char>(0x80 | (unmasked & 0x3f));
  142. }
  143. };
  144. }}}}
  145. #endif