cpp_regex_traits.hpp 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708
  1. ///////////////////////////////////////////////////////////////////////////////
  2. /// \file cpp_regex_traits.hpp
  3. /// Contains the definition of the cpp_regex_traits\<\> template, which is a
  4. /// wrapper for std::locale that can be used to customize the behavior of
  5. /// static and dynamic regexes.
  6. //
  7. // Copyright 2008 Eric Niebler. Distributed under the Boost
  8. // Software License, Version 1.0. (See accompanying file
  9. // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  10. #ifndef BOOST_XPRESSIVE_TRAITS_CPP_REGEX_TRAITS_HPP_EAN_10_04_2005
  11. #define BOOST_XPRESSIVE_TRAITS_CPP_REGEX_TRAITS_HPP_EAN_10_04_2005
  12. // MS compatible compilers support #pragma once
  13. #if defined(_MSC_VER)
  14. # pragma once
  15. #endif
  16. #include <ios>
  17. #include <string>
  18. #include <locale>
  19. #include <sstream>
  20. #include <climits>
  21. #include <boost/config.hpp>
  22. #include <boost/assert.hpp>
  23. #include <boost/integer.hpp>
  24. #include <boost/mpl/assert.hpp>
  25. #include <boost/static_assert.hpp>
  26. #include <boost/detail/workaround.hpp>
  27. #include <boost/type_traits/is_same.hpp>
  28. #include <boost/xpressive/detail/detail_fwd.hpp>
  29. #include <boost/xpressive/detail/utility/literals.hpp>
  30. // From John Maddock:
  31. // Fix for gcc prior to 3.4: std::ctype<wchar_t> doesn't allow masks to be combined, for example:
  32. // std::use_facet<std::ctype<wchar_t> >(locale()).is(std::ctype_base::lower|std::ctype_base::upper, L'a');
  33. // incorrectly returns false.
  34. // NOTE: later versions of gcc define __GLIBCXX__, not __GLIBCPP__
  35. #if BOOST_WORKAROUND(__GLIBCPP__, != 0)
  36. # define BOOST_XPRESSIVE_BUGGY_CTYPE_FACET
  37. #endif
  38. namespace boost { namespace xpressive
  39. {
  40. namespace detail
  41. {
  42. // define an unsigned integral typedef of the same size as std::ctype_base::mask
  43. typedef boost::uint_t<sizeof(std::ctype_base::mask) * CHAR_BIT>::least umask_t;
  44. BOOST_MPL_ASSERT_RELATION(sizeof(std::ctype_base::mask), ==, sizeof(umask_t));
  45. // Calculate what the size of the umaskex_t type should be to fix the 3 extra bitmasks
  46. // 11 char categories in ctype_base
  47. // + 3 extra categories for xpressive
  48. // = 14 total bits needed
  49. int const umaskex_bits = (14 > (sizeof(umask_t) * CHAR_BIT)) ? 14 : sizeof(umask_t) * CHAR_BIT;
  50. // define an unsigned integral type with at least umaskex_bits
  51. typedef boost::uint_t<umaskex_bits>::fast umaskex_t;
  52. BOOST_MPL_ASSERT_RELATION(sizeof(umask_t), <=, sizeof(umaskex_t));
  53. // cast a ctype mask to a umaskex_t
  54. template<std::ctype_base::mask Mask>
  55. struct mask_cast
  56. {
  57. BOOST_STATIC_CONSTANT(umaskex_t, value = static_cast<umask_t>(Mask));
  58. };
  59. #ifdef __CYGWIN__
  60. // Work around a gcc warning on cygwin
  61. template<>
  62. struct mask_cast<std::ctype_base::print>
  63. {
  64. BOOST_MPL_ASSERT_RELATION('\227', ==, std::ctype_base::print);
  65. BOOST_STATIC_CONSTANT(umaskex_t, value = 0227);
  66. };
  67. #endif
  68. #ifndef BOOST_NO_INCLASS_MEMBER_INITIALIZATION
  69. template<std::ctype_base::mask Mask>
  70. umaskex_t const mask_cast<Mask>::value;
  71. #endif
  72. #ifndef BOOST_XPRESSIVE_BUGGY_CTYPE_FACET
  73. // an unsigned integer with the highest bit set
  74. umaskex_t const highest_bit = static_cast<umaskex_t>(1) << (sizeof(umaskex_t) * CHAR_BIT - 1);
  75. ///////////////////////////////////////////////////////////////////////////////
  76. // unused_mask
  77. // find a bit in an int that isn't set
  78. template<umaskex_t In, umaskex_t Out = highest_bit, bool Done = (0 == (Out & In))>
  79. struct unused_mask
  80. {
  81. BOOST_STATIC_ASSERT(1 != Out);
  82. BOOST_STATIC_CONSTANT(umaskex_t, value = (unused_mask<In, (Out >> 1)>::value));
  83. };
  84. template<umaskex_t In, umaskex_t Out>
  85. struct unused_mask<In, Out, true>
  86. {
  87. BOOST_STATIC_CONSTANT(umaskex_t, value = Out);
  88. };
  89. #ifndef BOOST_NO_INCLASS_MEMBER_INITIALIZATION
  90. template<umaskex_t In, umaskex_t Out, bool Done>
  91. umaskex_t const unused_mask<In, Out, Done>::value;
  92. #endif
  93. umaskex_t const std_ctype_alnum = mask_cast<std::ctype_base::alnum>::value;
  94. umaskex_t const std_ctype_alpha = mask_cast<std::ctype_base::alpha>::value;
  95. umaskex_t const std_ctype_cntrl = mask_cast<std::ctype_base::cntrl>::value;
  96. umaskex_t const std_ctype_digit = mask_cast<std::ctype_base::digit>::value;
  97. umaskex_t const std_ctype_graph = mask_cast<std::ctype_base::graph>::value;
  98. umaskex_t const std_ctype_lower = mask_cast<std::ctype_base::lower>::value;
  99. umaskex_t const std_ctype_print = mask_cast<std::ctype_base::print>::value;
  100. umaskex_t const std_ctype_punct = mask_cast<std::ctype_base::punct>::value;
  101. umaskex_t const std_ctype_space = mask_cast<std::ctype_base::space>::value;
  102. umaskex_t const std_ctype_upper = mask_cast<std::ctype_base::upper>::value;
  103. umaskex_t const std_ctype_xdigit = mask_cast<std::ctype_base::xdigit>::value;
  104. // Reserve some bits for the implementation
  105. #if defined(__GLIBCXX__)
  106. umaskex_t const std_ctype_reserved = 0x8000;
  107. #elif defined(_CPPLIB_VER) && defined(BOOST_WINDOWS)
  108. umaskex_t const std_ctype_reserved = 0x8200;
  109. #elif defined(_LIBCPP_VERSION)
  110. umaskex_t const std_ctype_reserved = 0x8000;
  111. #else
  112. umaskex_t const std_ctype_reserved = 0;
  113. #endif
  114. // Bitwise-or all the ctype masks together
  115. umaskex_t const all_ctype_masks = std_ctype_reserved
  116. | std_ctype_alnum | std_ctype_alpha | std_ctype_cntrl | std_ctype_digit
  117. | std_ctype_graph | std_ctype_lower | std_ctype_print | std_ctype_punct
  118. | std_ctype_space | std_ctype_upper | std_ctype_xdigit;
  119. // define a new mask for "underscore" ("word" == alnum | underscore)
  120. umaskex_t const non_std_ctype_underscore = unused_mask<all_ctype_masks>::value;
  121. // define a new mask for "blank"
  122. umaskex_t const non_std_ctype_blank = unused_mask<all_ctype_masks | non_std_ctype_underscore>::value;
  123. // define a new mask for "newline"
  124. umaskex_t const non_std_ctype_newline = unused_mask<all_ctype_masks | non_std_ctype_underscore | non_std_ctype_blank>::value;
  125. #else
  126. ///////////////////////////////////////////////////////////////////////////////
  127. // Ugly work-around for buggy ctype facets.
  128. umaskex_t const std_ctype_alnum = 1 << 0;
  129. umaskex_t const std_ctype_alpha = 1 << 1;
  130. umaskex_t const std_ctype_cntrl = 1 << 2;
  131. umaskex_t const std_ctype_digit = 1 << 3;
  132. umaskex_t const std_ctype_graph = 1 << 4;
  133. umaskex_t const std_ctype_lower = 1 << 5;
  134. umaskex_t const std_ctype_print = 1 << 6;
  135. umaskex_t const std_ctype_punct = 1 << 7;
  136. umaskex_t const std_ctype_space = 1 << 8;
  137. umaskex_t const std_ctype_upper = 1 << 9;
  138. umaskex_t const std_ctype_xdigit = 1 << 10;
  139. umaskex_t const non_std_ctype_underscore = 1 << 11;
  140. umaskex_t const non_std_ctype_blank = 1 << 12;
  141. umaskex_t const non_std_ctype_newline = 1 << 13;
  142. static umaskex_t const std_masks[] =
  143. {
  144. mask_cast<std::ctype_base::alnum>::value
  145. , mask_cast<std::ctype_base::alpha>::value
  146. , mask_cast<std::ctype_base::cntrl>::value
  147. , mask_cast<std::ctype_base::digit>::value
  148. , mask_cast<std::ctype_base::graph>::value
  149. , mask_cast<std::ctype_base::lower>::value
  150. , mask_cast<std::ctype_base::print>::value
  151. , mask_cast<std::ctype_base::punct>::value
  152. , mask_cast<std::ctype_base::space>::value
  153. , mask_cast<std::ctype_base::upper>::value
  154. , mask_cast<std::ctype_base::xdigit>::value
  155. };
  156. inline int mylog2(umaskex_t i)
  157. {
  158. return "\0\0\1\0\2\0\0\0\3"[i & 0xf]
  159. + "\0\4\5\0\6\0\0\0\7"[(i & 0xf0) >> 04]
  160. + "\0\10\11\0\12\0\0\0\13"[(i & 0xf00) >> 010];
  161. }
  162. #endif
  163. // convenient constant for the extra masks
  164. umaskex_t const non_std_ctype_masks = non_std_ctype_underscore | non_std_ctype_blank | non_std_ctype_newline;
  165. ///////////////////////////////////////////////////////////////////////////////
  166. // cpp_regex_traits_base
  167. // BUGBUG this should be replaced with a regex facet that lets you query for
  168. // an array of underscore characters and an array of line separator characters.
  169. template<typename Char, std::size_t SizeOfChar = sizeof(Char)>
  170. struct cpp_regex_traits_base
  171. {
  172. protected:
  173. void imbue(std::locale const &)
  174. {
  175. }
  176. static bool is(std::ctype<Char> const &ct, Char ch, umaskex_t mask)
  177. {
  178. #ifndef BOOST_XPRESSIVE_BUGGY_CTYPE_FACET
  179. if(ct.is((std::ctype_base::mask)(umask_t)mask, ch))
  180. {
  181. return true;
  182. }
  183. // HACKHACK Cygwin and mingw have buggy ctype facets for wchar_t
  184. #if defined(__CYGWIN__) || defined(__MINGW32_VERSION)
  185. if (std::ctype_base::xdigit == ((std::ctype_base::mask)(umask_t)mask & std::ctype_base::xdigit))
  186. {
  187. typename std::char_traits<Char>::int_type i = std::char_traits<Char>::to_int_type(ch);
  188. if(UCHAR_MAX >= i && std::isxdigit(static_cast<int>(i)))
  189. return true;
  190. }
  191. #endif
  192. #else
  193. umaskex_t tmp = mask & ~non_std_ctype_masks;
  194. for(umaskex_t i; 0 != (i = (tmp & (~tmp+1))); tmp &= ~i)
  195. {
  196. std::ctype_base::mask m = (std::ctype_base::mask)(umask_t)std_masks[mylog2(i)];
  197. if(ct.is(m, ch))
  198. {
  199. return true;
  200. }
  201. }
  202. #endif
  203. return ((mask & non_std_ctype_blank) && cpp_regex_traits_base::is_blank(ch))
  204. || ((mask & non_std_ctype_underscore) && cpp_regex_traits_base::is_underscore(ch))
  205. || ((mask & non_std_ctype_newline) && cpp_regex_traits_base::is_newline(ch));
  206. }
  207. private:
  208. static bool is_blank(Char ch)
  209. {
  210. BOOST_MPL_ASSERT_RELATION('\t', ==, L'\t');
  211. BOOST_MPL_ASSERT_RELATION(' ', ==, L' ');
  212. return L' ' == ch || L'\t' == ch;
  213. }
  214. static bool is_underscore(Char ch)
  215. {
  216. BOOST_MPL_ASSERT_RELATION('_', ==, L'_');
  217. return L'_' == ch;
  218. }
  219. static bool is_newline(Char ch)
  220. {
  221. BOOST_MPL_ASSERT_RELATION('\r', ==, L'\r');
  222. BOOST_MPL_ASSERT_RELATION('\n', ==, L'\n');
  223. BOOST_MPL_ASSERT_RELATION('\f', ==, L'\f');
  224. return L'\r' == ch || L'\n' == ch || L'\f' == ch
  225. || (1 < SizeOfChar && (0x2028u == ch || 0x2029u == ch || 0x85u == ch));
  226. }
  227. };
  228. #ifndef BOOST_XPRESSIVE_BUGGY_CTYPE_FACET
  229. template<typename Char>
  230. struct cpp_regex_traits_base<Char, 1>
  231. {
  232. protected:
  233. void imbue(std::locale const &loc)
  234. {
  235. int i = 0;
  236. Char allchars[UCHAR_MAX + 1];
  237. for(i = 0; i <= static_cast<int>(UCHAR_MAX); ++i)
  238. {
  239. allchars[i] = static_cast<Char>(i);
  240. }
  241. std::ctype<Char> const &ct = BOOST_USE_FACET(std::ctype<Char>, loc);
  242. std::ctype_base::mask tmp[UCHAR_MAX + 1];
  243. ct.is(allchars, allchars + UCHAR_MAX + 1, tmp);
  244. for(i = 0; i <= static_cast<int>(UCHAR_MAX); ++i)
  245. {
  246. this->masks_[i] = static_cast<umask_t>(tmp[i]);
  247. BOOST_ASSERT(0 == (this->masks_[i] & non_std_ctype_masks));
  248. }
  249. this->masks_[static_cast<unsigned char>('_')] |= non_std_ctype_underscore;
  250. this->masks_[static_cast<unsigned char>(' ')] |= non_std_ctype_blank;
  251. this->masks_[static_cast<unsigned char>('\t')] |= non_std_ctype_blank;
  252. this->masks_[static_cast<unsigned char>('\n')] |= non_std_ctype_newline;
  253. this->masks_[static_cast<unsigned char>('\r')] |= non_std_ctype_newline;
  254. this->masks_[static_cast<unsigned char>('\f')] |= non_std_ctype_newline;
  255. }
  256. bool is(std::ctype<Char> const &, Char ch, umaskex_t mask) const
  257. {
  258. return 0 != (this->masks_[static_cast<unsigned char>(ch)] & mask);
  259. }
  260. private:
  261. umaskex_t masks_[UCHAR_MAX + 1];
  262. };
  263. #endif
  264. } // namespace detail
  265. ///////////////////////////////////////////////////////////////////////////////
  266. // cpp_regex_traits
  267. //
  268. /// \brief Encapsaulates a \c std::locale for use by the
  269. /// \c basic_regex\<\> class template.
  270. template<typename Char>
  271. struct cpp_regex_traits
  272. : detail::cpp_regex_traits_base<Char>
  273. {
  274. typedef Char char_type;
  275. typedef std::basic_string<char_type> string_type;
  276. typedef std::locale locale_type;
  277. typedef detail::umaskex_t char_class_type;
  278. typedef regex_traits_version_2_tag version_tag;
  279. typedef detail::cpp_regex_traits_base<Char> base_type;
  280. /// Initialize a cpp_regex_traits object to use the specified std::locale,
  281. /// or the global std::locale if none is specified.
  282. ///
  283. cpp_regex_traits(locale_type const &loc = locale_type())
  284. : base_type()
  285. , loc_()
  286. {
  287. this->imbue(loc);
  288. }
  289. /// Checks two cpp_regex_traits objects for equality
  290. ///
  291. /// \return this->getloc() == that.getloc().
  292. bool operator ==(cpp_regex_traits<char_type> const &that) const
  293. {
  294. return this->loc_ == that.loc_;
  295. }
  296. /// Checks two cpp_regex_traits objects for inequality
  297. ///
  298. /// \return this->getloc() != that.getloc().
  299. bool operator !=(cpp_regex_traits<char_type> const &that) const
  300. {
  301. return this->loc_ != that.loc_;
  302. }
  303. /// Convert a char to a Char
  304. ///
  305. /// \param ch The source character.
  306. /// \return std::use_facet\<std::ctype\<char_type\> \>(this->getloc()).widen(ch).
  307. char_type widen(char ch) const
  308. {
  309. return this->ctype_->widen(ch);
  310. }
  311. /// Returns a hash value for a Char in the range [0, UCHAR_MAX]
  312. ///
  313. /// \param ch The source character.
  314. /// \return a value between 0 and UCHAR_MAX, inclusive.
  315. static unsigned char hash(char_type ch)
  316. {
  317. return static_cast<unsigned char>(std::char_traits<Char>::to_int_type(ch));
  318. }
  319. /// No-op
  320. ///
  321. /// \param ch The source character.
  322. /// \return ch
  323. static char_type translate(char_type ch)
  324. {
  325. return ch;
  326. }
  327. /// Converts a character to lower-case using the internally-stored std::locale.
  328. ///
  329. /// \param ch The source character.
  330. /// \return std::tolower(ch, this->getloc()).
  331. char_type translate_nocase(char_type ch) const
  332. {
  333. return this->ctype_->tolower(ch);
  334. }
  335. /// Converts a character to lower-case using the internally-stored std::locale.
  336. ///
  337. /// \param ch The source character.
  338. /// \return std::tolower(ch, this->getloc()).
  339. char_type tolower(char_type ch) const
  340. {
  341. return this->ctype_->tolower(ch);
  342. }
  343. /// Converts a character to upper-case using the internally-stored std::locale.
  344. ///
  345. /// \param ch The source character.
  346. /// \return std::toupper(ch, this->getloc()).
  347. char_type toupper(char_type ch) const
  348. {
  349. return this->ctype_->toupper(ch);
  350. }
  351. /// Returns a \c string_type containing all the characters that compare equal
  352. /// disregrarding case to the one passed in. This function can only be called
  353. /// if <tt>has_fold_case\<cpp_regex_traits\<Char\> \>::value</tt> is \c true.
  354. ///
  355. /// \param ch The source character.
  356. /// \return \c string_type containing all chars which are equal to \c ch when disregarding
  357. /// case
  358. string_type fold_case(char_type ch) const
  359. {
  360. BOOST_MPL_ASSERT((is_same<char_type, char>));
  361. char_type ntcs[] = {
  362. this->ctype_->tolower(ch)
  363. , this->ctype_->toupper(ch)
  364. , 0
  365. };
  366. if(ntcs[1] == ntcs[0])
  367. ntcs[1] = 0;
  368. return string_type(ntcs);
  369. }
  370. /// Checks to see if a character is within a character range.
  371. ///
  372. /// \param first The bottom of the range, inclusive.
  373. /// \param last The top of the range, inclusive.
  374. /// \param ch The source character.
  375. /// \return first <= ch && ch <= last.
  376. static bool in_range(char_type first, char_type last, char_type ch)
  377. {
  378. return first <= ch && ch <= last;
  379. }
  380. /// Checks to see if a character is within a character range, irregardless of case.
  381. ///
  382. /// \param first The bottom of the range, inclusive.
  383. /// \param last The top of the range, inclusive.
  384. /// \param ch The source character.
  385. /// \return in_range(first, last, ch) || in_range(first, last, tolower(ch, this->getloc())) ||
  386. /// in_range(first, last, toupper(ch, this->getloc()))
  387. /// \attention The default implementation doesn't do proper Unicode
  388. /// case folding, but this is the best we can do with the standard
  389. /// ctype facet.
  390. bool in_range_nocase(char_type first, char_type last, char_type ch) const
  391. {
  392. // NOTE: this default implementation doesn't do proper Unicode
  393. // case folding, but this is the best we can do with the standard
  394. // std::ctype facet.
  395. return this->in_range(first, last, ch)
  396. || this->in_range(first, last, this->ctype_->toupper(ch))
  397. || this->in_range(first, last, this->ctype_->tolower(ch));
  398. }
  399. /// INTERNAL ONLY
  400. //string_type transform(char_type const *begin, char_type const *end) const
  401. //{
  402. // return this->collate_->transform(begin, end);
  403. //}
  404. /// Returns a sort key for the character sequence designated by the iterator range [F1, F2)
  405. /// such that if the character sequence [G1, G2) sorts before the character sequence [H1, H2)
  406. /// then v.transform(G1, G2) \< v.transform(H1, H2).
  407. ///
  408. /// \attention Not currently used
  409. template<typename FwdIter>
  410. string_type transform(FwdIter, FwdIter) const
  411. {
  412. //string_type str(begin, end);
  413. //return this->transform(str.data(), str.data() + str.size());
  414. BOOST_ASSERT(false);
  415. return string_type();
  416. }
  417. /// Returns a sort key for the character sequence designated by the iterator range [F1, F2)
  418. /// such that if the character sequence [G1, G2) sorts before the character sequence [H1, H2)
  419. /// when character case is not considered then
  420. /// v.transform_primary(G1, G2) \< v.transform_primary(H1, H2).
  421. ///
  422. /// \attention Not currently used
  423. template<typename FwdIter>
  424. string_type transform_primary(FwdIter, FwdIter ) const
  425. {
  426. BOOST_ASSERT(false); // TODO implement me
  427. return string_type();
  428. }
  429. /// Returns a sequence of characters that represents the collating element
  430. /// consisting of the character sequence designated by the iterator range [F1, F2).
  431. /// Returns an empty string if the character sequence is not a valid collating element.
  432. ///
  433. /// \attention Not currently used
  434. template<typename FwdIter>
  435. string_type lookup_collatename(FwdIter, FwdIter) const
  436. {
  437. BOOST_ASSERT(false); // TODO implement me
  438. return string_type();
  439. }
  440. /// For the character class name represented by the specified character sequence,
  441. /// return the corresponding bitmask representation.
  442. ///
  443. /// \param begin A forward iterator to the start of the character sequence representing
  444. /// the name of the character class.
  445. /// \param end The end of the character sequence.
  446. /// \param icase Specifies whether the returned bitmask should represent the case-insensitive
  447. /// version of the character class.
  448. /// \return A bitmask representing the character class.
  449. template<typename FwdIter>
  450. char_class_type lookup_classname(FwdIter begin, FwdIter end, bool icase) const
  451. {
  452. static detail::umaskex_t const icase_masks =
  453. detail::std_ctype_lower | detail::std_ctype_upper;
  454. BOOST_ASSERT(begin != end);
  455. char_class_type char_class = this->lookup_classname_impl_(begin, end);
  456. if(0 == char_class)
  457. {
  458. // convert the string to lowercase
  459. string_type classname(begin, end);
  460. for(typename string_type::size_type i = 0, len = classname.size(); i < len; ++i)
  461. {
  462. classname[i] = this->translate_nocase(classname[i]);
  463. }
  464. char_class = this->lookup_classname_impl_(classname.begin(), classname.end());
  465. }
  466. // erase case-sensitivity if icase==true
  467. if(icase && 0 != (char_class & icase_masks))
  468. {
  469. char_class |= icase_masks;
  470. }
  471. return char_class;
  472. }
  473. /// Tests a character against a character class bitmask.
  474. ///
  475. /// \param ch The character to test.
  476. /// \param mask The character class bitmask against which to test.
  477. /// \pre mask is a bitmask returned by lookup_classname, or is several such masks bit-or'ed
  478. /// together.
  479. /// \return true if the character is a member of any of the specified character classes, false
  480. /// otherwise.
  481. bool isctype(char_type ch, char_class_type mask) const
  482. {
  483. return this->base_type::is(*this->ctype_, ch, mask);
  484. }
  485. /// Convert a digit character into the integer it represents.
  486. ///
  487. /// \param ch The digit character.
  488. /// \param radix The radix to use for the conversion.
  489. /// \pre radix is one of 8, 10, or 16.
  490. /// \return -1 if ch is not a digit character, the integer value of the character otherwise.
  491. /// The conversion is performed by imbueing a std::stringstream with this-\>getloc();
  492. /// setting the radix to one of oct, hex or dec; inserting ch into the stream; and
  493. /// extracting an int.
  494. int value(char_type ch, int radix) const
  495. {
  496. BOOST_ASSERT(8 == radix || 10 == radix || 16 == radix);
  497. int val = -1;
  498. std::basic_stringstream<char_type> str;
  499. str.imbue(this->getloc());
  500. str << (8 == radix ? std::oct : (16 == radix ? std::hex : std::dec));
  501. str.put(ch);
  502. str >> val;
  503. return str.fail() ? -1 : val;
  504. }
  505. /// Imbues *this with loc
  506. ///
  507. /// \param loc A std::locale.
  508. /// \return the previous std::locale used by *this.
  509. locale_type imbue(locale_type loc)
  510. {
  511. locale_type old_loc = this->loc_;
  512. this->loc_ = loc;
  513. this->ctype_ = &BOOST_USE_FACET(std::ctype<char_type>, this->loc_);
  514. //this->collate_ = &BOOST_USE_FACET(std::collate<char_type>, this->loc_);
  515. this->base_type::imbue(this->loc_);
  516. return old_loc;
  517. }
  518. /// Returns the current std::locale used by *this.
  519. ///
  520. locale_type getloc() const
  521. {
  522. return this->loc_;
  523. }
  524. private:
  525. ///////////////////////////////////////////////////////////////////////////////
  526. // char_class_pair
  527. /// INTERNAL ONLY
  528. struct char_class_pair
  529. {
  530. char_type const *class_name_;
  531. char_class_type class_type_;
  532. };
  533. ///////////////////////////////////////////////////////////////////////////////
  534. // char_class
  535. /// INTERNAL ONLY
  536. static char_class_pair const &char_class(std::size_t j)
  537. {
  538. static BOOST_CONSTEXPR_OR_CONST char_class_pair s_char_class_map[] =
  539. {
  540. { BOOST_XPR_CSTR_(char_type, "alnum"), detail::std_ctype_alnum }
  541. , { BOOST_XPR_CSTR_(char_type, "alpha"), detail::std_ctype_alpha }
  542. , { BOOST_XPR_CSTR_(char_type, "blank"), detail::non_std_ctype_blank }
  543. , { BOOST_XPR_CSTR_(char_type, "cntrl"), detail::std_ctype_cntrl }
  544. , { BOOST_XPR_CSTR_(char_type, "d"), detail::std_ctype_digit }
  545. , { BOOST_XPR_CSTR_(char_type, "digit"), detail::std_ctype_digit }
  546. , { BOOST_XPR_CSTR_(char_type, "graph"), detail::std_ctype_graph }
  547. , { BOOST_XPR_CSTR_(char_type, "lower"), detail::std_ctype_lower }
  548. , { BOOST_XPR_CSTR_(char_type, "newline"),detail::non_std_ctype_newline }
  549. , { BOOST_XPR_CSTR_(char_type, "print"), detail::std_ctype_print }
  550. , { BOOST_XPR_CSTR_(char_type, "punct"), detail::std_ctype_punct }
  551. , { BOOST_XPR_CSTR_(char_type, "s"), detail::std_ctype_space }
  552. , { BOOST_XPR_CSTR_(char_type, "space"), detail::std_ctype_space }
  553. , { BOOST_XPR_CSTR_(char_type, "upper"), detail::std_ctype_upper }
  554. , { BOOST_XPR_CSTR_(char_type, "w"), detail::std_ctype_alnum | detail::non_std_ctype_underscore }
  555. , { BOOST_XPR_CSTR_(char_type, "xdigit"), detail::std_ctype_xdigit }
  556. , { 0, 0 }
  557. };
  558. return s_char_class_map[j];
  559. }
  560. ///////////////////////////////////////////////////////////////////////////////
  561. // lookup_classname_impl
  562. /// INTERNAL ONLY
  563. template<typename FwdIter>
  564. static char_class_type lookup_classname_impl_(FwdIter begin, FwdIter end)
  565. {
  566. // find the classname
  567. typedef cpp_regex_traits<Char> this_t;
  568. for(std::size_t j = 0; 0 != this_t::char_class(j).class_name_; ++j)
  569. {
  570. if(this_t::compare_(this_t::char_class(j).class_name_, begin, end))
  571. {
  572. return this_t::char_class(j).class_type_;
  573. }
  574. }
  575. return 0;
  576. }
  577. /// INTERNAL ONLY
  578. template<typename FwdIter>
  579. static bool compare_(char_type const *name, FwdIter begin, FwdIter end)
  580. {
  581. for(; *name && begin != end; ++name, ++begin)
  582. {
  583. if(*name != *begin)
  584. {
  585. return false;
  586. }
  587. }
  588. return !*name && begin == end;
  589. }
  590. locale_type loc_;
  591. std::ctype<char_type> const *ctype_;
  592. //std::collate<char_type> const *collate_;
  593. };
  594. ///////////////////////////////////////////////////////////////////////////////
  595. // cpp_regex_traits<>::hash specializations
  596. template<>
  597. inline unsigned char cpp_regex_traits<unsigned char>::hash(unsigned char ch)
  598. {
  599. return ch;
  600. }
  601. template<>
  602. inline unsigned char cpp_regex_traits<char>::hash(char ch)
  603. {
  604. return static_cast<unsigned char>(ch);
  605. }
  606. template<>
  607. inline unsigned char cpp_regex_traits<signed char>::hash(signed char ch)
  608. {
  609. return static_cast<unsigned char>(ch);
  610. }
  611. #ifndef BOOST_XPRESSIVE_NO_WREGEX
  612. template<>
  613. inline unsigned char cpp_regex_traits<wchar_t>::hash(wchar_t ch)
  614. {
  615. return static_cast<unsigned char>(ch);
  616. }
  617. #endif
  618. // Narrow C++ traits has fold_case() member function.
  619. template<>
  620. struct has_fold_case<cpp_regex_traits<char> >
  621. : mpl::true_
  622. {
  623. };
  624. }}
  625. #endif