regex_primitives.hpp 37 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927
  1. ///////////////////////////////////////////////////////////////////////////////
  2. /// \file regex_primitives.hpp
  3. /// Contains the syntax elements for writing static regular expressions.
  4. //
  5. // Copyright 2008 Eric Niebler. Distributed under the Boost
  6. // Software License, Version 1.0. (See accompanying file
  7. // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  8. #ifndef BOOST_XPRESSIVE_REGEX_PRIMITIVES_HPP_EAN_10_04_2005
  9. #define BOOST_XPRESSIVE_REGEX_PRIMITIVES_HPP_EAN_10_04_2005
  10. #include <vector>
  11. #include <climits>
  12. #include <boost/config.hpp>
  13. #include <boost/assert.hpp>
  14. #include <boost/mpl/if.hpp>
  15. #include <boost/mpl/and.hpp>
  16. #include <boost/mpl/assert.hpp>
  17. #include <boost/detail/workaround.hpp>
  18. #include <boost/preprocessor/cat.hpp>
  19. #include <boost/xpressive/detail/detail_fwd.hpp>
  20. #include <boost/xpressive/detail/core/matchers.hpp>
  21. #include <boost/xpressive/detail/core/regex_domain.hpp>
  22. #include <boost/xpressive/detail/utility/ignore_unused.hpp>
  23. // Doxygen can't handle proto :-(
  24. #ifndef BOOST_XPRESSIVE_DOXYGEN_INVOKED
  25. # include <boost/proto/core.hpp>
  26. # include <boost/proto/transform/arg.hpp>
  27. # include <boost/proto/transform/when.hpp>
  28. # include <boost/xpressive/detail/core/icase.hpp>
  29. # include <boost/xpressive/detail/static/compile.hpp>
  30. # include <boost/xpressive/detail/static/modifier.hpp>
  31. #endif
  32. namespace boost { namespace xpressive { namespace detail
  33. {
  34. typedef assert_word_placeholder<word_boundary<mpl::true_> > assert_word_boundary;
  35. typedef assert_word_placeholder<word_begin> assert_word_begin;
  36. typedef assert_word_placeholder<word_end> assert_word_end;
  37. // workaround msvc-7.1 bug with function pointer types
  38. // within function types:
  39. #if BOOST_WORKAROUND(BOOST_MSVC, == 1310)
  40. #define mark_number(x) proto::call<mark_number(x)>
  41. #define minus_one() proto::make<minus_one()>
  42. #endif
  43. struct push_back : proto::callable
  44. {
  45. typedef int result_type;
  46. template<typename Subs>
  47. int operator ()(Subs &subs, int i) const
  48. {
  49. subs.push_back(i);
  50. return i;
  51. }
  52. };
  53. struct mark_number : proto::callable
  54. {
  55. typedef int result_type;
  56. template<typename Expr>
  57. int operator ()(Expr const &expr) const
  58. {
  59. return expr.mark_number_;
  60. }
  61. };
  62. typedef mpl::int_<-1> minus_one;
  63. // s1 or -s1
  64. struct SubMatch
  65. : proto::or_<
  66. proto::when<basic_mark_tag, push_back(proto::_data, mark_number(proto::_value)) >
  67. , proto::when<proto::negate<basic_mark_tag>, push_back(proto::_data, minus_one()) >
  68. >
  69. {};
  70. struct SubMatchList
  71. : proto::or_<SubMatch, proto::comma<SubMatchList, SubMatch> >
  72. {};
  73. template<typename Subs>
  74. typename enable_if<
  75. mpl::and_<proto::is_expr<Subs>, proto::matches<Subs, SubMatchList> >
  76. , std::vector<int>
  77. >::type
  78. to_vector(Subs const &subs)
  79. {
  80. std::vector<int> subs_;
  81. SubMatchList()(subs, 0, subs_);
  82. return subs_;
  83. }
  84. #if BOOST_WORKAROUND(BOOST_MSVC, == 1310)
  85. #undef mark_number
  86. #undef minus_one
  87. #endif
  88. // replace "Expr" with "keep(*State) >> Expr"
  89. struct skip_primitives : proto::transform<skip_primitives>
  90. {
  91. template<typename Expr, typename State, typename Data>
  92. struct impl : proto::transform_impl<Expr, State, Data>
  93. {
  94. typedef
  95. typename proto::shift_right<
  96. typename proto::unary_expr<
  97. keeper_tag
  98. , typename proto::dereference<State>::type
  99. >::type
  100. , Expr
  101. >::type
  102. result_type;
  103. result_type operator ()(
  104. typename impl::expr_param expr
  105. , typename impl::state_param state
  106. , typename impl::data_param
  107. ) const
  108. {
  109. result_type that = {{{state}}, expr};
  110. return that;
  111. }
  112. };
  113. };
  114. struct Primitives
  115. : proto::or_<
  116. proto::terminal<proto::_>
  117. , proto::comma<proto::_, proto::_>
  118. , proto::subscript<proto::terminal<set_initializer>, proto::_>
  119. , proto::assign<proto::terminal<set_initializer>, proto::_>
  120. , proto::assign<proto::terminal<attribute_placeholder<proto::_> >, proto::_>
  121. , proto::complement<Primitives>
  122. >
  123. {};
  124. struct SkipGrammar
  125. : proto::or_<
  126. proto::when<Primitives, skip_primitives>
  127. , proto::assign<proto::terminal<mark_placeholder>, SkipGrammar> // don't "skip" mark tags
  128. , proto::subscript<SkipGrammar, proto::_> // don't put skips in actions
  129. , proto::binary_expr<modifier_tag, proto::_, SkipGrammar> // don't skip modifiers
  130. , proto::unary_expr<lookbehind_tag, proto::_> // don't skip lookbehinds
  131. , proto::nary_expr<proto::_, proto::vararg<SkipGrammar> > // everything else is fair game!
  132. >
  133. {};
  134. template<typename Skip>
  135. struct skip_directive
  136. {
  137. typedef typename proto::result_of::as_expr<Skip>::type skip_type;
  138. skip_directive(Skip const &skip)
  139. : skip_(proto::as_expr(skip))
  140. {}
  141. template<typename Sig>
  142. struct result {};
  143. template<typename This, typename Expr>
  144. struct result<This(Expr)>
  145. {
  146. typedef
  147. SkipGrammar::impl<
  148. typename proto::result_of::as_expr<Expr>::type
  149. , skip_type const &
  150. , mpl::void_ &
  151. >
  152. skip_transform;
  153. typedef
  154. typename proto::shift_right<
  155. typename skip_transform::result_type
  156. , typename proto::dereference<skip_type>::type
  157. >::type
  158. type;
  159. };
  160. template<typename Expr>
  161. typename result<skip_directive(Expr)>::type
  162. operator ()(Expr const &expr) const
  163. {
  164. mpl::void_ ignore;
  165. typedef result<skip_directive(Expr)> result_fun;
  166. typename result_fun::type that = {
  167. typename result_fun::skip_transform()(proto::as_expr(expr), this->skip_, ignore)
  168. , {skip_}
  169. };
  170. return that;
  171. }
  172. private:
  173. skip_type skip_;
  174. };
  175. /*
  176. ///////////////////////////////////////////////////////////////////////////////
  177. /// INTERNAL ONLY
  178. // BOOST_XPRESSIVE_GLOBAL
  179. // for defining globals that neither violate the One Definition Rule nor
  180. // lead to undefined behavior due to global object initialization order.
  181. //#define BOOST_XPRESSIVE_GLOBAL(type, name, init) \
  182. // namespace detail \
  183. // { \
  184. // template<int Dummy> \
  185. // struct BOOST_PP_CAT(global_pod_, name) \
  186. // { \
  187. // static type const value; \
  188. // private: \
  189. // union type_must_be_pod \
  190. // { \
  191. // type t; \
  192. // char ch; \
  193. // } u; \
  194. // }; \
  195. // template<int Dummy> \
  196. // type const BOOST_PP_CAT(global_pod_, name)<Dummy>::value = init; \
  197. // } \
  198. // type const &name = detail::BOOST_PP_CAT(global_pod_, name)<0>::value
  199. */
  200. } // namespace detail
  201. /// INTERNAL ONLY (for backwards compatibility)
  202. unsigned int const repeat_max = UINT_MAX-1;
  203. ///////////////////////////////////////////////////////////////////////////////
  204. /// \brief For infinite repetition of a sub-expression.
  205. ///
  206. /// Magic value used with the repeat\<\>() function template
  207. /// to specify an unbounded repeat. Use as: repeat<17, inf>('a').
  208. /// The equivalent in perl is /a{17,}/.
  209. unsigned int const inf = UINT_MAX-1;
  210. /// INTERNAL ONLY (for backwards compatibility)
  211. proto::terminal<detail::epsilon_matcher>::type const epsilon = {{}};
  212. ///////////////////////////////////////////////////////////////////////////////
  213. /// \brief Successfully matches nothing.
  214. ///
  215. /// Successfully matches a zero-width sequence. nil always succeeds and
  216. /// never consumes any characters.
  217. proto::terminal<detail::epsilon_matcher>::type const nil = {{}};
  218. ///////////////////////////////////////////////////////////////////////////////
  219. /// \brief Matches an alpha-numeric character.
  220. ///
  221. /// The regex traits are used to determine which characters are alpha-numeric.
  222. /// To match any character that is not alpha-numeric, use ~alnum.
  223. ///
  224. /// \attention alnum is equivalent to /[[:alnum:]]/ in perl. ~alnum is equivalent
  225. /// to /[[:^alnum:]]/ in perl.
  226. proto::terminal<detail::posix_charset_placeholder>::type const alnum = {{"alnum", false}};
  227. ///////////////////////////////////////////////////////////////////////////////
  228. /// \brief Matches an alphabetic character.
  229. ///
  230. /// The regex traits are used to determine which characters are alphabetic.
  231. /// To match any character that is not alphabetic, use ~alpha.
  232. ///
  233. /// \attention alpha is equivalent to /[[:alpha:]]/ in perl. ~alpha is equivalent
  234. /// to /[[:^alpha:]]/ in perl.
  235. proto::terminal<detail::posix_charset_placeholder>::type const alpha = {{"alpha", false}};
  236. ///////////////////////////////////////////////////////////////////////////////
  237. /// \brief Matches a blank (horizonal white-space) character.
  238. ///
  239. /// The regex traits are used to determine which characters are blank characters.
  240. /// To match any character that is not blank, use ~blank.
  241. ///
  242. /// \attention blank is equivalent to /[[:blank:]]/ in perl. ~blank is equivalent
  243. /// to /[[:^blank:]]/ in perl.
  244. proto::terminal<detail::posix_charset_placeholder>::type const blank = {{"blank", false}};
  245. ///////////////////////////////////////////////////////////////////////////////
  246. /// \brief Matches a control character.
  247. ///
  248. /// The regex traits are used to determine which characters are control characters.
  249. /// To match any character that is not a control character, use ~cntrl.
  250. ///
  251. /// \attention cntrl is equivalent to /[[:cntrl:]]/ in perl. ~cntrl is equivalent
  252. /// to /[[:^cntrl:]]/ in perl.
  253. proto::terminal<detail::posix_charset_placeholder>::type const cntrl = {{"cntrl", false}};
  254. ///////////////////////////////////////////////////////////////////////////////
  255. /// \brief Matches a digit character.
  256. ///
  257. /// The regex traits are used to determine which characters are digits.
  258. /// To match any character that is not a digit, use ~digit.
  259. ///
  260. /// \attention digit is equivalent to /[[:digit:]]/ in perl. ~digit is equivalent
  261. /// to /[[:^digit:]]/ in perl.
  262. proto::terminal<detail::posix_charset_placeholder>::type const digit = {{"digit", false}};
  263. ///////////////////////////////////////////////////////////////////////////////
  264. /// \brief Matches a graph character.
  265. ///
  266. /// The regex traits are used to determine which characters are graphable.
  267. /// To match any character that is not graphable, use ~graph.
  268. ///
  269. /// \attention graph is equivalent to /[[:graph:]]/ in perl. ~graph is equivalent
  270. /// to /[[:^graph:]]/ in perl.
  271. proto::terminal<detail::posix_charset_placeholder>::type const graph = {{"graph", false}};
  272. ///////////////////////////////////////////////////////////////////////////////
  273. /// \brief Matches a lower-case character.
  274. ///
  275. /// The regex traits are used to determine which characters are lower-case.
  276. /// To match any character that is not a lower-case character, use ~lower.
  277. ///
  278. /// \attention lower is equivalent to /[[:lower:]]/ in perl. ~lower is equivalent
  279. /// to /[[:^lower:]]/ in perl.
  280. proto::terminal<detail::posix_charset_placeholder>::type const lower = {{"lower", false}};
  281. ///////////////////////////////////////////////////////////////////////////////
  282. /// \brief Matches a printable character.
  283. ///
  284. /// The regex traits are used to determine which characters are printable.
  285. /// To match any character that is not printable, use ~print.
  286. ///
  287. /// \attention print is equivalent to /[[:print:]]/ in perl. ~print is equivalent
  288. /// to /[[:^print:]]/ in perl.
  289. proto::terminal<detail::posix_charset_placeholder>::type const print = {{"print", false}};
  290. ///////////////////////////////////////////////////////////////////////////////
  291. /// \brief Matches a punctuation character.
  292. ///
  293. /// The regex traits are used to determine which characters are punctuation.
  294. /// To match any character that is not punctuation, use ~punct.
  295. ///
  296. /// \attention punct is equivalent to /[[:punct:]]/ in perl. ~punct is equivalent
  297. /// to /[[:^punct:]]/ in perl.
  298. proto::terminal<detail::posix_charset_placeholder>::type const punct = {{"punct", false}};
  299. ///////////////////////////////////////////////////////////////////////////////
  300. /// \brief Matches a space character.
  301. ///
  302. /// The regex traits are used to determine which characters are space characters.
  303. /// To match any character that is not white-space, use ~space.
  304. ///
  305. /// \attention space is equivalent to /[[:space:]]/ in perl. ~space is equivalent
  306. /// to /[[:^space:]]/ in perl.
  307. proto::terminal<detail::posix_charset_placeholder>::type const space = {{"space", false}};
  308. ///////////////////////////////////////////////////////////////////////////////
  309. /// \brief Matches an upper-case character.
  310. ///
  311. /// The regex traits are used to determine which characters are upper-case.
  312. /// To match any character that is not upper-case, use ~upper.
  313. ///
  314. /// \attention upper is equivalent to /[[:upper:]]/ in perl. ~upper is equivalent
  315. /// to /[[:^upper:]]/ in perl.
  316. proto::terminal<detail::posix_charset_placeholder>::type const upper = {{"upper", false}};
  317. ///////////////////////////////////////////////////////////////////////////////
  318. /// \brief Matches a hexadecimal digit character.
  319. ///
  320. /// The regex traits are used to determine which characters are hex digits.
  321. /// To match any character that is not a hex digit, use ~xdigit.
  322. ///
  323. /// \attention xdigit is equivalent to /[[:xdigit:]]/ in perl. ~xdigit is equivalent
  324. /// to /[[:^xdigit:]]/ in perl.
  325. proto::terminal<detail::posix_charset_placeholder>::type const xdigit = {{"xdigit", false}};
  326. ///////////////////////////////////////////////////////////////////////////////
  327. /// \brief Beginning of sequence assertion.
  328. ///
  329. /// For the character sequence [begin, end), 'bos' matches the
  330. /// zero-width sub-sequence [begin, begin).
  331. proto::terminal<detail::assert_bos_matcher>::type const bos = {{}};
  332. ///////////////////////////////////////////////////////////////////////////////
  333. /// \brief End of sequence assertion.
  334. ///
  335. /// For the character sequence [begin, end),
  336. /// 'eos' matches the zero-width sub-sequence [end, end).
  337. ///
  338. /// \attention Unlike the perl end of sequence assertion \$, 'eos' will
  339. /// not match at the position [end-1, end-1) if *(end-1) is '\\n'. To
  340. /// get that behavior, use (!_n >> eos).
  341. proto::terminal<detail::assert_eos_matcher>::type const eos = {{}};
  342. ///////////////////////////////////////////////////////////////////////////////
  343. /// \brief Beginning of line assertion.
  344. ///
  345. /// 'bol' matches the zero-width sub-sequence
  346. /// immediately following a logical newline sequence. The regex traits
  347. /// is used to determine what constitutes a logical newline sequence.
  348. proto::terminal<detail::assert_bol_placeholder>::type const bol = {{}};
  349. ///////////////////////////////////////////////////////////////////////////////
  350. /// \brief End of line assertion.
  351. ///
  352. /// 'eol' matches the zero-width sub-sequence
  353. /// immediately preceeding a logical newline sequence. The regex traits
  354. /// is used to determine what constitutes a logical newline sequence.
  355. proto::terminal<detail::assert_eol_placeholder>::type const eol = {{}};
  356. ///////////////////////////////////////////////////////////////////////////////
  357. /// \brief Beginning of word assertion.
  358. ///
  359. /// 'bow' matches the zero-width sub-sequence
  360. /// immediately following a non-word character and preceeding a word character.
  361. /// The regex traits are used to determine what constitutes a word character.
  362. proto::terminal<detail::assert_word_begin>::type const bow = {{}};
  363. ///////////////////////////////////////////////////////////////////////////////
  364. /// \brief End of word assertion.
  365. ///
  366. /// 'eow' matches the zero-width sub-sequence
  367. /// immediately following a word character and preceeding a non-word character.
  368. /// The regex traits are used to determine what constitutes a word character.
  369. proto::terminal<detail::assert_word_end>::type const eow = {{}};
  370. ///////////////////////////////////////////////////////////////////////////////
  371. /// \brief Word boundary assertion.
  372. ///
  373. /// '_b' matches the zero-width sub-sequence at the beginning or the end of a word.
  374. /// It is equivalent to (bow | eow). The regex traits are used to determine what
  375. /// constitutes a word character. To match a non-word boundary, use ~_b.
  376. ///
  377. /// \attention _b is like \\b in perl. ~_b is like \\B in perl.
  378. proto::terminal<detail::assert_word_boundary>::type const _b = {{}};
  379. ///////////////////////////////////////////////////////////////////////////////
  380. /// \brief Matches a word character.
  381. ///
  382. /// '_w' matches a single word character. The regex traits are used to determine which
  383. /// characters are word characters. Use ~_w to match a character that is not a word
  384. /// character.
  385. ///
  386. /// \attention _w is like \\w in perl. ~_w is like \\W in perl.
  387. proto::terminal<detail::posix_charset_placeholder>::type const _w = {{"w", false}};
  388. ///////////////////////////////////////////////////////////////////////////////
  389. /// \brief Matches a digit character.
  390. ///
  391. /// '_d' matches a single digit character. The regex traits are used to determine which
  392. /// characters are digits. Use ~_d to match a character that is not a digit
  393. /// character.
  394. ///
  395. /// \attention _d is like \\d in perl. ~_d is like \\D in perl.
  396. proto::terminal<detail::posix_charset_placeholder>::type const _d = {{"d", false}};
  397. ///////////////////////////////////////////////////////////////////////////////
  398. /// \brief Matches a space character.
  399. ///
  400. /// '_s' matches a single space character. The regex traits are used to determine which
  401. /// characters are space characters. Use ~_s to match a character that is not a space
  402. /// character.
  403. ///
  404. /// \attention _s is like \\s in perl. ~_s is like \\S in perl.
  405. proto::terminal<detail::posix_charset_placeholder>::type const _s = {{"s", false}};
  406. ///////////////////////////////////////////////////////////////////////////////
  407. /// \brief Matches a literal newline character, '\\n'.
  408. ///
  409. /// '_n' matches a single newline character, '\\n'. Use ~_n to match a character
  410. /// that is not a newline.
  411. ///
  412. /// \attention ~_n is like '.' in perl without the /s modifier.
  413. proto::terminal<char>::type const _n = {'\n'};
  414. ///////////////////////////////////////////////////////////////////////////////
  415. /// \brief Matches a logical newline sequence.
  416. ///
  417. /// '_ln' matches a logical newline sequence. This can be any character in the
  418. /// line separator class, as determined by the regex traits, or the '\\r\\n' sequence.
  419. /// For the purpose of back-tracking, '\\r\\n' is treated as a unit.
  420. /// To match any one character that is not a logical newline, use ~_ln.
  421. detail::logical_newline_xpression const _ln = {{}};
  422. ///////////////////////////////////////////////////////////////////////////////
  423. /// \brief Matches any one character.
  424. ///
  425. /// Match any character, similar to '.' in perl syntax with the /s modifier.
  426. /// '_' matches any one character, including the newline.
  427. ///
  428. /// \attention To match any character except the newline, use ~_n
  429. proto::terminal<detail::any_matcher>::type const _ = {{}};
  430. ///////////////////////////////////////////////////////////////////////////////
  431. /// \brief Reference to the current regex object
  432. ///
  433. /// Useful when constructing recursive regular expression objects. The 'self'
  434. /// identifier is a short-hand for the current regex object. For instance,
  435. /// sregex rx = '(' >> (self | nil) >> ')'; will create a regex object that
  436. /// matches balanced parens such as "((()))".
  437. proto::terminal<detail::self_placeholder>::type const self = {{}};
  438. ///////////////////////////////////////////////////////////////////////////////
  439. /// \brief Used to create character sets.
  440. ///
  441. /// There are two ways to create character sets with the 'set' identifier. The
  442. /// easiest is to create a comma-separated list of the characters in the set,
  443. /// as in (set= 'a','b','c'). This set will match 'a', 'b', or 'c'. The other
  444. /// way is to define the set as an argument to the set subscript operator.
  445. /// For instance, set[ 'a' | range('b','c') | digit ] will match an 'a', 'b',
  446. /// 'c' or a digit character.
  447. ///
  448. /// To complement a set, apply the '~' operator. For instance, ~(set= 'a','b','c')
  449. /// will match any character that is not an 'a', 'b', or 'c'.
  450. ///
  451. /// Sets can be composed of other, possibly complemented, sets. For instance,
  452. /// set[ ~digit | ~(set= 'a','b','c') ].
  453. detail::set_initializer_type const set = {{}};
  454. ///////////////////////////////////////////////////////////////////////////////
  455. /// \brief Sub-match placeholder type, used to create named captures in
  456. /// static regexes.
  457. ///
  458. /// \c mark_tag is the type of the global sub-match placeholders \c s0, \c s1, etc.. You
  459. /// can use the \c mark_tag type to create your own sub-match placeholders with
  460. /// more meaningful names. This is roughly equivalent to the "named capture"
  461. /// feature of dynamic regular expressions.
  462. ///
  463. /// To create a named sub-match placeholder, initialize it with a unique integer.
  464. /// The integer must only be unique within the regex in which the placeholder
  465. /// is used. Then you can use it within static regexes to created sub-matches
  466. /// by assigning a sub-expression to it, or to refer back to already created
  467. /// sub-matches.
  468. ///
  469. /// \code
  470. /// mark_tag number(1); // "number" is now equivalent to "s1"
  471. /// // Match a number, followed by a space and the same number again
  472. /// sregex rx = (number = +_d) >> ' ' >> number;
  473. /// \endcode
  474. ///
  475. /// After a successful \c regex_match() or \c regex_search(), the sub-match placeholder
  476. /// can be used to index into the <tt>match_results\<\></tt> object to retrieve the
  477. /// corresponding sub-match.
  478. struct mark_tag
  479. : proto::extends<detail::basic_mark_tag, mark_tag, detail::regex_domain>
  480. {
  481. private:
  482. typedef proto::extends<detail::basic_mark_tag, mark_tag, detail::regex_domain> base_type;
  483. static detail::basic_mark_tag make_tag(int mark_nbr)
  484. {
  485. detail::basic_mark_tag mark = {{mark_nbr}};
  486. return mark;
  487. }
  488. public:
  489. /// \brief Initialize a mark_tag placeholder
  490. /// \param mark_nbr An integer that uniquely identifies this \c mark_tag
  491. /// within the static regexes in which this \c mark_tag will be used.
  492. /// \pre <tt>mark_nbr \> 0</tt>
  493. mark_tag(int mark_nbr)
  494. : base_type(mark_tag::make_tag(mark_nbr))
  495. {
  496. // Marks numbers must be integers greater than 0.
  497. BOOST_ASSERT(mark_nbr > 0);
  498. }
  499. /// INTERNAL ONLY
  500. operator detail::basic_mark_tag const &() const
  501. {
  502. return this->proto_base();
  503. }
  504. BOOST_PROTO_EXTENDS_USING_ASSIGN_NON_DEPENDENT(mark_tag)
  505. };
  506. // This macro is used when declaring mark_tags that are global because
  507. // it guarantees that they are statically initialized. That avoids
  508. // order-of-initialization bugs. In user code, the simpler: mark_tag s0(0);
  509. // would be preferable.
  510. /// INTERNAL ONLY
  511. #define BOOST_XPRESSIVE_GLOBAL_MARK_TAG(NAME, VALUE) \
  512. boost::xpressive::mark_tag::proto_base_expr const NAME = {{VALUE}} \
  513. /**/
  514. ///////////////////////////////////////////////////////////////////////////////
  515. /// \brief Sub-match placeholder, like $& in Perl
  516. BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s0, 0);
  517. ///////////////////////////////////////////////////////////////////////////////
  518. /// \brief Sub-match placeholder, like $1 in perl.
  519. ///
  520. /// To create a sub-match, assign a sub-expression to the sub-match placeholder.
  521. /// For instance, (s1= _) will match any one character and remember which
  522. /// character was matched in the 1st sub-match. Later in the pattern, you can
  523. /// refer back to the sub-match. For instance, (s1= _) >> s1 will match any
  524. /// character, and then match the same character again.
  525. ///
  526. /// After a successful regex_match() or regex_search(), the sub-match placeholders
  527. /// can be used to index into the match_results\<\> object to retrieve the Nth
  528. /// sub-match.
  529. BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s1, 1);
  530. BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s2, 2);
  531. BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s3, 3);
  532. BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s4, 4);
  533. BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s5, 5);
  534. BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s6, 6);
  535. BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s7, 7);
  536. BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s8, 8);
  537. BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s9, 9);
  538. // NOTE: For the purpose of xpressive's documentation, make icase() look like an
  539. // ordinary function. In reality, it is a function object defined in detail/icase.hpp
  540. // so that it can serve double-duty as regex_constants::icase, the syntax_option_type.
  541. #ifdef BOOST_XPRESSIVE_DOXYGEN_INVOKED
  542. ///////////////////////////////////////////////////////////////////////////////
  543. /// \brief Makes a sub-expression case-insensitive.
  544. ///
  545. /// Use icase() to make a sub-expression case-insensitive. For instance,
  546. /// "foo" >> icase(set['b'] >> "ar") will match "foo" exactly followed by
  547. /// "bar" irrespective of case.
  548. template<typename Expr> detail::unspecified icase(Expr const &expr) { return 0; }
  549. #endif
  550. ///////////////////////////////////////////////////////////////////////////////
  551. /// \brief Makes a literal into a regular expression.
  552. ///
  553. /// Use as_xpr() to turn a literal into a regular expression. For instance,
  554. /// "foo" >> "bar" will not compile because both operands to the right-shift
  555. /// operator are const char*, and no such operator exists. Use as_xpr("foo") >> "bar"
  556. /// instead.
  557. ///
  558. /// You can use as_xpr() with character literals in addition to string literals.
  559. /// For instance, as_xpr('a') will match an 'a'. You can also complement a
  560. /// character literal, as with ~as_xpr('a'). This will match any one character
  561. /// that is not an 'a'.
  562. #ifdef BOOST_XPRESSIVE_DOXYGEN_INVOKED
  563. template<typename Literal> detail::unspecified as_xpr(Literal const &literal) { return 0; }
  564. #else
  565. proto::functional::as_expr<> const as_xpr = {};
  566. #endif
  567. ///////////////////////////////////////////////////////////////////////////////
  568. /// \brief Embed a regex object by reference.
  569. ///
  570. /// \param rex The basic_regex object to embed by reference.
  571. template<typename BidiIter>
  572. inline typename proto::terminal<reference_wrapper<basic_regex<BidiIter> const> >::type const
  573. by_ref(basic_regex<BidiIter> const &rex)
  574. {
  575. reference_wrapper<basic_regex<BidiIter> const> ref(rex);
  576. return proto::terminal<reference_wrapper<basic_regex<BidiIter> const> >::type::make(ref);
  577. }
  578. ///////////////////////////////////////////////////////////////////////////////
  579. /// \brief Match a range of characters.
  580. ///
  581. /// Match any character in the range [ch_min, ch_max].
  582. ///
  583. /// \param ch_min The lower end of the range to match.
  584. /// \param ch_max The upper end of the range to match.
  585. template<typename Char>
  586. inline typename proto::terminal<detail::range_placeholder<Char> >::type const
  587. range(Char ch_min, Char ch_max)
  588. {
  589. detail::range_placeholder<Char> that = {ch_min, ch_max, false};
  590. return proto::terminal<detail::range_placeholder<Char> >::type::make(that);
  591. }
  592. ///////////////////////////////////////////////////////////////////////////////
  593. /// \brief Make a sub-expression optional. Equivalent to !as_xpr(expr).
  594. ///
  595. /// \param expr The sub-expression to make optional.
  596. template<typename Expr>
  597. typename proto::result_of::make_expr<
  598. proto::tag::logical_not
  599. , proto::default_domain
  600. , Expr const &
  601. >::type const
  602. optional(Expr const &expr)
  603. {
  604. return proto::make_expr<
  605. proto::tag::logical_not
  606. , proto::default_domain
  607. >(boost::ref(expr));
  608. }
  609. ///////////////////////////////////////////////////////////////////////////////
  610. /// \brief Repeat a sub-expression multiple times.
  611. ///
  612. /// There are two forms of the repeat\<\>() function template. To match a
  613. /// sub-expression N times, use repeat\<N\>(expr). To match a sub-expression
  614. /// from M to N times, use repeat\<M,N\>(expr).
  615. ///
  616. /// The repeat\<\>() function creates a greedy quantifier. To make the quantifier
  617. /// non-greedy, apply the unary minus operator, as in -repeat\<M,N\>(expr).
  618. ///
  619. /// \param expr The sub-expression to repeat.
  620. template<unsigned int Min, unsigned int Max, typename Expr>
  621. typename proto::result_of::make_expr<
  622. detail::generic_quant_tag<Min, Max>
  623. , proto::default_domain
  624. , Expr const &
  625. >::type const
  626. repeat(Expr const &expr)
  627. {
  628. return proto::make_expr<
  629. detail::generic_quant_tag<Min, Max>
  630. , proto::default_domain
  631. >(boost::ref(expr));
  632. }
  633. /// \overload
  634. ///
  635. template<unsigned int Count, typename Expr2>
  636. typename proto::result_of::make_expr<
  637. detail::generic_quant_tag<Count, Count>
  638. , proto::default_domain
  639. , Expr2 const &
  640. >::type const
  641. repeat(Expr2 const &expr2)
  642. {
  643. return proto::make_expr<
  644. detail::generic_quant_tag<Count, Count>
  645. , proto::default_domain
  646. >(boost::ref(expr2));
  647. }
  648. ///////////////////////////////////////////////////////////////////////////////
  649. /// \brief Create an independent sub-expression.
  650. ///
  651. /// Turn off back-tracking for a sub-expression. Any branches or repeats within
  652. /// the sub-expression will match only one way, and no other alternatives are
  653. /// tried.
  654. ///
  655. /// \attention keep(expr) is equivalent to the perl (?>...) extension.
  656. ///
  657. /// \param expr The sub-expression to modify.
  658. template<typename Expr>
  659. typename proto::result_of::make_expr<
  660. detail::keeper_tag
  661. , proto::default_domain
  662. , Expr const &
  663. >::type const
  664. keep(Expr const &expr)
  665. {
  666. return proto::make_expr<
  667. detail::keeper_tag
  668. , proto::default_domain
  669. >(boost::ref(expr));
  670. }
  671. ///////////////////////////////////////////////////////////////////////////////
  672. /// \brief Look-ahead assertion.
  673. ///
  674. /// before(expr) succeeds if the expr sub-expression would match at the current
  675. /// position in the sequence, but expr is not included in the match. For instance,
  676. /// before("foo") succeeds if we are before a "foo". Look-ahead assertions can be
  677. /// negated with the bit-compliment operator.
  678. ///
  679. /// \attention before(expr) is equivalent to the perl (?=...) extension.
  680. /// ~before(expr) is a negative look-ahead assertion, equivalent to the
  681. /// perl (?!...) extension.
  682. ///
  683. /// \param expr The sub-expression to put in the look-ahead assertion.
  684. template<typename Expr>
  685. typename proto::result_of::make_expr<
  686. detail::lookahead_tag
  687. , proto::default_domain
  688. , Expr const &
  689. >::type const
  690. before(Expr const &expr)
  691. {
  692. return proto::make_expr<
  693. detail::lookahead_tag
  694. , proto::default_domain
  695. >(boost::ref(expr));
  696. }
  697. ///////////////////////////////////////////////////////////////////////////////
  698. /// \brief Look-behind assertion.
  699. ///
  700. /// after(expr) succeeds if the expr sub-expression would match at the current
  701. /// position minus N in the sequence, where N is the width of expr. expr is not included in
  702. /// the match. For instance, after("foo") succeeds if we are after a "foo". Look-behind
  703. /// assertions can be negated with the bit-complement operator.
  704. ///
  705. /// \attention after(expr) is equivalent to the perl (?<=...) extension.
  706. /// ~after(expr) is a negative look-behind assertion, equivalent to the
  707. /// perl (?<!...) extension.
  708. ///
  709. /// \param expr The sub-expression to put in the look-ahead assertion.
  710. ///
  711. /// \pre expr cannot match a variable number of characters.
  712. template<typename Expr>
  713. typename proto::result_of::make_expr<
  714. detail::lookbehind_tag
  715. , proto::default_domain
  716. , Expr const &
  717. >::type const
  718. after(Expr const &expr)
  719. {
  720. return proto::make_expr<
  721. detail::lookbehind_tag
  722. , proto::default_domain
  723. >(boost::ref(expr));
  724. }
  725. ///////////////////////////////////////////////////////////////////////////////
  726. /// \brief Specify a regex traits or a std::locale.
  727. ///
  728. /// imbue() instructs the regex engine to use the specified traits or locale
  729. /// when matching the regex. The entire expression must use the same traits/locale.
  730. /// For instance, the following specifies a locale for use with a regex:
  731. /// std::locale loc;
  732. /// sregex rx = imbue(loc)(+digit);
  733. ///
  734. /// \param loc The std::locale or regex traits object.
  735. template<typename Locale>
  736. inline detail::modifier_op<detail::locale_modifier<Locale> > const
  737. imbue(Locale const &loc)
  738. {
  739. detail::modifier_op<detail::locale_modifier<Locale> > mod =
  740. {
  741. detail::locale_modifier<Locale>(loc)
  742. , regex_constants::ECMAScript
  743. };
  744. return mod;
  745. }
  746. proto::terminal<detail::attribute_placeholder<mpl::int_<1> > >::type const a1 = {{}};
  747. proto::terminal<detail::attribute_placeholder<mpl::int_<2> > >::type const a2 = {{}};
  748. proto::terminal<detail::attribute_placeholder<mpl::int_<3> > >::type const a3 = {{}};
  749. proto::terminal<detail::attribute_placeholder<mpl::int_<4> > >::type const a4 = {{}};
  750. proto::terminal<detail::attribute_placeholder<mpl::int_<5> > >::type const a5 = {{}};
  751. proto::terminal<detail::attribute_placeholder<mpl::int_<6> > >::type const a6 = {{}};
  752. proto::terminal<detail::attribute_placeholder<mpl::int_<7> > >::type const a7 = {{}};
  753. proto::terminal<detail::attribute_placeholder<mpl::int_<8> > >::type const a8 = {{}};
  754. proto::terminal<detail::attribute_placeholder<mpl::int_<9> > >::type const a9 = {{}};
  755. ///////////////////////////////////////////////////////////////////////////////
  756. /// \brief Specify which characters to skip when matching a regex.
  757. ///
  758. /// <tt>skip()</tt> instructs the regex engine to skip certain characters when matching
  759. /// a regex. It is most useful for writing regexes that ignore whitespace.
  760. /// For instance, the following specifies a regex that skips whitespace and
  761. /// punctuation:
  762. ///
  763. /// \code
  764. /// // A sentence is one or more words separated by whitespace
  765. /// // and punctuation.
  766. /// sregex word = +alpha;
  767. /// sregex sentence = skip(set[_s | punct])( +word );
  768. /// \endcode
  769. ///
  770. /// The way it works in the above example is to insert
  771. /// <tt>keep(*set[_s | punct])</tt> before each primitive within the regex.
  772. /// A "primitive" includes terminals like strings, character sets and nested
  773. /// regexes. A final <tt>*set[_s | punct]</tt> is added to the end of the
  774. /// regex. The regex <tt>sentence</tt> specified above is equivalent to
  775. /// the following:
  776. ///
  777. /// \code
  778. /// sregex sentence = +( keep(*set[_s | punct]) >> word )
  779. /// >> *set[_s | punct];
  780. /// \endcode
  781. ///
  782. /// \attention Skipping does not affect how nested regexes are handled because
  783. /// they are treated atomically. String literals are also treated
  784. /// atomically; that is, no skipping is done within a string literal. So
  785. /// <tt>skip(_s)("this that")</tt> is not the same as
  786. /// <tt>skip(_s)("this" >> as_xpr("that"))</tt>. The first will only match
  787. /// when there is only one space between "this" and "that". The second will
  788. /// skip any and all whitespace between "this" and "that".
  789. ///
  790. /// \param skip A regex that specifies which characters to skip.
  791. template<typename Skip>
  792. detail::skip_directive<Skip> skip(Skip const &skip)
  793. {
  794. return detail::skip_directive<Skip>(skip);
  795. }
  796. namespace detail
  797. {
  798. inline void ignore_unused_regex_primitives()
  799. {
  800. detail::ignore_unused(repeat_max);
  801. detail::ignore_unused(inf);
  802. detail::ignore_unused(epsilon);
  803. detail::ignore_unused(nil);
  804. detail::ignore_unused(alnum);
  805. detail::ignore_unused(bos);
  806. detail::ignore_unused(eos);
  807. detail::ignore_unused(bol);
  808. detail::ignore_unused(eol);
  809. detail::ignore_unused(bow);
  810. detail::ignore_unused(eow);
  811. detail::ignore_unused(_b);
  812. detail::ignore_unused(_w);
  813. detail::ignore_unused(_d);
  814. detail::ignore_unused(_s);
  815. detail::ignore_unused(_n);
  816. detail::ignore_unused(_ln);
  817. detail::ignore_unused(_);
  818. detail::ignore_unused(self);
  819. detail::ignore_unused(set);
  820. detail::ignore_unused(s0);
  821. detail::ignore_unused(s1);
  822. detail::ignore_unused(s2);
  823. detail::ignore_unused(s3);
  824. detail::ignore_unused(s4);
  825. detail::ignore_unused(s5);
  826. detail::ignore_unused(s6);
  827. detail::ignore_unused(s7);
  828. detail::ignore_unused(s8);
  829. detail::ignore_unused(s9);
  830. detail::ignore_unused(a1);
  831. detail::ignore_unused(a2);
  832. detail::ignore_unused(a3);
  833. detail::ignore_unused(a4);
  834. detail::ignore_unused(a5);
  835. detail::ignore_unused(a6);
  836. detail::ignore_unused(a7);
  837. detail::ignore_unused(a8);
  838. detail::ignore_unused(a9);
  839. detail::ignore_unused(as_xpr);
  840. }
  841. }
  842. }} // namespace boost::xpressive
  843. #endif