xlex_lexer.hpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588
  1. /*=============================================================================
  2. Boost.Wave: A Standard compliant C++ preprocessor library
  3. Xpressive based C++ lexer
  4. http://www.boost.org/
  5. Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost
  6. Software License, Version 1.0. (See accompanying file
  7. LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  8. =============================================================================*/
  9. #if !defined(XLEX_LEXER_HPP)
  10. #define XLEX_LEXER_HPP
  11. #include <string>
  12. #include <cstdio>
  13. #include <cstdarg>
  14. #if defined(BOOST_SPIRIT_DEBUG)
  15. #include <iostream>
  16. #endif // defined(BOOST_SPIRIT_DEBUG)
  17. #include <boost/concept_check.hpp>
  18. #include <boost/assert.hpp>
  19. #include <boost/spirit/include/classic_core.hpp>
  20. #include <boost/wave/token_ids.hpp>
  21. #include <boost/wave/language_support.hpp>
  22. #include <boost/wave/util/file_position.hpp>
  23. #include <boost/wave/cpplexer/validate_universal_char.hpp>
  24. #include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
  25. #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
  26. #include <boost/wave/cpplexer/detect_include_guards.hpp>
  27. #endif
  28. #include <boost/wave/cpplexer/cpp_lex_interface.hpp>
  29. // reuse the default token type
  30. #include "../xlex_iterator.hpp"
  31. // include the xpressive headers
  32. #include "xpressive_lexer.hpp"
  33. ///////////////////////////////////////////////////////////////////////////////
  34. namespace boost {
  35. namespace wave {
  36. namespace cpplexer {
  37. namespace xlex {
  38. namespace lexer {
  39. ///////////////////////////////////////////////////////////////////////////////
  40. //
  41. // encapsulation of the xpressive based C++ lexer
  42. //
  43. ///////////////////////////////////////////////////////////////////////////////
  44. template <
  45. typename Iterator,
  46. typename Position = boost::wave::util::file_position_type
  47. >
  48. class lexer
  49. {
  50. public:
  51. typedef char char_type;
  52. typedef boost::wave::cpplexer::lex_token<Position> token_type;
  53. typedef typename token_type::string_type string_type;
  54. lexer(Iterator const &first, Iterator const &last,
  55. Position const &pos, boost::wave::language_support language);
  56. ~lexer() {}
  57. token_type& get(token_type& t);
  58. void set_position(Position const &pos)
  59. {
  60. // set position has to change the file name and line number only
  61. filename = pos.get_file();
  62. line = pos.get_line();
  63. }
  64. #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
  65. bool has_include_guards(std::string& guard_name) const
  66. { return guards.detected(guard_name); }
  67. #endif
  68. private:
  69. typedef xpressive_lexer<Iterator, token_id> lexer_type;
  70. typedef typename lexer_type::callback_type callback_type;
  71. lexer_type xlexer;
  72. Iterator first;
  73. Iterator last;
  74. string_type filename;
  75. int line;
  76. bool at_eof;
  77. boost::wave::language_support language;
  78. // initialization data (regular expressions for the token definitions)
  79. struct lexer_data {
  80. token_id tokenid; // token data
  81. char_type const *tokenregex; // associated token to match
  82. callback_type tokencb; // associated callback function
  83. };
  84. static lexer_data const init_data[]; // common patterns
  85. static lexer_data const init_data_cpp[]; // C++ only patterns
  86. #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
  87. boost::wave::cpplexer::include_guards<token_type> guards;
  88. #endif
  89. };
  90. ///////////////////////////////////////////////////////////////////////////////
  91. // helper for initializing token data
  // TOKEN_DATA(id, regex)
  //      brace initializer for a table row consisting of the token id, its
  //      regular expression and a zero (i.e. no) callback
  #define TOKEN_DATA(id, regex)               { id, regex, 0 }

  // TOKEN_DATA_EX(id, regex, callback)
  //      same as TOKEN_DATA, but with an explicitly given callback
  #define TOKEN_DATA_EX(id, regex, callback)  { id, regex, callback }
  96. ///////////////////////////////////////////////////////////////////////////////
  97. // data required for initialization of the lexer (token definitions)
  // --- elementary helpers ---------------------------------------------------
  // OR      regex alternation
  // Q(c)    puts a regex escape (backslash) in front of the literal c
  // TRI(c)  two escaped question marks followed by c (trigraph spelling)
  #define OR                  "|"
  #define Q(c)                "\\" c
  #define TRI(c)              Q("?") Q("?") c

  // --- sub-expressions shared by several token definitions ------------------
  // white space and comments
  #define BLANK               "[ \t]"
  #define CCOMMENT            Q("/") Q("*") ".*?" Q("*") Q("/")
  #define PPSPACE             "(" BLANK OR CCOMMENT ")*"

  // digits, sign and exponent
  #define OCTALDIGIT          "[0-7]"
  #define DIGIT               "[0-9]"
  #define HEXDIGIT            "[0-9a-fA-F]"
  #define SIGN                "[-+]?"
  #define EXPONENT            "(" "[eE]" SIGN "[0-9]+" ")"

  // hexadecimal, octal or decimal integer (without suffix)
  #define INTEGER                                                             \
      "("                                                                     \
          "(0x|0X)" HEXDIGIT "+"  OR                                          \
          "0" OCTALDIGIT "*"      OR                                          \
          "[1-9]" DIGIT "*"                                                   \
      ")"

  // literal suffixes
  #define INTEGER_SUFFIX      "(" "[uU][lL]?|[lL][uU]?" ")"

  #if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
  // with MS extensions enabled "i64" is accepted as well
  #define LONGINTEGER_SUFFIX                                                  \
      "("                                                                     \
          "[uU]" "(" "[lL][lL]" ")"       OR                                  \
          "(" "[lL][lL]" ")" "[uU]" "?"   OR                                  \
          "i64"                                                               \
      ")"
  #else
  #define LONGINTEGER_SUFFIX                                                  \
      "("                                                                     \
          "[uU]" "(" "[lL][lL]" ")"       OR                                  \
          "(" "[lL][lL]" ")" "[uU]" "?"                                       \
      ")"
  #endif

  #define FLOAT_SUFFIX        "(" "[fF][lL]?|[lL][fF]?" ")"

  // character and string literal helpers
  #define CHAR_SPEC           "L?"
  #define BACKSLASH           "(" Q("\\") OR TRI(Q("/")) ")"
  #define ESCAPESEQ                                                           \
      BACKSLASH "("                                                           \
          "[abfnrtv?'\"]"                             OR                      \
          BACKSLASH                                   OR                      \
          "x" HEXDIGIT "+"                            OR                      \
          OCTALDIGIT OCTALDIGIT "?" OCTALDIGIT "?"                            \
      ")"
  #define HEXQUAD             HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
  #define UNIVERSALCHAR                                                       \
      BACKSLASH "("                                                           \
          "u" HEXQUAD             OR                                          \
          "U" HEXQUAD HEXQUAD                                                 \
      ")"

  // preprocessor related helpers
  #define POUNDDEF            "(" "#" OR TRI("=") OR Q("%:") ")"
  #define NEWLINEDEF          "(" "\n" OR "\r\n" OR "\r" ")"

  #if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
  #define INCLUDEDEF          "(include_next|include)"
  #else
  #define INCLUDEDEF          "include"
  #endif
  146. ///////////////////////////////////////////////////////////////////////////////
  147. // common C++/C99 token definitions
  148. template <typename Iterator, typename Position>
  149. typename lexer<Iterator, Position>::lexer_data const
  150. lexer<Iterator, Position>::init_data[] =
  151. {
  152. TOKEN_DATA(T_CCOMMENT, CCOMMENT),
  153. TOKEN_DATA(T_CPPCOMMENT, Q("/") Q("/.*?") NEWLINEDEF ),
  154. TOKEN_DATA(T_CHARLIT, CHAR_SPEC "'"
  155. "(" ESCAPESEQ OR "[^\n\r']" OR UNIVERSALCHAR ")+" "'"),
  156. TOKEN_DATA(T_STRINGLIT, CHAR_SPEC Q("\"")
  157. "(" ESCAPESEQ OR "[^\n\r\"]" OR UNIVERSALCHAR ")*" Q("\"")),
  158. TOKEN_DATA(T_ANDAND, "&&"),
  159. TOKEN_DATA(T_ANDASSIGN, "&="),
  160. TOKEN_DATA(T_AND, "&"),
  161. TOKEN_DATA(T_EQUAL, "=="),
  162. TOKEN_DATA(T_ASSIGN, "="),
  163. TOKEN_DATA(T_ORASSIGN, Q("|=")),
  164. TOKEN_DATA(T_ORASSIGN_TRIGRAPH, TRI("!=")),
  165. TOKEN_DATA(T_OROR, Q("|") Q("|")),
  166. TOKEN_DATA(T_OROR_TRIGRAPH, TRI("!") Q("|") OR Q("|") TRI("!") OR TRI("!") TRI("!")),
  167. TOKEN_DATA(T_OR, Q("|")),
  168. TOKEN_DATA(T_OR_TRIGRAPH, TRI("!")),
  169. TOKEN_DATA(T_XORASSIGN, Q("^=")),
  170. TOKEN_DATA(T_XORASSIGN_TRIGRAPH, TRI("'=")),
  171. TOKEN_DATA(T_XOR, Q("^")),
  172. TOKEN_DATA(T_XOR_TRIGRAPH, TRI("'")),
  173. TOKEN_DATA(T_COMMA, ","),
  174. TOKEN_DATA(T_RIGHTBRACKET_ALT, ":>"),
  175. TOKEN_DATA(T_COLON, ":"),
  176. TOKEN_DATA(T_DIVIDEASSIGN, Q("/=")),
  177. TOKEN_DATA(T_DIVIDE, Q("/")),
  178. TOKEN_DATA(T_ELLIPSIS, Q(".") Q(".") Q(".")),
  179. TOKEN_DATA(T_SHIFTRIGHTASSIGN, ">>="),
  180. TOKEN_DATA(T_SHIFTRIGHT, ">>"),
  181. TOKEN_DATA(T_GREATEREQUAL, ">="),
  182. TOKEN_DATA(T_GREATER, ">"),
  183. TOKEN_DATA(T_LEFTBRACE, Q("{")),
  184. TOKEN_DATA(T_SHIFTLEFTASSIGN, "<<="),
  185. TOKEN_DATA(T_SHIFTLEFT, "<<"),
  186. TOKEN_DATA(T_LEFTBRACE_ALT, "<" Q("%")),
  187. TOKEN_DATA(T_LESSEQUAL, "<="),
  188. TOKEN_DATA(T_LEFTBRACKET_ALT, "<:"),
  189. TOKEN_DATA(T_LESS, "<"),
  190. TOKEN_DATA(T_LEFTBRACE_TRIGRAPH, TRI("<")),
  191. TOKEN_DATA(T_LEFTPAREN, Q("(")),
  192. TOKEN_DATA(T_LEFTBRACKET, Q("[")),
  193. TOKEN_DATA(T_LEFTBRACKET_TRIGRAPH, TRI(Q("("))),
  194. TOKEN_DATA(T_MINUSMINUS, Q("-") Q("-")),
  195. TOKEN_DATA(T_MINUSASSIGN, Q("-=")),
  196. TOKEN_DATA(T_ARROW, Q("->")),
  197. TOKEN_DATA(T_MINUS, Q("-")),
  198. TOKEN_DATA(T_POUND_POUND_ALT, Q("%:") Q("%:")),
  199. TOKEN_DATA(T_PERCENTASSIGN, Q("%=")),
  200. TOKEN_DATA(T_RIGHTBRACE_ALT, Q("%>")),
  201. TOKEN_DATA(T_POUND_ALT, Q("%:")),
  202. TOKEN_DATA(T_PERCENT, Q("%")),
  203. TOKEN_DATA(T_NOTEQUAL, "!="),
  204. TOKEN_DATA(T_NOT, "!"),
  205. TOKEN_DATA(T_PLUSASSIGN, Q("+=")),
  206. TOKEN_DATA(T_PLUSPLUS, Q("+") Q("+")),
  207. TOKEN_DATA(T_PLUS, Q("+")),
  208. TOKEN_DATA(T_RIGHTBRACE, Q("}")),
  209. TOKEN_DATA(T_RIGHTBRACE_TRIGRAPH, TRI(">")),
  210. TOKEN_DATA(T_RIGHTPAREN, Q(")")),
  211. TOKEN_DATA(T_RIGHTBRACKET, Q("]")),
  212. TOKEN_DATA(T_RIGHTBRACKET_TRIGRAPH, TRI(Q(")"))),
  213. TOKEN_DATA(T_SEMICOLON, ";"),
  214. TOKEN_DATA(T_STARASSIGN, Q("*=")),
  215. TOKEN_DATA(T_STAR, Q("*")),
  216. TOKEN_DATA(T_COMPL, Q("~")),
  217. TOKEN_DATA(T_COMPL_TRIGRAPH, TRI("-")),
  218. TOKEN_DATA(T_ASM, "asm"),
  219. TOKEN_DATA(T_AUTO, "auto"),
  220. TOKEN_DATA(T_BOOL, "bool"),
  221. TOKEN_DATA(T_FALSE, "false"),
  222. TOKEN_DATA(T_TRUE, "true"),
  223. TOKEN_DATA(T_BREAK, "break"),
  224. TOKEN_DATA(T_CASE, "case"),
  225. TOKEN_DATA(T_CATCH, "catch"),
  226. TOKEN_DATA(T_CHAR, "char"),
  227. TOKEN_DATA(T_CLASS, "class"),
  228. TOKEN_DATA(T_CONSTCAST, "const_cast"),
  229. TOKEN_DATA(T_CONST, "const"),
  230. TOKEN_DATA(T_CONTINUE, "continue"),
  231. TOKEN_DATA(T_DEFAULT, "default"),
  232. TOKEN_DATA(T_DELETE, "delete"),
  233. TOKEN_DATA(T_DOUBLE, "double"),
  234. TOKEN_DATA(T_DO, "do"),
  235. TOKEN_DATA(T_DYNAMICCAST, "dynamic_cast"),
  236. TOKEN_DATA(T_ELSE, "else"),
  237. TOKEN_DATA(T_ENUM, "enum"),
  238. TOKEN_DATA(T_EXPLICIT, "explicit"),
  239. TOKEN_DATA(T_EXPORT, "export"),
  240. TOKEN_DATA(T_EXTERN, "extern"),
  241. TOKEN_DATA(T_FLOAT, "float"),
  242. TOKEN_DATA(T_FOR, "for"),
  243. TOKEN_DATA(T_FRIEND, "friend"),
  244. TOKEN_DATA(T_GOTO, "goto"),
  245. TOKEN_DATA(T_IF, "if"),
  246. TOKEN_DATA(T_INLINE, "inline"),
  247. TOKEN_DATA(T_INT, "int"),
  248. TOKEN_DATA(T_LONG, "long"),
  249. TOKEN_DATA(T_MUTABLE, "mutable"),
  250. TOKEN_DATA(T_NAMESPACE, "namespace"),
  251. TOKEN_DATA(T_NEW, "new"),
  252. TOKEN_DATA(T_OPERATOR, "operator"),
  253. TOKEN_DATA(T_PRIVATE, "private"),
  254. TOKEN_DATA(T_PROTECTED, "protected"),
  255. TOKEN_DATA(T_PUBLIC, "public"),
  256. TOKEN_DATA(T_REGISTER, "register"),
  257. TOKEN_DATA(T_REINTERPRETCAST, "reinterpret_cast"),
  258. TOKEN_DATA(T_RETURN, "return"),
  259. TOKEN_DATA(T_SHORT, "short"),
  260. TOKEN_DATA(T_SIGNED, "signed"),
  261. TOKEN_DATA(T_SIZEOF, "sizeof"),
  262. TOKEN_DATA(T_STATICCAST, "static_cast"),
  263. TOKEN_DATA(T_STATIC, "static"),
  264. TOKEN_DATA(T_STRUCT, "struct"),
  265. TOKEN_DATA(T_SWITCH, "switch"),
  266. TOKEN_DATA(T_TEMPLATE, "template"),
  267. TOKEN_DATA(T_THIS, "this"),
  268. TOKEN_DATA(T_THROW, "throw"),
  269. TOKEN_DATA(T_TRY, "try"),
  270. TOKEN_DATA(T_TYPEDEF, "typedef"),
  271. TOKEN_DATA(T_TYPEID, "typeid"),
  272. TOKEN_DATA(T_TYPENAME, "typename"),
  273. TOKEN_DATA(T_UNION, "union"),
  274. TOKEN_DATA(T_UNSIGNED, "unsigned"),
  275. TOKEN_DATA(T_USING, "using"),
  276. TOKEN_DATA(T_VIRTUAL, "virtual"),
  277. TOKEN_DATA(T_VOID, "void"),
  278. TOKEN_DATA(T_VOLATILE, "volatile"),
  279. TOKEN_DATA(T_WCHART, "wchar_t"),
  280. TOKEN_DATA(T_WHILE, "while"),
  281. #if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
  282. TOKEN_DATA(T_MSEXT_INT8, "__int8"),
  283. TOKEN_DATA(T_MSEXT_INT16, "__int16"),
  284. TOKEN_DATA(T_MSEXT_INT32, "__int32"),
  285. TOKEN_DATA(T_MSEXT_INT64, "__int64"),
  286. TOKEN_DATA(T_MSEXT_BASED, "_?" "_based"),
  287. TOKEN_DATA(T_MSEXT_DECLSPEC, "_?" "_declspec"),
  288. TOKEN_DATA(T_MSEXT_CDECL, "_?" "_cdecl"),
  289. TOKEN_DATA(T_MSEXT_FASTCALL, "_?" "_fastcall"),
  290. TOKEN_DATA(T_MSEXT_STDCALL, "_?" "_stdcall"),
  291. TOKEN_DATA(T_MSEXT_TRY , "__try"),
  292. TOKEN_DATA(T_MSEXT_EXCEPT, "__except"),
  293. TOKEN_DATA(T_MSEXT_FINALLY, "__finally"),
  294. TOKEN_DATA(T_MSEXT_LEAVE, "__leave"),
  295. TOKEN_DATA(T_MSEXT_INLINE, "_?" "_inline"),
  296. TOKEN_DATA(T_MSEXT_ASM, "_?" "_asm"),
  297. TOKEN_DATA(T_MSEXT_PP_REGION, POUNDDEF PPSPACE "region"),
  298. TOKEN_DATA(T_MSEXT_PP_ENDREGION, POUNDDEF PPSPACE "endregion"),
  299. #endif // BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
  300. TOKEN_DATA(T_PP_DEFINE, POUNDDEF PPSPACE "define"),
  301. TOKEN_DATA(T_PP_IFDEF, POUNDDEF PPSPACE "ifdef"),
  302. TOKEN_DATA(T_PP_IFNDEF, POUNDDEF PPSPACE "ifndef"),
  303. TOKEN_DATA(T_PP_IF, POUNDDEF PPSPACE "if"),
  304. TOKEN_DATA(T_PP_ELSE, POUNDDEF PPSPACE "else"),
  305. TOKEN_DATA(T_PP_ELIF, POUNDDEF PPSPACE "elif"),
  306. TOKEN_DATA(T_PP_ENDIF, POUNDDEF PPSPACE "endif"),
  307. TOKEN_DATA(T_PP_ERROR, POUNDDEF PPSPACE "error"),
  308. TOKEN_DATA(T_PP_QHEADER, POUNDDEF PPSPACE \
  309. INCLUDEDEF PPSPACE Q("\"") "[^\n\r\"]+" Q("\"")),
  310. TOKEN_DATA(T_PP_HHEADER, POUNDDEF PPSPACE \
  311. INCLUDEDEF PPSPACE "<" "[^\n\r>]+" ">"),
  312. TOKEN_DATA(T_PP_INCLUDE, POUNDDEF PPSPACE \
  313. INCLUDEDEF PPSPACE),
  314. TOKEN_DATA(T_PP_LINE, POUNDDEF PPSPACE "line"),
  315. TOKEN_DATA(T_PP_PRAGMA, POUNDDEF PPSPACE "pragma"),
  316. TOKEN_DATA(T_PP_UNDEF, POUNDDEF PPSPACE "undef"),
  317. TOKEN_DATA(T_PP_WARNING, POUNDDEF PPSPACE "warning"),
  318. TOKEN_DATA(T_FLOATLIT,
  319. "(" DIGIT "*" Q(".") DIGIT "+" OR DIGIT "+" Q(".") ")"
  320. EXPONENT "?" FLOAT_SUFFIX "?" OR
  321. DIGIT "+" EXPONENT FLOAT_SUFFIX "?"),
  322. TOKEN_DATA(T_LONGINTLIT, INTEGER LONGINTEGER_SUFFIX),
  323. TOKEN_DATA(T_INTLIT, INTEGER INTEGER_SUFFIX "?"),
  324. #if BOOST_WAVE_USE_STRICT_LEXER != 0
  325. TOKEN_DATA(T_IDENTIFIER, "([a-zA-Z_]" OR UNIVERSALCHAR ")([a-zA-Z0-9_]" OR UNIVERSALCHAR ")*"),
  326. #else
  327. TOKEN_DATA(T_IDENTIFIER, "([a-zA-Z_$]" OR UNIVERSALCHAR ")([a-zA-Z0-9_$]" OR UNIVERSALCHAR ")*"),
  328. #endif
  329. TOKEN_DATA(T_SPACE, BLANK "+"),
  330. TOKEN_DATA(T_SPACE2, "[\v\f]+"),
  331. TOKEN_DATA(T_CONTLINE, Q("\\") "\n"),
  332. TOKEN_DATA(T_NEWLINE, NEWLINEDEF),
  333. TOKEN_DATA(T_POUND_POUND, "##"),
  334. TOKEN_DATA(T_POUND_POUND_TRIGRAPH, TRI("=") TRI("=")),
  335. TOKEN_DATA(T_POUND, "#"),
  336. TOKEN_DATA(T_POUND_TRIGRAPH, TRI("=")),
  337. TOKEN_DATA(T_ANY_TRIGRAPH, TRI(Q("/"))),
  338. TOKEN_DATA(T_QUESTION_MARK, Q("?")),
  339. TOKEN_DATA(T_DOT, Q(".")),
  340. TOKEN_DATA(T_ANY, "."),
  341. { token_id(0) } // this should be the last entry
  342. };
  343. ///////////////////////////////////////////////////////////////////////////////
  344. // C++ only token definitions
  345. template <typename Iterator, typename Position>
  346. typename lexer<Iterator, Position>::lexer_data const
  347. lexer<Iterator, Position>::init_data_cpp[] =
  348. {
  349. TOKEN_DATA(T_AND_ALT, "bitand"),
  350. TOKEN_DATA(T_ANDASSIGN_ALT, "and_eq"),
  351. TOKEN_DATA(T_ANDAND_ALT, "and"),
  352. TOKEN_DATA(T_OR_ALT, "bitor"),
  353. TOKEN_DATA(T_ORASSIGN_ALT, "or_eq"),
  354. TOKEN_DATA(T_OROR_ALT, "or"),
  355. TOKEN_DATA(T_XORASSIGN_ALT, "xor_eq"),
  356. TOKEN_DATA(T_XOR_ALT, "xor"),
  357. TOKEN_DATA(T_NOTEQUAL_ALT, "not_eq"),
  358. TOKEN_DATA(T_NOT_ALT, "not"),
  359. TOKEN_DATA(T_COMPL_ALT, "compl"),
  360. TOKEN_DATA(T_ARROWSTAR, Q("->") Q("*")),
  361. TOKEN_DATA(T_DOTSTAR, Q(".") Q("*")),
  362. TOKEN_DATA(T_COLON_COLON, "::"),
  363. { token_id(0) } // this should be the last entry
  364. };
  365. ///////////////////////////////////////////////////////////////////////////////
  366. // undefine macros, required for regular expression definitions
  // None of the helper macros used for building the token tables may leak
  // out of this header. BLANK and NEWLINEDEF are part of that set as well:
  // they are defined together with the other helpers and have very generic
  // names, so they are removed here too.
  #undef INCLUDEDEF
  #undef NEWLINEDEF
  #undef POUNDDEF
  #undef CCOMMENT
  #undef PPSPACE
  #undef BLANK
  #undef DIGIT
  #undef OCTALDIGIT
  #undef HEXDIGIT
  #undef SIGN
  #undef EXPONENT
  #undef LONGINTEGER_SUFFIX
  #undef INTEGER_SUFFIX
  #undef INTEGER
  #undef FLOAT_SUFFIX
  #undef CHAR_SPEC
  #undef BACKSLASH
  #undef ESCAPESEQ
  #undef HEXQUAD
  #undef UNIVERSALCHAR
  #undef Q
  #undef TRI
  #undef OR
  #undef TOKEN_DATA
  #undef TOKEN_DATA_EX
  390. ///////////////////////////////////////////////////////////////////////////////
  391. // initialize cpp lexer
  392. template <typename Iterator, typename Position>
  393. inline
  394. lexer<Iterator, Position>::lexer(Iterator const &first,
  395. Iterator const &last, Position const &pos,
  396. boost::wave::language_support language)
  397. : first(first), last(last),
  398. filename(pos.get_file()), line(0), at_eof(false), language(language)
  399. {
  400. // if in C99 mode, some of the keywords/operators are not valid
  401. if (!boost::wave::need_c99(language)) {
  402. for (int j = 0; 0 != init_data_cpp[j].tokenid; ++j) {
  403. xlexer.register_regex(init_data_cpp[j].tokenregex,
  404. init_data_cpp[j].tokenid, init_data_cpp[j].tokencb);
  405. }
  406. }
  407. // tokens valid for C++ and C99
  408. for (int i = 0; 0 != init_data[i].tokenid; ++i) {
  409. xlexer.register_regex(init_data[i].tokenregex, init_data[i].tokenid,
  410. init_data[i].tokencb);
  411. }
  412. }
  413. ///////////////////////////////////////////////////////////////////////////////
  414. // get the next token from the input stream
  415. template <typename Iterator, typename Position>
  416. inline boost::wave::cpplexer::lex_token<Position>&
  417. lexer<Iterator, Position>::get(boost::wave::cpplexer::lex_token<Position>& t)
  418. {
  419. using namespace boost::wave; // to import token ids to this scope
  420. if (at_eof)
  421. return t = cpplexer::lex_token<Position>(); // return T_EOI
  422. std::string tokval;
  423. token_id id = xlexer.next_token(first, last, tokval);
  424. string_type value = tokval.c_str();
  425. if ((token_id)(-1) == id)
  426. id = T_EOF; // end of input reached
  427. if (T_IDENTIFIER == id) {
  428. // test identifier characters for validity (throws if invalid chars found)
  429. if (!boost::wave::need_no_character_validation(language)) {
  430. cpplexer::impl::validate_identifier_name(value, line, -1, filename);
  431. }
  432. }
  433. else if (T_STRINGLIT == id || T_CHARLIT == id) {
  434. // test literal characters for validity (throws if invalid chars found)
  435. if (!boost::wave::need_no_character_validation(language)) {
  436. cpplexer::impl::validate_literal(value, line, -1, filename);
  437. }
  438. }
  439. else if (T_EOF == id) {
  440. // T_EOF is returned as a valid token, the next call will return T_EOI,
  441. // i.e. the actual end of input
  442. at_eof = true;
  443. value.clear();
  444. }
  445. #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
  446. cpplexer::lex_token<Position> tok(id, value, Position(filename, line, -1));
  447. return t = guards.detect_guard(tok);
  448. #else
  449. return t = cpplexer::lex_token<Position>(id, value,
  450. Position(filename, line, -1));
  451. #endif
  452. }
  453. ///////////////////////////////////////////////////////////////////////////////
  454. //
  455. // lex_functor
  456. //
  457. ///////////////////////////////////////////////////////////////////////////////
  458. template <
  459. typename Iterator,
  460. typename Position = boost::wave::util::file_position_type
  461. >
  462. class xlex_functor
  463. : public xlex_input_interface<typename lexer<Iterator, Position>::token_type>
  464. {
  465. public:
  466. typedef typename lexer<Iterator, Position>::token_type token_type;
  467. xlex_functor(Iterator const &first, Iterator const &last,
  468. Position const &pos, boost::wave::language_support language)
  469. : lexer_(first, last, pos, language)
  470. {}
  471. virtual ~xlex_functor() {}
  472. // get the next token from the input stream
  473. token_type& get(token_type& t) { return lexer_.get(t); }
  474. void set_position(Position const &pos) { lexer_.set_position(pos); }
  475. #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
  476. bool has_include_guards(std::string& guard_name) const
  477. { return lexer_.has_include_guards(guard_name); }
  478. #endif
  479. private:
  480. lexer<Iterator, Position> lexer_;
  481. };
  482. } // namespace lexer
  483. ///////////////////////////////////////////////////////////////////////////////
  484. //
  // The new_lexer_gen<>::new_lexer function (declared in xlex_interface.hpp)
  // should be defined inline if the xlex_functor shouldn't be instantiated
  // separately from the lex_iterator.
  488. //
  489. // Separate (explicit) instantiation helps to reduce compilation time.
  490. //
  491. ///////////////////////////////////////////////////////////////////////////////
  // BOOST_WAVE_XLEX_NEW_LEXER_INLINE expands to 'inline' unless separate
  // lexer instantiation is requested, in which case it expands to nothing.
  #if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION == 0
  #define BOOST_WAVE_XLEX_NEW_LEXER_INLINE inline
  #else
  #define BOOST_WAVE_XLEX_NEW_LEXER_INLINE
  #endif
  497. ///////////////////////////////////////////////////////////////////////////////
  498. //
  499. // The 'new_lexer' function allows the opaque generation of a new lexer object.
  500. // It is coupled to the iterator type to allow to decouple the lexer/iterator
  501. // configurations at compile time.
  502. //
  503. // This function is declared inside the xlex_interface.hpp file, which is
  504. // referenced by the source file calling the lexer and the source file, which
  505. // instantiates the lex_functor. But it is defined here, so it will be
  506. // instantiated only while compiling the source file, which instantiates the
  507. // lex_functor. While the xlex_interface.hpp file may be included everywhere,
  508. // this file (xlex_lexer.hpp) should be included only once. This allows
  509. // to decouple the lexer interface from the lexer implementation and reduces
  510. // compilation time.
  511. //
  512. ///////////////////////////////////////////////////////////////////////////////
  513. template <typename Iterator, typename Position>
  514. BOOST_WAVE_XLEX_NEW_LEXER_INLINE
  515. lex_input_interface<boost::wave::cpplexer::lex_token<Position> > *
  516. new_lexer_gen<Iterator, Position>::new_lexer(Iterator const &first,
  517. Iterator const &last, Position const &pos,
  518. wave::language_support language)
  519. {
  520. return new lexer::xlex_functor<Iterator, Position>(
  521. first, last, pos, language);
  522. }
  523. #undef BOOST_WAVE_XLEX_NEW_LEXER_INLINE
  524. ///////////////////////////////////////////////////////////////////////////////
  525. } // namespace xlex
  526. } // namespace cpplexer
  527. } // namespace wave
  528. } // namespace boost
  529. #endif // !defined(XLEX_LEXER_HPP)