cpp_re.hpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410
  1. /*=============================================================================
  2. Boost.Wave: A Standard compliant C++ preprocessor library
  3. Re2C based C++ lexer
  4. http://www.boost.org/
  5. Copyright (c) 2001-2012 Hartmut Kaiser. Distributed under the Boost
  6. Software License, Version 1.0. (See accompanying file
  7. LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  8. =============================================================================*/
  9. #if !defined(CPP_RE_HPP_B76C4F5E_63E9_4B8A_9975_EC32FA6BF027_INCLUDED)
  10. #define CPP_RE_HPP_B76C4F5E_63E9_4B8A_9975_EC32FA6BF027_INCLUDED
  11. #include <boost/assert.hpp>
  12. #include <boost/wave/wave_config.hpp>
  13. #include <boost/wave/token_ids.hpp>
  14. #include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
  15. // this must occur after all of the includes and before any code appears
  16. #ifdef BOOST_HAS_ABI_HEADERS
  17. #include BOOST_ABI_PREFIX
  18. #endif
  19. // suppress warnings about dependent classes not being exported from the dll
  20. #ifdef BOOST_MSVC
  21. #pragma warning(push)
  22. #pragma warning(disable : 4251 4231 4660)
  23. #endif
  24. ///////////////////////////////////////////////////////////////////////////////
  // Names presumably referenced by the Re2C-generated rules that are included
  // into the body of scan() further down; they are mapped onto the local
  // variables (cursor, limit, marker) which scan() declares.
  25. #define YYCTYPE uchar
  26. #define YYCURSOR cursor
  27. #define YYLIMIT limit
  28. #define YYMARKER marker
  // YYFILL(n): refill the buffer through fill() and rebuild the local cursor
  // (keeping its current column) and the local limit from the scanner state.
  // The argument n is not used by the expansion. The expansion is a brace
  // block and relies on the names s, cursor and limit being in scope.
  29. #define YYFILL(n) \
  30. { \
  31. cursor = uchar_wrapper(fill(s, cursor), cursor.column); \
  32. limit = uchar_wrapper (s->lim); \
  33. } \
  34. /**/
  35. #include <iostream>
  36. ///////////////////////////////////////////////////////////////////////////////
  // Write the local scanning state back into the scanner *s:
  //  - s->line is advanced by the value count_backslash_newlines() returns
  //    for the current cursor position,
  //  - s->curr_column, s->cur, s->lim and s->ptr are set from the local
  //    cursor (and its column), limit and marker.
  // Relies on the names s, cursor, limit and marker being in scope at the
  // point of use.
  37. #define BOOST_WAVE_UPDATE_CURSOR() \
  38. { \
  39. s->line += count_backslash_newlines(s, cursor); \
  40. s->curr_column = cursor.column; \
  41. s->cur = cursor; \
  42. s->lim = limit; \
  43. s->ptr = marker; \
  44. } \
  45. /**/
  46. ///////////////////////////////////////////////////////////////////////////////
  // Leave the enclosing function with the token id (i): the scanner state is
  // synchronized first (BOOST_WAVE_UPDATE_CURSOR), and T_EOF is returned
  // instead of (i) if the current position ended up behind the buffer limit.
  47. #define BOOST_WAVE_RET(i) \
  48. { \
  49. BOOST_WAVE_UPDATE_CURSOR() \
  50. if (s->cur > s->lim) \
  51. return T_EOF; /* may happen for empty files */ \
  52. return (i); \
  53. } \
  54. /**/
  55. ///////////////////////////////////////////////////////////////////////////////
  56. namespace boost {
  57. namespace wave {
  58. namespace cpplexer {
  59. namespace re2clex {
  // Forward declaration only: the definition of the scanner state is not part
  // of this header. The functions below access its members through pointers.
  60. template<typename Iterator>
  61. struct Scanner;
  62. ///////////////////////////////////////////////////////////////////////////////
  63. // The scanner function to call whenever a new token is requested
  // Declaration; the definition follows near the end of this header.
  64. template<typename Iterator>
  65. BOOST_WAVE_DECL boost::wave::token_id scan(Scanner<Iterator> *s);
  66. ///////////////////////////////////////////////////////////////////////////////
  67. ///////////////////////////////////////////////////////////////////////////////
  68. // Utility functions
  // Assertion macro used by the utility functions below; forwards to
  // BOOST_ASSERT (from <boost/assert.hpp>, included above).
  69. #define RE2C_ASSERT BOOST_ASSERT
  70. template<typename Iterator>
  71. int get_one_char(Scanner<Iterator> *s)
  72. {
  73. RE2C_ASSERT(s->first <= s->act && s->act <= s->last);
  74. if (s->act < s->last)
  75. return *(s->act)++;
  76. return -1;
  77. }
  78. template<typename Iterator>
  79. std::ptrdiff_t rewind_stream (Scanner<Iterator> *s, int cnt)
  80. {
  81. std::advance(s->act, cnt);
  82. RE2C_ASSERT(s->first <= s->act && s->act <= s->last);
  83. return std::distance(s->first, s->act);
  84. }
  85. template<typename Iterator>
  86. std::size_t get_first_eol_offset(Scanner<Iterator>* s)
  87. {
  88. if (!AQ_EMPTY(s->eol_offsets))
  89. {
  90. return s->eol_offsets->queue[s->eol_offsets->head];
  91. }
  92. else
  93. {
  94. return (unsigned int)-1;
  95. }
  96. }
  97. template<typename Iterator>
  98. void adjust_eol_offsets(Scanner<Iterator>* s, std::size_t adjustment)
  99. {
  100. aq_queue q;
  101. std::size_t i;
  102. if (!s->eol_offsets)
  103. s->eol_offsets = aq_create();
  104. q = s->eol_offsets;
  105. if (AQ_EMPTY(q))
  106. return;
  107. i = q->head;
  108. while (i != q->tail)
  109. {
  110. if (adjustment > q->queue[i])
  111. q->queue[i] = 0;
  112. else
  113. q->queue[i] -= adjustment;
  114. ++i;
  115. if (i == q->max_size)
  116. i = 0;
  117. }
  118. if (adjustment > q->queue[i])
  119. q->queue[i] = 0;
  120. else
  121. q->queue[i] -= adjustment;
  122. }
  123. template<typename Iterator>
  124. int count_backslash_newlines(Scanner<Iterator> *s, uchar *cursor)
  125. {
  126. std::size_t diff, offset;
  127. int skipped = 0;
  128. /* figure out how many backslash-newlines skipped over unknowingly. */
  129. diff = cursor - s->bot;
  130. offset = get_first_eol_offset(s);
  131. while (offset <= diff && offset != (unsigned int)-1)
  132. {
  133. skipped++;
  134. aq_pop(s->eol_offsets);
  135. offset = get_first_eol_offset(s);
  136. }
  137. return skipped;
  138. }
  // Declared here, defined elsewhere. fill() below calls it with a position
  // inside the freshly read data, the end of that data and an int which it
  // afterwards uses as the number of characters occupied by the recognized
  // backslash (presumably more than one for a trigraph - TODO confirm against
  // the definition).
  139. BOOST_WAVE_DECL bool is_backslash(uchar *p, uchar *end, int &len);
  // Chunk size used by fill(): the amount of free space it keeps available
  // behind s->lim and the maximum number of characters it copies per call
  // (196608 = 192 * 1024). The macro is undefined again right after fill().
  140. #define BOOST_WAVE_BSIZE 196608
  141. template<typename Iterator>
  142. uchar *fill(Scanner<Iterator> *s, uchar *cursor)
  143. {
  144. using namespace std; // some systems have memcpy etc. in namespace std
  145. if(!s->eof)
  146. {
  147. uchar* p;
  148. std::ptrdiff_t cnt = s->tok - s->bot;
  149. if(cnt)
  150. {
  151. if (NULL == s->lim)
  152. s->lim = s->top;
  153. memmove(s->bot, s->tok, s->lim - s->tok);
  154. s->tok = s->cur = s->bot;
  155. s->ptr -= cnt;
  156. cursor -= cnt;
  157. s->lim -= cnt;
  158. adjust_eol_offsets(s, cnt);
  159. }
  160. if((s->top - s->lim) < BOOST_WAVE_BSIZE)
  161. {
  162. uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BOOST_WAVE_BSIZE)*sizeof(uchar));
  163. if (buf == 0)
  164. {
  165. (*s->error_proc)(s, lexing_exception::unexpected_error,
  166. "Out of memory!");
  167. /* get the scanner to stop */
  168. *cursor = 0;
  169. return cursor;
  170. }
  171. memmove(buf, s->tok, s->lim - s->tok);
  172. s->tok = s->cur = buf;
  173. s->ptr = &buf[s->ptr - s->bot];
  174. cursor = &buf[cursor - s->bot];
  175. s->lim = &buf[s->lim - s->bot];
  176. s->top = &s->lim[BOOST_WAVE_BSIZE];
  177. free(s->bot);
  178. s->bot = buf;
  179. }
  180. cnt = std::distance(s->act, s->last);
  181. if (cnt > BOOST_WAVE_BSIZE)
  182. cnt = BOOST_WAVE_BSIZE;
  183. uchar * dst = s->lim;
  184. for (std::ptrdiff_t idx = 0; idx < cnt; ++idx)
  185. {
  186. *dst++ = *s->act++;
  187. }
  188. if (cnt != BOOST_WAVE_BSIZE)
  189. {
  190. s->eof = &s->lim[cnt]; *(s->eof)++ = '\0';
  191. }
  192. /* backslash-newline erasing time */
  193. /* first scan for backslash-newline and erase them */
  194. for (p = s->lim; p < s->lim + cnt - 2; ++p)
  195. {
  196. int len = 0;
  197. if (is_backslash(p, s->lim + cnt, len))
  198. {
  199. if (*(p+len) == '\n')
  200. {
  201. int offset = len + 1;
  202. memmove(p, p + offset, s->lim + cnt - p - offset);
  203. cnt -= offset;
  204. --p;
  205. aq_enqueue(s->eol_offsets, p - s->bot + 1);
  206. }
  207. else if (*(p+len) == '\r')
  208. {
  209. if (*(p+len+1) == '\n')
  210. {
  211. int offset = len + 2;
  212. memmove(p, p + offset, s->lim + cnt - p - offset);
  213. cnt -= offset;
  214. --p;
  215. }
  216. else
  217. {
  218. int offset = len + 1;
  219. memmove(p, p + offset, s->lim + cnt - p - offset);
  220. cnt -= offset;
  221. --p;
  222. }
  223. aq_enqueue(s->eol_offsets, p - s->bot + 1);
  224. }
  225. }
  226. }
  227. /* FIXME: the following code should be fixed to recognize correctly the
  228. trigraph backslash token */
  229. /* check to see if what we just read ends in a backslash */
  230. if (cnt >= 2)
  231. {
  232. uchar last = s->lim[cnt-1];
  233. uchar last2 = s->lim[cnt-2];
  234. /* check \ EOB */
  235. if (last == '\\')
  236. {
  237. int next = get_one_char(s);
  238. /* check for \ \n or \ \r or \ \r \n straddling the border */
  239. if (next == '\n')
  240. {
  241. --cnt; /* chop the final \, we've already read the \n. */
  242. aq_enqueue(s->eol_offsets, cnt + (s->lim - s->bot));
  243. }
  244. else if (next == '\r')
  245. {
  246. int next2 = get_one_char(s);
  247. if (next2 == '\n')
  248. {
  249. --cnt; /* skip the backslash */
  250. }
  251. else
  252. {
  253. /* rewind one, and skip one char */
  254. rewind_stream(s, -1);
  255. --cnt;
  256. }
  257. aq_enqueue(s->eol_offsets, cnt + (s->lim - s->bot));
  258. }
  259. else if (next != -1) /* -1 means end of file */
  260. {
  261. /* next was something else, so rewind the stream */
  262. rewind_stream(s, -1);
  263. }
  264. }
  265. /* check \ \r EOB */
  266. else if (last == '\r' && last2 == '\\')
  267. {
  268. int next = get_one_char(s);
  269. if (next == '\n')
  270. {
  271. cnt -= 2; /* skip the \ \r */
  272. }
  273. else
  274. {
  275. /* rewind one, and skip two chars */
  276. rewind_stream(s, -1);
  277. cnt -= 2;
  278. }
  279. aq_enqueue(s->eol_offsets, cnt + (s->lim - s->bot));
  280. }
  281. /* check \ \n EOB */
  282. else if (last == '\n' && last2 == '\\')
  283. {
  284. cnt -= 2;
  285. aq_enqueue(s->eol_offsets, cnt + (s->lim - s->bot));
  286. }
  287. }
  288. s->lim += cnt;
  289. if (s->eof) /* eof needs adjusting if we erased backslash-newlines */
  290. {
  291. s->eof = s->lim;
  292. *(s->eof)++ = '\0';
  293. }
  294. }
  295. return cursor;
  296. }
  297. #undef BOOST_WAVE_BSIZE
  298. ///////////////////////////////////////////////////////////////////////////////
  299. // Special wrapper class holding the current cursor position
  // Pairs a raw buffer position with a column number. Only the declarations
  // are given here; the member functions are defined elsewhere.
  300. struct BOOST_WAVE_DECL uchar_wrapper
  301. {
  // Wrap the buffer position base_cursor; the column defaults to 1.
  302. uchar_wrapper (uchar *base_cursor, std::size_t column = 1);
  // Step forwards / backwards (presumably keeping column in sync with the
  // position - TODO confirm against the definitions).
  303. uchar_wrapper& operator++();
  304. uchar_wrapper& operator--();
  // Read access to the character at the wrapped position.
  305. uchar operator* () const;
  // Implicit conversion to a raw pointer; used by the macros in this header
  // when storing the cursor/limit/marker back into the scanner.
  306. operator uchar *() const;
  // Difference of two wrapped positions.
  307. friend BOOST_WAVE_DECL std::ptrdiff_t
  308. operator- (uchar_wrapper const& lhs, uchar_wrapper const& rhs);
  // The wrapped buffer position and the column associated with it.
  309. uchar *base_cursor;
  310. std::size_t column;
  311. };
  312. ///////////////////////////////////////////////////////////////////////////////
  // The scanner function to call whenever a new token is requested.
  // Sets up the local state the macros of this header operate on (cursor,
  // marker, limit) and then includes the Re2C token definition rules, which
  // form the rest of the function body and produce the returned token id.
  313. template<typename Iterator>
  314. boost::wave::token_id scan(Scanner<Iterator> *s)
  315. {
  316. BOOST_ASSERT(0 != s->error_proc); // error handler must be given
  // The new token starts at the current position: s->tok is set to s->cur and
  // s->column to s->curr_column, and the cursor is created from both.
  317. uchar_wrapper cursor (s->tok = s->cur, s->column = s->curr_column);
  318. uchar_wrapper marker (s->ptr);
  319. uchar_wrapper limit (s->lim);
  320. typedef BOOST_WAVE_STRINGTYPE string_type;
  321. string_type rawstringdelim; // for use with C++11 raw string literals
  322. // include the correct Re2C token definition rules
  // NOTE(review): on the BSDs listed below T_DIVIDE is undefined first if it
  // is a macro - presumably a system header macro clashing with the token id
  // of the same name; confirm.
  323. #if (defined (__FreeBSD__) || defined (__DragonFly__) || defined (__OpenBSD__)) && defined (T_DIVIDE)
  324. #undef T_DIVIDE
  325. #endif
  // BOOST_WAVE_USE_STRICT_LEXER selects which rule set is compiled in.
  326. #if BOOST_WAVE_USE_STRICT_LEXER != 0
  327. #include "strict_cpp_re.inc"
  328. #else
  329. #include "cpp_re.inc"
  330. #endif
  331. } /* end of scan */
  332. ///////////////////////////////////////////////////////////////////////////////
  333. } // namespace re2clex
  334. } // namespace cpplexer
  335. } // namespace wave
  336. } // namespace boost
  337. #ifdef BOOST_MSVC
  338. #pragma warning(pop)
  339. #endif
  // Remove the helper macros defined near the top of this header so they do
  // not leak into files including it.
  // NOTE(review): BOOST_WAVE_UPDATE_CURSOR and RE2C_ASSERT are defined in this
  // header as well but are not undefined in this list - confirm whether that
  // is intentional.
  340. #undef BOOST_WAVE_RET
  341. #undef YYCTYPE
  342. #undef YYCURSOR
  343. #undef YYLIMIT
  344. #undef YYMARKER
  345. #undef YYFILL
  346. // the suffix header occurs after all of the code
  347. #ifdef BOOST_HAS_ABI_HEADERS
  348. #include BOOST_ABI_SUFFIX
  349. #endif
  350. #endif // !defined(CPP_RE_HPP_B76C4F5E_63E9_4B8A_9975_EC32FA6BF027_INCLUDED)