123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588 |
- /*=============================================================================
- Boost.Wave: A Standard compliant C++ preprocessor library
- Sample: IDL lexer
- http://www.boost.org/
- Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost
- Software License, Version 1.0. (See accompanying file
- LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- =============================================================================*/
- #include <ctime>
- #include <cstdlib>
- #include <cstdio>
- #include <cstring>
- #include <sys/types.h>
- #include <sys/stat.h>
- #include <fcntl.h>
- #include <boost/config.hpp>
- #if defined(BOOST_HAS_UNISTD_H)
- #include <unistd.h>
- #else
- #include <io.h>
- #endif
- #include <boost/assert.hpp>
- #include <boost/detail/workaround.hpp>
- // reuse the token ids and re2c helper functions from the default C++ lexer
- #include <boost/wave/token_ids.hpp>
- #include <boost/wave/cpplexer/re2clex/aq.hpp>
- #include <boost/wave/cpplexer/re2clex/scanner.hpp>
- #include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
- #include "idl_re.hpp"
- #if defined(_MSC_VER) && !defined(__COMO__)
- #pragma warning (disable: 4101) // 'foo' : unreferenced local variable
- #pragma warning (disable: 4102) // 'foo' : unreferenced label
- #endif
// Chunk size in bytes: fill() copies at most this many input bytes per call
// and grows the scanner buffer whenever fewer than this many bytes are free
// behind s->lim.
- #define BOOST_WAVE_BSIZE 196608
// Scanner interface macros (presumably the names consumed by the code re2c
// generates from the !re2c blocks in scan() -- confirm against the re2c
// manual).  They map onto the local 'cursor' variable and the Scanner fields
// lim and ptr.
- #define YYCTYPE uchar
- #define YYCURSOR cursor
- #define YYLIMIT s->lim
- #define YYMARKER s->ptr
// The argument n is ignored; fill() is called and the pointer it returns
// replaces the local cursor.
- #define YYFILL(n) {cursor = fill(s, cursor);}
// Older variant of BOOST_WAVE_RET kept for reference: it did not add the
// backslash-newline count to s->line.
- //#define BOOST_WAVE_RET(i) {s->cur = cursor; return (i);}
// Leaves scan() with token id i: first adds the value returned by
// count_backslash_newlines(s, cursor) to s->line, then stores the cursor in
// s->cur, then returns i.
- #define BOOST_WAVE_RET(i) \
- { \
- s->line += count_backslash_newlines(s, cursor); \
- s->cur = cursor; \
- return (i); \
- } \
- /**/
- ///////////////////////////////////////////////////////////////////////////////
- namespace boost {
- namespace wave {
- namespace idllexer {
- namespace re2clex {
- #define RE2C_ASSERT BOOST_ASSERT
- int
- get_one_char(boost::wave::cpplexer::re2clex::Scanner *s)
- {
- using namespace boost::wave::cpplexer::re2clex;
- if (0 != s->act) {
- RE2C_ASSERT(s->first != 0 && s->last != 0);
- RE2C_ASSERT(s->first <= s->act && s->act <= s->last);
- if (s->act < s->last)
- return *(s->act)++;
- }
- return -1;
- }
- std::ptrdiff_t
- rewind_stream (boost::wave::cpplexer::re2clex::Scanner *s, int cnt)
- {
- if (0 != s->act) {
- RE2C_ASSERT(s->first != 0 && s->last != 0);
- s->act += cnt;
- RE2C_ASSERT(s->first <= s->act && s->act <= s->last);
- return s->act - s->first;
- }
- return 0;
- }
- std::size_t
- get_first_eol_offset(boost::wave::cpplexer::re2clex::Scanner* s)
- {
- if (!AQ_EMPTY(s->eol_offsets))
- {
- return s->eol_offsets->queue[s->eol_offsets->head];
- }
- else
- {
- return (unsigned int)-1;
- }
- }
- void
- adjust_eol_offsets(boost::wave::cpplexer::re2clex::Scanner* s,
- std::size_t adjustment)
- {
- boost::wave::cpplexer::re2clex::aq_queue q;
- std::size_t i;
-
- if (!s->eol_offsets)
- s->eol_offsets = boost::wave::cpplexer::re2clex::aq_create();
- q = s->eol_offsets;
- if (AQ_EMPTY(q))
- return;
- i = q->head;
- while (i != q->tail)
- {
- if (adjustment > q->queue[i])
- q->queue[i] = 0;
- else
- q->queue[i] -= adjustment;
- ++i;
- if (i == q->max_size)
- i = 0;
- }
- if (adjustment > q->queue[i])
- q->queue[i] = 0;
- else
- q->queue[i] -= adjustment;
- }
- int
- count_backslash_newlines(boost::wave::cpplexer::re2clex::Scanner *s,
- boost::wave::cpplexer::re2clex::uchar *cursor)
- {
- using namespace boost::wave::cpplexer::re2clex;
- std::size_t diff, offset;
- int skipped = 0;
-
- /* figure out how many backslash-newlines skipped over unknowingly. */
- diff = cursor - s->bot;
- offset = get_first_eol_offset(s);
- while (offset <= diff && offset != (unsigned int)-1)
- {
- skipped++;
- boost::wave::cpplexer::re2clex::aq_pop(s->eol_offsets);
- offset = get_first_eol_offset(s);
- }
- return skipped;
- }
- bool is_backslash(
- boost::wave::cpplexer::re2clex::uchar *p,
- boost::wave::cpplexer::re2clex::uchar *end, int &len)
- {
- if (*p == '\\') {
- len = 1;
- return true;
- }
- else if (*p == '?' && *(p+1) == '?' && (p+2 < end && *(p+2) == '/')) {
- len = 3;
- return true;
- }
- return false;
- }
- boost::wave::cpplexer::re2clex::uchar *
- fill(boost::wave::cpplexer::re2clex::Scanner *s,
- boost::wave::cpplexer::re2clex::uchar *cursor)
- {
- using namespace std; // some systems have memcpy etc. in namespace std
- using namespace boost::wave::cpplexer::re2clex;
- if(!s->eof)
- {
- uchar* p;
- std::ptrdiff_t cnt = s->tok - s->bot;
- if(cnt)
- {
- memcpy(s->bot, s->tok, s->lim - s->tok);
- s->tok = s->bot;
- s->ptr -= cnt;
- cursor -= cnt;
- s->lim -= cnt;
- adjust_eol_offsets(s, cnt);
- }
- if((s->top - s->lim) < BOOST_WAVE_BSIZE)
- {
- uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BOOST_WAVE_BSIZE)*sizeof(uchar));
- if (buf == 0)
- {
- using namespace std; // some systems have printf in std
- if (0 != s->error_proc) {
- (*s->error_proc)(s,
- cpplexer::lexing_exception::unexpected_error,
- "Out of memory!");
- }
- else
- printf("Out of memory!\n");
-
- /* get the scanner to stop */
- *cursor = 0;
- return cursor;
- }
- memcpy(buf, s->tok, s->lim - s->tok);
- s->tok = buf;
- s->ptr = &buf[s->ptr - s->bot];
- cursor = &buf[cursor - s->bot];
- s->lim = &buf[s->lim - s->bot];
- s->top = &s->lim[BOOST_WAVE_BSIZE];
- free(s->bot);
- s->bot = buf;
- }
- if (s->act != 0) {
- cnt = s->last - s->act;
- if (cnt > BOOST_WAVE_BSIZE)
- cnt = BOOST_WAVE_BSIZE;
- memcpy(s->lim, s->act, cnt);
- s->act += cnt;
- if (cnt != BOOST_WAVE_BSIZE)
- {
- s->eof = &s->lim[cnt]; *(s->eof)++ = '\0';
- }
- }
-
- /* backslash-newline erasing time */
- /* first scan for backslash-newline and erase them */
- for (p = s->lim; p < s->lim + cnt - 2; ++p)
- {
- int len = 0;
- if (is_backslash(p, s->lim + cnt, len))
- {
- if (*(p+len) == '\n')
- {
- int offset = len + 1;
- memmove(p, p + offset, s->lim + cnt - p - offset);
- cnt -= offset;
- --p;
- aq_enqueue(s->eol_offsets, p - s->bot + 1);
- }
- else if (*(p+len) == '\r')
- {
- if (*(p+len+1) == '\n')
- {
- int offset = len + 2;
- memmove(p, p + offset, s->lim + cnt - p - offset);
- cnt -= offset;
- --p;
- }
- else
- {
- int offset = len + 1;
- memmove(p, p + offset, s->lim + cnt - p - offset);
- cnt -= offset;
- --p;
- }
- aq_enqueue(s->eol_offsets, p - s->bot + 1);
- }
- }
- }
- /* FIXME: the following code should be fixed to recognize correctly the
- trigraph backslash token */
- /* check to see if what we just read ends in a backslash */
- if (cnt >= 2)
- {
- uchar last = s->lim[cnt-1];
- uchar last2 = s->lim[cnt-2];
- /* check \ EOB */
- if (last == '\\')
- {
- int next = get_one_char(s);
- /* check for \ \n or \ \r or \ \r \n straddling the border */
- if (next == '\n')
- {
- --cnt; /* chop the final \, we've already read the \n. */
- boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets,
- cnt + (s->lim - s->bot));
- }
- else if (next == '\r')
- {
- int next2 = get_one_char(s);
- if (next2 == '\n')
- {
- --cnt; /* skip the backslash */
- }
- else
- {
- /* rewind one, and skip one char */
- rewind_stream(s, -1);
- --cnt;
- }
- boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets,
- cnt + (s->lim - s->bot));
- }
- else if (next != -1) /* -1 means end of file */
- {
- /* next was something else, so rewind the stream */
- rewind_stream(s, -1);
- }
- }
- /* check \ \r EOB */
- else if (last == '\r' && last2 == '\\')
- {
- int next = get_one_char(s);
- if (next == '\n')
- {
- cnt -= 2; /* skip the \ \r */
- }
- else
- {
- /* rewind one, and skip two chars */
- rewind_stream(s, -1);
- cnt -= 2;
- }
- boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets,
- cnt + (s->lim - s->bot));
- }
- /* check \ \n EOB */
- else if (last == '\n' && last2 == '\\')
- {
- cnt -= 2;
- boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets,
- cnt + (s->lim - s->bot));
- }
- }
-
- s->lim += cnt;
- if (s->eof) /* eof needs adjusting if we erased backslash-newlines */
- {
- s->eof = s->lim;
- *(s->eof)++ = '\0';
- }
- }
- return cursor;
- }
// Scans one token starting at s->cur and returns its token id.
//
// The bodies marked !re2c below are input for the re2c scanner generator and
// are not compiled as written.  Every action that produces a token leaves
// through BOOST_WAVE_RET, which adds the backslash-newline count to s->line
// and stores the cursor in s->cur before returning.
//
// Layout of the function:
//   * first !re2c block  - named pattern definitions only (any, Digit, ...)
//   * second !re2c block - the main rule set; the two comment openers jump to
//                          the ccomment / cppcomment states instead of
//                          returning
//   * ccomment           - inside a C comment, loops until the closing
//                          star-slash
//   * cppcomment         - inside a C++ comment, runs to the end of the line
//
// NOTE(review): the anyctrl rule of the ccomment state reports an error but
// neither returns nor jumps.  If error_proc is null (or returns), control
// leaves that action without a defined target -- confirm that error_proc is
// always set and does not return.
- boost::wave::token_id
- scan(boost::wave::cpplexer::re2clex::Scanner *s)
- {
- using namespace boost::wave::cpplexer::re2clex;
// the token starts where the previous one ended
- uchar *cursor = s->tok = s->cur;
- /*!re2c
- re2c:indent:string = " ";
- any = [\t\v\f\r\n\040-\377];
- anyctrl = [\000-\377];
- OctalDigit = [0-7];
- Digit = [0-9];
- HexDigit = [a-fA-F0-9];
- ExponentPart = [Ee] [+-]? Digit+;
- FractionalConstant = (Digit* "." Digit+) | (Digit+ ".");
- FloatingSuffix = [fF][lL]?|[lL][fF]?;
- IntegerSuffix = [uU][lL]?|[lL][uU]?;
- FixedPointSuffix = [dD];
- Backslash = [\\]|"??/";
- EscapeSequence = Backslash ([abfnrtv?'"] | Backslash | "x" HexDigit+ | OctalDigit OctalDigit? OctalDigit?);
- HexQuad = HexDigit HexDigit HexDigit HexDigit;
- UniversalChar = Backslash ("u" HexQuad | "U" HexQuad HexQuad);
- Newline = "\r\n" | "\n" | "\r";
- PPSpace = ([ \t]|("/*"(any\[*]|Newline|("*"+(any\[*/]|Newline)))*"*"+"/"))*;
- Pound = "#" | "??=" | "%:";
- */
// Main rule set.  The NUL rule returns T_EOF in every case and additionally
// reports an error when the NUL is not the end-of-input marker at s->eof.
- /*!re2c
- "/*" { goto ccomment; }
- "//" { goto cppcomment; }
-
- "TRUE" { BOOST_WAVE_RET(T_TRUE); }
- "FALSE" { BOOST_WAVE_RET(T_FALSE); }
-
- "{" { BOOST_WAVE_RET(T_LEFTBRACE); }
- "}" { BOOST_WAVE_RET(T_RIGHTBRACE); }
- "[" { BOOST_WAVE_RET(T_LEFTBRACKET); }
- "]" { BOOST_WAVE_RET(T_RIGHTBRACKET); }
- "#" { BOOST_WAVE_RET(T_POUND); }
- "##" { BOOST_WAVE_RET(T_POUND_POUND); }
- "(" { BOOST_WAVE_RET(T_LEFTPAREN); }
- ")" { BOOST_WAVE_RET(T_RIGHTPAREN); }
- ";" { BOOST_WAVE_RET(T_SEMICOLON); }
- ":" { BOOST_WAVE_RET(T_COLON); }
- "?" { BOOST_WAVE_RET(T_QUESTION_MARK); }
- "." { BOOST_WAVE_RET(T_DOT); }
- "+" { BOOST_WAVE_RET(T_PLUS); }
- "-" { BOOST_WAVE_RET(T_MINUS); }
- "*" { BOOST_WAVE_RET(T_STAR); }
- "/" { BOOST_WAVE_RET(T_DIVIDE); }
- "%" { BOOST_WAVE_RET(T_PERCENT); }
- "^" { BOOST_WAVE_RET(T_XOR); }
- "&" { BOOST_WAVE_RET(T_AND); }
- "|" { BOOST_WAVE_RET(T_OR); }
- "~" { BOOST_WAVE_RET(T_COMPL); }
- "!" { BOOST_WAVE_RET(T_NOT); }
- "=" { BOOST_WAVE_RET(T_ASSIGN); }
- "<" { BOOST_WAVE_RET(T_LESS); }
- ">" { BOOST_WAVE_RET(T_GREATER); }
- "<<" { BOOST_WAVE_RET(T_SHIFTLEFT); }
- ">>" { BOOST_WAVE_RET(T_SHIFTRIGHT); }
- "==" { BOOST_WAVE_RET(T_EQUAL); }
- "!=" { BOOST_WAVE_RET(T_NOTEQUAL); }
- "<=" { BOOST_WAVE_RET(T_LESSEQUAL); }
- ">=" { BOOST_WAVE_RET(T_GREATEREQUAL); }
- "&&" { BOOST_WAVE_RET(T_ANDAND); }
- "||" { BOOST_WAVE_RET(T_OROR); }
- "++" { BOOST_WAVE_RET(T_PLUSPLUS); }
- "--" { BOOST_WAVE_RET(T_MINUSMINUS); }
- "," { BOOST_WAVE_RET(T_COMMA); }
- ([a-zA-Z_] | UniversalChar) ([a-zA-Z_0-9] | UniversalChar)*
- { BOOST_WAVE_RET(T_IDENTIFIER); }
-
- (("0" [xX] HexDigit+) | ("0" OctalDigit*) | ([1-9] Digit*)) IntegerSuffix?
- { BOOST_WAVE_RET(T_INTLIT); }
- ((FractionalConstant ExponentPart?) | (Digit+ ExponentPart)) FloatingSuffix?
- { BOOST_WAVE_RET(T_FLOATLIT); }
- (FractionalConstant | Digit+) FixedPointSuffix
- { BOOST_WAVE_RET(T_FIXEDPOINTLIT); }
-
- "L"? (['] (EscapeSequence|any\[\n\r\\']|UniversalChar)+ ['])
- { BOOST_WAVE_RET(T_CHARLIT); }
-
- "L"? (["] (EscapeSequence|any\[\n\r\\"]|UniversalChar)* ["])
- { BOOST_WAVE_RET(T_STRINGLIT); }
-
- Pound PPSpace "include" PPSpace "<" (any\[\n\r>])+ ">"
- { BOOST_WAVE_RET(T_PP_HHEADER); }
- Pound PPSpace "include" PPSpace "\"" (any\[\n\r"])+ "\""
- { BOOST_WAVE_RET(T_PP_QHEADER); }
- Pound PPSpace "include" PPSpace
- { BOOST_WAVE_RET(T_PP_INCLUDE); }
- Pound PPSpace "if" { BOOST_WAVE_RET(T_PP_IF); }
- Pound PPSpace "ifdef" { BOOST_WAVE_RET(T_PP_IFDEF); }
- Pound PPSpace "ifndef" { BOOST_WAVE_RET(T_PP_IFNDEF); }
- Pound PPSpace "else" { BOOST_WAVE_RET(T_PP_ELSE); }
- Pound PPSpace "elif" { BOOST_WAVE_RET(T_PP_ELIF); }
- Pound PPSpace "endif" { BOOST_WAVE_RET(T_PP_ENDIF); }
- Pound PPSpace "define" { BOOST_WAVE_RET(T_PP_DEFINE); }
- Pound PPSpace "undef" { BOOST_WAVE_RET(T_PP_UNDEF); }
- Pound PPSpace "line" { BOOST_WAVE_RET(T_PP_LINE); }
- Pound PPSpace "error" { BOOST_WAVE_RET(T_PP_ERROR); }
- Pound PPSpace "pragma" { BOOST_WAVE_RET(T_PP_PRAGMA); }
- Pound PPSpace "warning" { BOOST_WAVE_RET(T_PP_WARNING); }
-
- [ \t\v\f]+
- { BOOST_WAVE_RET(T_SPACE); }
- Newline
- {
- s->line++;
- BOOST_WAVE_RET(T_NEWLINE);
- }
- "\000"
- {
- if(cursor != s->eof)
- {
- using namespace std; // some systems have printf in std
- if (0 != s->error_proc) {
- (*s->error_proc)(s,
- cpplexer::lexing_exception::generic_lexing_error,
- "'\\000' in input stream");
- }
- else
- printf("Error: 0 in file\n");
- }
- BOOST_WAVE_RET(T_EOF);
- }
- anyctrl
- {
- BOOST_WAVE_RET(TOKEN_FROM_ID(*s->tok, UnknownTokenType));
- }
- */
// C comment state: each newline adds 1 plus the pending backslash-newline
// count to s->line.  A NUL ends the token as T_CCOMMENT after stepping the
// cursor back by one (so the next call sees the NUL again); it is reported as
// an "Unterminated comment" warning at end of input and as an error otherwise.
- ccomment:
- /*!re2c
- "*/" { BOOST_WAVE_RET(T_CCOMMENT); }
- Newline
- {
- /*if(cursor == s->eof) BOOST_WAVE_RET(T_EOF);*/
- /*s->tok = cursor; */
- s->line += count_backslash_newlines(s, cursor) +1;
- goto ccomment;
- }
- any { goto ccomment; }
- "\000"
- {
- using namespace std; // some systems have printf in std
- if(cursor == s->eof)
- {
- if (s->error_proc)
- (*s->error_proc)(s,
- cpplexer::lexing_exception::generic_lexing_warning,
- "Unterminated comment");
- else
- printf("Error: Unterminated comment\n");
- }
- else
- {
- if (s->error_proc)
- (*s->error_proc)(s,
- cpplexer::lexing_exception::generic_lexing_error,
- "'\\000' in input stream");
- else
- printf("Error: 0 in file");
- }
- /* adjust cursor such next call returns T_EOF */
- --YYCURSOR;
- /* the comment is unterminated, but nevertheless its a comment */
- BOOST_WAVE_RET(T_CCOMMENT);
- }
- anyctrl
- {
- if (s->error_proc)
- (*s->error_proc)(s,
- cpplexer::lexing_exception::generic_lexing_error,
- "invalid character in input stream");
- else
- printf("Error: 0 in file");
- }
- */
// C++ comment state: the terminating newline is part of the T_CPPCOMMENT
// token and bumps s->line.  A NUL also ends the token as T_CPPCOMMENT after
// stepping the cursor back by one; it is reported as an error only when it is
// not the end-of-input marker.
- cppcomment:
- /*!re2c
- Newline
- {
- /*if(cursor == s->eof) BOOST_WAVE_RET(T_EOF); */
- /*s->tok = cursor; */
- s->line++;
- BOOST_WAVE_RET(T_CPPCOMMENT);
- }
- any { goto cppcomment; }
- "\000"
- {
- using namespace std; // some systems have printf in std
- if(cursor != s->eof)
- {
- if (s->error_proc)
- (*s->error_proc)(s,
- cpplexer::lexing_exception::generic_lexing_error,
- "'\\000' in input stream");
- else
- printf("Error: 0 in file");
- }
- /* adjust cursor such next call returns T_EOF */
- --YYCURSOR;
- /* the comment is unterminated, but nevertheless its a comment */
- BOOST_WAVE_RET(T_CPPCOMMENT);
- }
- */
- } /* end of scan */
- #undef RE2C_ASSERT
- ///////////////////////////////////////////////////////////////////////////////
- } // namespace re2clex
- } // namespace idllexer
- } // namespace wave
- } // namespace boost
|