generate_static.hpp 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023
  1. // Copyright (c) 2008-2009 Ben Hanson
  2. // Copyright (c) 2008-2011 Hartmut Kaiser
  3. //
  4. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6. #if !defined(BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM)
  7. #define BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM
  8. #if defined(_MSC_VER)
  9. #pragma once
  10. #endif
#include <boost/spirit/home/support/detail/lexer/char_traits.hpp>
#include <boost/spirit/home/support/detail/lexer/consts.hpp>
#include <boost/spirit/home/support/detail/lexer/rules.hpp>
#include <boost/spirit/home/support/detail/lexer/size_t.hpp>
#include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
#include <boost/spirit/home/support/detail/lexer/debug.hpp>
#include <boost/spirit/home/lex/lexer/lexertl/static_version.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/scoped_array.hpp>
#include <cstddef>
#include <locale>
#include <map>
#include <ostream>
#include <string>
  20. ///////////////////////////////////////////////////////////////////////////////
  21. namespace boost { namespace spirit { namespace lex { namespace lexertl
  22. {
  23. namespace detail
  24. {
  25. ///////////////////////////////////////////////////////////////////////////
  26. template <typename CharT>
  27. struct string_lit;
  28. template <>
  29. struct string_lit<char>
  30. {
  31. static char get(char c) { return c; }
  32. static std::string get(char const* str = "") { return str; }
  33. };
  34. template <>
  35. struct string_lit<wchar_t>
  36. {
  37. static wchar_t get(char c)
  38. {
  39. typedef std::ctype<wchar_t> ctype_t;
  40. return std::use_facet<ctype_t>(std::locale()).widen(c);
  41. }
  42. static std::basic_string<wchar_t> get(char const* source = "")
  43. {
  44. using namespace std; // some systems have size_t in ns std
  45. size_t len = strlen(source);
  46. boost::scoped_array<wchar_t> result (new wchar_t[len+1]);
  47. result.get()[len] = '\0';
  48. // working with wide character streams is supported only if the
  49. // platform provides the std::ctype<wchar_t> facet
  50. BOOST_ASSERT(std::has_facet<std::ctype<wchar_t> >(std::locale()));
  51. std::use_facet<std::ctype<wchar_t> >(std::locale())
  52. .widen(source, source + len, result.get());
  53. return result.get();
  54. }
  55. };
  56. template <typename Char>
  57. inline Char L(char c)
  58. {
  59. return string_lit<Char>::get(c);
  60. }
  61. template <typename Char>
  62. inline std::basic_string<Char> L(char const* c = "")
  63. {
  64. return string_lit<Char>::get(c);
  65. }
  66. ///////////////////////////////////////////////////////////////////////////
  67. template <typename Char>
  68. inline bool
  69. generate_delimiter(std::basic_ostream<Char> &os_)
  70. {
  71. os_ << std::basic_string<Char>(80, '/') << "\n";
  72. return os_.good();
  73. }
  74. ///////////////////////////////////////////////////////////////////////////
  75. // Generate a table of the names of the used lexer states, which is a bit
  76. // tricky, because the table stored with the rules is sorted based on the
  77. // names, but we need it sorted using the state ids.
  78. template <typename Char>
  79. inline bool
  80. generate_cpp_state_info (boost::lexer::basic_rules<Char> const& rules_
  81. , std::basic_ostream<Char> &os_, Char const* name_suffix)
  82. {
  83. // we need to re-sort the state names in ascending order of the state
  84. // ids, filling possible gaps in between later
  85. typedef typename
  86. boost::lexer::basic_rules<Char>::string_size_t_map::const_iterator
  87. state_iterator;
  88. typedef std::map<std::size_t, Char const*> reverse_state_map_type;
  89. reverse_state_map_type reverse_state_map;
  90. state_iterator send = rules_.statemap().end();
  91. for (state_iterator sit = rules_.statemap().begin(); sit != send; ++sit)
  92. {
  93. typedef typename reverse_state_map_type::value_type value_type;
  94. reverse_state_map.insert(value_type((*sit).second, (*sit).first.c_str()));
  95. }
  96. generate_delimiter(os_);
  97. os_ << "// this table defines the names of the lexer states\n";
  98. os_ << boost::lexer::detail::strings<Char>::char_name()
  99. << " const* const lexer_state_names"
  100. << (name_suffix[0] ? "_" : "") << name_suffix
  101. << "[" << rules_.statemap().size() << "] = \n{\n";
  102. typedef typename reverse_state_map_type::iterator iterator;
  103. iterator rend = reverse_state_map.end();
  104. std::size_t last_id = 0;
  105. for (iterator rit = reverse_state_map.begin(); rit != rend; ++last_id)
  106. {
  107. for (/**/; last_id < (*rit).first; ++last_id)
  108. {
  109. os_ << " 0, // \"<undefined state>\"\n";
  110. }
  111. os_ << " "
  112. << boost::lexer::detail::strings<Char>::char_prefix()
  113. << "\"" << (*rit).second << "\"";
  114. if (++rit != rend)
  115. os_ << ",\n";
  116. else
  117. os_ << "\n"; // don't generate the final comma
  118. }
  119. os_ << "};\n\n";
  120. generate_delimiter(os_);
  121. os_ << "// this variable defines the number of lexer states\n";
  122. os_ << "std::size_t const lexer_state_count"
  123. << (name_suffix[0] ? "_" : "") << name_suffix
  124. << " = " << rules_.statemap().size() << ";\n\n";
  125. return os_.good();
  126. }
  127. template <typename Char>
  128. inline bool
  129. generate_cpp_state_table (std::basic_ostream<Char> &os_
  130. , Char const* name_suffix, bool bol, bool eol)
  131. {
  132. std::basic_string<Char> suffix(L<Char>(name_suffix[0] ? "_" : ""));
  133. suffix += name_suffix;
  134. generate_delimiter(os_);
  135. os_ << "// this defines a generic accessors for the information above\n";
  136. os_ << "struct lexer" << suffix << "\n{\n";
  137. os_ << " // version number and feature-set of compatible static lexer engine\n";
  138. os_ << " enum\n";
  139. os_ << " {\n static_version = " << SPIRIT_STATIC_LEXER_VERSION << ",\n";
  140. os_ << " supports_bol = " << std::boolalpha << bol << ",\n";
  141. os_ << " supports_eol = " << std::boolalpha << eol << "\n";
  142. os_ << " };\n\n";
  143. os_ << " // return the number of lexer states\n";
  144. os_ << " static std::size_t state_count()\n";
  145. os_ << " {\n return lexer_state_count" << suffix << "; \n }\n\n";
  146. os_ << " // return the name of the lexer state as given by 'idx'\n";
  147. os_ << " static " << boost::lexer::detail::strings<Char>::char_name()
  148. << " const* state_name(std::size_t idx)\n";
  149. os_ << " {\n return lexer_state_names" << suffix << "[idx]; \n }\n\n";
  150. os_ << " // return the next matched token\n";
  151. os_ << " template<typename Iterator>\n";
  152. os_ << " static std::size_t next(std::size_t &start_state_, bool& bol_\n";
  153. os_ << " , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)\n";
  154. os_ << " {\n return next_token" << suffix
  155. << "(start_state_, bol_, start_token_, end_, unique_id_);\n }\n";
  156. os_ << "};\n\n";
  157. return os_.good();
  158. }
  159. ///////////////////////////////////////////////////////////////////////////
  160. // generate function body based on traversing the DFA tables
  161. template <typename Char>
  162. bool generate_function_body_dfa(std::basic_ostream<Char>& os_
  163. , boost::lexer::basic_state_machine<Char> const &sm_)
  164. {
  165. std::size_t const dfas_ = sm_.data()._dfa->size();
  166. std::size_t const lookups_ = sm_.data()._lookup->front()->size();
  167. os_ << " enum {end_state_index, id_index, unique_id_index, "
  168. "state_index, bol_index,\n";
  169. os_ << " eol_index, dead_state_index, dfa_offset};\n\n";
  170. os_ << " static std::size_t const npos = "
  171. "static_cast<std::size_t>(~0);\n";
  172. if (dfas_ > 1)
  173. {
  174. for (std::size_t state_ = 0; state_ < dfas_; ++state_)
  175. {
  176. std::size_t i_ = 0;
  177. std::size_t j_ = 1;
  178. std::size_t count_ = lookups_ / 8;
  179. std::size_t const* lookup_ = &sm_.data()._lookup[state_]->front();
  180. std::size_t const* dfa_ = &sm_.data()._dfa[state_]->front();
  181. os_ << " static std::size_t const lookup" << state_
  182. << "_[" << lookups_ << "] = {\n ";
  183. for (/**/; i_ < count_; ++i_)
  184. {
  185. std::size_t const index_ = i_ * 8;
  186. os_ << lookup_[index_];
  187. for (/**/; j_ < 8; ++j_)
  188. {
  189. os_ << ", " << lookup_[index_ + j_];
  190. }
  191. if (i_ < count_ - 1)
  192. {
  193. os_ << ",\n ";
  194. }
  195. j_ = 1;
  196. }
  197. os_ << " };\n";
  198. count_ = sm_.data()._dfa[state_]->size ();
  199. os_ << " static const std::size_t dfa" << state_ << "_["
  200. << count_ << "] = {\n ";
  201. count_ /= 8;
  202. for (i_ = 0; i_ < count_; ++i_)
  203. {
  204. std::size_t const index_ = i_ * 8;
  205. os_ << dfa_[index_];
  206. for (j_ = 1; j_ < 8; ++j_)
  207. {
  208. os_ << ", " << dfa_[index_ + j_];
  209. }
  210. if (i_ < count_ - 1)
  211. {
  212. os_ << ",\n ";
  213. }
  214. }
  215. std::size_t const mod_ = sm_.data()._dfa[state_]->size () % 8;
  216. if (mod_)
  217. {
  218. std::size_t const index_ = count_ * 8;
  219. if (count_)
  220. {
  221. os_ << ",\n ";
  222. }
  223. os_ << dfa_[index_];
  224. for (j_ = 1; j_ < mod_; ++j_)
  225. {
  226. os_ << ", " << dfa_[index_ + j_];
  227. }
  228. }
  229. os_ << " };\n";
  230. }
  231. std::size_t count_ = sm_.data()._dfa_alphabet.size();
  232. std::size_t i_ = 1;
  233. os_ << " static std::size_t const* lookup_arr_[" << count_
  234. << "] = { lookup0_";
  235. for (i_ = 1; i_ < count_; ++i_)
  236. {
  237. os_ << ", " << "lookup" << i_ << "_";
  238. }
  239. os_ << " };\n";
  240. os_ << " static std::size_t const dfa_alphabet_arr_["
  241. << count_ << "] = { ";
  242. os_ << sm_.data()._dfa_alphabet.front ();
  243. for (i_ = 1; i_ < count_; ++i_)
  244. {
  245. os_ << ", " << sm_.data()._dfa_alphabet[i_];
  246. }
  247. os_ << " };\n";
  248. os_ << " static std::size_t const* dfa_arr_[" << count_
  249. << "] = { ";
  250. os_ << "dfa0_";
  251. for (i_ = 1; i_ < count_; ++i_)
  252. {
  253. os_ << ", " << "dfa" << i_ << "_";
  254. }
  255. os_ << " };\n";
  256. }
  257. else
  258. {
  259. std::size_t const* lookup_ = &sm_.data()._lookup[0]->front();
  260. std::size_t const* dfa_ = &sm_.data()._dfa[0]->front();
  261. std::size_t i_ = 0;
  262. std::size_t j_ = 1;
  263. std::size_t count_ = lookups_ / 8;
  264. os_ << " static std::size_t const lookup_[";
  265. os_ << sm_.data()._lookup[0]->size() << "] = {\n ";
  266. for (/**/; i_ < count_; ++i_)
  267. {
  268. const std::size_t index_ = i_ * 8;
  269. os_ << lookup_[index_];
  270. for (/**/; j_ < 8; ++j_)
  271. {
  272. os_ << ", " << lookup_[index_ + j_];
  273. }
  274. if (i_ < count_ - 1)
  275. {
  276. os_ << ",\n ";
  277. }
  278. j_ = 1;
  279. }
  280. os_ << " };\n";
  281. os_ << " static std::size_t const dfa_alphabet_ = "
  282. << sm_.data()._dfa_alphabet.front () << ";\n";
  283. os_ << " static std::size_t const dfa_["
  284. << sm_.data()._dfa[0]->size () << "] = {\n ";
  285. count_ = sm_.data()._dfa[0]->size () / 8;
  286. for (i_ = 0; i_ < count_; ++i_)
  287. {
  288. const std::size_t index_ = i_ * 8;
  289. os_ << dfa_[index_];
  290. for (j_ = 1; j_ < 8; ++j_)
  291. {
  292. os_ << ", " << dfa_[index_ + j_];
  293. }
  294. if (i_ < count_ - 1)
  295. {
  296. os_ << ",\n ";
  297. }
  298. }
  299. const std::size_t mod_ = sm_.data()._dfa[0]->size () % 8;
  300. if (mod_)
  301. {
  302. const std::size_t index_ = count_ * 8;
  303. if (count_)
  304. {
  305. os_ << ",\n ";
  306. }
  307. os_ << dfa_[index_];
  308. for (j_ = 1; j_ < mod_; ++j_)
  309. {
  310. os_ << ", " << dfa_[index_ + j_];
  311. }
  312. }
  313. os_ << " };\n";
  314. }
  315. os_ << "\n if (start_token_ == end_)\n";
  316. os_ << " {\n";
  317. os_ << " unique_id_ = npos;\n";
  318. os_ << " return 0;\n";
  319. os_ << " }\n\n";
  320. if (sm_.data()._seen_BOL_assertion)
  321. {
  322. os_ << " bool bol = bol_;\n\n";
  323. }
  324. if (dfas_ > 1)
  325. {
  326. os_ << "again:\n";
  327. os_ << " std::size_t const* lookup_ = lookup_arr_[start_state_];\n";
  328. os_ << " std::size_t dfa_alphabet_ = dfa_alphabet_arr_[start_state_];\n";
  329. os_ << " std::size_t const*dfa_ = dfa_arr_[start_state_];\n";
  330. }
  331. os_ << " std::size_t const* ptr_ = dfa_ + dfa_alphabet_;\n";
  332. os_ << " Iterator curr_ = start_token_;\n";
  333. os_ << " bool end_state_ = *ptr_ != 0;\n";
  334. os_ << " std::size_t id_ = *(ptr_ + id_index);\n";
  335. os_ << " std::size_t uid_ = *(ptr_ + unique_id_index);\n";
  336. if (dfas_ > 1)
  337. {
  338. os_ << " std::size_t end_start_state_ = start_state_;\n";
  339. }
  340. if (sm_.data()._seen_BOL_assertion)
  341. {
  342. os_ << " bool end_bol_ = bol_;\n";
  343. }
  344. os_ << " Iterator end_token_ = start_token_;\n\n";
  345. os_ << " while (curr_ != end_)\n";
  346. os_ << " {\n";
  347. if (sm_.data()._seen_BOL_assertion)
  348. {
  349. os_ << " std::size_t const BOL_state_ = ptr_[bol_index];\n\n";
  350. }
  351. if (sm_.data()._seen_EOL_assertion)
  352. {
  353. os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
  354. }
  355. if (sm_.data()._seen_BOL_assertion && sm_.data()._seen_EOL_assertion)
  356. {
  357. os_ << " if (BOL_state_ && bol)\n";
  358. os_ << " {\n";
  359. os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
  360. os_ << " }\n";
  361. os_ << " else if (EOL_state_ && *curr_ == '\\n')\n";
  362. os_ << " {\n";
  363. os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
  364. os_ << " }\n";
  365. os_ << " else\n";
  366. os_ << " {\n";
  367. if (lookups_ == 256)
  368. {
  369. os_ << " unsigned char index = \n";
  370. os_ << " static_cast<unsigned char>(*curr_++);\n";
  371. }
  372. else
  373. {
  374. os_ << " std::size_t index = *curr_++\n";
  375. }
  376. os_ << " bol = (index == '\\n') ? true : false;\n";
  377. os_ << " std::size_t const state_ = ptr_[\n";
  378. os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
  379. os_ << '\n';
  380. os_ << " if (state_ == 0) break;\n";
  381. os_ << '\n';
  382. os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
  383. os_ << " }\n\n";
  384. }
  385. else if (sm_.data()._seen_BOL_assertion)
  386. {
  387. os_ << " if (BOL_state_ && bol)\n";
  388. os_ << " {\n";
  389. os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
  390. os_ << " }\n";
  391. os_ << " else\n";
  392. os_ << " {\n";
  393. if (lookups_ == 256)
  394. {
  395. os_ << " unsigned char index = \n";
  396. os_ << " static_cast<unsigned char>(*curr_++);\n";
  397. }
  398. else
  399. {
  400. os_ << " std::size_t index = *curr_++\n";
  401. }
  402. os_ << " bol = (index == '\\n') ? true : false;\n";
  403. os_ << " std::size_t const state_ = ptr_[\n";
  404. os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
  405. os_ << '\n';
  406. os_ << " if (state_ == 0) break;\n";
  407. os_ << '\n';
  408. os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
  409. os_ << " }\n\n";
  410. }
  411. else if (sm_.data()._seen_EOL_assertion)
  412. {
  413. os_ << " if (EOL_state_ && *curr_ == '\\n')\n";
  414. os_ << " {\n";
  415. os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
  416. os_ << " }\n";
  417. os_ << " else\n";
  418. os_ << " {\n";
  419. if (lookups_ == 256)
  420. {
  421. os_ << " unsigned char index = \n";
  422. os_ << " static_cast<unsigned char>(*curr_++);\n";
  423. }
  424. else
  425. {
  426. os_ << " std::size_t index = *curr_++\n";
  427. }
  428. os_ << " bol = (index == '\\n') ? true : false;\n";
  429. os_ << " std::size_t const state_ = ptr_[\n";
  430. os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
  431. os_ << '\n';
  432. os_ << " if (state_ == 0) break;\n";
  433. os_ << '\n';
  434. os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
  435. os_ << " }\n\n";
  436. }
  437. else
  438. {
  439. os_ << " std::size_t const state_ =\n";
  440. if (lookups_ == 256)
  441. {
  442. os_ << " ptr_[lookup_["
  443. "static_cast<unsigned char>(*curr_++)]];\n";
  444. }
  445. else
  446. {
  447. os_ << " ptr_[lookup_[*curr_++]];\n";
  448. }
  449. os_ << '\n';
  450. os_ << " if (state_ == 0) break;\n";
  451. os_ << '\n';
  452. os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n\n";
  453. }
  454. os_ << " if (*ptr_)\n";
  455. os_ << " {\n";
  456. os_ << " end_state_ = true;\n";
  457. os_ << " id_ = *(ptr_ + id_index);\n";
  458. os_ << " uid_ = *(ptr_ + unique_id_index);\n";
  459. if (dfas_ > 1)
  460. {
  461. os_ << " end_start_state_ = *(ptr_ + state_index);\n";
  462. }
  463. if (sm_.data()._seen_BOL_assertion)
  464. {
  465. os_ << " end_bol_ = bol;\n";
  466. }
  467. os_ << " end_token_ = curr_;\n";
  468. os_ << " }\n";
  469. os_ << " }\n\n";
  470. if (sm_.data()._seen_EOL_assertion)
  471. {
  472. os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
  473. os_ << " if (EOL_state_ && curr_ == end_)\n";
  474. os_ << " {\n";
  475. os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n\n";
  476. os_ << " if (*ptr_)\n";
  477. os_ << " {\n";
  478. os_ << " end_state_ = true;\n";
  479. os_ << " id_ = *(ptr_ + id_index);\n";
  480. os_ << " uid_ = *(ptr_ + unique_id_index);\n";
  481. if (dfas_ > 1)
  482. {
  483. os_ << " end_start_state_ = *(ptr_ + state_index);\n";
  484. }
  485. if (sm_.data()._seen_BOL_assertion)
  486. {
  487. os_ << " end_bol_ = bol;\n";
  488. }
  489. os_ << " end_token_ = curr_;\n";
  490. os_ << " }\n";
  491. os_ << " }\n\n";
  492. }
  493. os_ << " if (end_state_)\n";
  494. os_ << " {\n";
  495. os_ << " // return longest match\n";
  496. os_ << " start_token_ = end_token_;\n";
  497. if (dfas_ > 1)
  498. {
  499. os_ << " start_state_ = end_start_state_;\n";
  500. os_ << " if (id_ == 0)\n";
  501. os_ << " {\n";
  502. if (sm_.data()._seen_BOL_assertion)
  503. {
  504. os_ << " bol = end_bol_;\n";
  505. }
  506. os_ << " goto again;\n";
  507. os_ << " }\n";
  508. if (sm_.data()._seen_BOL_assertion)
  509. {
  510. os_ << " else\n";
  511. os_ << " {\n";
  512. os_ << " bol_ = end_bol_;\n";
  513. os_ << " }\n";
  514. }
  515. }
  516. else if (sm_.data()._seen_BOL_assertion)
  517. {
  518. os_ << " bol_ = end_bol_;\n";
  519. }
  520. os_ << " }\n";
  521. os_ << " else\n";
  522. os_ << " {\n";
  523. if (sm_.data()._seen_BOL_assertion)
  524. {
  525. os_ << " bol_ = (*start_token_ == '\\n') ? true : false;\n";
  526. }
  527. os_ << " id_ = npos;\n";
  528. os_ << " uid_ = npos;\n";
  529. os_ << " }\n\n";
  530. os_ << " unique_id_ = uid_;\n";
  531. os_ << " return id_;\n";
  532. return os_.good();
  533. }
  534. ///////////////////////////////////////////////////////////////////////////
  535. template <typename Char>
  536. inline std::basic_string<Char> get_charlit(Char ch)
  537. {
  538. std::basic_string<Char> result;
  539. boost::lexer::basic_string_token<Char>::escape_char(ch, result);
  540. return result;
  541. }
  542. // check whether state0_0 is referenced from any of the other states
  543. template <typename Char>
  544. bool need_label0_0(boost::lexer::basic_state_machine<Char> const &sm_)
  545. {
  546. typedef typename boost::lexer::basic_state_machine<Char>::iterator
  547. iterator_type;
  548. iterator_type iter_ = sm_.begin();
  549. std::size_t const states_ = iter_->states;
  550. for (std::size_t state_ = 0; state_ < states_; ++state_)
  551. {
  552. if (0 == iter_->bol_index || 0 == iter_->eol_index)
  553. {
  554. return true;
  555. }
  556. std::size_t const transitions_ = iter_->transitions;
  557. for (std::size_t t_ = 0; t_ < transitions_; ++t_)
  558. {
  559. if (0 == iter_->goto_state)
  560. {
  561. return true;
  562. }
  563. ++iter_;
  564. }
  565. if (transitions_ == 0) ++iter_;
  566. }
  567. return false;
  568. }
  569. ///////////////////////////////////////////////////////////////////////////
  570. template <typename Char>
  571. bool generate_function_body_switch(std::basic_ostream<Char> & os_
  572. , boost::lexer::basic_state_machine<Char> const &sm_)
  573. {
  574. typedef typename boost::lexer::basic_state_machine<Char>::iterator
  575. iterator_type;
  576. std::size_t const lookups_ = sm_.data()._lookup->front ()->size ();
  577. iterator_type iter_ = sm_.begin();
  578. iterator_type labeliter_ = iter_;
  579. iterator_type end_ = sm_.end();
  580. std::size_t const dfas_ = sm_.data()._dfa->size ();
  581. os_ << " static std::size_t const npos = "
  582. "static_cast<std::size_t>(~0);\n";
  583. os_ << "\n if (start_token_ == end_)\n";
  584. os_ << " {\n";
  585. os_ << " unique_id_ = npos;\n";
  586. os_ << " return 0;\n";
  587. os_ << " }\n\n";
  588. if (sm_.data()._seen_BOL_assertion)
  589. {
  590. os_ << " bool bol = bol_;\n";
  591. }
  592. if (dfas_ > 1)
  593. {
  594. os_ << "again:\n";
  595. }
  596. os_ << " Iterator curr_ = start_token_;\n";
  597. os_ << " bool end_state_ = false;\n";
  598. os_ << " std::size_t id_ = npos;\n";
  599. os_ << " std::size_t uid_ = npos;\n";
  600. if (dfas_ > 1)
  601. {
  602. os_ << " std::size_t end_start_state_ = start_state_;\n";
  603. }
  604. if (sm_.data()._seen_BOL_assertion)
  605. {
  606. os_ << " bool end_bol_ = bol_;\n";
  607. }
  608. os_ << " Iterator end_token_ = start_token_;\n";
  609. os_ << '\n';
  610. os_ << " " << ((lookups_ == 256) ? "char" : "wchar_t")
  611. << " ch_ = 0;\n\n";
  612. if (dfas_ > 1)
  613. {
  614. os_ << " switch (start_state_)\n";
  615. os_ << " {\n";
  616. for (std::size_t i_ = 0; i_ < dfas_; ++i_)
  617. {
  618. os_ << " case " << i_ << ":\n";
  619. os_ << " goto state" << i_ << "_0;\n";
  620. os_ << " break;\n";
  621. }
  622. os_ << " default:\n";
  623. os_ << " goto end;\n";
  624. os_ << " break;\n";
  625. os_ << " }\n";
  626. }
  627. bool need_state0_0_label = need_label0_0(sm_);
  628. for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_)
  629. {
  630. std::size_t const states_ = iter_->states;
  631. for (std::size_t state_ = 0; state_ < states_; ++state_)
  632. {
  633. std::size_t const transitions_ = iter_->transitions;
  634. std::size_t t_ = 0;
  635. if (dfas_ > 1 || dfa_ != 0 || state_ != 0 || need_state0_0_label)
  636. {
  637. os_ << "\nstate" << dfa_ << '_' << state_ << ":\n";
  638. }
  639. if (iter_->end_state)
  640. {
  641. os_ << " end_state_ = true;\n";
  642. os_ << " id_ = " << iter_->id << ";\n";
  643. os_ << " uid_ = " << iter_->unique_id << ";\n";
  644. os_ << " end_token_ = curr_;\n";
  645. if (dfas_ > 1)
  646. {
  647. os_ << " end_start_state_ = " << iter_->goto_dfa <<
  648. ";\n";
  649. }
  650. if (sm_.data()._seen_BOL_assertion)
  651. {
  652. os_ << " end_bol_ = bol;\n";
  653. }
  654. if (transitions_) os_ << '\n';
  655. }
  656. if (t_ < transitions_ ||
  657. iter_->bol_index != boost::lexer::npos ||
  658. iter_->eol_index != boost::lexer::npos)
  659. {
  660. os_ << " if (curr_ == end_) goto end;\n";
  661. os_ << " ch_ = *curr_;\n";
  662. if (iter_->bol_index != boost::lexer::npos)
  663. {
  664. os_ << "\n if (bol) goto state" << dfa_ << '_'
  665. << iter_->bol_index << ";\n";
  666. }
  667. if (iter_->eol_index != boost::lexer::npos)
  668. {
  669. os_ << "\n if (ch_ == '\\n') goto state" << dfa_
  670. << '_' << iter_->eol_index << ";\n";
  671. }
  672. os_ << " ++curr_;\n";
  673. }
  674. for (/**/; t_ < transitions_; ++t_)
  675. {
  676. Char const *ptr_ = iter_->token._charset.c_str();
  677. Char const *end_ = ptr_ + iter_->token._charset.size();
  678. Char start_char_ = 0;
  679. Char curr_char_ = 0;
  680. bool range_ = false;
  681. bool first_char_ = true;
  682. os_ << "\n if (";
  683. while (ptr_ != end_)
  684. {
  685. curr_char_ = *ptr_++;
  686. if (*ptr_ == curr_char_ + 1)
  687. {
  688. if (!range_)
  689. {
  690. start_char_ = curr_char_;
  691. }
  692. range_ = true;
  693. }
  694. else
  695. {
  696. if (!first_char_)
  697. {
  698. os_ << ((iter_->token._negated) ? " && " : " || ");
  699. }
  700. else
  701. {
  702. first_char_ = false;
  703. }
  704. if (range_)
  705. {
  706. if (iter_->token._negated)
  707. {
  708. os_ << "!";
  709. }
  710. os_ << "(ch_ >= '" << get_charlit(start_char_)
  711. << "' && ch_ <= '"
  712. << get_charlit(curr_char_) << "')";
  713. range_ = false;
  714. }
  715. else
  716. {
  717. os_ << "ch_ "
  718. << ((iter_->token._negated) ? "!=" : "==")
  719. << " '" << get_charlit(curr_char_) << "'";
  720. }
  721. }
  722. }
  723. os_ << ") goto state" << dfa_ << '_' << iter_->goto_state
  724. << ";\n";
  725. ++iter_;
  726. }
  727. if (!(dfa_ == dfas_ - 1 && state_ == states_ - 1))
  728. {
  729. os_ << " goto end;\n";
  730. }
  731. if (transitions_ == 0) ++iter_;
  732. }
  733. }
  734. os_ << "\nend:\n";
  735. os_ << " if (end_state_)\n";
  736. os_ << " {\n";
  737. os_ << " // return longest match\n";
  738. os_ << " start_token_ = end_token_;\n";
  739. if (dfas_ > 1)
  740. {
  741. os_ << " start_state_ = end_start_state_;\n";
  742. os_ << "\n if (id_ == 0)\n";
  743. os_ << " {\n";
  744. if (sm_.data()._seen_BOL_assertion)
  745. {
  746. os_ << " bol = end_bol_;\n";
  747. }
  748. os_ << " goto again;\n";
  749. os_ << " }\n";
  750. if (sm_.data()._seen_BOL_assertion)
  751. {
  752. os_ << " else\n";
  753. os_ << " {\n";
  754. os_ << " bol_ = end_bol_;\n";
  755. os_ << " }\n";
  756. }
  757. }
  758. else if (sm_.data()._seen_BOL_assertion)
  759. {
  760. os_ << " bol_ = end_bol_;\n";
  761. }
  762. os_ << " }\n";
  763. os_ << " else\n";
  764. os_ << " {\n";
  765. if (sm_.data()._seen_BOL_assertion)
  766. {
  767. os_ << " bol_ = (*start_token_ == '\\n') ? true : false;\n";
  768. }
  769. os_ << " id_ = npos;\n";
  770. os_ << " uid_ = npos;\n";
  771. os_ << " }\n\n";
  772. os_ << " unique_id_ = uid_;\n";
  773. os_ << " return id_;\n";
  774. return os_.good();
  775. }
  776. ///////////////////////////////////////////////////////////////////////////
  777. // Generate a tokenizer for the given state machine.
  778. template <typename Char, typename F>
  779. inline bool
  780. generate_cpp (boost::lexer::basic_state_machine<Char> const& sm_
  781. , boost::lexer::basic_rules<Char> const& rules_
  782. , std::basic_ostream<Char> &os_, Char const* name_suffix
  783. , F generate_function_body)
  784. {
  785. if (sm_.data()._lookup->empty())
  786. return false;
  787. std::size_t const dfas_ = sm_.data()._dfa->size();
  788. // std::size_t const lookups_ = sm_.data()._lookup->front()->size();
  789. os_ << "// Copyright (c) 2008-2009 Ben Hanson\n";
  790. os_ << "// Copyright (c) 2008-2011 Hartmut Kaiser\n";
  791. os_ << "//\n";
  792. os_ << "// Distributed under the Boost Software License, "
  793. "Version 1.0. (See accompanying\n";
  794. os_ << "// file licence_1_0.txt or copy at "
  795. "http://www.boost.org/LICENSE_1_0.txt)\n\n";
  796. os_ << "// Auto-generated by boost::lexer, do not edit\n\n";
  797. std::basic_string<Char> guard(name_suffix);
  798. guard += L<Char>(name_suffix[0] ? "_" : "");
  799. guard += L<Char>(__DATE__ "_" __TIME__);
  800. typename std::basic_string<Char>::size_type p =
  801. guard.find_first_of(L<Char>(": "));
  802. while (std::string::npos != p)
  803. {
  804. guard.replace(p, 1, L<Char>("_"));
  805. p = guard.find_first_of(L<Char>(": "), p);
  806. }
  807. boost::to_upper(guard);
  808. os_ << "#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << ")\n";
  809. os_ << "#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << "\n\n";
  810. os_ << "#include <boost/spirit/home/support/detail/lexer/char_traits.hpp>\n\n";
  811. generate_delimiter(os_);
  812. os_ << "// the generated table of state names and the tokenizer have to be\n"
  813. "// defined in the boost::spirit::lex::lexertl::static_ namespace\n";
  814. os_ << "namespace boost { namespace spirit { namespace lex { "
  815. "namespace lexertl { namespace static_ {\n\n";
  816. // generate the lexer state information variables
  817. if (!generate_cpp_state_info(rules_, os_, name_suffix))
  818. return false;
  819. generate_delimiter(os_);
  820. os_ << "// this function returns the next matched token\n";
  821. os_ << "template<typename Iterator>\n";
  822. os_ << "std::size_t next_token" << (name_suffix[0] ? "_" : "")
  823. << name_suffix << " (";
  824. if (dfas_ > 1)
  825. {
  826. os_ << "std::size_t& start_state_, ";
  827. }
  828. else
  829. {
  830. os_ << "std::size_t& /*start_state_*/, ";
  831. }
  832. if (sm_.data()._seen_BOL_assertion)
  833. {
  834. os_ << "bool& bol_, ";
  835. }
  836. else
  837. {
  838. os_ << "bool& /*bol_*/, ";
  839. }
  840. os_ << "\n ";
  841. os_ << "Iterator &start_token_, Iterator const& end_, ";
  842. os_ << "std::size_t& unique_id_)\n";
  843. os_ << "{\n";
  844. if (!generate_function_body(os_, sm_))
  845. return false;
  846. os_ << "}\n\n";
  847. if (!generate_cpp_state_table<Char>(os_, name_suffix
  848. , sm_.data()._seen_BOL_assertion, sm_.data()._seen_EOL_assertion))
  849. {
  850. return false;
  851. }
  852. os_ << "}}}}} // namespace boost::spirit::lex::lexertl::static_\n\n";
  853. os_ << "#endif\n";
  854. return os_.good();
  855. }
  856. } // namespace detail
  857. ///////////////////////////////////////////////////////////////////////////
  858. template <typename Lexer, typename F>
  859. inline bool
  860. generate_static(Lexer const& lexer
  861. , std::basic_ostream<typename Lexer::char_type>& os
  862. , typename Lexer::char_type const* name_suffix, F f)
  863. {
  864. if (!lexer.init_dfa(true)) // always minimize DFA for static lexers
  865. return false;
  866. return detail::generate_cpp(lexer.state_machine_, lexer.rules_, os
  867. , name_suffix, f);
  868. }
  869. ///////////////////////////////////////////////////////////////////////////
  870. // deprecated function, will be removed in the future (this has been
  871. // replaced by the function generate_static_dfa - see below).
  872. template <typename Lexer>
  873. inline bool
  874. generate_static(Lexer const& lexer
  875. , std::basic_ostream<typename Lexer::char_type>& os
  876. , typename Lexer::char_type const* name_suffix =
  877. detail::L<typename Lexer::char_type>())
  878. {
  879. return generate_static(lexer, os, name_suffix
  880. , &detail::generate_function_body_dfa<typename Lexer::char_type>);
  881. }
  882. ///////////////////////////////////////////////////////////////////////////
  883. template <typename Lexer>
  884. inline bool
  885. generate_static_dfa(Lexer const& lexer
  886. , std::basic_ostream<typename Lexer::char_type>& os
  887. , typename Lexer::char_type const* name_suffix =
  888. detail::L<typename Lexer::char_type>())
  889. {
  890. return generate_static(lexer, os, name_suffix
  891. , &detail::generate_function_body_dfa<typename Lexer::char_type>);
  892. }
  893. ///////////////////////////////////////////////////////////////////////////
  894. template <typename Lexer>
  895. inline bool
  896. generate_static_switch(Lexer const& lexer
  897. , std::basic_ostream<typename Lexer::char_type>& os
  898. , typename Lexer::char_type const* name_suffix =
  899. detail::L<typename Lexer::char_type>())
  900. {
  901. return generate_static(lexer, os, name_suffix
  902. , &detail::generate_function_body_switch<typename Lexer::char_type>);
  903. }
  904. ///////////////////////////////////////////////////////////////////////////////
  905. }}}}
  906. #endif