parsing.hpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564
  1. // ----------------------------------------------------------------------------
  2. // parsing.hpp : implementation of the parsing member functions
  3. // ( parse, parse_printf_directive)
  4. // ----------------------------------------------------------------------------
  5. // Copyright Samuel Krempp 2003. Use, modification, and distribution are
  6. // subject to the Boost Software License, Version 1.0. (See accompanying
  7. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  8. // see http://www.boost.org/libs/format for library home page
  9. // ----------------------------------------------------------------------------
  10. #ifndef BOOST_FORMAT_PARSING_HPP
  11. #define BOOST_FORMAT_PARSING_HPP
  #include <limits>

  #include <boost/format/format_class.hpp>
  #include <boost/format/exceptions.hpp>
  #include <boost/throw_exception.hpp>
  #include <boost/assert.hpp>
  #include <boost/config.hpp>
  #include <boost/core/ignore_unused.hpp>
  18. namespace boost {
  19. namespace io {
  20. namespace detail {
  #if defined(BOOST_NO_STD_LOCALE)
  // Without std::locale, streams are used for narrow / widen, and those
  // members are not const: strip the constness so that they can still be
  // called through a const reference.
  template<class T>
  T& const_or_not(const T& x) {
      T& writable = const_cast<T&>(x);
      return writable;
  }
  #else
  // With std::locale available the argument is handed back unchanged,
  // still as a reference to const.
  template<class T>
  const T& const_or_not(const T& x) {
      const T& unchanged = x;
      return unchanged;
  }
  #endif
  // Converts c to a plain char by calling narrow(c, deflt) on the facet
  // (accessed through const_or_not so that non-const facets work as well).
  template<class Ch, class Facet> inline
  char wrap_narrow(const Facet& fac, Ch c, char deflt) {
      char narrowed = const_or_not(fac).narrow(c, deflt);
      return narrowed;
  }
  // Tells whether c is a digit.
  // Normally the facet's is() member is asked; when BOOST_NO_LOCALE_ISDIGIT
  // is defined the facet is ignored and isdigit() is used instead.
  template<class Ch, class Facet> inline
  bool wrap_isdigit(const Facet& fac, Ch c) {
  #if defined( BOOST_NO_LOCALE_ISDIGIT )
      ignore_unused(fac);
      using namespace std;
      return isdigit(c) != 0;
  #else
      return fac.is(std::ctype<Ch>::digit, c);
  #endif
  }
  // Skips the leading run of digits of [beg, end).
  // Returns an iterator to the first non-digit character, or end when the
  // whole range consists of digits.
  template<class Iter, class Facet>
  Iter wrap_scan_notdigit(const Facet & fac, Iter beg, Iter end) {
      using namespace std;
      while (beg != end) {
          if (!wrap_isdigit(fac, *beg))
              break;
          ++beg;
      }
      return beg;
  }
  // Input   : [start, last) iterators range and a Facet, used to classify
  //           the characters and to narrow them
  // Effects : reads the leading run of digits and stores its decimal value
  //           in res (res is 0 when the range does not begin with a digit).
  //           A value that does not fit in Res saturates at the largest
  //           value of Res instead of overflowing; the remaining digits
  //           are still consumed.
  // Returns : an iterator to the first character that was not consumed
  template<class Res, class Iter, class Facet>
  Iter str2int (const Iter & start, const Iter & last, Res & res,
                const Facet& fac)
  {
      using namespace std;
      // parenthesised to stay clear of a possible 'max' macro
      const Res max_value = (std::numeric_limits<Res>::max)();
      Iter it;
      res=0;
      for(it=start; it != last && wrap_isdigit(fac, *it); ++it ) {
          char cur_ch = wrap_narrow(fac, *it, 0); // cant fail.
          const Res digit = static_cast<Res>(cur_ch - '0'); // 22.2.1.1.2.13 of the C++ standard
          // res*10 + digit fits in Res exactly when res <= (max_value - digit) / 10
          if (res > (max_value - digit) / 10)
              res = max_value;
          else
              res = static_cast<Res>(res * 10 + digit);
      }
      return it;
  }
  71. // auxiliary func called by parse_printf_directive
  72. // for centralising error handling
  73. // it either throws if user sets the corresponding flag, or does nothing.
  74. inline void maybe_throw_exception(unsigned char exceptions,
  75. std::size_t pos, std::size_t size)
  76. {
  77. if(exceptions & io::bad_format_string_bit)
  78. boost::throw_exception(io::bad_format_string(pos, size) );
  79. }
  80. // Input: the position of a printf-directive in the format-string
  81. // a basic_ios& merely to use its widen/narrow member function
  82. // a bitset'exceptions' telling whether to throw exceptions on errors.
  83. // Returns:
  84. // true if parse succeeded (ignore some errors if exceptions disabled)
  85. // false if it failed so bad that the directive should be printed verbatim
  86. // Effects:
  87. // start is incremented so that *start is the first char after
  88. // this directive
  89. // *fpar is set with the parameters read in the directive
  90. template<class Ch, class Tr, class Alloc, class Iter, class Facet>
  91. bool parse_printf_directive(Iter & start, const Iter& last,
  92. detail::format_item<Ch, Tr, Alloc> * fpar,
  93. const Facet& fac,
  94. std::size_t offset, unsigned char exceptions)
  95. {
  96. typedef typename basic_format<Ch, Tr, Alloc>::format_item_t format_item_t;
  97. fpar->argN_ = format_item_t::argN_no_posit; // if no positional-directive
  98. bool precision_set = false;
  99. bool in_brackets=false;
  100. Iter start0 = start;
  101. std::size_t fstring_size = last-start0+offset;
  102. char mssiz = 0;
  103. if(start>= last) { // empty directive : this is a trailing %
  104. maybe_throw_exception(exceptions, start-start0 + offset, fstring_size);
  105. return false;
  106. }
  107. if(*start== const_or_not(fac).widen( '|')) {
  108. in_brackets=true;
  109. if( ++start >= last ) {
  110. maybe_throw_exception(exceptions, start-start0 + offset, fstring_size);
  111. return false;
  112. }
  113. }
  114. // the flag '0' would be picked as a digit for argument order, but here it's a flag :
  115. if(*start== const_or_not(fac).widen( '0'))
  116. goto parse_flags;
  117. // handle argument order (%2$d) or possibly width specification: %2d
  118. if(wrap_isdigit(fac, *start)) {
  119. int n;
  120. start = str2int(start, last, n, fac);
  121. if( start >= last ) {
  122. maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
  123. return false;
  124. }
  125. // %N% case : this is already the end of the directive
  126. if( *start == const_or_not(fac).widen( '%') ) {
  127. fpar->argN_ = n-1;
  128. ++start;
  129. if( in_brackets)
  130. maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
  131. return true;
  132. }
  133. if ( *start== const_or_not(fac).widen( '$') ) {
  134. fpar->argN_ = n-1;
  135. ++start;
  136. }
  137. else {
  138. // non-positional directive
  139. fpar->fmtstate_.width_ = n;
  140. fpar->argN_ = format_item_t::argN_no_posit;
  141. goto parse_precision;
  142. }
  143. }
  144. parse_flags:
  145. // handle flags
  146. while (start != last) { // as long as char is one of + - = _ # 0 or ' '
  147. switch ( wrap_narrow(fac, *start, 0)) {
  148. case '\'':
  149. break; // no effect yet. (painful to implement)
  150. case '-':
  151. fpar->fmtstate_.flags_ |= std::ios_base::left;
  152. break;
  153. case '=':
  154. fpar->pad_scheme_ |= format_item_t::centered;
  155. break;
  156. case '_':
  157. fpar->fmtstate_.flags_ |= std::ios_base::internal;
  158. break;
  159. case ' ':
  160. fpar->pad_scheme_ |= format_item_t::spacepad;
  161. break;
  162. case '+':
  163. fpar->fmtstate_.flags_ |= std::ios_base::showpos;
  164. break;
  165. case '0':
  166. fpar->pad_scheme_ |= format_item_t::zeropad;
  167. // need to know alignment before really setting flags,
  168. // so just add 'zeropad' flag for now, it will be processed later.
  169. break;
  170. case '#':
  171. fpar->fmtstate_.flags_ |= std::ios_base::showpoint | std::ios_base::showbase;
  172. break;
  173. default:
  174. goto parse_width;
  175. }
  176. ++start;
  177. } // loop on flag.
  178. if( start>=last) {
  179. maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
  180. return true;
  181. }
  182. // first skip 'asterisk fields' : * or num (length)
  183. parse_width:
  184. if(*start == const_or_not(fac).widen( '*') )
  185. ++start;
  186. else if(start!=last && wrap_isdigit(fac, *start))
  187. start = str2int(start, last, fpar->fmtstate_.width_, fac);
  188. parse_precision:
  189. if( start>= last) {
  190. maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
  191. return true;
  192. }
  193. // handle precision spec
  194. if (*start== const_or_not(fac).widen( '.')) {
  195. ++start;
  196. if(start != last && *start == const_or_not(fac).widen( '*') )
  197. ++start;
  198. else if(start != last && wrap_isdigit(fac, *start)) {
  199. start = str2int(start, last, fpar->fmtstate_.precision_, fac);
  200. precision_set = true;
  201. }
  202. else
  203. fpar->fmtstate_.precision_ =0;
  204. }
  205. // argument type modifiers
  206. while (start != last) {
  207. switch (wrap_narrow(fac, *start, 0)) {
  208. case 'h':
  209. case 'l':
  210. case 'j':
  211. case 'z':
  212. case 'L':
  213. // boost::format ignores argument type modifiers as it relies on
  214. // the type of the argument fed into it by operator %
  215. break;
  216. // Note that the ptrdiff_t argument type 't' from C++11 is not honored
  217. // because it was already in use as the tabulation specifier in boost::format
  218. // case 't':
  219. // Microsoft extensions:
  220. // https://msdn.microsoft.com/en-us/library/tcxf1dw6.aspx
  221. case 'w':
  222. break;
  223. case 'I':
  224. mssiz = 'I';
  225. break;
  226. case '3':
  227. if (mssiz != 'I') {
  228. maybe_throw_exception(exceptions, start - start0 + offset, fstring_size);
  229. return true;
  230. }
  231. mssiz = '3';
  232. break;
  233. case '2':
  234. if (mssiz != '3') {
  235. maybe_throw_exception(exceptions, start - start0 + offset, fstring_size);
  236. return true;
  237. }
  238. mssiz = 0x00;
  239. break;
  240. case '6':
  241. if (mssiz != 'I') {
  242. maybe_throw_exception(exceptions, start - start0 + offset, fstring_size);
  243. return true;
  244. }
  245. mssiz = '6';
  246. break;
  247. case '4':
  248. if (mssiz != '6') {
  249. maybe_throw_exception(exceptions, start - start0 + offset, fstring_size);
  250. return true;
  251. }
  252. mssiz = 0x00;
  253. break;
  254. default:
  255. if (mssiz && mssiz == 'I') {
  256. mssiz = 0;
  257. }
  258. goto parse_conversion_specification;
  259. }
  260. ++start;
  261. } // loop on argument type modifiers to pick up 'hh', 'll', and the more complex microsoft ones
  262. parse_conversion_specification:
  263. if (start >= last || mssiz) {
  264. maybe_throw_exception(exceptions, start - start0 + offset, fstring_size);
  265. return true;
  266. }
  267. if( in_brackets && *start== const_or_not(fac).widen( '|') ) {
  268. ++start;
  269. return true;
  270. }
  271. // The default flags are "dec" and "skipws"
  272. // so if changing the base, need to unset basefield first
  273. switch (wrap_narrow(fac, *start, 0))
  274. {
  275. // Boolean
  276. case 'b':
  277. fpar->fmtstate_.flags_ |= std::ios_base::boolalpha;
  278. break;
  279. // Decimal
  280. case 'u':
  281. case 'd':
  282. case 'i':
  283. // Defaults are sufficient
  284. break;
  285. // Hex
  286. case 'X':
  287. fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
  288. BOOST_FALLTHROUGH;
  289. case 'x':
  290. case 'p': // pointer => set hex.
  291. fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
  292. fpar->fmtstate_.flags_ |= std::ios_base::hex;
  293. break;
  294. // Octal
  295. case 'o':
  296. fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
  297. fpar->fmtstate_.flags_ |= std::ios_base::oct;
  298. break;
  299. // Floating
  300. case 'A':
  301. fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
  302. BOOST_FALLTHROUGH;
  303. case 'a':
  304. fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
  305. fpar->fmtstate_.flags_ |= std::ios_base::fixed;
  306. fpar->fmtstate_.flags_ |= std::ios_base::scientific;
  307. break;
  308. case 'E':
  309. fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
  310. BOOST_FALLTHROUGH;
  311. case 'e':
  312. fpar->fmtstate_.flags_ |= std::ios_base::scientific;
  313. break;
  314. case 'F':
  315. fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
  316. BOOST_FALLTHROUGH;
  317. case 'f':
  318. fpar->fmtstate_.flags_ |= std::ios_base::fixed;
  319. break;
  320. case 'G':
  321. fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
  322. BOOST_FALLTHROUGH;
  323. case 'g':
  324. // default flags are correct here
  325. break;
  326. // Tabulation (a boost::format extension)
  327. case 'T':
  328. ++start;
  329. if( start >= last) {
  330. maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
  331. return false;
  332. } else {
  333. fpar->fmtstate_.fill_ = *start;
  334. }
  335. fpar->pad_scheme_ |= format_item_t::tabulation;
  336. fpar->argN_ = format_item_t::argN_tabulation;
  337. break;
  338. case 't':
  339. fpar->fmtstate_.fill_ = const_or_not(fac).widen( ' ');
  340. fpar->pad_scheme_ |= format_item_t::tabulation;
  341. fpar->argN_ = format_item_t::argN_tabulation;
  342. break;
  343. // Character
  344. case 'C':
  345. case 'c':
  346. fpar->truncate_ = 1;
  347. break;
  348. // String
  349. case 'S':
  350. case 's':
  351. if(precision_set) // handle truncation manually, with own parameter.
  352. fpar->truncate_ = fpar->fmtstate_.precision_;
  353. fpar->fmtstate_.precision_ = 6; // default stream precision.
  354. break;
  355. // %n is insecure and ignored by boost::format
  356. case 'n' :
  357. fpar->argN_ = format_item_t::argN_ignored;
  358. break;
  359. default:
  360. maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
  361. }
  362. ++start;
  363. if( in_brackets ) {
  364. if( start != last && *start== const_or_not(fac).widen( '|') ) {
  365. ++start;
  366. return true;
  367. }
  368. else maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
  369. }
  370. return true;
  371. }
  372. // -end parse_printf_directive()
  373. template<class String, class Facet>
  374. int upper_bound_from_fstring(const String& buf,
  375. const typename String::value_type arg_mark,
  376. const Facet& fac,
  377. unsigned char exceptions)
  378. {
  379. // quick-parsing of the format-string to count arguments mark (arg_mark, '%')
  380. // returns : upper bound on the number of format items in the format strings
  381. using namespace boost::io;
  382. typename String::size_type i1=0;
  383. int num_items=0;
  384. while( (i1=buf.find(arg_mark,i1)) != String::npos ) {
  385. if( i1+1 >= buf.size() ) {
  386. if(exceptions & bad_format_string_bit)
  387. boost::throw_exception(bad_format_string(i1, buf.size() )); // must not end in ".. %"
  388. else {
  389. ++num_items;
  390. break;
  391. }
  392. }
  393. if(buf[i1+1] == buf[i1] ) {// escaped "%%"
  394. i1+=2; continue;
  395. }
  396. ++i1;
  397. // in case of %N% directives, dont count it double (wastes allocations..) :
  398. i1 = detail::wrap_scan_notdigit(fac, buf.begin()+i1, buf.end()) - buf.begin();
  399. if( i1 < buf.size() && buf[i1] == arg_mark )
  400. ++i1;
  401. ++num_items;
  402. }
  403. return num_items;
  404. }
  // Appends the characters of src located at positions [beg, end) to dst.
  template<class String> inline
  void append_string(String& dst, const String& src,
                     const typename String::size_type beg,
                     const typename String::size_type end) {
      typename String::const_iterator first = src.begin() + beg;
      typename String::const_iterator past = src.begin() + end;
      dst.append(first, past);
  }
  411. } // detail namespace
  412. } // io namespace
  413. // -----------------------------------------------
  414. // format :: parse(..)
  415. template<class Ch, class Tr, class Alloc>
  416. basic_format<Ch, Tr, Alloc>& basic_format<Ch, Tr, Alloc>::
  417. parse (const string_type& buf) {
  418. // parse the format-string
  419. using namespace std;
  420. #if !defined(BOOST_NO_STD_LOCALE)
  421. const std::ctype<Ch> & fac = BOOST_USE_FACET( std::ctype<Ch>, getloc());
  422. #else
  423. io::basic_oaltstringstream<Ch, Tr, Alloc> fac;
  424. //has widen and narrow even on compilers without locale
  425. #endif
  426. const Ch arg_mark = io::detail::const_or_not(fac).widen( '%');
  427. bool ordered_args=true;
  428. int max_argN=-1;
  429. // A: find upper_bound on num_items and allocates arrays
  430. int num_items = io::detail::upper_bound_from_fstring(buf, arg_mark, fac, exceptions());
  431. make_or_reuse_data(num_items);
  432. // B: Now the real parsing of the format string :
  433. num_items=0;
  434. typename string_type::size_type i0=0, i1=0;
  435. typename string_type::const_iterator it;
  436. bool special_things=false;
  437. int cur_item=0;
  438. while( (i1=buf.find(arg_mark,i1)) != string_type::npos ) {
  439. string_type & piece = (cur_item==0) ? prefix_ : items_[cur_item-1].appendix_;
  440. if( buf[i1+1] == buf[i1] ) { // escaped mark, '%%'
  441. io::detail::append_string(piece, buf, i0, i1+1);
  442. i1+=2; i0=i1;
  443. continue;
  444. }
  445. BOOST_ASSERT( static_cast<unsigned int>(cur_item) < items_.size() || cur_item==0);
  446. if(i1!=i0) {
  447. io::detail::append_string(piece, buf, i0, i1);
  448. i0=i1;
  449. }
  450. ++i1;
  451. it = buf.begin()+i1;
  452. bool parse_ok = io::detail::parse_printf_directive(
  453. it, buf.end(), &items_[cur_item], fac, i1, exceptions());
  454. i1 = it - buf.begin();
  455. if( ! parse_ok ) // the directive will be printed verbatim
  456. continue;
  457. i0=i1;
  458. items_[cur_item].compute_states(); // process complex options, like zeropad, into params
  459. int argN=items_[cur_item].argN_;
  460. if(argN == format_item_t::argN_ignored)
  461. continue;
  462. if(argN ==format_item_t::argN_no_posit)
  463. ordered_args=false;
  464. else if(argN == format_item_t::argN_tabulation) special_things=true;
  465. else if(argN > max_argN) max_argN = argN;
  466. ++num_items;
  467. ++cur_item;
  468. } // loop on %'s
  469. BOOST_ASSERT(cur_item == num_items);
  470. // store the final piece of string
  471. {
  472. string_type & piece = (cur_item==0) ? prefix_ : items_[cur_item-1].appendix_;
  473. io::detail::append_string(piece, buf, i0, buf.size());
  474. }
  475. if( !ordered_args) {
  476. if(max_argN >= 0 ) { // dont mix positional with non-positionnal directives
  477. if(exceptions() & io::bad_format_string_bit)
  478. boost::throw_exception(
  479. io::bad_format_string(static_cast<std::size_t>(max_argN), 0));
  480. // else do nothing. => positionnal arguments are processed as non-positionnal
  481. }
  482. // set things like it would have been with positional directives :
  483. int non_ordered_items = 0;
  484. for(int i=0; i< num_items; ++i)
  485. if(items_[i].argN_ == format_item_t::argN_no_posit) {
  486. items_[i].argN_ = non_ordered_items;
  487. ++non_ordered_items;
  488. }
  489. max_argN = non_ordered_items-1;
  490. }
  491. // C: set some member data :
  492. items_.resize(num_items, format_item_t(io::detail::const_or_not(fac).widen( ' ')) );
  493. if(special_things) style_ |= special_needs;
  494. num_args_ = max_argN + 1;
  495. if(ordered_args) style_ |= ordered;
  496. else style_ &= ~ordered;
  497. return *this;
  498. }
  499. } // namespace boost
  500. #endif // BOOST_FORMAT_PARSING_HPP