perl2xpr.cpp 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // perl2xpr.cpp
  3. // A utility for translating a Perl regular expression into an
  4. // xpressive static regular expression.
  5. //
  6. // Copyright 2007 Eric Niebler. Distributed under the Boost
  7. // Software License, Version 1.0. (See accompanying file
  8. // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. #include <stack>
  10. #include <string>
  11. #include <iostream>
  12. #include <boost/xpressive/xpressive_static.hpp>
  13. #include <boost/xpressive/regex_actions.hpp>
  14. namespace x = boost::xpressive;
  15. using namespace x;
  16. int main(int argc, char *argv[])
  17. {
  18. int i = 1, j = 1;
  19. bool nocase = false;
  20. char const *dot = " ~_n ";
  21. char const *bos = " bos ";
  22. char const *eos = " eos ";
  23. for(; i < argc && '-' == *argv[i]; argv[i][++j]? 0: (j=1,++i))
  24. {
  25. switch(argv[i][j])
  26. {
  27. case 'i': // perl /i modifier
  28. nocase = true;
  29. break;
  30. case 's': // perl /s modifier
  31. dot = " _ ";
  32. break;
  33. case 'm': // perl /m modifier
  34. bos = " bol ";
  35. eos = " eol ";
  36. break;
  37. default:
  38. std::cerr << "Unknown option : " << argv[i] << std::endl;
  39. return -1;
  40. }
  41. }
  42. if(i == argc)
  43. {
  44. std::cerr << "Usage:\n perl2xpr [-i] [-s] [-m] 're'\n";
  45. return -1;
  46. }
  47. // Local variables used by the semantic actions below
  48. local<int> mark_nbr;
  49. local<std::string> tmp;
  50. local<std::stack<std::string> > strings;
  51. // The rules in the dynamic regex grammar
  52. cregex regex, alts, seq, quant, repeat, atom, escape, group, lit, charset, setelem;
  53. lit = ~(set='.','^','$','*','+','?','(',')','{','}','[',']','\\','|')
  54. ;
  55. escape = as_xpr('b') [top(strings) += " _b "]
  56. | as_xpr('B') [top(strings) += " ~_b "]
  57. | as_xpr('d') [top(strings) += " _d "]
  58. | as_xpr('D') [top(strings) += " ~_d "]
  59. | as_xpr('s') [top(strings) += " _s "]
  60. | as_xpr('S') [top(strings) += " ~_s "]
  61. | as_xpr('w') [top(strings) += " _w "]
  62. | as_xpr('W') [top(strings) += " ~_w "]
  63. | _d [top(strings) += " s" + _ + " "]
  64. | _ [top(strings) += " as_xpr('" + _ + "') "]
  65. ;
  66. group = (
  67. as_xpr("?:") [top(strings) += " ( "]
  68. | as_xpr("?i:") [top(strings) += " icase( "]
  69. | as_xpr("?>") [top(strings) += " keep( "]
  70. | as_xpr("?=") [top(strings) += " before( "]
  71. | as_xpr("?!") [top(strings) += " ~before( "]
  72. | as_xpr("?<=") [top(strings) += " after( "]
  73. | as_xpr("?<!") [top(strings) += " ~after( "]
  74. | nil [top(strings) += " ( s" + as<std::string>(++mark_nbr) + "= "]
  75. )
  76. >> x::ref(regex)
  77. >> as_xpr(')') [top(strings) += " ) "]
  78. ;
  79. setelem = as_xpr('\\') >> _ [top(strings) += " as_xpr('" + _ + "') "]
  80. | "[:" >> !as_xpr('^') [top(strings) += "~"]
  81. >> (+_w) [top(strings) += _ ]
  82. >> ":]"
  83. | (
  84. (s1=~as_xpr(']'))
  85. >> '-'
  86. >> (s2=~as_xpr(']'))
  87. ) [top(strings) += "range('" + s1 + "','" + s2 + "')"]
  88. ;
  89. charset = !as_xpr('^') [top(strings) += " ~ "]
  90. >> nil [top(strings) += " set[ "]
  91. >> (
  92. setelem
  93. | (~as_xpr(']')) [top(strings) += " as_xpr('" + _ + "') "]
  94. )
  95. >>*(
  96. nil [top(strings) += " | "]
  97. >> (
  98. setelem
  99. | (~as_xpr(']')) [top(strings) += "'" + _ + "'"]
  100. )
  101. )
  102. >> as_xpr(']') [top(strings) += " ] "]
  103. ;
  104. atom = (
  105. +(lit >> ~before((set='*','+','?','{')))
  106. | lit
  107. ) [top(strings) += " as_xpr(\"" + _ + "\") "]
  108. | as_xpr('.') [top(strings) += dot]
  109. | as_xpr('^') [top(strings) += bos]
  110. | as_xpr('$') [top(strings) += eos]
  111. | '\\' >> escape
  112. | '(' >> group
  113. | '[' >> charset
  114. ;
  115. repeat = as_xpr('{') [tmp = " repeat<"]
  116. >> (+_d) [tmp += _]
  117. >> !(
  118. as_xpr(',') [tmp += ","]
  119. >> (
  120. (+_d) [tmp += _]
  121. | nil [tmp += "inf"]
  122. )
  123. )
  124. >> as_xpr('}') [top(strings) = tmp + ">( " + top(strings) + " ) "]
  125. ;
  126. quant = nil [push(strings, "")]
  127. >> atom
  128. >> !(
  129. (
  130. as_xpr("*") [insert(top(strings), 0, " * ")] // [strings->*top()->*insert(0, " * ")]
  131. | as_xpr("+") [insert(top(strings), 0, " + ")] // [strings->*top()->*insert(0, " + ")]
  132. | as_xpr("?") [insert(top(strings), 0, " ! ")] // [strings->*top()->*insert(0, " ! ")]
  133. | repeat
  134. )
  135. >> !as_xpr('?') [insert(top(strings), 0, " - ")]
  136. )
  137. >> nil [tmp = top(strings), pop(strings), top(strings) += tmp]
  138. ;
  139. seq = quant
  140. >> *(
  141. nil [top(strings) += " >> "]
  142. >> quant
  143. )
  144. ;
  145. alts = seq
  146. >> *(
  147. as_xpr('|') [top(strings) += " | "]
  148. >> seq
  149. )
  150. ;
  151. regex = alts
  152. ;
  153. strings.get().push("");
  154. if(!regex_match(argv[i], regex))
  155. {
  156. std::cerr << "ERROR: unrecognized regular expression" << std::endl;
  157. return -1;
  158. }
  159. else if(nocase)
  160. {
  161. std::cout << "icase( " << strings.get().top() << " )" << std::endl;
  162. }
  163. else
  164. {
  165. std::cout << strings.get().top() << std::endl;
  166. }
  167. return 0;
  168. }