/////////////////////////////////////////////////////////////////////////////// // perl2xpr.cpp // A utility for translating a Perl regular expression into an // xpressive static regular expression. // // Copyright 2007 Eric Niebler. Distributed under the Boost // Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include namespace x = boost::xpressive; using namespace x; int main(int argc, char *argv[]) { int i = 1, j = 1; bool nocase = false; char const *dot = " ~_n "; char const *bos = " bos "; char const *eos = " eos "; for(; i < argc && '-' == *argv[i]; argv[i][++j]? 0: (j=1,++i)) { switch(argv[i][j]) { case 'i': // perl /i modifier nocase = true; break; case 's': // perl /s modifier dot = " _ "; break; case 'm': // perl /m modifier bos = " bol "; eos = " eol "; break; default: std::cerr << "Unknown option : " << argv[i] << std::endl; return -1; } } if(i == argc) { std::cerr << "Usage:\n perl2xpr [-i] [-s] [-m] 're'\n"; return -1; } // Local variables used by the semantic actions below local mark_nbr; local tmp; local > strings; // The rules in the dynamic regex grammar cregex regex, alts, seq, quant, repeat, atom, escape, group, lit, charset, setelem; lit = ~(set='.','^','$','*','+','?','(',')','{','}','[',']','\\','|') ; escape = as_xpr('b') [top(strings) += " _b "] | as_xpr('B') [top(strings) += " ~_b "] | as_xpr('d') [top(strings) += " _d "] | as_xpr('D') [top(strings) += " ~_d "] | as_xpr('s') [top(strings) += " _s "] | as_xpr('S') [top(strings) += " ~_s "] | as_xpr('w') [top(strings) += " _w "] | as_xpr('W') [top(strings) += " ~_w "] | _d [top(strings) += " s" + _ + " "] | _ [top(strings) += " as_xpr('" + _ + "') "] ; group = ( as_xpr("?:") [top(strings) += " ( "] | as_xpr("?i:") [top(strings) += " icase( "] | as_xpr("?>") [top(strings) += " keep( "] | as_xpr("?=") [top(strings) += " before( "] | as_xpr("?!") [top(strings) += " ~before( "] | as_xpr("?<=") [top(strings) += " after( "] | as_xpr("?(++mark_nbr) + "= "] ) >> x::ref(regex) >> as_xpr(')') [top(strings) += " ) "] ; setelem = as_xpr('\\') >> _ [top(strings) += " as_xpr('" + _ + "') "] | "[:" >> !as_xpr('^') [top(strings) += "~"] >> (+_w) [top(strings) += _ ] >> ":]" | ( (s1=~as_xpr(']')) >> '-' >> (s2=~as_xpr(']')) ) [top(strings) += "range('" + s1 + "','" + s2 + "')"] ; charset = !as_xpr('^') [top(strings) += " ~ "] >> nil [top(strings) += " set[ "] >> ( setelem | (~as_xpr(']')) [top(strings) += " as_xpr('" + _ + "') "] ) >>*( nil [top(strings) += " | "] >> ( setelem | (~as_xpr(']')) [top(strings) += "'" + _ + "'"] ) ) >> as_xpr(']') [top(strings) += " ] "] ; atom = ( +(lit >> ~before((set='*','+','?','{'))) | lit ) [top(strings) += " as_xpr(\"" + _ + "\") "] | as_xpr('.') [top(strings) += dot] | as_xpr('^') [top(strings) += bos] | as_xpr('$') [top(strings) += eos] | '\\' >> escape | '(' >> group | '[' >> charset ; repeat = as_xpr('{') [tmp = " repeat<"] >> (+_d) [tmp += _] >> !( as_xpr(',') [tmp += ","] >> ( (+_d) [tmp += _] | nil [tmp += "inf"] ) ) >> as_xpr('}') [top(strings) = tmp + ">( " + top(strings) + " ) "] ; quant = nil [push(strings, "")] >> atom >> !( ( as_xpr("*") [insert(top(strings), 0, " * ")] // [strings->*top()->*insert(0, " * ")] | as_xpr("+") [insert(top(strings), 0, " + ")] // [strings->*top()->*insert(0, " + ")] | as_xpr("?") [insert(top(strings), 0, " ! ")] // [strings->*top()->*insert(0, " ! ")] | repeat ) >> !as_xpr('?') [insert(top(strings), 0, " - ")] ) >> nil [tmp = top(strings), pop(strings), top(strings) += tmp] ; seq = quant >> *( nil [top(strings) += " >> "] >> quant ) ; alts = seq >> *( as_xpr('|') [top(strings) += " | "] >> seq ) ; regex = alts ; strings.get().push(""); if(!regex_match(argv[i], regex)) { std::cerr << "ERROR: unrecognized regular expression" << std::endl; return -1; } else if(nocase) { std::cout << "icase( " << strings.get().top() << " )" << std::endl; } else { std::cout << strings.get().top() << std::endl; } return 0; }