123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188 |
- ///////////////////////////////////////////////////////////////////////////////
- // perl2xpr.cpp
- // A utility for translating a Perl regular expression into an
- // xpressive static regular expression.
- //
- // Copyright 2007 Eric Niebler. Distributed under the Boost
- // Software License, Version 1.0. (See accompanying file
- // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- #include <stack>
- #include <string>
- #include <iostream>
- #include <boost/xpressive/xpressive_static.hpp>
- #include <boost/xpressive/regex_actions.hpp>
- namespace x = boost::xpressive;
- using namespace x;
- int main(int argc, char *argv[])
- {
- int i = 1, j = 1;
- bool nocase = false;
- char const *dot = " ~_n ";
- char const *bos = " bos ";
- char const *eos = " eos ";
- for(; i < argc && '-' == *argv[i]; argv[i][++j]? 0: (j=1,++i))
- {
- switch(argv[i][j])
- {
- case 'i': // perl /i modifier
- nocase = true;
- break;
- case 's': // perl /s modifier
- dot = " _ ";
- break;
- case 'm': // perl /m modifier
- bos = " bol ";
- eos = " eol ";
- break;
- default:
- std::cerr << "Unknown option : " << argv[i] << std::endl;
- return -1;
- }
- }
- if(i == argc)
- {
- std::cerr << "Usage:\n perl2xpr [-i] [-s] [-m] 're'\n";
- return -1;
- }
- // Local variables used by the semantic actions below
- local<int> mark_nbr;
- local<std::string> tmp;
- local<std::stack<std::string> > strings;
- // The rules in the dynamic regex grammar
- cregex regex, alts, seq, quant, repeat, atom, escape, group, lit, charset, setelem;
- lit = ~(set='.','^','$','*','+','?','(',')','{','}','[',']','\\','|')
- ;
- escape = as_xpr('b') [top(strings) += " _b "]
- | as_xpr('B') [top(strings) += " ~_b "]
- | as_xpr('d') [top(strings) += " _d "]
- | as_xpr('D') [top(strings) += " ~_d "]
- | as_xpr('s') [top(strings) += " _s "]
- | as_xpr('S') [top(strings) += " ~_s "]
- | as_xpr('w') [top(strings) += " _w "]
- | as_xpr('W') [top(strings) += " ~_w "]
- | _d [top(strings) += " s" + _ + " "]
- | _ [top(strings) += " as_xpr('" + _ + "') "]
- ;
- group = (
- as_xpr("?:") [top(strings) += " ( "]
- | as_xpr("?i:") [top(strings) += " icase( "]
- | as_xpr("?>") [top(strings) += " keep( "]
- | as_xpr("?=") [top(strings) += " before( "]
- | as_xpr("?!") [top(strings) += " ~before( "]
- | as_xpr("?<=") [top(strings) += " after( "]
- | as_xpr("?<!") [top(strings) += " ~after( "]
- | nil [top(strings) += " ( s" + as<std::string>(++mark_nbr) + "= "]
- )
- >> x::ref(regex)
- >> as_xpr(')') [top(strings) += " ) "]
- ;
- setelem = as_xpr('\\') >> _ [top(strings) += " as_xpr('" + _ + "') "]
- | "[:" >> !as_xpr('^') [top(strings) += "~"]
- >> (+_w) [top(strings) += _ ]
- >> ":]"
- | (
- (s1=~as_xpr(']'))
- >> '-'
- >> (s2=~as_xpr(']'))
- ) [top(strings) += "range('" + s1 + "','" + s2 + "')"]
- ;
- charset = !as_xpr('^') [top(strings) += " ~ "]
- >> nil [top(strings) += " set[ "]
- >> (
- setelem
- | (~as_xpr(']')) [top(strings) += " as_xpr('" + _ + "') "]
- )
- >>*(
- nil [top(strings) += " | "]
- >> (
- setelem
- | (~as_xpr(']')) [top(strings) += "'" + _ + "'"]
- )
- )
- >> as_xpr(']') [top(strings) += " ] "]
- ;
- atom = (
- +(lit >> ~before((set='*','+','?','{')))
- | lit
- ) [top(strings) += " as_xpr(\"" + _ + "\") "]
- | as_xpr('.') [top(strings) += dot]
- | as_xpr('^') [top(strings) += bos]
- | as_xpr('$') [top(strings) += eos]
- | '\\' >> escape
- | '(' >> group
- | '[' >> charset
- ;
- repeat = as_xpr('{') [tmp = " repeat<"]
- >> (+_d) [tmp += _]
- >> !(
- as_xpr(',') [tmp += ","]
- >> (
- (+_d) [tmp += _]
- | nil [tmp += "inf"]
- )
- )
- >> as_xpr('}') [top(strings) = tmp + ">( " + top(strings) + " ) "]
- ;
- quant = nil [push(strings, "")]
- >> atom
- >> !(
- (
- as_xpr("*") [insert(top(strings), 0, " * ")] // [strings->*top()->*insert(0, " * ")]
- | as_xpr("+") [insert(top(strings), 0, " + ")] // [strings->*top()->*insert(0, " + ")]
- | as_xpr("?") [insert(top(strings), 0, " ! ")] // [strings->*top()->*insert(0, " ! ")]
- | repeat
- )
- >> !as_xpr('?') [insert(top(strings), 0, " - ")]
- )
- >> nil [tmp = top(strings), pop(strings), top(strings) += tmp]
- ;
- seq = quant
- >> *(
- nil [top(strings) += " >> "]
- >> quant
- )
- ;
- alts = seq
- >> *(
- as_xpr('|') [top(strings) += " | "]
- >> seq
- )
- ;
- regex = alts
- ;
- strings.get().push("");
- if(!regex_match(argv[i], regex))
- {
- std::cerr << "ERROR: unrecognized regular expression" << std::endl;
- return -1;
- }
- else if(nocase)
- {
- std::cout << "icase( " << strings.get().top() << " )" << std::endl;
- }
- else
- {
- std::cout << strings.get().top() << std::endl;
- }
- return 0;
- }
|