/* * * Copyright (c) 2004 * John Maddock * * Use, modification and distribution are subject to the * Boost Software License, Version 1.0. (See accompanying file * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) * */ /* * LOCATION: see http://www.boost.org for most recent version. * FILE mfc_example.cpp * VERSION see * DESCRIPTION: examples of using Boost.Regex with MFC and ATL string types. */ #include #ifdef BOOST_HAS_ICU #include #include #include // // Find out if *password* meets our password requirements, // as defined by the regular expression *requirements*. // bool is_valid_password(const U_NAMESPACE_QUALIFIER UnicodeString& password, const U_NAMESPACE_QUALIFIER UnicodeString& requirements) { return boost::u32regex_match(password, boost::make_u32regex(requirements)); } // // Extract filename part of a path from a UTF-8 encoded std::string and return the result // as another std::string: // std::string get_filename(const std::string& path) { boost::u32regex r = boost::make_u32regex("(?:\\A|.*\\\\)([^\\\\]+)"); boost::smatch what; if(boost::u32regex_match(path, what, r)) { // extract $1 as a std::string: return what.str(1); } else { throw std::runtime_error("Invalid pathname"); } } U_NAMESPACE_QUALIFIER UnicodeString extract_greek(const U_NAMESPACE_QUALIFIER UnicodeString& text) { // searches through some UTF-16 encoded text for a block encoded in Greek, // this expression is imperfect, but the best we can do for now - searching // for specific scripts is actually pretty hard to do right. boost::u32regex r = boost::make_u32regex(L"[\\x{370}-\\x{3FF}](?:[^[:L*:]]|[\\x{370}-\\x{3FF}])*"); boost::u16match what; if(boost::u32regex_search(text, what, r)) { // extract $0 as a UnicodeString: return U_NAMESPACE_QUALIFIER UnicodeString(what[0].first, what.length(0)); } else { throw std::runtime_error("No Greek found!"); } } void enumerate_currencies(const std::string& text) { // enumerate and print all the currency symbols, along // with any associated numeric values: const char* re = "([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?" "([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?" "(?(1)" "|(?(2)" "[[:Cf:][:Cc:][:Z*:]]*" ")" "[[:Sc:]]" ")"; boost::u32regex r = boost::make_u32regex(re); boost::u32regex_iterator i(boost::make_u32regex_iterator(text, r)), j; while(i != j) { std::cout << (*i)[0] << std::endl; ++i; } } void enumerate_currencies2(const std::string& text) { // enumerate and print all the currency symbols, along // with any associated numeric values: const char* re = "([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?" "([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?" "(?(1)" "|(?(2)" "[[:Cf:][:Cc:][:Z*:]]*" ")" "[[:Sc:]]" ")"; boost::u32regex r = boost::make_u32regex(re); boost::u32regex_token_iterator i(boost::make_u32regex_token_iterator(text, r, 1)), j; while(i != j) { std::cout << *i << std::endl; ++i; } } // // Take a credit card number as a string of digits, // and reformat it as a human readable string with "-" // separating each group of four digit;, // note that we're mixing a UTF-32 regex, with a UTF-16 // string and a UTF-8 format specifier, and it still all // just works: // const boost::u32regex e = boost::make_u32regex("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z"); const char* human_format = "$1-$2-$3-$4"; U_NAMESPACE_QUALIFIER UnicodeString human_readable_card_number(const U_NAMESPACE_QUALIFIER UnicodeString& s) { return boost::u32regex_replace(s, e, human_format); } int main() { // password checks using u32regex_match: U_NAMESPACE_QUALIFIER UnicodeString pwd = "abcDEF---"; U_NAMESPACE_QUALIFIER UnicodeString pwd_check = "(?=.*[[:lower:]])(?=.*[[:upper:]])(?=.*[[:punct:]]).{6,}"; bool b = is_valid_password(pwd, pwd_check); assert(b); pwd = "abcD-"; b = is_valid_password(pwd, pwd_check); assert(!b); // filename extraction with u32regex_match: std::string file = "abc.hpp"; file = get_filename(file); assert(file == "abc.hpp"); file = "c:\\a\\b\\c\\d.h"; file = get_filename(file); assert(file == "d.h"); // Greek text extraction with u32regex_search: const UChar t[] = { 'S', 'o', 'm', 'e', ' ', 'w', 'h', 'e', 'r', 'e', ' ', 'i', 'n', 0x0391, 0x039D, 0x0395, 0x0398, 0x0391, 0 }; const UChar g[] = { 0x0391, 0x039D, 0x0395, 0x0398, 0x0391, 0 }; U_NAMESPACE_QUALIFIER UnicodeString text = t; U_NAMESPACE_QUALIFIER UnicodeString greek = extract_greek(text); assert(greek == g); // extract currency symbols with associated value, use iterator interface: std::string text2 = " $100.23 or \xC2\xA3""198.12 "; // \xC2\xA3 is the pound sign encoded in UTF-8 enumerate_currencies(text2); enumerate_currencies2(text2); U_NAMESPACE_QUALIFIER UnicodeString credit_card_number = "1234567887654321"; credit_card_number = human_readable_card_number(credit_card_number); assert(credit_card_number == "1234-5678-8765-4321"); return 0; } #else #include int main() { std::cout << "ICU support not enabled, feature unavailable"; return 0; } #endif