// // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) // // Distributed under the Boost Software License, Version 1.0. (See // accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // #ifndef BOOST_LOCALE_WITH_ICU #include int main() { std::cout << "ICU is not build... Skipping" << std::endl; } #else #define BOOST_LOCALE_ERROR_LIMIT 100000 #include #include #include "test_locale.hpp" #include "test_locale_tools.hpp" #include #include // Debugging code template void print_str(std::basic_string const &/*s*/) { } template<> void print_str(std::basic_string const &s) { std::cout << "[" << s <<"]" << std::endl; } namespace lb = boost::locale::boundary; template void test_word_container(Iterator begin,Iterator end, std::vector const &ipos, std::vector const &imasks, std::vector > const &ichunks, std::locale l, lb::boundary_type bt=lb::word ) { for(int sm=(bt == lb::word ? 31 : 3 ) ;sm>=0;sm--) { unsigned mask = ((sm & 1 ) != 0) * 0xF + ((sm & 2 ) != 0) * 0xF0 + ((sm & 4 ) != 0) * 0xF00 + ((sm & 8 ) != 0) * 0xF000 + ((sm & 16) != 0) * 0xF0000; std::vector masks,pos; std::vector bmasks; std::basic_string empty_chunk; std::vector > chunks; std::vector > fchunks; std::vector iters; iters.push_back(begin); bmasks.push_back(0); for(unsigned i=0;i map(bt,begin,end,l); typedef typename lb::segment_index::iterator iter_type; map.rule(mask); { unsigned i=0; iter_type p; map.full_select(false); for(p=map.begin();p!=map.end();++p,i++) { TEST(p->str()==chunks[i]); TEST(p->rule() == unsigned(masks[i])); } TEST(chunks.size() == i); for(;;) { if(p==map.begin()) { TEST(i==0); break; } else { --p; TEST(p->str()==chunks[--i]); TEST(p->rule() == unsigned(masks[i])); } } for(i=0,p=map.end();istr()==chunks[index]); TEST(p->rule() == unsigned(masks[index])); } TEST(p==map.begin()); } { unsigned i=0; iter_type p; map.full_select(true); for(p=map.begin();p!=map.end();++p,i++) { TEST(p->str()==fchunks[i]); TEST(p->rule() == unsigned(masks[i])); } TEST(chunks.size() == i); for(;;) { if(p==map.begin()) { TEST(i==0); break; } else { --p; if(p->str()!=fchunks[i-1]) { print_str(p->str()); print_str(fchunks[i-1]); } TEST(p->str()==fchunks[--i]); TEST(p->rule() == unsigned(masks[i])); } } for(i=0,p=map.end();istr()==fchunks[index]); TEST(p->rule() == unsigned(masks[index])); } TEST(p==map.begin()); } { iter_type p; unsigned chunk_ptr=0; unsigned i=0; map.full_select(false); for(Iterator optr=begin;optr!=end;optr++,i++) { p=map.find(optr); if(chunk_ptr < pos.size() && i>=unsigned(pos[chunk_ptr])){ chunk_ptr++; } if(chunk_ptr>=pos.size()) { TEST(p==map.end()); } else { TEST(p->str()==chunks[chunk_ptr]); TEST(p->rule()==unsigned(masks[chunk_ptr])); } } } { iter_type p; unsigned chunk_ptr=0; unsigned i=0; map.full_select(true); for(Iterator optr=begin;optr!=end;optr++,i++) { p=map.find(optr); if(chunk_ptr < pos.size() && i>=unsigned(pos[chunk_ptr])){ chunk_ptr++; } if(chunk_ptr>=pos.size()) { TEST(p==map.end()); } else { TEST(p->str()==fchunks[chunk_ptr]); TEST(p->rule()==unsigned(masks[chunk_ptr])); } } } } // segment iterator tests { // break iterator tests lb::boundary_point_index map(bt,begin,end,l); typedef typename lb::boundary_point_index::iterator iter_type; map.rule(mask); unsigned i=0; iter_type p; for(p=map.begin();p!=map.end();++p,i++) { TEST(p->iterator()==iters[i]); TEST(p->rule()==bmasks[i]); } TEST(iters.size() == i); do { --p; --i; TEST(p->iterator()==iters.at(i)); } while(p!=map.begin()); TEST(i==0); unsigned iters_ptr=0; for(Iterator optr=begin;optr!=end;optr++) { p=map.find(optr); TEST(p->iterator()==iters[iters_ptr]); if(iters.at(iters_ptr)==optr) iters_ptr++; } } // break iterator tests { // copy test typedef lb::segment_index ti_type; typedef lb::boundary_point_index bi_type; { // segment to bound ti_type ti(bt,begin,end,l); ti.rule(mask); { bi_type bi(ti); bi.rule(mask); unsigned i=0; typename bi_type::iterator p; for(p=bi.begin();p!=bi.end();++p,i++) { TEST(p->iterator()==iters[i]); TEST(p->rule()==bmasks[i]); } } { bi_type bi; bi.rule(mask); bi = ti; unsigned i=0; typename bi_type::iterator p; for(p=bi.begin();p!=bi.end();++p,i++) { TEST(p->iterator()==iters[i]); TEST(p->rule()==bmasks[i]); } } // boundary_point to bound bi_type bi_2(bt,begin,end,l); bi_2.rule(mask); { bi_type bi(bi_2); unsigned i=0; typename bi_type::iterator p; for(p=bi.begin();p!=bi.end();++p,i++) { TEST(p->iterator()==iters[i]); TEST(p->rule()==bmasks[i]); } } { bi_type bi; bi = bi_2; unsigned i=0; typename bi_type::iterator p; for(p=bi.begin();p!=bi.end();++p,i++) { TEST(p->iterator()==iters[i]); TEST(p->rule()==bmasks[i]); } } } { // boundary_point to segment bi_type bi(bt,begin,end,l); { ti_type ti(bi); ti.rule(mask); unsigned i=0; typename ti_type::iterator p; for(p=ti.begin();p!=ti.end();++p,i++) { TEST(p->str()==chunks[i]); TEST(p->rule()==unsigned(masks[i])); } } { ti_type ti; ti.rule(mask); ti = (bi); unsigned i=0; typename ti_type::iterator p; for(p=ti.begin();p!=ti.end();++p,i++) { TEST(p->str()==chunks[i]); TEST(p->rule()==unsigned(masks[i])); } } ti_type ti_2(bt,begin,end,l); ti_2.rule(mask); { ti_type ti(ti_2); unsigned i=0; typename ti_type::iterator p; for(p=ti.begin();p!=ti.end();++p,i++) { TEST(p->str()==chunks[i]); TEST(p->rule()==unsigned(masks[i])); } } { ti_type ti; ti = (ti_2); unsigned i=0; typename ti_type::iterator p; for(p=ti.begin();p!=ti.end();++p,i++) { TEST(p->str()==chunks[i]); TEST(p->rule()==unsigned(masks[i])); } } } } } // for mask } template void run_word(std::string *original,int *none,int *num,int *word,int *kana,int *ideo,std::locale l,lb::boundary_type b=lb::word) { std::vector pos; std::vector > chunks; std::vector masks; std::basic_string test_string; for(int i=0;!original[i].empty();i++) { chunks.push_back(to_correct_string(original[i],l)); test_string+=chunks.back(); pos.push_back(test_string.size()); masks.push_back( ( none ? none[i]*15 : 0) | ( num ? ((num[i]*15) << 4) : 0) | ( word ? ((word[i]*15) << 8) : 0) | ( kana ? ((kana[i]*15) << 12) : 0) | ( ideo ? ((ideo[i]*15) << 16) : 0) ); } std::list lst(test_string.begin(),test_string.end()); test_word_container(lst.begin(),lst.end(),pos,masks,chunks,l,b); test_word_container(test_string.begin(),test_string.end(),pos,masks,chunks,l,b); } std::string character[]={"שָ","ל","וֹ","ם","!",""}; int nones[]={1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; std::string sentence1[]={"To be\n","or not\n","to be?\n"," That is the question. ","Or maybe not",""}; int sentence1a[]={ 0, 0, 1, 1, 0, 0}; int sentence1b[]={ 1, 1, 0, 0, 1, 0}; std::string line1[]={"To ","be\n","or ","not\n","to ","be",""}; int line1a[]={ 1, 0, 1 , 0, 1, 1 , 0 }; int line1b[]={ 0, 1, 0 , 1, 0, 0 , 0 }; void test_boundaries(std::string *all,int *first,int *second,lb::boundary_type t) { boost::locale::generator g; std::cout << " char UTF-8" << std::endl; run_word(all,first,second,0,0,0,g("he_IL.UTF-8"),t); std::cout << " char CP1255" << std::endl; run_word(all,first,second,0,0,0,g("he_IL.cp1255"),t); std::cout << " wchar_t"<(all,first,second,0,0,0,g("he_IL.UTF-8"),t); #ifdef BOOST_LOCALE_ENABLE_CHAR16_T std::cout << " char16_t"<(all,first,second,0,0,0,g("he_IL.UTF-8"),t); #endif #ifdef BOOST_LOCALE_ENABLE_CHAR32_T std::cout << " char32_t"<(all,first,second,0,0,0,g("he_IL.UTF-8"),t); #endif } void word_boundary() { boost::locale::generator g; //std::string all1[]={"10"," ","Hello"," ","Windows7"," ","平仮名","ひらがな","ヒラガナ",""}; //std::string all1[]={"10"," ","Hello"," ","Windows7"," ","平仮名","ひん","アヒル",""}; std::string all1[]={"10"," ","Hello"," ","Windows7"," ","平仮名","アヒル",""}; int none1[]={ 0, 1, 0, 1, 0, 1, 0, 0, 0}; int num1[]={ 1, 0, 0, 0, 1, 0, 0 , 0 , 0}; int word1[]={ 0, 0, 1, 0, 1, 0, 0 , 0 , 0}; #if U_ICU_VERSION_MAJOR_NUM >= 50 int kana1[]={ 0, 0, 0, 0, 0, 0, 0, 0 , 0}; int ideo1[]={ 0, 0, 0, 0, 0, 0, 1, 1 , 1}; #else int kana1[]={ 0, 0, 0, 0, 0, 0, 0, 1 , 1}; int ideo1[]={ 0, 0, 0, 0, 0, 0, 1, 0 , 0}; #endif int zero[25]={0}; std::string all2[]={""}; std::string all3[]={" "," ","Hello",",","World","!"," ",""}; int none3[]={ 1, 1, 0, 1, 0, 1, 1, 0}; int word3[]={ 0, 0, 1, 0, 1, 0, 0, 0}; std::cout << " char UTF-8" << std::endl; run_word(all1,none1,num1,word1,kana1,ideo1,g("ja_JP.UTF-8")); run_word(all2,zero,zero,zero,zero,zero,g("en_US.UTF-8")); run_word(all3,none3,zero,word3,zero,zero,g("en_US.UTF-8")); std::cout << " char Shift-JIS" << std::endl; run_word(all1,none1,num1,word1,kana1,ideo1,g("ja_JP.Shift-JIS")); run_word(all2,zero,zero,zero,zero,zero,g("ja_JP.Shift-JIS")); run_word(all3,none3,zero,word3,zero,zero,g("ja_JP.Shift-JIS")); std::cout << " wchar_t"<(all1,none1,num1,word1,kana1,ideo1,g("ja_JP.UTF-8")); run_word(all2,zero,zero,zero,zero,zero,g("en_US.UTF-8")); run_word(all3,none3,zero,word3,zero,zero,g("en_US.UTF-8")); #ifdef BOOST_LOCALE_ENABLE_CHAR16_T std::cout << " char16_t"<(all1,none1,num1,word1,kana1,ideo1,g("ja_JP.UTF-8")); run_word(all2,zero,zero,zero,zero,zero,g("en_US.UTF-8")); run_word(all3,none3,zero,word3,zero,zero,g("en_US.UTF-8")); #endif #ifdef BOOST_LOCALE_ENABLE_CHAR32_T std::cout << " char32_t"<(all1,none1,num1,word1,kana1,ideo1,g("ja_JP.UTF-8")); run_word(all2,zero,zero,zero,zero,zero,g("en_US.UTF-8")); run_word(all3,none3,zero,word3,zero,zero,g("en_US.UTF-8")); #endif } void test_op_one_side(std::string const &sl,std::string const &sr,int val) { boost::locale::boundary::ssegment l(sl.begin(),sl.end(),0),r(sr.begin(),sr.end(),0); // segment TEST( (l==r) == (val==0)); TEST( (l!=r) == (val!=0)); TEST( (l<=r) == (val<=0)); TEST( (l< r) == (val<0)); TEST( (l>=r) == (val>=0)); TEST( (l> r) == (val>0)); // C string TEST( (l==sr.c_str()) == (val==0)); TEST( (l!=sr.c_str()) == (val!=0)); TEST( (l<=sr.c_str()) == (val<=0)); TEST( (l< sr.c_str()) == (val<0)); TEST( (l>=sr.c_str()) == (val>=0)); TEST( (l> sr.c_str()) == (val>0)); TEST( (sl.c_str()==r) == (val==0)); TEST( (sl.c_str()!=r) == (val!=0)); TEST( (sl.c_str()<=r) == (val<=0)); TEST( (sl.c_str()< r) == (val<0)); TEST( (sl.c_str()>=r) == (val>=0)); TEST( (sl.c_str()> r) == (val>0)); // C++ string TEST( (l==sr) == (val==0)); TEST( (l!=sr) == (val!=0)); TEST( (l<=sr) == (val<=0)); TEST( (l< sr) == (val<0)); TEST( (l>=sr) == (val>=0)); TEST( (l> sr) == (val>0)); TEST( (sl==r) == (val==0)); TEST( (sl!=r) == (val!=0)); TEST( (sl<=r) == (val<=0)); TEST( (sl< r) == (val<0)); TEST( (sl>=r) == (val>=0)); TEST( (sl> r) == (val>0)); // self check TEST( (sl==sr) == (val==0)); TEST( (sl!=sr) == (val!=0)); TEST( (sl<=sr) == (val<=0)); TEST( (sl< sr) == (val<0)); TEST( (sl>=sr) == (val>=0)); TEST( (sl> sr) == (val>0)); } void test_op(std::string const &sl,std::string const &sr,int val) { test_op_one_side(sl,sr,val); test_op_one_side(sr,sl,-val); } void segment_operator() { test_op("","a",-1); test_op("","",0); test_op("aa","aaa",-1); test_op("aa","ab",-1); } int main() { try { std::cout << "Testing segment operators" << std::endl; segment_operator(); std::cout << "Testing word boundary" << std::endl; word_boundary(); std::cout << "Testing character boundary" << std::endl; test_boundaries(character,nones,0,lb::character); std::cout << "Testing sentence boundary" << std::endl; test_boundaries(sentence1,sentence1a,sentence1b,lb::sentence); std::cout << "Testing line boundary" << std::endl; test_boundaries(line1,line1a,line1b,lb::line); } catch(std::exception const &e) { std::cerr << "Failed " << e.what() << std::endl; return EXIT_FAILURE; } FINALIZE(); } #endif // NOICU // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 // boostinspect:noascii