test_boundary.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537
  1. //
  2. // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
  3. //
  4. // Distributed under the Boost Software License, Version 1.0. (See
  5. // accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt)
  7. //
  8. #ifndef BOOST_LOCALE_WITH_ICU
  9. #include <iostream>
  10. int main()
  11. {
  12. std::cout << "ICU is not build... Skipping" << std::endl;
  13. }
  14. #else
  15. #define BOOST_LOCALE_ERROR_LIMIT 100000
  16. #include <boost/locale/boundary.hpp>
  17. #include <boost/locale/generator.hpp>
  18. #include "test_locale.hpp"
  19. #include "test_locale_tools.hpp"
  20. #include <list>
  21. #include <unicode/uversion.h>
  22. // Debugging code
  23. template<typename Char>
  24. void print_str(std::basic_string<Char> const &/*s*/)
  25. {
  26. }
  27. template<>
  28. void print_str<char>(std::basic_string<char> const &s)
  29. {
  30. std::cout << "[" << s <<"]" << std::endl;
  31. }
  32. namespace lb = boost::locale::boundary;
  33. template<typename Char,typename Iterator>
  34. void test_word_container(Iterator begin,Iterator end,
  35. std::vector<int> const &ipos,
  36. std::vector<int> const &imasks,
  37. std::vector<std::basic_string<Char> > const &ichunks,
  38. std::locale l,
  39. lb::boundary_type bt=lb::word
  40. )
  41. {
  42. for(int sm=(bt == lb::word ? 31 : 3 ) ;sm>=0;sm--) {
  43. unsigned mask =
  44. ((sm & 1 ) != 0) * 0xF
  45. + ((sm & 2 ) != 0) * 0xF0
  46. + ((sm & 4 ) != 0) * 0xF00
  47. + ((sm & 8 ) != 0) * 0xF000
  48. + ((sm & 16) != 0) * 0xF0000;
  49. std::vector<int> masks,pos;
  50. std::vector<unsigned> bmasks;
  51. std::basic_string<Char> empty_chunk;
  52. std::vector<std::basic_string<Char> > chunks;
  53. std::vector<std::basic_string<Char> > fchunks;
  54. std::vector<Iterator> iters;
  55. iters.push_back(begin);
  56. bmasks.push_back(0);
  57. for(unsigned i=0;i<imasks.size();i++) {
  58. if(imasks[i] & mask) {
  59. masks.push_back(imasks[i]);
  60. chunks.push_back(ichunks[i]);
  61. fchunks.push_back(empty_chunk + ichunks[i]);
  62. empty_chunk.clear();
  63. pos.push_back(ipos[i]);
  64. }
  65. else {
  66. empty_chunk+=ichunks[i];
  67. }
  68. if((imasks[i] & mask) || i==imasks.size()-1){
  69. Iterator ptr=begin;
  70. std::advance(ptr,ipos[i]);
  71. iters.push_back(ptr);
  72. bmasks.push_back(imasks[i]);
  73. }
  74. }
  75. //
  76. // segment iterator tests
  77. //
  78. {
  79. lb::segment_index<Iterator> map(bt,begin,end,l);
  80. typedef typename lb::segment_index<Iterator>::iterator iter_type;
  81. map.rule(mask);
  82. {
  83. unsigned i=0;
  84. iter_type p;
  85. map.full_select(false);
  86. for(p=map.begin();p!=map.end();++p,i++) {
  87. TEST(p->str()==chunks[i]);
  88. TEST(p->rule() == unsigned(masks[i]));
  89. }
  90. TEST(chunks.size() == i);
  91. for(;;) {
  92. if(p==map.begin()) {
  93. TEST(i==0);
  94. break;
  95. }
  96. else {
  97. --p;
  98. TEST(p->str()==chunks[--i]);
  99. TEST(p->rule() == unsigned(masks[i]));
  100. }
  101. }
  102. for(i=0,p=map.end();i<chunks.size();i++){
  103. --p;
  104. unsigned index = chunks.size() - i - 1;
  105. TEST(p->str()==chunks[index]);
  106. TEST(p->rule() == unsigned(masks[index]));
  107. }
  108. TEST(p==map.begin());
  109. }
  110. {
  111. unsigned i=0;
  112. iter_type p;
  113. map.full_select(true);
  114. for(p=map.begin();p!=map.end();++p,i++) {
  115. TEST(p->str()==fchunks[i]);
  116. TEST(p->rule() == unsigned(masks[i]));
  117. }
  118. TEST(chunks.size() == i);
  119. for(;;) {
  120. if(p==map.begin()) {
  121. TEST(i==0);
  122. break;
  123. }
  124. else {
  125. --p;
  126. if(p->str()!=fchunks[i-1]) {
  127. print_str(p->str());
  128. print_str(fchunks[i-1]);
  129. }
  130. TEST(p->str()==fchunks[--i]);
  131. TEST(p->rule() == unsigned(masks[i]));
  132. }
  133. }
  134. for(i=0,p=map.end();i<chunks.size();i++){
  135. --p;
  136. unsigned index = chunks.size() - i - 1;
  137. TEST(p->str()==fchunks[index]);
  138. TEST(p->rule() == unsigned(masks[index]));
  139. }
  140. TEST(p==map.begin());
  141. }
  142. {
  143. iter_type p;
  144. unsigned chunk_ptr=0;
  145. unsigned i=0;
  146. map.full_select(false);
  147. for(Iterator optr=begin;optr!=end;optr++,i++) {
  148. p=map.find(optr);
  149. if(chunk_ptr < pos.size() && i>=unsigned(pos[chunk_ptr])){
  150. chunk_ptr++;
  151. }
  152. if(chunk_ptr>=pos.size()) {
  153. TEST(p==map.end());
  154. }
  155. else {
  156. TEST(p->str()==chunks[chunk_ptr]);
  157. TEST(p->rule()==unsigned(masks[chunk_ptr]));
  158. }
  159. }
  160. }
  161. {
  162. iter_type p;
  163. unsigned chunk_ptr=0;
  164. unsigned i=0;
  165. map.full_select(true);
  166. for(Iterator optr=begin;optr!=end;optr++,i++) {
  167. p=map.find(optr);
  168. if(chunk_ptr < pos.size() && i>=unsigned(pos[chunk_ptr])){
  169. chunk_ptr++;
  170. }
  171. if(chunk_ptr>=pos.size()) {
  172. TEST(p==map.end());
  173. }
  174. else {
  175. TEST(p->str()==fchunks[chunk_ptr]);
  176. TEST(p->rule()==unsigned(masks[chunk_ptr]));
  177. }
  178. }
  179. }
  180. } // segment iterator tests
  181. { // break iterator tests
  182. lb::boundary_point_index<Iterator> map(bt,begin,end,l);
  183. typedef typename lb::boundary_point_index<Iterator>::iterator iter_type;
  184. map.rule(mask);
  185. unsigned i=0;
  186. iter_type p;
  187. for(p=map.begin();p!=map.end();++p,i++) {
  188. TEST(p->iterator()==iters[i]);
  189. TEST(p->rule()==bmasks[i]);
  190. }
  191. TEST(iters.size() == i);
  192. do {
  193. --p;
  194. --i;
  195. TEST(p->iterator()==iters.at(i));
  196. } while(p!=map.begin());
  197. TEST(i==0);
  198. unsigned iters_ptr=0;
  199. for(Iterator optr=begin;optr!=end;optr++) {
  200. p=map.find(optr);
  201. TEST(p->iterator()==iters[iters_ptr]);
  202. if(iters.at(iters_ptr)==optr)
  203. iters_ptr++;
  204. }
  205. } // break iterator tests
  206. { // copy test
  207. typedef lb::segment_index<Iterator> ti_type;
  208. typedef lb::boundary_point_index<Iterator> bi_type;
  209. { // segment to bound
  210. ti_type ti(bt,begin,end,l);
  211. ti.rule(mask);
  212. {
  213. bi_type bi(ti);
  214. bi.rule(mask);
  215. unsigned i=0;
  216. typename bi_type::iterator p;
  217. for(p=bi.begin();p!=bi.end();++p,i++) {
  218. TEST(p->iterator()==iters[i]);
  219. TEST(p->rule()==bmasks[i]);
  220. }
  221. }
  222. {
  223. bi_type bi;
  224. bi.rule(mask);
  225. bi = ti;
  226. unsigned i=0;
  227. typename bi_type::iterator p;
  228. for(p=bi.begin();p!=bi.end();++p,i++) {
  229. TEST(p->iterator()==iters[i]);
  230. TEST(p->rule()==bmasks[i]);
  231. }
  232. }
  233. // boundary_point to bound
  234. bi_type bi_2(bt,begin,end,l);
  235. bi_2.rule(mask);
  236. {
  237. bi_type bi(bi_2);
  238. unsigned i=0;
  239. typename bi_type::iterator p;
  240. for(p=bi.begin();p!=bi.end();++p,i++) {
  241. TEST(p->iterator()==iters[i]);
  242. TEST(p->rule()==bmasks[i]);
  243. }
  244. }
  245. {
  246. bi_type bi;
  247. bi = bi_2;
  248. unsigned i=0;
  249. typename bi_type::iterator p;
  250. for(p=bi.begin();p!=bi.end();++p,i++) {
  251. TEST(p->iterator()==iters[i]);
  252. TEST(p->rule()==bmasks[i]);
  253. }
  254. }
  255. }
  256. { // boundary_point to segment
  257. bi_type bi(bt,begin,end,l);
  258. {
  259. ti_type ti(bi);
  260. ti.rule(mask);
  261. unsigned i=0;
  262. typename ti_type::iterator p;
  263. for(p=ti.begin();p!=ti.end();++p,i++) {
  264. TEST(p->str()==chunks[i]);
  265. TEST(p->rule()==unsigned(masks[i]));
  266. }
  267. }
  268. {
  269. ti_type ti;
  270. ti.rule(mask);
  271. ti = (bi);
  272. unsigned i=0;
  273. typename ti_type::iterator p;
  274. for(p=ti.begin();p!=ti.end();++p,i++) {
  275. TEST(p->str()==chunks[i]);
  276. TEST(p->rule()==unsigned(masks[i]));
  277. }
  278. }
  279. ti_type ti_2(bt,begin,end,l);
  280. ti_2.rule(mask);
  281. {
  282. ti_type ti(ti_2);
  283. unsigned i=0;
  284. typename ti_type::iterator p;
  285. for(p=ti.begin();p!=ti.end();++p,i++) {
  286. TEST(p->str()==chunks[i]);
  287. TEST(p->rule()==unsigned(masks[i]));
  288. }
  289. }
  290. {
  291. ti_type ti;
  292. ti = (ti_2);
  293. unsigned i=0;
  294. typename ti_type::iterator p;
  295. for(p=ti.begin();p!=ti.end();++p,i++) {
  296. TEST(p->str()==chunks[i]);
  297. TEST(p->rule()==unsigned(masks[i]));
  298. }
  299. }
  300. }
  301. }
  302. } // for mask
  303. }
  304. template<typename Char>
  305. void run_word(std::string *original,int *none,int *num,int *word,int *kana,int *ideo,std::locale l,lb::boundary_type b=lb::word)
  306. {
  307. std::vector<int> pos;
  308. std::vector<std::basic_string<Char> > chunks;
  309. std::vector<int> masks;
  310. std::basic_string<Char> test_string;
  311. for(int i=0;!original[i].empty();i++) {
  312. chunks.push_back(to_correct_string<Char>(original[i],l));
  313. test_string+=chunks.back();
  314. pos.push_back(test_string.size());
  315. masks.push_back(
  316. ( none ? none[i]*15 : 0)
  317. | ( num ? ((num[i]*15) << 4) : 0)
  318. | ( word ? ((word[i]*15) << 8) : 0)
  319. | ( kana ? ((kana[i]*15) << 12) : 0)
  320. | ( ideo ? ((ideo[i]*15) << 16) : 0)
  321. );
  322. }
  323. std::list<Char> lst(test_string.begin(),test_string.end());
  324. test_word_container<Char>(lst.begin(),lst.end(),pos,masks,chunks,l,b);
  325. test_word_container<Char>(test_string.begin(),test_string.end(),pos,masks,chunks,l,b);
  326. }
  327. std::string character[]={"שָ","ל","וֹ","ם","!",""};
  328. int nones[]={1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
  329. std::string sentence1[]={"To be\n","or not\n","to be?\n"," That is the question. ","Or maybe not",""};
  330. int sentence1a[]={ 0, 0, 1, 1, 0, 0};
  331. int sentence1b[]={ 1, 1, 0, 0, 1, 0};
  332. std::string line1[]={"To ","be\n","or ","not\n","to ","be",""};
  333. int line1a[]={ 1, 0, 1 , 0, 1, 1 , 0 };
  334. int line1b[]={ 0, 1, 0 , 1, 0, 0 , 0 };
  335. void test_boundaries(std::string *all,int *first,int *second,lb::boundary_type t)
  336. {
  337. boost::locale::generator g;
  338. std::cout << " char UTF-8" << std::endl;
  339. run_word<char>(all,first,second,0,0,0,g("he_IL.UTF-8"),t);
  340. std::cout << " char CP1255" << std::endl;
  341. run_word<char>(all,first,second,0,0,0,g("he_IL.cp1255"),t);
  342. std::cout << " wchar_t"<<std::endl;
  343. run_word<wchar_t>(all,first,second,0,0,0,g("he_IL.UTF-8"),t);
  344. #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
  345. std::cout << " char16_t"<<std::endl;
  346. run_word<char16_t>(all,first,second,0,0,0,g("he_IL.UTF-8"),t);
  347. #endif
  348. #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
  349. std::cout << " char32_t"<<std::endl;
  350. run_word<char32_t>(all,first,second,0,0,0,g("he_IL.UTF-8"),t);
  351. #endif
  352. }
  353. void word_boundary()
  354. {
  355. boost::locale::generator g;
  356. //std::string all1[]={"10"," ","Hello"," ","Windows7"," ","平仮名","ひらがな","ヒラガナ",""};
  357. //std::string all1[]={"10"," ","Hello"," ","Windows7"," ","平仮名","ひん","アヒル",""};
  358. std::string all1[]={"10"," ","Hello"," ","Windows7"," ","平仮名","アヒル",""};
  359. int none1[]={ 0, 1, 0, 1, 0, 1, 0, 0, 0};
  360. int num1[]={ 1, 0, 0, 0, 1, 0, 0 , 0 , 0};
  361. int word1[]={ 0, 0, 1, 0, 1, 0, 0 , 0 , 0};
  362. #if U_ICU_VERSION_MAJOR_NUM >= 50
  363. int kana1[]={ 0, 0, 0, 0, 0, 0, 0, 0 , 0};
  364. int ideo1[]={ 0, 0, 0, 0, 0, 0, 1, 1 , 1};
  365. #else
  366. int kana1[]={ 0, 0, 0, 0, 0, 0, 0, 1 , 1};
  367. int ideo1[]={ 0, 0, 0, 0, 0, 0, 1, 0 , 0};
  368. #endif
  369. int zero[25]={0};
  370. std::string all2[]={""};
  371. std::string all3[]={" "," ","Hello",",","World","!"," ",""};
  372. int none3[]={ 1, 1, 0, 1, 0, 1, 1, 0};
  373. int word3[]={ 0, 0, 1, 0, 1, 0, 0, 0};
  374. std::cout << " char UTF-8" << std::endl;
  375. run_word<char>(all1,none1,num1,word1,kana1,ideo1,g("ja_JP.UTF-8"));
  376. run_word<char>(all2,zero,zero,zero,zero,zero,g("en_US.UTF-8"));
  377. run_word<char>(all3,none3,zero,word3,zero,zero,g("en_US.UTF-8"));
  378. std::cout << " char Shift-JIS" << std::endl;
  379. run_word<char>(all1,none1,num1,word1,kana1,ideo1,g("ja_JP.Shift-JIS"));
  380. run_word<char>(all2,zero,zero,zero,zero,zero,g("ja_JP.Shift-JIS"));
  381. run_word<char>(all3,none3,zero,word3,zero,zero,g("ja_JP.Shift-JIS"));
  382. std::cout << " wchar_t"<<std::endl;
  383. run_word<wchar_t>(all1,none1,num1,word1,kana1,ideo1,g("ja_JP.UTF-8"));
  384. run_word<wchar_t>(all2,zero,zero,zero,zero,zero,g("en_US.UTF-8"));
  385. run_word<wchar_t>(all3,none3,zero,word3,zero,zero,g("en_US.UTF-8"));
  386. #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
  387. std::cout << " char16_t"<<std::endl;
  388. run_word<char16_t>(all1,none1,num1,word1,kana1,ideo1,g("ja_JP.UTF-8"));
  389. run_word<char16_t>(all2,zero,zero,zero,zero,zero,g("en_US.UTF-8"));
  390. run_word<char16_t>(all3,none3,zero,word3,zero,zero,g("en_US.UTF-8"));
  391. #endif
  392. #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
  393. std::cout << " char32_t"<<std::endl;
  394. run_word<char32_t>(all1,none1,num1,word1,kana1,ideo1,g("ja_JP.UTF-8"));
  395. run_word<char32_t>(all2,zero,zero,zero,zero,zero,g("en_US.UTF-8"));
  396. run_word<char32_t>(all3,none3,zero,word3,zero,zero,g("en_US.UTF-8"));
  397. #endif
  398. }
  399. void test_op_one_side(std::string const &sl,std::string const &sr,int val)
  400. {
  401. boost::locale::boundary::ssegment l(sl.begin(),sl.end(),0),r(sr.begin(),sr.end(),0);
  402. // segment
  403. TEST( (l==r) == (val==0));
  404. TEST( (l!=r) == (val!=0));
  405. TEST( (l<=r) == (val<=0));
  406. TEST( (l< r) == (val<0));
  407. TEST( (l>=r) == (val>=0));
  408. TEST( (l> r) == (val>0));
  409. // C string
  410. TEST( (l==sr.c_str()) == (val==0));
  411. TEST( (l!=sr.c_str()) == (val!=0));
  412. TEST( (l<=sr.c_str()) == (val<=0));
  413. TEST( (l< sr.c_str()) == (val<0));
  414. TEST( (l>=sr.c_str()) == (val>=0));
  415. TEST( (l> sr.c_str()) == (val>0));
  416. TEST( (sl.c_str()==r) == (val==0));
  417. TEST( (sl.c_str()!=r) == (val!=0));
  418. TEST( (sl.c_str()<=r) == (val<=0));
  419. TEST( (sl.c_str()< r) == (val<0));
  420. TEST( (sl.c_str()>=r) == (val>=0));
  421. TEST( (sl.c_str()> r) == (val>0));
  422. // C++ string
  423. TEST( (l==sr) == (val==0));
  424. TEST( (l!=sr) == (val!=0));
  425. TEST( (l<=sr) == (val<=0));
  426. TEST( (l< sr) == (val<0));
  427. TEST( (l>=sr) == (val>=0));
  428. TEST( (l> sr) == (val>0));
  429. TEST( (sl==r) == (val==0));
  430. TEST( (sl!=r) == (val!=0));
  431. TEST( (sl<=r) == (val<=0));
  432. TEST( (sl< r) == (val<0));
  433. TEST( (sl>=r) == (val>=0));
  434. TEST( (sl> r) == (val>0));
  435. // self check
  436. TEST( (sl==sr) == (val==0));
  437. TEST( (sl!=sr) == (val!=0));
  438. TEST( (sl<=sr) == (val<=0));
  439. TEST( (sl< sr) == (val<0));
  440. TEST( (sl>=sr) == (val>=0));
  441. TEST( (sl> sr) == (val>0));
  442. }
  443. void test_op(std::string const &sl,std::string const &sr,int val)
  444. {
  445. test_op_one_side(sl,sr,val);
  446. test_op_one_side(sr,sl,-val);
  447. }
  448. void segment_operator()
  449. {
  450. test_op("","a",-1);
  451. test_op("","",0);
  452. test_op("aa","aaa",-1);
  453. test_op("aa","ab",-1);
  454. }
  455. int main()
  456. {
  457. try {
  458. std::cout << "Testing segment operators" << std::endl;
  459. segment_operator();
  460. std::cout << "Testing word boundary" << std::endl;
  461. word_boundary();
  462. std::cout << "Testing character boundary" << std::endl;
  463. test_boundaries(character,nones,0,lb::character);
  464. std::cout << "Testing sentence boundary" << std::endl;
  465. test_boundaries(sentence1,sentence1a,sentence1b,lb::sentence);
  466. std::cout << "Testing line boundary" << std::endl;
  467. test_boundaries(line1,line1a,line1b,lb::line);
  468. }
  469. catch(std::exception const &e) {
  470. std::cerr << "Failed " << e.what() << std::endl;
  471. return EXIT_FAILURE;
  472. }
  473. FINALIZE();
  474. }
  475. #endif // NOICU
  476. // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
  477. // boostinspect:noascii