segment.hpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483
  1. //
  2. // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
  3. //
  4. // Distributed under the Boost Software License, Version 1.0. (See
  5. // accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt)
  7. //
  8. #ifndef BOOST_LOCALE_BOUNDARY_SEGMENT_HPP_INCLUDED
  9. #define BOOST_LOCALE_BOUNDARY_SEGMENT_HPP_INCLUDED
  10. #include <boost/locale/config.hpp>
  11. #ifdef BOOST_MSVC
  12. # pragma warning(push)
  13. # pragma warning(disable : 4275 4251 4231 4660)
  14. #endif
  15. #include <locale>
  16. #include <string>
  17. #include <iosfwd>
  18. #include <iterator>
  19. namespace boost {
  20. namespace locale {
  21. namespace boundary {
  22. /// \cond INTERNAL
  namespace details {
      ///
      /// Three-way lexicographic comparison of the ranges [l_begin,l_end) and [r_begin,r_end).
      ///
      /// Every character of the right range is converted to the value type of the left
      /// iterator and the pair is compared using std::char_traits of that type.
      ///
      /// \return -1 if the left range orders before the right one, 1 if it orders after it and
      /// 0 if both ranges hold the same sequence. A range that is a strict prefix of the
      /// other orders before it.
      ///
      template<typename LeftIterator,typename RightIterator>
      int compare_text(LeftIterator l_begin,LeftIterator l_end,RightIterator r_begin,RightIterator r_end)
      {
          typedef typename std::iterator_traits<LeftIterator>::value_type char_type;
          typedef std::char_traits<char_type> traits;

          // Walk both ranges in lock step until one of them is exhausted
          // or the first differing character decides the result.
          while(l_begin!=l_end && r_begin!=r_end) {
              char_type const left_char = *l_begin;
              char_type const right_char = *r_begin;
              ++l_begin;
              ++r_begin;
              if(!traits::eq(left_char,right_char))
                  return traits::lt(left_char,right_char) ? -1 : 1;
          }

          // All common positions are equal: the shorter range (if any) orders first.
          if(l_begin!=l_end)
              return 1;
          return (r_begin==r_end) ? 0 : -1;
      }

      ///
      /// Three-way comparison of two objects exposing begin() and end();
      /// forwards to the iterator based compare_text.
      ///
      template<typename Left,typename Right>
      int compare_text(Left const &l,Right const &r)
      {
          return compare_text(l.begin(),l.end(),r.begin(),r.end());
      }

      ///
      /// Three-way comparison of a range \a l with a zero terminated string \a begin.
      /// The terminator itself does not take part in the comparison.
      ///
      template<typename Left,typename Char>
      int compare_string(Left const &l,Char const *begin)
      {
          // Locate the terminating zero to obtain the end of the right range
          Char const *terminator = begin;
          for(; *terminator!=0; ++terminator) {
          }
          return compare_text(l.begin(),l.end(),begin,terminator);
      }

      ///
      /// Three-way comparison of a zero terminated string \a begin with a range \a r.
      /// The terminator itself does not take part in the comparison.
      ///
      template<typename Right,typename Char>
      int compare_string(Char const *begin,Right const &r)
      {
          // Locate the terminating zero to obtain the end of the left range
          Char const *terminator = begin;
          for(; *terminator!=0; ++terminator) {
          }
          return compare_text(begin,terminator,r.begin(),r.end());
      }
  }
  69. /// \endcond
  70. ///
  71. /// \addtogroup boundary
  72. /// @{
  73. ///
  74. /// \brief a segment object that represents a pair of two iterators that define the range where
  75. /// this segment exists and a rule that defines it.
  76. ///
  77. /// This type of object is dereferenced by the iterators of segment_index. Using a rule() member function
  78. /// you can get a specific rule this segment was selected with. For example, when you use
  79. /// word boundary analysis, you can check if the specific word contains Kana letters by checking (rule() & \ref word_kana)!=0
  80. /// For a sentence analysis you can check if the sentence is selected because a sentence terminator is found (\ref sentence_term) or
  81. /// there is a line break (\ref sentence_sep).
  82. ///
  83. /// This object can be automatically converted to std::basic_string with the same type of character. It is also
  84. /// a valid range that has begin() and end() member functions returning iterators on the location of the segment.
  85. ///
  86. /// \see
  87. ///
  88. /// - \ref segment_index
  89. /// - \ref boundary_point
  90. /// - \ref boundary_point_index
  91. ///
  92. template<typename IteratorType>
  93. class segment : public std::pair<IteratorType,IteratorType> {
  94. public:
  95. ///
  96. /// The type of the underlying character
  97. ///
  98. typedef typename std::iterator_traits<IteratorType>::value_type char_type;
  99. ///
  100. /// The type of the string it is converted to
  101. ///
  102. typedef std::basic_string<char_type> string_type;
  103. ///
  104. /// The value that iterators return - the character itself
  105. ///
  106. typedef char_type value_type;
  107. ///
  108. /// The iterator that allows to iterate the range
  109. ///
  110. typedef IteratorType iterator;
  111. ///
  112. /// The iterator that allows to iterate the range
  113. ///
  114. typedef IteratorType const_iterator;
  115. ///
  116. /// The type that represent a difference between two iterators
  117. ///
  118. typedef typename std::iterator_traits<IteratorType>::difference_type difference_type;
  119. ///
  120. /// Default constructor
  121. ///
  122. segment() {}
  123. ///
  124. /// Create a segment using two iterators and a rule that represents this point
  125. ///
  126. segment(iterator b,iterator e,rule_type r) :
  127. std::pair<IteratorType,IteratorType>(b,e),
  128. rule_(r)
  129. {
  130. }
  131. ///
  132. /// Set the start of the range
  133. ///
  134. void begin(iterator const &v)
  135. {
  136. this->first = v;
  137. }
  138. ///
  139. /// Set the end of the range
  140. ///
  141. void end(iterator const &v)
  142. {
  143. this->second = v;
  144. }
  145. ///
  146. /// Get the start of the range
  147. ///
  148. IteratorType begin() const
  149. {
  150. return this->first;
  151. }
  152. ///
  153. /// Set the end of the range
  154. ///
  155. IteratorType end() const
  156. {
  157. return this->second;
  158. }
  159. ///
  160. /// Convert the range to a string automatically
  161. ///
  162. template <class T, class A>
  163. operator std::basic_string<char_type, T, A> ()const
  164. {
  165. return std::basic_string<char_type, T, A>(this->first, this->second);
  166. }
  167. ///
  168. /// Create a string from the range explicitly
  169. ///
  170. string_type str() const
  171. {
  172. return string_type(begin(),end());
  173. }
  174. ///
  175. /// Get the length of the text chunk
  176. ///
  177. size_t length() const
  178. {
  179. return std::distance(begin(),end());
  180. }
  181. ///
  182. /// Check if the segment is empty
  183. ///
  184. bool empty() const
  185. {
  186. return begin() == end();
  187. }
  188. ///
  189. /// Get the rule that is used for selection of this segment.
  190. ///
  191. rule_type rule() const
  192. {
  193. return rule_;
  194. }
  195. ///
  196. /// Set a rule that is used for segment selection
  197. ///
  198. void rule(rule_type r)
  199. {
  200. rule_ = r;
  201. }
  202. // make sure we override std::pair's operator==
  203. /// Compare two segments
  204. bool operator==(segment const &other)
  205. {
  206. return details::compare_text(*this,other) == 0;
  207. }
  208. /// Compare two segments
  209. bool operator!=(segment const &other)
  210. {
  211. return details::compare_text(*this,other) != 0;
  212. }
  213. private:
  214. rule_type rule_;
  215. };
  216. /// Compare two segments
  217. template<typename IteratorL,typename IteratorR>
  218. bool operator==(segment<IteratorL> const &l,segment<IteratorR> const &r)
  219. {
  220. return details::compare_text(l,r) == 0;
  221. }
  222. /// Compare two segments
  223. template<typename IteratorL,typename IteratorR>
  224. bool operator!=(segment<IteratorL> const &l,segment<IteratorR> const &r)
  225. {
  226. return details::compare_text(l,r) != 0;
  227. }
  228. /// Compare two segments
  229. template<typename IteratorL,typename IteratorR>
  230. bool operator<(segment<IteratorL> const &l,segment<IteratorR> const &r)
  231. {
  232. return details::compare_text(l,r) < 0;
  233. }
  234. /// Compare two segments
  235. template<typename IteratorL,typename IteratorR>
  236. bool operator<=(segment<IteratorL> const &l,segment<IteratorR> const &r)
  237. {
  238. return details::compare_text(l,r) <= 0;
  239. }
  240. /// Compare two segments
  241. template<typename IteratorL,typename IteratorR>
  242. bool operator>(segment<IteratorL> const &l,segment<IteratorR> const &r)
  243. {
  244. return details::compare_text(l,r) > 0;
  245. }
  246. /// Compare two segments
  247. template<typename IteratorL,typename IteratorR>
  248. bool operator>=(segment<IteratorL> const &l,segment<IteratorR> const &r)
  249. {
  250. return details::compare_text(l,r) >= 0;
  251. }
  252. /// Compare string and segment
  253. template<typename CharType,typename Traits,typename Alloc,typename IteratorR>
  254. bool operator==(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r)
  255. {
  256. return details::compare_text(l,r) == 0;
  257. }
  258. /// Compare string and segment
  259. template<typename CharType,typename Traits,typename Alloc,typename IteratorR>
  260. bool operator!=(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r)
  261. {
  262. return details::compare_text(l,r) != 0;
  263. }
  264. /// Compare string and segment
  265. template<typename CharType,typename Traits,typename Alloc,typename IteratorR>
  266. bool operator<(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r)
  267. {
  268. return details::compare_text(l,r) < 0;
  269. }
  270. /// Compare string and segment
  271. template<typename CharType,typename Traits,typename Alloc,typename IteratorR>
  272. bool operator<=(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r)
  273. {
  274. return details::compare_text(l,r) <= 0;
  275. }
  276. /// Compare string and segment
  277. template<typename CharType,typename Traits,typename Alloc,typename IteratorR>
  278. bool operator>(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r)
  279. {
  280. return details::compare_text(l,r) > 0;
  281. }
  282. /// Compare string and segment
  283. template<typename CharType,typename Traits,typename Alloc,typename IteratorR>
  284. bool operator>=(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r)
  285. {
  286. return details::compare_text(l,r) >= 0;
  287. }
  288. /// Compare string and segment
  289. template<typename Iterator,typename CharType,typename Traits,typename Alloc>
  290. bool operator==(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r)
  291. {
  292. return details::compare_text(l,r) == 0;
  293. }
  294. /// Compare string and segment
  295. template<typename Iterator,typename CharType,typename Traits,typename Alloc>
  296. bool operator!=(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r)
  297. {
  298. return details::compare_text(l,r) != 0;
  299. }
  300. /// Compare string and segment
  301. template<typename Iterator,typename CharType,typename Traits,typename Alloc>
  302. bool operator<(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r)
  303. {
  304. return details::compare_text(l,r) < 0;
  305. }
  306. /// Compare string and segment
  307. template<typename Iterator,typename CharType,typename Traits,typename Alloc>
  308. bool operator<=(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r)
  309. {
  310. return details::compare_text(l,r) <= 0;
  311. }
  312. /// Compare string and segment
  313. template<typename Iterator,typename CharType,typename Traits,typename Alloc>
  314. bool operator>(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r)
  315. {
  316. return details::compare_text(l,r) > 0;
  317. }
  318. /// Compare string and segment
  319. template<typename Iterator,typename CharType,typename Traits,typename Alloc>
  320. bool operator>=(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r)
  321. {
  322. return details::compare_text(l,r) >= 0;
  323. }
  324. /// Compare C string and segment
  325. template<typename CharType,typename IteratorR>
  326. bool operator==(CharType const *l,segment<IteratorR> const &r)
  327. {
  328. return details::compare_string(l,r) == 0;
  329. }
  330. /// Compare C string and segment
  331. template<typename CharType,typename IteratorR>
  332. bool operator!=(CharType const *l,segment<IteratorR> const &r)
  333. {
  334. return details::compare_string(l,r) != 0;
  335. }
  336. /// Compare C string and segment
  337. template<typename CharType,typename IteratorR>
  338. bool operator<(CharType const *l,segment<IteratorR> const &r)
  339. {
  340. return details::compare_string(l,r) < 0;
  341. }
  342. /// Compare C string and segment
  343. template<typename CharType,typename IteratorR>
  344. bool operator<=(CharType const *l,segment<IteratorR> const &r)
  345. {
  346. return details::compare_string(l,r) <= 0;
  347. }
  348. /// Compare C string and segment
  349. template<typename CharType,typename IteratorR>
  350. bool operator>(CharType const *l,segment<IteratorR> const &r)
  351. {
  352. return details::compare_string(l,r) > 0;
  353. }
  354. /// Compare C string and segment
  355. template<typename CharType,typename IteratorR>
  356. bool operator>=(CharType const *l,segment<IteratorR> const &r)
  357. {
  358. return details::compare_string(l,r) >= 0;
  359. }
  360. /// Compare C string and segment
  361. template<typename Iterator,typename CharType>
  362. bool operator==(segment<Iterator> const &l,CharType const *r)
  363. {
  364. return details::compare_string(l,r) == 0;
  365. }
  366. /// Compare C string and segment
  367. template<typename Iterator,typename CharType>
  368. bool operator!=(segment<Iterator> const &l,CharType const *r)
  369. {
  370. return details::compare_string(l,r) != 0;
  371. }
  372. /// Compare C string and segment
  373. template<typename Iterator,typename CharType>
  374. bool operator<(segment<Iterator> const &l,CharType const *r)
  375. {
  376. return details::compare_string(l,r) < 0;
  377. }
  378. /// Compare C string and segment
  379. template<typename Iterator,typename CharType>
  380. bool operator<=(segment<Iterator> const &l,CharType const *r)
  381. {
  382. return details::compare_string(l,r) <= 0;
  383. }
  384. /// Compare C string and segment
  385. template<typename Iterator,typename CharType>
  386. bool operator>(segment<Iterator> const &l,CharType const *r)
  387. {
  388. return details::compare_string(l,r) > 0;
  389. }
  390. /// Compare C string and segment
  391. template<typename Iterator,typename CharType>
  392. bool operator>=(segment<Iterator> const &l,CharType const *r)
  393. {
  394. return details::compare_string(l,r) >= 0;
  395. }
  396. typedef segment<std::string::const_iterator> ssegment; ///< convenience typedef
  397. typedef segment<std::wstring::const_iterator> wssegment; ///< convenience typedef
  398. #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
  399. typedef segment<std::u16string::const_iterator> u16ssegment;///< convenience typedef
  400. #endif
  401. #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
  402. typedef segment<std::u32string::const_iterator> u32ssegment;///< convenience typedef
  403. #endif
  404. typedef segment<char const *> csegment; ///< convenience typedef
  405. typedef segment<wchar_t const *> wcsegment; ///< convenience typedef
  406. #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
  407. typedef segment<char16_t const *> u16csegment; ///< convenience typedef
  408. #endif
  409. #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
  410. typedef segment<char32_t const *> u32csegment; ///< convenience typedef
  411. #endif
  412. ///
  413. /// Write the segment to the stream character by character
  414. ///
  415. template<typename CharType,typename TraitsType,typename Iterator>
  416. std::basic_ostream<CharType,TraitsType> &operator<<(
  417. std::basic_ostream<CharType,TraitsType> &out,
  418. segment<Iterator> const &tok)
  419. {
  420. for(Iterator p=tok.begin(),e=tok.end();p!=e;++p)
  421. out << *p;
  422. return out;
  423. }
  424. /// @}
  425. } // boundary
  426. } // locale
  427. } // boost
  428. #ifdef BOOST_MSVC
  429. #pragma warning(pop)
  430. #endif
  431. #endif
  432. // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4