123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248 |
- /////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
- // test_utf8_codecvt.cpp
- // (C) Copyright 2002-4 Robert Ramey - http://www.rrsd.com .
- // Use, modification and distribution is subject to the Boost Software
- // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
- // http://www.boost.org/LICENSE_1_0.txt)
- #include <algorithm>
- #include <fstream>
- #include <iostream>
- #include <iterator>
- #include <locale>
- #include <vector>
- #include <string>
- #include <cstddef> // size_t
- #include <boost/config.hpp>
- #if defined(BOOST_NO_STDC_NAMESPACE)
- namespace std{
- using ::size_t;
- } // namespace std
- #endif
- #include <cwchar>
- #ifdef BOOST_NO_STDC_NAMESPACE
- namespace std{ using ::wcslen; }
- #endif
- #include "../test/test_tools.hpp"
- #include <boost/archive/iterators/istream_iterator.hpp>
- #include <boost/archive/iterators/ostream_iterator.hpp>
- #include <boost/archive/add_facet.hpp>
- #include <boost/archive/detail/utf8_codecvt_facet.hpp>
// Reference data for the UTF-8 conversion test, selected by the size of
// wchar_t in bytes (test_main instantiates test_data<sizeof(wchar_t)>).
// The primary template only declares the two arrays; in this file they are
// defined by the explicit specializations for 2 and 4.
//   utf8_encoding  - the UTF-8 byte stream
//   wchar_encoding - the wide characters that byte stream represents
template<std::size_t wchar_size>
struct test_data
{
    static unsigned char utf8_encoding[];
    static wchar_t wchar_encoding[];
};
- template<>
- unsigned char test_data<2>::utf8_encoding[] = {
- 0x01,
- 0x7f,
- 0xc2, 0x80,
- 0xdf, 0xbf,
- 0xe0, 0xa0, 0x80,
- 0xe7, 0xbf, 0xbf
- };
- template<>
- wchar_t test_data<2>::wchar_encoding[] = {
- 0x0001,
- 0x007f,
- 0x0080,
- 0x07ff,
- 0x0800,
- 0x7fff
- };
- template<>
- unsigned char test_data<4>::utf8_encoding[] = {
- 0x01,
- 0x7f,
- 0xc2, 0x80,
- 0xdf, 0xbf,
- 0xe0, 0xa0, 0x80,
- 0xef, 0xbf, 0xbf,
- 0xf0, 0x90, 0x80, 0x80,
- 0xf4, 0x8f, 0xbf, 0xbf,
- 0xf7, 0xbf, 0xbf, 0xbf,
- 0xf8, 0x88, 0x80, 0x80, 0x80,
- 0xfb, 0xbf, 0xbf, 0xbf, 0xbf,
- 0xfc, 0x84, 0x80, 0x80, 0x80, 0x80,
- 0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf
- };
- template<>
- wchar_t test_data<4>::wchar_encoding[] = {
- 0x00000001,
- 0x0000007f,
- 0x00000080,
- 0x000007ff,
- 0x00000800,
- 0x0000ffff,
- 0x00010000,
- 0x0010ffff,
- 0x001fffff,
- 0x00200000,
- 0x03ffffff,
- 0x04000000,
- 0x7fffffff
- };
- int
- test_main(int /* argc */, char * /* argv */[]) {
- std::locale old_loc;
- std::locale * utf8_locale
- = boost::archive::add_facet(
- old_loc,
- new boost::archive::detail::utf8_codecvt_facet
- );
- typedef char utf8_t;
- typedef test_data<sizeof(wchar_t)> td;
- // Send our test UTF-8 data to file
- {
- std::ofstream ofs;
- ofs.open("test.dat", std::ios::binary);
- std::copy(
- td::utf8_encoding,
- #if ! defined(__BORLANDC__)
- // borland 5.60 complains about this
- td::utf8_encoding + sizeof(td::utf8_encoding) / sizeof(unsigned char),
- #else
- // so use this instead
- td::utf8_encoding + 12,
- #endif
- boost::archive::iterators::ostream_iterator<utf8_t>(ofs)
- );
- }
- // Read the test data back in, converting to UCS-4 on the way in
- std::vector<wchar_t> from_file;
- {
- std::wifstream ifs;
- ifs.imbue(*utf8_locale);
- ifs.open("test.dat");
- wchar_t item = 0;
- // note can't use normal vector from iterator constructor because
- // dinkumware doesn't have it.
- for(;;){
- item = ifs.get();
- if(item == WEOF)
- break;
- //ifs >> item;
- //if(ifs.eof())
- // break;
- from_file.push_back(item);
- }
- }
- // compare the data read back in with the orginal
- #if ! defined(__BORLANDC__)
- // borland 5.60 complains about this
- BOOST_CHECK(from_file.size() == sizeof(td::wchar_encoding)/sizeof(wchar_t));
- #else
- // so use this instead
- BOOST_CHECK(from_file.size() == 6);
- #endif
- BOOST_CHECK(std::equal(from_file.begin(), from_file.end(), td::wchar_encoding));
-
- // Send the UCS4_data back out, converting to UTF-8
- {
- std::wofstream ofs;
- ofs.imbue(*utf8_locale);
- ofs.open("test2.dat");
- std::copy(
- from_file.begin(),
- from_file.end(),
- boost::archive::iterators::ostream_iterator<wchar_t>(ofs)
- );
- }
- // Make sure that both files are the same
- {
- typedef boost::archive::iterators::istream_iterator<utf8_t> is_iter;
- is_iter end_iter;
- std::ifstream ifs1("test.dat");
- is_iter it1(ifs1);
- std::vector<utf8_t> data1;
- std::copy(it1, end_iter, std::back_inserter(data1));
- std::ifstream ifs2("test2.dat");
- is_iter it2(ifs2);
- std::vector<utf8_t> data2;
- std::copy(it2, end_iter, std::back_inserter(data2));
- BOOST_CHECK(data1 == data2);
- }
- // some libraries have trouble that only shows up with longer strings
-
- wchar_t * test3_data = L"\
- <?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\" ?>\
- <!DOCTYPE boost_serialization>\
- <boost_serialization signature=\"serialization::archive\" version=\"3\">\
- <a class_id=\"0\" tracking_level=\"0\">\
- <b>1</b>\
- <f>96953204</f>\
- <g>177129195</g>\
- <l>1</l>\
- <m>5627</m>\
- <n>23010</n>\
- <o>7419</o>\
- <p>16212</p>\
- <q>4086</q>\
- <r>2749</r>\
- <c>-33</c>\
- <s>124</s>\
- <t>28</t>\
- <u>32225</u>\
- <v>17543</v>\
- <w>0.84431422</w>\
- <x>1.0170664757130923</x>\
- <y>tjbx</y>\
- <z>cuwjentqpkejp</z>\
- </a>\
- </boost_serialization>\
- ";
-
- // Send the UCS4_data back out, converting to UTF-8
- std::size_t l = std::wcslen(test3_data);
- {
- std::wofstream ofs;
- ofs.imbue(*utf8_locale);
- ofs.open("test3.dat");
- std::copy(
- test3_data,
- test3_data + l,
- boost::archive::iterators::ostream_iterator<wchar_t>(ofs)
- );
- }
- // Make sure that both files are the same
- {
- std::wifstream ifs;
- ifs.imbue(*utf8_locale);
- ifs.open("test3.dat");
- BOOST_CHECK(
- std::equal(
- test3_data,
- test3_data + l,
- boost::archive::iterators::istream_iterator<wchar_t>(ifs)
- )
- );
- }
- delete utf8_locale;
- return EXIT_SUCCESS;
- }
|