123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517 |
- //
- // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
- //
- // Distributed under the Boost Software License, Version 1.0. (See
- // accompanying file LICENSE_1_0.txt or copy at
- // http://www.boost.org/LICENSE_1_0.txt)
- //
#include <boost/locale/encoding.hpp>
#include <boost/locale/generator.hpp>
#include <boost/locale/localization_backend.hpp>
#include <boost/locale/info.hpp>
#include <boost/locale/config.hpp>
#include <cstdio>
#include <fstream>
#include "test_locale.hpp"
#include "test_locale_tools.hpp"
#ifndef BOOST_LOCALE_NO_POSIX_BACKEND
# ifdef __APPLE__
# include <xlocale.h>
# endif
# include <locale.h>
#endif
#if !defined(BOOST_LOCALE_WITH_ICU) && !defined(BOOST_LOCALE_WITH_ICONV) && (defined(BOOST_WINDOWS) || defined(__CYGWIN__))
#ifndef NOMINMAX
# define NOMINMAX
#endif
#include <windows.h>
#endif
// Switches selecting which encoding families are exercised.
// main() reassigns test_iso, test_utf and test_sjis for every backend;
// test_iso_8859_8 stays true unless main() clears it on Windows builds.
bool test_iso = false;
bool test_iso_8859_8 = true;
bool test_utf = false;
bool test_sjis = false;

// Locale names handed to the generator for the 8-bit and Shift-JIS tests;
// main() fills them in per backend.
std::string he_il_8bit;
std::string en_us_8bit;
std::string ja_jp_shiftjis;
// Extracts characters from `in` one by one until a read fails (end of input
// or a stream error) and returns everything that was extracted.
template<typename Char>
std::basic_string<Char> read_file(std::basic_istream<Char> &in)
{
    std::basic_string<Char> content;
    for(Char ch; in.get(ch); )
        content.push_back(ch);
    return content;
}
- template<typename Char>
- void test_ok(std::string file,std::locale const &l,std::basic_string<Char> cmp=std::basic_string<Char>())
- {
- if(cmp.empty())
- cmp=to<Char>(file);
- std::ofstream test("testi.txt");
- test << file;
- test.close();
- typedef std::basic_fstream<Char> stream_type;
- stream_type f1("testi.txt",stream_type::in);
- f1.imbue(l);
- TEST(read_file<Char>(f1) == cmp);
- f1.close();
- stream_type f2("testo.txt",stream_type::out);
- f2.imbue(l);
- f2 << cmp;
- f2.close();
- std::ifstream testo("testo.txt");
- TEST(read_file<char>(testo) == file);
- }
// Checks that reading `file` through a Char stream imbued with `l` fails.
//
// The bytes of `file` are written to "testi.txt" and then read back one
// character at a time. A failure during the first `pos` reads is accepted;
// if all of them succeed, the following read is required to fail.
//
// file - narrow content containing an invalid sequence
// l    - locale imbued on the Char stream
// pos  - number of characters that may be read before the failure
//
// The scratch file is deleted on every exit path so that the test does not
// leave it behind in the working directory.
template<typename Char>
void test_rfail(std::string file,std::locale const &l,int pos)
{
    // Deletes the named file when it goes out of scope. Declared before the
    // streams, so they are destroyed (and thus closed) before the removal.
    struct scratch_file {
        char const *path;
        explicit scratch_file(char const *p) : path(p) {}
        ~scratch_file() { std::remove(path); }
    };
    scratch_file const remove_input("testi.txt");

    std::ofstream test("testi.txt");
    test << file;
    test.close();
    typedef std::basic_fstream<Char> stream_type;
    stream_type f1("testi.txt",stream_type::in);
    f1.imbue(l);
    Char c;
    for(int i=0;i<pos;i++) {
        f1.get(c);
        if(f1.fail()) { // failed earlier: the error was detected while reading ahead
            return;
        }
        TEST(f1);
    }
    // if all reads up to pos succeeded, the next one MUST fail
    TEST(f1.get(c).fail());
}
- template<typename Char>
- void test_wfail(std::string file,std::locale const &l,int pos)
- {
- typedef std::basic_fstream<Char> stream_type;
- stream_type f1("testo.txt",stream_type::out);
- f1.imbue(l);
- std::basic_string<Char> out=to<Char>(file);
- int i;
- for(i=0;i<pos;i++) {
- f1 << out.at(i);
- f1<<std::flush;
- TEST(f1.good());
- }
- f1 << out.at(i);
- TEST(f1.fail() || (f1<<std::flush).fail());
- }
- template<typename Char>
- void test_for_char()
- {
- boost::locale::generator g;
- if(test_utf) {
- std::cout << " UTF-8" << std::endl;
- test_ok<Char>("grüße\nn i",g("en_US.UTF-8"));
- test_rfail<Char>("abc\xFF\xFF",g("en_US.UTF-8"),3);
- std::cout << " Testing codepoints above 0xFFFF" << std::endl;
- std::cout << " Single U+2008A" << std::endl;
- test_ok<Char>("\xf0\xa0\x82\x8a",g("en_US.UTF-8")); // U+2008A
- std::cout << " Single U+2008A withing text" << std::endl;
- test_ok<Char>("abc\"\xf0\xa0\x82\x8a\"",g("en_US.UTF-8")); // U+2008A
- std::string one = "\xf0\xa0\x82\x8a";
- std::string res;
- for(unsigned i=0;i<1000;i++)
- res+=one;
- std::cout << " U+2008A x 1000" << std::endl;
- test_ok<Char>(res.c_str(),g("en_US.UTF-8")); // U+2008A
- }
- else {
- std::cout << " UTF-8 Not supported " << std::endl;
- }
-
- if(test_iso) {
- if(test_iso_8859_8) {
- std::cout << " ISO8859-8" << std::endl;
- test_ok<Char>("hello \xf9\xec\xe5\xed",g(he_il_8bit),to<Char>("hello שלום"));
- }
- std::cout << " ISO8859-1" << std::endl;
- test_ok<Char>(to<char>("grüße\nn i"),g(en_us_8bit),to<Char>("grüße\nn i"));
- test_wfail<Char>("grüßen שלום",g(en_us_8bit),7);
- }
- if(test_sjis) {
- std::cout << " Shift-JIS" << std::endl;
- test_ok<Char>("\x93\xfa\x96\x7b",g(ja_jp_shiftjis),
- boost::locale::conv::to_utf<Char>("\xe6\x97\xa5\xe6\x9c\xac","UTF-8")); // Japan
- }
- }
- void test_wide_io()
- {
- std::cout << " wchar_t" << std::endl;
- test_for_char<wchar_t>();
-
- #if defined BOOST_LOCALE_ENABLE_CHAR16_T && !defined(BOOST_NO_CHAR16_T_CODECVT)
- std::cout << " char16_t" << std::endl;
- test_for_char<char16_t>();
- #endif
- #if defined BOOST_LOCALE_ENABLE_CHAR32_T && !defined(BOOST_NO_CHAR32_T_CODECVT)
- std::cout << " char32_t" << std::endl;
- test_for_char<char32_t>();
- #endif
- }
- template<typename Char>
- void test_pos(std::string source,std::basic_string<Char> target,std::string encoding)
- {
- using namespace boost::locale::conv;
- boost::locale::generator g;
- std::locale l= encoding == "ISO8859-8" ? g("he_IL."+encoding) : g("en_US."+encoding);
- TEST(to_utf<Char>(source,encoding)==target);
- TEST(to_utf<Char>(source.c_str(),encoding)==target);
- TEST(to_utf<Char>(source.c_str(),source.c_str()+source.size(),encoding)==target);
-
- TEST(to_utf<Char>(source,l)==target);
- TEST(to_utf<Char>(source.c_str(),l)==target);
- TEST(to_utf<Char>(source.c_str(),source.c_str()+source.size(),l)==target);
- TEST(from_utf<Char>(target,encoding)==source);
- TEST(from_utf<Char>(target.c_str(),encoding)==source);
- TEST(from_utf<Char>(target.c_str(),target.c_str()+target.size(),encoding)==source);
-
- TEST(from_utf<Char>(target,l)==source);
- TEST(from_utf<Char>(target.c_str(),l)==source);
- TEST(from_utf<Char>(target.c_str(),target.c_str()+target.size(),l)==source);
- }
- #define TESTF(X) TEST_THROWS(X,boost::locale::conv::conversion_error)
- template<typename Char>
- void test_to_neg(std::string source,std::basic_string<Char> target,std::string encoding)
- {
- using namespace boost::locale::conv;
- boost::locale::generator g;
- std::locale l=g("en_US."+encoding);
- TEST(to_utf<Char>(source,encoding)==target);
- TEST(to_utf<Char>(source.c_str(),encoding)==target);
- TEST(to_utf<Char>(source.c_str(),source.c_str()+source.size(),encoding)==target);
- TEST(to_utf<Char>(source,l)==target);
- TEST(to_utf<Char>(source.c_str(),l)==target);
- TEST(to_utf<Char>(source.c_str(),source.c_str()+source.size(),l)==target);
- TESTF(to_utf<Char>(source,encoding,stop));
- TESTF(to_utf<Char>(source.c_str(),encoding,stop));
- TESTF(to_utf<Char>(source.c_str(),source.c_str()+source.size(),encoding,stop));
- TESTF(to_utf<Char>(source,l,stop));
- TESTF(to_utf<Char>(source.c_str(),l,stop));
- TESTF(to_utf<Char>(source.c_str(),source.c_str()+source.size(),l,stop));
- }
- template<typename Char>
- void test_from_neg(std::basic_string<Char> source,std::string target,std::string encoding)
- {
- using namespace boost::locale::conv;
- boost::locale::generator g;
- std::locale l=g("en_US."+encoding);
- TEST(from_utf<Char>(source,encoding)==target);
- TEST(from_utf<Char>(source.c_str(),encoding)==target);
- TEST(from_utf<Char>(source.c_str(),source.c_str()+source.size(),encoding)==target);
- TEST(from_utf<Char>(source,l)==target);
- TEST(from_utf<Char>(source.c_str(),l)==target);
- TEST(from_utf<Char>(source.c_str(),source.c_str()+source.size(),l)==target);
- TESTF(from_utf<Char>(source,encoding,stop));
- TESTF(from_utf<Char>(source.c_str(),encoding,stop));
- TESTF(from_utf<Char>(source.c_str(),source.c_str()+source.size(),encoding,stop));
- TESTF(from_utf<Char>(source,l,stop));
- TESTF(from_utf<Char>(source.c_str(),l,stop));
- TESTF(from_utf<Char>(source.c_str(),source.c_str()+source.size(),l,stop));
- }
- template<typename Char>
- std::basic_string<Char> utf(char const *s)
- {
- return to<Char>(s);
- }
- template<>
- std::basic_string<char> utf(char const *s)
- {
- return s;
- }
- template<typename Char>
- void test_with_0()
- {
- std::string a("abc\0\0 yz\0",3+2+3+1);
- TEST(boost::locale::conv::from_utf<Char>(boost::locale::conv::to_utf<Char>(a,"UTF-8"),"UTF-8") == a);
- TEST(boost::locale::conv::from_utf<Char>(boost::locale::conv::to_utf<Char>(a,"ISO8859-1"),"ISO8859-1") == a);
- }
- template<typename Char,int n=sizeof(Char)>
- struct utfutf;
- template<>
- struct utfutf<char,1> {
- static char const *ok() {return "grüßen";}
- static char const *bad() { return "gr\xFF" "üßen"; }
- // split into 2 to make SunCC happy
- };
- template<>
- struct utfutf<wchar_t,2> {
- static wchar_t const *ok(){ return L"\x67\x72\xfc\xdf\x65\x6e"; }
- static wchar_t const *bad() {
- static wchar_t buf[256] = L"\x67\x72\xFF\xfc\xFE\xFD\xdf\x65\x6e";
- buf[2]=0xDC01; // second surrogate must not be
- buf[4]=0xD801; // First
- buf[5]=0xD801; // Must be surrogate trail
- return buf;
- }
- };
- template<>
- struct utfutf<wchar_t,4> {
- static wchar_t const *ok(){ return L"\x67\x72\xfc\xdf\x65\x6e"; }
- static wchar_t const *bad() {
- static wchar_t buf[256] = L"\x67\x72\xFF\xfc\xdf\x65\x6e";
- buf[2]=static_cast<wchar_t>(0x1000000); // > 10FFFF
- return buf;
- }
- };
- template<typename CharOut,typename CharIn>
- void test_combinations()
- {
- using boost::locale::conv::utf_to_utf;
- typedef utfutf<CharOut> out;
- typedef utfutf<CharIn> in;
- TEST( (utf_to_utf<CharOut,CharIn>(in::ok())==out::ok()) );
- TESTF( (utf_to_utf<CharOut,CharIn>(in::bad(),boost::locale::conv::stop)) );
- TEST( (utf_to_utf<CharOut,CharIn>(in::bad())==out::ok()) );
- }
- void test_all_combinations()
- {
- std::cout << "Testing utf_to_utf" << std::endl;
- std::cout <<" char<-char"<<std::endl;
- test_combinations<char,char>();
- std::cout <<" char<-wchar"<<std::endl;
- test_combinations<char,wchar_t>();
- std::cout <<" wchar<-char"<<std::endl;
- test_combinations<wchar_t,char>();
- std::cout <<" wchar<-wchar"<<std::endl;
- test_combinations<wchar_t,wchar_t>();
- }
- template<typename Char>
- void test_to()
- {
- test_pos<Char>(to<char>("grüßen"),utf<Char>("grüßen"),"ISO8859-1");
- if(test_iso_8859_8)
- test_pos<Char>("\xf9\xec\xe5\xed",utf<Char>("שלום"),"ISO8859-8");
- test_pos<Char>("grüßen",utf<Char>("grüßen"),"UTF-8");
- test_pos<Char>("abc\"\xf0\xa0\x82\x8a\"",utf<Char>("abc\"\xf0\xa0\x82\x8a\""),"UTF-8");
-
- test_to_neg<Char>("g\xFFrüßen",utf<Char>("grüßen"),"UTF-8");
- test_from_neg<Char>(utf<Char>("hello שלום"),"hello ","ISO8859-1");
-
- test_with_0<Char>();
- }
- void test_skip(char const *enc,char const *utf,char const *name,char const *opt=0)
- {
- if(opt!=0) {
- if(boost::locale::conv::to_utf<char>(enc,name) == opt) {
- test_skip(enc,opt,name);
- return;
- }
- }
- TEST(boost::locale::conv::to_utf<char>(enc,name) == utf);
- TEST(boost::locale::conv::to_utf<wchar_t>(enc,name) == boost::locale::conv::utf_to_utf<wchar_t>(utf));
- #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
- TEST(boost::locale::conv::to_utf<char16_t>(enc,name) == boost::locale::conv::utf_to_utf<char16_t>(utf));
- #endif
- #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
- TEST(boost::locale::conv::to_utf<char32_t>(enc,name) == boost::locale::conv::utf_to_utf<char32_t>(utf));
- #endif
- }
- void test_simple_conversions()
- {
- namespace blc=boost::locale::conv;
- std::cout << "- Testing correct invalid bytes skipping" << std::endl;
- try {
- std::cout << "-- ISO-8859-8" << std::endl;
- test_skip("test \xE0\xE1\xFB-","test \xd7\x90\xd7\x91-","ISO-8859-8");
- test_skip("\xFB","","ISO-8859-8");
- test_skip("test \xE0\xE1\xFB","test \xd7\x90\xd7\x91","ISO-8859-8");
- test_skip("\xFB-","-","ISO-8859-8");
- }
- catch(blc::invalid_charset_error const &) {
- std::cout <<"--- not supported" << std::endl;
- }
- try {
- std::cout << "-- cp932" << std::endl;
- test_skip("test\xE0\xA0 \x83\xF8-","test\xe7\x87\xbf -","cp932","test\xe7\x87\xbf ");
- test_skip("\x83\xF8","","cp932");
- test_skip("test\xE0\xA0 \x83\xF8","test\xe7\x87\xbf ","cp932");
- test_skip("\x83\xF8-","-","cp932","");
- }
- catch(blc::invalid_charset_error const &) {
- std::cout <<"--- not supported" << std::endl;
- }
- }
- int main()
- {
- try {
- std::vector<std::string> def;
- #ifdef BOOST_LOCALE_WITH_ICU
- def.push_back("icu");
- #endif
- #ifndef BOOST_LOCALE_NO_STD_BACKEND
- def.push_back("std");
- #endif
- #ifndef BOOST_LOCALE_NO_WINAPI_BACKEND
- def.push_back("winapi");
- #endif
- #ifndef BOOST_LOCALE_NO_POSIX_BACKEND
- def.push_back("posix");
- #endif
- #if !defined(BOOST_LOCALE_WITH_ICU) && !defined(BOOST_LOCALE_WITH_ICONV) && (defined(BOOST_WINDOWS) || defined(__CYGWIN__))
- test_iso_8859_8 = IsValidCodePage(28598)!=0;
- #endif
- test_simple_conversions();
-
-
- for(int type = 0; type < int(def.size()); type ++ ) {
- boost::locale::localization_backend_manager tmp_backend = boost::locale::localization_backend_manager::global();
- tmp_backend.select(def[type]);
- boost::locale::localization_backend_manager::global(tmp_backend);
-
- std::string bname = def[type];
-
- if(bname=="std") {
- en_us_8bit = get_std_name("en_US.ISO8859-1");
- he_il_8bit = get_std_name("he_IL.ISO8859-8");
- ja_jp_shiftjis = get_std_name("ja_JP.SJIS");
- if(!ja_jp_shiftjis.empty() && !test_std_supports_SJIS_codecvt(ja_jp_shiftjis))
- {
- std::cout << "Warning: detected unproper support of " << ja_jp_shiftjis << " locale, disableling it" << std::endl;
- ja_jp_shiftjis = "";
- }
- }
- else {
- en_us_8bit = "en_US.ISO8859-1";
- he_il_8bit = "he_IL.ISO8859-8";
- ja_jp_shiftjis = "ja_JP.SJIS";
- }
- std::cout << "Testing for backend " << def[type] << std::endl;
- test_iso = true;
- if(bname=="std" && (he_il_8bit.empty() || en_us_8bit.empty())) {
- std::cout << "no iso locales availible, passing" << std::endl;
- test_iso = false;
- }
- test_sjis = true;
- if(bname=="std" && ja_jp_shiftjis.empty()) {
- test_sjis = false;
- }
- if(bname=="winapi") {
- test_iso = false;
- test_sjis = false;
- }
- test_utf = true;
- #ifndef BOOST_LOCALE_NO_POSIX_BACKEND
- if(bname=="posix") {
- {
- locale_t l = newlocale(LC_ALL_MASK,he_il_8bit.c_str(),0);
- if(!l)
- test_iso = false;
- else
- freelocale(l);
- }
- {
- locale_t l = newlocale(LC_ALL_MASK,en_us_8bit.c_str(),0);
- if(!l)
- test_iso = false;
- else
- freelocale(l);
- }
- {
- locale_t l = newlocale(LC_ALL_MASK,"en_US.UTF-8",0);
- if(!l)
- test_utf = false;
- else
- freelocale(l);
- }
- #ifdef BOOST_LOCALE_WITH_ICONV
- {
- locale_t l = newlocale(LC_ALL_MASK,ja_jp_shiftjis.c_str(),0);
- if(!l)
- test_sjis = false;
- else
- freelocale(l);
- }
- #else
- test_sjis = false;
- #endif
- }
- #endif
- if(def[type]=="std" && (get_std_name("en_US.UTF-8").empty() || get_std_name("he_IL.UTF-8").empty()))
- {
- test_utf = false;
- }
-
- std::cout << "Testing wide I/O" << std::endl;
- test_wide_io();
- std::cout << "Testing charset to/from UTF conversion functions" << std::endl;
- std::cout << " char" << std::endl;
- test_to<char>();
- std::cout << " wchar_t" << std::endl;
- test_to<wchar_t>();
- #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
- if(bname == "icu" || bname == "std") {
- std::cout << " char16_t" << std::endl;
- test_to<char16_t>();
- }
- #endif
- #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
- if(bname == "icu" || bname == "std") {
- std::cout << " char32_t" << std::endl;
- test_to<char32_t>();
- }
- #endif
- test_all_combinations();
- }
- }
- catch(std::exception const &e) {
- std::cerr << "Failed " << e.what() << std::endl;
- return EXIT_FAILURE;
- }
- FINALIZE();
- }
- // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
- // boostinspect:noascii
|