diff --git a/utf8/utf8.cpp b/utf8/utf8.cpp index 6f9ef38..cfb20e5 100644 --- a/utf8/utf8.cpp +++ b/utf8/utf8.cpp @@ -277,60 +277,6 @@ static void IntToWide(int c, std::wstring & res) } -static void IntToWide(int c, WTextStream & res) -{ - if( sizeof(wchar_t)==2 && c>0xffff ) - { - // UTF16 surrogate pairs - c -= 0x10000; - res << static_cast(((c >> 10) & 0x3FF) + 0xD800); - res << static_cast((c & 0x3FF) + 0xDC00); - } - else - { - res << static_cast(c); - } -} - - -static bool UTF8ToWideGeneric(const char * utf8, size_t utf8_len, int mode, auto convert_function) -{ -int z; -size_t len; -bool correct, was_error = false; - - while( utf8_len > 0 ) - { - if( (unsigned char)*utf8 <= 0x7f ) - { - // small optimization - len = 1; - correct = true; - z = static_cast(*utf8); - } - else - { - len = UTF8ToInt(utf8, utf8_len, z, correct); // the len will be different from zero - } - - if( !correct ) - { - if( mode == 1 ) - convert_function(0xFFFD); // U+FFFD "replacement character" - - was_error = true; - } - else - { - convert_function(z); - } - - utf8 += len; - utf8_len -= len; - } - -return !was_error; -} /*! 
@@ -353,7 +299,7 @@ bool UTF8ToWide(const char * utf8, size_t utf8_len, std::wstring & res, bool cle if( clear ) res.clear(); - bool status = UTF8ToWideGeneric(utf8, utf8_len, mode, [&res](int c) { + bool status = private_namespace::UTF8ToWideGeneric(utf8, utf8_len, mode, [&res](int c) { IntToWide(c, res); }); @@ -452,66 +398,6 @@ return !was_error; } -// need to be tested -bool UTF8ToWide(const char * utf8, size_t utf8_len, WTextStream & res, bool clear, int mode) -{ - if( clear ) - res.clear(); - - bool status = UTF8ToWideGeneric(utf8, utf8_len, mode, [&res](int c) { - IntToWide(c, res); - }); - - return status; -} - - -// need to be tested -bool UTF8ToWide(const char * utf8, WTextStream & res, bool clear, int mode) -{ -size_t utf8_len = 0; - - while( utf8[utf8_len] != 0 ) - utf8_len += 1; - -return UTF8ToWide(utf8, utf8_len, res, clear, mode); -} - - -// need to be tested -bool UTF8ToWide(const std::string & utf8, WTextStream & res, bool clear, int mode) -{ - return UTF8ToWide(utf8.c_str(), utf8.size(), res, clear, mode); -} - - -// need to be tested -bool UTF8ToWide(std::istream & utf8, WTextStream & res, bool clear, int mode) -{ -int z; -bool correct, was_error = false; - - if( clear ) - res.clear(); - - while( UTF8ToInt(utf8, z, correct) > 0 ) - { - if( !correct ) - { - if( mode == 1 ) - res << 0xFFFD; // U+FFFD "replacement character" - - was_error = true; - } - else - { - IntToWide(z, res); - } - } - -return !was_error; -} - /*! 
@@ -1262,57 +1148,6 @@ return res; -// not tested -// FIX ME it is not using surrogate pairs from input stream -// and mode parameter -static void WideToUTF8Generic(PT::WTextStream & buffer, int mode, auto write_function) -{ - char utf8_buffer[256]; - std::size_t buffer_len = sizeof(utf8_buffer) / sizeof(char); - std::size_t utf8_sequence_max_length = 10; - std::size_t index = 0; - - PT::WTextStream::const_iterator i = buffer.begin(); - - while( i != buffer.end() ) - { - if( index + utf8_sequence_max_length > buffer_len ) - { - write_function(utf8_buffer, index); - index = 0; - } - - index += PT::IntToUTF8(*i, utf8_buffer + index, buffer_len - index); - ++i; - } - - if( index > 0 ) - { - write_function(utf8_buffer, index); - } -} - - -// not tested -void WideToUTF8(PT::WTextStream & buffer, std::string & utf8, bool clear, int mode) -{ - if( clear ) - utf8.clear(); - - WideToUTF8Generic(buffer, mode, [&utf8](const char * utf8_buffer, std::size_t buffer_len){ - utf8.append(utf8_buffer, buffer_len); - }); -} - - -// not tested -void WideToUTF8(PT::WTextStream & buffer, std::ostream & utf8, int mode) -{ - WideToUTF8Generic(buffer, mode, [&utf8](const char * utf8_buffer, std::size_t buffer_len){ - utf8.write(utf8_buffer, buffer_len); - }); -} - diff --git a/utf8/utf8.h b/utf8/utf8.h index f87e15a..c33a68d 100644 --- a/utf8/utf8.h +++ b/utf8/utf8.h @@ -90,11 +90,13 @@ bool UTF8ToWide(std::istream & utf8, std::wstring & res, bool cle converting UTF-8 string to a WTextStream stream (need to be tested) */ +/* + implemented as templates below bool UTF8ToWide(const char * utf8, size_t utf8_len, WTextStream & res, bool clear = true, int mode = 1); bool UTF8ToWide(const char * utf8, WTextStream & res, bool clear = true, int mode = 1); bool UTF8ToWide(const std::string & utf8, WTextStream & res, bool clear = true, int mode = 1); bool UTF8ToWide(std::istream & utf8, WTextStream & res, bool clear = true, int mode = 1); - +*/ /*! 
converting one int character to UTF-8 @@ -110,12 +112,16 @@ size_t IntToUTF8(int z, std::ostream & utf8); bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear = true, int mode = 1); bool WideToUTF8(const wchar_t * wide_string, std::string & utf8, bool clear = true, int mode = 1); bool WideToUTF8(const std::wstring & wide_string, std::string & utf8, bool clear = true, int mode = 1); -void WideToUTF8(PT::WTextStream & buffer, std::string & utf8, bool clear = true, int mode = 1);// not tested + +// implemented as a template below +//void WideToUTF8(PT::WTextStream & buffer, std::string & utf8, bool clear = true, int mode = 1);// not tested bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::ostream & utf8, int mode = 1); bool WideToUTF8(const wchar_t * wide_string, std::ostream & utf8, int mode = 1); bool WideToUTF8(const std::wstring & wide_string, std::ostream & utf8, int mode = 1); -void WideToUTF8(PT::WTextStream & buffer, std::ostream & utf8, int mode = 1);// not tested + +// implemented as a template below +//void WideToUTF8(PT::WTextStream & buffer, std::ostream & utf8, int mode = 1);// not tested bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1); bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1); @@ -129,6 +135,198 @@ bool WideToUTF8(const std::wstring & wide_string, char * utf8, siz +namespace private_namespace +{ +template +bool UTF8ToWideGeneric(const char * utf8, size_t utf8_len, int mode, function_type convert_function) +{ +int z; +size_t len; +bool correct, was_error = false; + + while( utf8_len > 0 ) + { + if( (unsigned char)*utf8 <= 0x7f ) + { + // small optimization + len = 1; + correct = true; + z = static_cast(*utf8); + } + else + { + len = UTF8ToInt(utf8, utf8_len, z, correct); // the len will be different from zero + } + + if( !correct ) + { + if( 
mode == 1 ) + convert_function(0xFFFD); // U+FFFD "replacement character" + + was_error = true; + } + else + { + convert_function(z); + } + + utf8 += len; + utf8_len -= len; + } + +return !was_error; +} + + +// appends the code point c to res; when wchar_t has 2 bytes and c > 0xffff it is written as a UTF-16 surrogate pair +template +void IntToWide(int c, TextStreamBase & res) +{ + if( sizeof(wchar_t)==2 && c>0xffff ) + { + // UTF16 surrogate pairs + c -= 0x10000; + res << static_cast(((c >> 10) & 0x3FF) + 0xD800); + res << static_cast((c & 0x3FF) + 0xDC00); + } + else + { + res << static_cast(c); + } +} + + +// not tested +// FIX ME it is not using surrogate pairs from input stream +// and mode parameter +template +void WideToUTF8Generic(TextStreamBase & buffer, int mode, function_type write_function) +{ + char utf8_buffer[256]; + std::size_t buffer_len = sizeof(utf8_buffer) / sizeof(char); + std::size_t utf8_sequence_max_length = 10; + std::size_t index = 0; + + typename TextStreamBase::const_iterator i = buffer.begin(); + + while( i != buffer.end() ) + { + if( index + utf8_sequence_max_length > buffer_len ) + { + write_function(utf8_buffer, index); + index = 0; + } + + index += PT::IntToUTF8(*i, utf8_buffer + index, buffer_len - index); + ++i; + } + + if( index > 0 ) + { + write_function(utf8_buffer, index); + } +} + + +} // namespace + + + + +// need to be tested; converts utf8_len bytes of UTF-8 into res (cleared first if clear is true), mode == 1 puts the replacement character in place of an invalid sequence, returns false if an invalid sequence was found +template +bool UTF8ToWide(const char * utf8, size_t utf8_len, TextStreamBase & res, bool clear = true, int mode = 1) +{ + if( clear ) + res.clear(); + + bool status = private_namespace::UTF8ToWideGeneric(utf8, utf8_len, mode, [&res](int c) { + private_namespace::IntToWide(c, res); + }); + + return status; +} + + +// need to be tested; the same as above for a null-terminated utf8 string (its length is measured here) +template +bool UTF8ToWide(const char * utf8, TextStreamBase & res, bool clear = true, int mode = 1) +{ +size_t utf8_len = 0; + + while( utf8[utf8_len] != 0 ) + utf8_len += 1; + +return UTF8ToWide(utf8, utf8_len, res, clear, mode); +} + + +// need to be tested; the same as above for a std::string +template +bool UTF8ToWide(const std::string & utf8, TextStreamBase & res, bool clear = true, int mode = 1) +{ + return 
UTF8ToWide(utf8.c_str(), utf8.size(), res, clear, mode); +} + + +// need to be tested; reads code points from the utf8 stream as long as UTF8ToInt returns a value greater than zero, returns false if an invalid sequence was found +template +bool UTF8ToWide(std::istream & utf8, TextStreamBase & res, bool clear = true, int mode = 1) +{ +int z; +bool correct, was_error = false; + + if( clear ) + res.clear(); + + while( UTF8ToInt(utf8, z, correct) > 0 ) + { + if( !correct ) + { + if( mode == 1 ) + private_namespace::IntToWide(0xFFFD, res); // U+FFFD "replacement character", written as a character through IntToWide like in the other overloads (not streamed as an int) + + was_error = true; + } + else + { + private_namespace::IntToWide(z, res); + } + } + +return !was_error; +} + + + + +// not tested; appends buffer converted to UTF-8 to utf8 (cleared first if clear is true), mode is only passed on to WideToUTF8Generic +template +void WideToUTF8(TextStreamBase & buffer, std::string & utf8, bool clear = true, int mode = 1) +{ + if( clear ) + utf8.clear(); + + private_namespace::WideToUTF8Generic(buffer, mode, [&utf8](const char * utf8_buffer, std::size_t buffer_len){ + utf8.append(utf8_buffer, buffer_len); + }); +} + + +// not tested; writes buffer converted to UTF-8 to the utf8 stream, mode is only passed on to WideToUTF8Generic +template +void WideToUTF8(TextStreamBase & buffer, std::ostream & utf8, int mode = 1) +{ + private_namespace::WideToUTF8Generic(buffer, mode, [&utf8](const char * utf8_buffer, std::size_t buffer_len){ + utf8.write(utf8_buffer, buffer_len); + }); +} + + + + + + } // namespace