|
|
|
@ -50,7 +50,7 @@ namespace pt
|
|
|
|
|
/*!
|
|
|
|
|
returns true if 'c' is a correct unicode character
|
|
|
|
|
*/
|
|
|
|
|
bool UTF8_CheckRange(int c)
|
|
|
|
|
bool utf8_check_range(int c)
|
|
|
|
|
{
|
|
|
|
|
return c>=0 && c<=0x10FFFF && !(c>=0xD800 && c<=0xDFFF);
|
|
|
|
|
}
|
|
|
|
@ -62,7 +62,7 @@ bool UTF8_CheckRange(int c)
|
|
|
|
|
this method is used when reading from a utf8 string
|
|
|
|
|
how_many_bytes - means how many bytes from the utf8 string were read
|
|
|
|
|
*/
|
|
|
|
|
bool UTF8_CheckRange(int c, int how_many_bytes)
|
|
|
|
|
bool utf8_check_range(int c, int how_many_bytes)
|
|
|
|
|
{
|
|
|
|
|
if( c >= 0x0000 && c <= 0x007f && how_many_bytes == 1 )
|
|
|
|
|
{
|
|
|
|
@ -111,7 +111,7 @@ return false;
|
|
|
|
|
(returns zero only if utf8_len is zero)
|
|
|
|
|
even if there are errors the function returns a non-zero value
|
|
|
|
|
*/
|
|
|
|
|
size_t UTF8ToInt(const char * utf8, size_t utf8_len, int & res, bool & correct)
|
|
|
|
|
size_t utf8_to_int(const char * utf8, size_t utf8_len, int & res, bool & correct)
|
|
|
|
|
{
|
|
|
|
|
size_t i, len;
|
|
|
|
|
|
|
|
|
@ -121,7 +121,7 @@ size_t i, len;
|
|
|
|
|
if( utf8_len == 0 )
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
if( !private_namespace::UTF8ToInt_FirstOctet(utf8[0], len, res) )
|
|
|
|
|
if( !private_namespace::utf8_to_int_first_octet(utf8[0], len, res) )
|
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
|
|
if( utf8_len < len )
|
|
|
|
@ -129,11 +129,11 @@ size_t i, len;
|
|
|
|
|
|
|
|
|
|
for(i=1 ; i<len ; ++i)
|
|
|
|
|
{
|
|
|
|
|
if( !private_namespace::UTF8ToInt_AddNextOctet(utf8[i], res) )
|
|
|
|
|
if( !private_namespace::utf8_to_int_add_next_octet(utf8[i], res) )
|
|
|
|
|
return i;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if( UTF8_CheckRange(res, len) )
|
|
|
|
|
if( utf8_check_range(res, len) )
|
|
|
|
|
correct = true;
|
|
|
|
|
|
|
|
|
|
return len;
|
|
|
|
@ -155,7 +155,7 @@ return len;
|
|
|
|
|
(returns zero only if the string has '\0' at the first character)
|
|
|
|
|
even if there are errors the function returns a non-zero value
|
|
|
|
|
*/
|
|
|
|
|
size_t UTF8ToInt(const char * utf8, int & res, bool & correct)
|
|
|
|
|
size_t utf8_to_int(const char * utf8, int & res, bool & correct)
|
|
|
|
|
{
|
|
|
|
|
size_t i, len;
|
|
|
|
|
|
|
|
|
@ -165,7 +165,7 @@ size_t i, len;
|
|
|
|
|
if( *utf8 == 0 )
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
if( !private_namespace::UTF8ToInt_FirstOctet(utf8[0], len, res) )
|
|
|
|
|
if( !private_namespace::utf8_to_int_first_octet(utf8[0], len, res) )
|
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
|
|
for(i=1 ; i<len ; ++i)
|
|
|
|
@ -173,11 +173,11 @@ size_t i, len;
|
|
|
|
|
if( utf8[i] == 0 )
|
|
|
|
|
return i;
|
|
|
|
|
|
|
|
|
|
if( !private_namespace::UTF8ToInt_AddNextOctet(utf8[i], res) )
|
|
|
|
|
if( !private_namespace::utf8_to_int_add_next_octet(utf8[i], res) )
|
|
|
|
|
return i;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if( UTF8_CheckRange(res, len) )
|
|
|
|
|
if( utf8_check_range(res, len) )
|
|
|
|
|
correct = true;
|
|
|
|
|
|
|
|
|
|
return len;
|
|
|
|
@ -200,9 +200,9 @@ return len;
|
|
|
|
|
(returns zero only if utf8 is empty)
|
|
|
|
|
even if there are errors the function returns a non-zero value
|
|
|
|
|
*/
|
|
|
|
|
size_t UTF8ToInt(const std::string & utf8, int & res, bool & correct)
|
|
|
|
|
size_t utf8_to_int(const std::string & utf8, int & res, bool & correct)
|
|
|
|
|
{
|
|
|
|
|
return UTF8ToInt(utf8.c_str(), utf8.size(), res, correct);
|
|
|
|
|
return utf8_to_int(utf8.c_str(), utf8.size(), res, correct);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -219,7 +219,7 @@ size_t UTF8ToInt(const std::string & utf8, int & res, bool & correct)
|
|
|
|
|
|
|
|
|
|
the function returns how many characters have been used from the input stream
|
|
|
|
|
*/
|
|
|
|
|
size_t UTF8ToInt(std::istream & utf8, int & res, bool & correct)
|
|
|
|
|
size_t utf8_to_int(std::istream & utf8, int & res, bool & correct)
|
|
|
|
|
{
|
|
|
|
|
size_t i, len;
|
|
|
|
|
unsigned char uz;
|
|
|
|
@ -232,7 +232,7 @@ unsigned char uz;
|
|
|
|
|
if( !utf8 )
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
if( !private_namespace::UTF8ToInt_FirstOctet(uz, len, res) )
|
|
|
|
|
if( !private_namespace::utf8_to_int_first_octet(uz, len, res) )
|
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
|
|
for(i=1 ; i<len ; ++i)
|
|
|
|
@ -242,11 +242,11 @@ unsigned char uz;
|
|
|
|
|
if( !utf8 )
|
|
|
|
|
return i;
|
|
|
|
|
|
|
|
|
|
if( !private_namespace::UTF8ToInt_AddNextOctet(uz, res) )
|
|
|
|
|
if( !private_namespace::utf8_to_int_add_next_octet(uz, res) )
|
|
|
|
|
return i;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if( UTF8_CheckRange(res, len) )
|
|
|
|
|
if( utf8_check_range(res, len) )
|
|
|
|
|
correct = true;
|
|
|
|
|
|
|
|
|
|
return len;
|
|
|
|
@ -258,7 +258,7 @@ return len;
|
|
|
|
|
/*
|
|
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
static void IntToWide(int c, std::wstring & res)
|
|
|
|
|
static void int_to_wide(int c, std::wstring & res)
|
|
|
|
|
{
|
|
|
|
|
if( sizeof(wchar_t)==2 && c>0xffff )
|
|
|
|
|
{
|
|
|
|
@ -291,13 +291,13 @@ static void IntToWide(int c, std::wstring & res)
|
|
|
|
|
|
|
|
|
|
the function returns false if there were some errors when converting
|
|
|
|
|
*/
|
|
|
|
|
bool UTF8ToWide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear, int mode)
|
|
|
|
|
bool utf8_to_wide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear, int mode)
|
|
|
|
|
{
|
|
|
|
|
if( clear )
|
|
|
|
|
res.clear();
|
|
|
|
|
|
|
|
|
|
bool status = private_namespace::UTF8ToWideGeneric(utf8, utf8_len, mode, [&res](int c) {
|
|
|
|
|
IntToWide(c, res);
|
|
|
|
|
bool status = private_namespace::utf8_to_wide_generic(utf8, utf8_len, mode, [&res](int c) {
|
|
|
|
|
int_to_wide(c, res);
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
return status;
|
|
|
|
@ -321,14 +321,14 @@ bool UTF8ToWide(const char * utf8, size_t utf8_len, std::wstring & res, bool cle
|
|
|
|
|
|
|
|
|
|
the function returns false if there were some errors when converting
|
|
|
|
|
*/
|
|
|
|
|
bool UTF8ToWide(const char * utf8, std::wstring & res, bool clear, int mode)
|
|
|
|
|
bool utf8_to_wide(const char * utf8, std::wstring & res, bool clear, int mode)
|
|
|
|
|
{
|
|
|
|
|
size_t utf8_len = 0;
|
|
|
|
|
|
|
|
|
|
while( utf8[utf8_len] != 0 )
|
|
|
|
|
utf8_len += 1;
|
|
|
|
|
|
|
|
|
|
return UTF8ToWide(utf8, utf8_len, res, clear, mode);
|
|
|
|
|
return utf8_to_wide(utf8, utf8_len, res, clear, mode);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -347,9 +347,9 @@ return UTF8ToWide(utf8, utf8_len, res, clear, mode);
|
|
|
|
|
|
|
|
|
|
the function returns false if there were some errors when converting
|
|
|
|
|
*/
|
|
|
|
|
bool UTF8ToWide(const std::string & utf8, std::wstring & res, bool clear, int mode)
|
|
|
|
|
bool utf8_to_wide(const std::string & utf8, std::wstring & res, bool clear, int mode)
|
|
|
|
|
{
|
|
|
|
|
return UTF8ToWide(utf8.c_str(), utf8.size(), res, clear, mode);
|
|
|
|
|
return utf8_to_wide(utf8.c_str(), utf8.size(), res, clear, mode);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -368,7 +368,7 @@ bool UTF8ToWide(const std::string & utf8, std::wstring & res, bool clear, int mo
|
|
|
|
|
|
|
|
|
|
the function returns false if there were some errors when converting
|
|
|
|
|
*/
|
|
|
|
|
bool UTF8ToWide(std::istream & utf8, std::wstring & res, bool clear, int mode)
|
|
|
|
|
bool utf8_to_wide(std::istream & utf8, std::wstring & res, bool clear, int mode)
|
|
|
|
|
{
|
|
|
|
|
int z;
|
|
|
|
|
bool correct, was_error = false;
|
|
|
|
@ -376,7 +376,7 @@ bool correct, was_error = false;
|
|
|
|
|
if( clear )
|
|
|
|
|
res.clear();
|
|
|
|
|
|
|
|
|
|
while( UTF8ToInt(utf8, z, correct) > 0 )
|
|
|
|
|
while( utf8_to_int(utf8, z, correct) > 0 )
|
|
|
|
|
{
|
|
|
|
|
if( !correct )
|
|
|
|
|
{
|
|
|
|
@ -387,7 +387,7 @@ bool correct, was_error = false;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
IntToWide(z, res);
|
|
|
|
|
int_to_wide(z, res);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -410,13 +410,13 @@ return !was_error;
|
|
|
|
|
the function returns how many characters have been written to the utf8,
|
|
|
|
|
zero means the utf8 buffer is too small or 'z' is an incorrect unicode character
|
|
|
|
|
*/
|
|
|
|
|
size_t IntToUTF8(int z, char * utf8, size_t utf8_max_len)
|
|
|
|
|
size_t int_to_utf8(int z, char * utf8, size_t utf8_max_len)
|
|
|
|
|
{
|
|
|
|
|
char buf[10];
|
|
|
|
|
int i = 0;
|
|
|
|
|
int mask = 0x3f; // 6 first bits set
|
|
|
|
|
|
|
|
|
|
if( utf8_max_len==0 || !UTF8_CheckRange(z) )
|
|
|
|
|
if( utf8_max_len==0 || !utf8_check_range(z) )
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
if( z <= 0x7f )
|
|
|
|
@ -464,14 +464,14 @@ return a;
|
|
|
|
|
the function returns how many characters have been written to the utf8 string,
|
|
|
|
|
zero means that 'z' is an incorrect unicode character
|
|
|
|
|
*/
|
|
|
|
|
size_t IntToUTF8(int z, std::string & utf8, bool clear)
|
|
|
|
|
size_t int_to_utf8(int z, std::string & utf8, bool clear)
|
|
|
|
|
{
|
|
|
|
|
char buf[10];
|
|
|
|
|
|
|
|
|
|
if( clear )
|
|
|
|
|
utf8.clear();
|
|
|
|
|
|
|
|
|
|
size_t len = IntToUTF8(z, buf, sizeof(buf)/sizeof(char));
|
|
|
|
|
size_t len = int_to_utf8(z, buf, sizeof(buf)/sizeof(char));
|
|
|
|
|
size_t i;
|
|
|
|
|
|
|
|
|
|
for(i=0 ; i<len ; ++i)
|
|
|
|
@ -497,7 +497,7 @@ return len;
|
|
|
|
|
|
|
|
|
|
this function returns false if there were some errors when converting
|
|
|
|
|
*/
|
|
|
|
|
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear, int mode)
|
|
|
|
|
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear, int mode)
|
|
|
|
|
{
|
|
|
|
|
bool was_error = false;
|
|
|
|
|
size_t chars;
|
|
|
|
@ -507,7 +507,7 @@ size_t chars;
|
|
|
|
|
|
|
|
|
|
while( string_len > 0 )
|
|
|
|
|
{
|
|
|
|
|
chars = private_namespace::WideOneToUTF8(wide_string, string_len, utf8, was_error, mode);
|
|
|
|
|
chars = private_namespace::wide_one_to_utf8(wide_string, string_len, utf8, was_error, mode);
|
|
|
|
|
wide_string += chars;
|
|
|
|
|
string_len -= chars;
|
|
|
|
|
}
|
|
|
|
@ -531,7 +531,7 @@ return !was_error;
|
|
|
|
|
|
|
|
|
|
this function returns false if there were some errors when converting
|
|
|
|
|
*/
|
|
|
|
|
bool WideToUTF8(const wchar_t * wide_string, std::string & utf8, bool clear, int mode)
|
|
|
|
|
bool wide_to_utf8(const wchar_t * wide_string, std::string & utf8, bool clear, int mode)
|
|
|
|
|
{
|
|
|
|
|
bool was_error = false;
|
|
|
|
|
|
|
|
|
@ -539,7 +539,7 @@ bool was_error = false;
|
|
|
|
|
utf8.clear();
|
|
|
|
|
|
|
|
|
|
while( *wide_string )
|
|
|
|
|
wide_string += private_namespace::WideOneToUTF8(wide_string, utf8, was_error, mode);
|
|
|
|
|
wide_string += private_namespace::wide_one_to_utf8(wide_string, utf8, was_error, mode);
|
|
|
|
|
|
|
|
|
|
return !was_error;
|
|
|
|
|
}
|
|
|
|
@ -560,9 +560,9 @@ return !was_error;
|
|
|
|
|
|
|
|
|
|
this function returns false if there were some errors when converting
|
|
|
|
|
*/
|
|
|
|
|
bool WideToUTF8(const std::wstring & wide_string, std::string & utf8, bool clear, int mode)
|
|
|
|
|
bool wide_to_utf8(const std::wstring & wide_string, std::string & utf8, bool clear, int mode)
|
|
|
|
|
{
|
|
|
|
|
return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, clear, mode);
|
|
|
|
|
return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, clear, mode);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -590,7 +590,7 @@ bool WideToUTF8(const std::wstring & wide_string, std::string & utf8, bool clear
|
|
|
|
|
if there is an error when converting (there is an incorrect character in the wide string) the function
|
|
|
|
|
will continue converting but if the buffer is too small the function breaks immediately
|
|
|
|
|
*/
|
|
|
|
|
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
|
|
|
|
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
|
|
|
|
{
|
|
|
|
|
bool was_error = false;
|
|
|
|
|
bool was_buffer_to_small;
|
|
|
|
@ -600,7 +600,7 @@ size_t chars, utf8_saved;
|
|
|
|
|
|
|
|
|
|
while( string_len > 0 )
|
|
|
|
|
{
|
|
|
|
|
chars = private_namespace::WideOneToUTF8(wide_string, string_len, utf8, utf8_len, utf8_saved, was_buffer_to_small, was_error, mode);
|
|
|
|
|
chars = private_namespace::wide_one_to_utf8(wide_string, string_len, utf8, utf8_len, utf8_saved, was_buffer_to_small, was_error, mode);
|
|
|
|
|
|
|
|
|
|
if( was_buffer_to_small )
|
|
|
|
|
{
|
|
|
|
@ -644,9 +644,9 @@ return !was_error;
|
|
|
|
|
if there is an error when converting (there is an incorrect character in the wide string) the function
|
|
|
|
|
will continue converting but if the buffer is too small the function breaks immediately
|
|
|
|
|
*/
|
|
|
|
|
bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
|
|
|
|
bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
|
|
|
|
{
|
|
|
|
|
return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, utf8_written, mode);
|
|
|
|
|
return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, utf8_written, mode);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -672,7 +672,7 @@ bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len,
|
|
|
|
|
will continue converting but if the buffer is too small the function breaks immediately
|
|
|
|
|
(in both cases the utf8 buffer is null terminated)
|
|
|
|
|
*/
|
|
|
|
|
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode)
|
|
|
|
|
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode)
|
|
|
|
|
{
|
|
|
|
|
size_t utf8_saved;
|
|
|
|
|
bool res;
|
|
|
|
@ -680,7 +680,7 @@ bool res;
|
|
|
|
|
if( utf8_len == 0 )
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
res = WideToUTF8(wide_string, string_len, utf8, utf8_len - 1, utf8_saved, mode);
|
|
|
|
|
res = wide_to_utf8(wide_string, string_len, utf8, utf8_len - 1, utf8_saved, mode);
|
|
|
|
|
utf8[utf8_saved] = 0;
|
|
|
|
|
|
|
|
|
|
return res;
|
|
|
|
@ -708,9 +708,9 @@ return res;
|
|
|
|
|
will continue converting but if the buffer is too small the function breaks immediately
|
|
|
|
|
(in both cases the utf8 buffer is null terminated)
|
|
|
|
|
*/
|
|
|
|
|
bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode)
|
|
|
|
|
bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode)
|
|
|
|
|
{
|
|
|
|
|
return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, mode);
|
|
|
|
|
return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, mode);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -735,7 +735,7 @@ bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len,
|
|
|
|
|
if there is an error when converting (there is an incorrect character in the wide string) the function
|
|
|
|
|
will continue converting but if the buffer is too small the function breaks immediately
|
|
|
|
|
*/
|
|
|
|
|
bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
|
|
|
|
bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
|
|
|
|
{
|
|
|
|
|
bool was_error = false;
|
|
|
|
|
bool was_buffer_to_small;
|
|
|
|
@ -747,7 +747,7 @@ size_t len;
|
|
|
|
|
while( *wide_string )
|
|
|
|
|
{
|
|
|
|
|
len = (*(wide_string+1) == 0) ? 1 : 2;
|
|
|
|
|
chars = private_namespace::WideOneToUTF8(wide_string, len, utf8, utf8_len, utf8_saved, was_buffer_to_small, was_error, mode);
|
|
|
|
|
chars = private_namespace::wide_one_to_utf8(wide_string, len, utf8, utf8_len, utf8_saved, was_buffer_to_small, was_error, mode);
|
|
|
|
|
|
|
|
|
|
if( was_buffer_to_small )
|
|
|
|
|
{
|
|
|
|
@ -790,7 +790,7 @@ return !was_error;
|
|
|
|
|
will continue converting but if the buffer is too small the function breaks immediately
|
|
|
|
|
(in both cases the utf8 buffer is null terminated)
|
|
|
|
|
*/
|
|
|
|
|
bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode)
|
|
|
|
|
bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode)
|
|
|
|
|
{
|
|
|
|
|
size_t utf8_saved;
|
|
|
|
|
bool res;
|
|
|
|
@ -798,7 +798,7 @@ bool res;
|
|
|
|
|
if( utf8_len == 0 )
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
res = WideToUTF8(wide_string, utf8, utf8_len - 1, utf8_saved, mode);
|
|
|
|
|
res = wide_to_utf8(wide_string, utf8, utf8_len - 1, utf8_saved, mode);
|
|
|
|
|
utf8[utf8_saved] = 0;
|
|
|
|
|
|
|
|
|
|
return res;
|
|
|
|
|