update utf8 functions comments
while here: - rename pt::utf8_check_range(...) -> pt::is_correct_unicode_char(...)
This commit is contained in:
@@ -41,21 +41,21 @@ namespace pt
|
||||
{
|
||||
|
||||
/*
|
||||
* returns true if 'c' is a correct unicode character
|
||||
* return true if 'c' is a correct unicode character
|
||||
*/
|
||||
bool utf8_check_range(int c)
|
||||
bool is_correct_unicode_char(int c)
|
||||
{
|
||||
return c>=0 && c<=0x10FFFF && !(c>=0xD800 && c<=0xDFFF);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* returns true if 'c' is a correct unicode character
|
||||
* return true if 'c' is a correct unicode character
|
||||
*
|
||||
* this method is used when reading from an utf8 string
|
||||
* how_many_bytes - means how many bytes from the utf8 string were read
|
||||
*/
|
||||
bool utf8_check_range(int c, int how_many_bytes)
|
||||
bool is_correct_unicode_char(int c, int how_many_bytes)
|
||||
{
|
||||
if( c >= 0x0000 && c <= 0x007f && how_many_bytes == 1 )
|
||||
{
|
||||
@@ -124,10 +124,9 @@ bool surrogate_pair_to_int(int c1, int c2, int & z)
|
||||
|
||||
|
||||
/*
|
||||
* an auxiliary function for converting from wide characters to UTF-8
|
||||
* converting a wide character into one int
|
||||
* convert one wide (or two wide) characters to an int
|
||||
*
|
||||
* returns how many wide characters were used
|
||||
* return how many wide characters were used
|
||||
* if string_len is greater than 0 then the return value is always greater than zero too
|
||||
*/
|
||||
size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct)
|
||||
@@ -167,7 +166,7 @@ size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool
|
||||
}
|
||||
else
|
||||
{
|
||||
correct = utf8_check_range(z);
|
||||
correct = is_correct_unicode_char(z);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@@ -178,7 +177,7 @@ size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool
|
||||
* an auxiliary function for converting from wide characters to UTF-8
|
||||
* converting a wide character into one int
|
||||
|
||||
* returns how many wide characters were used
|
||||
* return how many wide characters were used
|
||||
* if wide_string has at least one character then the return value is always greater than zero too
|
||||
*/
|
||||
size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct)
|
||||
@@ -201,7 +200,7 @@ return wide_to_int(wide_string, min_str_len, z, correct);
|
||||
|
||||
|
||||
/*
|
||||
* converts an int to a wide string
|
||||
* convert an int to a wide string
|
||||
*
|
||||
* this method will not terminate the output string with a null character
|
||||
* return how many characters have been written (0, 1 or 2)
|
||||
@@ -233,9 +232,9 @@ size_t int_to_wide(int c, wchar_t * res, size_t max_buf_len)
|
||||
|
||||
|
||||
/*
|
||||
* converts an int to a wide string
|
||||
* convert an int to a wide string
|
||||
*
|
||||
* returns true if a character was inserted to the string
|
||||
* return true if a character was inserted to the string
|
||||
*/
|
||||
bool int_to_wide(int c, std::wstring & res)
|
||||
{
|
||||
@@ -258,6 +257,9 @@ bool int_to_wide(int c, std::wstring & res)
|
||||
|
||||
|
||||
/*
|
||||
* convert one character into a stream
|
||||
* stream can be an utf8 or a wide stream
|
||||
*
|
||||
* return true if c was a correct unicode character
|
||||
* and has been put into the stream
|
||||
*/
|
||||
@@ -280,17 +282,17 @@ bool int_to_stream(int c, pt::Stream & stream)
|
||||
|
||||
|
||||
/*
|
||||
* this function converts one UTF-8 character into one wide-character
|
||||
* convert one UTF-8 character into one wide-character
|
||||
*
|
||||
* input:
|
||||
* utf8 - an input UTF-8 string
|
||||
* utf8_len - size of the input string,
|
||||
* the string should be at least 4 bytes length for correctly
|
||||
* recognized the utf-8 sequence
|
||||
* utf8 - an input UTF-8 string
|
||||
* utf8_len - size of the input string,
|
||||
* the string should be at least 4 bytes long in order to correctly
|
||||
* recognize the utf-8 sequence
|
||||
*
|
||||
* output:
|
||||
* res - an output character
|
||||
* correct - true if it is a correct character
|
||||
* res - an output character
|
||||
* correct - true if it is a correct character
|
||||
*
|
||||
* the function returns how many characters have been used from the input string
|
||||
* (returns zero only if utf8_len is zero)
|
||||
@@ -318,7 +320,7 @@ size_t i, len;
|
||||
return i;
|
||||
}
|
||||
|
||||
if( utf8_check_range(res, len) )
|
||||
if( is_correct_unicode_char(res, len) )
|
||||
correct = true;
|
||||
|
||||
return len;
|
||||
@@ -326,20 +328,20 @@ return len;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts one UTF-8 character into one wide-character
|
||||
|
||||
input:
|
||||
utf8 - an input UTF-8 string (null terminated)
|
||||
|
||||
output:
|
||||
res - an output character
|
||||
correct - true if it is a correct character
|
||||
|
||||
the function returns how many characters have been used from the input string
|
||||
(returns zero only if the string has '\0' at the first character)
|
||||
even if there are errors the functions returns a different from zero value
|
||||
*/
|
||||
/*
|
||||
* convert one UTF-8 character into one wide-character
|
||||
*
|
||||
* input:
|
||||
* utf8 - an input UTF-8 string (null terminated)
|
||||
*
|
||||
* output:
|
||||
* res - an output character
|
||||
* correct - true if it is a correct character
|
||||
*
|
||||
* the function returns how many characters have been used from the input string
|
||||
* (returns zero only if the string has '\0' at the first character)
|
||||
* even if there are errors the function returns a non-zero value
|
||||
*/
|
||||
size_t utf8_to_int(const char * utf8, int & res, bool & correct)
|
||||
{
|
||||
size_t i, len;
|
||||
@@ -362,7 +364,7 @@ size_t i, len;
|
||||
return i;
|
||||
}
|
||||
|
||||
if( utf8_check_range(res, len) )
|
||||
if( is_correct_unicode_char(res, len) )
|
||||
correct = true;
|
||||
|
||||
return len;
|
||||
@@ -371,20 +373,20 @@ return len;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts one UTF-8 character into one wide-character
|
||||
|
||||
input:
|
||||
utf8 - an input UTF-8 string
|
||||
|
||||
output:
|
||||
res - an output character
|
||||
correct - true if it is a correct character
|
||||
|
||||
the function returns how many characters have been used from the input string
|
||||
(returns zero only if utf8 is empty)
|
||||
even if there are errors the functions returns a different from zero value
|
||||
*/
|
||||
/*
|
||||
* convert one UTF-8 character into one wide-character
|
||||
*
|
||||
* input:
|
||||
* utf8 - an input UTF-8 string
|
||||
*
|
||||
* output:
|
||||
* res - an output character
|
||||
* correct - true if it is a correct character
|
||||
*
|
||||
* the function returns how many characters have been used from the input string
|
||||
* (returns zero only if utf8 is empty)
|
||||
* even if there are errors the function returns a non-zero value
|
||||
*/
|
||||
size_t utf8_to_int(const std::string & utf8, int & res, bool & correct)
|
||||
{
|
||||
return utf8_to_int(utf8.c_str(), utf8.size(), res, correct);
|
||||
@@ -392,18 +394,18 @@ size_t utf8_to_int(const std::string & utf8, int & res, bool & correct)
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts one UTF-8 character into one wide-character
|
||||
|
||||
input:
|
||||
utf8 - an input UTF-8 stream
|
||||
|
||||
output:
|
||||
res - an output character
|
||||
correct - true if it is a correct character
|
||||
|
||||
the function returns how many characters have been used from the input stream
|
||||
*/
|
||||
/*
|
||||
* convert one UTF-8 character into one wide-character
|
||||
*
|
||||
* input:
|
||||
* utf8 - an input UTF-8 stream
|
||||
*
|
||||
* output:
|
||||
* res - an output character
|
||||
* correct - true if it is a correct character
|
||||
*
|
||||
* the function returns how many characters have been used from the input stream
|
||||
*/
|
||||
size_t utf8_to_int(std::istream & utf8, int & res, bool & correct)
|
||||
{
|
||||
size_t i, len;
|
||||
@@ -431,7 +433,7 @@ unsigned char uz;
|
||||
return i;
|
||||
}
|
||||
|
||||
if( utf8_check_range(res, len) )
|
||||
if( is_correct_unicode_char(res, len) )
|
||||
correct = true;
|
||||
|
||||
return len;
|
||||
@@ -465,7 +467,7 @@ unsigned char uz;
|
||||
return i + 1;
|
||||
}
|
||||
|
||||
if( utf8_check_range(res, len) )
|
||||
if( is_correct_unicode_char(res, len) )
|
||||
correct = true;
|
||||
}
|
||||
else
|
||||
@@ -482,21 +484,21 @@ unsigned char uz;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts an utf8 string into wide string (std::wstring)
|
||||
|
||||
input:
|
||||
utf8 - an input utf8 string
|
||||
utf8_len - size of the input string
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
res - an output wide string
|
||||
|
||||
the function returns false if there were some errors when converting
|
||||
*/
|
||||
/*
|
||||
* convert an utf8 string into a wide string (std::wstring)
|
||||
*
|
||||
* input:
|
||||
* utf8 - an input utf8 string
|
||||
* utf8_len - size of the input string
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* res - an output wide string
|
||||
*
|
||||
* the function returns false if there were some errors when converting
|
||||
*/
|
||||
bool utf8_to_wide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear, int mode)
|
||||
{
|
||||
if( clear )
|
||||
@@ -513,20 +515,20 @@ bool utf8_to_wide(const char * utf8, size_t utf8_len, std::wstring & res, bool c
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts an utf8 string into wide string (std::wstring)
|
||||
|
||||
input:
|
||||
utf8 - an input utf8 null terminated string
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
res - an output wide string
|
||||
|
||||
the function returns false if there were some errors when converting
|
||||
*/
|
||||
/*
|
||||
* convert an utf8 string into a wide string (std::wstring)
|
||||
*
|
||||
* input:
|
||||
* utf8 - an input utf8 null terminated string
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* res - an output wide string
|
||||
*
|
||||
* the function returns false if there were some errors when converting
|
||||
*/
|
||||
bool utf8_to_wide(const char * utf8, std::wstring & res, bool clear, int mode)
|
||||
{
|
||||
size_t utf8_len = 0;
|
||||
@@ -539,20 +541,20 @@ return utf8_to_wide(utf8, utf8_len, res, clear, mode);
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts an utf8 string into wide string (std::wstring)
|
||||
|
||||
input:
|
||||
utf8 - an input utf8 string
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
res - an output wide string
|
||||
|
||||
the function returns false if there were some errors when converting
|
||||
*/
|
||||
/*
|
||||
* convert an utf8 string into a wide string (std::wstring)
|
||||
*
|
||||
* input:
|
||||
* utf8 - an input utf8 string
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* res - an output wide string
|
||||
*
|
||||
* the function returns false if there were some errors when converting
|
||||
*/
|
||||
bool utf8_to_wide(const std::string & utf8, std::wstring & res, bool clear, int mode)
|
||||
{
|
||||
return utf8_to_wide(utf8.c_str(), utf8.size(), res, clear, mode);
|
||||
@@ -560,20 +562,20 @@ bool utf8_to_wide(const std::string & utf8, std::wstring & res, bool clear, int
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts an utf8 stream into wide string (std::wstring)
|
||||
|
||||
input:
|
||||
utf8 - an input utf8 stream
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
res - an output wide string
|
||||
|
||||
the function returns false if there were some errors when converting
|
||||
*/
|
||||
/*
|
||||
* convert an utf8 stream into a wide string (std::wstring)
|
||||
*
|
||||
* input:
|
||||
* utf8 - an input utf8 stream
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* res - an output wide string
|
||||
*
|
||||
* the function returns false if there were some errors when converting
|
||||
*/
|
||||
bool utf8_to_wide(std::istream & utf8, std::wstring & res, bool clear, int mode)
|
||||
{
|
||||
int z;
|
||||
@@ -603,26 +605,26 @@ return !was_error;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts one wide character into UTF-8 sequence
|
||||
|
||||
input:
|
||||
z - wide character
|
||||
|
||||
output:
|
||||
utf8 - a buffer for the output sequence
|
||||
utf8_len - the size of the buffer
|
||||
|
||||
the function returns how many characters have been written to the utf8,
|
||||
zero means the utf8 buffer is too small or 'z' is an incorrect unicode character
|
||||
*/
|
||||
/*
|
||||
* convert one wide character into an UTF-8 sequence
|
||||
*
|
||||
* input:
|
||||
* z - wide character
|
||||
*
|
||||
* output:
|
||||
* utf8 - a buffer for the output sequence
|
||||
* utf8_len - the size of the buffer
|
||||
*
|
||||
* the function returns how many characters have been written to the utf8,
|
||||
* zero means the utf8 buffer is too small or 'z' is an incorrect unicode character
|
||||
*/
|
||||
size_t int_to_utf8(int z, char * utf8, size_t utf8_max_len)
|
||||
{
|
||||
char buf[10];
|
||||
int i = 0;
|
||||
int mask = 0x3f; // 6 first bits set
|
||||
|
||||
if( utf8_max_len==0 || !utf8_check_range(z) )
|
||||
if( utf8_max_len==0 || !is_correct_unicode_char(z) )
|
||||
return 0;
|
||||
|
||||
if( z <= 0x7f )
|
||||
@@ -658,18 +660,18 @@ return a;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts one wide character into UTF-8 string
|
||||
|
||||
input:
|
||||
z - wide character
|
||||
|
||||
output:
|
||||
utf8 - a UTF-8 string for the output sequence (the string is not cleared)
|
||||
|
||||
the function returns how many characters have been written to the utf8 string,
|
||||
zero means that 'z' is an incorrect unicode character
|
||||
*/
|
||||
/*
|
||||
* convert one wide character into an UTF-8 string
|
||||
*
|
||||
* input:
|
||||
* z - wide character
|
||||
*
|
||||
* output:
|
||||
* utf8 - a UTF-8 string for the output sequence (the string is not cleared)
|
||||
*
|
||||
* the function returns how many characters have been written to the utf8 string,
|
||||
* zero means that 'z' is an incorrect unicode character
|
||||
*/
|
||||
size_t int_to_utf8(int z, std::string & utf8, bool clear)
|
||||
{
|
||||
char buf[10];
|
||||
@@ -688,21 +690,21 @@ return len;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string into UTF-8 string
|
||||
|
||||
input:
|
||||
wide_string - a wide string for converting
|
||||
string_len - the size of the string
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a UTF-8 string for the output sequence (the string is not cleared)
|
||||
|
||||
this function returns false if there were some errors when converting
|
||||
*/
|
||||
/*
|
||||
* convert a wide string into an UTF-8 string
|
||||
*
|
||||
* input:
|
||||
* wide_string - a wide string for converting
|
||||
* string_len - the size of the string
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* utf8 - a UTF-8 string for the output sequence (the string is not cleared)
|
||||
*
|
||||
* this function returns false if there were some errors when converting
|
||||
*/
|
||||
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear, int mode)
|
||||
{
|
||||
bool was_error = false;
|
||||
@@ -723,20 +725,20 @@ return !was_error;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string into UTF-8 string
|
||||
|
||||
input:
|
||||
wide_string - a null terminated wide string for converting
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a UTF-8 string for the output sequence (the string is not cleared)
|
||||
|
||||
this function returns false if there were some errors when converting
|
||||
*/
|
||||
/*
|
||||
* convert a wide string into an UTF-8 string
|
||||
*
|
||||
* input:
|
||||
* wide_string - a null terminated wide string for converting
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* utf8 - a UTF-8 string for the output sequence (the string is not cleared)
|
||||
*
|
||||
* this function returns false if there were some errors when converting
|
||||
*/
|
||||
bool wide_to_utf8(const wchar_t * wide_string, std::string & utf8, bool clear, int mode)
|
||||
{
|
||||
bool was_error = false;
|
||||
@@ -752,20 +754,20 @@ return !was_error;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string (std::wstring) into UTF-8 string
|
||||
|
||||
input:
|
||||
wide_string - a wide string for converting
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a UTF-8 string for the output sequence (the string is not cleared)
|
||||
|
||||
this function returns false if there were some errors when converting
|
||||
*/
|
||||
/*
|
||||
* convert a wide string (std::wstring) into an UTF-8 string
|
||||
*
|
||||
* input:
|
||||
* wide_string - a wide string for converting
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* utf8 - a UTF-8 string for the output sequence (the string is not cleared)
|
||||
*
|
||||
* this function returns false if there were some errors when converting
|
||||
*/
|
||||
bool wide_to_utf8(const std::wstring & wide_string, std::string & utf8, bool clear, int mode)
|
||||
{
|
||||
return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, clear, mode);
|
||||
@@ -775,27 +777,27 @@ bool wide_to_utf8(const std::wstring & wide_string, std::string & utf8, bool cle
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string into UTF-8 stream
|
||||
|
||||
input:
|
||||
wide_string - a wide string for converting
|
||||
string_len - lenght of the wide string
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a buffer for the UTF-8 stream
|
||||
utf8_len - the size of the buffer
|
||||
utf8_written - how many bytes have been written to the buffer
|
||||
|
||||
this function returns false if there were some errors when converting or the output buffer was too small,
|
||||
the output string is not null terminated
|
||||
|
||||
if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
will continue converting but if the buffer is too small the function breaks immediately
|
||||
*/
|
||||
/*
|
||||
* convert a wide string into an UTF-8 stream
|
||||
*
|
||||
* input:
|
||||
* wide_string - a wide string for converting
|
||||
* string_len - length of the wide string
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* utf8 - a buffer for the UTF-8 stream
|
||||
* utf8_len - the size of the buffer
|
||||
* utf8_written - how many bytes have been written to the buffer
|
||||
*
|
||||
* this function returns false if there were some errors when converting or the output buffer was too small,
|
||||
* the output string is not null terminated
|
||||
*
|
||||
* if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
* will continue converting but if the buffer is too small the function breaks immediately
|
||||
*/
|
||||
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
||||
{
|
||||
bool was_error = false;
|
||||
@@ -830,26 +832,26 @@ return !was_error;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string (std::wstring) into UTF-8 stream
|
||||
|
||||
input:
|
||||
wide_string - a wide string for converting
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a buffer for the UTF-8 stream
|
||||
utf8_len - the size of the buffer
|
||||
utf8_written - how many bytes have been written to the buffer
|
||||
|
||||
this function returns false if there were some errors when converting or the output buffer was too small,
|
||||
the output string is not null terminated
|
||||
|
||||
if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
will continue converting but if the buffer is too small the function breaks immediately
|
||||
*/
|
||||
/*
|
||||
* convert a wide string (std::wstring) into an UTF-8 stream
|
||||
*
|
||||
* input:
|
||||
* wide_string - a wide string for converting
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* utf8 - a buffer for the UTF-8 stream
|
||||
* utf8_len - the size of the buffer
|
||||
* utf8_written - how many bytes have been written to the buffer
|
||||
*
|
||||
* this function returns false if there were some errors when converting or the output buffer was too small,
|
||||
* the output string is not null terminated
|
||||
*
|
||||
* if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
* will continue converting but if the buffer is too small the function breaks immediately
|
||||
*/
|
||||
bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
||||
{
|
||||
return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, utf8_written, mode);
|
||||
@@ -857,27 +859,27 @@ bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string into UTF-8 stream
|
||||
|
||||
input:
|
||||
wide_string - a wide string for converting
|
||||
string_len - lenght of the wide string
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a buffer for the UTF-8 stream
|
||||
utf8_len - the size of the buffer
|
||||
|
||||
this function returns false if there were some errors when converting or the output buffer was too small,
|
||||
the output string is null terminated (even if there were errors during converting)
|
||||
|
||||
if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
will continue converting but if the buffer is too small the function breaks immediately
|
||||
(in both cases the utf8 buffer is null terminated)
|
||||
*/
|
||||
/*
|
||||
* convert a wide string into an UTF-8 stream
|
||||
*
|
||||
* input:
|
||||
* wide_string - a wide string for converting
|
||||
* string_len - length of the wide string
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* utf8 - a buffer for the UTF-8 stream
|
||||
* utf8_len - the size of the buffer
|
||||
*
|
||||
* this function returns false if there were some errors when converting or the output buffer was too small,
|
||||
* the output string is null terminated (even if there were errors during converting)
|
||||
*
|
||||
* if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
* will continue converting but if the buffer is too small the function breaks immediately
|
||||
* (in both cases the utf8 buffer is null terminated)
|
||||
*/
|
||||
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode)
|
||||
{
|
||||
size_t utf8_saved;
|
||||
@@ -894,26 +896,26 @@ return res;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string (std::wstring) into UTF-8 stream
|
||||
|
||||
input:
|
||||
wide_string - a wide string for converting
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a buffer for the UTF-8 stream
|
||||
utf8_len - the size of the buffer
|
||||
|
||||
this function returns false if there were some errors when converting or the output buffer was too small,
|
||||
the output string is null terminated (even if there were errors during converting)
|
||||
|
||||
if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
will continue converting but if the buffer is too small the function breaks immediately
|
||||
(in both cases the utf8 buffer is null terminated)
|
||||
*/
|
||||
/*
|
||||
* convert a wide string (std::wstring) into an UTF-8 stream
|
||||
*
|
||||
* input:
|
||||
* wide_string - a wide string for converting
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* utf8 - a buffer for the UTF-8 stream
|
||||
* utf8_len - the size of the buffer
|
||||
*
|
||||
* this function returns false if there were some errors when converting or the output buffer was too small,
|
||||
* the output string is null terminated (even if there were errors during converting)
|
||||
*
|
||||
* if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
* will continue converting but if the buffer is too small the function breaks immediately
|
||||
* (in both cases the utf8 buffer is null terminated)
|
||||
*/
|
||||
bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode)
|
||||
{
|
||||
return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, mode);
|
||||
@@ -921,26 +923,26 @@ bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string into UTF-8 stream
|
||||
|
||||
input:
|
||||
wide_string - a null terminated wide string for converting
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a buffer for the UTF-8 stream
|
||||
utf8_len - the size of the buffer
|
||||
utf8_written - how many bytes have been written to the buffer
|
||||
|
||||
this function returns false if there were some errors when converting or the output buffer was too small,
|
||||
the output string is not null terminated
|
||||
|
||||
if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
will continue converting but if the buffer is too small the function breaks immediately
|
||||
*/
|
||||
/*
|
||||
* convert a wide string into an UTF-8 stream
|
||||
*
|
||||
* input:
|
||||
* wide_string - a null terminated wide string for converting
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* utf8 - a buffer for the UTF-8 stream
|
||||
* utf8_len - the size of the buffer
|
||||
* utf8_written - how many bytes have been written to the buffer
|
||||
*
|
||||
* this function returns false if there were some errors when converting or the output buffer was too small,
|
||||
* the output string is not null terminated
|
||||
*
|
||||
* if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
* will continue converting but if the buffer is too small the function breaks immediately
|
||||
*/
|
||||
bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
||||
{
|
||||
bool was_error = false;
|
||||
@@ -976,26 +978,26 @@ return !was_error;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string into UTF-8 stream
|
||||
|
||||
input:
|
||||
wide_string - a wide string for converting
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a buffer for the UTF-8 stream
|
||||
utf8_len - the size of the buffer
|
||||
|
||||
this function returns false if there were some errors when converting or the output buffer was too small,
|
||||
the output string is null terminated (even if there were errors during converting)
|
||||
|
||||
if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
will continue converting but if the buffer is too small the function breaks immediately
|
||||
(in both cases the utf8 buffer is null terminated)
|
||||
*/
|
||||
/*
|
||||
* convert a wide string into an UTF-8 stream
|
||||
*
|
||||
* input:
|
||||
* wide_string - a wide string for converting
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* utf8 - a buffer for the UTF-8 stream
|
||||
* utf8_len - the size of the buffer
|
||||
*
|
||||
* this function returns false if there were some errors when converting or the output buffer was too small,
|
||||
* the output string is null terminated (even if there were errors during converting)
|
||||
*
|
||||
* if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
* will continue converting but if the buffer is too small the function breaks immediately
|
||||
* (in both cases the utf8 buffer is null terminated)
|
||||
*/
|
||||
bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode)
|
||||
{
|
||||
size_t utf8_saved;
|
||||
@@ -1017,9 +1019,9 @@ return res;
|
||||
namespace private_namespace
|
||||
{
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from UTF-8 string
|
||||
*/
|
||||
/*
|
||||
* an auxiliary function for converting from UTF-8 string
|
||||
*/
|
||||
bool utf8_to_int_first_octet(unsigned char uz, size_t & len, int & res)
|
||||
{
|
||||
for(len=0 ; (uz & 0x80) != 0 ; ++len)
|
||||
@@ -1041,9 +1043,9 @@ return true;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from UTF-8 string
|
||||
*/
|
||||
/*
|
||||
* an auxiliary function for converting from UTF-8 string
|
||||
*/
|
||||
bool utf8_to_int_add_next_octet(unsigned char uz, int & res)
|
||||
{
|
||||
if( (uz & 0xc0) != 0x80 )
|
||||
@@ -1057,20 +1059,20 @@ return true;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
|
||||
returns how many wide characters were used
|
||||
if string_len is greater than 0 then the return value is always greater than zero too
|
||||
|
||||
utf8_written - how many characters were saved in the utf8 string (the string doesn't have
|
||||
a null terminating character)
|
||||
it can be equal to zero if the utf8 buffer is too small or there was an incorrect wide character read
|
||||
was_utf8_buf_too_small - will be true if the utf8 buffer is too small
|
||||
if this flag is true then utf8_written is equal to zero
|
||||
was_error - will be true if there is an error when converting (there was an incorrect wide character)
|
||||
(was_error will not be true if the utf8 buffer is too small)
|
||||
*/
|
||||
/*
|
||||
* an auxiliary function for converting from wide characters to UTF-8
|
||||
*
|
||||
* return how many wide characters were used
|
||||
* if string_len is greater than 0 then the return value is always greater than zero too
|
||||
*
|
||||
* utf8_written - how many characters were saved in the utf8 string (the string doesn't have
|
||||
* a null terminating character)
|
||||
* it can be equal to zero if the utf8 buffer is too small or there was an incorrect wide character read
|
||||
* was_utf8_buf_too_small - will be true if the utf8 buffer is too small
|
||||
* if this flag is true then utf8_written is equal to zero
|
||||
* was_error - will be true if there is an error when converting (there was an incorrect wide character)
|
||||
* (was_error will not be true if the utf8 buffer is too small)
|
||||
*/
|
||||
size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
|
||||
size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode)
|
||||
{
|
||||
@@ -1107,12 +1109,12 @@ return chars;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
|
||||
returns how many wide characters were used
|
||||
if string_len is greater than 0 then the return value is always greater than zero too
|
||||
*/
|
||||
/*
|
||||
* an auxiliary function for converting from wide characters to UTF-8
|
||||
*
|
||||
* return how many wide characters were used
|
||||
* if string_len is greater than 0 then the return value is always greater than zero too
|
||||
*/
|
||||
size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode)
|
||||
{
|
||||
int z;
|
||||
@@ -1137,12 +1139,12 @@ return chars;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
|
||||
returns how many wide characters were used
|
||||
if wide_string has at least one character then the return value is always greater than zero too
|
||||
*/
|
||||
/*
|
||||
* an auxiliary function for converting from wide characters to UTF-8
|
||||
*
|
||||
* return how many wide characters were used
|
||||
* if wide_string has at least one character then the return value is always greater than zero too
|
||||
*/
|
||||
size_t wide_one_to_utf8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode)
|
||||
{
|
||||
int z;
|
||||
|
Reference in New Issue
Block a user