update utf8 functions comments
while here: - rename pt::utf8_check_range(...) -> pt::is_correct_unicode_char(...)
This commit is contained in:
@@ -1178,7 +1178,7 @@ void SpaceParser::read_unicode_json_format(bool has_first_byte, int first_byte)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if( !ok || !pt::utf8_check_range(lastc) )
|
if( !ok || !pt::is_correct_unicode_char(lastc) )
|
||||||
{
|
{
|
||||||
lastc = 0xFFFD; // U+FFFD "replacement character";
|
lastc = 0xFFFD; // U+FFFD "replacement character";
|
||||||
}
|
}
|
||||||
@@ -1207,7 +1207,7 @@ int i;
|
|||||||
value = (value << 4) | hex_to_int(c);
|
value = (value << 4) | hex_to_int(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
if( i > 0 && c == '}' && pt::utf8_check_range(value) )
|
if( i > 0 && c == '}' && pt::is_correct_unicode_char(value) )
|
||||||
{
|
{
|
||||||
lastc = static_cast<wchar_t>(value);
|
lastc = static_cast<wchar_t>(value);
|
||||||
}
|
}
|
||||||
|
@@ -41,21 +41,21 @@ namespace pt
|
|||||||
{
|
{
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* returns true if 'c' is a correct unicode character
|
* return true if 'c' is a correct unicode character
|
||||||
*/
|
*/
|
||||||
bool utf8_check_range(int c)
|
bool is_correct_unicode_char(int c)
|
||||||
{
|
{
|
||||||
return c>=0 && c<=0x10FFFF && !(c>=0xD800 && c<=0xDFFF);
|
return c>=0 && c<=0x10FFFF && !(c>=0xD800 && c<=0xDFFF);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* returns true if 'c' is a correct unicode character
|
* return true if 'c' is a correct unicode character
|
||||||
*
|
*
|
||||||
* this method is used when reading from an utf8 string
|
* this method is used when reading from an utf8 string
|
||||||
* how_many_bytes - means how many bytes from the utf8 string were read
|
* how_many_bytes - means how many bytes from the utf8 string were read
|
||||||
*/
|
*/
|
||||||
bool utf8_check_range(int c, int how_many_bytes)
|
bool is_correct_unicode_char(int c, int how_many_bytes)
|
||||||
{
|
{
|
||||||
if( c >= 0x0000 && c <= 0x007f && how_many_bytes == 1 )
|
if( c >= 0x0000 && c <= 0x007f && how_many_bytes == 1 )
|
||||||
{
|
{
|
||||||
@@ -124,10 +124,9 @@ bool surrogate_pair_to_int(int c1, int c2, int & z)
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* an auxiliary function for converting from wide characters to UTF-8
|
* convert one wide (or two wide) characters to an int
|
||||||
* converting a wide character into one int
|
|
||||||
*
|
*
|
||||||
* returns how many wide characters were used
|
* return how many wide characters were used
|
||||||
* if string_len is greater than 0 then the return value is always greater than zero too
|
* if string_len is greater than 0 then the return value is always greater than zero too
|
||||||
*/
|
*/
|
||||||
size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct)
|
size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct)
|
||||||
@@ -167,7 +166,7 @@ size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
correct = utf8_check_range(z);
|
correct = is_correct_unicode_char(z);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -178,7 +177,7 @@ size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool
|
|||||||
* an auxiliary function for converting from wide characters to UTF-8
|
* an auxiliary function for converting from wide characters to UTF-8
|
||||||
* converting a wide character into one int
|
* converting a wide character into one int
|
||||||
|
|
||||||
* returns how many wide characters were used
|
* return how many wide characters were used
|
||||||
* if wide_string has at least one character then the return value is always greater than zero too
|
* if wide_string has at least one character then the return value is always greater than zero too
|
||||||
*/
|
*/
|
||||||
size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct)
|
size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct)
|
||||||
@@ -201,7 +200,7 @@ return wide_to_int(wide_string, min_str_len, z, correct);
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* converts an int to a wide string
|
* convert an int to a wide string
|
||||||
*
|
*
|
||||||
* this method will not terminate the output string with a null character
|
* this method will not terminate the output string with a null character
|
||||||
* return how many characters have been written (0, 1 or 2)
|
* return how many characters have been written (0, 1 or 2)
|
||||||
@@ -233,9 +232,9 @@ size_t int_to_wide(int c, wchar_t * res, size_t max_buf_len)
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* converts an int to a wide string
|
* convert an int to a wide string
|
||||||
*
|
*
|
||||||
* returns true if a character was inserted to the string
|
* return true if a character was inserted to the string
|
||||||
*/
|
*/
|
||||||
bool int_to_wide(int c, std::wstring & res)
|
bool int_to_wide(int c, std::wstring & res)
|
||||||
{
|
{
|
||||||
@@ -258,6 +257,9 @@ bool int_to_wide(int c, std::wstring & res)
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
* convert one character into a stream
|
||||||
|
* stream can be an utf8 or a wide stream
|
||||||
|
*
|
||||||
* return true if c was a correct unicode character
|
* return true if c was a correct unicode character
|
||||||
* and has been put to the stream
|
* and has been put to the stream
|
||||||
*/
|
*/
|
||||||
@@ -280,17 +282,17 @@ bool int_to_stream(int c, pt::Stream & stream)
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* this function converts one UTF-8 character into one wide-character
|
* convert one UTF-8 character into one wide-character
|
||||||
*
|
*
|
||||||
* input:
|
* input:
|
||||||
* utf8 - an input UTF-8 string
|
* utf8 - an input UTF-8 string
|
||||||
* utf8_len - size of the input string,
|
* utf8_len - size of the input string,
|
||||||
* the string should be at least 4 bytes long for the utf-8
|
* the string should be at least 4 bytes long for the utf-8
|
||||||
* sequence to be recognized correctly
|
* sequence to be recognized correctly
|
||||||
*
|
*
|
||||||
* output:
|
* output:
|
||||||
* res - an output character
|
* res - an output character
|
||||||
* correct - true if it is a correct character
|
* correct - true if it is a correct character
|
||||||
*
|
*
|
||||||
* the function returns how many characters have been used from the input string
|
* the function returns how many characters have been used from the input string
|
||||||
* (returns zero only if utf8_len is zero)
|
* (returns zero only if utf8_len is zero)
|
||||||
@@ -318,7 +320,7 @@ size_t i, len;
|
|||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( utf8_check_range(res, len) )
|
if( is_correct_unicode_char(res, len) )
|
||||||
correct = true;
|
correct = true;
|
||||||
|
|
||||||
return len;
|
return len;
|
||||||
@@ -326,20 +328,20 @@ return len;
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts one UTF-8 character into one wide-character
|
* convert one UTF-8 character into one wide-character
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
utf8 - an input UTF-8 string (null terminated)
|
* utf8 - an input UTF-8 string (null terminated)
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
res - an output character
|
* res - an output character
|
||||||
correct - true if it is a correct character
|
* correct - true if it is a correct character
|
||||||
|
*
|
||||||
the function returns how many characters have been used from the input string
|
* the function returns how many characters have been used from the input string
|
||||||
(returns zero only if the string has '\0' at the first character)
|
* (returns zero only if the string has '\0' at the first character)
|
||||||
even if there are errors the function returns a non-zero value
|
* even if there are errors the function returns a non-zero value
|
||||||
*/
|
*/
|
||||||
size_t utf8_to_int(const char * utf8, int & res, bool & correct)
|
size_t utf8_to_int(const char * utf8, int & res, bool & correct)
|
||||||
{
|
{
|
||||||
size_t i, len;
|
size_t i, len;
|
||||||
@@ -362,7 +364,7 @@ size_t i, len;
|
|||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( utf8_check_range(res, len) )
|
if( is_correct_unicode_char(res, len) )
|
||||||
correct = true;
|
correct = true;
|
||||||
|
|
||||||
return len;
|
return len;
|
||||||
@@ -371,20 +373,20 @@ return len;
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts one UTF-8 character into one wide-character
|
* convert one UTF-8 character into one wide-character
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
utf8 - an input UTF-8 string
|
* utf8 - an input UTF-8 string
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
res - an output character
|
* res - an output character
|
||||||
correct - true if it is a correct character
|
* correct - true if it is a correct character
|
||||||
|
*
|
||||||
the function returns how many characters have been used from the input string
|
* the function returns how many characters have been used from the input string
|
||||||
(returns zero only if utf8 is empty)
|
* (returns zero only if utf8 is empty)
|
||||||
even if there are errors the function returns a non-zero value
|
* even if there are errors the function returns a non-zero value
|
||||||
*/
|
*/
|
||||||
size_t utf8_to_int(const std::string & utf8, int & res, bool & correct)
|
size_t utf8_to_int(const std::string & utf8, int & res, bool & correct)
|
||||||
{
|
{
|
||||||
return utf8_to_int(utf8.c_str(), utf8.size(), res, correct);
|
return utf8_to_int(utf8.c_str(), utf8.size(), res, correct);
|
||||||
@@ -392,18 +394,18 @@ size_t utf8_to_int(const std::string & utf8, int & res, bool & correct)
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts one UTF-8 character into one wide-character
|
* convert one UTF-8 character into one wide-character
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
utf8 - an input UTF-8 stream
|
* utf8 - an input UTF-8 stream
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
res - an output character
|
* res - an output character
|
||||||
correct - true if it is a correct character
|
* correct - true if it is a correct character
|
||||||
|
*
|
||||||
the function returns how many characters have been used from the input stream
|
* the function returns how many characters have been used from the input stream
|
||||||
*/
|
*/
|
||||||
size_t utf8_to_int(std::istream & utf8, int & res, bool & correct)
|
size_t utf8_to_int(std::istream & utf8, int & res, bool & correct)
|
||||||
{
|
{
|
||||||
size_t i, len;
|
size_t i, len;
|
||||||
@@ -431,7 +433,7 @@ unsigned char uz;
|
|||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( utf8_check_range(res, len) )
|
if( is_correct_unicode_char(res, len) )
|
||||||
correct = true;
|
correct = true;
|
||||||
|
|
||||||
return len;
|
return len;
|
||||||
@@ -465,7 +467,7 @@ unsigned char uz;
|
|||||||
return i + 1;
|
return i + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( utf8_check_range(res, len) )
|
if( is_correct_unicode_char(res, len) )
|
||||||
correct = true;
|
correct = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -482,21 +484,21 @@ unsigned char uz;
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts an utf8 string into wide string (std::wstring)
|
* convert an utf8 string into a wide string (std::wstring)
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
utf8 - an input utf8 string
|
* utf8 - an input utf8 string
|
||||||
utf8_len - size of the input string
|
* utf8_len - size of the input string
|
||||||
mode - what to do with errors when converting
|
* mode - what to do with errors when converting
|
||||||
0: skip an invalid character
|
* 0: skip an invalid character
|
||||||
1: put U+FFFD "replacement character" instead of the invalid character (default)
|
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
res - an output wide string
|
* res - an output wide string
|
||||||
|
*
|
||||||
the function returns false if there were some errors when converting
|
* the function returns false if there were some errors when converting
|
||||||
*/
|
*/
|
||||||
bool utf8_to_wide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear, int mode)
|
bool utf8_to_wide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear, int mode)
|
||||||
{
|
{
|
||||||
if( clear )
|
if( clear )
|
||||||
@@ -513,20 +515,20 @@ bool utf8_to_wide(const char * utf8, size_t utf8_len, std::wstring & res, bool c
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts an utf8 string into wide string (std::wstring)
|
* convert an utf8 string into a wide string (std::wstring)
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
utf8 - an input utf8 null terminated string
|
* utf8 - an input utf8 null terminated string
|
||||||
mode - what to do with errors when converting
|
* mode - what to do with errors when converting
|
||||||
0: skip an invalid character
|
* 0: skip an invalid character
|
||||||
1: put U+FFFD "replacement character" instead of the invalid character (default)
|
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
res - an output wide string
|
* res - an output wide string
|
||||||
|
*
|
||||||
the function returns false if there were some errors when converting
|
* the function returns false if there were some errors when converting
|
||||||
*/
|
*/
|
||||||
bool utf8_to_wide(const char * utf8, std::wstring & res, bool clear, int mode)
|
bool utf8_to_wide(const char * utf8, std::wstring & res, bool clear, int mode)
|
||||||
{
|
{
|
||||||
size_t utf8_len = 0;
|
size_t utf8_len = 0;
|
||||||
@@ -539,20 +541,20 @@ return utf8_to_wide(utf8, utf8_len, res, clear, mode);
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts an utf8 string into wide string (std::wstring)
|
* convert an utf8 string into a wide string (std::wstring)
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
utf8 - an input utf8 string
|
* utf8 - an input utf8 string
|
||||||
mode - what to do with errors when converting
|
* mode - what to do with errors when converting
|
||||||
0: skip an invalid character
|
* 0: skip an invalid character
|
||||||
1: put U+FFFD "replacement character" instead of the invalid character (default)
|
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
res - an output wide string
|
* res - an output wide string
|
||||||
|
*
|
||||||
the function returns false if there were some errors when converting
|
* the function returns false if there were some errors when converting
|
||||||
*/
|
*/
|
||||||
bool utf8_to_wide(const std::string & utf8, std::wstring & res, bool clear, int mode)
|
bool utf8_to_wide(const std::string & utf8, std::wstring & res, bool clear, int mode)
|
||||||
{
|
{
|
||||||
return utf8_to_wide(utf8.c_str(), utf8.size(), res, clear, mode);
|
return utf8_to_wide(utf8.c_str(), utf8.size(), res, clear, mode);
|
||||||
@@ -560,20 +562,20 @@ bool utf8_to_wide(const std::string & utf8, std::wstring & res, bool clear, int
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts an utf8 stream into wide string (std::wstring)
|
* convert an utf8 stream into a wide string (std::wstring)
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
utf8 - an input utf8 stream
|
* utf8 - an input utf8 stream
|
||||||
mode - what to do with errors when converting
|
* mode - what to do with errors when converting
|
||||||
0: skip an invalid character
|
* 0: skip an invalid character
|
||||||
1: put U+FFFD "replacement character" instead of the invalid character (default)
|
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
res - an output wide string
|
* res - an output wide string
|
||||||
|
*
|
||||||
the function returns false if there were some errors when converting
|
* the function returns false if there were some errors when converting
|
||||||
*/
|
*/
|
||||||
bool utf8_to_wide(std::istream & utf8, std::wstring & res, bool clear, int mode)
|
bool utf8_to_wide(std::istream & utf8, std::wstring & res, bool clear, int mode)
|
||||||
{
|
{
|
||||||
int z;
|
int z;
|
||||||
@@ -603,26 +605,26 @@ return !was_error;
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts one wide character into UTF-8 sequence
|
* convert one wide character into an UTF-8 sequence
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
z - wide character
|
* z - wide character
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
utf8 - a buffer for the output sequence
|
* utf8 - a buffer for the output sequence
|
||||||
utf8_len - the size of the buffer
|
* utf8_len - the size of the buffer
|
||||||
|
*
|
||||||
the function returns how many characters have been written to the utf8,
|
* the function returns how many characters have been written to the utf8,
|
||||||
zero means the utf8 buffer is too small or 'z' is an incorrect unicode character
|
* zero means the utf8 buffer is too small or 'z' is an incorrect unicode character
|
||||||
*/
|
*/
|
||||||
size_t int_to_utf8(int z, char * utf8, size_t utf8_max_len)
|
size_t int_to_utf8(int z, char * utf8, size_t utf8_max_len)
|
||||||
{
|
{
|
||||||
char buf[10];
|
char buf[10];
|
||||||
int i = 0;
|
int i = 0;
|
||||||
int mask = 0x3f; // 6 first bits set
|
int mask = 0x3f; // 6 first bits set
|
||||||
|
|
||||||
if( utf8_max_len==0 || !utf8_check_range(z) )
|
if( utf8_max_len==0 || !is_correct_unicode_char(z) )
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if( z <= 0x7f )
|
if( z <= 0x7f )
|
||||||
@@ -658,18 +660,18 @@ return a;
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts one wide character into UTF-8 string
|
* convert one wide character into an UTF-8 string
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
z - wide character
|
* z - wide character
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
utf8 - a UTF-8 string for the output sequence (the string is not cleared)
|
* utf8 - a UTF-8 string for the output sequence (the string is not cleared)
|
||||||
|
*
|
||||||
the function returns how many characters have been written to the utf8 string,
|
* the function returns how many characters have been written to the utf8 string,
|
||||||
zero means that 'z' is an incorrect unicode character
|
* zero means that 'z' is an incorrect unicode character
|
||||||
*/
|
*/
|
||||||
size_t int_to_utf8(int z, std::string & utf8, bool clear)
|
size_t int_to_utf8(int z, std::string & utf8, bool clear)
|
||||||
{
|
{
|
||||||
char buf[10];
|
char buf[10];
|
||||||
@@ -688,21 +690,21 @@ return len;
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts a wide string into UTF-8 string
|
* convert a wide string into an UTF-8 string
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
wide_string - a wide string for converting
|
* wide_string - a wide string for converting
|
||||||
string_len - the size of the string
|
* string_len - the size of the string
|
||||||
mode - what to do with errors when converting
|
* mode - what to do with errors when converting
|
||||||
0: skip an invalid character
|
* 0: skip an invalid character
|
||||||
1: put U+FFFD "replacement character" instead of the invalid character (default)
|
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
utf8 - a UTF-8 string for the output sequence (the string is not cleared)
|
* utf8 - a UTF-8 string for the output sequence (the string is not cleared)
|
||||||
|
*
|
||||||
this function returns false if there were some errors when converting
|
* this function returns false if there were some errors when converting
|
||||||
*/
|
*/
|
||||||
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear, int mode)
|
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear, int mode)
|
||||||
{
|
{
|
||||||
bool was_error = false;
|
bool was_error = false;
|
||||||
@@ -723,20 +725,20 @@ return !was_error;
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts a wide string into UTF-8 string
|
* convert a wide string into an UTF-8 string
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
wide_string - a null terminated wide string for converting
|
* wide_string - a null terminated wide string for converting
|
||||||
mode - what to do with errors when converting
|
* mode - what to do with errors when converting
|
||||||
0: skip an invalid character
|
* 0: skip an invalid character
|
||||||
1: put U+FFFD "replacement character" instead of the invalid character (default)
|
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
utf8 - a UTF-8 string for the output sequence (the string is not cleared)
|
* utf8 - a UTF-8 string for the output sequence (the string is not cleared)
|
||||||
|
*
|
||||||
this function returns false if there were some errors when converting
|
* this function returns false if there were some errors when converting
|
||||||
*/
|
*/
|
||||||
bool wide_to_utf8(const wchar_t * wide_string, std::string & utf8, bool clear, int mode)
|
bool wide_to_utf8(const wchar_t * wide_string, std::string & utf8, bool clear, int mode)
|
||||||
{
|
{
|
||||||
bool was_error = false;
|
bool was_error = false;
|
||||||
@@ -752,20 +754,20 @@ return !was_error;
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts a wide string (std::wstring) into UTF-8 string
|
* convert a wide string (std::wstring) into an UTF-8 string
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
wide_string - a wide string for converting
|
* wide_string - a wide string for converting
|
||||||
mode - what to do with errors when converting
|
* mode - what to do with errors when converting
|
||||||
0: skip an invalid character
|
* 0: skip an invalid character
|
||||||
1: put U+FFFD "replacement character" instead of the invalid character (default)
|
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
utf8 - a UTF-8 string for the output sequence (the string is not cleared)
|
* utf8 - a UTF-8 string for the output sequence (the string is not cleared)
|
||||||
|
*
|
||||||
this function returns false if there were some errors when converting
|
* this function returns false if there were some errors when converting
|
||||||
*/
|
*/
|
||||||
bool wide_to_utf8(const std::wstring & wide_string, std::string & utf8, bool clear, int mode)
|
bool wide_to_utf8(const std::wstring & wide_string, std::string & utf8, bool clear, int mode)
|
||||||
{
|
{
|
||||||
return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, clear, mode);
|
return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, clear, mode);
|
||||||
@@ -775,27 +777,27 @@ bool wide_to_utf8(const std::wstring & wide_string, std::string & utf8, bool cle
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts a wide string into UTF-8 stream
|
* convert a wide string into an UTF-8 stream
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
wide_string - a wide string for converting
|
* wide_string - a wide string for converting
|
||||||
string_len - length of the wide string
|
* string_len - length of the wide string
|
||||||
mode - what to do with errors when converting
|
* mode - what to do with errors when converting
|
||||||
0: skip an invalid character
|
* 0: skip an invalid character
|
||||||
1: put U+FFFD "replacement character" instead of the invalid character (default)
|
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
utf8 - a buffer for the UTF-8 stream
|
* utf8 - a buffer for the UTF-8 stream
|
||||||
utf8_len - the size of the buffer
|
* utf8_len - the size of the buffer
|
||||||
utf8_written - how many bytes have been written to the buffer
|
* utf8_written - how many bytes have been written to the buffer
|
||||||
|
*
|
||||||
this function returns false if there were some errors when converting or the output buffer was too small,
|
* this function returns false if there were some errors when converting or the output buffer was too small,
|
||||||
the output string is not null terminated
|
* the output string is not null terminated
|
||||||
|
*
|
||||||
if there is an error when converting (there is an incorrect character in the wide string) the function
|
* if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||||
will continue converting but if the buffer is too small the function breaks immediately
|
* will continue converting but if the buffer is too small the function breaks immediately
|
||||||
*/
|
*/
|
||||||
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
||||||
{
|
{
|
||||||
bool was_error = false;
|
bool was_error = false;
|
||||||
@@ -830,26 +832,26 @@ return !was_error;
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts a wide string (std::wstring) into UTF-8 stream
|
* convert a wide string (std::wstring) into an UTF-8 stream
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
wide_string - a wide string for converting
|
* wide_string - a wide string for converting
|
||||||
mode - what to do with errors when converting
|
* mode - what to do with errors when converting
|
||||||
0: skip an invalid character
|
* 0: skip an invalid character
|
||||||
1: put U+FFFD "replacement character" instead of the invalid character (default)
|
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
utf8 - a buffer for the UTF-8 stream
|
* utf8 - a buffer for the UTF-8 stream
|
||||||
utf8_len - the size of the buffer
|
* utf8_len - the size of the buffer
|
||||||
utf8_written - how many bytes have been written to the buffer
|
* utf8_written - how many bytes have been written to the buffer
|
||||||
|
*
|
||||||
this function returns false if there were some errors when converting or the output buffer was too small,
|
* this function returns false if there were some errors when converting or the output buffer was too small,
|
||||||
the output string is not null terminated
|
* the output string is not null terminated
|
||||||
|
*
|
||||||
if there is an error when converting (there is an incorrect character in the wide string) the function
|
* if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||||
will continue converting but if the buffer is too small the function breaks immediately
|
* will continue converting but if the buffer is too small the function breaks immediately
|
||||||
*/
|
*/
|
||||||
bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
||||||
{
|
{
|
||||||
return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, utf8_written, mode);
|
return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, utf8_written, mode);
|
||||||
@@ -857,27 +859,27 @@ bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts a wide string into UTF-8 stream
|
* convert a wide string into an UTF-8 stream
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
wide_string - a wide string for converting
|
* wide_string - a wide string for converting
|
||||||
string_len - length of the wide string
|
* string_len - length of the wide string
|
||||||
mode - what to do with errors when converting
|
* mode - what to do with errors when converting
|
||||||
0: skip an invalid character
|
* 0: skip an invalid character
|
||||||
1: put U+FFFD "replacement character" instead of the invalid character (default)
|
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
utf8 - a buffer for the UTF-8 stream
|
* utf8 - a buffer for the UTF-8 stream
|
||||||
utf8_len - the size of the buffer
|
* utf8_len - the size of the buffer
|
||||||
|
*
|
||||||
this function returns false if there were some errors when converting or the output buffer was too small,
|
* this function returns false if there were some errors when converting or the output buffer was too small,
|
||||||
the output string is null terminated (even if there were errors during converting)
|
* the output string is null terminated (even if there were errors during converting)
|
||||||
|
*
|
||||||
if there is an error when converting (there is an incorrect character in the wide string) the function
|
* if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||||
will continue converting but if the buffer is too small the function breaks immediately
|
* will continue converting but if the buffer is too small the function breaks immediately
|
||||||
(in both cases the utf8 buffer is null terminated)
|
* (in both cases the utf8 buffer is null terminated)
|
||||||
*/
|
*/
|
||||||
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode)
|
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode)
|
||||||
{
|
{
|
||||||
size_t utf8_saved;
|
size_t utf8_saved;
|
||||||
@@ -894,26 +896,26 @@ return res;
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts a wide string (std::wstring) into UTF-8 stream
|
* convert a wide string (std::wstring) into an UTF-8 stream
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
wide_string - a wide string for converting
|
* wide_string - a wide string for converting
|
||||||
mode - what to do with errors when converting
|
* mode - what to do with errors when converting
|
||||||
0: skip an invalid character
|
* 0: skip an invalid character
|
||||||
1: put U+FFFD "replacement character" instead of the invalid character (default)
|
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
utf8 - a buffer for the UTF-8 stream
|
* utf8 - a buffer for the UTF-8 stream
|
||||||
utf8_len - the size of the buffer
|
* utf8_len - the size of the buffer
|
||||||
|
*
|
||||||
this function returns false if there were some errors when converting or the output buffer was too small,
|
* this function returns false if there were some errors when converting or the output buffer was too small,
|
||||||
the output string is null terminated (even if there were errors during converting)
|
* the output string is null terminated (even if there were errors during converting)
|
||||||
|
*
|
||||||
if there is an error when converting (there is an incorrect character in the wide string) the function
|
* if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||||
will continue converting but if the buffer is too small the function breaks immediately
|
* will continue converting but if the buffer is too small the function breaks immediately
|
||||||
(in both cases the utf8 buffer is null terminated)
|
* (in both cases the utf8 buffer is null terminated)
|
||||||
*/
|
*/
|
||||||
bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode)
|
bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode)
|
||||||
{
|
{
|
||||||
return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, mode);
|
return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, mode);
|
||||||
@@ -921,26 +923,26 @@ bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts a wide string into UTF-8 stream
|
* convert a wide string into an UTF-8 stream
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
wide_string - a null terminated wide string for converting
|
* wide_string - a null terminated wide string for converting
|
||||||
mode - what to do with errors when converting
|
* mode - what to do with errors when converting
|
||||||
0: skip an invalid character
|
* 0: skip an invalid character
|
||||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
utf8 - a buffer for the UTF-8 stream
|
* utf8 - a buffer for the UTF-8 stream
|
||||||
utf8_len - the size of the buffer
|
* utf8_len - the size of the buffer
|
||||||
utf8_written - how many bytes have been written to the buffer
|
* utf8_written - how many bytes have been written to the buffer
|
||||||
|
*
|
||||||
this function returns false if there were some errors when converting or the output buffer was too small,
|
* this function returns false if there were some errors when converting or the output buffer was too small,
|
||||||
the output string is not null terminated
|
* the output string is not null terminated
|
||||||
|
*
|
||||||
if there is an error when converting (there is an incorrect character in the wide string) the function
|
* if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||||
will continue converting but if the buffer is too small the function breaks immediately
|
* will continue converting but if the buffer is too small the function breaks immediately
|
||||||
*/
|
*/
|
||||||
bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
||||||
{
|
{
|
||||||
bool was_error = false;
|
bool was_error = false;
|
||||||
@@ -976,26 +978,26 @@ return !was_error;
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts a wide string into UTF-8 stream
|
* convert a wide string into an UTF-8 stream
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
wide_string - a wide string for converting
|
* wide_string - a wide string for converting
|
||||||
mode - what to do with errors when converting
|
* mode - what to do with errors when converting
|
||||||
0: skip an invalid character
|
* 0: skip an invalid character
|
||||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
utf8 - a buffer for the UTF-8 stream
|
* utf8 - a buffer for the UTF-8 stream
|
||||||
utf8_len - the size of the buffer
|
* utf8_len - the size of the buffer
|
||||||
|
*
|
||||||
this function returns false if there were some errors when converting or the output buffer was too small,
|
* this function returns false if there were some errors when converting or the output buffer was too small,
|
||||||
the output string is null terminated (even if there were errors during converting)
|
* the output string is null terminated (even if there were errors during converting)
|
||||||
|
*
|
||||||
if there is an error when converting (there is an incorrect character in the wide string) the function
|
* if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||||
will continue converting but if the buffer is too small the function breaks immediately
|
* will continue converting but if the buffer is too small the function breaks immediately
|
||||||
(in both cases the utf8 buffer is null terminated)
|
* (in both cases the utf8 buffer is null terminated)
|
||||||
*/
|
*/
|
||||||
bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode)
|
bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode)
|
||||||
{
|
{
|
||||||
size_t utf8_saved;
|
size_t utf8_saved;
|
||||||
@@ -1017,9 +1019,9 @@ return res;
|
|||||||
namespace private_namespace
|
namespace private_namespace
|
||||||
{
|
{
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
an auxiliary function for converting from UTF-8 string
|
* an auxiliary function for converting from UTF-8 string
|
||||||
*/
|
*/
|
||||||
bool utf8_to_int_first_octet(unsigned char uz, size_t & len, int & res)
|
bool utf8_to_int_first_octet(unsigned char uz, size_t & len, int & res)
|
||||||
{
|
{
|
||||||
for(len=0 ; (uz & 0x80) != 0 ; ++len)
|
for(len=0 ; (uz & 0x80) != 0 ; ++len)
|
||||||
@@ -1041,9 +1043,9 @@ return true;
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
an auxiliary function for converting from UTF-8 string
|
* an auxiliary function for converting from UTF-8 string
|
||||||
*/
|
*/
|
||||||
bool utf8_to_int_add_next_octet(unsigned char uz, int & res)
|
bool utf8_to_int_add_next_octet(unsigned char uz, int & res)
|
||||||
{
|
{
|
||||||
if( (uz & 0xc0) != 0x80 )
|
if( (uz & 0xc0) != 0x80 )
|
||||||
@@ -1057,20 +1059,20 @@ return true;
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
an auxiliary function for converting from wide characters to UTF-8
|
* an auxiliary function for converting from wide characters to UTF-8
|
||||||
|
*
|
||||||
returns how many wide characters were used
|
* return how many wide characters were used
|
||||||
if string_len is greater than 0 then the return value is always greater than zero too
|
* if string_len is greater than 0 then the return value is always greater than zero too
|
||||||
|
*
|
||||||
utf8_written - how many characters were saved in the utf8 string (the string doesn't have
|
* utf8_written - how many characters were saved in the utf8 string (the string doesn't have
|
||||||
a null terminating character)
|
* a null terminating character)
|
||||||
it can be equal to zero if the utf8 buffer is too small or there was an incorrect wide character read
|
* it can be equal to zero if the utf8 buffer is too small or there was an incorrect wide character read
|
||||||
was_utf8_buf_too_small - will be true if the utf8 buffer is too small
|
* was_utf8_buf_too_small - will be true if the utf8 buffer is too small
|
||||||
if this flag is true then utf8_written is equal to zero
|
* if this flag is true then utf8_written is equal to zero
|
||||||
was_error - will be true if there is an error when converting (there was an incorrect wide character)
|
* was_error - will be true if there is an error when converting (there was an incorrect wide character)
|
||||||
(was_error will not be true if the utf8 buffer is too small)
|
* (was_error will not be true if the utf8 buffer is too small)
|
||||||
*/
|
*/
|
||||||
size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
|
size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
|
||||||
size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode)
|
size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode)
|
||||||
{
|
{
|
||||||
@@ -1107,12 +1109,12 @@ return chars;
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
an auxiliary function for converting from wide characters to UTF-8
|
* an auxiliary function for converting from wide characters to UTF-8
|
||||||
|
*
|
||||||
returns how many wide characters were used
|
* return how many wide characters were used
|
||||||
if string_len is greater than 0 then the return value is always greater than zero too
|
* if string_len is greater than 0 then the return value is always greater than zero too
|
||||||
*/
|
*/
|
||||||
size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode)
|
size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode)
|
||||||
{
|
{
|
||||||
int z;
|
int z;
|
||||||
@@ -1137,12 +1139,12 @@ return chars;
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
an auxiliary function for converting from wide characters to UTF-8
|
* an auxiliary function for converting from wide characters to UTF-8
|
||||||
|
*
|
||||||
returns how many wide characters were used
|
* return how many wide characters were used
|
||||||
if wide_string has at least one character then the return value is always greater than zero too
|
* if wide_string has at least one character then the return value is always greater than zero too
|
||||||
*/
|
*/
|
||||||
size_t wide_one_to_utf8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode)
|
size_t wide_one_to_utf8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode)
|
||||||
{
|
{
|
||||||
int z;
|
int z;
|
||||||
|
494
src/utf8/utf8.h
494
src/utf8/utf8.h
@@ -42,85 +42,80 @@
|
|||||||
namespace pt
|
namespace pt
|
||||||
{
|
{
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* public methods are also defined in utf8_stream.h
|
* UTF-8, a transformation format of ISO 10646
|
||||||
|
* http://tools.ietf.org/html/rfc3629
|
||||||
*
|
*
|
||||||
|
* when wchar_t is 4 bytes length we use UTF-32
|
||||||
|
* when wchar_t is 2 bytes length we use UTF-16 (with surrogate pairs)
|
||||||
|
*
|
||||||
|
* UTF-16
|
||||||
|
* http://www.ietf.org/rfc/rfc2781.txt
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
/*!
|
|
||||||
UTF-8, a transformation format of ISO 10646
|
|
||||||
http://tools.ietf.org/html/rfc3629
|
|
||||||
|
|
||||||
when wchar_t is 4 bytes length we use UTF-32
|
|
||||||
when wchar_t is 2 bytes length we use UTF-16 (with surrogate pairs)
|
|
||||||
|
|
||||||
UTF-16
|
|
||||||
http://www.ietf.org/rfc/rfc2781.txt
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
|
||||||
returns true if 'c' is a correct unicode character
|
|
||||||
|
|
||||||
RENAMEME to is_correct_unicode_char
|
|
||||||
*/
|
|
||||||
bool utf8_check_range(int c);
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
|
||||||
returns true if 'c' is a correct unicode character
|
|
||||||
|
|
||||||
this method is used when reading from an utf8 string
|
|
||||||
how_many_chars - means how many characters from utf8 string were read
|
|
||||||
*/
|
|
||||||
bool utf8_check_range(int c, int how_many_bytes);
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* return true if 'c' is a correct unicode character
|
||||||
|
*/
|
||||||
|
bool is_correct_unicode_char(int c);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* returns true if 'c' is a characters from the surrogate range
|
* return true if 'c' is a correct unicode character
|
||||||
|
*
|
||||||
|
* this method is used when reading from an utf8 string
|
||||||
|
* how_many_bytes - means how many bytes from the utf8 string were read
|
||||||
|
*/
|
||||||
|
bool is_correct_unicode_char(int c, int how_many_bytes);
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* return true if 'c' is a character from the surrogate range
|
||||||
* (c>=0xD800 && c<=0xDFFF)
|
* (c>=0xD800 && c<=0xDFFF)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
bool is_surrogate_char(int c);
|
bool is_surrogate_char(int c);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* returns true if 'c' is a first character from the surrogate pair
|
* return true if 'c' is a first character from the surrogate pair
|
||||||
* (c>=0xD800 && c<=0xDBFF)
|
* (c>=0xD800 && c<=0xDBFF)
|
||||||
*/
|
*/
|
||||||
bool is_first_surrogate_char(int c);
|
bool is_first_surrogate_char(int c);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* returns true if 'c' is a second character from the surrogate pair
|
* return true if 'c' is a second character from the surrogate pair
|
||||||
* (c>=0xDC00 && c<=0xDFFF)
|
* (c>=0xDC00 && c<=0xDFFF)
|
||||||
*/
|
*/
|
||||||
bool is_second_surrogate_char(int c);
|
bool is_second_surrogate_char(int c);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* returns a code point from two surrogate pair characters
|
* return a code point from two surrogate pair characters
|
||||||
*/
|
*/
|
||||||
bool surrogate_pair_to_int(int c1, int c2, int & z);
|
bool surrogate_pair_to_int(int c1, int c2, int & z);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* converting one character into a stream
|
* convert one character into a stream
|
||||||
* stream can be an utf8 or wide stream
|
* stream can be an utf8 or a wide stream
|
||||||
|
*
|
||||||
|
* return true if c was a correct unicode character
|
||||||
|
* and has been put to the stream
|
||||||
*/
|
*/
|
||||||
bool int_to_stream(int c, pt::Stream & stream);
|
bool int_to_stream(int c, pt::Stream & stream);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* converting a one unicode character to an int
|
* convert one wide (or two wide) characters to an int
|
||||||
* such an unicode character can consists of one or two wide characters
|
*
|
||||||
|
* return how many wide characters were used
|
||||||
|
* if string_len is greater than 0 then the return value is always greater than zero too
|
||||||
*/
|
*/
|
||||||
size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct); // may these methods make public?
|
size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct);
|
||||||
size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct);
|
size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct);
|
||||||
|
|
||||||
|
|
||||||
@@ -134,9 +129,9 @@ size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct);
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
converting one character from UTF-8 to an int
|
* convert one character from UTF-8 to an int
|
||||||
*/
|
*/
|
||||||
size_t utf8_to_int(const char * utf8, size_t utf8_len, int & res, bool & correct);
|
size_t utf8_to_int(const char * utf8, size_t utf8_len, int & res, bool & correct);
|
||||||
size_t utf8_to_int(const char * utf8, int & res, bool & correct);
|
size_t utf8_to_int(const char * utf8, int & res, bool & correct);
|
||||||
size_t utf8_to_int(const std::string & utf8, int & res, bool & correct);
|
size_t utf8_to_int(const std::string & utf8, int & res, bool & correct);
|
||||||
@@ -147,43 +142,46 @@ template<typename StreamIteratorType>
|
|||||||
size_t utf8_to_int(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, int & res, bool & correct);
|
size_t utf8_to_int(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, int & res, bool & correct);
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
converting one character from int to wide stream
|
* convert one character from an int to a wide stream
|
||||||
|
*
|
||||||
returns true if a character was inserted to the stream
|
* return true if a character was inserted to the stream
|
||||||
*/
|
*/
|
||||||
template<typename StreamType>
|
template<typename StreamType>
|
||||||
bool int_to_wide(int c, StreamType & res);
|
bool int_to_wide(int c, StreamType & res);
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
converting one character from int to wide string
|
* convert one character from an int to a wide string
|
||||||
|
*
|
||||||
this method will not terminate the output string with a null character
|
* this method will not terminate the output string with a null character
|
||||||
return how many characters have been written (0, 1 or 2)
|
* return how many characters have been written (0, 1 or 2)
|
||||||
*/
|
*/
|
||||||
size_t int_to_wide(int c, wchar_t * res, size_t max_buf_len);
|
size_t int_to_wide(int c, wchar_t * res, size_t max_buf_len);
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
converting one character from int to wide string
|
* convert one character from an int to a wide string
|
||||||
|
*
|
||||||
returns true if a character was inserted to the string
|
* return true if a character was inserted to the string
|
||||||
*/
|
*/
|
||||||
bool int_to_wide(int c, std::wstring & res);
|
bool int_to_wide(int c, std::wstring & res);
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
call a convert_function for each character from an utf8 string
|
* call a convert_function for each character from an utf8 string
|
||||||
|
*
|
||||||
|
* this function takes one int parameter:
|
||||||
|
* output_function(int c)
|
||||||
*/
|
*/
|
||||||
template<typename OutputFunction>
|
template<typename OutputFunction>
|
||||||
bool utf8_to_output_function(const char * utf8, size_t utf8_len, OutputFunction output_function, int mode = 1);
|
bool utf8_to_output_function(const char * utf8, size_t utf8_len, OutputFunction output_function, int mode = 1);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
converting UTF-8 string to a wide string
|
* convert an UTF-8 string to a wide string
|
||||||
*/
|
*/
|
||||||
bool utf8_to_wide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear = true, int mode = 1);
|
bool utf8_to_wide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear = true, int mode = 1);
|
||||||
bool utf8_to_wide(const char * utf8, std::wstring & res, bool clear = true, int mode = 1);
|
bool utf8_to_wide(const char * utf8, std::wstring & res, bool clear = true, int mode = 1);
|
||||||
bool utf8_to_wide(const std::string & utf8, std::wstring & res, bool clear = true, int mode = 1);
|
bool utf8_to_wide(const std::string & utf8, std::wstring & res, bool clear = true, int mode = 1);
|
||||||
@@ -216,7 +214,9 @@ bool utf8_to_output_function(StreamIteratorType & iterator_in, const StreamItera
|
|||||||
template<typename CharT, size_t stack_size, size_t heap_block_size>
|
template<typename CharT, size_t stack_size, size_t heap_block_size>
|
||||||
class TextStreamBase;
|
class TextStreamBase;
|
||||||
|
|
||||||
// defined at the end in textstream.h
|
/*
|
||||||
|
* this method is defined at the end of textstream.h
|
||||||
|
*/
|
||||||
template<size_t stack_size, size_t heap_block_size, typename StreamOrStringType>
|
template<size_t stack_size, size_t heap_block_size, typename StreamOrStringType>
|
||||||
bool utf8_to_wide(const TextStreamBase<char, stack_size, heap_block_size> & utf8, StreamOrStringType & out_stream, bool clear_stream = true, int mode = 1);
|
bool utf8_to_wide(const TextStreamBase<char, stack_size, heap_block_size> & utf8, StreamOrStringType & out_stream, bool clear_stream = true, int mode = 1);
|
||||||
|
|
||||||
@@ -239,9 +239,9 @@ bool utf8_to_wide(const StreamType & stream, wchar_t * out_buffer, size_t max_bu
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
converting one int character to UTF-8
|
* convert one int character to UTF-8
|
||||||
*/
|
*/
|
||||||
size_t int_to_utf8(int z, char * utf8, size_t utf8_max_len);
|
size_t int_to_utf8(int z, char * utf8, size_t utf8_max_len);
|
||||||
size_t int_to_utf8(int z, std::string & utf8, bool clear = true);
|
size_t int_to_utf8(int z, std::string & utf8, bool clear = true);
|
||||||
|
|
||||||
@@ -249,21 +249,23 @@ template<typename StreamType>
|
|||||||
size_t int_to_utf8(int z, StreamType & utf8);
|
size_t int_to_utf8(int z, StreamType & utf8);
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
call an output_function for some sequence of wide characters from the stream buffer
|
* call an output_function for some sequence of wide characters from the stream buffer
|
||||||
|
*
|
||||||
output_function has two arguments: const char * buf, size_t len:
|
* output_function takes two arguments: const char * buf, size_t len:
|
||||||
output_function(const char * buf, size_t len)
|
* output_function(const char * buf, size_t len)
|
||||||
|
* this is a buffer which was filled with utf8 characters
|
||||||
StreamType should have a const_iterator and begin() and end() methods
|
* (this buffer can have up to 256 characters)
|
||||||
*/
|
*
|
||||||
|
* StreamType should have a const_iterator and begin() and end() methods
|
||||||
|
*/
|
||||||
template<typename StreamType, typename OutputFunction>
|
template<typename StreamType, typename OutputFunction>
|
||||||
bool wide_to_output_function(StreamType & buffer, OutputFunction output_function, int mode = 1);
|
bool wide_to_output_function(StreamType & buffer, OutputFunction output_function, int mode = 1);
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
converting a wide string to UTF-8 string
|
* convert a wide string to an UTF-8 string
|
||||||
*/
|
*/
|
||||||
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear = true, int mode = 1);
|
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear = true, int mode = 1);
|
||||||
bool wide_to_utf8(const wchar_t * wide_string, std::string & utf8, bool clear = true, int mode = 1);
|
bool wide_to_utf8(const wchar_t * wide_string, std::string & utf8, bool clear = true, int mode = 1);
|
||||||
bool wide_to_utf8(const std::wstring & wide_string, std::string & utf8, bool clear = true, int mode = 1);
|
bool wide_to_utf8(const std::wstring & wide_string, std::string & utf8, bool clear = true, int mode = 1);
|
||||||
@@ -299,7 +301,9 @@ bool wide_stream_to_utf8(StreamType & buffer, char * utf8, std::size_t max_buffe
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* some private/auxiliary methods
|
||||||
|
*/
|
||||||
namespace private_namespace
|
namespace private_namespace
|
||||||
{
|
{
|
||||||
bool utf8_to_int_first_octet(unsigned char uz, size_t & len, int & res);
|
bool utf8_to_int_first_octet(unsigned char uz, size_t & len, int & res);
|
||||||
@@ -313,12 +317,12 @@ size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, std::str
|
|||||||
size_t wide_one_to_utf8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode);
|
size_t wide_one_to_utf8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode);
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
an auxiliary function for converting from wide characters to UTF-8
|
* an auxiliary function for converting from wide characters to UTF-8
|
||||||
|
*
|
||||||
returns how many wide characters were used
|
* return how many wide characters were used
|
||||||
if string_len is greater than 0 then the return value is always greater than zero too
|
* if string_len is greater than 0 then the return value is always greater than zero too
|
||||||
*/
|
*/
|
||||||
template<typename StreamType>
|
template<typename StreamType>
|
||||||
static size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, bool & was_error, int mode)
|
static size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, bool & was_error, int mode)
|
||||||
{
|
{
|
||||||
@@ -339,13 +343,13 @@ size_t chars;
|
|||||||
was_error = true;
|
was_error = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
return chars;
|
return chars;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
an auxiliary function for converting from wide characters to UTF-8
|
* an auxiliary function for converting from wide characters to UTF-8
|
||||||
*/
|
*/
|
||||||
template<typename StreamType>
|
template<typename StreamType>
|
||||||
static size_t wide_one_to_utf8(const wchar_t * wide_string, StreamType & utf8, bool & was_error, int mode)
|
static size_t wide_one_to_utf8(const wchar_t * wide_string, StreamType & utf8, bool & was_error, int mode)
|
||||||
{
|
{
|
||||||
@@ -357,7 +361,7 @@ static size_t wide_one_to_utf8(const wchar_t * wide_string, StreamType & utf8, b
|
|||||||
if( *(wide_string+1) != 0 )
|
if( *(wide_string+1) != 0 )
|
||||||
min_str_len = 2;
|
min_str_len = 2;
|
||||||
|
|
||||||
return wide_one_to_utf8(wide_string, min_str_len, utf8, was_error, mode);
|
return wide_one_to_utf8(wide_string, min_str_len, utf8, was_error, mode);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace private_namespace
|
} // namespace private_namespace
|
||||||
@@ -389,19 +393,19 @@ bool int_to_wide(int c, StreamType & res)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts one UTF-8 character into int
|
* convert one UTF-8 character into int
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
iterator_in - an stream iterator for reading from (the stream can by any stream, we use *, ++ and == operators only)
|
* iterator_in - a stream iterator for reading from (the stream can be any stream, we use *, ++ and == operators only)
|
||||||
iterator_end - an end iterator
|
* iterator_end - an end iterator
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
res - an output character
|
* res - an output character
|
||||||
correct - true if it is a correct character
|
* correct - true if it is a correct character
|
||||||
|
*
|
||||||
the function returns how many characters have been used from the input stream
|
* the function returns how many characters have been used from the input stream
|
||||||
*/
|
*/
|
||||||
template<typename StreamIteratorType>
|
template<typename StreamIteratorType>
|
||||||
size_t utf8_to_int(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, int & res, bool & correct)
|
size_t utf8_to_int(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, int & res, bool & correct)
|
||||||
{
|
{
|
||||||
@@ -432,7 +436,7 @@ unsigned char uz;
|
|||||||
return i + 1;
|
return i + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( utf8_check_range(res, len) )
|
if( is_correct_unicode_char(res, len) )
|
||||||
correct = true;
|
correct = true;
|
||||||
|
|
||||||
return len;
|
return len;
|
||||||
@@ -440,11 +444,10 @@ return len;
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
converting UTF-8 string to a TextStreamBase<wchar_t,...> stream
|
* convert UTF-8 string to a TextStreamBase<wchar_t,...> stream
|
||||||
(need to be tested)
|
* (needs to be tested)
|
||||||
*/
|
*/
|
||||||
// need to be tested
|
|
||||||
template<typename StreamType>
|
template<typename StreamType>
|
||||||
bool utf8_to_wide(const char * utf8, size_t utf8_len, StreamType & res, bool clear, int mode)
|
bool utf8_to_wide(const char * utf8, size_t utf8_len, StreamType & res, bool clear, int mode)
|
||||||
{
|
{
|
||||||
@@ -642,19 +645,19 @@ bool wide_to_output_function(StreamType & buffer, OutputFunction output_function
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
this function converts a UTF-8 stream into a wide stream or a wide string
|
* convert a UTF-8 stream into a wide stream or a wide string
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
stream - a UTF-8 stream for converting
|
* stream - a UTF-8 stream for converting
|
||||||
mode - what to do with errors when converting
|
* mode - what to do with errors when converting
|
||||||
0: skip an invalid character
|
* 0: skip an invalid character
|
||||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
res - a wide stream or a wide string for the output sequence
|
* res - a wide stream or a wide string for the output sequence
|
||||||
|
*
|
||||||
this function returns false if there were some errors when converting
|
* this function returns false if there were some errors when converting
|
||||||
*/
|
*/
|
||||||
template<typename StreamOrStringType>
|
template<typename StreamOrStringType>
|
||||||
bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear, int mode)
|
bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear, int mode)
|
||||||
{
|
{
|
||||||
@@ -668,21 +671,20 @@ bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear, i
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
this function reads characters from a UTF-8 stream and calls an output_function
|
* read characters from an UTF-8 stream and call an output_function
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
stream - a UTF-8 stream for converting
|
* stream - a UTF-8 stream for converting
|
||||||
mode - what to do with errors when converting
|
* mode - what to do with errors when converting
|
||||||
0: skip an invalid character
|
* 0: skip an invalid character
|
||||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
output_function - is a function which gets two artuments: int (character) and a reference to StreamOrStringType
|
* output_function - a function which takes one argument: an int (a character):
|
||||||
and should put the character to the output string/stream, this function should have the signature like this:
|
* output_function(int c)
|
||||||
output_function(int z, StreamOrStringType & res)
|
*
|
||||||
|
* this function returns false if there were some errors when converting
|
||||||
this function returns false if there were some errors when converting
|
*/
|
||||||
*/
|
|
||||||
template<typename OutputFunction>
|
template<typename OutputFunction>
|
||||||
bool utf8_to_output_function(const Stream & stream, OutputFunction output_function, int mode)
|
bool utf8_to_output_function(const Stream & stream, OutputFunction output_function, int mode)
|
||||||
{
|
{
|
||||||
@@ -723,18 +725,18 @@ bool utf8_to_output_function(const Stream & stream, OutputFunction output_functi
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts UTF-8 stream into a wide stream or a wide string
|
* convert an UTF-8 stream into a wide stream or a wide string
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
iterator_in - a stream iterator for reading from (the stream can by any stream, we use *, ++ and == operators only)
|
* iterator_in - a stream iterator for reading from (the stream can be any stream, we use *, ++ and == operators only)
|
||||||
iterator_end - an end iterator
|
* iterator_end - an end iterator
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
out_stream - an output wide stream or wide string (the stream can by of any kind, we use only << operator for a stream and += for a string)
|
* out_stream - an output wide stream or wide string (the stream can be of any kind, we use only << operator for a stream and += for a string)
|
||||||
|
*
|
||||||
this function returns false if there were some errors when converting
|
* this function returns false if there were some errors when converting
|
||||||
*/
|
*/
|
||||||
template<typename StreamIteratorType, typename StreamOrStringType>
|
template<typename StreamIteratorType, typename StreamOrStringType>
|
||||||
bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, StreamOrStringType & out_stream, bool clear_stream, int mode)
|
bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, StreamOrStringType & out_stream, bool clear_stream, int mode)
|
||||||
{
|
{
|
||||||
@@ -776,20 +778,20 @@ bool utf8_to_output_function(StreamIteratorType & iterator_in, const StreamItera
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts UTF-8 stream into a wide string
|
* convert an UTF-8 stream into a wide string
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
iterator_in - a stream iterator for reading from (the stream can by any stream, we use *, ++ and == operators only)
|
* iterator_in - a stream iterator for reading from (the stream can be any stream, we use *, ++ and == operators only)
|
||||||
iterator_end - an end iterator
|
* iterator_end - an end iterator
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
out_buffer - an output wide string
|
* out_buffer - an output wide string
|
||||||
max_buffer_len - how many characters can be write (we write the terminating null character too)
|
* max_buffer_len - how many characters can be written (we write the terminating null character too)
|
||||||
was_buffer_sufficient_large - a pointer to a bool value - if provided it is set to true if the buffer was sufficient large
|
* was_buffer_sufficient_large - a pointer to a bool value - if provided it is set to true if the buffer was sufficiently large
|
||||||
|
*
|
||||||
this function returns false if there were some errors when converting or if the output buffer was too short
|
* this function returns false if there were some errors when converting or if the output buffer was too short
|
||||||
*/
|
*/
|
||||||
template<typename StreamIteratorType>
|
template<typename StreamIteratorType>
|
||||||
bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, wchar_t * out_buffer, size_t max_buffer_len, int mode, bool * was_buffer_sufficient_large)
|
bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, wchar_t * out_buffer, size_t max_buffer_len, int mode, bool * was_buffer_sufficient_large)
|
||||||
{
|
{
|
||||||
@@ -849,19 +851,19 @@ bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & i
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts UTF-8 stream into a wide string
|
* convert an UTF-8 stream into a wide string
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
stream - a stream for reading from
|
* stream - a stream for reading from
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
out_buffer - an output wide string
|
* out_buffer - an output wide string
|
||||||
max_buffer_len - how many characters can be write (we write the terminating null character too)
|
* max_buffer_len - how many characters can be written (we write the terminating null character too)
|
||||||
was_buffer_sufficient_large - a pointer to a bool value - if provided it is set to true if the buffer was sufficient large
|
* was_buffer_sufficient_large - a pointer to a bool value - if provided it is set to true if the buffer was sufficiently large
|
||||||
|
*
|
||||||
this function returns false if there were some errors when converting or if the output buffer was too short
|
* this function returns false if there were some errors when converting or if the output buffer was too short
|
||||||
*/
|
*/
|
||||||
template<typename StreamType>
|
template<typename StreamType>
|
||||||
bool utf8_to_wide(const StreamType & stream, wchar_t * out_buffer, size_t max_buffer_len, bool * was_buffer_sufficient_large, int mode)
|
bool utf8_to_wide(const StreamType & stream, wchar_t * out_buffer, size_t max_buffer_len, bool * was_buffer_sufficient_large, int mode)
|
||||||
{
|
{
|
||||||
@@ -873,18 +875,18 @@ bool utf8_to_wide(const StreamType & stream, wchar_t * out_buffer, size_t max_bu
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts one wide character into UTF-8 stream
|
* convert one wide character into an UTF-8 stream
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
z - wide character
|
* z - wide character
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
utf8 - a UTF-8 stream for the output sequence
|
* utf8 - a UTF-8 stream for the output sequence
|
||||||
|
*
|
||||||
the function returns how many characters have been written to the utf8 stream,
|
* the function returns how many characters have been written to the utf8 stream,
|
||||||
zero means that 'z' is an incorrect unicode character
|
* zero means that 'z' is an incorrect unicode character
|
||||||
*/
|
*/
|
||||||
template<typename StreamType>
|
template<typename StreamType>
|
||||||
size_t int_to_utf8(int z, StreamType & utf8)
|
size_t int_to_utf8(int z, StreamType & utf8)
|
||||||
{
|
{
|
||||||
@@ -902,21 +904,21 @@ size_t int_to_utf8(int z, StreamType & utf8)
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts a wide string into UTF-8 stream
|
* convert a wide string into an UTF-8 stream
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
wide_string - a wide string for converting
|
* wide_string - a wide string for converting
|
||||||
string_len - size of the string
|
* string_len - size of the string
|
||||||
mode - what to do with errors when converting
|
* mode - what to do with errors when converting
|
||||||
0: skip an invalid character
|
* 0: skip an invalid character
|
||||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
utf8 - a UTF-8 stream for the output sequence
|
* utf8 - a UTF-8 stream for the output sequence
|
||||||
|
*
|
||||||
this function returns false if there were some errors when converting
|
* this function returns false if there were some errors when converting
|
||||||
*/
|
*/
|
||||||
template<typename StreamType>
|
template<typename StreamType>
|
||||||
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, int mode)
|
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, int mode)
|
||||||
{
|
{
|
||||||
@@ -937,20 +939,20 @@ return !was_error;
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts a wide string into UTF-8 stream
|
* convert a wide string into an UTF-8 stream
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
wide_string - a null terminated wide string for converting
|
* wide_string - a null terminated wide string for converting
|
||||||
mode - what to do with errors when converting
|
* mode - what to do with errors when converting
|
||||||
0: skip an invalid character
|
* 0: skip an invalid character
|
||||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
utf8 - a UTF-8 stream for the output sequence
|
* utf8 - a UTF-8 stream for the output sequence
|
||||||
|
*
|
||||||
this function returns false if there were some errors when converting
|
* this function returns false if there were some errors when converting
|
||||||
*/
|
*/
|
||||||
template<typename StreamType>
|
template<typename StreamType>
|
||||||
bool wide_to_utf8(const wchar_t * wide_string, StreamType & utf8, int mode)
|
bool wide_to_utf8(const wchar_t * wide_string, StreamType & utf8, int mode)
|
||||||
{
|
{
|
||||||
@@ -964,20 +966,20 @@ return !was_error;
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts a wide string (std::wstring) into UTF-8 stream
|
* convert a wide string (std::wstring) into an UTF-8 stream
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
wide_string - a wide string for converting
|
* wide_string - a wide string for converting
|
||||||
mode - what to do with errors when converting
|
* mode - what to do with errors when converting
|
||||||
0: skip an invalid character
|
* 0: skip an invalid character
|
||||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
utf8 - a UTF-8 stream for the output sequence
|
* utf8 - a UTF-8 stream for the output sequence
|
||||||
|
*
|
||||||
this function returns false if there were some errors when converting
|
* this function returns false if there were some errors when converting
|
||||||
*/
|
*/
|
||||||
template<typename StreamType>
|
template<typename StreamType>
|
||||||
bool wide_to_utf8(const std::wstring & wide_string, StreamType & utf8, int mode)
|
bool wide_to_utf8(const std::wstring & wide_string, StreamType & utf8, int mode)
|
||||||
{
|
{
|
||||||
@@ -1014,7 +1016,7 @@ bool wide_stream_to_utf8(const Stream & stream, StreamType & utf8, bool clear, i
|
|||||||
int c = static_cast<int>(stream.get_wchar(i));
|
int c = static_cast<int>(stream.get_wchar(i));
|
||||||
bool is_correct = false;
|
bool is_correct = false;
|
||||||
|
|
||||||
if( utf8_check_range(c) )
|
if( is_correct_unicode_char(c) )
|
||||||
{
|
{
|
||||||
// CHECKME test me when sizeof(wchar_t) == 2
|
// CHECKME test me when sizeof(wchar_t) == 2
|
||||||
if( is_first_surrogate_char(c) )
|
if( is_first_surrogate_char(c) )
|
||||||
@@ -1067,19 +1069,19 @@ bool wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, bool clear
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*
|
||||||
this function converts a wide stream into a utf8 string
|
* convert a wide stream into an UTF-8 string
|
||||||
|
*
|
||||||
input:
|
* input:
|
||||||
buffer - a wide stream for reading from
|
* buffer - a wide stream for reading from
|
||||||
|
*
|
||||||
output:
|
* output:
|
||||||
utf8 - an output utf8 string
|
* utf8 - an output utf8 string
|
||||||
max_buffer_len - how many characters can be write (we write the terminating null character too)
|
* max_buffer_size - how many characters can be written (we write the terminating null character too)
|
||||||
was_buffer_sufficient_large - a pointer to a bool value - if provided it is set to true if the buffer was sufficient large
|
* was_buffer_sufficient_large - a pointer to a bool value - if provided it is set to true if the buffer was sufficiently large
|
||||||
|
*
|
||||||
this function returns false if there were some errors when converting or if the output buffer was too short
|
* this function returns false if there were some errors when converting or if the output buffer was too short
|
||||||
*/
|
*/
|
||||||
template<typename StreamType>
|
template<typename StreamType>
|
||||||
bool wide_stream_to_utf8(StreamType & buffer, char * utf8, std::size_t max_buffer_size, bool * was_buffer_sufficient_large, int mode)
|
bool wide_stream_to_utf8(StreamType & buffer, char * utf8, std::size_t max_buffer_size, bool * was_buffer_sufficient_large, int mode)
|
||||||
{
|
{
|
||||||
|
Reference in New Issue
Block a user