update utf8 functions comments
while here: - rename pt::utf8_check_range(...) -> pt::is_correct_unicode_char(...)
This commit is contained in:
@@ -1178,7 +1178,7 @@ void SpaceParser::read_unicode_json_format(bool has_first_byte, int first_byte)
|
||||
}
|
||||
}
|
||||
|
||||
if( !ok || !pt::utf8_check_range(lastc) )
|
||||
if( !ok || !pt::is_correct_unicode_char(lastc) )
|
||||
{
|
||||
lastc = 0xFFFD; // U+FFFD "replacement character";
|
||||
}
|
||||
@@ -1207,7 +1207,7 @@ int i;
|
||||
value = (value << 4) | hex_to_int(c);
|
||||
}
|
||||
|
||||
if( i > 0 && c == '}' && pt::utf8_check_range(value) )
|
||||
if( i > 0 && c == '}' && pt::is_correct_unicode_char(value) )
|
||||
{
|
||||
lastc = static_cast<wchar_t>(value);
|
||||
}
|
||||
|
@@ -41,21 +41,21 @@ namespace pt
|
||||
{
|
||||
|
||||
/*
|
||||
* returns true if 'c' is a correct unicode character
|
||||
* return true if 'c' is a correct unicode character
|
||||
*/
|
||||
bool utf8_check_range(int c)
|
||||
bool is_correct_unicode_char(int c)
|
||||
{
|
||||
return c>=0 && c<=0x10FFFF && !(c>=0xD800 && c<=0xDFFF);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* returns true if 'c' is a correct unicode character
|
||||
* return true if 'c' is a correct unicode character
|
||||
*
|
||||
* this method is used when reading from an utf8 string
|
||||
* how_many_bytes - means how many bytes from the utf8 string were read
|
||||
*/
|
||||
bool utf8_check_range(int c, int how_many_bytes)
|
||||
bool is_correct_unicode_char(int c, int how_many_bytes)
|
||||
{
|
||||
if( c >= 0x0000 && c <= 0x007f && how_many_bytes == 1 )
|
||||
{
|
||||
@@ -124,10 +124,9 @@ bool surrogate_pair_to_int(int c1, int c2, int & z)
|
||||
|
||||
|
||||
/*
|
||||
* an auxiliary function for converting from wide characters to UTF-8
|
||||
* converting a wide character into one int
|
||||
* convert one wide (or two wide) characters to an int
|
||||
*
|
||||
* returns how many wide characters were used
|
||||
* return how many wide characters were used
|
||||
* if string_len is greater than 0 then the return value is always greater than zero too
|
||||
*/
|
||||
size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct)
|
||||
@@ -167,7 +166,7 @@ size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool
|
||||
}
|
||||
else
|
||||
{
|
||||
correct = utf8_check_range(z);
|
||||
correct = is_correct_unicode_char(z);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@@ -178,7 +177,7 @@ size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool
|
||||
* an auxiliary function for converting from wide characters to UTF-8
|
||||
* converting a wide character into one int
|
||||
|
||||
* returns how many wide characters were used
|
||||
* return how many wide characters were used
|
||||
* if wide_string has at least one character then the return value is always greater than zero too
|
||||
*/
|
||||
size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct)
|
||||
@@ -201,7 +200,7 @@ return wide_to_int(wide_string, min_str_len, z, correct);
|
||||
|
||||
|
||||
/*
|
||||
* converts an int to a wide string
|
||||
* convert an int to a wide string
|
||||
*
|
||||
* this method will not terminate the output string with a null character
|
||||
* return how many characters have been written (0, 1 or 2)
|
||||
@@ -233,9 +232,9 @@ size_t int_to_wide(int c, wchar_t * res, size_t max_buf_len)
|
||||
|
||||
|
||||
/*
|
||||
* converts an int to a wide string
|
||||
* convert an int to a wide string
|
||||
*
|
||||
* returns true if a character was inserted to the string
|
||||
* return true if a character was inserted to the string
|
||||
*/
|
||||
bool int_to_wide(int c, std::wstring & res)
|
||||
{
|
||||
@@ -258,6 +257,9 @@ bool int_to_wide(int c, std::wstring & res)
|
||||
|
||||
|
||||
/*
|
||||
* convert one character into a stream
|
||||
* stream can be an utf8 or a wide stream
|
||||
*
|
||||
* return true if c was a correct unicode character
|
||||
* and has been put into the stream
|
||||
*/
|
||||
@@ -280,7 +282,7 @@ bool int_to_stream(int c, pt::Stream & stream)
|
||||
|
||||
|
||||
/*
|
||||
* this function converts one UTF-8 character into one wide-character
|
||||
* convert one UTF-8 character into one wide-character
|
||||
*
|
||||
* input:
|
||||
* utf8 - an input UTF-8 string
|
||||
@@ -318,7 +320,7 @@ size_t i, len;
|
||||
return i;
|
||||
}
|
||||
|
||||
if( utf8_check_range(res, len) )
|
||||
if( is_correct_unicode_char(res, len) )
|
||||
correct = true;
|
||||
|
||||
return len;
|
||||
@@ -326,19 +328,19 @@ return len;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts one UTF-8 character into one wide-character
|
||||
|
||||
input:
|
||||
utf8 - an input UTF-8 string (null terminated)
|
||||
|
||||
output:
|
||||
res - an output character
|
||||
correct - true if it is a correct character
|
||||
|
||||
the function returns how many characters have been used from the input string
|
||||
(returns zero only if the string has '\0' at the first character)
|
||||
even if there are errors the functions returns a different from zero value
|
||||
/*
|
||||
* convert one UTF-8 character into one wide-character
|
||||
*
|
||||
* input:
|
||||
* utf8 - an input UTF-8 string (null terminated)
|
||||
*
|
||||
* output:
|
||||
* res - an output character
|
||||
* correct - true if it is a correct character
|
||||
*
|
||||
* the function returns how many characters have been used from the input string
|
||||
* (returns zero only if the string has '\0' at the first character)
|
||||
* even if there are errors the function returns a non-zero value
|
||||
*/
|
||||
size_t utf8_to_int(const char * utf8, int & res, bool & correct)
|
||||
{
|
||||
@@ -362,7 +364,7 @@ size_t i, len;
|
||||
return i;
|
||||
}
|
||||
|
||||
if( utf8_check_range(res, len) )
|
||||
if( is_correct_unicode_char(res, len) )
|
||||
correct = true;
|
||||
|
||||
return len;
|
||||
@@ -371,19 +373,19 @@ return len;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts one UTF-8 character into one wide-character
|
||||
|
||||
input:
|
||||
utf8 - an input UTF-8 string
|
||||
|
||||
output:
|
||||
res - an output character
|
||||
correct - true if it is a correct character
|
||||
|
||||
the function returns how many characters have been used from the input string
|
||||
(returns zero only if utf8 is empty)
|
||||
even if there are errors the functions returns a different from zero value
|
||||
/*
|
||||
* convert one UTF-8 character into one wide-character
|
||||
*
|
||||
* input:
|
||||
* utf8 - an input UTF-8 string
|
||||
*
|
||||
* output:
|
||||
* res - an output character
|
||||
* correct - true if it is a correct character
|
||||
*
|
||||
* the function returns how many characters have been used from the input string
|
||||
* (returns zero only if utf8 is empty)
|
||||
* even if there are errors the function returns a non-zero value
|
||||
*/
|
||||
size_t utf8_to_int(const std::string & utf8, int & res, bool & correct)
|
||||
{
|
||||
@@ -392,17 +394,17 @@ size_t utf8_to_int(const std::string & utf8, int & res, bool & correct)
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts one UTF-8 character into one wide-character
|
||||
|
||||
input:
|
||||
utf8 - an input UTF-8 stream
|
||||
|
||||
output:
|
||||
res - an output character
|
||||
correct - true if it is a correct character
|
||||
|
||||
the function returns how many characters have been used from the input stream
|
||||
/*
|
||||
* convert one UTF-8 character into one wide-character
|
||||
*
|
||||
* input:
|
||||
* utf8 - an input UTF-8 stream
|
||||
*
|
||||
* output:
|
||||
* res - an output character
|
||||
* correct - true if it is a correct character
|
||||
*
|
||||
* the function returns how many characters have been used from the input stream
|
||||
*/
|
||||
size_t utf8_to_int(std::istream & utf8, int & res, bool & correct)
|
||||
{
|
||||
@@ -431,7 +433,7 @@ unsigned char uz;
|
||||
return i;
|
||||
}
|
||||
|
||||
if( utf8_check_range(res, len) )
|
||||
if( is_correct_unicode_char(res, len) )
|
||||
correct = true;
|
||||
|
||||
return len;
|
||||
@@ -465,7 +467,7 @@ unsigned char uz;
|
||||
return i + 1;
|
||||
}
|
||||
|
||||
if( utf8_check_range(res, len) )
|
||||
if( is_correct_unicode_char(res, len) )
|
||||
correct = true;
|
||||
}
|
||||
else
|
||||
@@ -482,20 +484,20 @@ unsigned char uz;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts an utf8 string into wide string (std::wstring)
|
||||
|
||||
input:
|
||||
utf8 - an input utf8 string
|
||||
utf8_len - size of the input string
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
res - an output wide string
|
||||
|
||||
the function returns false if there were some errors when converting
|
||||
/*
|
||||
* convert an utf8 string into a wide string (std::wstring)
|
||||
*
|
||||
* input:
|
||||
* utf8 - an input utf8 string
|
||||
* utf8_len - size of the input string
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* res - an output wide string
|
||||
*
|
||||
* the function returns false if there were some errors when converting
|
||||
*/
|
||||
bool utf8_to_wide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear, int mode)
|
||||
{
|
||||
@@ -513,19 +515,19 @@ bool utf8_to_wide(const char * utf8, size_t utf8_len, std::wstring & res, bool c
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts an utf8 string into wide string (std::wstring)
|
||||
|
||||
input:
|
||||
utf8 - an input utf8 null terminated string
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
res - an output wide string
|
||||
|
||||
the function returns false if there were some errors when converting
|
||||
/*
|
||||
* convert an utf8 string into a wide string (std::wstring)
|
||||
*
|
||||
* input:
|
||||
* utf8 - an input utf8 null terminated string
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* res - an output wide string
|
||||
*
|
||||
* the function returns false if there were some errors when converting
|
||||
*/
|
||||
bool utf8_to_wide(const char * utf8, std::wstring & res, bool clear, int mode)
|
||||
{
|
||||
@@ -539,19 +541,19 @@ return utf8_to_wide(utf8, utf8_len, res, clear, mode);
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts an utf8 string into wide string (std::wstring)
|
||||
|
||||
input:
|
||||
utf8 - an input utf8 string
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
res - an output wide string
|
||||
|
||||
the function returns false if there were some errors when converting
|
||||
/*
|
||||
* convert an utf8 string into a wide string (std::wstring)
|
||||
*
|
||||
* input:
|
||||
* utf8 - an input utf8 string
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* res - an output wide string
|
||||
*
|
||||
* the function returns false if there were some errors when converting
|
||||
*/
|
||||
bool utf8_to_wide(const std::string & utf8, std::wstring & res, bool clear, int mode)
|
||||
{
|
||||
@@ -560,19 +562,19 @@ bool utf8_to_wide(const std::string & utf8, std::wstring & res, bool clear, int
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts an utf8 stream into wide string (std::wstring)
|
||||
|
||||
input:
|
||||
utf8 - an input utf8 stream
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
res - an output wide string
|
||||
|
||||
the function returns false if there were some errors when converting
|
||||
/*
|
||||
* convert an utf8 stream into a wide string (std::wstring)
|
||||
*
|
||||
* input:
|
||||
* utf8 - an input utf8 stream
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* res - an output wide string
|
||||
*
|
||||
* the function returns false if there were some errors when converting
|
||||
*/
|
||||
bool utf8_to_wide(std::istream & utf8, std::wstring & res, bool clear, int mode)
|
||||
{
|
||||
@@ -603,18 +605,18 @@ return !was_error;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts one wide character into UTF-8 sequence
|
||||
|
||||
input:
|
||||
z - wide character
|
||||
|
||||
output:
|
||||
utf8 - a buffer for the output sequence
|
||||
utf8_len - the size of the buffer
|
||||
|
||||
the function returns how many characters have been written to the utf8,
|
||||
zero means the utf8 buffer is too small or 'z' is an incorrect unicode character
|
||||
/*
|
||||
* convert one wide character into an UTF-8 sequence
|
||||
*
|
||||
* input:
|
||||
* z - wide character
|
||||
*
|
||||
* output:
|
||||
* utf8 - a buffer for the output sequence
|
||||
* utf8_len - the size of the buffer
|
||||
*
|
||||
* the function returns how many characters have been written to the utf8,
|
||||
* zero means the utf8 buffer is too small or 'z' is an incorrect unicode character
|
||||
*/
|
||||
size_t int_to_utf8(int z, char * utf8, size_t utf8_max_len)
|
||||
{
|
||||
@@ -622,7 +624,7 @@ char buf[10];
|
||||
int i = 0;
|
||||
int mask = 0x3f; // 6 first bits set
|
||||
|
||||
if( utf8_max_len==0 || !utf8_check_range(z) )
|
||||
if( utf8_max_len==0 || !is_correct_unicode_char(z) )
|
||||
return 0;
|
||||
|
||||
if( z <= 0x7f )
|
||||
@@ -658,17 +660,17 @@ return a;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts one wide character into UTF-8 string
|
||||
|
||||
input:
|
||||
z - wide character
|
||||
|
||||
output:
|
||||
utf8 - a UTF-8 string for the output sequence (the string is not cleared)
|
||||
|
||||
the function returns how many characters have been written to the utf8 string,
|
||||
zero means that 'z' is an incorrect unicode character
|
||||
/*
|
||||
* convert one wide character into an UTF-8 string
|
||||
*
|
||||
* input:
|
||||
* z - wide character
|
||||
*
|
||||
* output:
|
||||
* utf8 - a UTF-8 string for the output sequence (the string is not cleared)
|
||||
*
|
||||
* the function returns how many characters have been written to the utf8 string,
|
||||
* zero means that 'z' is an incorrect unicode character
|
||||
*/
|
||||
size_t int_to_utf8(int z, std::string & utf8, bool clear)
|
||||
{
|
||||
@@ -688,20 +690,20 @@ return len;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string into UTF-8 string
|
||||
|
||||
input:
|
||||
wide_string - a wide string for converting
|
||||
string_len - the size of the string
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a UTF-8 string for the output sequence (the string is not cleared)
|
||||
|
||||
this function returns false if there were some errors when converting
|
||||
/*
|
||||
* convert a wide string into an UTF-8 string
|
||||
*
|
||||
* input:
|
||||
* wide_string - a wide string for converting
|
||||
* string_len - the size of the string
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* utf8 - a UTF-8 string for the output sequence (the string is not cleared)
|
||||
*
|
||||
* this function returns false if there were some errors when converting
|
||||
*/
|
||||
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear, int mode)
|
||||
{
|
||||
@@ -723,19 +725,19 @@ return !was_error;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string into UTF-8 string
|
||||
|
||||
input:
|
||||
wide_string - a null terminated wide string for converting
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a UTF-8 string for the output sequence (the string is not cleared)
|
||||
|
||||
this function returns false if there were some errors when converting
|
||||
/*
|
||||
* convert a wide string into an UTF-8 string
|
||||
*
|
||||
* input:
|
||||
* wide_string - a null terminated wide string for converting
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* utf8 - a UTF-8 string for the output sequence (the string is not cleared)
|
||||
*
|
||||
* this function returns false if there were some errors when converting
|
||||
*/
|
||||
bool wide_to_utf8(const wchar_t * wide_string, std::string & utf8, bool clear, int mode)
|
||||
{
|
||||
@@ -752,19 +754,19 @@ return !was_error;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string (std::wstring) into UTF-8 string
|
||||
|
||||
input:
|
||||
wide_string - a wide string for converting
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a UTF-8 string for the output sequence (the string is not cleared)
|
||||
|
||||
this function returns false if there were some errors when converting
|
||||
/*
|
||||
* convert a wide string (std::wstring) into an UTF-8 string
|
||||
*
|
||||
* input:
|
||||
* wide_string - a wide string for converting
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* utf8 - a UTF-8 string for the output sequence (the string is not cleared)
|
||||
*
|
||||
* this function returns false if there were some errors when converting
|
||||
*/
|
||||
bool wide_to_utf8(const std::wstring & wide_string, std::string & utf8, bool clear, int mode)
|
||||
{
|
||||
@@ -775,26 +777,26 @@ bool wide_to_utf8(const std::wstring & wide_string, std::string & utf8, bool cle
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string into UTF-8 stream
|
||||
|
||||
input:
|
||||
wide_string - a wide string for converting
|
||||
string_len - lenght of the wide string
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a buffer for the UTF-8 stream
|
||||
utf8_len - the size of the buffer
|
||||
utf8_written - how many bytes have been written to the buffer
|
||||
|
||||
this function returns false if there were some errors when converting or the output buffer was too small,
|
||||
the output string is not null terminated
|
||||
|
||||
if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
will continue converting but if the buffer is too small the function breaks immediately
|
||||
/*
|
||||
* convert a wide string into an UTF-8 stream
|
||||
*
|
||||
* input:
|
||||
* wide_string - a wide string for converting
|
||||
* string_len - length of the wide string
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* utf8 - a buffer for the UTF-8 stream
|
||||
* utf8_len - the size of the buffer
|
||||
* utf8_written - how many bytes have been written to the buffer
|
||||
*
|
||||
* this function returns false if there were some errors when converting or the output buffer was too small,
|
||||
* the output string is not null terminated
|
||||
*
|
||||
* if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
* will continue converting but if the buffer is too small the function breaks immediately
|
||||
*/
|
||||
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
||||
{
|
||||
@@ -830,25 +832,25 @@ return !was_error;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string (std::wstring) into UTF-8 stream
|
||||
|
||||
input:
|
||||
wide_string - a wide string for converting
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a buffer for the UTF-8 stream
|
||||
utf8_len - the size of the buffer
|
||||
utf8_written - how many bytes have been written to the buffer
|
||||
|
||||
this function returns false if there were some errors when converting or the output buffer was too small,
|
||||
the output string is not null terminated
|
||||
|
||||
if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
will continue converting but if the buffer is too small the function breaks immediately
|
||||
/*
|
||||
* convert a wide string (std::wstring) into an UTF-8 stream
|
||||
*
|
||||
* input:
|
||||
* wide_string - a wide string for converting
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* utf8 - a buffer for the UTF-8 stream
|
||||
* utf8_len - the size of the buffer
|
||||
* utf8_written - how many bytes have been written to the buffer
|
||||
*
|
||||
* this function returns false if there were some errors when converting or the output buffer was too small,
|
||||
* the output string is not null terminated
|
||||
*
|
||||
* if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
* will continue converting but if the buffer is too small the function breaks immediately
|
||||
*/
|
||||
bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
||||
{
|
||||
@@ -857,26 +859,26 @@ bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string into UTF-8 stream
|
||||
|
||||
input:
|
||||
wide_string - a wide string for converting
|
||||
string_len - lenght of the wide string
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a buffer for the UTF-8 stream
|
||||
utf8_len - the size of the buffer
|
||||
|
||||
this function returns false if there were some errors when converting or the output buffer was too small,
|
||||
the output string is null terminated (even if there were errors during converting)
|
||||
|
||||
if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
will continue converting but if the buffer is too small the function breaks immediately
|
||||
(in both cases the utf8 buffer is null terminated)
|
||||
/*
|
||||
* convert a wide string into an UTF-8 stream
|
||||
*
|
||||
* input:
|
||||
* wide_string - a wide string for converting
|
||||
* string_len - length of the wide string
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* utf8 - a buffer for the UTF-8 stream
|
||||
* utf8_len - the size of the buffer
|
||||
*
|
||||
* this function returns false if there were some errors when converting or the output buffer was too small,
|
||||
* the output string is null terminated (even if there were errors during converting)
|
||||
*
|
||||
* if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
* will continue converting but if the buffer is too small the function breaks immediately
|
||||
* (in both cases the utf8 buffer is null terminated)
|
||||
*/
|
||||
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode)
|
||||
{
|
||||
@@ -894,25 +896,25 @@ return res;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string (std::wstring) into UTF-8 stream
|
||||
|
||||
input:
|
||||
wide_string - a wide string for converting
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a buffer for the UTF-8 stream
|
||||
utf8_len - the size of the buffer
|
||||
|
||||
this function returns false if there were some errors when converting or the output buffer was too small,
|
||||
the output string is null terminated (even if there were errors during converting)
|
||||
|
||||
if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
will continue converting but if the buffer is too small the function breaks immediately
|
||||
(in both cases the utf8 buffer is null terminated)
|
||||
/*
|
||||
* convert a wide string (std::wstring) into an UTF-8 stream
|
||||
*
|
||||
* input:
|
||||
* wide_string - a wide string for converting
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* utf8 - a buffer for the UTF-8 stream
|
||||
* utf8_len - the size of the buffer
|
||||
*
|
||||
* this function returns false if there were some errors when converting or the output buffer was too small,
|
||||
* the output string is null terminated (even if there were errors during converting)
|
||||
*
|
||||
* if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
* will continue converting but if the buffer is too small the function breaks immediately
|
||||
* (in both cases the utf8 buffer is null terminated)
|
||||
*/
|
||||
bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode)
|
||||
{
|
||||
@@ -921,25 +923,25 @@ bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string into UTF-8 stream
|
||||
|
||||
input:
|
||||
wide_string - a null terminated wide string for converting
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a buffer for the UTF-8 stream
|
||||
utf8_len - the size of the buffer
|
||||
utf8_written - how many bytes have been written to the buffer
|
||||
|
||||
this function returns false if there were some errors when converting or the output buffer was too small,
|
||||
the output string is not null terminated
|
||||
|
||||
if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
will continue converting but if the buffer is too small the function breaks immediately
|
||||
/*
|
||||
* convert a wide string into an UTF-8 stream
|
||||
*
|
||||
* input:
|
||||
* wide_string - a null terminated wide string for converting
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* utf8 - a buffer for the UTF-8 stream
|
||||
* utf8_len - the size of the buffer
|
||||
* utf8_written - how many bytes have been written to the buffer
|
||||
*
|
||||
* this function returns false if there were some errors when converting or the output buffer was too small,
|
||||
* the output string is not null terminated
|
||||
*
|
||||
* if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
* will continue converting but if the buffer is too small the function breaks immediately
|
||||
*/
|
||||
bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
||||
{
|
||||
@@ -976,25 +978,25 @@ return !was_error;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string into UTF-8 stream
|
||||
|
||||
input:
|
||||
wide_string - a wide string for converting
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a buffer for the UTF-8 stream
|
||||
utf8_len - the size of the buffer
|
||||
|
||||
this function returns false if there were some errors when converting or the output buffer was too small,
|
||||
the output string is null terminated (even if there were errors during converting)
|
||||
|
||||
if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
will continue converting but if the buffer is too small the function breaks immediately
|
||||
(in both cases the utf8 buffer is null terminated)
|
||||
/*
|
||||
* convert a wide string into an UTF-8 stream
|
||||
*
|
||||
* input:
|
||||
* wide_string - a wide string for converting
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* utf8 - a buffer for the UTF-8 stream
|
||||
* utf8_len - the size of the buffer
|
||||
*
|
||||
* this function returns false if there were some errors when converting or the output buffer was too small,
|
||||
* the output string is null terminated (even if there were errors during converting)
|
||||
*
|
||||
* if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
* will continue converting but if the buffer is too small the function breaks immediately
|
||||
* (in both cases the utf8 buffer is null terminated)
|
||||
*/
|
||||
bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode)
|
||||
{
|
||||
@@ -1017,8 +1019,8 @@ return res;
|
||||
namespace private_namespace
|
||||
{
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from UTF-8 string
|
||||
/*
|
||||
* an auxiliary function for converting from UTF-8 string
|
||||
*/
|
||||
bool utf8_to_int_first_octet(unsigned char uz, size_t & len, int & res)
|
||||
{
|
||||
@@ -1041,8 +1043,8 @@ return true;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from UTF-8 string
|
||||
/*
|
||||
* an auxiliary function for converting from UTF-8 string
|
||||
*/
|
||||
bool utf8_to_int_add_next_octet(unsigned char uz, int & res)
|
||||
{
|
||||
@@ -1057,19 +1059,19 @@ return true;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
|
||||
returns how many wide characters were used
|
||||
if string_len is greater than 0 then the return value is always greater than zero too
|
||||
|
||||
utf8_written - how many characters were saved in the utf8 string (the string doesn't have
|
||||
a null terminating character)
|
||||
it can be equal to zero if the utf8 buffer is too small or there was an incorrect wide character read
|
||||
was_utf8_buf_too_small - will be true if the utf8 buffer is too small
|
||||
if this flag is true then utf8_written is equal to zero
|
||||
was_error - will be true if there is an error when converting (there was an incorrect wide character)
|
||||
(was_error will not be true if the utf8 buffer is too small)
|
||||
/*
|
||||
* an auxiliary function for converting from wide characters to UTF-8
|
||||
*
|
||||
* return how many wide characters were used
|
||||
* if string_len is greater than 0 then the return value is always greater than zero too
|
||||
*
|
||||
* utf8_written - how many characters were saved in the utf8 string (the string doesn't have
|
||||
* a null terminating character)
|
||||
* it can be equal to zero if the utf8 buffer is too small or there was an incorrect wide character read
|
||||
* was_utf8_buf_too_small - will be true if the utf8 buffer is too small
|
||||
* if this flag is true then utf8_written is equal to zero
|
||||
* was_error - will be true if there is an error when converting (there was an incorrect wide character)
|
||||
* (was_error will not be true if the utf8 buffer is too small)
|
||||
*/
|
||||
size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
|
||||
size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode)
|
||||
@@ -1107,11 +1109,11 @@ return chars;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
|
||||
returns how many wide characters were used
|
||||
if string_len is greater than 0 then the return value is always greater than zero too
|
||||
/*
|
||||
* an auxiliary function for converting from wide characters to UTF-8
|
||||
*
|
||||
* return how many wide characters were used
|
||||
* if string_len is greater than 0 then the return value is always greater than zero too
|
||||
*/
|
||||
size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode)
|
||||
{
|
||||
@@ -1137,11 +1139,11 @@ return chars;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
|
||||
returns how many wide characters were used
|
||||
if wide_string has at least one character then the return value is always greater than zero too
|
||||
/*
|
||||
* an auxiliary function for converting from wide characters to UTF-8
|
||||
*
|
||||
* return how many wide characters were used
|
||||
* if wide_string has at least one character then the return value is always greater than zero too
|
||||
*/
|
||||
size_t wide_one_to_utf8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode)
|
||||
{
|
||||
|
450
src/utf8/utf8.h
450
src/utf8/utf8.h
@@ -42,85 +42,80 @@
|
||||
namespace pt
|
||||
{
|
||||
|
||||
|
||||
/*
|
||||
* public methods are also defined in utf8_stream.h
|
||||
* UTF-8, a transformation format of ISO 10646
|
||||
* http://tools.ietf.org/html/rfc3629
|
||||
*
|
||||
* when wchar_t is 4 bytes length we use UTF-32
|
||||
* when wchar_t is 2 bytes length we use UTF-16 (with surrogate pairs)
|
||||
*
|
||||
* UTF-16
|
||||
* http://www.ietf.org/rfc/rfc2781.txt
|
||||
*/
|
||||
|
||||
|
||||
/*!
|
||||
UTF-8, a transformation format of ISO 10646
|
||||
http://tools.ietf.org/html/rfc3629
|
||||
|
||||
when wchar_t is 4 bytes length we use UTF-32
|
||||
when wchar_t is 2 bytes length we use UTF-16 (with surrogate pairs)
|
||||
|
||||
UTF-16
|
||||
http://www.ietf.org/rfc/rfc2781.txt
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
returns true if 'c' is a correct unicode character
|
||||
|
||||
RENAMEME to is_correct_unicode_char
|
||||
*/
|
||||
bool utf8_check_range(int c);
|
||||
|
||||
|
||||
/*!
|
||||
returns true if 'c' is a correct unicode character
|
||||
|
||||
this method is used when reading from an utf8 string
|
||||
how_many_chars - means how many characters from utf8 string were read
|
||||
*/
|
||||
bool utf8_check_range(int c, int how_many_bytes);
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* returns true if 'c' is a characters from the surrogate range
|
||||
* return true if 'c' is a correct unicode character
|
||||
*/
|
||||
bool is_correct_unicode_char(int c);
|
||||
|
||||
|
||||
/*
|
||||
* return true if 'c' is a correct unicode character
|
||||
*
|
||||
* this method is used when reading from an utf8 string
|
||||
* how_many_bytes - means how many bytes from the utf8 string were read
|
||||
*/
|
||||
bool is_correct_unicode_char(int c, int how_many_bytes);
|
||||
|
||||
|
||||
/*
|
||||
* return true if 'c' is a character from the surrogate range
|
||||
* (c>=0xD800 && c<=0xDFFF)
|
||||
*
|
||||
*/
|
||||
bool is_surrogate_char(int c);
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* returns true if 'c' is a first character from the surrogate pair
|
||||
* return true if 'c' is a first character from the surrogate pair
|
||||
* (c>=0xD800 && c<=0xDBFF)
|
||||
*/
|
||||
bool is_first_surrogate_char(int c);
|
||||
|
||||
|
||||
/*
|
||||
* returns true if 'c' is a second character from the surrogate pair
|
||||
* return true if 'c' is a second character from the surrogate pair
|
||||
* (c>=0xDC00 && c<=0xDFFF)
|
||||
*/
|
||||
bool is_second_surrogate_char(int c);
|
||||
|
||||
|
||||
/*
|
||||
* returns a code point from two surrogate pair characters
|
||||
* return a code point from two surrogate pair characters
|
||||
*/
|
||||
bool surrogate_pair_to_int(int c1, int c2, int & z);
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* converting one character into a stream
|
||||
* stream can be an utf8 or wide stream
|
||||
* convert one character into a stream
|
||||
* stream can be an utf8 or a wide stream
|
||||
*
|
||||
* return true if c was a correct unicode character
|
||||
* and has been put to the stream
|
||||
*/
|
||||
bool int_to_stream(int c, pt::Stream & stream);
|
||||
|
||||
|
||||
/*
|
||||
* converting a one unicode character to an int
|
||||
* such an unicode character can consists of one or two wide characters
|
||||
* convert one wide (or two wide) characters to an int
|
||||
*
|
||||
* return how many wide characters were used
|
||||
* if string_len is greater than 0 then the return value is always greater than zero too
|
||||
*/
|
||||
size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct); // may these methods make public?
|
||||
size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct);
|
||||
size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct);
|
||||
|
||||
|
||||
@@ -134,8 +129,8 @@ size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct);
|
||||
*
|
||||
*/
|
||||
|
||||
/*!
|
||||
converting one character from UTF-8 to an int
|
||||
/*
|
||||
* convert one character from UTF-8 to an int
|
||||
*/
|
||||
size_t utf8_to_int(const char * utf8, size_t utf8_len, int & res, bool & correct);
|
||||
size_t utf8_to_int(const char * utf8, int & res, bool & correct);
|
||||
@@ -147,42 +142,45 @@ template<typename StreamIteratorType>
|
||||
size_t utf8_to_int(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, int & res, bool & correct);
|
||||
|
||||
|
||||
/*!
|
||||
converting one character from int to wide stream
|
||||
|
||||
returns true if a character was inserted to the stream
|
||||
/*
|
||||
* convert one character from an int to a wide stream
|
||||
*
|
||||
* return true if a character was inserted to the stream
|
||||
*/
|
||||
template<typename StreamType>
|
||||
bool int_to_wide(int c, StreamType & res);
|
||||
|
||||
|
||||
/*!
|
||||
converting one character from int to wide string
|
||||
|
||||
this method will not terminate the output string with a null character
|
||||
return how many characters have been written (0, 1 or 2)
|
||||
/*
|
||||
* convert one character from an int to a wide string
|
||||
*
|
||||
* this method will not terminate the output string with a null character
|
||||
* return how many characters have been written (0, 1 or 2)
|
||||
*/
|
||||
size_t int_to_wide(int c, wchar_t * res, size_t max_buf_len);
|
||||
|
||||
|
||||
/*!
|
||||
converting one character from int to wide string
|
||||
|
||||
returns true if a character was inserted to the string
|
||||
/*
|
||||
* convert one character from an int to a wide string
|
||||
*
|
||||
* return true if a character was inserted to the string
|
||||
*/
|
||||
bool int_to_wide(int c, std::wstring & res);
|
||||
|
||||
|
||||
/*!
|
||||
call a convert_function for each character from an utf8 string
|
||||
/*
|
||||
* call an output_function for each character from an utf8 string
|
||||
*
|
||||
* output_function takes one int parameter:
|
||||
* output_function(int c)
|
||||
*/
|
||||
template<typename OutputFunction>
|
||||
bool utf8_to_output_function(const char * utf8, size_t utf8_len, OutputFunction output_function, int mode = 1);
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
converting UTF-8 string to a wide string
|
||||
/*
|
||||
* convert an UTF-8 string to a wide string
|
||||
*/
|
||||
bool utf8_to_wide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear = true, int mode = 1);
|
||||
bool utf8_to_wide(const char * utf8, std::wstring & res, bool clear = true, int mode = 1);
|
||||
@@ -216,7 +214,9 @@ bool utf8_to_output_function(StreamIteratorType & iterator_in, const StreamItera
|
||||
template<typename CharT, size_t stack_size, size_t heap_block_size>
|
||||
class TextStreamBase;
|
||||
|
||||
// defined at the end in textstream.h
|
||||
/*
|
||||
* this method is defined at the end of textstream.h
|
||||
*/
|
||||
template<size_t stack_size, size_t heap_block_size, typename StreamOrStringType>
|
||||
bool utf8_to_wide(const TextStreamBase<char, stack_size, heap_block_size> & utf8, StreamOrStringType & out_stream, bool clear_stream = true, int mode = 1);
|
||||
|
||||
@@ -239,8 +239,8 @@ bool utf8_to_wide(const StreamType & stream, wchar_t * out_buffer, size_t max_bu
|
||||
*/
|
||||
|
||||
|
||||
/*!
|
||||
converting one int character to UTF-8
|
||||
/*
|
||||
* convert one int character to UTF-8
|
||||
*/
|
||||
size_t int_to_utf8(int z, char * utf8, size_t utf8_max_len);
|
||||
size_t int_to_utf8(int z, std::string & utf8, bool clear = true);
|
||||
@@ -249,20 +249,22 @@ template<typename StreamType>
|
||||
size_t int_to_utf8(int z, StreamType & utf8);
|
||||
|
||||
|
||||
/*!
|
||||
call an output_function for some sequence of wide characters from the stream buffer
|
||||
|
||||
output_function has two arguments: const char * buf, size_t len:
|
||||
output_function(const char * buf, size_t len)
|
||||
|
||||
StreamType should have a const_iterator and begin() and end() methods
|
||||
/*
|
||||
* call an output_function for some sequence of wide characters from the stream buffer
|
||||
*
|
||||
* output_function takes two arguments: const char * buf, size_t len:
|
||||
* output_function(const char * buf, size_t len)
|
||||
* this is a buffer which was filled with utf8 characters
|
||||
* (this buffer can have up to 256 characters)
|
||||
*
|
||||
* StreamType should have a const_iterator and begin() and end() methods
|
||||
*/
|
||||
template<typename StreamType, typename OutputFunction>
|
||||
bool wide_to_output_function(StreamType & buffer, OutputFunction output_function, int mode = 1);
|
||||
|
||||
|
||||
/*!
|
||||
converting a wide string to UTF-8 string
|
||||
/*
|
||||
* convert a wide string to an UTF-8 string
|
||||
*/
|
||||
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear = true, int mode = 1);
|
||||
bool wide_to_utf8(const wchar_t * wide_string, std::string & utf8, bool clear = true, int mode = 1);
|
||||
@@ -299,7 +301,9 @@ bool wide_stream_to_utf8(StreamType & buffer, char * utf8, std::size_t max_buffe
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* some private/auxiliary methods
|
||||
*/
|
||||
namespace private_namespace
|
||||
{
|
||||
bool utf8_to_int_first_octet(unsigned char uz, size_t & len, int & res);
|
||||
@@ -313,11 +317,11 @@ size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, std::str
|
||||
size_t wide_one_to_utf8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode);
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
|
||||
returns how many wide characters were used
|
||||
if string_len is greater than 0 then the return value is always greater than zero too
|
||||
/*
|
||||
* an auxiliary function for converting from wide characters to UTF-8
|
||||
*
|
||||
* return how many wide characters were used
|
||||
* if string_len is greater than 0 then the return value is always greater than zero too
|
||||
*/
|
||||
template<typename StreamType>
|
||||
static size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, bool & was_error, int mode)
|
||||
@@ -343,8 +347,8 @@ return chars;
|
||||
}
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
/*
|
||||
* an auxiliary function for converting from wide characters to UTF-8
|
||||
*/
|
||||
template<typename StreamType>
|
||||
static size_t wide_one_to_utf8(const wchar_t * wide_string, StreamType & utf8, bool & was_error, int mode)
|
||||
@@ -389,18 +393,18 @@ bool int_to_wide(int c, StreamType & res)
|
||||
}
|
||||
|
||||
|
||||
/*!
|
||||
this function converts one UTF-8 character into int
|
||||
|
||||
input:
|
||||
iterator_in - an stream iterator for reading from (the stream can by any stream, we use *, ++ and == operators only)
|
||||
iterator_end - an end iterator
|
||||
|
||||
output:
|
||||
res - an output character
|
||||
correct - true if it is a correct character
|
||||
|
||||
the function returns how many characters have been used from the input stream
|
||||
/*
|
||||
* convert one UTF-8 character into int
|
||||
*
|
||||
* input:
|
||||
* iterator_in - a stream iterator for reading from (the stream can be any stream, we use *, ++ and == operators only)
|
||||
* iterator_end - an end iterator
|
||||
*
|
||||
* output:
|
||||
* res - an output character
|
||||
* correct - true if it is a correct character
|
||||
*
|
||||
* the function returns how many characters have been used from the input stream
|
||||
*/
|
||||
template<typename StreamIteratorType>
|
||||
size_t utf8_to_int(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, int & res, bool & correct)
|
||||
@@ -432,7 +436,7 @@ unsigned char uz;
|
||||
return i + 1;
|
||||
}
|
||||
|
||||
if( utf8_check_range(res, len) )
|
||||
if( is_correct_unicode_char(res, len) )
|
||||
correct = true;
|
||||
|
||||
return len;
|
||||
@@ -440,11 +444,10 @@ return len;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
converting UTF-8 string to a TextStreamBase<wchar_t,...> stream
|
||||
(need to be tested)
|
||||
/*
|
||||
* convert UTF-8 string to a TextStreamBase<wchar_t,...> stream
|
||||
* (need to be tested)
|
||||
*/
|
||||
// need to be tested
|
||||
template<typename StreamType>
|
||||
bool utf8_to_wide(const char * utf8, size_t utf8_len, StreamType & res, bool clear, int mode)
|
||||
{
|
||||
@@ -642,18 +645,18 @@ bool wide_to_output_function(StreamType & buffer, OutputFunction output_function
|
||||
|
||||
|
||||
/*
|
||||
this function converts a UTF-8 stream into a wide stream or a wide string
|
||||
|
||||
input:
|
||||
stream - a UTF-8 stream for converting
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
res - a wide stream or a wide string for the output sequence
|
||||
|
||||
this function returns false if there were some errors when converting
|
||||
* convert a UTF-8 stream into a wide stream or a wide string
|
||||
*
|
||||
* input:
|
||||
* stream - a UTF-8 stream for converting
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* res - a wide stream or a wide string for the output sequence
|
||||
*
|
||||
* this function returns false if there were some errors when converting
|
||||
*/
|
||||
template<typename StreamOrStringType>
|
||||
bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear, int mode)
|
||||
@@ -668,20 +671,19 @@ bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear, i
|
||||
|
||||
|
||||
/*
|
||||
this function reads characters from a UTF-8 stream and calls an output_function
|
||||
|
||||
input:
|
||||
stream - a UTF-8 stream for converting
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
output_function - is a function which gets two artuments: int (character) and a reference to StreamOrStringType
|
||||
and should put the character to the output string/stream, this function should have the signature like this:
|
||||
output_function(int z, StreamOrStringType & res)
|
||||
|
||||
this function returns false if there were some errors when converting
|
||||
* read characters from an UTF-8 stream and call an output_function
|
||||
*
|
||||
* input:
|
||||
* stream - a UTF-8 stream for converting
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* output_function - a function which takes one argument: an int (a character):
|
||||
* output_function(int c)
|
||||
*
|
||||
* this function returns false if there were some errors when converting
|
||||
*/
|
||||
template<typename OutputFunction>
|
||||
bool utf8_to_output_function(const Stream & stream, OutputFunction output_function, int mode)
|
||||
@@ -723,17 +725,17 @@ bool utf8_to_output_function(const Stream & stream, OutputFunction output_functi
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts UTF-8 stream into a wide stream or a wide string
|
||||
|
||||
input:
|
||||
iterator_in - a stream iterator for reading from (the stream can by any stream, we use *, ++ and == operators only)
|
||||
iterator_end - an end iterator
|
||||
|
||||
output:
|
||||
out_stream - an output wide stream or wide string (the stream can by of any kind, we use only << operator for a stream and += for a string)
|
||||
|
||||
this function returns false if there were some errors when converting
|
||||
/*
|
||||
* convert an UTF-8 stream into a wide stream or a wide string
|
||||
*
|
||||
* input:
|
||||
* iterator_in - a stream iterator for reading from (the stream can be any stream, we use *, ++ and == operators only)
|
||||
* iterator_end - an end iterator
|
||||
*
|
||||
* output:
|
||||
* out_stream - an output wide stream or wide string (the stream can be of any kind, we use only << operator for a stream and += for a string)
|
||||
*
|
||||
* this function returns false if there were some errors when converting
|
||||
*/
|
||||
template<typename StreamIteratorType, typename StreamOrStringType>
|
||||
bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, StreamOrStringType & out_stream, bool clear_stream, int mode)
|
||||
@@ -776,19 +778,19 @@ bool utf8_to_output_function(StreamIteratorType & iterator_in, const StreamItera
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts UTF-8 stream into a wide string
|
||||
|
||||
input:
|
||||
iterator_in - a stream iterator for reading from (the stream can by any stream, we use *, ++ and == operators only)
|
||||
iterator_end - an end iterator
|
||||
|
||||
output:
|
||||
out_buffer - an output wide string
|
||||
max_buffer_len - how many characters can be write (we write the terminating null character too)
|
||||
was_buffer_sufficient_large - a pointer to a bool value - if provided it is set to true if the buffer was sufficient large
|
||||
|
||||
this function returns false if there were some errors when converting or if the output buffer was too short
|
||||
/*
|
||||
* convert an UTF-8 stream into a wide string
|
||||
*
|
||||
* input:
|
||||
* iterator_in - a stream iterator for reading from (the stream can be any stream, we use *, ++ and == operators only)
|
||||
* iterator_end - an end iterator
|
||||
*
|
||||
* output:
|
||||
* out_buffer - an output wide string
|
||||
* max_buffer_len - how many characters can be written (we write the terminating null character too)
|
||||
* was_buffer_sufficient_large - a pointer to a bool value - if provided it is set to true if the buffer was sufficiently large
|
||||
*
|
||||
* this function returns false if there were some errors when converting or if the output buffer was too short
|
||||
*/
|
||||
template<typename StreamIteratorType>
|
||||
bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, wchar_t * out_buffer, size_t max_buffer_len, int mode, bool * was_buffer_sufficient_large)
|
||||
@@ -849,18 +851,18 @@ bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & i
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts UTF-8 stream into a wide string
|
||||
|
||||
input:
|
||||
stream - a stream for reading from
|
||||
|
||||
output:
|
||||
out_buffer - an output wide string
|
||||
max_buffer_len - how many characters can be write (we write the terminating null character too)
|
||||
was_buffer_sufficient_large - a pointer to a bool value - if provided it is set to true if the buffer was sufficient large
|
||||
|
||||
this function returns false if there were some errors when converting or if the output buffer was too short
|
||||
/*
|
||||
* convert an UTF-8 stream into a wide string
|
||||
*
|
||||
* input:
|
||||
* stream - a stream for reading from
|
||||
*
|
||||
* output:
|
||||
* out_buffer - an output wide string
|
||||
* max_buffer_len - how many characters can be written (we write the terminating null character too)
|
||||
* was_buffer_sufficient_large - a pointer to a bool value - if provided it is set to true if the buffer was sufficiently large
|
||||
*
|
||||
* this function returns false if there were some errors when converting or if the output buffer was too short
|
||||
*/
|
||||
template<typename StreamType>
|
||||
bool utf8_to_wide(const StreamType & stream, wchar_t * out_buffer, size_t max_buffer_len, bool * was_buffer_sufficient_large, int mode)
|
||||
@@ -873,17 +875,17 @@ bool utf8_to_wide(const StreamType & stream, wchar_t * out_buffer, size_t max_bu
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts one wide character into UTF-8 stream
|
||||
|
||||
input:
|
||||
z - wide character
|
||||
|
||||
output:
|
||||
utf8 - a UTF-8 stream for the output sequence
|
||||
|
||||
the function returns how many characters have been written to the utf8 stream,
|
||||
zero means that 'z' is an incorrect unicode character
|
||||
/*
|
||||
* convert one wide character into an UTF-8 stream
|
||||
*
|
||||
* input:
|
||||
* z - wide character
|
||||
*
|
||||
* output:
|
||||
* utf8 - a UTF-8 stream for the output sequence
|
||||
*
|
||||
* the function returns how many characters have been written to the utf8 stream,
|
||||
* zero means that 'z' is an incorrect unicode character
|
||||
*/
|
||||
template<typename StreamType>
|
||||
size_t int_to_utf8(int z, StreamType & utf8)
|
||||
@@ -902,20 +904,20 @@ size_t int_to_utf8(int z, StreamType & utf8)
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string into UTF-8 stream
|
||||
|
||||
input:
|
||||
wide_string - a wide string for converting
|
||||
string_len - size of the string
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a UTF-8 stream for the output sequence
|
||||
|
||||
this function returns false if there were some errors when converting
|
||||
/*
|
||||
* convert a wide string into an UTF-8 stream
|
||||
*
|
||||
* input:
|
||||
* wide_string - a wide string for converting
|
||||
* string_len - size of the string
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* utf8 - a UTF-8 stream for the output sequence
|
||||
*
|
||||
* this function returns false if there were some errors when converting
|
||||
*/
|
||||
template<typename StreamType>
|
||||
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, int mode)
|
||||
@@ -937,19 +939,19 @@ return !was_error;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string into UTF-8 stream
|
||||
|
||||
input:
|
||||
wide_string - a null terminated wide string for converting
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a UTF-8 stream for the output sequence
|
||||
|
||||
this function returns false if there were some errors when converting
|
||||
/*
|
||||
* convert a wide string into an UTF-8 stream
|
||||
*
|
||||
* input:
|
||||
* wide_string - a null terminated wide string for converting
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* utf8 - a UTF-8 stream for the output sequence
|
||||
*
|
||||
* this function returns false if there were some errors when converting
|
||||
*/
|
||||
template<typename StreamType>
|
||||
bool wide_to_utf8(const wchar_t * wide_string, StreamType & utf8, int mode)
|
||||
@@ -964,19 +966,19 @@ return !was_error;
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string (std::wstring) into UTF-8 stream
|
||||
|
||||
input:
|
||||
wide_string - a wide string for converting
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a UTF-8 stream for the output sequence
|
||||
|
||||
this function returns false if there were some errors when converting
|
||||
/*
|
||||
* convert a wide string (std::wstring) into an UTF-8 stream
|
||||
*
|
||||
* input:
|
||||
* wide_string - a wide string for converting
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* utf8 - a UTF-8 stream for the output sequence
|
||||
*
|
||||
* this function returns false if there were some errors when converting
|
||||
*/
|
||||
template<typename StreamType>
|
||||
bool wide_to_utf8(const std::wstring & wide_string, StreamType & utf8, int mode)
|
||||
@@ -1014,7 +1016,7 @@ bool wide_stream_to_utf8(const Stream & stream, StreamType & utf8, bool clear, i
|
||||
int c = static_cast<int>(stream.get_wchar(i));
|
||||
bool is_correct = false;
|
||||
|
||||
if( utf8_check_range(c) )
|
||||
if( is_correct_unicode_char(c) )
|
||||
{
|
||||
// CHECKME test me when sizeof(wchar_t) == 2
|
||||
if( is_first_surrogate_char(c) )
|
||||
@@ -1067,18 +1069,18 @@ bool wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, bool clear
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide stream into a utf8 string
|
||||
|
||||
input:
|
||||
buffer - a wide stream for reading from
|
||||
|
||||
output:
|
||||
utf8 - an output utf8 string
|
||||
max_buffer_len - how many characters can be write (we write the terminating null character too)
|
||||
was_buffer_sufficient_large - a pointer to a bool value - if provided it is set to true if the buffer was sufficient large
|
||||
|
||||
this function returns false if there were some errors when converting or if the output buffer was too short
|
||||
/*
|
||||
* convert a wide stream into an UTF-8 string
|
||||
*
|
||||
* input:
|
||||
* buffer - a wide stream for reading from
|
||||
*
|
||||
* output:
|
||||
* utf8 - an output utf8 string
|
||||
* max_buffer_size - how many characters can be written (we write the terminating null character too)
|
||||
* was_buffer_sufficient_large - a pointer to a bool value - if provided it is set to true if the buffer was sufficiently large
|
||||
*
|
||||
* this function returns false if there were some errors when converting or if the output buffer was too short
|
||||
*/
|
||||
template<typename StreamType>
|
||||
bool wide_stream_to_utf8(StreamType & buffer, char * utf8, std::size_t max_buffer_size, bool * was_buffer_sufficient_large, int mode)
|
||||
|
Reference in New Issue
Block a user