update utf8 functions comments

while here:
- rename pt::utf8_check_range(...) -> pt::is_correct_unicode_char(...)
This commit is contained in:
2024-05-31 00:23:43 +02:00
parent 450c5d55e9
commit 2689c9fece
3 changed files with 595 additions and 591 deletions

View File

@@ -1178,7 +1178,7 @@ void SpaceParser::read_unicode_json_format(bool has_first_byte, int first_byte)
} }
} }
if( !ok || !pt::utf8_check_range(lastc) ) if( !ok || !pt::is_correct_unicode_char(lastc) )
{ {
lastc = 0xFFFD; // U+FFFD "replacement character"; lastc = 0xFFFD; // U+FFFD "replacement character";
} }
@@ -1207,7 +1207,7 @@ int i;
value = (value << 4) | hex_to_int(c); value = (value << 4) | hex_to_int(c);
} }
if( i > 0 && c == '}' && pt::utf8_check_range(value) ) if( i > 0 && c == '}' && pt::is_correct_unicode_char(value) )
{ {
lastc = static_cast<wchar_t>(value); lastc = static_cast<wchar_t>(value);
} }

View File

@@ -41,21 +41,21 @@ namespace pt
{ {
/* /*
* returns true if 'c' is a correct unicode character * return true if 'c' is a correct unicode character
*/ */
bool utf8_check_range(int c) bool is_correct_unicode_char(int c)
{ {
return c>=0 && c<=0x10FFFF && !(c>=0xD800 && c<=0xDFFF); return c>=0 && c<=0x10FFFF && !(c>=0xD800 && c<=0xDFFF);
} }
/* /*
* returns true if 'c' is a correct unicode character * return true if 'c' is a correct unicode character
* *
* this method is used when reading from an utf8 string * this method is used when reading from an utf8 string
* how_many_bytes - means how many bytes from the utf8 string were read * how_many_bytes - means how many bytes from the utf8 string were read
*/ */
bool utf8_check_range(int c, int how_many_bytes) bool is_correct_unicode_char(int c, int how_many_bytes)
{ {
if( c >= 0x0000 && c <= 0x007f && how_many_bytes == 1 ) if( c >= 0x0000 && c <= 0x007f && how_many_bytes == 1 )
{ {
@@ -124,10 +124,9 @@ bool surrogate_pair_to_int(int c1, int c2, int & z)
/* /*
* an auxiliary function for converting from wide characters to UTF-8 * convert one wide (or two wide) characters to an int
* converting a wide character into one int
* *
* returns how many wide characters were used * return how many wide characters were used
* if string_len is greater than 0 then the return value is always greater than zero too * if string_len is greater than 0 then the return value is always greater than zero too
*/ */
size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct) size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct)
@@ -167,7 +166,7 @@ size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool
} }
else else
{ {
correct = utf8_check_range(z); correct = is_correct_unicode_char(z);
return 1; return 1;
} }
} }
@@ -178,7 +177,7 @@ size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool
* an auxiliary function for converting from wide characters to UTF-8 * an auxiliary function for converting from wide characters to UTF-8
* converting a wide character into one int * converting a wide character into one int
* returns how many wide characters were used * return how many wide characters were used
* if wide_string has at least one character then the return value is always greater than zero too * if wide_string has at least one character then the return value is always greater than zero too
*/ */
size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct) size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct)
@@ -201,7 +200,7 @@ return wide_to_int(wide_string, min_str_len, z, correct);
/* /*
* converts an int to a wide string * convert an int to a wide string
* *
* this method will not terminate the output string with a null character * this method will not terminate the output string with a null character
* return how many characters have been written (0, 1 or 2) * return how many characters have been written (0, 1 or 2)
@@ -233,9 +232,9 @@ size_t int_to_wide(int c, wchar_t * res, size_t max_buf_len)
/* /*
* converts an int to a wide string * convert an int to a wide string
* *
* returns true if a character was inserted to the string * return true if a character was inserted to the string
*/ */
bool int_to_wide(int c, std::wstring & res) bool int_to_wide(int c, std::wstring & res)
{ {
@@ -258,6 +257,9 @@ bool int_to_wide(int c, std::wstring & res)
/* /*
* convert one character into a stream
* stream can be an utf8 or a wide stream
*
* return true if c was a correct unicode character * return true if c was a correct unicode character
* and has been put into the stream * and has been put into the stream
*/ */
@@ -280,7 +282,7 @@ bool int_to_stream(int c, pt::Stream & stream)
/* /*
* this function converts one UTF-8 character into one wide-character * convert one UTF-8 character into one wide-character
* *
* input: * input:
* utf8 - an input UTF-8 string * utf8 - an input UTF-8 string
@@ -318,7 +320,7 @@ size_t i, len;
return i; return i;
} }
if( utf8_check_range(res, len) ) if( is_correct_unicode_char(res, len) )
correct = true; correct = true;
return len; return len;
@@ -326,20 +328,20 @@ return len;
/*! /*
this function converts one UTF-8 character into one wide-character * convert one UTF-8 character into one wide-character
*
input: * input:
utf8 - an input UTF-8 string (null terminated) * utf8 - an input UTF-8 string (null terminated)
*
output: * output:
res - an output character * res - an output character
correct - true if it is a correct character * correct - true if it is a correct character
*
the function returns how many characters have been used from the input string * the function returns how many characters have been used from the input string
(returns zero only if the string has '\0' at the first character) * (returns zero only if the string has '\0' at the first character)
even if there are errors the function returns a non-zero value * even if there are errors the function returns a non-zero value
*/ */
size_t utf8_to_int(const char * utf8, int & res, bool & correct) size_t utf8_to_int(const char * utf8, int & res, bool & correct)
{ {
size_t i, len; size_t i, len;
@@ -362,7 +364,7 @@ size_t i, len;
return i; return i;
} }
if( utf8_check_range(res, len) ) if( is_correct_unicode_char(res, len) )
correct = true; correct = true;
return len; return len;
@@ -371,20 +373,20 @@ return len;
/*! /*
this function converts one UTF-8 character into one wide-character * convert one UTF-8 character into one wide-character
*
input: * input:
utf8 - an input UTF-8 string * utf8 - an input UTF-8 string
*
output: * output:
res - an output character * res - an output character
correct - true if it is a correct character * correct - true if it is a correct character
*
the function returns how many characters have been used from the input string * the function returns how many characters have been used from the input string
(returns zero only if utf8 is empty) * (returns zero only if utf8 is empty)
even if there are errors the function returns a non-zero value * even if there are errors the function returns a non-zero value
*/ */
size_t utf8_to_int(const std::string & utf8, int & res, bool & correct) size_t utf8_to_int(const std::string & utf8, int & res, bool & correct)
{ {
return utf8_to_int(utf8.c_str(), utf8.size(), res, correct); return utf8_to_int(utf8.c_str(), utf8.size(), res, correct);
@@ -392,18 +394,18 @@ size_t utf8_to_int(const std::string & utf8, int & res, bool & correct)
/*! /*
this function converts one UTF-8 character into one wide-character * convert one UTF-8 character into one wide-character
*
input: * input:
utf8 - an input UTF-8 stream * utf8 - an input UTF-8 stream
*
output: * output:
res - an output character * res - an output character
correct - true if it is a correct character * correct - true if it is a correct character
*
the function returns how many characters have been used from the input stream * the function returns how many characters have been used from the input stream
*/ */
size_t utf8_to_int(std::istream & utf8, int & res, bool & correct) size_t utf8_to_int(std::istream & utf8, int & res, bool & correct)
{ {
size_t i, len; size_t i, len;
@@ -431,7 +433,7 @@ unsigned char uz;
return i; return i;
} }
if( utf8_check_range(res, len) ) if( is_correct_unicode_char(res, len) )
correct = true; correct = true;
return len; return len;
@@ -465,7 +467,7 @@ unsigned char uz;
return i + 1; return i + 1;
} }
if( utf8_check_range(res, len) ) if( is_correct_unicode_char(res, len) )
correct = true; correct = true;
} }
else else
@@ -482,21 +484,21 @@ unsigned char uz;
/*! /*
this function converts an utf8 string into wide string (std::wstring) * convert an utf8 string into a wide string (std::wstring)
*
input: * input:
utf8 - an input utf8 string * utf8 - an input utf8 string
utf8_len - size of the input string * utf8_len - size of the input string
mode - what to do with errors when converting * mode - what to do with errors when converting
0: skip an invalid character * 0: skip an invalid character
1: put U+FFFD "replacement character" instead of the invalid character (default) * 1: put U+FFFD "replacement character" instead of the invalid character (default)
*
output: * output:
res - an output wide string * res - an output wide string
*
the function returns false if there were some errors when converting * the function returns false if there were some errors when converting
*/ */
bool utf8_to_wide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear, int mode) bool utf8_to_wide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear, int mode)
{ {
if( clear ) if( clear )
@@ -513,20 +515,20 @@ bool utf8_to_wide(const char * utf8, size_t utf8_len, std::wstring & res, bool c
/*! /*
this function converts an utf8 string into wide string (std::wstring) * convert an utf8 string into a wide string (std::wstring)
*
input: * input:
utf8 - an input utf8 null terminated string * utf8 - an input utf8 null terminated string
mode - what to do with errors when converting * mode - what to do with errors when converting
0: skip an invalid character * 0: skip an invalid character
1: put U+FFFD "replacement character" instead of the invalid character (default) * 1: put U+FFFD "replacement character" instead of the invalid character (default)
*
output: * output:
res - an output wide string * res - an output wide string
*
the function returns false if there were some errors when converting * the function returns false if there were some errors when converting
*/ */
bool utf8_to_wide(const char * utf8, std::wstring & res, bool clear, int mode) bool utf8_to_wide(const char * utf8, std::wstring & res, bool clear, int mode)
{ {
size_t utf8_len = 0; size_t utf8_len = 0;
@@ -539,20 +541,20 @@ return utf8_to_wide(utf8, utf8_len, res, clear, mode);
/*! /*
this function converts an utf8 string into wide string (std::wstring) * convert an utf8 string into a wide string (std::wstring)
*
input: * input:
utf8 - an input utf8 string * utf8 - an input utf8 string
mode - what to do with errors when converting * mode - what to do with errors when converting
0: skip an invalid character * 0: skip an invalid character
1: put U+FFFD "replacement character" instead of the invalid character (default) * 1: put U+FFFD "replacement character" instead of the invalid character (default)
*
output: * output:
res - an output wide string * res - an output wide string
*
the function returns false if there were some errors when converting * the function returns false if there were some errors when converting
*/ */
bool utf8_to_wide(const std::string & utf8, std::wstring & res, bool clear, int mode) bool utf8_to_wide(const std::string & utf8, std::wstring & res, bool clear, int mode)
{ {
return utf8_to_wide(utf8.c_str(), utf8.size(), res, clear, mode); return utf8_to_wide(utf8.c_str(), utf8.size(), res, clear, mode);
@@ -560,20 +562,20 @@ bool utf8_to_wide(const std::string & utf8, std::wstring & res, bool clear, int
/*! /*
this function converts an utf8 stream into wide string (std::wstring) * convert an utf8 stream into a wide string (std::wstring)
*
input: * input:
utf8 - an input utf8 stream * utf8 - an input utf8 stream
mode - what to do with errors when converting * mode - what to do with errors when converting
0: skip an invalid character * 0: skip an invalid character
1: put U+FFFD "replacement character" instead of the invalid character (default) * 1: put U+FFFD "replacement character" instead of the invalid character (default)
*
output: * output:
res - an output wide string * res - an output wide string
*
the function returns false if there were some errors when converting * the function returns false if there were some errors when converting
*/ */
bool utf8_to_wide(std::istream & utf8, std::wstring & res, bool clear, int mode) bool utf8_to_wide(std::istream & utf8, std::wstring & res, bool clear, int mode)
{ {
int z; int z;
@@ -603,26 +605,26 @@ return !was_error;
/*! /*
this function converts one wide character into UTF-8 sequence * convert one wide character into an UTF-8 sequence
*
input: * input:
z - wide character * z - wide character
*
output: * output:
utf8 - a buffer for the output sequence * utf8 - a buffer for the output sequence
utf8_len - the size of the buffer * utf8_len - the size of the buffer
*
the function returns how many characters have been written to the utf8, * the function returns how many characters have been written to the utf8,
zero means the utf8 buffer is too small or 'z' is an incorrect unicode character * zero means the utf8 buffer is too small or 'z' is an incorrect unicode character
*/ */
size_t int_to_utf8(int z, char * utf8, size_t utf8_max_len) size_t int_to_utf8(int z, char * utf8, size_t utf8_max_len)
{ {
char buf[10]; char buf[10];
int i = 0; int i = 0;
int mask = 0x3f; // 6 first bits set int mask = 0x3f; // 6 first bits set
if( utf8_max_len==0 || !utf8_check_range(z) ) if( utf8_max_len==0 || !is_correct_unicode_char(z) )
return 0; return 0;
if( z <= 0x7f ) if( z <= 0x7f )
@@ -658,18 +660,18 @@ return a;
/*! /*
this function converts one wide character into UTF-8 string * convert one wide character into an UTF-8 string
*
input: * input:
z - wide character * z - wide character
*
output: * output:
utf8 - a UTF-8 string for the output sequence (the string is not cleared) * utf8 - a UTF-8 string for the output sequence (the string is not cleared)
*
the function returns how many characters have been written to the utf8 string, * the function returns how many characters have been written to the utf8 string,
zero means that 'z' is an incorrect unicode character * zero means that 'z' is an incorrect unicode character
*/ */
size_t int_to_utf8(int z, std::string & utf8, bool clear) size_t int_to_utf8(int z, std::string & utf8, bool clear)
{ {
char buf[10]; char buf[10];
@@ -688,21 +690,21 @@ return len;
/*! /*
this function converts a wide string into UTF-8 string * convert a wide string into an UTF-8 string
*
input: * input:
wide_string - a wide string for converting * wide_string - a wide string for converting
string_len - the size of the string * string_len - the size of the string
mode - what to do with errors when converting * mode - what to do with errors when converting
0: skip an invalid character * 0: skip an invalid character
1: put U+FFFD "replacement character" instead of the invalid character (default) * 1: put U+FFFD "replacement character" instead of the invalid character (default)
*
output: * output:
utf8 - a UTF-8 string for the output sequence (the string is not cleared) * utf8 - a UTF-8 string for the output sequence (the string is not cleared)
*
this function returns false if there were some errors when converting * this function returns false if there were some errors when converting
*/ */
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear, int mode) bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear, int mode)
{ {
bool was_error = false; bool was_error = false;
@@ -723,20 +725,20 @@ return !was_error;
/*! /*
this function converts a wide string into UTF-8 string * convert a wide string into an UTF-8 string
*
input: * input:
wide_string - a null terminated wide string for converting * wide_string - a null terminated wide string for converting
mode - what to do with errors when converting * mode - what to do with errors when converting
0: skip an invalid character * 0: skip an invalid character
1: put U+FFFD "replacement character" instead of the invalid character (default) * 1: put U+FFFD "replacement character" instead of the invalid character (default)
*
output: * output:
utf8 - a UTF-8 string for the output sequence (the string is not cleared) * utf8 - a UTF-8 string for the output sequence (the string is not cleared)
*
this function returns false if there were some errors when converting * this function returns false if there were some errors when converting
*/ */
bool wide_to_utf8(const wchar_t * wide_string, std::string & utf8, bool clear, int mode) bool wide_to_utf8(const wchar_t * wide_string, std::string & utf8, bool clear, int mode)
{ {
bool was_error = false; bool was_error = false;
@@ -752,20 +754,20 @@ return !was_error;
/*! /*
this function converts a wide string (std::wstring) into UTF-8 string * convert a wide string (std::wstring) into an UTF-8 string
*
input: * input:
wide_string - a wide string for converting * wide_string - a wide string for converting
mode - what to do with errors when converting * mode - what to do with errors when converting
0: skip an invalid character * 0: skip an invalid character
1: put U+FFFD "replacement character" instead of the invalid character (default) * 1: put U+FFFD "replacement character" instead of the invalid character (default)
*
output: * output:
utf8 - a UTF-8 string for the output sequence (the string is not cleared) * utf8 - a UTF-8 string for the output sequence (the string is not cleared)
*
this function returns false if there were some errors when converting * this function returns false if there were some errors when converting
*/ */
bool wide_to_utf8(const std::wstring & wide_string, std::string & utf8, bool clear, int mode) bool wide_to_utf8(const std::wstring & wide_string, std::string & utf8, bool clear, int mode)
{ {
return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, clear, mode); return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, clear, mode);
@@ -775,27 +777,27 @@ bool wide_to_utf8(const std::wstring & wide_string, std::string & utf8, bool cle
/*! /*
this function converts a wide string into UTF-8 stream * convert a wide string into an UTF-8 stream
*
input: * input:
wide_string - a wide string for converting * wide_string - a wide string for converting
string_len - length of the wide string * string_len - length of the wide string
mode - what to do with errors when converting * mode - what to do with errors when converting
0: skip an invalid character * 0: skip an invalid character
1: put U+FFFD "replacement character" instead of the invalid character (default) * 1: put U+FFFD "replacement character" instead of the invalid character (default)
*
output: * output:
utf8 - a buffer for the UTF-8 stream * utf8 - a buffer for the UTF-8 stream
utf8_len - the size of the buffer * utf8_len - the size of the buffer
utf8_written - how many bytes have been written to the buffer * utf8_written - how many bytes have been written to the buffer
*
this function returns false if there were some errors when converting or the output buffer was too small, * this function returns false if there were some errors when converting or the output buffer was too small,
the output string is not null terminated * the output string is not null terminated
*
if there is an error when converting (there is an incorrect character in the wide string) the function * if there is an error when converting (there is an incorrect character in the wide string) the function
will continue converting but if the buffer is too small the function breaks immediately * will continue converting but if the buffer is too small the function breaks immediately
*/ */
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode) bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
{ {
bool was_error = false; bool was_error = false;
@@ -830,26 +832,26 @@ return !was_error;
/*! /*
this function converts a wide string (std::wstring) into UTF-8 stream * convert a wide string (std::wstring) into an UTF-8 stream
*
input: * input:
wide_string - a wide string for converting * wide_string - a wide string for converting
mode - what to do with errors when converting * mode - what to do with errors when converting
0: skip an invalid character * 0: skip an invalid character
1: put U+FFFD "replacement character" instead of the invalid character (default) * 1: put U+FFFD "replacement character" instead of the invalid character (default)
*
output: * output:
utf8 - a buffer for the UTF-8 stream * utf8 - a buffer for the UTF-8 stream
utf8_len - the size of the buffer * utf8_len - the size of the buffer
utf8_written - how many bytes have been written to the buffer * utf8_written - how many bytes have been written to the buffer
*
this function returns false if there were some errors when converting or the output buffer was too small, * this function returns false if there were some errors when converting or the output buffer was too small,
the output string is not null terminated * the output string is not null terminated
*
if there is an error when converting (there is an incorrect character in the wide string) the function * if there is an error when converting (there is an incorrect character in the wide string) the function
will continue converting but if the buffer is too small the function breaks immediately * will continue converting but if the buffer is too small the function breaks immediately
*/ */
bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode) bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
{ {
return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, utf8_written, mode); return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, utf8_written, mode);
@@ -857,27 +859,27 @@ bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len
/*! /*
this function converts a wide string into UTF-8 stream * convert a wide string into an UTF-8 stream
*
input: * input:
wide_string - a wide string for converting * wide_string - a wide string for converting
string_len - length of the wide string * string_len - length of the wide string
mode - what to do with errors when converting * mode - what to do with errors when converting
0: skip an invalid character * 0: skip an invalid character
1: put U+FFFD "replacement character" instead of the invalid character (default) * 1: put U+FFFD "replacement character" instead of the invalid character (default)
*
output: * output:
utf8 - a buffer for the UTF-8 stream * utf8 - a buffer for the UTF-8 stream
utf8_len - the size of the buffer * utf8_len - the size of the buffer
*
this function returns false if there were some errors when converting or the output buffer was too small, * this function returns false if there were some errors when converting or the output buffer was too small,
the output string is null terminated (even if there were errors during converting) * the output string is null terminated (even if there were errors during converting)
*
if there is an error when converting (there is an incorrect character in the wide string) the function * if there is an error when converting (there is an incorrect character in the wide string) the function
will continue converting but if the buffer is too small the function breaks immediately * will continue converting but if the buffer is too small the function breaks immediately
(in both cases the utf8 buffer is null terminated) * (in both cases the utf8 buffer is null terminated)
*/ */
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode) bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode)
{ {
size_t utf8_saved; size_t utf8_saved;
@@ -894,26 +896,26 @@ return res;
/*! /*
this function converts a wide string (std::wstring) into UTF-8 stream * convert a wide string (std::wstring) into an UTF-8 stream
*
input: * input:
wide_string - a wide string for converting * wide_string - a wide string for converting
mode - what to do with errors when converting * mode - what to do with errors when converting
0: skip an invalid character * 0: skip an invalid character
1: put U+FFFD "replacement character" instead of the invalid character (default) * 1: put U+FFFD "replacement character" instead of the invalid character (default)
*
output: * output:
utf8 - a buffer for the UTF-8 stream * utf8 - a buffer for the UTF-8 stream
utf8_len - the size of the buffer * utf8_len - the size of the buffer
*
this function returns false if there were some errors when converting or the output buffer was too small, * this function returns false if there were some errors when converting or the output buffer was too small,
the output string is null terminated (even if there were errors during converting) * the output string is null terminated (even if there were errors during converting)
*
if there is an error when converting (there is an incorrect character in the wide string) the function * if there is an error when converting (there is an incorrect character in the wide string) the function
will continue converting but if the buffer is too small the function breaks immediately * will continue converting but if the buffer is too small the function breaks immediately
(in both cases the utf8 buffer is null terminated) * (in both cases the utf8 buffer is null terminated)
*/ */
bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode) bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode)
{ {
return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, mode); return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, mode);
@@ -921,26 +923,26 @@ bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len
/*! /*
this function converts a wide string into UTF-8 stream * convert a wide string into an UTF-8 stream
*
input: * input:
wide_string - a null terminated wide string for converting * wide_string - a null terminated wide string for converting
mode - what to do with errors when converting * mode - what to do with errors when converting
0: skip an invalid character * 0: skip an invalid character
1: put U+FFFD "replacement character" instead of the invalid character (default) * 1: put U+FFFD "replacement character" instead of the invalid character (default)
*
output: * output:
utf8 - a buffer for the UTF-8 stream * utf8 - a buffer for the UTF-8 stream
utf8_len - the size of the buffer * utf8_len - the size of the buffer
utf8_written - how many bytes have been written to the buffer * utf8_written - how many bytes have been written to the buffer
*
this function returns false if there were some errors when converting or the output buffer was too small, * this function returns false if there were some errors when converting or the output buffer was too small,
the output string is not null terminated * the output string is not null terminated
*
if there is an error when converting (there is an incorrect character in the wide string) the function * if there is an error when converting (there is an incorrect character in the wide string) the function
will continue converting but if the buffer is too small the function breaks immediately * will continue converting but if the buffer is too small the function breaks immediately
*/ */
bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode) bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
{ {
bool was_error = false; bool was_error = false;
@@ -976,26 +978,26 @@ return !was_error;
/*! /*
this function converts a wide string into UTF-8 stream * convert a wide string into an UTF-8 stream
*
input: * input:
wide_string - a wide string for converting * wide_string - a wide string for converting
mode - what to do with errors when converting * mode - what to do with errors when converting
0: skip an invalid character * 0: skip an invalid character
1: put U+FFFD "replacement character" instead of the invalid character (default) * 1: put U+FFFD "replacement character" instead of the invalid character (default)
*
output: * output:
utf8 - a buffer for the UTF-8 stream * utf8 - a buffer for the UTF-8 stream
utf8_len - the size of the buffer * utf8_len - the size of the buffer
*
this function returns false if there were some errors when converting or the output buffer was too small, * this function returns false if there were some errors when converting or the output buffer was too small,
the output string is null terminated (even if there were errors during converting) * the output string is null terminated (even if there were errors during converting)
*
if there is an error when converting (there is an incorrect character in the wide string) the function * if there is an error when converting (there is an incorrect character in the wide string) the function
will continue converting but if the buffer is too small the function breaks immediately * will continue converting but if the buffer is too small the function breaks immediately
(in both cases the utf8 buffer is null terminated) * (in both cases the utf8 buffer is null terminated)
*/ */
bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode) bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode)
{ {
size_t utf8_saved; size_t utf8_saved;
@@ -1017,9 +1019,9 @@ return res;
namespace private_namespace namespace private_namespace
{ {
/*! /*
an auxiliary function for converting from UTF-8 string * an auxiliary function for converting from UTF-8 string
*/ */
bool utf8_to_int_first_octet(unsigned char uz, size_t & len, int & res) bool utf8_to_int_first_octet(unsigned char uz, size_t & len, int & res)
{ {
for(len=0 ; (uz & 0x80) != 0 ; ++len) for(len=0 ; (uz & 0x80) != 0 ; ++len)
@@ -1041,9 +1043,9 @@ return true;
/*! /*
an auxiliary function for converting from UTF-8 string * an auxiliary function for converting from UTF-8 string
*/ */
bool utf8_to_int_add_next_octet(unsigned char uz, int & res) bool utf8_to_int_add_next_octet(unsigned char uz, int & res)
{ {
if( (uz & 0xc0) != 0x80 ) if( (uz & 0xc0) != 0x80 )
@@ -1057,20 +1059,20 @@ return true;
/*! /*
an auxiliary function for converting from wide characters to UTF-8 * an auxiliary function for converting from wide characters to UTF-8
*
returns how many wide characters were used * return how many wide characters were used
if string_len is greater than 0 then the return value is always greater than zero too * if string_len is greater than 0 then the return value is always greater than zero too
*
utf8_written - how many characters were saved in the utf8 string (the string doesn't have * utf8_written - how many characters were saved in the utf8 string (the string doesn't have
a null terminating character) * a null terminating character)
it can be equal to zero if the utf8 buffer is too small or there was an incorrect wide character read * it can be equal to zero if the utf8 buffer is too small or there was an incorrect wide character read
was_utf8_buf_too_small - will be true if the utf8 buffer is too small * was_utf8_buf_too_small - will be true if the utf8 buffer is too small
if this flag is true then utf8_written is equal to zero * if this flag is true then utf8_written is equal to zero
was_error - will be true if there is an error when converting (there was an incorrect wide character) * was_error - will be true if there is an error when converting (there was an incorrect wide character)
(was_error will not be true if the utf8 buffer is too small) * (was_error will not be true if the utf8 buffer is too small)
*/ */
size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode) size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode)
{ {
@@ -1107,12 +1109,12 @@ return chars;
/*! /*
an auxiliary function for converting from wide characters to UTF-8 * an auxiliary function for converting from wide characters to UTF-8
*
returns how many wide characters were used * return how many wide characters were used
if string_len is greater than 0 then the return value is always greater than zero too * if string_len is greater than 0 then the return value is always greater than zero too
*/ */
size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode) size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode)
{ {
int z; int z;
@@ -1137,12 +1139,12 @@ return chars;
/*! /*
an auxiliary function for converting from wide characters to UTF-8 * an auxiliary function for converting from wide characters to UTF-8
*
returns how many wide characters were used * return how many wide characters were used
if wide_string has at least one character then the return value is always greater than zero too * if wide_string has at least one character then the return value is always greater than zero too
*/ */
size_t wide_one_to_utf8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode) size_t wide_one_to_utf8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode)
{ {
int z; int z;

View File

@@ -42,85 +42,80 @@
namespace pt namespace pt
{ {
/* /*
* public methods are also defined in utf8_stream.h * UTF-8, a transformation format of ISO 10646
* http://tools.ietf.org/html/rfc3629
* *
* when wchar_t is 4 bytes length we use UTF-32
* when wchar_t is 2 bytes length we use UTF-16 (with surrogate pairs)
*
* UTF-16
* http://www.ietf.org/rfc/rfc2781.txt
*/ */
/*!
UTF-8, a transformation format of ISO 10646
http://tools.ietf.org/html/rfc3629
when wchar_t is 4 bytes length we use UTF-32
when wchar_t is 2 bytes length we use UTF-16 (with surrogate pairs)
UTF-16
http://www.ietf.org/rfc/rfc2781.txt
*/
/*!
returns true if 'c' is a correct unicode character
RENAMEME to is_correct_unicode_char
*/
bool utf8_check_range(int c);
/*!
returns true if 'c' is a correct unicode character
this method is used when reading from an utf8 string
how_many_chars - means how many characters from utf8 string were read
*/
bool utf8_check_range(int c, int how_many_bytes);
/*
* return true if 'c' is a correct unicode character
*/
bool is_correct_unicode_char(int c);
/* /*
* returns true if 'c' is a characters from the surrogate range * return true if 'c' is a correct unicode character
*
* this method is used when reading from an utf8 string
* how_many_bytes - means how many bytes from the utf8 string were read
*/
bool is_correct_unicode_char(int c, int how_many_bytes);
/*
* return true if 'c' is a character from the surrogate range
* (c>=0xD800 && c<=0xDFFF) * (c>=0xD800 && c<=0xDFFF)
* *
*/ */
bool is_surrogate_char(int c); bool is_surrogate_char(int c);
/* /*
* returns true if 'c' is a first character from the surrogate pair * return true if 'c' is a first character from the surrogate pair
* (c>=0xD800 && c<=0xDBFF) * (c>=0xD800 && c<=0xDBFF)
*/ */
bool is_first_surrogate_char(int c); bool is_first_surrogate_char(int c);
/* /*
* returns true if 'c' is a second character from the surrogate pair * return true if 'c' is a second character from the surrogate pair
* (c>=0xDC00 && c<=0xDFFF) * (c>=0xDC00 && c<=0xDFFF)
*/ */
bool is_second_surrogate_char(int c); bool is_second_surrogate_char(int c);
/* /*
* returns a code point from two surrogate pair characters * return a code point from two surrogate pair characters
*/ */
bool surrogate_pair_to_int(int c1, int c2, int & z); bool surrogate_pair_to_int(int c1, int c2, int & z);
/* /*
* converting one character into a stream * convert one character into a stream
* stream can be an utf8 or wide stream * stream can be an utf8 or a wide stream
*
* return true if c was a correct unicode character
* and has been put to the stream
*/ */
bool int_to_stream(int c, pt::Stream & stream); bool int_to_stream(int c, pt::Stream & stream);
/* /*
* converting a one unicode character to an int * convert one wide (or two wide) characters to an int
* such an unicode character can consists of one or two wide characters *
* return how many wide characters were used
* if string_len is greater than 0 then the return value is always greater than zero too
*/ */
size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct); // may these methods make public? size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct);
size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct); size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct);
@@ -134,9 +129,9 @@ size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct);
* *
*/ */
/*! /*
converting one character from UTF-8 to an int * convert one character from UTF-8 to an int
*/ */
size_t utf8_to_int(const char * utf8, size_t utf8_len, int & res, bool & correct); size_t utf8_to_int(const char * utf8, size_t utf8_len, int & res, bool & correct);
size_t utf8_to_int(const char * utf8, int & res, bool & correct); size_t utf8_to_int(const char * utf8, int & res, bool & correct);
size_t utf8_to_int(const std::string & utf8, int & res, bool & correct); size_t utf8_to_int(const std::string & utf8, int & res, bool & correct);
@@ -147,43 +142,46 @@ template<typename StreamIteratorType>
size_t utf8_to_int(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, int & res, bool & correct); size_t utf8_to_int(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, int & res, bool & correct);
/*! /*
converting one character from int to wide stream * convert one character from an int to a wide stream
*
returns true if a character was inserted to the stream * return true if a character was inserted to the stream
*/ */
template<typename StreamType> template<typename StreamType>
bool int_to_wide(int c, StreamType & res); bool int_to_wide(int c, StreamType & res);
/*! /*
converting one character from int to wide string * convert one character from an int to a wide string
*
this method will not terminate the output string with a null character * this method will not terminate the output string with a null character
return how many characters have been written (0, 1 or 2) * return how many characters have been written (0, 1 or 2)
*/ */
size_t int_to_wide(int c, wchar_t * res, size_t max_buf_len); size_t int_to_wide(int c, wchar_t * res, size_t max_buf_len);
/*! /*
converting one character from int to wide string * convert one character from an int to a wide string
*
returns true if a character was inserted to the string * return true if a character was inserted to the string
*/ */
bool int_to_wide(int c, std::wstring & res); bool int_to_wide(int c, std::wstring & res);
/*! /*
call a convert_function for each character from an utf8 string * call a convert_function for each character from an utf8 string
*
* this function takes one int parameter:
* output_function(int c)
*/ */
template<typename OutputFunction> template<typename OutputFunction>
bool utf8_to_output_function(const char * utf8, size_t utf8_len, OutputFunction output_function, int mode = 1); bool utf8_to_output_function(const char * utf8, size_t utf8_len, OutputFunction output_function, int mode = 1);
/*! /*
converting UTF-8 string to a wide string * convert an UTF-8 string to a wide string
*/ */
bool utf8_to_wide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear = true, int mode = 1); bool utf8_to_wide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear = true, int mode = 1);
bool utf8_to_wide(const char * utf8, std::wstring & res, bool clear = true, int mode = 1); bool utf8_to_wide(const char * utf8, std::wstring & res, bool clear = true, int mode = 1);
bool utf8_to_wide(const std::string & utf8, std::wstring & res, bool clear = true, int mode = 1); bool utf8_to_wide(const std::string & utf8, std::wstring & res, bool clear = true, int mode = 1);
@@ -216,7 +214,9 @@ bool utf8_to_output_function(StreamIteratorType & iterator_in, const StreamItera
template<typename CharT, size_t stack_size, size_t heap_block_size> template<typename CharT, size_t stack_size, size_t heap_block_size>
class TextStreamBase; class TextStreamBase;
// defined at the end in textstream.h /*
* this method is defined at the end of textstream.h
*/
template<size_t stack_size, size_t heap_block_size, typename StreamOrStringType> template<size_t stack_size, size_t heap_block_size, typename StreamOrStringType>
bool utf8_to_wide(const TextStreamBase<char, stack_size, heap_block_size> & utf8, StreamOrStringType & out_stream, bool clear_stream = true, int mode = 1); bool utf8_to_wide(const TextStreamBase<char, stack_size, heap_block_size> & utf8, StreamOrStringType & out_stream, bool clear_stream = true, int mode = 1);
@@ -239,9 +239,9 @@ bool utf8_to_wide(const StreamType & stream, wchar_t * out_buffer, size_t max_bu
*/ */
/*! /*
converting one int character to UTF-8 * convert one int character to UTF-8
*/ */
size_t int_to_utf8(int z, char * utf8, size_t utf8_max_len); size_t int_to_utf8(int z, char * utf8, size_t utf8_max_len);
size_t int_to_utf8(int z, std::string & utf8, bool clear = true); size_t int_to_utf8(int z, std::string & utf8, bool clear = true);
@@ -249,21 +249,23 @@ template<typename StreamType>
size_t int_to_utf8(int z, StreamType & utf8); size_t int_to_utf8(int z, StreamType & utf8);
/*! /*
call an output_function for some sequence of wide characters from the stream buffer * call an output_function for some sequence of wide characters from the stream buffer
*
output_function has two arguments: const char * buf, size_t len: * output_function takes two arguments: const char * buf, size_t len:
output_function(const char * buf, size_t len) * output_function(const char * buf, size_t len)
* this is a buffer which was filled with utf8 characters
StreamType should have a const_iterator and begin() and end() methods * (this buffer can have up to 256 characters)
*/ *
* StreamType should have a const_iterator and begin() and end() methods
*/
template<typename StreamType, typename OutputFunction> template<typename StreamType, typename OutputFunction>
bool wide_to_output_function(StreamType & buffer, OutputFunction output_function, int mode = 1); bool wide_to_output_function(StreamType & buffer, OutputFunction output_function, int mode = 1);
/*! /*
converting a wide string to UTF-8 string * convert a wide string to an UTF-8 string
*/ */
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear = true, int mode = 1); bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear = true, int mode = 1);
bool wide_to_utf8(const wchar_t * wide_string, std::string & utf8, bool clear = true, int mode = 1); bool wide_to_utf8(const wchar_t * wide_string, std::string & utf8, bool clear = true, int mode = 1);
bool wide_to_utf8(const std::wstring & wide_string, std::string & utf8, bool clear = true, int mode = 1); bool wide_to_utf8(const std::wstring & wide_string, std::string & utf8, bool clear = true, int mode = 1);
@@ -299,7 +301,9 @@ bool wide_stream_to_utf8(StreamType & buffer, char * utf8, std::size_t max_buffe
/*
* some private/auxiliary methods
*/
namespace private_namespace namespace private_namespace
{ {
bool utf8_to_int_first_octet(unsigned char uz, size_t & len, int & res); bool utf8_to_int_first_octet(unsigned char uz, size_t & len, int & res);
@@ -313,12 +317,12 @@ size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, std::str
size_t wide_one_to_utf8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode); size_t wide_one_to_utf8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode);
/*! /*
an auxiliary function for converting from wide characters to UTF-8 * an auxiliary function for converting from wide characters to UTF-8
*
returns how many wide characters were used * return how many wide characters were used
if string_len is greater than 0 then the return value is always greater than zero too * if string_len is greater than 0 then the return value is always greater than zero too
*/ */
template<typename StreamType> template<typename StreamType>
static size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, bool & was_error, int mode) static size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, bool & was_error, int mode)
{ {
@@ -339,13 +343,13 @@ size_t chars;
was_error = true; was_error = true;
} }
return chars; return chars;
} }
/*! /*
an auxiliary function for converting from wide characters to UTF-8 * an auxiliary function for converting from wide characters to UTF-8
*/ */
template<typename StreamType> template<typename StreamType>
static size_t wide_one_to_utf8(const wchar_t * wide_string, StreamType & utf8, bool & was_error, int mode) static size_t wide_one_to_utf8(const wchar_t * wide_string, StreamType & utf8, bool & was_error, int mode)
{ {
@@ -357,7 +361,7 @@ static size_t wide_one_to_utf8(const wchar_t * wide_string, StreamType & utf8, b
if( *(wide_string+1) != 0 ) if( *(wide_string+1) != 0 )
min_str_len = 2; min_str_len = 2;
return wide_one_to_utf8(wide_string, min_str_len, utf8, was_error, mode); return wide_one_to_utf8(wide_string, min_str_len, utf8, was_error, mode);
} }
} // namespace private_namespace } // namespace private_namespace
@@ -389,19 +393,19 @@ bool int_to_wide(int c, StreamType & res)
} }
/*! /*
this function converts one UTF-8 character into int * convert one UTF-8 character into int
*
input: * input:
iterator_in - an stream iterator for reading from (the stream can by any stream, we use *, ++ and == operators only) * iterator_in - a stream iterator for reading from (the stream can be any stream, we use *, ++ and == operators only)
iterator_end - an end iterator * iterator_end - an end iterator
*
output: * output:
res - an output character * res - an output character
correct - true if it is a correct character * correct - true if it is a correct character
*
the function returns how many characters have been used from the input stream * the function returns how many characters have been used from the input stream
*/ */
template<typename StreamIteratorType> template<typename StreamIteratorType>
size_t utf8_to_int(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, int & res, bool & correct) size_t utf8_to_int(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, int & res, bool & correct)
{ {
@@ -432,7 +436,7 @@ unsigned char uz;
return i + 1; return i + 1;
} }
if( utf8_check_range(res, len) ) if( is_correct_unicode_char(res, len) )
correct = true; correct = true;
return len; return len;
@@ -440,11 +444,10 @@ return len;
/*! /*
converting UTF-8 string to a TextStreamBase<wchar_t,...> stream * convert UTF-8 string to a TextStreamBase<wchar_t,...> stream
(need to be tested) * (need to be tested)
*/ */
// need to be tested
template<typename StreamType> template<typename StreamType>
bool utf8_to_wide(const char * utf8, size_t utf8_len, StreamType & res, bool clear, int mode) bool utf8_to_wide(const char * utf8, size_t utf8_len, StreamType & res, bool clear, int mode)
{ {
@@ -642,19 +645,19 @@ bool wide_to_output_function(StreamType & buffer, OutputFunction output_function
/* /*
this function converts a UTF-8 stream into a wide stream or a wide string * convert a UTF-8 stream into a wide stream or a wide string
*
input: * input:
stream - a UTF-8 stream for converting * stream - a UTF-8 stream for converting
mode - what to do with errors when converting * mode - what to do with errors when converting
0: skip an invalid character * 0: skip an invalid character
1: put U+FFFD "replacement character" istead of the invalid character (default) * 1: put U+FFFD "replacement character" instead of the invalid character (default)
*
output: * output:
res - a wide stream or a wide string for the output sequence * res - a wide stream or a wide string for the output sequence
*
this function returns false if there were some errors when converting * this function returns false if there were some errors when converting
*/ */
template<typename StreamOrStringType> template<typename StreamOrStringType>
bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear, int mode) bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear, int mode)
{ {
@@ -668,21 +671,20 @@ bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear, i
/* /*
this function reads characters from a UTF-8 stream and calls an output_function * read characters from an UTF-8 stream and call an output_function
*
input: * input:
stream - a UTF-8 stream for converting * stream - a UTF-8 stream for converting
mode - what to do with errors when converting * mode - what to do with errors when converting
0: skip an invalid character * 0: skip an invalid character
1: put U+FFFD "replacement character" istead of the invalid character (default) * 1: put U+FFFD "replacement character" instead of the invalid character (default)
*
output: * output:
output_function - is a function which gets two artuments: int (character) and a reference to StreamOrStringType * output_function - a function which takes one argument: an int (a character):
and should put the character to the output string/stream, this function should have the signature like this: * output_function(int c)
output_function(int z, StreamOrStringType & res) *
* this function returns false if there were some errors when converting
this function returns false if there were some errors when converting */
*/
template<typename OutputFunction> template<typename OutputFunction>
bool utf8_to_output_function(const Stream & stream, OutputFunction output_function, int mode) bool utf8_to_output_function(const Stream & stream, OutputFunction output_function, int mode)
{ {
@@ -723,18 +725,18 @@ bool utf8_to_output_function(const Stream & stream, OutputFunction output_functi
/*! /*
this function converts UTF-8 stream into a wide stream or a wide string * convert an UTF-8 stream into a wide stream or a wide string
*
input: * input:
iterator_in - a stream iterator for reading from (the stream can by any stream, we use *, ++ and == operators only) * iterator_in - a stream iterator for reading from (the stream can by any stream, we use *, ++ and == operators only)
iterator_end - an end iterator * iterator_end - an end iterator
*
output: * output:
out_stream - an output wide stream or wide string (the stream can by of any kind, we use only << operator for a stream and += for a string) * out_stream - an output wide stream or wide string (the stream can by of any kind, we use only << operator for a stream and += for a string)
*
this function returns false if there were some errors when converting * this function returns false if there were some errors when converting
*/ */
template<typename StreamIteratorType, typename StreamOrStringType> template<typename StreamIteratorType, typename StreamOrStringType>
bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, StreamOrStringType & out_stream, bool clear_stream, int mode) bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, StreamOrStringType & out_stream, bool clear_stream, int mode)
{ {
@@ -776,20 +778,20 @@ bool utf8_to_output_function(StreamIteratorType & iterator_in, const StreamItera
/*! /*
this function converts UTF-8 stream into a wide string * convert an UTF-8 stream into a wide string
*
input: * input:
iterator_in - a stream iterator for reading from (the stream can by any stream, we use *, ++ and == operators only) * iterator_in - a stream iterator for reading from (the stream can by any stream, we use *, ++ and == operators only)
iterator_end - an end iterator * iterator_end - an end iterator
*
output: * output:
out_buffer - an output wide string * out_buffer - an output wide string
max_buffer_len - how many characters can be write (we write the terminating null character too) * max_buffer_len - how many characters can be write (we write the terminating null character too)
was_buffer_sufficient_large - a pointer to a bool value - if provided it is set to true if the buffer was sufficient large * was_buffer_sufficient_large - a pointer to a bool value - if provided it is set to true if the buffer was sufficient large
*
this function returns false if there were some errors when converting or if the output buffer was too short * this function returns false if there were some errors when converting or if the output buffer was too short
*/ */
template<typename StreamIteratorType> template<typename StreamIteratorType>
bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, wchar_t * out_buffer, size_t max_buffer_len, int mode, bool * was_buffer_sufficient_large) bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, wchar_t * out_buffer, size_t max_buffer_len, int mode, bool * was_buffer_sufficient_large)
{ {
@@ -849,19 +851,19 @@ bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & i
/*! /*
this function converts UTF-8 stream into a wide string * convert an UTF-8 stream into a wide string
*
input: * input:
stream - a stream for reading from * stream - a stream for reading from
*
output: * output:
out_buffer - an output wide string * out_buffer - an output wide string
max_buffer_len - how many characters can be write (we write the terminating null character too) * max_buffer_len - how many characters can be write (we write the terminating null character too)
was_buffer_sufficient_large - a pointer to a bool value - if provided it is set to true if the buffer was sufficient large * was_buffer_sufficient_large - a pointer to a bool value - if provided it is set to true if the buffer was sufficient large
*
this function returns false if there were some errors when converting or if the output buffer was too short * this function returns false if there were some errors when converting or if the output buffer was too short
*/ */
template<typename StreamType> template<typename StreamType>
bool utf8_to_wide(const StreamType & stream, wchar_t * out_buffer, size_t max_buffer_len, bool * was_buffer_sufficient_large, int mode) bool utf8_to_wide(const StreamType & stream, wchar_t * out_buffer, size_t max_buffer_len, bool * was_buffer_sufficient_large, int mode)
{ {
@@ -873,18 +875,18 @@ bool utf8_to_wide(const StreamType & stream, wchar_t * out_buffer, size_t max_bu
/*! /*
this function converts one wide character into UTF-8 stream * convert one wide character into an UTF-8 stream
*
input: * input:
z - wide character * z - wide character
*
output: * output:
utf8 - a UTF-8 stream for the output sequence * utf8 - a UTF-8 stream for the output sequence
*
the function returns how many characters have been written to the utf8 stream, * the function returns how many characters have been written to the utf8 stream,
zero means that 'z' is an incorrect unicode character * zero means that 'z' is an incorrect unicode character
*/ */
template<typename StreamType> template<typename StreamType>
size_t int_to_utf8(int z, StreamType & utf8) size_t int_to_utf8(int z, StreamType & utf8)
{ {
@@ -902,21 +904,21 @@ size_t int_to_utf8(int z, StreamType & utf8)
/*! /*
this function converts a wide string into UTF-8 stream * convert a wide string into an UTF-8 stream
*
input: * input:
wide_string - a wide string for converting * wide_string - a wide string for converting
string_len - size of the string * string_len - size of the string
mode - what to do with errors when converting * mode - what to do with errors when converting
0: skip an invalid character * 0: skip an invalid character
1: put U+FFFD "replacement character" istead of the invalid character (default) * 1: put U+FFFD "replacement character" instead of the invalid character (default)
*
output: * output:
utf8 - a UTF-8 stream for the output sequence * utf8 - a UTF-8 stream for the output sequence
*
this function returns false if there were some errors when converting * this function returns false if there were some errors when converting
*/ */
template<typename StreamType> template<typename StreamType>
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, int mode) bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, int mode)
{ {
@@ -937,20 +939,20 @@ return !was_error;
/*! /*
this function converts a wide string into UTF-8 stream * convert a wide string into an UTF-8 stream
*
input: * input:
wide_string - a null terminated wide string for converting * wide_string - a null terminated wide string for converting
mode - what to do with errors when converting * mode - what to do with errors when converting
0: skip an invalid character * 0: skip an invalid character
1: put U+FFFD "replacement character" istead of the invalid character (default) * 1: put U+FFFD "replacement character" instead of the invalid character (default)
*
output: * output:
utf8 - a UTF-8 stream for the output sequence * utf8 - a UTF-8 stream for the output sequence
*
this function returns false if there were some errors when converting * this function returns false if there were some errors when converting
*/ */
template<typename StreamType> template<typename StreamType>
bool wide_to_utf8(const wchar_t * wide_string, StreamType & utf8, int mode) bool wide_to_utf8(const wchar_t * wide_string, StreamType & utf8, int mode)
{ {
@@ -964,20 +966,20 @@ return !was_error;
/*! /*
this function converts a wide string (std::wstring) into UTF-8 stream * convert a wide string (std::wstring) into an UTF-8 stream
*
input: * input:
wide_string - a wide string for converting * wide_string - a wide string for converting
mode - what to do with errors when converting * mode - what to do with errors when converting
0: skip an invalid character * 0: skip an invalid character
1: put U+FFFD "replacement character" istead of the invalid character (default) * 1: put U+FFFD "replacement character" instead of the invalid character (default)
*
output: * output:
utf8 - a UTF-8 stream for the output sequence * utf8 - a UTF-8 stream for the output sequence
*
this function returns false if there were some errors when converting * this function returns false if there were some errors when converting
*/ */
template<typename StreamType> template<typename StreamType>
bool wide_to_utf8(const std::wstring & wide_string, StreamType & utf8, int mode) bool wide_to_utf8(const std::wstring & wide_string, StreamType & utf8, int mode)
{ {
@@ -1014,7 +1016,7 @@ bool wide_stream_to_utf8(const Stream & stream, StreamType & utf8, bool clear, i
int c = static_cast<int>(stream.get_wchar(i)); int c = static_cast<int>(stream.get_wchar(i));
bool is_correct = false; bool is_correct = false;
if( utf8_check_range(c) ) if( is_correct_unicode_char(c) )
{ {
// CHECKME test me when sizeof(wchar_t) == 2 // CHECKME test me when sizeof(wchar_t) == 2
if( is_first_surrogate_char(c) ) if( is_first_surrogate_char(c) )
@@ -1067,19 +1069,19 @@ bool wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, bool clear
/*! /*
this function converts a wide stream into a utf8 string * convert a wide stream into an UTF-8 string
*
input: * input:
buffer - a wide stream for reading from * buffer - a wide stream for reading from
*
output: * output:
utf8 - an output utf8 string * utf8 - an output utf8 string
max_buffer_len - how many characters can be write (we write the terminating null character too) * max_buffer_len - how many characters can be write (we write the terminating null character too)
was_buffer_sufficient_large - a pointer to a bool value - if provided it is set to true if the buffer was sufficient large * was_buffer_sufficient_large - a pointer to a bool value - if provided it is set to true if the buffer was sufficient large
*
this function returns false if there were some errors when converting or if the output buffer was too short * this function returns false if there were some errors when converting or if the output buffer was too short
*/ */
template<typename StreamType> template<typename StreamType>
bool wide_stream_to_utf8(StreamType & buffer, char * utf8, std::size_t max_buffer_size, bool * was_buffer_sufficient_large, int mode) bool wide_stream_to_utf8(StreamType & buffer, char * utf8, std::size_t max_buffer_size, bool * was_buffer_sufficient_large, int mode)
{ {