fixed: in static size_t WideToInt(const wchar_t * wide_string, size_t string_len, int & z, bool & correct)

the converted character was not being validated with UTF8_CheckRange()

added: functions for converting a wide string into a UTF-8 C string:
       bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
       bool WideToUTF8(const wchar_t * wide_string,                    char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
       bool WideToUTF8(const std::wstring & wide_string,               char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);

       bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode = 1);
       bool WideToUTF8(const wchar_t * wide_string,                    char * utf8, size_t utf8_len, int mode = 1);
       bool WideToUTF8(const std::wstring & wide_string,               char * utf8, size_t utf8_len, int mode = 1);




git-svn-id: svn://ttmath.org/publicrep/pikotools/trunk@962 e52654a7-88a9-db11-a3e9-0013d4bc506e
This commit is contained in:
Tomasz Sowa 2014-10-04 11:18:33 +00:00
parent 28ea8f3c3e
commit 39717a4dd2
2 changed files with 299 additions and 7 deletions

View File

@ -5,7 +5,7 @@
*/ */
/* /*
* Copyright (c) 2010-2012, Tomasz Sowa * Copyright (c) 2010-2014, Tomasz Sowa
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
@ -426,7 +426,6 @@ return !was_error;
/*! /*!
this function converts one wide character into UTF-8 sequence this function converts one wide character into UTF-8 sequence
@ -539,7 +538,6 @@ return len;
/* /*
an auxiliary function for converting from wide characters to UTF-8 an auxiliary function for converting from wide characters to UTF-8
converting a wide character into one int converting a wide character into one int
@ -584,6 +582,7 @@ static size_t WideToInt(const wchar_t * wide_string, size_t string_len, int & z,
} }
else else
{ {
correct = UTF8_CheckRange(z);
return 1; return 1;
} }
} }
@ -616,6 +615,55 @@ return WideToInt(wide_string, min_str_len, z, correct);
/*!
    an auxiliary function: converts the leading character of a wide string
    into a UTF-8 sequence stored in a caller supplied buffer

    input:
        wide_string - the wide string to read from
        string_len  - how many wide characters are available in wide_string
        utf8        - the output buffer
        utf8_len    - the size of the output buffer
        mode        - what to do with an incorrect wide character:
                      1: store U+FFFD "replacement character" in its place
                      any other value: store nothing for that character

    output:
        utf8_written           - how many bytes were stored in utf8 (no null terminator is added),
                                 zero when the buffer is too small or when an incorrect
                                 character was skipped
        was_utf8_buf_too_small - true when nothing could be stored because the buffer is too small
                                 (utf8_written is zero in that case)
        was_error              - set to true when an incorrect wide character was read,
                                 it is never reset to false here so the caller can accumulate it

    the function returns how many wide characters were used
    (the value reported by WideToInt(), greater than zero whenever string_len is greater than zero)
*/
static size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
                            size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode)
{
    int code_point;
    bool is_valid;

    utf8_written = 0;
    was_utf8_buf_too_small = false;

    const size_t consumed = WideToInt(wide_string, string_len, code_point, is_valid);

    if( !is_valid )
    {
        was_error = true;

        // without the replacement mode an incorrect character produces no output at all
        if( mode != 1 )
            return consumed;

        code_point = 0xFFFD; // U+FFFD "replacement character"
    }

    // IntToUTF8() storing zero bytes is treated as "the buffer is too small"
    utf8_written = IntToUTF8(code_point, utf8, utf8_len);
    was_utf8_buf_too_small = (utf8_written == 0);

    return consumed;
}
/*! /*!
an auxiliary function for converting from wide characters to UTF-8 an auxiliary function for converting from wide characters to UTF-8
@ -725,7 +773,6 @@ return WideOneToUTF8(wide_string, min_str_len, utf8, was_error, mode);
/*! /*!
this function converts a wide string into UTF-8 string this function converts a wide string into UTF-8 string
@ -842,6 +889,7 @@ return !was_error;
} }
/*! /*!
this function converts a wide string into UTF-8 stream this function converts a wide string into UTF-8 stream
@ -889,8 +937,244 @@ bool WideToUTF8(const std::wstring & wide_string, std::ostream & utf8, int mode)
/*!
    this function converts a wide string of a given length into UTF-8
    and stores the result in a caller supplied buffer

    input:
        wide_string - a wide string for converting
        string_len  - length of the wide string
        mode        - what to do with errors when converting
                      0: skip an invalid character
                      1: put U+FFFD "replacement character" instead of the invalid character (default)

    output:
        utf8         - a buffer for the UTF-8 output
        utf8_len     - the size of the buffer
        utf8_written - how many bytes have been written to the buffer

    the function returns false if an incorrect wide character was found
    or if the output buffer was too small, the output is not null terminated

    an incorrect wide character does not stop the conversion
    but running out of space in the buffer stops it immediately
*/
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
{
    bool had_error = false;
    bool out_of_space = false;
    size_t emitted = 0;

    utf8_written = 0;

    for( ; string_len > 0 ; )
    {
        const size_t consumed = WideOneToUTF8(wide_string, string_len, utf8, utf8_len, emitted, out_of_space, had_error, mode);

        // a too small buffer is reported as a failure and ends the conversion at once
        if( out_of_space )
            return false;

        // move the input window forward
        wide_string += consumed;
        string_len  -= consumed;

        // move the output window forward and count what was stored
        utf8         += emitted;
        utf8_len     -= emitted;
        utf8_written += emitted;
    }

    return !had_error;
}
/*!
this function converts a wide string (std::wstring) into UTF-8 stream
input:
wide_string - a wide string for converting
mode - what to do with errors when converting
0: skip an invalid character
1: put U+FFFD "replacement character" istead of the invalid character (default)
output:
utf8 - a buffer for the UTF-8 stream
utf8_len - the size of the buffer
utf8_written - how many bytes have been written to the buffer
this function returns false if there were some errors when converting or the output buffer was too small,
the output string is not null terminated
if there is an error when converting (there is an incorrect character in the wide string) the function
will continue converting but if the buffer is too small the function breaks immediately
*/
bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
{
return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, utf8_written, mode);
}
/*!
    this function converts a wide string of a given length into a null terminated UTF-8 string
    stored in a caller supplied buffer

    input:
        wide_string - a wide string for converting
        string_len  - length of the wide string
        mode        - what to do with errors when converting
                      0: skip an invalid character
                      1: put U+FFFD "replacement character" instead of the invalid character (default)

    output:
        utf8     - a buffer for the UTF-8 output
        utf8_len - the size of the buffer (one byte of it is kept for the terminating null)

    the function returns false if there were some errors when converting
    or the output buffer was too small

    when utf8_len is zero the function returns false and the buffer is not touched,
    in every other case the buffer is null terminated, also after an error
*/
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode)
{
    // there is no room even for the terminating null
    if( utf8_len == 0 )
        return false;

    size_t bytes_stored;

    // convert into all but the last byte so the terminator always fits
    const bool ok = WideToUTF8(wide_string, string_len, utf8, utf8_len - 1, bytes_stored, mode);
    utf8[bytes_stored] = 0;

    return ok;
}
/*!
this function converts a wide string (std::wstring) into UTF-8 stream
input:
wide_string - a wide string for converting
mode - what to do with errors when converting
0: skip an invalid character
1: put U+FFFD "replacement character" istead of the invalid character (default)
output:
utf8 - a buffer for the UTF-8 stream
utf8_len - the size of the buffer
this function returns false if there were some errors when converting or the output buffer was too small,
the output string is null terminated (even if there were errors during converting)
if there is an error when converting (there is an incorrect character in the wide string) the function
will continue converting but if the buffer is too small the function breaks immediately
(in both cases the utf8 buffer is null terminated)
*/
bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode)
{
return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, mode);
}
/*!
    this function converts a null terminated wide string into UTF-8
    and stores the result in a caller supplied buffer

    input:
        wide_string - a null terminated wide string for converting
        mode        - what to do with errors when converting
                      0: skip an invalid character
                      1: put U+FFFD "replacement character" instead of the invalid character (default)

    output:
        utf8         - a buffer for the UTF-8 output
        utf8_len     - the size of the buffer
        utf8_written - how many bytes have been written to the buffer

    the function returns false if an incorrect wide character was found
    or if the output buffer was too small, the output is not null terminated

    an incorrect wide character does not stop the conversion
    but running out of space in the buffer stops it immediately
*/
bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
{
    bool had_error = false;
    bool out_of_space = false;
    size_t emitted = 0;

    utf8_written = 0;

    while( *wide_string != 0 )
    {
        // the total length is unknown, so at most two wide characters are offered per step
        // and the second one only when it is not the terminating null
        const size_t available = (wide_string[1] != 0) ? 2 : 1;
        const size_t consumed  = WideOneToUTF8(wide_string, available, utf8, utf8_len, emitted, out_of_space, had_error, mode);

        // a too small buffer is reported as a failure and ends the conversion at once
        if( out_of_space )
            return false;

        wide_string  += consumed;
        utf8         += emitted;
        utf8_len     -= emitted;
        utf8_written += emitted;
    }

    return !had_error;
}
/*!
    this function converts a null terminated wide string into a null terminated UTF-8 string
    stored in a caller supplied buffer

    input:
        wide_string - a null terminated wide string for converting
        mode        - what to do with errors when converting
                      0: skip an invalid character
                      1: put U+FFFD "replacement character" instead of the invalid character (default)

    output:
        utf8     - a buffer for the UTF-8 output
        utf8_len - the size of the buffer (one byte of it is kept for the terminating null)

    the function returns false if there were some errors when converting
    or the output buffer was too small

    when utf8_len is zero the function returns false and the buffer is not touched,
    in every other case the buffer is null terminated, also after an error
*/
bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode)
{
    // there is no room even for the terminating null
    if( utf8_len == 0 )
        return false;

    size_t bytes_stored;

    // convert into all but the last byte so the terminator always fits
    const bool ok = WideToUTF8(wide_string, utf8, utf8_len - 1, bytes_stored, mode);
    utf8[bytes_stored] = 0;

    return ok;
}
} // namespace } // namespace

View File

@ -5,7 +5,7 @@
*/ */
/* /*
* Copyright (c) 2010-2012, Tomasz Sowa * Copyright (c) 2010-2014, Tomasz Sowa
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
@ -97,10 +97,18 @@ size_t IntToUTF8(int z, std::ostream & utf8);
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear = true, int mode = 1); bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear = true, int mode = 1);
bool WideToUTF8(const wchar_t * wide_string, std::string & utf8, bool clear = true, int mode = 1); bool WideToUTF8(const wchar_t * wide_string, std::string & utf8, bool clear = true, int mode = 1);
bool WideToUTF8(const std::wstring & wide_string, std::string & utf8, bool clear = true, int mode = 1); bool WideToUTF8(const std::wstring & wide_string, std::string & utf8, bool clear = true, int mode = 1);
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::ostream & utf8, int mode = 1); bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::ostream & utf8, int mode = 1);
bool WideToUTF8(const wchar_t * wide_string, std::ostream & utf8, int mode = 1); bool WideToUTF8(const wchar_t * wide_string, std::ostream & utf8, int mode = 1);
bool WideToUTF8(const std::wstring & wide_string, std::ostream & utf8, int mode = 1); bool WideToUTF8(const std::wstring & wide_string, std::ostream & utf8, int mode = 1);
// conversions into a caller supplied char buffer: the number of stored bytes is returned
// through utf8_written and the output is NOT null terminated
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
// conversions into a caller supplied char buffer: the output is null terminated
// (they return false without touching the buffer when utf8_len is zero)
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode = 1);
bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode = 1);
bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode = 1);