fixed: static size_t WideToInt(const wchar_t * wide_string, size_t string_len, int & z, bool & correct)
did not check the converted value with UTF8_CheckRange() added: functions for converting from a wide string into a UTF-8 C-string: bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1); bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1); bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1); bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode = 1); bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode = 1); bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode = 1); git-svn-id: svn://ttmath.org/publicrep/pikotools/trunk@962 e52654a7-88a9-db11-a3e9-0013d4bc506e
This commit is contained in:
parent
28ea8f3c3e
commit
39717a4dd2
296
utf8/utf8.cpp
296
utf8/utf8.cpp
|
@ -5,7 +5,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2010-2012, Tomasz Sowa
|
* Copyright (c) 2010-2014, Tomasz Sowa
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -426,7 +426,6 @@ return !was_error;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
this function converts one wide character into UTF-8 sequence
|
this function converts one wide character into UTF-8 sequence
|
||||||
|
|
||||||
|
@ -539,7 +538,6 @@ return len;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
an auxiliary function for converting from wide characters to UTF-8
|
an auxiliary function for converting from wide characters to UTF-8
|
||||||
converting a wide character into one int
|
converting a wide character into one int
|
||||||
|
@ -584,6 +582,7 @@ static size_t WideToInt(const wchar_t * wide_string, size_t string_len, int & z,
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
correct = UTF8_CheckRange(z);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -616,6 +615,55 @@ return WideToInt(wide_string, min_str_len, z, correct);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*!
|
||||||
|
an auxiliary function for converting from wide characters to UTF-8
|
||||||
|
|
||||||
|
returns how many wide characters were used
|
||||||
|
if string_len is greater than 0 then the return value is always greater than zero too
|
||||||
|
|
||||||
|
utf8_written - how many characters were saved in the utf8 string (the string doesn't have
|
||||||
|
a null terminating character)
|
||||||
|
it can be equal to zero if the utf8 buffer is too small or there was an incorrect wide character read
|
||||||
|
was_utf8_buf_too_small - will be true if the utf8 buffer is too small
|
||||||
|
if this flag is true then utf8_written is equal to zero
|
||||||
|
was_error - will be true if there is an error when converting (there was an incorrect wide character)
|
||||||
|
(was_error will not be true if the utf8 buffer is too small)
|
||||||
|
*/
|
||||||
|
static size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
|
||||||
|
size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode)
|
||||||
|
{
|
||||||
|
int z;
|
||||||
|
bool correct;
|
||||||
|
size_t chars;
|
||||||
|
|
||||||
|
utf8_written = 0;
|
||||||
|
was_utf8_buf_too_small = false;
|
||||||
|
chars = WideToInt(wide_string, string_len, z, correct);
|
||||||
|
|
||||||
|
if( correct )
|
||||||
|
{
|
||||||
|
utf8_written = IntToUTF8(z, utf8, utf8_len);
|
||||||
|
|
||||||
|
if( utf8_written == 0 )
|
||||||
|
was_utf8_buf_too_small = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if( mode == 1 )
|
||||||
|
{
|
||||||
|
utf8_written = IntToUTF8(0xFFFD, utf8, utf8_len); // U+FFFD "replacement character"
|
||||||
|
|
||||||
|
if( utf8_written == 0 )
|
||||||
|
was_utf8_buf_too_small = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
was_error = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return chars;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
an auxiliary function for converting from wide characters to UTF-8
|
an auxiliary function for converting from wide characters to UTF-8
|
||||||
|
@ -725,7 +773,6 @@ return WideOneToUTF8(wide_string, min_str_len, utf8, was_error, mode);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
this function converts a wide string into UTF-8 string
|
this function converts a wide string into UTF-8 string
|
||||||
|
|
||||||
|
@ -842,6 +889,7 @@ return !was_error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
this function converts a wide string into UTF-8 stream
|
this function converts a wide string into UTF-8 stream
|
||||||
|
|
||||||
|
@ -889,8 +937,244 @@ bool WideToUTF8(const std::wstring & wide_string, std::ostream & utf8, int mode)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*!
|
||||||
|
this function converts a wide string into UTF-8 stream
|
||||||
|
|
||||||
|
input:
|
||||||
|
wide_string - a wide string for converting
|
||||||
|
string_len - lenght of the wide string
|
||||||
|
mode - what to do with errors when converting
|
||||||
|
0: skip an invalid character
|
||||||
|
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||||
|
|
||||||
|
output:
|
||||||
|
utf8 - a buffer for the UTF-8 stream
|
||||||
|
utf8_len - the size of the buffer
|
||||||
|
utf8_written - how many bytes have been written to the buffer
|
||||||
|
|
||||||
|
this function returns false if there were some errors when converting or the output buffer was too small,
|
||||||
|
the output string is not null terminated
|
||||||
|
|
||||||
|
if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||||
|
will continue converting but if the buffer is too small the function breaks immediately
|
||||||
|
*/
|
||||||
|
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
||||||
|
{
|
||||||
|
bool was_error = false;
|
||||||
|
bool was_buffer_to_small;
|
||||||
|
size_t chars, utf8_saved;
|
||||||
|
|
||||||
|
utf8_written = 0;
|
||||||
|
|
||||||
|
while( string_len > 0 )
|
||||||
|
{
|
||||||
|
chars = WideOneToUTF8(wide_string, string_len, utf8, utf8_len, utf8_saved, was_buffer_to_small, was_error, mode);
|
||||||
|
|
||||||
|
if( was_buffer_to_small )
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* if the buffer was too small break immediately
|
||||||
|
* and set the was_error flag
|
||||||
|
*/
|
||||||
|
was_error = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
wide_string += chars;
|
||||||
|
string_len -= chars;
|
||||||
|
utf8 += utf8_saved;
|
||||||
|
utf8_len -= utf8_saved;
|
||||||
|
utf8_written += utf8_saved;
|
||||||
|
}
|
||||||
|
|
||||||
|
return !was_error;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*!
|
||||||
|
this function converts a wide string (std::wstring) into UTF-8 stream
|
||||||
|
|
||||||
|
input:
|
||||||
|
wide_string - a wide string for converting
|
||||||
|
mode - what to do with errors when converting
|
||||||
|
0: skip an invalid character
|
||||||
|
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||||
|
|
||||||
|
output:
|
||||||
|
utf8 - a buffer for the UTF-8 stream
|
||||||
|
utf8_len - the size of the buffer
|
||||||
|
utf8_written - how many bytes have been written to the buffer
|
||||||
|
|
||||||
|
this function returns false if there were some errors when converting or the output buffer was too small,
|
||||||
|
the output string is not null terminated
|
||||||
|
|
||||||
|
if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||||
|
will continue converting but if the buffer is too small the function breaks immediately
|
||||||
|
*/
|
||||||
|
bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
||||||
|
{
|
||||||
|
return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, utf8_written, mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*!
|
||||||
|
this function converts a wide string into UTF-8 stream
|
||||||
|
|
||||||
|
input:
|
||||||
|
wide_string - a wide string for converting
|
||||||
|
string_len - lenght of the wide string
|
||||||
|
mode - what to do with errors when converting
|
||||||
|
0: skip an invalid character
|
||||||
|
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||||
|
|
||||||
|
output:
|
||||||
|
utf8 - a buffer for the UTF-8 stream
|
||||||
|
utf8_len - the size of the buffer
|
||||||
|
|
||||||
|
this function returns false if there were some errors when converting or the output buffer was too small,
|
||||||
|
the output string is null terminated (even if there were errors during converting)
|
||||||
|
|
||||||
|
if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||||
|
will continue converting but if the buffer is too small the function breaks immediately
|
||||||
|
(in both cases the utf8 buffer is null terminated)
|
||||||
|
*/
|
||||||
|
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode)
|
||||||
|
{
|
||||||
|
size_t utf8_saved;
|
||||||
|
bool res;
|
||||||
|
|
||||||
|
if( utf8_len == 0 )
|
||||||
|
return false;
|
||||||
|
|
||||||
|
res = WideToUTF8(wide_string, string_len, utf8, utf8_len - 1, utf8_saved, mode);
|
||||||
|
utf8[utf8_saved] = 0;
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*!
|
||||||
|
this function converts a wide string (std::wstring) into UTF-8 stream
|
||||||
|
|
||||||
|
input:
|
||||||
|
wide_string - a wide string for converting
|
||||||
|
mode - what to do with errors when converting
|
||||||
|
0: skip an invalid character
|
||||||
|
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||||
|
|
||||||
|
output:
|
||||||
|
utf8 - a buffer for the UTF-8 stream
|
||||||
|
utf8_len - the size of the buffer
|
||||||
|
|
||||||
|
this function returns false if there were some errors when converting or the output buffer was too small,
|
||||||
|
the output string is null terminated (even if there were errors during converting)
|
||||||
|
|
||||||
|
if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||||
|
will continue converting but if the buffer is too small the function breaks immediately
|
||||||
|
(in both cases the utf8 buffer is null terminated)
|
||||||
|
*/
|
||||||
|
bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode)
|
||||||
|
{
|
||||||
|
return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*!
|
||||||
|
this function converts a wide string into UTF-8 stream
|
||||||
|
|
||||||
|
input:
|
||||||
|
wide_string - a null terminated wide string for converting
|
||||||
|
mode - what to do with errors when converting
|
||||||
|
0: skip an invalid character
|
||||||
|
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||||
|
|
||||||
|
output:
|
||||||
|
utf8 - a buffer for the UTF-8 stream
|
||||||
|
utf8_len - the size of the buffer
|
||||||
|
utf8_written - how many bytes have been written to the buffer
|
||||||
|
|
||||||
|
this function returns false if there were some errors when converting or the output buffer was too small,
|
||||||
|
the output string is not null terminated
|
||||||
|
|
||||||
|
if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||||
|
will continue converting but if the buffer is too small the function breaks immediately
|
||||||
|
*/
|
||||||
|
bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
||||||
|
{
|
||||||
|
bool was_error = false;
|
||||||
|
bool was_buffer_to_small;
|
||||||
|
size_t chars, utf8_saved;
|
||||||
|
size_t len;
|
||||||
|
|
||||||
|
utf8_written = 0;
|
||||||
|
|
||||||
|
while( *wide_string )
|
||||||
|
{
|
||||||
|
len = (*(wide_string+1) == 0) ? 1 : 2;
|
||||||
|
chars = WideOneToUTF8(wide_string, len, utf8, utf8_len, utf8_saved, was_buffer_to_small, was_error, mode);
|
||||||
|
|
||||||
|
if( was_buffer_to_small )
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* if the buffer was too small break immediately
|
||||||
|
* and set the was_error flag
|
||||||
|
*/
|
||||||
|
was_error = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
wide_string += chars;
|
||||||
|
utf8 += utf8_saved;
|
||||||
|
utf8_len -= utf8_saved;
|
||||||
|
utf8_written += utf8_saved;
|
||||||
|
}
|
||||||
|
|
||||||
|
return !was_error;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*!
|
||||||
|
this function converts a wide string into UTF-8 stream
|
||||||
|
|
||||||
|
input:
|
||||||
|
wide_string - a wide string for converting
|
||||||
|
mode - what to do with errors when converting
|
||||||
|
0: skip an invalid character
|
||||||
|
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||||
|
|
||||||
|
output:
|
||||||
|
utf8 - a buffer for the UTF-8 stream
|
||||||
|
utf8_len - the size of the buffer
|
||||||
|
|
||||||
|
this function returns false if there were some errors when converting or the output buffer was too small,
|
||||||
|
the output string is null terminated (even if there were errors during converting)
|
||||||
|
|
||||||
|
if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||||
|
will continue converting but if the buffer is too small the function breaks immediately
|
||||||
|
(in both cases the utf8 buffer is null terminated)
|
||||||
|
*/
|
||||||
|
bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode)
|
||||||
|
{
|
||||||
|
size_t utf8_saved;
|
||||||
|
bool res;
|
||||||
|
|
||||||
|
if( utf8_len == 0 )
|
||||||
|
return false;
|
||||||
|
|
||||||
|
res = WideToUTF8(wide_string, utf8, utf8_len - 1, utf8_saved, mode);
|
||||||
|
utf8[utf8_saved] = 0;
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
10
utf8/utf8.h
10
utf8/utf8.h
|
@ -5,7 +5,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2010-2012, Tomasz Sowa
|
* Copyright (c) 2010-2014, Tomasz Sowa
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -97,10 +97,18 @@ size_t IntToUTF8(int z, std::ostream & utf8);
|
||||||
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear = true, int mode = 1);
|
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear = true, int mode = 1);
|
||||||
bool WideToUTF8(const wchar_t * wide_string, std::string & utf8, bool clear = true, int mode = 1);
|
bool WideToUTF8(const wchar_t * wide_string, std::string & utf8, bool clear = true, int mode = 1);
|
||||||
bool WideToUTF8(const std::wstring & wide_string, std::string & utf8, bool clear = true, int mode = 1);
|
bool WideToUTF8(const std::wstring & wide_string, std::string & utf8, bool clear = true, int mode = 1);
|
||||||
|
|
||||||
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::ostream & utf8, int mode = 1);
|
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::ostream & utf8, int mode = 1);
|
||||||
bool WideToUTF8(const wchar_t * wide_string, std::ostream & utf8, int mode = 1);
|
bool WideToUTF8(const wchar_t * wide_string, std::ostream & utf8, int mode = 1);
|
||||||
bool WideToUTF8(const std::wstring & wide_string, std::ostream & utf8, int mode = 1);
|
bool WideToUTF8(const std::wstring & wide_string, std::ostream & utf8, int mode = 1);
|
||||||
|
|
||||||
|
// Conversions from a wide string into a caller-supplied char buffer.
// These three overloads report the number of bytes stored through utf8_written
// and do not null terminate the output.
// mode: 0 - skip an invalid character, 1 - put U+FFFD instead of it (default).
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
|
||||||
|
bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
|
||||||
|
bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
|
||||||
|
|
||||||
|
// These three overloads null terminate the output buffer (also when an error is reported);
// they return false without writing anything when utf8_len is zero.
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode = 1);
|
||||||
|
bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode = 1);
|
||||||
|
bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode = 1);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue