fixed: in static size_t WideToInt(const wchar_t * wide_string, size_t string_len, int & z, bool & correct)

we didn't test UTF8_CheckRange(). added: functions for converting from a wide string into a UTF-8 c-string: bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1); bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1); bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1); bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode = 1); bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode = 1); bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode = 1); git-svn-id: svn://ttmath.org/publicrep/pikotools/trunk@962 e52654a7-88a9-db11-a3e9-0013d4bc506e
2014-10-04 11:18:33 +00:00
parent 28ea8f3c3e
commit 39717a4dd2
2 changed files with 299 additions and 7 deletions
@@ -5,7 +5,7 @@
 */

 /* 
- * Copyright (c) 2010-2012, Tomasz Sowa
+ * Copyright (c) 2010-2014, Tomasz Sowa
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
@@ -426,7 +426,6 @@ return !was_error;



-
 /*!
 	this function converts one wide character into UTF-8 sequence

@@ -539,7 +538,6 @@ return len;



-
 /*
 	an auxiliary function for converting from wide characters to UTF-8
 	converting a wide character into one int
@@ -584,6 +582,7 @@ static size_t WideToInt(const wchar_t * wide_string, size_t string_len, int & z,
 	}
 	else
 	{
+		correct = UTF8_CheckRange(z);
 		return 1;
 	}
 }
@@ -616,6 +615,55 @@ return WideToInt(wide_string, min_str_len, z, correct);



+/*!
+	an auxiliary function for converting from wide characters to UTF-8
+
+	returns how many wide characters were used
+	if string_len is greater than 0 then the return value is always greater than zero too
+
+	utf8_written - how many characters were saved in the utf8 string (the string doesn't have
+	               a null terminating character)
+	               it can be equal to zero if the utf8 buffer is too small or there was an incorrect wide character read
+	was_utf8_buf_too_small -  will be true if the utf8 buffer is too small
+	               if this flag is true then utf8_written is equal to zero
+	was_error    - will be set to true if there was an incorrect wide character, otherwise it is left unchanged
+	               (a too small utf8 buffer alone does not set was_error)
+*/
+static size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
+							size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode)
+{
+int z;
+bool correct;
+size_t chars;
+
+	utf8_written = 0;
+	was_utf8_buf_too_small = false;
+	chars = WideToInt(wide_string, string_len, z, correct);
+
+	if( correct )
+	{
+		utf8_written = IntToUTF8(z, utf8, utf8_len);
+
+		if( utf8_written == 0 )
+			was_utf8_buf_too_small = true;
+	}
+	else
+	{
+		if( mode == 1 )
+		{
+			utf8_written = IntToUTF8(0xFFFD, utf8, utf8_len); // U+FFFD "replacement character"
+
+			if( utf8_written == 0 )
+				was_utf8_buf_too_small = true;
+		}
+
+		was_error = true;
+	}
+
+return chars;
+}
+
+

 /*!
 	an auxiliary function for converting from wide characters to UTF-8
@@ -725,7 +773,6 @@ return WideOneToUTF8(wide_string, min_str_len, utf8, was_error, mode);



-
 /*!
 	this function converts a wide string into UTF-8 string

@@ -842,6 +889,7 @@ return !was_error;
 }


+
 /*!
 	this function converts a wide string into UTF-8 stream

@@ -889,8 +937,244 @@ bool WideToUTF8(const std::wstring & wide_string, std::ostream & utf8, int mode)



+/*!
+	this function converts a wide string into UTF-8 stream
+
+	input:
+		wide_string - a wide string for converting
+		string_len  - length of the wide string
+		mode - what to do with errors when converting
+			0: skip an invalid character
+			1: put U+FFFD "replacement character" instead of the invalid character (default)
+
+	output:
+		utf8 - a buffer for the UTF-8 stream
+		utf8_len - the size of the buffer
+		utf8_written - how many bytes have been written to the buffer
+
+	this function returns false if there were some errors when converting or the output buffer was too small,
+	the output string is not null terminated
+
+	if there is an error when converting (there is an incorrect character in the wide string) the function
+	will continue converting but if the buffer is too small the function breaks immediately
+*/
+bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
+{
+bool was_error = false;
+bool was_buffer_to_small;
+size_t chars, utf8_saved;
+
+	utf8_written = 0;
+
+	while( string_len > 0 )
+	{
+		chars = WideOneToUTF8(wide_string, string_len, utf8, utf8_len, utf8_saved, was_buffer_to_small, was_error, mode);
+
+		if( was_buffer_to_small )
+		{
+			/*
+			 * if the buffer was too small break immediately
+			 * and set the was_error flag
+			 */
+			was_error = true;
+			break;
+		}
+
+		wide_string  += chars;
+		string_len   -= chars;
+		utf8         += utf8_saved;
+		utf8_len     -= utf8_saved;
+		utf8_written += utf8_saved;
+	}
+
+return !was_error;
+}
+
+
+
+/*!
+	this function converts a wide string (std::wstring) into UTF-8 stream
+
+	input:
+		wide_string - a wide string for converting
+		mode - what to do with errors when converting
+			0: skip an invalid character
+			1: put U+FFFD "replacement character" instead of the invalid character (default)
+
+	output:
+		utf8 - a buffer for the UTF-8 stream
+		utf8_len - the size of the buffer
+		utf8_written - how many bytes have been written to the buffer
+
+	this function returns false if there were some errors when converting or the output buffer was too small,
+	the output string is not null terminated
+
+	if there is an error when converting (there is an incorrect character in the wide string) the function
+	will continue converting but if the buffer is too small the function breaks immediately
+*/
+bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
+{
+	return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, utf8_written, mode);
+}
+
+
+
+/*!
+	this function converts a wide string into UTF-8 stream
+
+	input:
+		wide_string - a wide string for converting
+		string_len  - length of the wide string
+		mode - what to do with errors when converting
+			0: skip an invalid character
+			1: put U+FFFD "replacement character" instead of the invalid character (default)
+
+	output:
+		utf8 - a buffer for the UTF-8 stream
+		utf8_len - the size of the buffer
+
+	this function returns false if there were some errors when converting or the output buffer was too small,
+	the output string is null terminated (even if there were errors during converting)
+
+	if there is an error when converting (there is an incorrect character in the wide string) the function
+	will continue converting but if the buffer is too small the function breaks immediately
+	(in both cases the utf8 buffer is null terminated)
+*/
+bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode)
+{
+size_t utf8_saved;
+bool res;
+
+	if( utf8_len == 0 )
+		return false;
+
+	res = WideToUTF8(wide_string, string_len, utf8, utf8_len - 1, utf8_saved, mode);
+	utf8[utf8_saved] = 0;
+
+return res;
+}
+
+
+
+/*!
+	this function converts a wide string (std::wstring) into UTF-8 stream
+
+	input:
+		wide_string - a wide string for converting
+		mode - what to do with errors when converting
+			0: skip an invalid character
+			1: put U+FFFD "replacement character" instead of the invalid character (default)
+
+	output:
+		utf8 - a buffer for the UTF-8 stream
+		utf8_len - the size of the buffer
+
+	this function returns false if there were some errors when converting or the output buffer was too small,
+	the output string is null terminated (even if there were errors during converting)
+
+	if there is an error when converting (there is an incorrect character in the wide string) the function
+	will continue converting but if the buffer is too small the function breaks immediately
+	(in both cases the utf8 buffer is null terminated)
+*/
+bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode)
+{
+	return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, mode);
+}
+
+
+
+/*!
+	this function converts a wide string into UTF-8 stream
+
+	input:
+		wide_string - a null terminated wide string for converting
+		mode - what to do with errors when converting
+			0: skip an invalid character
+			1: put U+FFFD "replacement character" instead of the invalid character (default)
+
+	output:
+		utf8 - a buffer for the UTF-8 stream
+		utf8_len - the size of the buffer
+		utf8_written - how many bytes have been written to the buffer
+
+	this function returns false if there were some errors when converting or the output buffer was too small,
+	the output string is not null terminated
+
+	if there is an error when converting (there is an incorrect character in the wide string) the function
+	will continue converting but if the buffer is too small the function breaks immediately
+*/
+bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
+{
+bool was_error = false;
+bool was_buffer_to_small;
+size_t chars, utf8_saved;
+size_t len;
+
+	utf8_written = 0;
+
+	while( *wide_string )
+	{
+		len = (*(wide_string+1) == 0) ? 1 : 2;
+		chars = WideOneToUTF8(wide_string, len, utf8, utf8_len, utf8_saved, was_buffer_to_small, was_error, mode);
+
+		if( was_buffer_to_small )
+		{
+			/*
+			 * if the buffer was too small break immediately
+			 * and set the was_error flag
+			 */
+			was_error = true;
+			break;
+		}
+
+		wide_string  += chars;
+		utf8         += utf8_saved;
+		utf8_len     -= utf8_saved;
+		utf8_written += utf8_saved;
+	}
+
+return !was_error;
+}
+
+
+
+/*!
+	this function converts a wide string into UTF-8 stream
+
+	input:
+		wide_string - a null terminated wide string for converting
+		mode - what to do with errors when converting
+			0: skip an invalid character
+			1: put U+FFFD "replacement character" instead of the invalid character (default)
+
+	output:
+		utf8 - a buffer for the UTF-8 stream
+		utf8_len - the size of the buffer
+
+	this function returns false if there were some errors when converting or the output buffer was too small,
+	the output string is null terminated (even if there were errors during converting)
+
+	if there is an error when converting (there is an incorrect character in the wide string) the function
+	will continue converting but if the buffer is too small the function breaks immediately
+	(in both cases the utf8 buffer is null terminated)
+*/
+bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode)
+{
+size_t utf8_saved;
+bool res;
+
+	if( utf8_len == 0 )
+		return false;
+
+	res = WideToUTF8(wide_string, utf8, utf8_len - 1, utf8_saved, mode);
+	utf8[utf8_saved] = 0;
+
+return res;
+}
+
+
+
+

 } // namespace

-
-
@@ -5,7 +5,7 @@
 */

 /* 
- * Copyright (c) 2010-2012, Tomasz Sowa
+ * Copyright (c) 2010-2014, Tomasz Sowa
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
@@ -97,10 +97,18 @@ size_t IntToUTF8(int z, std::ostream & utf8);
 bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8,  bool clear = true, int mode = 1);
 bool WideToUTF8(const wchar_t * wide_string,                    std::string & utf8,  bool clear = true, int mode = 1);
 bool WideToUTF8(const std::wstring & wide_string,               std::string & utf8,  bool clear = true, int mode = 1);
+
 bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::ostream & utf8, int mode = 1);
 bool WideToUTF8(const wchar_t * wide_string,                    std::ostream & utf8, int mode = 1);
 bool WideToUTF8(const std::wstring & wide_string,               std::ostream & utf8, int mode = 1);

+bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
+bool WideToUTF8(const wchar_t * wide_string,                    char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
+bool WideToUTF8(const std::wstring & wide_string,               char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
+
+bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode = 1);
+bool WideToUTF8(const wchar_t * wide_string,                    char * utf8, size_t utf8_len, int mode = 1);
+bool WideToUTF8(const std::wstring & wide_string,               char * utf8, size_t utf8_len, int mode = 1);