fixed: in static size_t WideToInt(const wchar_t * wide_string, size_t string_len, int & z, bool & correct)

we didn't test UTF8_CheckRange(). added: functions for converting from a wide string into a UTF-8 c-string: bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1); bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1); bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1); bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode = 1); bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode = 1); bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode = 1); git-svn-id: svn://ttmath.org/publicrep/pikotools/trunk@962 e52654a7-88a9-db11-a3e9-0013d4bc506e
2014-10-04 11:18:33 +00:00
parent 28ea8f3c3e
commit 39717a4dd2
2 changed files with 299 additions and 7 deletions
--- a/utf8/utf8.cpp
+++ b/utf8/utf8.cpp
@@ -5,7 +5,7 @@
 */
 /* 
- * Copyright (c) 2010-2012, Tomasz Sowa
+ * Copyright (c) 2010-2014, Tomasz Sowa
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
@@ -426,7 +426,6 @@ return !was_error;
 /*!
 	this function converts one wide character into UTF-8 sequence
@@ -539,7 +538,6 @@ return len;
 /*
 	an auxiliary function for converting from wide characters to UTF-8
 	converting a wide character into one int
@@ -584,6 +582,7 @@ static size_t WideToInt(const wchar_t * wide_string, size_t string_len, int & z,
 	}
 	else
 	{
 		correct = UTF8_CheckRange(z);
 		return 1;
 	}
 }
@@ -616,6 +615,55 @@ return WideToInt(wide_string, min_str_len, z, correct);
 /*!
 	an auxiliary function for converting from wide characters to UTF-8
 	it reads one character from the beginning of the wide string (by calling WideToInt)
 	and stores its UTF-8 form in the given buffer (by calling IntToUTF8)
 	input:
 		wide_string - the wide string to read from
 		string_len  - how many wide characters are available in wide_string
 		utf8        - the output buffer
 		utf8_len    - the size of the output buffer
 		mode        - what to do with an incorrect wide character
 			1: put U+FFFD "replacement character" instead of it
 			any other value: nothing is written for it
 	output:
 		utf8_written - how many characters were saved in the utf8 buffer (no null terminating
 		               character is added), it is zero if the buffer is too small or if an
 		               incorrect wide character was read and nothing was put in its place
 		was_utf8_buf_too_small - always assigned: true if IntToUTF8 stored nothing (a zero result
 		               is treated as "the buffer is too small"), false otherwise,
 		               if it is true then utf8_written is equal to zero
 		was_error    - set to true if the wide character was incorrect,
 		               it is never set to false here so a caller can collect errors from many calls,
 		               a too small buffer alone does not set this flag
 	the function returns how many wide characters were used (the value returned by WideToInt),
 	if string_len is greater than 0 then the return value is always greater than zero too
 */
 static size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
 							size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode)
 {
 	int code_point;
 	bool is_valid;
 	utf8_written = 0;
 	was_utf8_buf_too_small = false;
 	const size_t consumed = WideToInt(wide_string, string_len, code_point, is_valid);
 	// something is emitted either for a correct character or, in mode 1, for its replacement
 	if( is_valid || mode == 1 )
 	{
 		// 0xFFFD is U+FFFD "replacement character"
 		utf8_written = IntToUTF8(is_valid ? code_point : 0xFFFD, utf8, utf8_len);
 		if( utf8_written == 0 )
 			was_utf8_buf_too_small = true;
 	}
 	if( !is_valid )
 		was_error = true;
 	return consumed;
 }
 /*!
 	an auxiliary function for converting from wide characters to UTF-8
@@ -725,7 +773,6 @@ return WideOneToUTF8(wide_string, min_str_len, utf8, was_error, mode);
 /*!
 	this function converts a wide string into UTF-8 string
@@ -842,6 +889,7 @@ return !was_error;
 }
 /*!
 	this function converts a wide string into UTF-8 stream
@@ -889,8 +937,244 @@ bool WideToUTF8(const std::wstring & wide_string, std::ostream & utf8, int mode)
 /*!
 	this function converts a wide string of a given length into a UTF-8 sequence
 	stored in a caller supplied buffer
 	input:
 		wide_string - a wide string for converting
 		string_len  - length of the wide string
 		mode - what to do with errors when converting
 			0: skip an invalid character
 			1: put U+FFFD "replacement character" instead of the invalid character (default)
 	output:
 		utf8 - a buffer for the UTF-8 sequence
 		utf8_len - the size of the buffer
 		utf8_written - how many bytes have been written to the buffer
 	the function returns false if there was an incorrect character in the wide string
 	or if the output buffer was too small, otherwise it returns true
 	the output is not null terminated
 	an incorrect character does not stop the conversion, a too small buffer stops it immediately
 */
 bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
 {
 	bool had_error = false;
 	utf8_written = 0;
 	while( string_len != 0 )
 	{
 		bool buffer_full;
 		size_t bytes_out;
 		const size_t consumed = WideOneToUTF8(wide_string, string_len, utf8, utf8_len, bytes_out, buffer_full, had_error, mode);
 		// running out of space is reported as a failure and ends the conversion at once
 		if( buffer_full )
 			return false;
 		// move the output window forward
 		utf8_written += bytes_out;
 		utf8_len     -= bytes_out;
 		utf8         += bytes_out;
 		// move the input window forward
 		string_len   -= consumed;
 		wide_string  += consumed;
 	}
 	return !had_error;
 }
 /*!
 	this function converts a wide string (std::wstring) into UTF-8 stream
 	input:
 		wide_string - a wide string for converting
 		mode - what to do with errors when converting
 			0: skip an invalid character
 			1: put U+FFFD "replacement character" istead of the invalid character (default)
 	output:
 		utf8 - a buffer for the UTF-8 stream
 		utf8_len - the size of the buffer
 		utf8_written - how many bytes have been written to the buffer
 	this function returns false if there were some errors when converting or the output buffer was too small,
 	the output string is not null terminated
 	if there is an error when converting (there is an incorrect character in the wide string) the function
 	will continue converting but if the buffer is too small the function breaks immediately
 */
 bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
 {
 	return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, utf8_written, mode);
 }
 /*!
 	this function converts a wide string of a given length into a null terminated UTF-8 c-string
 	input:
 		wide_string - a wide string for converting
 		string_len  - length of the wide string
 		mode - what to do with errors when converting
 			0: skip an invalid character
 			1: put U+FFFD "replacement character" instead of the invalid character (default)
 	output:
 		utf8 - a buffer for the UTF-8 c-string
 		utf8_len - the size of the buffer (including the place for the terminating null)
 	the function returns false if there were some errors when converting or the output buffer was too small
 	if utf8_len is zero the function returns false and the buffer is not touched,
 	in every other case the buffer is null terminated (even if there were errors during converting)
 	an incorrect character does not stop the conversion, a too small buffer stops it immediately
 */
 bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode)
 {
 	// with an empty buffer there is no room even for the terminating null
 	if( utf8_len == 0 )
 		return false;
 	// one byte is held back for the terminator
 	size_t payload_len = 0;
 	const bool converted = WideToUTF8(wide_string, string_len, utf8, utf8_len - 1, payload_len, mode);
 	utf8[payload_len] = '\0';
 	return converted;
 }
 /*!
 	this function converts a wide string (std::wstring) into UTF-8 stream
 	input:
 		wide_string - a wide string for converting
 		mode - what to do with errors when converting
 			0: skip an invalid character
 			1: put U+FFFD "replacement character" istead of the invalid character (default)
 	output:
 		utf8 - a buffer for the UTF-8 stream
 		utf8_len - the size of the buffer
 	this function returns false if there were some errors when converting or the output buffer was too small,
 	the output string is null terminated (even if there were errors during converting)
 	if there is an error when converting (there is an incorrect character in the wide string) the function
 	will continue converting but if the buffer is too small the function breaks immediately
 	(in both cases the utf8 buffer is null terminated)
 */
 bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode)
 {
 	return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, mode);
 }
 /*!
 	this function converts a null terminated wide string into a UTF-8 sequence
 	stored in a caller supplied buffer
 	input:
 		wide_string - a null terminated wide string for converting
 		mode - what to do with errors when converting
 			0: skip an invalid character
 			1: put U+FFFD "replacement character" instead of the invalid character (default)
 	output:
 		utf8 - a buffer for the UTF-8 sequence
 		utf8_len - the size of the buffer
 		utf8_written - how many bytes have been written to the buffer
 	the function returns false if there was an incorrect character in the wide string
 	or if the output buffer was too small, otherwise it returns true
 	the output is not null terminated
 	an incorrect character does not stop the conversion, a too small buffer stops it immediately
 */
 bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
 {
 	bool had_error = false;
 	utf8_written = 0;
 	while( wide_string[0] != 0 )
 	{
 		// at most two wide characters are offered in one step,
 		// only one if the next element is already the terminating null
 		const size_t available = (wide_string[1] != 0) ? 2 : 1;
 		bool buffer_full;
 		size_t bytes_out;
 		const size_t consumed = WideOneToUTF8(wide_string, available, utf8, utf8_len, bytes_out, buffer_full, had_error, mode);
 		// running out of space is reported as a failure and ends the conversion at once
 		if( buffer_full )
 			return false;
 		utf8_written += bytes_out;
 		utf8_len     -= bytes_out;
 		utf8         += bytes_out;
 		wide_string  += consumed;
 	}
 	return !had_error;
 }
 /*!
 	this function converts a wide string into a null terminated UTF-8 c-string
 	the wide string is passed on to the overload which reads it up to its terminating null
 	input:
 		wide_string - a wide string for converting
 		mode - what to do with errors when converting
 			0: skip an invalid character
 			1: put U+FFFD "replacement character" instead of the invalid character (default)
 	output:
 		utf8 - a buffer for the UTF-8 c-string
 		utf8_len - the size of the buffer (including the place for the terminating null)
 	the function returns false if there were some errors when converting or the output buffer was too small
 	if utf8_len is zero the function returns false and the buffer is not touched,
 	in every other case the buffer is null terminated (even if there were errors during converting)
 	an incorrect character does not stop the conversion, a too small buffer stops it immediately
 */
 bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode)
 {
 	// with an empty buffer there is no room even for the terminating null
 	if( utf8_len == 0 )
 		return false;
 	// one byte is held back for the terminator
 	size_t payload_len = 0;
 	const bool converted = WideToUTF8(wide_string, utf8, utf8_len - 1, payload_len, mode);
 	utf8[payload_len] = '\0';
 	return converted;
 }
 } // namespace
--- a/utf8/utf8.h
+++ b/utf8/utf8.h
@@ -5,7 +5,7 @@
 */
 /* 
- * Copyright (c) 2010-2012, Tomasz Sowa
+ * Copyright (c) 2010-2014, Tomasz Sowa
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
@@ -97,10 +97,18 @@ size_t IntToUTF8(int z, std::ostream & utf8);
 // wide string -> UTF-8, the output goes to a std::string
 // in every group below the input is: a pointer with a length, a pointer alone, or a std::wstring
 bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8,  bool clear = true, int mode = 1);
 bool WideToUTF8(const wchar_t * wide_string,                    std::string & utf8,  bool clear = true, int mode = 1);
 bool WideToUTF8(const std::wstring & wide_string,               std::string & utf8,  bool clear = true, int mode = 1);
 // wide string -> UTF-8, the output goes to a std::ostream
 bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::ostream & utf8, int mode = 1);
 bool WideToUTF8(const wchar_t * wide_string,                    std::ostream & utf8, int mode = 1);
 bool WideToUTF8(const std::wstring & wide_string,               std::ostream & utf8, int mode = 1);
 // wide string -> UTF-8, the output goes to a char buffer of utf8_len bytes,
 // utf8_written receives the number of bytes stored, the buffer is not null terminated
 bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
 bool WideToUTF8(const wchar_t * wide_string,                    char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
 bool WideToUTF8(const std::wstring & wide_string,               char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
 // wide string -> UTF-8, the output goes to a char buffer of utf8_len bytes,
 // the buffer is null terminated (nothing is written when utf8_len is zero)
 bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode = 1);
 bool WideToUTF8(const wchar_t * wide_string,                    char * utf8, size_t utf8_len, int mode = 1);
 bool WideToUTF8(const std::wstring & wide_string,               char * utf8, size_t utf8_len, int mode = 1);