reorganization in utf8

- utf8 auxiliary functions have been moved to the utf8_private.h file - utf8.h now shows only the functions available for consumers - template functions have been moved to utf8_templates.h (utf8.h contains only their declarations); utf8_templates.h is included at the end of utf8.h - functions which take std::ostream have been changed to templates (the stream is a template argument now)
2021-03-15 19:34:51 +01:00
parent effe9be0a3
commit fac3a7eb71
5 changed files with 845 additions and 606 deletions
@@ -5,7 +5,7 @@
 */
 /* 
- * Copyright (c) 2010-2018, Tomasz Sowa
+ * Copyright (c) 2010-2021, Tomasz Sowa
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
@@ -36,6 +36,7 @@
 */
 #include "utf8.h"
 #include "utf8_private.h"
@@ -44,48 +45,6 @@ namespace PT
 /*!
 	an auxiliary function for converting from UTF-8 string
 	decodes the first octet of a UTF-8 sequence
 	input:
 		uz - the first octet of the sequence
 	output:
 		len - how many octets the whole sequence has (1 for a single octet character)
 		res - the payload bits taken from the first octet
 	the function returns false if uz cannot start a sequence:
 	a continuation octet (10xxxxxx, res is left untouched in this case)
 	or an octet with an all-zero payload which is not 0xE0 or 0xF0
 	(so a zero octet, 0xC0, 0xF8, 0xFC, 0xFE and 0xFF are refused)
 */
 static bool UTF8ToInt_FirstOctet(unsigned char uz, size_t & len, int & res)
 {
 	// count the leading one bits, shifting them out of the octet
 	for(len=0 ; (uz & 0x80) != 0 ; ++len)
 		uz <<= 1;
 	// 10xxxxxx is a continuation octet, it cannot be the first one
 	if( len == 1 )
 		return false;
 	res = uz;
 	// move the payload bits back to the least significant positions
 	if( len > 0 )
 		res >>= len;
 	// 0xE0 and 0xF0 are valid lead octets although their payload is zero
 	// (e.g. U+0800 is E0 A0 80 and U+10000 is F0 90 80 80),
 	// the significant bits come from the following octets;
 	// every other octet with a zero payload is still refused
 	// NOTE(review): overlong forms such as E0 80 80 have to be rejected
 	// by the caller after the whole sequence is read - confirm UTF8_CheckRange covers it
 	if( res == 0 && len != 3 && len != 4 )
 		return false;
 	if( len == 0 )
 		len = 1;
 return true;
 }
 /*!
 	an auxiliary function for converting from UTF-8 string
 	appends the six payload bits of a continuation octet (10xxxxxx) to res
 	returns false, leaving res untouched, if uz is not a continuation octet
 */
 static bool UTF8ToInt_AddNextOctet(unsigned char uz, int & res)
 {
 	const bool is_continuation = ((uz & 0xc0) == 0x80);
 	if( is_continuation )
 	{
 		// make room for six new bits and put them in
 		res <<= 6;
 		res |= (uz & 0x3F);
 	}
 	return is_continuation;
 }
 /*!
@@ -126,15 +85,17 @@ size_t i, len;
 	if( utf8_len == 0 )
 		return 0;
-	if( !UTF8ToInt_FirstOctet(utf8[0], len, res) )
+	if( !private_namespace::UTF8ToInt_FirstOctet(utf8[0], len, res) )
 		return 1;
 	if( utf8_len < len )
 		return utf8_len;
 	for(i=1 ; i<len ; ++i)
-		if( !UTF8ToInt_AddNextOctet(utf8[i], res) )
+	{
 		if( !private_namespace::UTF8ToInt_AddNextOctet(utf8[i], res) )
 			return i;
 	}
 	if( UTF8_CheckRange(res) )
 		correct = true;
@@ -168,7 +129,7 @@ size_t i, len;
 	if( *utf8 == 0 )
 		return 0;
-	if( !UTF8ToInt_FirstOctet(utf8[0], len, res) )
+	if( !private_namespace::UTF8ToInt_FirstOctet(utf8[0], len, res) )
 		return 1;
 	for(i=1 ; i<len ; ++i)
@@ -176,7 +137,7 @@ size_t i, len;
 		if( utf8[i] == 0 )
 			return i;
-		if( !UTF8ToInt_AddNextOctet(utf8[i], res) )
+		if( !private_namespace::UTF8ToInt_AddNextOctet(utf8[i], res) )
 			return i;
 	}
@@ -235,7 +196,7 @@ unsigned char uz;
 	if( !utf8 )
 		return 0;
-	if( !UTF8ToInt_FirstOctet(uz, len, res) )
+	if( !private_namespace::UTF8ToInt_FirstOctet(uz, len, res) )
 		return 1;
 	for(i=1 ; i<len ; ++i)
@@ -245,7 +206,7 @@ unsigned char uz;
 		if( !utf8 )
 			return i;
-		if( !UTF8ToInt_AddNextOctet(uz, res) )
+		if( !private_namespace::UTF8ToInt_AddNextOctet(uz, res) )
 			return i;
 	}
@@ -485,268 +446,6 @@ return len;
 /*!
 	this function sends one wide character, encoded as UTF-8, to an output stream
 	input:
 		z - wide character
 	output:
 		utf8 - a UTF-8 stream receiving the encoded sequence
 	the function returns how many characters have been written to the utf8 stream,
 	zero means that 'z' is an incorrect unicode character
 */
 size_t IntToUTF8(int z, std::ostream & utf8)
 {
 	// the character is encoded into a local buffer first
 	char encoded[10];
 	const size_t encoded_len = IntToUTF8(z, encoded, sizeof(encoded)/sizeof(char));
 	for(const char * octet = encoded ; octet != encoded + encoded_len ; ++octet)
 		utf8 << *octet;
 	return encoded_len;
 }
 /*
 	an auxiliary function for converting from wide characters to UTF-8
 	reads one character (one wide character or a surrogate pair) and stores it in z
 	correct is set to false if the character is not a valid one
 	returns how many wide characters were used
 	if string_len is greater than 0 then the return value is always greater than zero too
 */
 static size_t WideToInt(const wchar_t * wide_string, size_t string_len, int & z, bool & correct)
 {
 	if( string_len == 0 )
 	{
 		z = 0;
 		correct = false;
 		return 0;
 	}
 	z = static_cast<int>(*wide_string);
 	// surrogates are interpreted only when wchar_t is two bytes long
 	const bool is_surrogate = (sizeof(wchar_t) == 2 && z >= 0xD800 && z <= 0xDFFF);
 	if( !is_surrogate )
 	{
 		correct = UTF8_CheckRange(z);
 		return 1;
 	}
 	// a pair has to start with a high surrogate and needs one more wide character
 	const bool starts_pair = (z <= 0xDBFF && string_len > 1);
 	if( !starts_pair )
 	{
 		correct = false;
 		return 1;
 	}
 	const int low = *(wide_string+1);
 	correct = (low >= 0xDC00 && low <= 0xDFFF);
 	if( correct )
 		z = 0x10000 + (((z & 0x3FF) << 10) | (low & 0x3FF));
 	// NOTE(review): both wide characters are reported as used even if the second one
 	// is not a low surrogate - confirm that dropping it is intended
 	return 2;
 }
 /*
 	an auxiliary function for converting from wide characters to UTF-8
 	the same as the version with string_len but for a null terminated string
 	returns how many wide characters were used
 	if wide_string has at least one character then the return value is always greater than zero too
 */
 static size_t WideToInt(const wchar_t * wide_string, int & z, bool & correct)
 {
 	if( *wide_string == 0 )
 	{
 		z = 0;
 		correct = false;
 		return 0;
 	}
 	// at most two wide characters are needed (a surrogate pair)
 	const size_t available = (*(wide_string+1) != 0) ? 2 : 1;
 	return WideToInt(wide_string, available, z, correct);
 }
 /*!
 	an auxiliary function for converting from wide characters to UTF-8
 	converts one character from wide_string and saves it in the utf8 buffer
 	returns how many wide characters were used
 	if string_len is greater than 0 then the return value is always greater than zero too
 	utf8_written - how many characters were saved in the utf8 buffer (no null terminating
 	               character is added), it is zero if the utf8 buffer is too small
 	               or if an incorrect wide character was skipped
 	was_utf8_buf_too_small - will be true if the utf8 buffer is too small
 	               if this flag is true then utf8_written is equal to zero
 	was_error    - will be set to true if an incorrect wide character was read
 	               (a too small buffer alone does not set it, the flag is never cleared here)
 	mode         - if equal to 1 then U+FFFD is saved instead of an incorrect character
 */
 static size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
 							size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode)
 {
 	int code_point;
 	bool is_valid;
 	utf8_written = 0;
 	was_utf8_buf_too_small = false;
 	const size_t used = WideToInt(wide_string, string_len, code_point, is_valid);
 	if( !is_valid )
 	{
 		was_error = true;
 		// without the replacement mode an incorrect character is just skipped
 		if( mode != 1 )
 			return used;
 		code_point = 0xFFFD; // U+FFFD "replacement character"
 	}
 	utf8_written = IntToUTF8(code_point, utf8, utf8_len);
 	was_utf8_buf_too_small = (utf8_written == 0);
 	return used;
 }
 /*!
 	an auxiliary function for converting from wide characters to UTF-8
 	converts one character from wide_string and appends it to the utf8 string
 	was_error is set to true if the character could not be converted,
 	in such a case U+FFFD is appended if mode is equal to 1
 	returns how many wide characters were used
 	if string_len is greater than 0 then the return value is always greater than zero too
 */
 static size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode)
 {
 	int code_point;
 	bool is_valid;
 	const size_t used = WideToInt(wide_string, string_len, code_point, is_valid);
 	const bool appended = is_valid && IntToUTF8(code_point, utf8, false) != 0;
 	if( appended )
 		return used;
 	was_error = true;
 	if( mode == 1 )
 		IntToUTF8(0xFFFD, utf8, false); // U+FFFD "replacement character"
 	return used;
 }
 /*!
 	an auxiliary function for converting from wide characters to UTF-8
 	converts one character from a null terminated wide_string and appends it to the utf8 string
 	was_error is set to true if the character could not be converted,
 	in such a case U+FFFD is appended if mode is equal to 1
 	returns how many wide characters were used
 	if wide_string has at least one character then the return value is always greater than zero too
 */
 static size_t WideOneToUTF8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode)
 {
 	int code_point;
 	bool is_valid;
 	const size_t used = WideToInt(wide_string, code_point, is_valid);
 	const bool appended = is_valid && IntToUTF8(code_point, utf8, false) != 0;
 	if( appended )
 		return used;
 	was_error = true;
 	if( mode == 1 )
 		IntToUTF8(0xFFFD, utf8, false); // U+FFFD "replacement character"
 	return used;
 }
 /*!
 	an auxiliary function for converting from wide characters to UTF-8
 	converts one character from wide_string and sends it to the utf8 stream
 	was_error is set to true if the character could not be converted,
 	in such a case U+FFFD is sent if mode is equal to 1
 	returns how many wide characters were used
 	if string_len is greater than 0 then the return value is always greater than zero too
 */
 static size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, std::ostream & utf8, bool & was_error, int mode)
 {
 	int code_point;
 	bool is_valid;
 	const size_t used = WideToInt(wide_string, string_len, code_point, is_valid);
 	const bool sent = is_valid && IntToUTF8(code_point, utf8) != 0;
 	if( sent )
 		return used;
 	was_error = true;
 	if( mode == 1 )
 		IntToUTF8(0xFFFD, utf8); // U+FFFD "replacement character"
 	return used;
 }
 /*!
 	an auxiliary function for converting from wide characters to UTF-8
 */
 static size_t WideOneToUTF8(const wchar_t * wide_string, std::ostream & utf8, bool & was_error, int mode)
 {
 size_t min_str_len = 1;
 	if( *wide_string == 0 )
 		return 0;
 	if( *(wide_string+1) != 0 )
 		min_str_len = 2;
 return WideOneToUTF8(wide_string, min_str_len, utf8, was_error, mode);
 }
 /*!
 	this function converts a wide string into UTF-8 string
@@ -772,7 +471,7 @@ size_t chars;
 	while( string_len > 0 )
 	{
-		chars = WideOneToUTF8(wide_string, string_len, utf8, was_error, mode);
+		chars = private_namespace::WideOneToUTF8(wide_string, string_len, utf8, was_error, mode);
 		wide_string += chars;
 		string_len  -= chars;
 	}
@@ -804,7 +503,7 @@ bool was_error = false;
 		utf8.clear();
 	while( *wide_string )
-		wide_string += WideOneToUTF8(wide_string, utf8, was_error, mode);
+		wide_string += private_namespace::WideOneToUTF8(wide_string, utf8, was_error, mode);
 return !was_error;
 }
@@ -832,83 +531,6 @@ bool WideToUTF8(const std::wstring & wide_string, std::string & utf8, bool clear
 /*!
 	this function converts a wide string into UTF-8 stream
 	input:
 		wide_string - a wide string for converting
 		string_len - size of the string
 		mode - what to do with errors when converting
 			0: skip an invalid character
 			1: put U+FFFD "replacement character" instead of the invalid character (default)
 	output:
 		utf8 - a UTF-8 stream for the output sequence
 	this function returns false if there were some errors when converting
 */
 bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::ostream & utf8, int mode)
 {
 	bool had_error = false;
 	// each step consumes one character (one or two wide characters)
 	for(size_t used = 0 ; string_len > 0 ; wide_string += used, string_len -= used)
 		used = WideOneToUTF8(wide_string, string_len, utf8, had_error, mode);
 	return !had_error;
 }
 /*!
 	this function converts a wide string into UTF-8 stream
 	input:
 		wide_string - a null terminated wide string for converting
 		mode - what to do with errors when converting
 			0: skip an invalid character
 			1: put U+FFFD "replacement character" instead of the invalid character (default)
 	output:
 		utf8 - a UTF-8 stream for the output sequence
 	this function returns false if there were some errors when converting
 */
 bool WideToUTF8(const wchar_t * wide_string, std::ostream & utf8, int mode)
 {
 	bool had_error = false;
 	for( ; *wide_string != 0 ; )
 	{
 		const size_t used = WideOneToUTF8(wide_string, utf8, had_error, mode);
 		wide_string += used;
 	}
 	return !had_error;
 }
 /*!
 	this function converts a wide string (std::wstring) into UTF-8 stream
 	input:
 		wide_string - a wide string for converting
 		mode - what to do with errors when converting
 			0: skip an invalid character
 			1: put U+FFFD "replacement character" istead of the invalid character (default)
 	output:
 		utf8 - a UTF-8 stream for the output sequence
 	this function returns false if there were some errors when converting
 */
 bool WideToUTF8(const std::wstring & wide_string, std::ostream & utf8, int mode)
 {
 	return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, mode);
 }
 /*!
@@ -942,7 +564,7 @@ size_t chars, utf8_saved;
 	while( string_len > 0 )
 	{
-		chars = WideOneToUTF8(wide_string, string_len, utf8, utf8_len, utf8_saved, was_buffer_to_small, was_error, mode);
+		chars = private_namespace::WideOneToUTF8(wide_string, string_len, utf8, utf8_len, utf8_saved, was_buffer_to_small, was_error, mode);
 		if( was_buffer_to_small )
 		{
@@ -1089,7 +711,7 @@ size_t len;
 	while( *wide_string )
 	{
 		len = (*(wide_string+1) == 0) ? 1 : 2;
-		chars = WideOneToUTF8(wide_string, len, utf8, utf8_len, utf8_saved, was_buffer_to_small, was_error, mode);
+		chars = private_namespace::WideOneToUTF8(wide_string, len, utf8, utf8_len, utf8_saved, was_buffer_to_small, was_error, mode);
 		if( was_buffer_to_small )
 		{
@@ -5,7 +5,7 @@
 */
 /* 
- * Copyright (c) 2010-2018, Tomasz Sowa
+ * Copyright (c) 2010-2021, Tomasz Sowa
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
@@ -38,7 +38,6 @@
 #ifndef headerfile_picotools_utf8_utf8
 #define headerfile_picotools_utf8_utf8
 #include <fstream>
 #include <string>
 #include "textstream/textstream.h"
@@ -46,8 +45,6 @@
 namespace PT
 {
 /*!
 	UTF-8, a transformation format of ISO 10646
 	http://tools.ietf.org/html/rfc3629
@@ -68,6 +65,16 @@ bool UTF8_CheckRange(int c);
 /*
 *
 *
 *
 * conversions from UTF-8
 *
 *
 *
 */
 /*!
 	converting one character from UTF-8 to an int
 */
@@ -85,25 +92,40 @@ bool UTF8ToWide(const char * utf8,                  std::wstring & res, bool cle
 bool UTF8ToWide(const std::string & utf8,           std::wstring & res, bool clear = true, int mode = 1);
 bool UTF8ToWide(std::istream & utf8,                std::wstring & res, bool clear = true, int mode = 1);
 template<size_t stack_size, size_t heap_block_size>
 bool UTF8ToWide(const char * utf8, size_t utf8_len, TextStreamBase<wchar_t, stack_size, heap_block_size> & res, bool clear = true, int mode = 1); // need to be tested
 template<size_t stack_size, size_t heap_block_size>
 bool UTF8ToWide(const char * utf8, TextStreamBase<wchar_t, stack_size, heap_block_size> & res, bool clear = true, int mode = 1); // need to be tested
 template<size_t stack_size, size_t heap_block_size>
 bool UTF8ToWide(const std::string & utf8, TextStreamBase<wchar_t, stack_size, heap_block_size> & res, bool clear = true, int mode = 1); // need to be tested
 template<size_t stack_size, size_t heap_block_size>
 bool UTF8ToWide(std::istream & utf8, TextStreamBase<wchar_t, stack_size, heap_block_size> & res, bool clear = true, int mode = 1); // need to be tested
 /*!
 	converting UTF-8 string to a WTextStream stream
 	(need to be tested)
 */
 /*
- implemented as templates below
+ *
-bool UTF8ToWide(const char * utf8, size_t utf8_len, WTextStream & res, bool clear = true, int mode = 1);
+ *
-bool UTF8ToWide(const char * utf8,                  WTextStream & res, bool clear = true, int mode = 1);
+ *
-bool UTF8ToWide(const std::string & utf8,           WTextStream & res, bool clear = true, int mode = 1);
+ * conversions to UTF-8
-bool UTF8ToWide(std::istream & utf8,                WTextStream & res, bool clear = true, int mode = 1);
+ *
-*/
+ *
 *
 */
 /*!
 	converting one int character to UTF-8
 */
 size_t IntToUTF8(int z, char * utf8, size_t utf8_max_len);
-size_t IntToUTF8(int z, std::string & utf8, bool clear = true );
+size_t IntToUTF8(int z, std::string & utf8, bool clear = true);
-size_t IntToUTF8(int z, std::ostream & utf8);
+
 template<typename StreamType>
 size_t IntToUTF8(int z, StreamType & utf8);
 /*!
@@ -113,216 +135,32 @@ bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::string & ut
 bool WideToUTF8(const wchar_t * wide_string,                    std::string & utf8,  bool clear = true, int mode = 1);
 bool WideToUTF8(const std::wstring & wide_string,               std::string & utf8,  bool clear = true, int mode = 1);
-// implemented as a template below
+template<typename StreamType>
-//void WideToUTF8(PT::WTextStream & buffer,                       std::string & utf8,  bool clear = true, int mode = 1);// not tested
+bool WideToUTF8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, int mode = 1);
 template<typename StreamType>
 bool WideToUTF8(const wchar_t * wide_string, StreamType & utf8, int mode = 1);
 template<typename StreamType>
 bool WideToUTF8(const std::wstring & wide_string, StreamType & utf8, int mode = 1);
 bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::ostream & utf8, int mode = 1);
 bool WideToUTF8(const wchar_t * wide_string,                    std::ostream & utf8, int mode = 1);
 bool WideToUTF8(const std::wstring & wide_string,               std::ostream & utf8, int mode = 1);
 // implemented as a template below
 //void WideToUTF8(PT::WTextStream & buffer,                       std::ostream & utf8, int mode = 1);// not tested
 bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
 bool WideToUTF8(const wchar_t * wide_string,                    char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
 bool WideToUTF8(const std::wstring & wide_string,               char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
-// implement void WideToUTF8(PT::WTextStream & buffer,          char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
+// implement template<typename StreamType>
 bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode = 1);
 bool WideToUTF8(const wchar_t * wide_string,                    char * utf8, size_t utf8_len, int mode = 1);
 bool WideToUTF8(const std::wstring & wide_string,               char * utf8, size_t utf8_len, int mode = 1);
-// implement void WideToUTF8(PT::WTextStream & buffer,          char * utf8, size_t utf8_len, int mode = 1);
+// implement template<typename StreamType>
 namespace private_namespace
 {
 /*!
 	an auxiliary function for converting from UTF-8
 	decodes utf8_len octets from utf8 and calls convert_function for each decoded character
 	if a sequence is invalid then convert_function is called with U+FFFD when mode is equal to 1,
 	otherwise the sequence is skipped
 	returns false if there was at least one invalid sequence
 */
 template<typename function_type>
 bool UTF8ToWideGeneric(const char * utf8, size_t utf8_len, int mode, function_type convert_function)
 {
 	bool all_valid = true;
 	while( utf8_len > 0 )
 	{
 		int code_point;
 		size_t used;
 		bool is_valid;
 		const unsigned char first = static_cast<unsigned char>(*utf8);
 		if( first <= 0x7f )
 		{
 			// small optimization, a single octet character
 			used = 1;
 			is_valid = true;
 			code_point = first;
 		}
 		else
 		{
 			used = UTF8ToInt(utf8, utf8_len, code_point, is_valid); // used will be different from zero
 		}
 		if( is_valid )
 		{
 			convert_function(code_point);
 		}
 		else
 		{
 			if( mode == 1 )
 				convert_function(0xFFFD); // U+FFFD "replacement character"
 			all_valid = false;
 		}
 		utf8     += used;
 		utf8_len -= used;
 	}
 	return all_valid;
 }
 /*!
 	an auxiliary function, appends the character c to the res stream as wchar_t
 	if wchar_t is two bytes long and c is greater than 0xffff
 	then two wide characters are appended (a surrogate pair)
 */
 template<typename char_type, size_t stack_size, size_t heap_block_size>
 void IntToWide(int c, TextStreamBase<char_type, stack_size, heap_block_size> & res)
 {
 	const bool needs_pair = (sizeof(wchar_t)==2 && c>0xffff);
 	if( !needs_pair )
 	{
 		res << static_cast<wchar_t>(c);
 		return;
 	}
 	// UTF16 surrogate pairs
 	const int offset = c - 0x10000;
 	const wchar_t high = static_cast<wchar_t>(((offset >> 10) & 0x3FF) + 0xD800);
 	const wchar_t low  = static_cast<wchar_t>((offset & 0x3FF) + 0xDC00);
 	res << high;
 	res << low;
 }
 // not tested
 // FIX ME surrogate pairs from the input stream are not handled
 // and the mode parameter is not used
 //
 // an auxiliary function: each character from buffer is converted to UTF-8
 // into a local chunk, write_function(pointer, length) is called
 // every time the chunk is nearly full and once more for the remaining part
 template<typename char_type, size_t stack_size, size_t heap_block_size, typename function_type>
 void WideToUTF8Generic(TextStreamBase<char_type, stack_size, heap_block_size> & buffer, int mode, function_type write_function)
 {
 	char chunk[256];
 	std::size_t chunk_size = sizeof(chunk) / sizeof(char);
 	std::size_t sequence_reserve = 10; // room kept free for one UTF-8 sequence
 	std::size_t used = 0;
 	typedef typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator stream_iterator;
 	for(stream_iterator it = buffer.begin() ; it != buffer.end() ; ++it)
 	{
 		// flush the chunk if the next sequence might not fit
 		if( used + sequence_reserve > chunk_size )
 		{
 			write_function(chunk, used);
 			used = 0;
 		}
 		used += PT::IntToUTF8(*it, chunk + used, chunk_size - used);
 	}
 	if( used > 0 )
 	{
 		write_function(chunk, used);
 	}
 }
 } // namespace
 // need to be tested
 // converts utf8_len octets of a UTF-8 string and appends the characters to res
 // (res is cleared first if clear is true), returns false if there were some errors
 template<typename char_type, size_t stack_size, size_t heap_block_size>
 bool UTF8ToWide(const char * utf8, size_t utf8_len, TextStreamBase<char_type, stack_size, heap_block_size> & res, bool clear = true, int mode = 1)
 {
 	if( clear )
 		res.clear();
 	// every decoded character goes to the stream through IntToWide
 	auto append_char = [&res](int c) {
 		private_namespace::IntToWide(c, res);
 	};
 	return private_namespace::UTF8ToWideGeneric(utf8, utf8_len, mode, append_char);
 }
 // need to be tested
 // the same as the version with utf8_len but for a null terminated UTF-8 string
 template<typename char_type, size_t stack_size, size_t heap_block_size>
 bool UTF8ToWide(const char * utf8, TextStreamBase<char_type, stack_size, heap_block_size> & res, bool clear = true, int mode = 1)
 {
 	// look for the terminating null character
 	size_t utf8_len = 0;
 	for( ; utf8[utf8_len] != 0 ; ++utf8_len)
 	{
 	}
 	return UTF8ToWide(utf8, utf8_len, res, clear, mode);
 }
 // need to be tested
 // the same as the version with utf8_len but the UTF-8 string is given as std::string
 template<typename char_type, size_t stack_size, size_t heap_block_size>
 bool UTF8ToWide(const std::string & utf8, TextStreamBase<char_type, stack_size, heap_block_size> & res, bool clear = true, int mode = 1)
 {
 	const char * const text = utf8.c_str();
 	const size_t text_len = utf8.size();
 	return UTF8ToWide(text, text_len, res, clear, mode);
 }
 // need to be tested
 // converts UTF-8 text read from the utf8 input stream and appends the characters to res
 // input:
 //   utf8  - an input stream with UTF-8 octets, read until UTF8ToInt returns zero
 //   clear - if true then res is cleared first
 //   mode  - what to do with an invalid sequence
 //           0: skip it
 //           1: put U+FFFD "replacement character" instead of it (default)
 // output:
 //   res   - a stream receiving the decoded characters
 // returns false if there were some errors when converting
 template<typename char_type, size_t stack_size, size_t heap_block_size>
 bool UTF8ToWide(std::istream & utf8, TextStreamBase<char_type, stack_size, heap_block_size> & res, bool clear = true, int mode = 1)
 {
 int z;
 bool correct, was_error = false;
 	if( clear )
 		res.clear();
 	while( UTF8ToInt(utf8, z, correct) > 0 )
 	{
 		if( !correct )
 		{
 			// the replacement character goes through IntToWide like any other character
 			// (the same way as in the version taking a pointer and a length),
 			// a bare int passed to operator<< would bypass the cast to wchar_t
 			if( mode == 1 )
 				private_namespace::IntToWide(0xFFFD, res); // U+FFFD "replacement character"
 			was_error = true;
 		}
 		else
 		{
 			private_namespace::IntToWide(z, res);
 		}
 	}
 return !was_error;
 }
 // not tested
 // converts the characters from buffer to UTF-8 and appends them to the utf8 string
 // (the string is cleared first if clear is true)
 template<typename char_type, size_t stack_size, size_t heap_block_size>
 void WideToUTF8(TextStreamBase<char_type, stack_size, heap_block_size> & buffer, std::string & utf8, bool clear = true, int mode = 1)
 {
 	if( clear )
 		utf8.clear();
 	// called for each converted chunk
 	auto append_chunk = [&utf8](const char * chunk, std::size_t chunk_len){
 		utf8.append(chunk, chunk_len);
 	};
 	private_namespace::WideToUTF8Generic(buffer, mode, append_chunk);
 }
 // not tested
 // converts the characters from buffer to UTF-8 and writes them to the utf8 output stream
 template<typename char_type, size_t stack_size, size_t heap_block_size>
 void WideToUTF8(TextStreamBase<char_type, stack_size, heap_block_size> & buffer, std::ostream & utf8, int mode = 1)
 {
 	// called for each converted chunk
 	auto write_chunk = [&utf8](const char * chunk, std::size_t chunk_len){
 		utf8.write(chunk, chunk_len);
 	};
 	private_namespace::WideToUTF8Generic(buffer, mode, write_chunk);
 }
 template<size_t stack_size, size_t heap_block_size>
 void WideToUTF8(TextStreamBase<wchar_t, stack_size, heap_block_size> & buffer, std::string & utf8, bool clear = true, int mode = 1); // not tested
 template<size_t stack_size, size_t heap_block_size>
 void WideToUTF8(TextStreamBase<wchar_t, stack_size, heap_block_size> & buffer, std::ostream & utf8, int mode = 1); // not tested
@@ -330,5 +168,7 @@ void WideToUTF8(TextStreamBase<char_type, stack_size, heap_block_size> & buffer,
 } // namespace
 #include "utf8/utf8_templates.h"
 #endif
@@ -0,0 +1,286 @@
 /*
 * This file is a part of PikoTools
 * and is distributed under the (new) BSD licence.
 * Author: Tomasz Sowa <t.sowa@ttmath.org>
 */
 /*
 * Copyright (c) 2021, Tomasz Sowa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 *  * Neither the name Tomasz Sowa nor the names of contributors to this
 *    project may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */
 #include "utf8_private.h"
 namespace PT
 {
 namespace private_namespace
 {
 /*!
 	an auxiliary function for converting from UTF-8 string
 	decodes the first octet of a UTF-8 sequence
 	input:
 		uz - the first octet of the sequence
 	output:
 		len - how many octets the whole sequence has (1 for a single octet character)
 		res - the payload bits taken from the first octet
 	the function returns false if uz cannot start a sequence:
 	a continuation octet (10xxxxxx, res is left untouched in this case)
 	or an octet with an all-zero payload which is not 0xE0 or 0xF0
 	(so a zero octet, 0xC0, 0xF8, 0xFC, 0xFE and 0xFF are refused)
 */
 bool UTF8ToInt_FirstOctet(unsigned char uz, size_t & len, int & res)
 {
 	// count the leading one bits, shifting them out of the octet
 	for(len=0 ; (uz & 0x80) != 0 ; ++len)
 		uz <<= 1;
 	// 10xxxxxx is a continuation octet, it cannot be the first one
 	if( len == 1 )
 		return false;
 	res = uz;
 	// move the payload bits back to the least significant positions
 	if( len > 0 )
 		res >>= len;
 	// 0xE0 and 0xF0 are valid lead octets although their payload is zero
 	// (e.g. U+0800 is E0 A0 80 and U+10000 is F0 90 80 80),
 	// the significant bits come from the following octets;
 	// every other octet with a zero payload is still refused
 	// NOTE(review): overlong forms such as E0 80 80 have to be rejected
 	// by the caller after the whole sequence is read - confirm UTF8_CheckRange covers it
 	if( res == 0 && len != 3 && len != 4 )
 		return false;
 	if( len == 0 )
 		len = 1;
 return true;
 }
 /*!
 	an auxiliary function for converting from UTF-8 string
 	appends the six payload bits of a continuation octet (10xxxxxx) to res
 	returns false, leaving res untouched, if uz is not a continuation octet
 */
 bool UTF8ToInt_AddNextOctet(unsigned char uz, int & res)
 {
 	const bool is_continuation = ((uz & 0xc0) == 0x80);
 	if( is_continuation )
 	{
 		// make room for six new bits and put them in
 		res <<= 6;
 		res |= (uz & 0x3F);
 	}
 	return is_continuation;
 }
 /*
 	an auxiliary function for converting from wide characters to UTF-8
 	reads one character (one wide character or a surrogate pair) and stores it in z
 	correct is set to false if the character is not a valid one
 	returns how many wide characters were used
 	if string_len is greater than 0 then the return value is always greater than zero too
 */
 size_t WideToInt(const wchar_t * wide_string, size_t string_len, int & z, bool & correct)
 {
 	if( string_len == 0 )
 	{
 		z = 0;
 		correct = false;
 		return 0;
 	}
 	z = static_cast<int>(*wide_string);
 	// surrogates are interpreted only when wchar_t is two bytes long
 	const bool is_surrogate = (sizeof(wchar_t) == 2 && z >= 0xD800 && z <= 0xDFFF);
 	if( !is_surrogate )
 	{
 		correct = UTF8_CheckRange(z);
 		return 1;
 	}
 	// a pair has to start with a high surrogate and needs one more wide character
 	const bool starts_pair = (z <= 0xDBFF && string_len > 1);
 	if( !starts_pair )
 	{
 		correct = false;
 		return 1;
 	}
 	const int low = *(wide_string+1);
 	correct = (low >= 0xDC00 && low <= 0xDFFF);
 	if( correct )
 		z = 0x10000 + (((z & 0x3FF) << 10) | (low & 0x3FF));
 	// NOTE(review): both wide characters are reported as used even if the second one
 	// is not a low surrogate - confirm that dropping it is intended
 	return 2;
 }
 /*
 	an auxiliary function for converting from wide characters to UTF-8
 	the same as the version with string_len but for a null terminated string
 	returns how many wide characters were used
 	if wide_string has at least one character then the return value is always greater than zero too
 */
 size_t WideToInt(const wchar_t * wide_string, int & z, bool & correct)
 {
 	if( *wide_string == 0 )
 	{
 		z = 0;
 		correct = false;
 		return 0;
 	}
 	// at most two wide characters are needed (a surrogate pair)
 	const size_t available = (*(wide_string+1) != 0) ? 2 : 1;
 	return WideToInt(wide_string, available, z, correct);
 }
 /*!
 	an auxiliary function for converting from wide characters to UTF-8
 	converts one character from wide_string and saves it in the utf8 buffer
 	returns how many wide characters were used
 	if string_len is greater than 0 then the return value is always greater than zero too
 	utf8_written - how many characters were saved in the utf8 buffer (no null terminating
 	               character is added), it is zero if the utf8 buffer is too small
 	               or if an incorrect wide character was skipped
 	was_utf8_buf_too_small - will be true if the utf8 buffer is too small
 	               if this flag is true then utf8_written is equal to zero
 	was_error    - will be set to true if an incorrect wide character was read
 	               (a too small buffer alone does not set it, the flag is never cleared here)
 	mode         - if equal to 1 then U+FFFD is saved instead of an incorrect character
 */
 size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
 							size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode)
 {
 	int code_point;
 	bool is_valid;
 	utf8_written = 0;
 	was_utf8_buf_too_small = false;
 	const size_t used = WideToInt(wide_string, string_len, code_point, is_valid);
 	if( !is_valid )
 	{
 		was_error = true;
 		// without the replacement mode an incorrect character is just skipped
 		if( mode != 1 )
 			return used;
 		code_point = 0xFFFD; // U+FFFD "replacement character"
 	}
 	utf8_written = IntToUTF8(code_point, utf8, utf8_len);
 	was_utf8_buf_too_small = (utf8_written == 0);
 	return used;
 }
 /*!
 	an auxiliary function for converting from wide characters to UTF-8
 	converts one character from wide_string and appends it to the utf8 string
 	was_error is set to true if the character could not be converted,
 	in such a case U+FFFD is appended if mode is equal to 1
 	returns how many wide characters were used
 	if string_len is greater than 0 then the return value is always greater than zero too
 */
 size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode)
 {
 	int code_point;
 	bool is_valid;
 	const size_t used = WideToInt(wide_string, string_len, code_point, is_valid);
 	const bool appended = is_valid && IntToUTF8(code_point, utf8, false) != 0;
 	if( appended )
 		return used;
 	was_error = true;
 	if( mode == 1 )
 		IntToUTF8(0xFFFD, utf8, false); // U+FFFD "replacement character"
 	return used;
 }
 /*!
 	an auxiliary function for converting from wide characters to UTF-8
 	converts one character from a null terminated wide_string and appends it to the utf8 string
 	was_error is set to true if the character could not be converted,
 	in such a case U+FFFD is appended if mode is equal to 1
 	returns how many wide characters were used
 	if wide_string has at least one character then the return value is always greater than zero too
 */
 size_t WideOneToUTF8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode)
 {
 	int code_point;
 	bool is_valid;
 	const size_t used = WideToInt(wide_string, code_point, is_valid);
 	const bool appended = is_valid && IntToUTF8(code_point, utf8, false) != 0;
 	if( appended )
 		return used;
 	was_error = true;
 	if( mode == 1 )
 		IntToUTF8(0xFFFD, utf8, false); // U+FFFD "replacement character"
 	return used;
 }
 } // namespace private_namespace
 } // namespace PT
@@ -0,0 +1,220 @@
 /*
 * This file is a part of PikoTools
 * and is distributed under the (new) BSD licence.
 * Author: Tomasz Sowa <t.sowa@ttmath.org>
 */
 /*
 * Copyright (c) 2021, Tomasz Sowa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 *  * Neither the name Tomasz Sowa nor the names of contributors to this
 *    project may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */
 #ifndef headerfile_picotools_utf8_utf8_private
 #define headerfile_picotools_utf8_utf8_private
 #include "textstream/textstream.h"
 namespace PT
 {
 // forward declarations of PT functions which are not defined in this header;
 // the templates below call IntToUTF8(int, char *, size_t) and PT::UTF8ToInt
 bool UTF8_CheckRange(int c);
 size_t IntToUTF8(int z, char * utf8, size_t utf8_max_len);
 size_t IntToUTF8(int z, std::string & utf8, bool clear);
 size_t UTF8ToInt(const char * utf8, size_t utf8_len, int & res, bool & correct);
 namespace private_namespace
 {
 // auxiliary functions, only declared here (the definitions are not in this header)
 // helpers for decoding the first and the following octets of a UTF-8 sequence
 bool UTF8ToInt_FirstOctet(unsigned char uz, size_t & len, int & res);
 bool UTF8ToInt_AddNextOctet(unsigned char uz, int & res);
 // reading one character from a wide string into z, the callers treat the return value
 // as the number of wide characters which were used
 size_t WideToInt(const wchar_t * wide_string, size_t string_len, int & z, bool & correct);
 size_t WideToInt(const wchar_t * wide_string, int & z, bool & correct);
 // converting one character from a wide string to UTF-8 (a char buffer or std::string as the output)
 size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
 							size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode);
 size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode);
 size_t WideOneToUTF8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode);
 /*!
 	an auxiliary function for converting one character of a wide string to a UTF-8 stream
 	the result is written with IntToUTF8(int, StreamType &)
 	returns how many wide characters were used
 	if string_len is greater than 0 then the return value is always greater than zero too
 	was_error is set to true when the character could not be converted (it is never cleared here)
 	mode == 1 puts U+FFFD "replacement character" in place of such a character
 */
 template<typename StreamType>
 static size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, bool & was_error, int mode)
 {
 	int code_point;
 	bool is_valid;
 	const size_t consumed = WideToInt(wide_string, string_len, code_point, is_valid);
 	// the character counts as converted only if IntToUTF8 reports some written bytes
 	const bool converted = is_valid && IntToUTF8(code_point, utf8) != 0;
 	if( converted )
 		return consumed;
 	if( mode == 1 )
 		IntToUTF8(0xFFFD, utf8); // U+FFFD "replacement character"
 	was_error = true;
 	return consumed;
 }
 /*!
 	an auxiliary function for converting one character of a null terminated wide string to a UTF-8 stream
 	returns zero for an empty string, otherwise the value returned by the overload with string_len
 */
 template<typename StreamType>
 static size_t WideOneToUTF8(const wchar_t * wide_string, StreamType & utf8, bool & was_error, int mode)
 {
 	if( wide_string[0] == 0 )
 		return 0;
 	// hand over at most two wide characters and never look past the terminating zero
 	const size_t available = (wide_string[1] != 0) ? 2 : 1;
 	return WideOneToUTF8(wide_string, available, utf8, was_error, mode);
 }
 // declared in utf8.h, defined in utf8.cpp
 // NOTE(review): this declaration is placed inside private_namespace, so it names
 // PT::private_namespace::UTF8ToInt and not the PT::UTF8ToInt declared at the top of this header;
 // UTF8ToWideGeneric calls the function with an explicit PT:: qualification.
 // Confirm whether this declaration is still needed.
 size_t UTF8ToInt(const char * utf8, size_t utf8_len, int & res, bool & correct);
 /*!
 	an auxiliary function for decoding a UTF-8 buffer, one code point at a time
 	convert_function(int) is called for each correctly decoded code point,
 	for an incorrect sequence it is called with U+FFFD if mode is 1, otherwise it is not called
 	returns false if there was at least one incorrect sequence
 */
 template<typename function_type>
 bool UTF8ToWideGeneric(const char * utf8, size_t utf8_len, int mode, function_type convert_function)
 {
 	bool was_error = false;
 	while( utf8_len > 0 )
 	{
 		int code_point;
 		size_t used;
 		bool is_valid;
 		const unsigned char first_octet = static_cast<unsigned char>(*utf8);
 		if( first_octet > 0x7f )
 		{
 			// the returned length will be different from zero
 			used = PT::UTF8ToInt(utf8, utf8_len, code_point, is_valid);
 		}
 		else
 		{
 			// small optimization: a one octet sequence is taken as it is
 			used = 1;
 			is_valid = true;
 			code_point = first_octet;
 		}
 		if( is_valid )
 		{
 			convert_function(code_point);
 		}
 		else
 		{
 			if( mode == 1 )
 				convert_function(0xFFFD); // U+FFFD "replacement character"
 			was_error = true;
 		}
 		utf8     += used;
 		utf8_len -= used;
 	}
 	return !was_error;
 }
 /*!
 	an auxiliary function for putting one code point to a wide stream
 	if wchar_t has two bytes and c is greater than 0xffff then two items are written (UTF16 surrogate pair),
 	otherwise c is written as a single wchar_t
 */
 template<size_t stack_size, size_t heap_block_size>
 void IntToWide(int c, TextStreamBase<wchar_t, stack_size, heap_block_size> & res)
 {
 	const bool needs_surrogate_pair = (sizeof(wchar_t) == 2 && c > 0xffff);
 	if( !needs_surrogate_pair )
 	{
 		res << static_cast<wchar_t>(c);
 		return;
 	}
 	// UTF16 surrogate pairs: the upper ten bits go to the first item, the lower ten bits to the second one
 	const int offset = c - 0x10000;
 	res << static_cast<wchar_t>(((offset >> 10) & 0x3FF) + 0xD800);
 	res << static_cast<wchar_t>((offset & 0x3FF) + 0xDC00);
 }
 /*!
 	an auxiliary function for converting a TextStreamBase buffer to UTF-8
 	the output is gathered in a local buffer and passed in chunks to write_function(const char * data, size_t len)
 	mode - what to do with an item for which IntToUTF8 returns zero
 		0: skip the item
 		1: put U+FFFD "replacement character" instead of the item
 	not tested
 	FIX ME surrogate pairs from the input stream are still not combined,
 	each item of the buffer is converted on its own
 */
 template<typename char_type, size_t stack_size, size_t heap_block_size, typename function_type>
 void WideToUTF8Generic(TextStreamBase<char_type, stack_size, heap_block_size> & buffer, int mode, function_type write_function)
 {
 	char utf8_buffer[256];
 	const std::size_t buffer_len = sizeof(utf8_buffer) / sizeof(char);
 	const std::size_t utf8_sequence_max_length = 10; // room kept free for one encoded character
 	std::size_t index = 0;
 	typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator i = buffer.begin();
 	while( i != buffer.end() )
 	{
 		// flush before the buffer gets too full, so one more sequence (or the replacement character) fits
 		if( index + utf8_sequence_max_length > buffer_len )
 		{
 			write_function(utf8_buffer, index);
 			index = 0;
 		}
 		std::size_t written = IntToUTF8(*i, utf8_buffer + index, buffer_len - index);
 		// honour the mode parameter in the same way as the other conversion functions do
 		if( written == 0 && mode == 1 )
 			written = IntToUTF8(0xFFFD, utf8_buffer + index, buffer_len - index); // U+FFFD "replacement character"
 		index += written;
 		++i;
 	}
 	if( index > 0 )
 	{
 		write_function(utf8_buffer, index);
 	}
 }
 } // namespace private_namespace
 } // namespace PT
 #endif
@@ -0,0 +1,271 @@
 /*
 * This file is a part of PikoTools
 * and is distributed under the (new) BSD licence.
 * Author: Tomasz Sowa <t.sowa@ttmath.org>
 */
 /*
 * Copyright (c) 2021, Tomasz Sowa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 *  * Neither the name Tomasz Sowa nor the names of contributors to this
 *    project may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */
 #ifndef headerfile_picotools_utf8_utf8_templates
 #define headerfile_picotools_utf8_utf8_templates
 // this file is included at the end of utf8.h
 #include "utf8_private.h"
 namespace PT
 {
 /*!
 	converting UTF-8 string to a TextStreamBase<wchar_t,...> stream
 	input:
 		utf8, utf8_len - the UTF-8 buffer and its length
 		clear - if true then res is cleared at the beginning
 		mode - passed to UTF8ToWideGeneric (1: put U+FFFD in place of an incorrect sequence)
 	output:
 		res - the stream to which the decoded characters are added
 	returns the value returned by UTF8ToWideGeneric: false if there were some errors when converting
 	(need to be tested)
 */
 template<size_t stack_size, size_t heap_block_size>
 bool UTF8ToWide(const char * utf8, size_t utf8_len, TextStreamBase<wchar_t, stack_size, heap_block_size> & res, bool clear, int mode)
 {
 	if( clear )
 		res.clear();
 	auto append_code_point = [&res](int c) {
 		private_namespace::IntToWide(c, res);
 	};
 	return private_namespace::UTF8ToWideGeneric(utf8, utf8_len, mode, append_code_point);
 }
 /*!
 	converting a null terminated UTF-8 string to a TextStreamBase<wchar_t,...> stream
 	the length is measured here and the work is done by the overload taking (utf8, utf8_len)
 */
 template<size_t stack_size, size_t heap_block_size>
 bool UTF8ToWide(const char * utf8, TextStreamBase<wchar_t, stack_size, heap_block_size> & res, bool clear, int mode)
 {
 	// walk to the terminating zero, the distance is the length in bytes
 	const char * end = utf8;
 	while( *end != 0 )
 		++end;
 	return UTF8ToWide(utf8, static_cast<size_t>(end - utf8), res, clear, mode);
 }
 /*!
 	converting a UTF-8 std::string to a TextStreamBase<wchar_t,...> stream
 	the work is done by the overload taking (utf8, utf8_len)
 */
 template<size_t stack_size, size_t heap_block_size>
 bool UTF8ToWide(const std::string & utf8, TextStreamBase<wchar_t, stack_size, heap_block_size> & res, bool clear, int mode)
 {
 	const char * bytes = utf8.c_str();
 	const size_t bytes_len = utf8.size();
 	return UTF8ToWide(bytes, bytes_len, res, clear, mode);
 }
 /*!
 	converting UTF-8 data read from a std::istream to a TextStreamBase<wchar_t,...> stream
 	input:
 		utf8 - the input stream, read with UTF8ToInt() as long as it returns a value greater than zero
 		clear - if true then res is cleared at the beginning
 		mode - what to do with errors when converting
 			1: put U+FFFD "replacement character" instead of the invalid sequence
 			other values: skip the invalid sequence
 	output:
 		res - the stream to which the decoded characters are added
 	this function returns false if there were some errors when converting
 	(need to be tested)
 */
 template<size_t stack_size, size_t heap_block_size>
 bool UTF8ToWide(std::istream & utf8, TextStreamBase<wchar_t, stack_size, heap_block_size> & res, bool clear, int mode)
 {
 	int z;
 	bool correct, was_error = false;
 	if( clear )
 		res.clear();
 	while( UTF8ToInt(utf8, z, correct) > 0 )
 	{
 		if( !correct )
 		{
 			// the replacement goes through IntToWide, so it is put to the stream as a wchar_t
 			// (the same way as in the overload taking a buffer) and not as an int value
 			if( mode == 1 )
 				private_namespace::IntToWide(0xFFFD, res); // U+FFFD "replacement character"
 			was_error = true;
 		}
 		else
 		{
 			private_namespace::IntToWide(z, res);
 		}
 	}
 	return !was_error;
 }
 /*!
 	this function converts one wide character into UTF-8 stream
 	input:
 		z - wide character
 	output:
 		utf8 - a UTF-8 stream for the output sequence (its write(const char *, len) method is used)
 	the function returns how many characters have been written to the utf8 stream,
 	zero means that 'z' is an incorrect unicode character
 */
 template<typename StreamType>
 size_t IntToUTF8(int z, StreamType & utf8)
 {
 	char encoded[10];
 	size_t encoded_len = IntToUTF8(z, encoded, sizeof(encoded));
 	// nothing is sent to the stream when the conversion produced no bytes
 	if( encoded_len == 0 )
 		return 0;
 	utf8.write(encoded, encoded_len);
 	return encoded_len;
 }
 /*!
 	this function converts a wide string into UTF-8 stream
 	input:
 		wide_string - a wide string for converting
 		string_len - size of the string
 		mode - what to do with errors when converting
 			0: skip an invalid character
 			1: put U+FFFD "replacement character" instead of the invalid character (default)
 	output:
 		utf8 - a UTF-8 stream for the output sequence
 	this function returns false if there were some errors when converting
 */
 template<typename StreamType>
 bool WideToUTF8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, int mode)
 {
 	bool was_error = false;
 	const wchar_t * cursor = wide_string;
 	size_t remaining = string_len;
 	while( remaining > 0 )
 	{
 		// each step consumes the wide characters which made up one converted character
 		const size_t used = private_namespace::WideOneToUTF8(cursor, remaining, utf8, was_error, mode);
 		cursor    += used;
 		remaining -= used;
 	}
 	return !was_error;
 }
 /*!
 	this function converts a wide string into UTF-8 stream
 	input:
 		wide_string - a null terminated wide string for converting
 		mode - what to do with errors when converting
 			0: skip an invalid character
 			1: put U+FFFD "replacement character" instead of the invalid character (default)
 	output:
 		utf8 - a UTF-8 stream for the output sequence
 	this function returns false if there were some errors when converting
 */
 template<typename StreamType>
 bool WideToUTF8(const wchar_t * wide_string, StreamType & utf8, int mode)
 {
 	bool was_error = false;
 	const wchar_t * cursor = wide_string;
 	while( *cursor != 0 )
 	{
 		const size_t used = private_namespace::WideOneToUTF8(cursor, utf8, was_error, mode);
 		cursor += used;
 	}
 	return !was_error;
 }
 /*!
 	this function converts a wide string (std::wstring) into UTF-8 stream
 	input:
 		wide_string - a wide string for converting
 		mode - what to do with errors when converting
 			0: skip an invalid character
 			1: put U+FFFD "replacement character" instead of the invalid character (default)
 	output:
 		utf8 - a UTF-8 stream for the output sequence
 	this function returns false if there were some errors when converting
 	(the work is done by the overload taking a pointer and a length)
 */
 template<typename StreamType>
 bool WideToUTF8(const std::wstring & wide_string, StreamType & utf8, int mode)
 {
 	const wchar_t * chars = wide_string.c_str();
 	const size_t chars_len = wide_string.size();
 	return WideToUTF8(chars, chars_len, utf8, mode);
 }
 /*!
 	converting a TextStreamBase<wchar_t,...> buffer to a UTF-8 std::string
 	if clear is true then utf8 is cleared at the beginning,
 	the chunks produced by WideToUTF8Generic are appended to utf8
 */
 template<size_t stack_size, size_t heap_block_size>
 void WideToUTF8(TextStreamBase<wchar_t, stack_size, heap_block_size> & buffer, std::string & utf8, bool clear, int mode)
 {
 	if( clear )
 		utf8.clear();
 	auto append_chunk = [&utf8](const char * utf8_buffer, std::size_t buffer_len){
 		utf8.append(utf8_buffer, buffer_len);
 	};
 	private_namespace::WideToUTF8Generic(buffer, mode, append_chunk);
 }
 /*!
 	converting a TextStreamBase<wchar_t,...> buffer to a UTF-8 std::ostream
 	the chunks produced by WideToUTF8Generic are written with utf8.write()
 	(not tested)
 */
 template<size_t stack_size, size_t heap_block_size>
 void WideToUTF8(TextStreamBase<wchar_t, stack_size, heap_block_size> & buffer, std::ostream & utf8, int mode)
 {
 	auto write_chunk = [&utf8](const char * utf8_buffer, std::size_t buffer_len){
 		utf8.write(utf8_buffer, buffer_len);
 	};
 	private_namespace::WideToUTF8Generic(buffer, mode, write_chunk);
 }
 } // namespace PT
 #endif