leave only utf8.h and utf8.cpp

Remove utf8_private.h, utf8_private.cpp and utf8_templates.h and move their methods to utf8.h/utf8.cpp.
2024-05-30 21:20:25 +02:00
parent aacb1f43ae
commit 450c5d55e9
7 changed files with 1038 additions and 1206 deletions
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -3,32 +3,28 @@
 ./convert/inttostr.o: ./convert/inttostr.h
 ./convert/misc.o: ./convert/misc.h ./convert/text.h textstream/stream.h
 ./convert/misc.o: textstream/types.h ./convert/inttostr.h utf8/utf8.h
-./convert/misc.o: utf8/utf8_templates.h utf8/utf8_private.h
 ./convert/text.o: ./convert/text.h ./convert/text_private.h
 ./convert/double.o: ./convert/double.h textstream/textstream.h
 ./convert/double.o: textstream/stream.h space/space.h convert/inttostr.h
-./convert/double.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
-./convert/double.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
-./convert/double.o: textstream/types.h textstream/stream_private.h
+./convert/double.o: utf8/utf8.h textstream/stream.h date/date.h
+./convert/double.o: membuffer/membuffer.h textstream/types.h
+./convert/double.o: textstream/stream_private.h
 ./convert/baseparser.o: ./convert/baseparser.h textstream/textstream.h
 ./convert/baseparser.o: textstream/stream.h space/space.h convert/inttostr.h
-./convert/baseparser.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
-./convert/baseparser.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
-./convert/baseparser.o: textstream/types.h textstream/stream_private.h
+./convert/baseparser.o: utf8/utf8.h textstream/stream.h date/date.h
+./convert/baseparser.o: membuffer/membuffer.h textstream/types.h
+./convert/baseparser.o: textstream/stream_private.h
 ./date/date.o: ./date/date.h convert/inttostr.h
 ./log/filelog.o: ./log/filelog.h textstream/textstream.h textstream/stream.h
 ./log/filelog.o: space/space.h convert/inttostr.h utf8/utf8.h
-./log/filelog.o: textstream/stream.h utf8/utf8_templates.h
-./log/filelog.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
+./log/filelog.o: textstream/stream.h date/date.h membuffer/membuffer.h
 ./log/filelog.o: textstream/types.h textstream/stream_private.h
 ./log/log.o: ./log/log.h textstream/textstream.h textstream/stream.h
 ./log/log.o: space/space.h convert/inttostr.h utf8/utf8.h textstream/stream.h
-./log/log.o: utf8/utf8_templates.h utf8/utf8_private.h date/date.h
-./log/log.o: membuffer/membuffer.h textstream/types.h
+./log/log.o: date/date.h membuffer/membuffer.h textstream/types.h
 ./log/log.o: textstream/stream_private.h ./log/filelog.h
 ./space/space.o: ./space/space.h convert/inttostr.h utf8/utf8.h
-./space/space.o: textstream/stream.h utf8/utf8_templates.h
-./space/space.o: utf8/utf8_private.h convert/convert.h ./convert/inttostr.h
+./space/space.o: textstream/stream.h convert/convert.h ./convert/inttostr.h
 ./space/space.o: convert/patternreplacer.h textstream/textstream.h
 ./space/space.o: textstream/stream.h space/space.h date/date.h
 ./space/space.o: membuffer/membuffer.h textstream/types.h
@@ -37,7 +33,6 @@
 ./space/space.o: ./convert/double.h
 ./space/spaceparser.o: ./space/spaceparser.h ./space/space.h
 ./space/spaceparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
-./space/spaceparser.o: utf8/utf8_templates.h utf8/utf8_private.h
 ./space/spaceparser.o: convert/baseparser.h textstream/textstream.h
 ./space/spaceparser.o: textstream/stream.h space/space.h date/date.h
 ./space/spaceparser.o: membuffer/membuffer.h textstream/types.h
@@ -45,7 +40,6 @@
 ./space/spaceparser.o: ./convert/text.h ./convert/misc.h textstream/types.h
 ./space/keyvalueparser.o: ./space/keyvalueparser.h ./space/space.h
 ./space/keyvalueparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
-./space/keyvalueparser.o: utf8/utf8_templates.h utf8/utf8_private.h
 ./space/keyvalueparser.o: convert/baseparser.h textstream/textstream.h
 ./space/keyvalueparser.o: textstream/stream.h space/space.h date/date.h
 ./space/keyvalueparser.o: membuffer/membuffer.h textstream/types.h
@@ -53,29 +47,24 @@
 ./space/keyvalueparser.o: ./convert/text.h ./convert/misc.h
 ./space/keyvalueparser.o: textstream/types.h
 ./textstream/stream_private.o: textstream/stream_private.h
-./utf8/utf8.o: ./utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
-./utf8/utf8.o: utf8/utf8_private.h
-./utf8/utf8_private.o: utf8/utf8_private.h
+./utf8/utf8.o: ./utf8/utf8.h textstream/stream.h
 ./csv/csvparser.o: ./csv/csvparser.h space/space.h convert/inttostr.h
-./csv/csvparser.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
-./csv/csvparser.o: utf8/utf8_private.h convert/baseparser.h
+./csv/csvparser.o: utf8/utf8.h textstream/stream.h convert/baseparser.h
 ./csv/csvparser.o: textstream/textstream.h textstream/stream.h date/date.h
 ./csv/csvparser.o: membuffer/membuffer.h textstream/types.h
 ./csv/csvparser.o: textstream/stream_private.h
 ./mainoptions/mainoptionsparser.o: ./mainoptions/mainoptionsparser.h
 ./mainoptions/mainoptionsparser.o: space/space.h convert/inttostr.h
 ./mainoptions/mainoptionsparser.o: utf8/utf8.h textstream/stream.h
-./mainoptions/mainoptionsparser.o: utf8/utf8_templates.h utf8/utf8_private.h
 ./html/bbcodeparser.o: ./html/bbcodeparser.h ./html/htmlparser.h
 ./html/bbcodeparser.o: convert/baseparser.h textstream/textstream.h
 ./html/bbcodeparser.o: textstream/stream.h space/space.h convert/inttostr.h
-./html/bbcodeparser.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
-./html/bbcodeparser.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
-./html/bbcodeparser.o: textstream/types.h textstream/stream_private.h
+./html/bbcodeparser.o: utf8/utf8.h textstream/stream.h date/date.h
+./html/bbcodeparser.o: membuffer/membuffer.h textstream/types.h
+./html/bbcodeparser.o: textstream/stream_private.h
 ./html/htmlparser.o: ./html/htmlparser.h convert/baseparser.h
 ./html/htmlparser.o: textstream/textstream.h textstream/stream.h
 ./html/htmlparser.o: space/space.h convert/inttostr.h utf8/utf8.h
-./html/htmlparser.o: textstream/stream.h utf8/utf8_templates.h
-./html/htmlparser.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
+./html/htmlparser.o: textstream/stream.h date/date.h membuffer/membuffer.h
 ./html/htmlparser.o: textstream/types.h textstream/stream_private.h
 ./html/htmlparser.o: convert/text.h
--- a/src/utf8/utf8.cpp
+++ b/src/utf8/utf8.cpp
@@ -34,29 +34,27 @@

 #include <fstream>
 #include "utf8.h"
-#include "utf8_private.h"



 namespace pt
 {

-
-/*!
-	returns true if 'c' is a correct unicode character
-*/
+/*
+ * returns true if 'c' is a correct unicode character
+ */
 bool utf8_check_range(int c)
 {
 	return c>=0 && c<=0x10FFFF && !(c>=0xD800 && c<=0xDFFF);
 }


-/*!
-	returns true if 'c' is a correct unicode character
-
-	this method is used when reading from an utf8 string
-	how_many_bytes - means how many bytes from the utf8 string were read
-*/
+/*
+ * returns true if 'c' is a correct unicode character
+ *
+ * this method is used when reading from an utf8 string
+ * how_many_bytes - means how many bytes from the utf8 string were read
+ */
 bool utf8_check_range(int c, int how_many_bytes)
 {
 	if( c >= 0x0000 && c <= 0x007f && how_many_bytes == 1 )
@@ -126,12 +124,12 @@ bool surrogate_pair_to_int(int c1, int c2, int & z)


 /*
-	an auxiliary function for converting from wide characters to UTF-8
-	converting a wide character into one int
-
-	returns how many wide characters were used
-	if string_len is greater than 0 then the return value is always greater than zero too
-*/
+ * an auxiliary function for converting from wide characters to UTF-8
+ * converting a wide character into one int
+ *
+ * returns how many wide characters were used
+ * if string_len is greater than 0 then the return value is always greater than zero too
+ */
 size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct)
 {
 	if( string_len == 0 )
@@ -177,12 +175,12 @@ size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool


 /*
-	an auxiliary function for converting from wide characters to UTF-8
-	converting a wide character into one int
+ * an auxiliary function for converting from wide characters to UTF-8
+ * converting a wide character into one int

-	returns how many wide characters were used
-	if wide_string has at least one character then the return value is always greater than zero too
-*/
+ * returns how many wide characters were used
+ * if wide_string has at least one character then the return value is always greater than zero too
+ */
 size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct)
 {
 size_t min_str_len = 1;
@@ -235,10 +233,10 @@ size_t int_to_wide(int c, wchar_t * res, size_t max_buf_len)


 /*
-	converts an int to a wide string
-
-	returns true if a character was inserted to the string
-*/
+ * converts an int to a wide string
+ *
+ * returns true if a character was inserted to the string
+ */
 bool int_to_wide(int c, std::wstring & res)
 {
 	wchar_t buf[2];
@@ -281,23 +279,23 @@ bool int_to_stream(int c, pt::Stream & stream)



-/*!
-	this function converts one UTF-8 character into one wide-character
-
-	input:
-		utf8 - an input UTF-8 string
-		utf8_len - size of the input string,
-		           the string should be at least 4 bytes length for correctly
-				   recognized the utf-8 sequence
-
-	output:
-		res - an output character
-		correct - true if it is a correct character
-
-		the function returns how many characters have been used from the input string
-		(returns zero only if utf8_len is zero)
-		even if there are errors the functions returns a different from zero value
-*/
+/*
+ * this function converts one UTF-8 character into one wide-character
+ *
+ * input:
+ *  utf8 - an input UTF-8 string
+ *  utf8_len - size of the input string,
+ *             the string should be at least 4 bytes length for correctly
+ *             recognized the utf-8 sequence
+ *
+ * output:
+ *  res - an output character
+ *  correct - true if it is a correct character
+ *
+ * the function returns how many characters have been used from the input string
+ * (returns zero only if utf8_len is zero)
+ * even if there are errors the functions returns a different from zero value
+ */
 size_t utf8_to_int(const char * utf8, size_t utf8_len, int & res, bool & correct)
 {
 size_t i, len;
@@ -1016,6 +1014,160 @@ return res;



+namespace private_namespace
+{
+
+/*!
+	an auxiliary function for converting from UTF-8 string: decodes the first octet of a sequence
+*/
+bool utf8_to_int_first_octet(unsigned char uz, size_t & len, int & res)
+{
+	for(len=0 ; (uz & 0x80) != 0 ; ++len) // count the leading 1 bits, shifting them out of uz
+		uz <<= 1;
+
+	if( len == 1 || len > 4 ) // exactly one or more than four leading 1 bits are not accepted as a first octet
+		return false;
+
+	res = uz;
+
+	if( len > 0 )
+		res >>= len; // move the remaining (payload) bits back to their original position
+
+	if( len == 0 )
+		len = 1; // no leading 1 bits: the sequence consists of this single octet
+
+return true;
+}
+
+
+
+/*!
+	an auxiliary function for converting from UTF-8 string: appends the six low bits of a next octet to res
+*/
+bool utf8_to_int_add_next_octet(unsigned char uz, int & res)
+{
+	if( (uz & 0xc0) != 0x80 ) // a next octet must have the form 10xxxxxx, res is left untouched otherwise
+		return false;
+
+	res <<= 6; // make room for the six new bits
+	res |= (uz & 0x3F);
+
+return true;
+}
+
+
+
+/*!
+	an auxiliary function for converting from wide characters to UTF-8
+
+	returns how many wide characters were used (this is the value returned by wide_to_int(...))
+	if string_len is greater than 0 then the return value is always greater than zero too
+
+	utf8_written - how many characters were saved in the utf8 string (the string doesn't have
+				   a null terminating character)
+				   it can be equal to zero if the utf8 buffer is too small or there was an incorrect wide character read
+	was_utf8_buf_too_small -  will be true if the utf8 buffer is too small
+				   if this flag is true then utf8_written is equal to zero
+	was_error    - will be true if there is an error when converting (there was an incorrect wide character)
+				   (was_error will not be true if the utf8 buffer is too small)
+*/
+size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
+							size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode)
+{
+int z;
+bool correct;
+size_t chars;
+
+	utf8_written = 0;
+	was_utf8_buf_too_small = false;
+	chars = wide_to_int(wide_string, string_len, z, correct);
+
+	if( correct )
+	{
+		utf8_written = int_to_utf8(z, utf8, utf8_len);
+
+		if( utf8_written == 0 ) // z was reported as correct, so nothing written is treated as a too small buffer
+			was_utf8_buf_too_small = true;
+	}
+	else
+	{
+		if( mode == 1 )
+		{
+			utf8_written = int_to_utf8(0xFFFD, utf8, utf8_len); // U+FFFD "replacement character"
+
+			if( utf8_written == 0 )
+				was_utf8_buf_too_small = true;
+		}
+
+		was_error = true; // was_error is never set to false here, the caller has to initialize it
+	}
+
+return chars;
+}
+
+
+
+/*!
+	an auxiliary function for converting from wide characters to UTF-8 (the output is a std::string)
+
+	returns how many wide characters were used (this is the value returned by wide_to_int(...))
+	if string_len is greater than 0 then the return value is always greater than zero too
+*/
+size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode)
+{
+int z;
+bool correct;
+size_t chars;
+
+	chars = wide_to_int(wide_string, string_len, z, correct);
+
+	if( correct )
+		correct = int_to_utf8(z, utf8, false) != 0; // NOTE(review): the third argument is presumably a 'clear' flag - confirm
+
+	if( !correct )
+	{
+		if( mode == 1 )
+			int_to_utf8(0xFFFD, utf8, false); // U+FFFD "replacement character"
+
+		was_error = true; // was_error is never set to false here, the caller has to initialize it
+	}
+
+return chars;
+}
+
+
+
+/*!
+	an auxiliary function for converting from wide characters to UTF-8 (a null terminated input, the output is a std::string)
+
+	returns how many wide characters were used (this is the value returned by wide_to_int(...))
+	if wide_string has at least one character then the return value is always greater than zero too
+*/
+size_t wide_one_to_utf8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode)
+{
+int z;
+bool correct;
+size_t chars;
+
+	chars = wide_to_int(wide_string, z, correct);
+
+	if( correct )
+		correct = int_to_utf8(z, utf8, false) != 0; // NOTE(review): the third argument is presumably a 'clear' flag - confirm
+
+	if( !correct )
+	{
+		if( mode == 1 )
+			int_to_utf8(0xFFFD, utf8, false); // U+FFFD "replacement character"
+
+		was_error = true; // was_error is never set to false here, the caller has to initialize it
+	}
+
+return chars;
+}
+
+} // namespace private_namespace
+
+

 } // namespace

--- a/src/utf8/utf8.h
+++ b/src/utf8/utf8.h
@@ -177,7 +177,7 @@ bool int_to_wide(int c, std::wstring & res);
 	 call a convert_function for each character from an utf8 string
 */
 template<typename OutputFunction>
-bool utf8_to_output_function(const char * utf8, size_t utf8_len, OutputFunction convert_function, int mode = 1);
+bool utf8_to_output_function(const char * utf8, size_t utf8_len, OutputFunction output_function, int mode = 1);



@@ -298,10 +298,832 @@ template<typename StreamType>
 bool wide_stream_to_utf8(StreamType & buffer, char * utf8, std::size_t max_buffer_size, bool * was_buffer_sufficient_large = nullptr, int mode = 1);


+
+
+namespace private_namespace
+{
+bool utf8_to_int_first_octet(unsigned char uz, size_t & len, int & res);
+bool utf8_to_int_add_next_octet(unsigned char uz, int & res);
+
+size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
+							size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode);
+
+size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode);
+
+size_t wide_one_to_utf8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode);
+
+
+/*!
+	an auxiliary function for converting from wide characters to UTF-8 (the output is a stream)
+
+	returns how many wide characters were used (this is the value returned by wide_to_int(...))
+	if string_len is greater than 0 then the return value is always greater than zero too
+*/
+template<typename StreamType>
+static size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, bool & was_error, int mode)
+{
+int z;
+bool correct;
+size_t chars;
+
+	chars = wide_to_int(wide_string, string_len, z, correct);
+
+	if( correct )
+		correct = int_to_utf8(z, utf8) != 0; // nothing written to the stream is treated as an error too
+
+	if( !correct )
+	{
+		if( mode == 1 )
+			int_to_utf8(0xFFFD, utf8); // U+FFFD "replacement character"
+
+		was_error = true; // was_error is never set to false here, the caller has to initialize it
+	}
+
+return chars;
+}
+
+
+/*!
+	an auxiliary function for converting from wide characters to UTF-8 (a null terminated input), returns 0 for an empty string
+*/
+template<typename StreamType>
+static size_t wide_one_to_utf8(const wchar_t * wide_string, StreamType & utf8, bool & was_error, int mode)
+{
+	size_t min_str_len = 1;
+
+	if( *wide_string == 0 )
+		return 0;
+
+	if( *(wide_string+1) != 0 ) // a second character is available, so at most two characters are passed on
+		min_str_len = 2;
+
+return wide_one_to_utf8(wide_string, min_str_len, utf8, was_error, mode);
+}
+
+} // namespace private_namespace
+
+
+
+
+
+
+// converts an int to one or two wide characters and puts them to the res stream with the << operator
+template<typename StreamType>
+bool int_to_wide(int c, StreamType & res)
+{
+	wchar_t buf[2];
+	size_t used = int_to_wide(c, buf, sizeof(buf) / sizeof(wchar_t)); // used - how many wide characters were put to buf
+
+	if( used == 1 )
+	{
+		res << buf[0];
+	}
+	else
+	if( used == 2 )
+	{
+		res << buf[0];
+		res << buf[1];
+	}
+
+	return used > 0; // false means that nothing was put to the stream
+}
+
+
+/*!
+	this function converts one UTF-8 character into int
+
+	input:
+		iterator_in - a stream iterator for reading from (the stream can be any stream, we use *, ++ and == operators only)
+		iterator_end - an end iterator
+
+	output:
+		res - an output character
+		correct - true if it is a correct character
+
+		the function returns how many characters have been used from the input stream
+*/
+template<typename StreamIteratorType>
+size_t utf8_to_int(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, int & res, bool & correct)
+{
+size_t i, len;
+unsigned char uz;
+
+	res = 0;
+	correct = false;
+
+	if( iterator_in == iterator_end )
+		return 0;
+
+	uz = *iterator_in;
+	++iterator_in;
+
+	if( !private_namespace::utf8_to_int_first_octet(uz, len, res) )
+		return 1; // the incorrect first octet has already been consumed
+
+	for(i=1 ; i<len ; ++i)
+	{
+		if( iterator_in == iterator_end )
+			return i; // the input ended in the middle of a sequence, correct stays false
+
+		uz = *iterator_in;
+		++iterator_in;
+
+		if( !private_namespace::utf8_to_int_add_next_octet(uz, res) )
+			return i + 1; // the incorrect octet has been consumed as well
+	}
+
+	if( utf8_check_range(res, len) )
+		correct = true;
+
+return len;
+}
+
+
+
+/*!
+	converting UTF-8 string to a TextStreamBase<wchar_t,...> stream
+	(needs to be tested)
+*/
+// if clear is true then res.clear() is called first; the returned value is the status of utf8_to_output_function(...)
+template<typename StreamType>
+bool utf8_to_wide(const char * utf8, size_t utf8_len, StreamType & res, bool clear, int mode)
+{
+	if( clear )
+		res.clear();
+
+	bool status = utf8_to_output_function(utf8, utf8_len, [&res](int c) {
+		int_to_wide(c, res); // the value returned by int_to_wide(...) is ignored here
+	}, mode);
+
+	return status;
+}
+
+
+
+// converting a null terminated UTF-8 string to a wide stream: the length is calculated and the (utf8, utf8_len) version is called
+template<typename StreamType>
+bool utf8_to_wide(const char * utf8, StreamType & res, bool clear, int mode)
+{
+size_t utf8_len = 0;
+
+	while( utf8[utf8_len] != 0 ) // count the bytes up to the terminating null character
+		utf8_len += 1;
+
+return utf8_to_wide(utf8, utf8_len, res, clear, mode);
+}
+
+
+// converting a UTF-8 std::string to a wide stream: utf8.size() bytes are passed to the (utf8, utf8_len) version
+template<typename StreamType>
+bool utf8_to_wide(const std::string & utf8, StreamType & res, bool clear, int mode)
+{
+	return utf8_to_wide(utf8.c_str(), utf8.size(), res, clear, mode);
+}
+
+// converting a UTF-8 input stream (std::istream) to a wide stream, returns false if an incorrect sequence was read
+// clear - if true then res.clear() is called first; mode - if equal to 1 then U+FFFD is put instead of an incorrect character
+template<typename StreamType>
+bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear, int mode)
+{
+int z;
+bool correct, was_error = false;
+
+	if( clear )
+		res.clear();
+
+	while( utf8_to_int(utf8, z, correct) > 0 )
+	{
+		if( !correct )
+		{
+			if( mode == 1 )
+				int_to_wide(0xFFFD, res); // U+FFFD "replacement character" (res << 0xFFFD would pass an int to the << operator)
+
+			was_error = true;
+		}
+		else
+		{
+			int_to_wide(z, res);
+		}
+	}
+
+return !was_error;
+}
+
+// calls output_function(int) for each character from the utf8 string, returns false if an incorrect sequence was found
+template<typename OutputFunction>
+bool utf8_to_output_function(const char * utf8, size_t utf8_len, OutputFunction output_function, int mode)
+{
+int z;
+size_t len;
+bool correct, was_error = false;
+
+	while( utf8_len > 0 )
+	{
+		if( (unsigned char)*utf8 <= 0x7f )
+		{
+			// small optimization
+			len = 1;
+			correct = true;
+			z = static_cast<unsigned char>(*utf8);
+		}
+		else
+		{
+			len = pt::utf8_to_int(utf8, utf8_len, z, correct); // the len will be different from zero
+		}
+
+		if( !correct )
+		{
+			if( mode == 1 )
+				output_function(0xFFFD); // U+FFFD "replacement character"
+
+			was_error = true;
+		}
+		else
+		{
+			output_function(z);
+		}
+
+		utf8     += len; // skip the bytes which have just been used
+		utf8_len -= len;
+	}
+
+return !was_error;
+}
+
+// converts a wide stream to UTF-8 in chunks: output_function(const char*, size_t) gets each chunk, if it returns false the conversion stops
+template<typename StreamType, typename OutputFunction>
+bool wide_to_output_function(StreamType & buffer, OutputFunction output_function, int mode)
+{
+	char utf8_buffer[256]; // a local buffer which collects the output before it is passed to output_function
+	std::size_t buffer_len = sizeof(utf8_buffer) / sizeof(char);
+	std::size_t utf8_sequence_max_length = 10; // a free space kept in utf8_buffer for the next sequence
+	std::size_t index = 0; // how many bytes in utf8_buffer are waiting to be passed to output_function
+	bool was_error = false;
+
+	typename StreamType::const_iterator i = buffer.begin();
+
+	while( i != buffer.end() )
+	{
+		if( index + utf8_sequence_max_length > buffer_len )
+		{
+			bool write_status = output_function(utf8_buffer, index);
+			index = 0;
+
+			if( !write_status )
+			{
+				was_error = true;
+				break;
+			}
+		}
+
+		int c = 0xFFFD; // U+FFFD "replacement character";
+		bool seems_to_be_correct = false;
+		wchar_t w1 = *i;
+
+		if( sizeof(wchar_t) == 2 && is_first_surrogate_char(w1) )
+		{
+			++i;
+
+			if( i != buffer.end() )
+			{
+				wchar_t w2 = *i;
+
+				if( surrogate_pair_to_int(w1, w2, c) )
+				{
+					seems_to_be_correct = true;
+					++i;
+				}
+				else
+				{
+					was_error = true; // w2 is not skipped here, it will be read again in the next iteration
+				}
+			}
+			else
+			{
+				was_error = true; // the input ended just after a first surrogate character
+			}
+		}
+		else
+		{
+			c = w1;
+			seems_to_be_correct = true; // we do not test utf8_check_range(...) here because it is tested in int_to_utf8(...) below
+			++i;
+		}
+
+		if( seems_to_be_correct || mode == 1 )
+		{
+			size_t seq_len = int_to_utf8(c, utf8_buffer + index, buffer_len - index);
+			// here seq_len can be zero only when c is an incorrect unicode char (the buffer is large enough)
+
+			if( seq_len == 0 )
+			{
+				was_error = true;
+
+				if( mode == 1 )
+				{
+					seq_len = int_to_utf8(0xFFFD, utf8_buffer + index, buffer_len - index); // U+FFFD "replacement character";
+				}
+			}
+
+			index += seq_len;
+		}
+	}
+
+	if( index > 0 ) // pass the rest of the buffer, output_function is not called at all if nothing was written
+	{
+		if( !output_function(utf8_buffer, index) )
+		{
+			was_error = true;
+		}
+	}
+
+	return !was_error;
+}
+
+
+/*
+this function converts a UTF-8 stream into a wide stream or a wide string
+
+input:
+	stream - a UTF-8 stream for converting
+	mode - what to do with errors when converting
+		0: skip an invalid character
+		1: put U+FFFD "replacement character" instead of the invalid character (default)
+	clear - if true then res.clear() is called before converting
+output:
+	res - a wide stream or a wide string for the output sequence
+
+	this function returns false if there were some errors when converting
+*/
+template<typename StreamOrStringType>
+bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear, int mode)
+{
+	if( clear )
+		res.clear();
+
+	return utf8_to_output_function(stream, [&](int z) {
+		int_to_wide(z, res); // the value returned by int_to_wide(...) is ignored here
+	}, mode);
+}
+
+
+/*
+this function reads characters from a UTF-8 stream and calls an output_function
+
+input:
+	stream - a UTF-8 stream for converting
+	mode - what to do with errors when converting
+		0: skip an invalid character
+		1: put U+FFFD "replacement character" instead of the invalid character (default)
+
+output:
+	output_function - is a function which gets one argument: int (a character)
+		and should put the character to the output string/stream, this function should have the signature like this:
+		output_function(int z)
+
+	this function returns false if there were some errors when converting
+*/
+template<typename OutputFunction>
+bool utf8_to_output_function(const Stream & stream, OutputFunction output_function, int mode)
+{
+	size_t len;
+	bool correct;
+	int z;
+	size_t index = 0; // a position in the stream passed to utf8_to_int(...)
+	bool was_error = false;
+
+	do
+	{
+		len = utf8_to_int(stream, index, z, correct);
+
+		if( len > 0 )
+		{
+			if( !correct )
+			{
+				if( mode == 1 )
+					output_function(0xFFFD); // U+FFFD "replacement character"
+
+				was_error = true;
+			}
+			else
+			{
+				output_function(z);
+			}
+
+			index += len;
+		}
+	}
+	while( len > 0 ); // the loop ends when utf8_to_int(...) returns zero
+
+	return !was_error;
+}
+
+
+
+
+
+
+/*!
+	this function converts UTF-8 stream into a wide stream or a wide string
+
+	input:
+		iterator_in - a stream iterator for reading from (the stream can be any stream, we use *, ++ and == operators only)
+		iterator_end - an end iterator
+
+	output:
+		out_stream - an output wide stream or wide string (it is cleared first if clear_stream is true)
+
+		this function returns false if there were some errors when converting
+*/
+template<typename StreamIteratorType, typename StreamOrStringType>
+bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, StreamOrStringType & out_stream, bool clear_stream, int mode)
+{
+	if( clear_stream )
+		out_stream.clear();
+
+	return utf8_to_output_function(iterator_in, iterator_end, [&](int z){
+		int_to_wide(z, out_stream); // the value returned by int_to_wide(...) is ignored here
+	}, mode);
+}
+
+// calls output_function(int) for each character read from iterator_in, returns false if an incorrect sequence was found
+template<typename StreamIteratorType, typename OutputFunction>
+bool utf8_to_output_function(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, OutputFunction output_function, int mode)
+{
+	int res;
+	bool correct;
+	bool was_error = false;
+
+	while( iterator_in != iterator_end )
+	{
+		utf8_to_int(iterator_in, iterator_end, res, correct); // iterator_in is moved forward by utf8_to_int(...)
+
+		if( correct )
+		{
+			output_function(res);
+		}
+		else
+		{
+			if( mode == 1 )
+				output_function(0xFFFD); // U+FFFD "replacement character"
+
+			was_error = true;
+		}
+	}
+
+	return !was_error;
+}
+
+
+
+/*!
+	this function converts UTF-8 stream into a wide string
+
+	input:
+		iterator_in - a stream iterator for reading from (the stream can be any stream, we use *, ++ and == operators only)
+		iterator_end - an end iterator
+
+	output:
+		out_buffer - an output wide string
+		max_buffer_len - how many characters can be written (we write the terminating null character too)
+		was_buffer_sufficient_large - a pointer to a bool value - if provided it is set to true if the buffer was sufficiently large
+
+		this function returns false if there were some errors when converting or if the output buffer was too short
+*/
+template<typename StreamIteratorType>
+bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, wchar_t * out_buffer, size_t max_buffer_len, int mode, bool * was_buffer_sufficient_large)
+{
+	int res;
+	bool correct;
+	bool was_error = true; // these two values are returned when max_buffer_len is zero (nothing is written then)
+	bool was_buffer_ok = false;
+
+	if( max_buffer_len > 0 )
+	{
+		max_buffer_len -= 1; // for terminating null character
+		was_error = false;
+		was_buffer_ok = true;
+
+		while( iterator_in != iterator_end )
+		{
+			utf8_to_int(iterator_in, iterator_end, res, correct);
+
+			if( !correct )
+			{
+				was_error = true;
+
+				if( mode == 1 )
+				{
+					res = 0xFFFD; // U+FFFD "replacement character"
+					correct = true;
+				}
+			}
+
+			if( correct )
+			{
+				size_t len = int_to_wide(res, out_buffer, max_buffer_len);
+				// if len is zero then the output buffer is too short - the res input value was correct (it was returned from utf_to_int(...) beforehand)
+
+				if( len == 0 )
+				{
+					was_error = true;
+					was_buffer_ok = false;
+					break;
+				}
+				else
+				{
+					out_buffer += len;
+					max_buffer_len -= len;
+				}
+			}
+		}
+
+		*out_buffer = 0; // the terminating null character, one place was reserved for it above
+	}
+
+	if( was_buffer_sufficient_large )
+		*was_buffer_sufficient_large = was_buffer_ok;
+
+	return !was_error;
+}
+
+
+
+/*!
+	this function converts UTF-8 stream into a wide string
+
+	input:
+		stream - a stream for reading from (its begin() and end() iterators are used)
+
+	output:
+		out_buffer - an output wide string
+		max_buffer_len - how many characters can be written (we write the terminating null character too)
+		was_buffer_sufficient_large - a pointer to a bool value - if provided it is set to true if the buffer was sufficiently large
+
+		this function returns false if there were some errors when converting or if the output buffer was too short
+*/
+template<typename StreamType>
+bool utf8_to_wide(const StreamType & stream, wchar_t * out_buffer, size_t max_buffer_len, bool * was_buffer_sufficient_large, int mode)
+{
+	typename StreamType::const_iterator stream_begin = stream.begin();
+	typename StreamType::const_iterator stream_end = stream.end();
+
+	return utf8_to_wide(stream_begin, stream_end, out_buffer, max_buffer_len, mode, was_buffer_sufficient_large); // mode goes before was_buffer_sufficient_large in the iterator version
+}
+
+
+
+/*!
+	this function converts one wide character into UTF-8 stream
+
+	input:
+		z - wide character
+
+	output:
+		utf8 - a UTF-8 stream for the output sequence (its write(buf, len) method is used)
+
+	the function returns how many characters have been written to the utf8 stream,
+	zero means that 'z' is an incorrect unicode character
+*/
+template<typename StreamType>
+size_t int_to_utf8(int z, StreamType & utf8)
+{
+	char buf[10];
+
+	size_t len = int_to_utf8(z, buf, sizeof(buf)/sizeof(char)); // the sequence is created in the local buffer first
+
+	if( len > 0 )
+		utf8.write(buf, len);
+
+	return len;
+}
+
+
+
+
+
+/*!
+	this function converts a wide string into UTF-8 stream
+
+	input:
+		wide_string - a wide string for converting
+		string_len - size of the string
+		mode - what to do with errors when converting
+			0: skip an invalid character
+			1: put U+FFFD "replacement character" instead of the invalid character (default)
+
+	output:
+		utf8 - a UTF-8 stream for the output sequence
+
+	this function returns false if there were some errors when converting
+*/
+template<typename StreamType>
+bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, int mode)
+{
+bool was_error = false;
+size_t chars;
+
+	while( string_len > 0 )
+	{
+		chars = private_namespace::wide_one_to_utf8(wide_string, string_len, utf8, was_error, mode);
+		wide_string += chars; // chars - how many wide characters were used by wide_one_to_utf8(...)
+		string_len  -= chars;
+	}
+
+return !was_error;
+}
+
+
+
+
+
+/*!
+	this function converts a wide string into UTF-8 stream
+
+	input:
+		wide_string - a null terminated wide string for converting
+		mode - what to do with errors when converting
+			0: skip an invalid character
+			1: put U+FFFD "replacement character" instead of the invalid character (default)
+
+	output:
+		utf8 - a UTF-8 stream for the output sequence
+
+	this function returns false if there were some errors when converting
+*/
+template<typename StreamType>
+bool wide_to_utf8(const wchar_t * wide_string, StreamType & utf8, int mode)
+{
+bool was_error = false;
+
+	while( *wide_string ) // wide_one_to_utf8(...) returns how many wide characters were used
+		wide_string += private_namespace::wide_one_to_utf8(wide_string, utf8, was_error, mode);
+
+return !was_error;
+}
+
+
+
+/*!
+	this function converts a wide string (std::wstring) into UTF-8 stream
+
+	input:
+		wide_string - a wide string for converting (wide_string.size() characters are converted)
+		mode - what to do with errors when converting
+			0: skip an invalid character
+			1: put U+FFFD "replacement character" instead of the invalid character (default)
+
+	output:
+		utf8 - a UTF-8 stream for the output sequence
+
+	this function returns false if there were some errors when converting
+*/
+template<typename StreamType>
+bool wide_to_utf8(const std::wstring & wide_string, StreamType & utf8, int mode)
+{
+	return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, mode);
+}
+
+
+
+// converts a wide stream to a UTF-8 std::string (cleared first if clear is true), returns the status of wide_to_output_function(...)
+template<typename StreamType>
+bool wide_stream_to_utf8(StreamType & buffer, std::string & utf8, bool clear, int mode)
+{
+	if( clear )
+		utf8.clear();
+
+	return wide_to_output_function(buffer, [&utf8](const char * utf8_buffer, std::size_t buffer_len) -> bool {
+		utf8.append(utf8_buffer, buffer_len);
+		return true; // appending to the string is always reported as successful
+	}, mode);
+}
+
+// converts a wide Stream to a UTF-8 stream (cleared first if clear is true), returns false if an incorrect character was found
+// mode - if equal to 1 then U+FFFD "replacement character" is put instead of an incorrect character, otherwise it is skipped
+template<typename StreamType>
+bool wide_stream_to_utf8(const Stream & stream, StreamType & utf8, bool clear, int mode)
+{
+	bool was_error = false;
+
+	if( clear )
+		utf8.clear();
+
+	for(size_t i=0 ; i < stream.size() ; ++i)
+	{
+		int c = static_cast<int>(stream.get_wchar(i));
+		bool is_correct = false;
+
+		// a surrogate pair has to be joined before the range is tested because
+		// utf8_check_range(...) returns false for every value from 0xD800 to 0xDFFF
+		// CHECKME test me when sizeof(wchar_t) == 2
+		if( sizeof(wchar_t) == 2 && is_first_surrogate_char(c) )
+		{
+			if( i + 1 < stream.size() )
+			{
+				wchar_t c1 = static_cast<wchar_t>(c);
+				wchar_t c2 = stream.get_wchar(i + 1);
+
+				if( surrogate_pair_to_int(c1, c2, c) )
+				{
+					is_correct = true;
+					++i; // skip the second surrogate only if the pair was correct
+				}
+			}
+		}
+		else
+		{
+			is_correct = utf8_check_range(c);
+		}
+
+		if( is_correct )
+		{
+			int_to_utf8(c, utf8);
+		}
+		else
+		{
+			was_error = true;
+
+			if( mode == 1 )
+				int_to_utf8(0xFFFD, utf8); // U+FFFD "replacement character"
+		}
+	}
+
+	return !was_error;
+}
+
+// converts a wide stream to a UTF-8 stream (cleared first if clear is true), returns the status of wide_to_output_function(...)
+template<typename StreamTypeIn, typename StreamTypeOut>
+bool wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, bool clear, int mode)
+{
+	if( clear )
+		utf8.clear();
+
+	return wide_to_output_function(buffer, [&utf8](const char * utf8_buffer, std::size_t buffer_len) -> bool {
+		utf8.write(utf8_buffer, buffer_len);
+		return true; // the result of write(...) is not checked, each chunk is reported as successful
+	}, mode);
+}
+
+
+
+/*!
+	this function converts a wide stream into a utf8 string
+
+	input:
+		buffer - a wide stream for reading from
+		mode - passed to wide_to_output_function(...), if equal to 1 then U+FFFD is put instead of an incorrect character
+	output:
+		utf8 - an output utf8 string
+		max_buffer_size - how many characters can be written (we write the terminating null character too)
+		was_buffer_sufficient_large - a pointer to a bool value - if provided it is set to true if the buffer was sufficiently large
+
+	this function returns false if there were some errors when converting or if the output buffer was too short
+*/
+template<typename StreamType>
+bool wide_stream_to_utf8(StreamType & buffer, char * utf8, std::size_t max_buffer_size, bool * was_buffer_sufficient_large, int mode)
+{
+	bool buffer_ok = false; // these two values are returned when max_buffer_size is zero (nothing is written then)
+	bool is_ok = false;
+
+	if( max_buffer_size > 0 )
+	{
+		buffer_ok = true;
+		max_buffer_size -= 1; // for terminating null character
+		*utf8 = 0; // terminate the output now: the lambda below is not called at all if the input stream is empty
+		is_ok = wide_to_output_function(buffer, [&utf8, &max_buffer_size, &buffer_ok](const char * utf8_buffer, std::size_t buffer_len) -> bool {
+			std::size_t i=0;
+
+			for( ; i < buffer_len  ; ++i)
+			{
+				if( i < max_buffer_size )
+				{
+					*utf8 = utf8_buffer[i];
+					utf8 += 1;
+				}
+				else
+				{
+					buffer_ok = false;
+					break;
+				}
+			}
+
+			max_buffer_size -= i; // i - how many bytes were copied from this chunk
+			*utf8 = 0;
+			return buffer_ok; // false stops the conversion in wide_to_output_function(...)
+		}, mode);
+	}
+
+	if( was_buffer_sufficient_large )
+		*was_buffer_sufficient_large = buffer_ok;
+
+	return is_ok;
+}
+
+
 } // namespace


-#include "utf8/utf8_templates.h"

 #endif

--- a/src/utf8/utf8_private.cpp
+++ b/src/utf8/utf8_private.cpp
@@ -1,201 +0,0 @@
-/*
- * This file is a part of PikoTools
- * and is distributed under the 2-Clause BSD licence.
- * Author: Tomasz Sowa <t.sowa@ttmath.org>
- */
-
-/*
- * Copyright (c) 2021-2024, Tomasz Sowa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include "utf8_private.h"
-
-
-namespace pt
-{
-
-namespace private_namespace
-{
-
-/*!
-	an auxiliary function for converting from UTF-8 string
-*/
-bool utf8_to_int_first_octet(unsigned char uz, size_t & len, int & res)
-{
-	for(len=0 ; (uz & 0x80) != 0 ; ++len)
-		uz <<= 1;
-
-	if( len == 1 || len > 4 )
-		return false;
-
-	res = uz;
-
-	if( len > 0 )
-		res >>= len;
-
-	if( len == 0 )
-		len = 1;
-
-return true;
-}
-
-
-
-/*!
-	an auxiliary function for converting from UTF-8 string
-*/
-bool utf8_to_int_add_next_octet(unsigned char uz, int & res)
-{
-	if( (uz & 0xc0) != 0x80 )
-		return false;
-
-	res <<= 6;
-	res |= (uz & 0x3F);
-
-return true;
-}
-
-
-
-/*!
-	an auxiliary function for converting from wide characters to UTF-8
-
-	returns how many wide characters were used
-	if string_len is greater than 0 then the return value is always greater than zero too
-
-	utf8_written - how many characters were saved in the utf8 string (the string doesn't have
-	               a null terminating character)
-	               it can be equal to zero if the utf8 buffer is too small or there was an incorrect wide character read
-	was_utf8_buf_too_small -  will be true if the utf8 buffer is too small
-	               if this flag is true then utf8_written is equal to zero
-	was_error    - will be true if there is an error when converting (there was an incorrect wide character)
-	               (was_error will not be true if the utf8 buffer is too small)
-*/
-size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
-							size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode)
-{
-int z;
-bool correct;
-size_t chars;
-
-	utf8_written = 0;
-	was_utf8_buf_too_small = false;
-	chars = wide_to_int(wide_string, string_len, z, correct);
-
-	if( correct )
-	{
-		utf8_written = int_to_utf8(z, utf8, utf8_len);
-
-		if( utf8_written == 0 )
-			was_utf8_buf_too_small = true;
-	}
-	else
-	{
-		if( mode == 1 )
-		{
-			utf8_written = int_to_utf8(0xFFFD, utf8, utf8_len); // U+FFFD "replacement character"
-
-			if( utf8_written == 0 )
-				was_utf8_buf_too_small = true;
-		}
-
-		was_error = true;
-	}
-
-return chars;
-}
-
-
-
-/*!
-	an auxiliary function for converting from wide characters to UTF-8
-
-	returns how many wide characters were used
-	if string_len is greater than 0 then the return value is always greater than zero too
-*/
-size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode)
-{
-int z;
-bool correct;
-size_t chars;
-
-	chars = wide_to_int(wide_string, string_len, z, correct);
-
-	if( correct )
-		correct = int_to_utf8(z, utf8, false) != 0;
-
-	if( !correct )
-	{
-		if( mode == 1 )
-			int_to_utf8(0xFFFD, utf8, false); // U+FFFD "replacement character"
-
-		was_error = true;
-	}
-
-return chars;
-}
-
-
-
-/*!
-	an auxiliary function for converting from wide characters to UTF-8
-
-	returns how many wide characters were used
-	if wide_string has at least one character then the return value is always greater than zero too
-*/
-size_t wide_one_to_utf8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode)
-{
-int z;
-bool correct;
-size_t chars;
-
-	chars = wide_to_int(wide_string, z, correct);
-
-	if( correct )
-		correct = int_to_utf8(z, utf8, false) != 0;
-
-	if( !correct )
-	{
-		if( mode == 1 )
-			int_to_utf8(0xFFFD, utf8, false); // U+FFFD "replacement character"
-
-		was_error = true;
-	}
-
-return chars;
-}
-
-
-
-
-
-} // namespace private_namespace
-
-} // namespace pt
-
-
-
--- a/src/utf8/utf8_private.h
+++ b/src/utf8/utf8_private.h
@@ -1,117 +0,0 @@
-/*
- * This file is a part of PikoTools
- * and is distributed under the 2-Clause BSD licence.
- * Author: Tomasz Sowa <t.sowa@ttmath.org>
- */
-
-/*
- * Copyright (c) 2021-2024, Tomasz Sowa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef headerfile_pikotools_src_utf8_utf8_private
-#define headerfile_pikotools_src_utf8_utf8_private
-
-#include <string>
-
-
-namespace pt
-{
-
-size_t int_to_utf8(int z, char * utf8, size_t utf8_max_len);
-size_t int_to_utf8(int z, std::string & utf8, bool clear);
-size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct);
-size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct);
-
-
-namespace private_namespace
-{
-bool utf8_to_int_first_octet(unsigned char uz, size_t & len, int & res);
-bool utf8_to_int_add_next_octet(unsigned char uz, int & res);
-
-size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
-							size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode);
-
-size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode);
-
-size_t wide_one_to_utf8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode);
-
-
-/*!
-	an auxiliary function for converting from wide characters to UTF-8
-
-	returns how many wide characters were used
-	if string_len is greater than 0 then the return value is always greater than zero too
-*/
-template<typename StreamType>
-static size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, bool & was_error, int mode)
-{
-int z;
-bool correct;
-size_t chars;
-
-	chars = wide_to_int(wide_string, string_len, z, correct);
-
-	if( correct )
-		correct = int_to_utf8(z, utf8) != 0;
-
-	if( !correct )
-	{
-		if( mode == 1 )
-			int_to_utf8(0xFFFD, utf8); // U+FFFD "replacement character"
-
-		was_error = true;
-	}
-
-return chars;
-}
-
-
-/*!
-	an auxiliary function for converting from wide characters to UTF-8
-*/
-template<typename StreamType>
-static size_t wide_one_to_utf8(const wchar_t * wide_string, StreamType & utf8, bool & was_error, int mode)
-{
-	size_t min_str_len = 1;
-
-	if( *wide_string == 0 )
-		return 0;
-
-	if( *(wide_string+1) != 0 )
-		min_str_len = 2;
-
-return wide_one_to_utf8(wide_string, min_str_len, utf8, was_error, mode);
-}
-
-
-
-
-} // namespace private_namespace
-
-} // namespace pt
-
-#endif
--- a/src/utf8/utf8_templates.h
+++ b/src/utf8/utf8_templates.h
@@ -1,808 +0,0 @@
-/*
- * This file is a part of PikoTools
- * and is distributed under the 2-Clause BSD licence.
- * Author: Tomasz Sowa <t.sowa@ttmath.org>
- */
-
-/*
- * Copyright (c) 2021-2024, Tomasz Sowa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef headerfile_pikotools_src_utf8_utf8_templates
-#define headerfile_pikotools_src_utf8_utf8_templates
-
-// this file is included at the end of utf8.h
-
-#include "utf8_private.h"
-
-
-namespace pt
-{
-
-
-template<typename StreamType>
-bool int_to_wide(int c, StreamType & res)
-{
-	wchar_t buf[2];
-	size_t used = int_to_wide(c, buf, sizeof(buf) / sizeof(wchar_t));
-
-	if( used == 1 )
-	{
-		res << buf[0];
-	}
-	else
-	if( used == 2 )
-	{
-		res << buf[0];
-		res << buf[1];
-	}
-
-	return used > 0;
-}
-
-
-/*!
-	this function converts one UTF-8 character into int
-
-	input:
-		iterator_in - an stream iterator for reading from (the stream can by any stream, we use *, ++ and == operators only)
-		iterator_end - an end iterator
-
-	output:
-		res - an output character
-		correct - true if it is a correct character
-
-		the function returns how many characters have been used from the input stream
-*/
-template<typename StreamIteratorType>
-size_t utf8_to_int(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, int & res, bool & correct)
-{
-size_t i, len;
-unsigned char uz;
-
-	res = 0;
-	correct = false;
-
-	if( iterator_in == iterator_end )
-		return 0;
-
-	uz = *iterator_in;
-	++iterator_in;
-
-	if( !private_namespace::utf8_to_int_first_octet(uz, len, res) )
-		return 1;
-
-	for(i=1 ; i<len ; ++i)
-	{
-		if( iterator_in == iterator_end )
-			return i;
-
-		uz = *iterator_in;
-		++iterator_in;
-
-		if( !private_namespace::utf8_to_int_add_next_octet(uz, res) )
-			return i + 1;
-	}
-
-	if( utf8_check_range(res, len) )
-		correct = true;
-
-return len;
-}
-
-
-
-/*!
-	converting UTF-8 string to a TextStreamBase<wchar_t,...> stream
-	(need to be tested)
-*/
-// need to be tested
-template<typename StreamType>
-bool utf8_to_wide(const char * utf8, size_t utf8_len, StreamType & res, bool clear, int mode)
-{
-	if( clear )
-		res.clear();
-
-	bool status = utf8_to_output_function(utf8, utf8_len, [&res](int c) {
-		int_to_wide(c, res);
-	}, mode);
-
-	return status;
-}
-
-
-
-
-template<typename StreamType>
-bool utf8_to_wide(const char * utf8, StreamType & res, bool clear, int mode)
-{
-size_t utf8_len = 0;
-
-	while( utf8[utf8_len] != 0 )
-		utf8_len += 1;
-
-return utf8_to_wide(utf8, utf8_len, res, clear, mode);
-}
-
-
-
-template<typename StreamType>
-bool utf8_to_wide(const std::string & utf8, StreamType & res, bool clear, int mode)
-{
-	return utf8_to_wide(utf8.c_str(), utf8.size(), res, clear, mode);
-}
-
-
-
-template<typename StreamType>
-bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear, int mode)
-{
-int z;
-bool correct, was_error = false;
-
-	if( clear )
-		res.clear();
-
-	while( utf8_to_int(utf8, z, correct) > 0 )
-	{
-		if( !correct )
-		{
-			if( mode == 1 )
-				res << 0xFFFD; // U+FFFD "replacement character"
-
-			was_error = true;
-		}
-		else
-		{
-			int_to_wide(z, res);
-		}
-	}
-
-return !was_error;
-}
-
-
-template<typename OutputFunction>
-bool utf8_to_output_function(const char * utf8, size_t utf8_len, OutputFunction output_function, int mode)
-{
-int z;
-size_t len;
-bool correct, was_error = false;
-
-	while( utf8_len > 0 )
-	{
-		if( (unsigned char)*utf8 <= 0x7f )
-		{
-			// small optimization
-			len = 1;
-			correct = true;
-			z = static_cast<unsigned char>(*utf8);
-		}
-		else
-		{
-			len = pt::utf8_to_int(utf8, utf8_len, z, correct); // the len will be different from zero
-		}
-
-		if( !correct )
-		{
-			if( mode == 1 )
-				output_function(0xFFFD); // U+FFFD "replacement character"
-
-			was_error = true;
-		}
-		else
-		{
-			output_function(z);
-		}
-
-		utf8     += len;
-		utf8_len -= len;
-	}
-
-return !was_error;
-}
-
-
-template<typename StreamType, typename OutputFunction>
-bool wide_to_output_function(StreamType & buffer, OutputFunction output_function, int mode)
-{
-	char utf8_buffer[256];
-	std::size_t buffer_len = sizeof(utf8_buffer) / sizeof(char);
-	std::size_t utf8_sequence_max_length = 10;
-	std::size_t index = 0;
-	bool was_error = false;
-
-	typename StreamType::const_iterator i = buffer.begin();
-
-	while( i != buffer.end() )
-	{
-		if( index + utf8_sequence_max_length > buffer_len )
-		{
-			bool write_status = output_function(utf8_buffer, index);
-			index = 0;
-
-			if( !write_status )
-			{
-				was_error = true;
-				break;
-			}
-		}
-
-		int c = 0xFFFD; // U+FFFD "replacement character";
-		bool seems_to_be_correct = false;
-		wchar_t w1 = *i;
-
-		if( sizeof(wchar_t) == 2 && is_first_surrogate_char(w1) )
-		{
-			++i;
-
-			if( i != buffer.end() )
-			{
-				wchar_t w2 = *i;
-
-				if( surrogate_pair_to_int(w1, w2, c) )
-				{
-					seems_to_be_correct = true;
-					++i;
-				}
-				else
-				{
-					was_error = true;
-				}
-			}
-			else
-			{
-				was_error = true;
-			}
-		}
-		else
-		{
-			c = w1;
-			seems_to_be_correct = true; // we do not test utf8_check_range(...) here because it is tested in int_to_utf8(...) below
-			++i;
-		}
-
-		if( seems_to_be_correct || mode == 1 )
-		{
-			size_t seq_len = int_to_utf8(c, utf8_buffer + index, buffer_len - index);
-			// here seq_len can be zero only when c is an incorrect unicode char (the buffer is large enough)
-
-			if( seq_len == 0 )
-			{
-				was_error = true;
-
-				if( mode == 1 )
-				{
-					seq_len = int_to_utf8(0xFFFD, utf8_buffer + index, buffer_len - index); // U+FFFD "replacement character";
-				}
-			}
-
-			index += seq_len;
-		}
-	}
-
-	if( index > 0 )
-	{
-		if( !output_function(utf8_buffer, index) )
-		{
-			was_error = true;
-		}
-	}
-
-	return !was_error;
-}
-
-
-/*
-this function converts a UTF-8 stream into a wide stream or a wide string
-
-input:
-	stream - a UTF-8 stream for converting
-	mode - what to do with errors when converting
-		0: skip an invalid character
-		1: put U+FFFD "replacement character" istead of the invalid character (default)
-
-output:
-	res - a wide stream or a wide string for the output sequence
-
-	this function returns false if there were some errors when converting
-*/
-template<typename StreamOrStringType>
-bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear, int mode)
-{
-	if( clear )
-		res.clear();
-
-	return utf8_to_output_function(stream, [&](int z) {
-		int_to_wide(z, res);
-	}, mode);
-}
-
-
-/*
-this function reads characters from a UTF-8 stream and calls an output_function
-
-input:
-	stream - a UTF-8 stream for converting
-	mode - what to do with errors when converting
-		0: skip an invalid character
-		1: put U+FFFD "replacement character" istead of the invalid character (default)
-
-output:
-	output_function - is a function which gets two artuments: int (character) and a reference to StreamOrStringType
-		and should put the character to the output string/stream, this function should have the signature like this:
-		output_function(int z, StreamOrStringType & res)
-
-	this function returns false if there were some errors when converting
-*/
-template<typename OutputFunction>
-bool utf8_to_output_function(const Stream & stream, OutputFunction output_function, int mode)
-{
-	size_t len;
-	bool correct;
-	int z;
-	size_t index = 0;
-	bool was_error = false;
-
-	do
-	{
-		len = utf8_to_int(stream, index, z, correct);
-
-		if( len > 0 )
-		{
-			if( !correct )
-			{
-				if( mode == 1 )
-					output_function(0xFFFD); // U+FFFD "replacement character"
-
-				was_error = true;
-			}
-			else
-			{
-				output_function(z);
-			}
-
-			index += len;
-		}
-	}
-	while( len > 0 );
-
-	return !was_error;
-}
-
-
-
-
-
-
-/*!
-	this function converts UTF-8 stream into a wide stream or a wide string
-
-	input:
-		iterator_in - a stream iterator for reading from (the stream can by any stream, we use *, ++ and == operators only)
-		iterator_end - an end iterator
-
-	output:
-		out_stream - an output wide stream or wide string (the stream can by of any kind, we use only << operator for a stream and += for a string)
-
-		this function returns false if there were some errors when converting
-*/
-template<typename StreamIteratorType, typename StreamOrStringType>
-bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, StreamOrStringType & out_stream, bool clear_stream, int mode)
-{
-	if( clear_stream )
-		out_stream.clear();
-
-	return utf8_to_output_function(iterator_in, iterator_end, [&](int z){
-		int_to_wide(z, out_stream);
-	}, mode);
-}
-
-
-template<typename StreamIteratorType, typename OutputFunction>
-bool utf8_to_output_function(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, OutputFunction output_function, int mode)
-{
-	int res;
-	bool correct;
-	bool was_error = false;
-
-	while( iterator_in != iterator_end )
-	{
-		utf8_to_int(iterator_in, iterator_end, res, correct);
-
-		if( correct )
-		{
-			output_function(res);
-		}
-		else
-		{
-			if( mode == 1 )
-				output_function(0xFFFD); // U+FFFD "replacement character"
-
-			was_error = true;
-		}
-	}
-
-	return !was_error;
-}
-
-
-
-/*!
-	this function converts UTF-8 stream into a wide string
-
-	input:
-		iterator_in - a stream iterator for reading from (the stream can by any stream, we use *, ++ and == operators only)
-		iterator_end - an end iterator
-
-	output:
-		out_buffer - an output wide string
-		max_buffer_len - how many characters can be write (we write the terminating null character too)
-		was_buffer_sufficient_large - a pointer to a bool value - if provided it is set to true if the buffer was sufficient large
-
-		this function returns false if there were some errors when converting or if the output buffer was too short
-*/
-template<typename StreamIteratorType>
-bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, wchar_t * out_buffer, size_t max_buffer_len, int mode, bool * was_buffer_sufficient_large)
-{
-	int res;
-	bool correct;
-	bool was_error = true;
-	bool was_buffer_ok = false;
-
-	if( max_buffer_len > 0 )
-	{
-		max_buffer_len -= 1; // for terminating null character
-		was_error = false;
-		was_buffer_ok = true;
-
-		while( iterator_in != iterator_end )
-		{
-			utf8_to_int(iterator_in, iterator_end, res, correct);
-
-			if( !correct )
-			{
-				was_error = true;
-
-				if( mode == 1 )
-				{
-					res = 0xFFFD; // U+FFFD "replacement character"
-					correct = true;
-				}
-			}
-
-			if( correct )
-			{
-				size_t len = int_to_wide(res, out_buffer, max_buffer_len);
-				// if len is zero then the output buffer is too short - the res input value was correct (it was returned from utf_to_int(...) beforehand)
-
-				if( len == 0 )
-				{
-					was_error = true;
-					was_buffer_ok = false;
-					break;
-				}
-				else
-				{
-					out_buffer += len;
-					max_buffer_len -= len;
-				}
-			}
-		}
-
-		*out_buffer = 0;
-	}
-
-	if( was_buffer_sufficient_large )
-		*was_buffer_sufficient_large = was_buffer_ok;
-
-	return !was_error;
-}
-
-
-
-/*!
-	this function converts UTF-8 stream into a wide string
-
-	input:
-		stream - a stream for reading from
-
-	output:
-		out_buffer - an output wide string
-		max_buffer_len - how many characters can be write (we write the terminating null character too)
-		was_buffer_sufficient_large - a pointer to a bool value - if provided it is set to true if the buffer was sufficient large
-
-		this function returns false if there were some errors when converting or if the output buffer was too short
-*/
-template<typename StreamType>
-bool utf8_to_wide(const StreamType & stream, wchar_t * out_buffer, size_t max_buffer_len, bool * was_buffer_sufficient_large, int mode)
-{
-	typename StreamType::const_iterator stream_begin = stream.begin();
-	typename StreamType::const_iterator stream_end = stream.end();
-
-	return utf8_to_wide(stream_begin, stream_end, out_buffer, max_buffer_len, mode, was_buffer_sufficient_large);
-}
-
-
-
-/*!
-	this function converts one wide character into UTF-8 stream
-
-	input:
-		z - wide character
-
-	output:
-		utf8 - a UTF-8 stream for the output sequence
-
-	the function returns how many characters have been written to the utf8 stream,
-	zero means that 'z' is an incorrect unicode character
-*/
-template<typename StreamType>
-size_t int_to_utf8(int z, StreamType & utf8)
-{
-	char buf[10];
-
-	size_t len = int_to_utf8(z, buf, sizeof(buf)/sizeof(char));
-
-	if( len > 0 )
-		utf8.write(buf, len);
-
-	return len;
-}
-
-
-
-
-
-/*!
-	this function converts a wide string into UTF-8 stream
-
-	input:
-		wide_string - a wide string for converting
-		string_len - size of the string
-		mode - what to do with errors when converting
-			0: skip an invalid character
-			1: put U+FFFD "replacement character" istead of the invalid character (default)
-
-	output:
-		utf8 - a UTF-8 stream for the output sequence
-
-	this function returns false if there were some errors when converting
-*/
-template<typename StreamType>
-bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, int mode)
-{
-bool was_error = false;
-size_t chars;
-
-	while( string_len > 0 )
-	{
-		chars = private_namespace::wide_one_to_utf8(wide_string, string_len, utf8, was_error, mode);
-		wide_string += chars;
-		string_len  -= chars;
-	}
-
-return !was_error;
-}
-
-
-
-
-
-/*!
-	this function converts a wide string into UTF-8 stream
-
-	input:
-		wide_string - a null terminated wide string for converting
-		mode - what to do with errors when converting
-			0: skip an invalid character
-			1: put U+FFFD "replacement character" istead of the invalid character (default)
-
-	output:
-		utf8 - a UTF-8 stream for the output sequence
-
-	this function returns false if there were some errors when converting
-*/
-template<typename StreamType>
-bool wide_to_utf8(const wchar_t * wide_string, StreamType & utf8, int mode)
-{
-bool was_error = false;
-
-	while( *wide_string )
-		wide_string += private_namespace::wide_one_to_utf8(wide_string, utf8, was_error, mode);
-
-return !was_error;
-}
-
-
-
-/*!
-	this function converts a wide string (std::wstring) into UTF-8 stream
-
-	input:
-		wide_string - a wide string for converting
-		mode - what to do with errors when converting
-			0: skip an invalid character
-			1: put U+FFFD "replacement character" istead of the invalid character (default)
-
-	output:
-		utf8 - a UTF-8 stream for the output sequence
-
-	this function returns false if there were some errors when converting
-*/
-template<typename StreamType>
-bool wide_to_utf8(const std::wstring & wide_string, StreamType & utf8, int mode)
-{
-	return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, mode);
-}
-
-
-
-
-template<typename StreamType>
-bool wide_stream_to_utf8(StreamType & buffer, std::string & utf8, bool clear, int mode)
-{
-	if( clear )
-		utf8.clear();
-
-	return wide_to_output_function(buffer, [&utf8](const char * utf8_buffer, std::size_t buffer_len) -> bool {
-		utf8.append(utf8_buffer, buffer_len);
-		return true;
-	}, mode);
-}
-
-
-
-template<typename StreamType>
-bool wide_stream_to_utf8(const Stream & stream, StreamType & utf8, bool clear, int mode)
-{
-	bool was_error = false;
-
-	if( clear )
-		utf8.clear();
-
-	for(size_t i=0 ; i < stream.size() ; ++i)
-	{
-		int c = static_cast<int>(stream.get_wchar(i));
-		bool is_correct = false;
-
-		if( utf8_check_range(c) )
-		{
-			// CHECKME test me when sizeof(wchar_t) == 2
-			if( is_first_surrogate_char(c) )
-			{
-				if( i + 1 < stream.size() )
-				{
-					wchar_t c1 = static_cast<wchar_t>(c);
-					wchar_t c2 = stream.get_wchar(++i);
-
-					if( surrogate_pair_to_int(c1, c2, c) )
-					{
-						is_correct = true;
-					}
-				}
-			}
-			else
-			{
-				is_correct = true;
-			}
-		}
-
-		if( is_correct )
-		{
-			int_to_utf8(c, utf8);
-		}
-		else
-		{
-			was_error = true;
-
-			if( mode == 1 )
-				int_to_utf8(0xFFFD, utf8); // U+FFFD "replacement character"
-		}
-	}
-
-	return !was_error;
-}
-
-
-template<typename StreamTypeIn, typename StreamTypeOut>
-bool wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, bool clear, int mode)
-{
-	if( clear )
-		utf8.clear();
-
-	return wide_to_output_function(buffer, [&utf8](const char * utf8_buffer, std::size_t buffer_len) -> bool {
-		utf8.write(utf8_buffer, buffer_len);
-		return true;
-	}, mode);
-}
-
-
-
-/*!
-	this function converts a wide stream into a utf8 string
-
-	input:
-		buffer - a wide stream for reading from
-
-	output:
-		utf8 - an output utf8 string
-		max_buffer_len - how many characters can be write (we write the terminating null character too)
-		was_buffer_sufficient_large - a pointer to a bool value - if provided it is set to true if the buffer was sufficient large
-
-	this function returns false if there were some errors when converting or if the output buffer was too short
-*/
-template<typename StreamType>
-bool wide_stream_to_utf8(StreamType & buffer, char * utf8, std::size_t max_buffer_size, bool * was_buffer_sufficient_large, int mode)
-{
-	bool buffer_ok = false;
-	bool is_ok = false;
-
-	if( max_buffer_size > 0 )
-	{
-		buffer_ok = true;
-		max_buffer_size -= 1; // for terminating null character
-
-		is_ok = wide_to_output_function(buffer, [&utf8, &max_buffer_size, &buffer_ok](const char * utf8_buffer, std::size_t buffer_len) -> bool {
-			std::size_t i=0;
-
-			for( ; i < buffer_len  ; ++i)
-			{
-				if( i < max_buffer_size )
-				{
-					*utf8 = utf8_buffer[i];
-					utf8 += 1;
-				}
-				else
-				{
-					buffer_ok = false;
-					break;
-				}
-			}
-
-			max_buffer_size -= i;
-			*utf8 = 0;
-			return buffer_ok;
-		}, mode);
-	}
-
-	if( was_buffer_sufficient_large )
-		*was_buffer_sufficient_large = buffer_ok;
-
-	return is_ok;
-}
-
-
-
-
-
-} // namespace pt
-
-#endif
-
-
-
--- a/tests/Makefile.dep
+++ b/tests/Makefile.dep
@@ -4,16 +4,14 @@
 ./convert.o: ../src/convert/patternreplacer.h ../src/textstream/textstream.h
 ./convert.o: ../src/textstream/stream.h ../src/space/space.h
 ./convert.o: ../src/convert/inttostr.h ../src/utf8/utf8.h
-./convert.o: ../src/textstream/stream.h ../src/utf8/utf8_templates.h
-./convert.o: ../src/utf8/utf8_private.h ../src/date/date.h
+./convert.o: ../src/textstream/stream.h ../src/date/date.h
 ./convert.o: ../src/membuffer/membuffer.h ../src/textstream/types.h
 ./convert.o: ../src/textstream/stream_private.h ../src/convert/strtoint.h
 ./convert.o: ../src/convert/text.h ../src/convert/misc.h
 ./convert.o: ../src/textstream/types.h ../src/convert/double.h test.h
 ./csvparser.o: csvparser.h ../src/csv/csvparser.h ../src/space/space.h
 ./csvparser.o: ../src/convert/inttostr.h ../src/utf8/utf8.h
-./csvparser.o: ../src/textstream/stream.h ../src/utf8/utf8_templates.h
-./csvparser.o: ../src/utf8/utf8_private.h ../src/convert/baseparser.h
+./csvparser.o: ../src/textstream/stream.h ../src/convert/baseparser.h
 ./csvparser.o: ../src/textstream/textstream.h ../src/textstream/stream.h
 ./csvparser.o: ../src/date/date.h ../src/membuffer/membuffer.h
 ./csvparser.o: ../src/textstream/types.h ../src/textstream/stream_private.h
@@ -22,8 +20,7 @@
 ./main.o: ../src/convert/patternreplacer.h ../src/textstream/textstream.h
 ./main.o: ../src/textstream/stream.h ../src/space/space.h
 ./main.o: ../src/convert/inttostr.h ../src/utf8/utf8.h
-./main.o: ../src/textstream/stream.h ../src/utf8/utf8_templates.h
-./main.o: ../src/utf8/utf8_private.h ../src/date/date.h
+./main.o: ../src/textstream/stream.h ../src/date/date.h
 ./main.o: ../src/membuffer/membuffer.h ../src/textstream/types.h
 ./main.o: ../src/textstream/stream_private.h ../src/convert/strtoint.h
 ./main.o: ../src/convert/text.h ../src/convert/misc.h
@@ -34,9 +31,7 @@
 ./mainoptionsparser.o: ../src/mainoptions/mainoptionsparser.h
 ./mainoptionsparser.o: ../src/space/space.h ../src/convert/inttostr.h
 ./mainoptionsparser.o: ../src/utf8/utf8.h ../src/textstream/stream.h
-./mainoptionsparser.o: ../src/utf8/utf8_templates.h
-./mainoptionsparser.o: ../src/utf8/utf8_private.h ../src/convert/convert.h
-./mainoptionsparser.o: ../src/convert/inttostr.h
+./mainoptionsparser.o: ../src/convert/convert.h ../src/convert/inttostr.h
 ./mainoptionsparser.o: ../src/convert/patternreplacer.h
 ./mainoptionsparser.o: ../src/textstream/textstream.h
 ./mainoptionsparser.o: ../src/textstream/stream.h ../src/date/date.h