add some utf8 converting methods

add new methods:
- bool int_to_stream(int c, pt::Stream & stream);
- template<typename OutputFunction> bool utf8_to_output_function(const Stream & stream, OutputFunction output_function, int mode = 1);
- template<typename StreamIteratorType, typename OutputFunction> bool utf8_to_output_function(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, OutputFunction output_function, int mode = 1);
- template<typename StreamType, typename OutputFunction> bool wide_to_output_function(StreamType & buffer, OutputFunction output_function, int mode = 1);

make some methods public:
- size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct)
- size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct)

rename and make some methods public:
- template<typename OutputFunction> utf8_to_wide_generic(const char * utf8, size_t utf8_len, OutputFunction convert_function, int mode) -> utf8_to_output_function(...)

while here:
- fix: correctly convert characters in Log::put_multiline_generic()
2024-05-30 20:19:04 +02:00
parent 5fd17175c1
commit aacb1f43ae
11 changed files with 428 additions and 338 deletions
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -6,79 +6,76 @@
 ./convert/misc.o: utf8/utf8_templates.h utf8/utf8_private.h
 ./convert/text.o: ./convert/text.h ./convert/text_private.h
 ./convert/double.o: ./convert/double.h textstream/textstream.h
-./convert/double.o: textstream/stream.h space/space.h textstream/types.h
-./convert/double.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
-./convert/double.o: utf8/utf8_templates.h utf8/utf8_private.h date/date.h
-./convert/double.o: membuffer/membuffer.h textstream/types.h
-./convert/double.o: textstream/stream_private.h
+./convert/double.o: textstream/stream.h space/space.h convert/inttostr.h
+./convert/double.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
+./convert/double.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
+./convert/double.o: textstream/types.h textstream/stream_private.h
 ./convert/baseparser.o: ./convert/baseparser.h textstream/textstream.h
-./convert/baseparser.o: textstream/stream.h space/space.h textstream/types.h
-./convert/baseparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
-./convert/baseparser.o: utf8/utf8_templates.h utf8/utf8_private.h date/date.h
-./convert/baseparser.o: membuffer/membuffer.h textstream/types.h
-./convert/baseparser.o: textstream/stream_private.h
+./convert/baseparser.o: textstream/stream.h space/space.h convert/inttostr.h
+./convert/baseparser.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
+./convert/baseparser.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
+./convert/baseparser.o: textstream/types.h textstream/stream_private.h
 ./date/date.o: ./date/date.h convert/inttostr.h
 ./log/filelog.o: ./log/filelog.h textstream/textstream.h textstream/stream.h
-./log/filelog.o: space/space.h textstream/types.h convert/inttostr.h
-./log/filelog.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
+./log/filelog.o: space/space.h convert/inttostr.h utf8/utf8.h
+./log/filelog.o: textstream/stream.h utf8/utf8_templates.h
 ./log/filelog.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
 ./log/filelog.o: textstream/types.h textstream/stream_private.h
 ./log/log.o: ./log/log.h textstream/textstream.h textstream/stream.h
-./log/log.o: space/space.h textstream/types.h convert/inttostr.h utf8/utf8.h
-./log/log.o: textstream/stream.h utf8/utf8_templates.h utf8/utf8_private.h
-./log/log.o: date/date.h membuffer/membuffer.h textstream/types.h
+./log/log.o: space/space.h convert/inttostr.h utf8/utf8.h textstream/stream.h
+./log/log.o: utf8/utf8_templates.h utf8/utf8_private.h date/date.h
+./log/log.o: membuffer/membuffer.h textstream/types.h
 ./log/log.o: textstream/stream_private.h ./log/filelog.h
-./space/space.o: ./space/space.h textstream/types.h convert/inttostr.h
-./space/space.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
+./space/space.o: ./space/space.h convert/inttostr.h utf8/utf8.h
+./space/space.o: textstream/stream.h utf8/utf8_templates.h
 ./space/space.o: utf8/utf8_private.h convert/convert.h ./convert/inttostr.h
 ./space/space.o: convert/patternreplacer.h textstream/textstream.h
 ./space/space.o: textstream/stream.h space/space.h date/date.h
 ./space/space.o: membuffer/membuffer.h textstream/types.h
 ./space/space.o: textstream/stream_private.h convert/strtoint.h
-./space/space.o: ./convert/text.h ./convert/misc.h ./convert/double.h
+./space/space.o: ./convert/text.h ./convert/misc.h textstream/types.h
+./space/space.o: ./convert/double.h
 ./space/spaceparser.o: ./space/spaceparser.h ./space/space.h
-./space/spaceparser.o: textstream/types.h convert/inttostr.h utf8/utf8.h
-./space/spaceparser.o: textstream/stream.h utf8/utf8_templates.h
-./space/spaceparser.o: utf8/utf8_private.h convert/baseparser.h
-./space/spaceparser.o: textstream/textstream.h textstream/stream.h
-./space/spaceparser.o: space/space.h date/date.h membuffer/membuffer.h
-./space/spaceparser.o: textstream/types.h textstream/stream_private.h
-./space/spaceparser.o: convert/strtoint.h ./convert/text.h ./convert/misc.h
+./space/spaceparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
+./space/spaceparser.o: utf8/utf8_templates.h utf8/utf8_private.h
+./space/spaceparser.o: convert/baseparser.h textstream/textstream.h
+./space/spaceparser.o: textstream/stream.h space/space.h date/date.h
+./space/spaceparser.o: membuffer/membuffer.h textstream/types.h
+./space/spaceparser.o: textstream/stream_private.h convert/strtoint.h
+./space/spaceparser.o: ./convert/text.h ./convert/misc.h textstream/types.h
 ./space/keyvalueparser.o: ./space/keyvalueparser.h ./space/space.h
-./space/keyvalueparser.o: textstream/types.h convert/inttostr.h utf8/utf8.h
-./space/keyvalueparser.o: textstream/stream.h utf8/utf8_templates.h
-./space/keyvalueparser.o: utf8/utf8_private.h convert/baseparser.h
-./space/keyvalueparser.o: textstream/textstream.h textstream/stream.h
-./space/keyvalueparser.o: space/space.h date/date.h membuffer/membuffer.h
-./space/keyvalueparser.o: textstream/types.h textstream/stream_private.h
-./space/keyvalueparser.o: convert/strtoint.h ./convert/text.h
-./space/keyvalueparser.o: ./convert/misc.h
+./space/keyvalueparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
+./space/keyvalueparser.o: utf8/utf8_templates.h utf8/utf8_private.h
+./space/keyvalueparser.o: convert/baseparser.h textstream/textstream.h
+./space/keyvalueparser.o: textstream/stream.h space/space.h date/date.h
+./space/keyvalueparser.o: membuffer/membuffer.h textstream/types.h
+./space/keyvalueparser.o: textstream/stream_private.h convert/strtoint.h
+./space/keyvalueparser.o: ./convert/text.h ./convert/misc.h
+./space/keyvalueparser.o: textstream/types.h
 ./textstream/stream_private.o: textstream/stream_private.h
 ./utf8/utf8.o: ./utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
 ./utf8/utf8.o: utf8/utf8_private.h
 ./utf8/utf8_private.o: utf8/utf8_private.h
-./csv/csvparser.o: ./csv/csvparser.h space/space.h textstream/types.h
-./csv/csvparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
-./csv/csvparser.o: utf8/utf8_templates.h utf8/utf8_private.h
-./csv/csvparser.o: convert/baseparser.h textstream/textstream.h
-./csv/csvparser.o: textstream/stream.h date/date.h membuffer/membuffer.h
-./csv/csvparser.o: textstream/types.h textstream/stream_private.h
+./csv/csvparser.o: ./csv/csvparser.h space/space.h convert/inttostr.h
+./csv/csvparser.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
+./csv/csvparser.o: utf8/utf8_private.h convert/baseparser.h
+./csv/csvparser.o: textstream/textstream.h textstream/stream.h date/date.h
+./csv/csvparser.o: membuffer/membuffer.h textstream/types.h
+./csv/csvparser.o: textstream/stream_private.h
 ./mainoptions/mainoptionsparser.o: ./mainoptions/mainoptionsparser.h
-./mainoptions/mainoptionsparser.o: space/space.h textstream/types.h
-./mainoptions/mainoptionsparser.o: convert/inttostr.h utf8/utf8.h
-./mainoptions/mainoptionsparser.o: textstream/stream.h utf8/utf8_templates.h
-./mainoptions/mainoptionsparser.o: utf8/utf8_private.h
+./mainoptions/mainoptionsparser.o: space/space.h convert/inttostr.h
+./mainoptions/mainoptionsparser.o: utf8/utf8.h textstream/stream.h
+./mainoptions/mainoptionsparser.o: utf8/utf8_templates.h utf8/utf8_private.h
 ./html/bbcodeparser.o: ./html/bbcodeparser.h ./html/htmlparser.h
 ./html/bbcodeparser.o: convert/baseparser.h textstream/textstream.h
-./html/bbcodeparser.o: textstream/stream.h space/space.h textstream/types.h
-./html/bbcodeparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
-./html/bbcodeparser.o: utf8/utf8_templates.h utf8/utf8_private.h date/date.h
-./html/bbcodeparser.o: membuffer/membuffer.h textstream/types.h
-./html/bbcodeparser.o: textstream/stream_private.h
+./html/bbcodeparser.o: textstream/stream.h space/space.h convert/inttostr.h
+./html/bbcodeparser.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
+./html/bbcodeparser.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
+./html/bbcodeparser.o: textstream/types.h textstream/stream_private.h
 ./html/htmlparser.o: ./html/htmlparser.h convert/baseparser.h
 ./html/htmlparser.o: textstream/textstream.h textstream/stream.h
-./html/htmlparser.o: space/space.h textstream/types.h convert/inttostr.h
-./html/htmlparser.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
+./html/htmlparser.o: space/space.h convert/inttostr.h utf8/utf8.h
+./html/htmlparser.o: textstream/stream.h utf8/utf8_templates.h
 ./html/htmlparser.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
 ./html/htmlparser.o: textstream/types.h textstream/stream_private.h
 ./html/htmlparser.o: convert/text.h
--- a/src/log/log.h
+++ b/src/log/log.h
@@ -5,7 +5,7 @@
 */

 /*
- * Copyright (c) 2018-2022, Tomasz Sowa
+ * Copyright (c) 2018-2024, Tomasz Sowa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -36,7 +36,6 @@
 #define headerfile_pikotools_src_log_log

 #include <string>
-#include <fstream>
 #include "textstream/textstream.h"
 #include "filelog.h"

@@ -246,6 +245,7 @@ void Log::put_multiline_generic(const CharType * prefix, const CharType * msg)
 		{
 			was_new_line = true;
 			put_prefix = true;
+			msg += 1;
 		}
 		else
 		{
@@ -265,11 +265,32 @@ void Log::put_multiline_generic(const CharType * prefix, const CharType * msg)
 				put_prefix = false;
 			}

-			operator<<(*msg);
+			if constexpr ( sizeof(CharType) == sizeof(char) )
+			{
+				int c;
+				bool correct;
+				msg += utf8_to_int(msg, c, correct);
+
+				if( correct )
+					int_to_stream(c, *this);
+				else
+					int_to_stream(0xFFFD, *this); // replacement character
+			}
+			else
+			if constexpr ( sizeof(CharType) == sizeof(wchar_t) )
+			{
+				operator<<(*msg);
+				msg += 1;
+			}
+			else
+			{
+				// what is the CharType?
+				// at the moment do not print anything
+				msg += 1;
+			}
+
 			was_something_printed = true;
 		}
-
-		msg += 1;
 	}

 	if( was_something_printed )
--- a/src/space/space.cpp
+++ b/src/space/space.cpp
@@ -34,8 +34,10 @@

 #include <wchar.h>
 #include "space.h"
-#include "utf8/utf8.h"
 #include "convert/convert.h"
+#include "textstream/textstream.h"
+#include "utf8/utf8.h"
+


 namespace pt
--- a/src/space/space.h
+++ b/src/space/space.h
@@ -42,7 +42,6 @@
 #include <cstdio>
 #include <cwchar>
 #include <errno.h>
-#include "textstream/types.h"
 #include "convert/inttostr.h"
 #include "utf8/utf8.h"

--- a/src/textstream/textstream.h
+++ b/src/textstream/textstream.h
@@ -203,7 +203,6 @@ public:
 	template<typename arg_char_type, size_t arg_stack_size, size_t arg_heap_block_size>
 	TextStreamBase & operator<<(const TextStreamBase<arg_char_type, arg_stack_size, arg_heap_block_size> & arg);

-
 	template<typename arg_char_type, size_t arg_stack_size, size_t arg_heap_block_size>
 	bool operator==(const TextStreamBase<arg_char_type, arg_stack_size, arg_heap_block_size> & stream) const;

@@ -1235,6 +1234,7 @@ return *this;
 }


+
 template<typename char_type, size_t stack_size, size_t heap_block_size>
 template<typename arg_char_type, size_t arg_stack_size, size_t arg_heap_block_size>
 bool TextStreamBase<char_type, stack_size, heap_block_size>::operator==(const TextStreamBase<arg_char_type, arg_stack_size, arg_heap_block_size> & stream) const
--- a/src/utf8/utf8.cpp
+++ b/src/utf8/utf8.cpp
@@ -5,7 +5,7 @@
 */

 /* 
- * Copyright (c) 2010-2023, Tomasz Sowa
+ * Copyright (c) 2010-2024, Tomasz Sowa
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
@@ -42,9 +42,6 @@ namespace pt
 {


-
-
-
 /*!
 	returns true if 'c' is a correct unicode character
 */
@@ -128,6 +125,83 @@ bool surrogate_pair_to_int(int c1, int c2, int & z)



+/*
+	an auxiliary function for converting from wide characters to UTF-8
+	converting a wide character into one int
+
+	returns how many wide characters were used
+	if string_len is greater than 0 then the return value is always greater than zero too
+*/
+size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct)
+{
+	if( string_len == 0 )
+	{
+		z = 0;
+		correct = false;
+		return 0;
+	}
+
+	z = static_cast<int>(*wide_string);
+	correct = true;
+
+	if( sizeof(wchar_t) == 2 && is_first_surrogate_char(z) )
+	{
+		if( string_len > 1 )
+		{
+			int z2 = *(wide_string+1);
+
+			if( is_second_surrogate_char(z2) )
+			{
+				z = 0x10000 + (((z & 0x3FF) << 10) | (z2 & 0x3FF));
+				return 2;
+			}
+			else
+			{
+				correct = false;
+				return 1;
+			}
+		}
+		else
+		{
+			correct = false;
+			return 1;
+		}
+	}
+	else
+	{
+		correct = utf8_check_range(z);
+		return 1;
+	}
+}
+
+
+
+/*
+	an auxiliary function for converting from wide characters to UTF-8
+	converting a wide character into one int
+
+	returns how many wide characters were used
+	if wide_string has at least one character then the return value is always greater than zero too
+*/
+size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct)
+{
+size_t min_str_len = 1;
+
+	if( *wide_string == 0 )
+	{
+		z = 0;
+		correct = false;
+		return 0;
+	}
+
+	if( *(wide_string+1) != 0 )
+		min_str_len = 2;
+
+return wide_to_int(wide_string, min_str_len, z, correct);
+}
+
+
+
 /*
 * converts an int to a wide string
 *
@@ -185,6 +259,26 @@ bool int_to_wide(int c, std::wstring & res)
 }


+/*
+ * return true if c was a correct unicode character
+ * and has been put to the stream
+ */
+bool int_to_stream(int c, pt::Stream & stream)
+{
+	if( stream.is_char_stream() )
+	{
+		return int_to_utf8(c, stream) > 0;
+	}
+	else
+	if( stream.is_wchar_stream() )
+	{
+		return int_to_wide(c, stream);
+	}
+
+	return false;
+}
+
+


 /*!
@@ -410,9 +504,9 @@ bool utf8_to_wide(const char * utf8, size_t utf8_len, std::wstring & res, bool c
 	if( clear )
 		res.clear();

-	bool status = private_namespace::utf8_to_wide_generic(utf8, utf8_len, mode, [&res](int c) {
+	bool status = utf8_to_output_function(utf8, utf8_len, [&res](int c) {
 		int_to_wide(c, res);
-	});
+	}, mode);

 	return status;
 }
--- a/src/utf8/utf8.h
+++ b/src/utf8/utf8.h
@@ -5,7 +5,7 @@
 */

 /* 
- * Copyright (c) 2010-2023, Tomasz Sowa
+ * Copyright (c) 2010-2024, Tomasz Sowa
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
@@ -109,6 +109,21 @@ bool surrogate_pair_to_int(int c1, int c2, int & z);



+/*
+ * converting one character into a stream
+ * stream can be a utf8 or wide stream
+ */
+bool int_to_stream(int c, pt::Stream & stream);
+
+
+/*
+ * converting one unicode character to an int
+ * such a unicode character can consist of one or two wide characters
+ */
+size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct); // may these methods make public?
+size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct);
+
+
 /*
 *
 *
@@ -158,6 +173,14 @@ size_t int_to_wide(int c, wchar_t * res, size_t max_buf_len);
 bool int_to_wide(int c, std::wstring & res);


+/*!
+	 call a convert_function for each character from an utf8 string
+ */
+template<typename OutputFunction>
+bool utf8_to_output_function(const char * utf8, size_t utf8_len, OutputFunction convert_function, int mode = 1);
+
+
+
 /*!
 	converting UTF-8 string to a wide string
 */
@@ -181,9 +204,15 @@ bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear = true, int
 template<typename StreamOrStringType>
 bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear = true, int mode = 1);

+template<typename OutputFunction>
+bool utf8_to_output_function(const Stream & stream, OutputFunction output_function, int mode = 1);
+
 template<typename StreamIteratorType, typename StreamOrStringType>
 bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, StreamOrStringType & out_stream, bool clear_stream = true, int mode = 1);

+template<typename StreamIteratorType, typename OutputFunction>
+bool utf8_to_output_function(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, OutputFunction output_function, int mode = 1);
+
 template<typename CharT, size_t stack_size, size_t heap_block_size>
 class TextStreamBase;

@@ -220,6 +249,17 @@ template<typename StreamType>
 size_t int_to_utf8(int z, StreamType & utf8);


+/*!
+	convert wide characters from the stream buffer to UTF-8 and call an output_function for each chunk of the converted bytes
+
+	output_function has two arguments: const char * buf, size_t len and returns bool (returning false stops the conversion):
+	bool output_function(const char * buf, size_t len)
+
+	StreamType should have a const_iterator and begin() and end() methods
+*/
+template<typename StreamType, typename OutputFunction>
+bool wide_to_output_function(StreamType & buffer, OutputFunction output_function, int mode = 1);
+

 /*!
 	converting a wide string to UTF-8 string
--- a/src/utf8/utf8_private.cpp
+++ b/src/utf8/utf8_private.cpp
@@ -5,7 +5,7 @@
 */

 /*
- * Copyright (c) 2021-2023, Tomasz Sowa
+ * Copyright (c) 2021-2024, Tomasz Sowa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -81,85 +81,6 @@ return true;



-
-
-/*
-	an auxiliary function for converting from wide characters to UTF-8
-	converting a wide character into one int
-
-	returns how many wide characters were used
-	if string_len is greater than 0 then the return value is always greater than zero too
-*/
-size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct)
-{
-	if( string_len == 0 )
-	{
-		z = 0;
-		correct = false;
-		return 0;
-	}
-
-	z = static_cast<int>(*wide_string);
-	correct = true;
-
-	if( sizeof(wchar_t) == 2 && is_first_surrogate_char(z) )
-	{
-		if( string_len > 1 )
-		{
-			int z2 = *(wide_string+1);
-
-			if( is_second_surrogate_char(z2) )
-			{
-				z = 0x10000 + (((z & 0x3FF) << 10) | (z2 & 0x3FF));
-				return 2;
-			}
-			else
-			{
-				correct = false;
-				return 1;
-			}
-		}
-		else
-		{
-			correct = false;
-			return 1;
-		}
-	}
-	else
-	{
-		correct = utf8_check_range(z);
-		return 1;
-	}
-}
-
-
-
-/*
-	an auxiliary function for converting from wide characters to UTF-8
-	converting a wide character into one int
-
-	returns how many wide characters were used
-	if wide_string has at least one character then the return value is always greater than zero too
-*/
-size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct)
-{
-size_t min_str_len = 1;
-
-	if( *wide_string == 0 )
-	{
-		z = 0;
-		correct = false;
-		return 0;
-	}
-
-	if( *(wide_string+1) != 0 )
-		min_str_len = 2;
-
-return wide_to_int(wide_string, min_str_len, z, correct);
-}
-
-
-
 /*!
 	an auxiliary function for converting from wide characters to UTF-8

--- a/src/utf8/utf8_private.h
+++ b/src/utf8/utf8_private.h
@@ -5,7 +5,7 @@
 */

 /*
- * Copyright (c) 2021-2023, Tomasz Sowa
+ * Copyright (c) 2021-2024, Tomasz Sowa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -41,14 +41,10 @@
 namespace pt
 {

-bool utf8_check_range(int c);
 size_t int_to_utf8(int z, char * utf8, size_t utf8_max_len);
 size_t int_to_utf8(int z, std::string & utf8, bool clear);
-size_t utf8_to_int(const char * utf8, size_t utf8_len, int & res, bool & correct);
-bool is_surrogate_char(int c);
-bool is_first_surrogate_char(int c);
-bool is_second_surrogate_char(int c);
-bool surrogate_pair_to_int(int c1, int c2, int & z);
+size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct);
+size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct);


 namespace private_namespace
@@ -56,9 +52,6 @@ namespace private_namespace
 bool utf8_to_int_first_octet(unsigned char uz, size_t & len, int & res);
 bool utf8_to_int_add_next_octet(unsigned char uz, int & res);

-size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct); // may these methods make public?
-size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct);
-
 size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
 							size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode);

@@ -116,144 +109,6 @@ return wide_one_to_utf8(wide_string, min_str_len, utf8, was_error, mode);



-// declared in utf8.h, defined in utf8.cpp
-size_t utf8_to_int(const char * utf8, size_t utf8_len, int & res, bool & correct);
-
-
-
-template<typename function_type>
-bool utf8_to_wide_generic(const char * utf8, size_t utf8_len, int mode, function_type convert_function)
-{
-int z;
-size_t len;
-bool correct, was_error = false;
-
-	while( utf8_len > 0 )
-	{
-		if( (unsigned char)*utf8 <= 0x7f )
-		{
-			// small optimization
-			len = 1;
-			correct = true;
-			z = static_cast<unsigned char>(*utf8);
-		}
-		else
-		{
-			len = pt::utf8_to_int(utf8, utf8_len, z, correct); // the len will be different from zero
-		}
-
-		if( !correct )
-		{
-			if( mode == 1 )
-				convert_function(0xFFFD); // U+FFFD "replacement character"
-
-			was_error = true;
-		}
-		else
-		{
-			convert_function(z);
-		}
-
-		utf8     += len;
-		utf8_len -= len;
-	}
-
-return !was_error;
-}
-
-
-
-template<typename StreamType, typename function_type>
-bool wide_to_utf8_generic(StreamType & buffer, int mode, function_type write_function)
-{
-	char utf8_buffer[256];
-	std::size_t buffer_len = sizeof(utf8_buffer) / sizeof(char);
-	std::size_t utf8_sequence_max_length = 10;
-	std::size_t index = 0;
-	bool was_error = false;
-
-	typename StreamType::const_iterator i = buffer.begin();
-
-	while( i != buffer.end() )
-	{
-		if( index + utf8_sequence_max_length > buffer_len )
-		{
-			bool write_status = write_function(utf8_buffer, index);
-			index = 0;
-
-			if( !write_status )
-			{
-				was_error = true;
-				break;
-			}
-		}
-
-		int c = 0xFFFD; // U+FFFD "replacement character";
-		bool seems_to_be_correct = false;
-		wchar_t w1 = *i;
-
-		if( sizeof(wchar_t) == 2 && is_first_surrogate_char(w1) )
-		{
-			++i;
-
-			if( i != buffer.end() )
-			{
-				wchar_t w2 = *i;
-
-				if( surrogate_pair_to_int(w1, w2, c) )
-				{
-					seems_to_be_correct = true;
-					++i;
-				}
-				else
-				{
-					was_error = true;
-				}
-			}
-			else
-			{
-				was_error = true;
-			}
-		}
-		else
-		{
-			c = w1;
-			seems_to_be_correct = true; // we do not test utf8_check_range(...) here because it is tested in int_to_utf8(...) below
-			++i;
-		}
-
-		if( seems_to_be_correct || mode == 1 )
-		{
-			size_t seq_len = int_to_utf8(c, utf8_buffer + index, buffer_len - index);
-			// here seq_len can be zero only when c is an incorrect unicode char (the buffer is large enough)
-
-			if( seq_len == 0 )
-			{
-				was_error = true;
-
-				if( mode == 1 )
-				{
-					seq_len = int_to_utf8(0xFFFD, utf8_buffer + index, buffer_len - index); // U+FFFD "replacement character";
-				}
-			}
-
-			index += seq_len;
-		}
-	}
-
-	if( index > 0 )
-	{
-		if( !write_function(utf8_buffer, index) )
-		{
-			was_error = true;
-		}
-	}
-
-	return !was_error;
-}
-
-
-

 } // namespace private_namespace

--- a/src/utf8/utf8_templates.h
+++ b/src/utf8/utf8_templates.h
@@ -5,7 +5,7 @@
 */

 /*
- * Copyright (c) 2021-2023, Tomasz Sowa
+ * Copyright (c) 2021-2024, Tomasz Sowa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -44,7 +44,6 @@ namespace pt
 {


-
 template<typename StreamType>
 bool int_to_wide(int c, StreamType & res)
 {
@@ -128,9 +127,9 @@ bool utf8_to_wide(const char * utf8, size_t utf8_len, StreamType & res, bool cle
 	if( clear )
 		res.clear();

-	bool status = private_namespace::utf8_to_wide_generic(utf8, utf8_len, mode, [&res](int c) {
+	bool status = utf8_to_output_function(utf8, utf8_len, [&res](int c) {
 		int_to_wide(c, res);
-	});
+	}, mode);

 	return status;
 }
@@ -187,6 +186,137 @@ return !was_error;
 }


+template<typename OutputFunction>
+bool utf8_to_output_function(const char * utf8, size_t utf8_len, OutputFunction output_function, int mode)
+{
+int z;
+size_t len;
+bool correct, was_error = false;
+
+	while( utf8_len > 0 )
+	{
+		if( (unsigned char)*utf8 <= 0x7f )
+		{
+			// small optimization
+			len = 1;
+			correct = true;
+			z = static_cast<unsigned char>(*utf8);
+		}
+		else
+		{
+			len = pt::utf8_to_int(utf8, utf8_len, z, correct); // the len will be different from zero
+		}
+
+		if( !correct )
+		{
+			if( mode == 1 )
+				output_function(0xFFFD); // U+FFFD "replacement character"
+
+			was_error = true;
+		}
+		else
+		{
+			output_function(z);
+		}
+
+		utf8     += len;
+		utf8_len -= len;
+	}
+
+return !was_error;
+}
+
+
+template<typename StreamType, typename OutputFunction>
+bool wide_to_output_function(StreamType & buffer, OutputFunction output_function, int mode)
+{
+	char utf8_buffer[256];
+	std::size_t buffer_len = sizeof(utf8_buffer) / sizeof(char);
+	std::size_t utf8_sequence_max_length = 10;
+	std::size_t index = 0;
+	bool was_error = false;
+
+	typename StreamType::const_iterator i = buffer.begin();
+
+	while( i != buffer.end() )
+	{
+		if( index + utf8_sequence_max_length > buffer_len )
+		{
+			bool write_status = output_function(utf8_buffer, index);
+			index = 0;
+
+			if( !write_status )
+			{
+				was_error = true;
+				break;
+			}
+		}
+
+		int c = 0xFFFD; // U+FFFD "replacement character";
+		bool seems_to_be_correct = false;
+		wchar_t w1 = *i;
+
+		if( sizeof(wchar_t) == 2 && is_first_surrogate_char(w1) )
+		{
+			++i;
+
+			if( i != buffer.end() )
+			{
+				wchar_t w2 = *i;
+
+				if( surrogate_pair_to_int(w1, w2, c) )
+				{
+					seems_to_be_correct = true;
+					++i;
+				}
+				else
+				{
+					was_error = true;
+				}
+			}
+			else
+			{
+				was_error = true;
+			}
+		}
+		else
+		{
+			c = w1;
+			seems_to_be_correct = true; // we do not test utf8_check_range(...) here because it is tested in int_to_utf8(...) below
+			++i;
+		}
+
+		if( seems_to_be_correct || mode == 1 )
+		{
+			size_t seq_len = int_to_utf8(c, utf8_buffer + index, buffer_len - index);
+			// here seq_len can be zero only when c is an incorrect unicode char (the buffer is large enough)
+
+			if( seq_len == 0 )
+			{
+				was_error = true;
+
+				if( mode == 1 )
+				{
+					seq_len = int_to_utf8(0xFFFD, utf8_buffer + index, buffer_len - index); // U+FFFD "replacement character";
+				}
+			}
+
+			index += seq_len;
+		}
+	}
+
+	if( index > 0 )
+	{
+		if( !output_function(utf8_buffer, index) )
+		{
+			was_error = true;
+		}
+	}
+
+	return !was_error;
+}
+
+
 /*
 this function converts a UTF-8 stream into a wide stream or a wide string

@@ -203,6 +333,34 @@ output:
 */
 template<typename StreamOrStringType>
 bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear, int mode)
+{
+	if( clear )
+		res.clear();
+
+	return utf8_to_output_function(stream, [&](int z) {
+		int_to_wide(z, res);
+	}, mode);
+}
+
+
+/*
+this function reads characters from a UTF-8 stream and calls an output_function
+
+input:
+	stream - a UTF-8 stream for converting
+	mode - what to do with errors when converting
+		0: skip an invalid character
+		1: put U+FFFD "replacement character" instead of the invalid character (default)
+
+output:
+	output_function - is a function which gets one argument: int (a unicode character)
+		and should put the character to an output string/stream, this function should have the signature like this:
+		output_function(int z)
+
+	this function returns false if there were some errors when converting
+*/
+template<typename OutputFunction>
+bool utf8_to_output_function(const Stream & stream, OutputFunction output_function, int mode)
 {
 	size_t len;
 	bool correct;
@@ -210,11 +368,6 @@ bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear, i
 	size_t index = 0;
 	bool was_error = false;

-	if( clear )
-		res.clear();
-
-	// CHECKME test me when sizeof(wchar_t) is 2
-
 	do
 	{
 		len = utf8_to_int(stream, index, z, correct);
@@ -224,13 +377,13 @@ bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear, i
 			if( !correct )
 			{
 				if( mode == 1 )
-					int_to_wide(0xFFFD, res);  // U+FFFD "replacement character"
+					output_function(0xFFFD); // U+FFFD "replacement character"

 				was_error = true;
 			}
 			else
 			{
-				int_to_wide(z, res);
+				output_function(z);
 			}

 			index += len;
@@ -264,6 +417,15 @@ bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & i
 	if( clear_stream )
 		out_stream.clear();

+	return utf8_to_output_function(iterator_in, iterator_end, [&](int z){
+		int_to_wide(z, out_stream);
+	}, mode);
+}
+
+
+template<typename StreamIteratorType, typename OutputFunction>
+bool utf8_to_output_function(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, OutputFunction output_function, int mode)
+{
 	int res;
 	bool correct;
 	bool was_error = false;
@@ -274,12 +436,12 @@ bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & i

 		if( correct )
 		{
-			int_to_wide(res, out_stream);
+			output_function(res);
 		}
 		else
 		{
 			if( mode == 1 )
-				int_to_wide(0xFFFD, out_stream); // U+FFFD "replacement character"
+				output_function(0xFFFD); // U+FFFD "replacement character"

 			was_error = true;
 		}
@@ -290,7 +452,6 @@ bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & i



-
 /*!
 	this function converts UTF-8 stream into a wide string

@@ -508,10 +669,10 @@ bool wide_stream_to_utf8(StreamType & buffer, std::string & utf8, bool clear, in
 	if( clear )
 		utf8.clear();

-	return private_namespace::wide_to_utf8_generic(buffer, mode, [&utf8](const char * utf8_buffer, std::size_t buffer_len) -> bool {
+	return wide_to_output_function(buffer, [&utf8](const char * utf8_buffer, std::size_t buffer_len) -> bool {
 		utf8.append(utf8_buffer, buffer_len);
 		return true;
-	});
+	}, mode);
 }


@@ -574,10 +735,10 @@ bool wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, bool clear
 	if( clear )
 		utf8.clear();

-	return private_namespace::wide_to_utf8_generic(buffer, mode, [&utf8](const char * utf8_buffer, std::size_t buffer_len) -> bool {
+	return wide_to_output_function(buffer, [&utf8](const char * utf8_buffer, std::size_t buffer_len) -> bool {
 		utf8.write(utf8_buffer, buffer_len);
 		return true;
-	});
+	}, mode);
 }


@@ -606,7 +767,7 @@ bool wide_stream_to_utf8(StreamType & buffer, char * utf8, std::size_t max_buffe
 		buffer_ok = true;
 		max_buffer_size -= 1; // for terminating null character

-		is_ok = private_namespace::wide_to_utf8_generic(buffer, mode, [&utf8, &max_buffer_size, &buffer_ok](const char * utf8_buffer, std::size_t buffer_len) -> bool {
+		is_ok = wide_to_output_function(buffer, [&utf8, &max_buffer_size, &buffer_ok](const char * utf8_buffer, std::size_t buffer_len) -> bool {
 			std::size_t i=0;

 			for( ; i < buffer_len  ; ++i)
@@ -626,7 +787,7 @@ bool wide_stream_to_utf8(StreamType & buffer, char * utf8, std::size_t max_buffe
 			max_buffer_size -= i;
 			*utf8 = 0;
 			return buffer_ok;
-		});
+		}, mode);
 	}

 	if( was_buffer_sufficient_large )