Merge branch 'api2021'

2022-06-25 17:52:42 +02:00
parent 6c41e0a803 44bda888b5
commit 4f07c00217
25 changed files with 5712 additions and 559 deletions
@@ -1,44 +1,76 @@
 # DO NOT DELETE

 ./convert/inttostr.o: ./convert/inttostr.h
-./convert/misc.o: ./convert/misc.h ./convert/text.h
+./convert/misc.o: ./convert/misc.h ./convert/text.h textstream/stream.h
+./convert/misc.o: textstream/types.h utf8/utf8_stream.h
+./convert/misc.o: textstream/textstream.h textstream/stream.h space/space.h
+./convert/misc.o: convert/inttostr.h utf8/utf8.h utf8/utf8_templates.h
+./convert/misc.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
+./convert/misc.o: textstream/types.h ./convert/inttostr.h
 ./convert/text.o: ./convert/text.h ./convert/text_private.h
 ./convert/double.o: ./convert/double.h textstream/textstream.h
 ./convert/double.o: textstream/stream.h space/space.h textstream/types.h
 ./convert/double.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
 ./convert/double.o: utf8/utf8_templates.h utf8/utf8_private.h date/date.h
 ./convert/double.o: membuffer/membuffer.h textstream/types.h
+./convert/double.o: utf8/utf8_stream.h
+./convert/baseparser.o: ./convert/baseparser.h textstream/textstream.h
+./convert/baseparser.o: textstream/stream.h space/space.h textstream/types.h
+./convert/baseparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
+./convert/baseparser.o: utf8/utf8_templates.h utf8/utf8_private.h date/date.h
+./convert/baseparser.o: membuffer/membuffer.h textstream/types.h
+./convert/baseparser.o: utf8/utf8_stream.h
 ./date/date.o: ./date/date.h convert/inttostr.h
 ./log/filelog.o: ./log/filelog.h textstream/textstream.h textstream/stream.h
 ./log/filelog.o: space/space.h textstream/types.h convert/inttostr.h
 ./log/filelog.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
 ./log/filelog.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
-./log/filelog.o: textstream/types.h
+./log/filelog.o: textstream/types.h utf8/utf8_stream.h
 ./log/log.o: ./log/log.h textstream/textstream.h textstream/stream.h
 ./log/log.o: space/space.h textstream/types.h convert/inttostr.h utf8/utf8.h
 ./log/log.o: textstream/stream.h utf8/utf8_templates.h utf8/utf8_private.h
 ./log/log.o: date/date.h membuffer/membuffer.h textstream/types.h
-./log/log.o: ./log/filelog.h
+./log/log.o: utf8/utf8_stream.h ./log/filelog.h
 ./space/space.o: ./space/space.h textstream/types.h convert/inttostr.h
 ./space/space.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
 ./space/space.o: utf8/utf8_private.h convert/convert.h ./convert/inttostr.h
 ./space/space.o: convert/patternreplacer.h textstream/textstream.h
 ./space/space.o: textstream/stream.h space/space.h date/date.h
-./space/space.o: membuffer/membuffer.h textstream/types.h convert/strtoint.h
-./space/space.o: ./convert/text.h ./convert/misc.h ./convert/double.h
+./space/space.o: membuffer/membuffer.h textstream/types.h utf8/utf8_stream.h
+./space/space.o: convert/strtoint.h ./convert/text.h ./convert/misc.h
+./space/space.o: ./convert/double.h
 ./space/spaceparser.o: ./space/spaceparser.h ./space/space.h
 ./space/spaceparser.o: textstream/types.h convert/inttostr.h utf8/utf8.h
 ./space/spaceparser.o: textstream/stream.h utf8/utf8_templates.h
-./space/spaceparser.o: utf8/utf8_private.h convert/strtoint.h
-./space/spaceparser.o: ./convert/text.h ./convert/misc.h
+./space/spaceparser.o: utf8/utf8_private.h convert/baseparser.h
+./space/spaceparser.o: textstream/textstream.h textstream/stream.h
+./space/spaceparser.o: space/space.h date/date.h membuffer/membuffer.h
+./space/spaceparser.o: textstream/types.h utf8/utf8_stream.h
+./space/spaceparser.o: convert/strtoint.h ./convert/text.h ./convert/misc.h
 ./utf8/utf8.o: ./utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
 ./utf8/utf8.o: utf8/utf8_private.h
 ./utf8/utf8_private.o: utf8/utf8_private.h
 ./csv/csvparser.o: ./csv/csvparser.h space/space.h textstream/types.h
 ./csv/csvparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
 ./csv/csvparser.o: utf8/utf8_templates.h utf8/utf8_private.h
+./csv/csvparser.o: convert/baseparser.h textstream/textstream.h
+./csv/csvparser.o: textstream/stream.h date/date.h membuffer/membuffer.h
+./csv/csvparser.o: textstream/types.h utf8/utf8_stream.h
 ./mainoptions/mainoptionsparser.o: ./mainoptions/mainoptionsparser.h
 ./mainoptions/mainoptionsparser.o: space/space.h textstream/types.h
 ./mainoptions/mainoptionsparser.o: convert/inttostr.h utf8/utf8.h
 ./mainoptions/mainoptionsparser.o: textstream/stream.h utf8/utf8_templates.h
 ./mainoptions/mainoptionsparser.o: utf8/utf8_private.h
+./html/htmlparser.o: ./html/htmlparser.h convert/baseparser.h
+./html/htmlparser.o: textstream/textstream.h textstream/stream.h
+./html/htmlparser.o: space/space.h textstream/types.h convert/inttostr.h
+./html/htmlparser.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
+./html/htmlparser.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
+./html/htmlparser.o: textstream/types.h utf8/utf8_stream.h convert/text.h
+./html/bbcodeparser.o: ./html/bbcodeparser.h ./html/htmlparser.h
+./html/bbcodeparser.o: convert/baseparser.h textstream/textstream.h
+./html/bbcodeparser.o: textstream/stream.h space/space.h textstream/types.h
+./html/bbcodeparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
+./html/bbcodeparser.o: utf8/utf8_templates.h utf8/utf8_private.h date/date.h
+./html/bbcodeparser.o: membuffer/membuffer.h textstream/types.h
+./html/bbcodeparser.o: utf8/utf8_stream.h
@@ -0,0 +1,273 @@
+/*
+ * This file is a part of PikoTools
+ * and is distributed under the (new) BSD licence.
+ * Author: Tomasz Sowa <t.sowa@ttmath.org>
+ */
+
+/*
+ * Copyright (c) 2021-2022, Tomasz Sowa
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ *  * Neither the name Tomasz Sowa nor the names of contributors to this
+ *    project may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "baseparser.h"
+#include "utf8/utf8.h"
+#include "utf8/utf8_stream.h"
+
+
+namespace pt
+{
+
+/*
+ * put every input-related member into its initial state
+ */
+BaseParser::BaseParser()
+{
+	// inside a constructor a virtual call resolves to this class anyway,
+	// the qualified form only makes that explicit
+	BaseParser::clear_input_flags();
+}
+
+
+/*
+ * forget the current input source and reset the reader state:
+ *  - position counters go back to zero
+ *  - all string pointers and stream iterator pointers are set to null
+ *  - lastc is set to -1 and input_as_utf8 to true
+ *  - the file is closed (if it was open) and its state flags are cleared
+ */
+void BaseParser::clear_input_flags()
+{
+	// position and last character
+	line = column = 0;
+	lastc = -1;
+
+	// input source selection
+	reading_from_file = false;
+	input_as_utf8 = true;
+
+	// string inputs
+	pchar_ascii = nullptr;
+	pchar_unicode = nullptr;
+
+	// stream inputs
+	wtext_stream_iterator = wtext_stream_iterator_end = nullptr;
+	text_stream_iterator = text_stream_iterator_end = nullptr;
+
+	// file input
+	if( file.is_open() )
+	{
+		file.close();
+	}
+
+	file.clear();
+}
+
+
+/*
+ * update the position counters after a character has been stored in lastc:
+ * when the character is a new line the line counter is advanced
+ * and the column counter starts again from zero
+ */
+void BaseParser::check_new_line()
+{
+	if( lastc != '\n' )
+		return;
+
+	line += 1;
+	column = 0;
+}
+
+
+/*
+ * read one UTF-8 encoded character from the file
+ *
+ * sequences reported as incorrect by utf8_to_int() are skipped,
+ * if the file stream goes into a failed state then -1 is returned
+ * (in such a case check_new_line() is not called)
+ */
+int BaseParser::read_utf8_char()
+{
+	int code;
+	bool is_valid;
+
+	lastc = -1;
+
+	for( ; ; )
+	{
+		utf8_to_int(file, code, is_valid);
+
+		if( !file )
+			return lastc;
+
+		if( is_valid )
+			break;
+	}
+
+	lastc = code;
+	check_new_line();
+
+	return lastc;
+}
+
+
+/*
+ * read one raw byte from the file (no UTF-8 decoding),
+ * the value returned by file.get() is stored in lastc and returned
+ */
+int BaseParser::read_ascii_char()
+{
+	const int byte_read = file.get();
+
+	lastc = byte_read;
+	check_new_line();
+
+	return lastc;
+}
+
+
+/*
+ * read one character from the null terminated wide string pointed to by pchar_unicode
+ *
+ * the pointer is advanced only if the terminating zero has not been reached,
+ * at the end of the string -1 is returned
+ */
+int BaseParser::read_char_from_wchar_string()
+{
+	lastc = -1;
+
+	if( *pchar_unicode != 0 )
+	{
+		lastc = *pchar_unicode;
+		++pchar_unicode;
+	}
+
+	check_new_line();
+
+	return lastc;
+}
+
+
+/*
+ * read one UTF-8 encoded character from the null terminated string pointed to by pchar_ascii
+ *
+ * the pointer is moved by the number of bytes reported by utf8_to_int(),
+ * incorrect sequences are skipped until a correct character is decoded
+ * or the terminating zero is reached; -1 is returned if nothing was decoded
+ */
+int BaseParser::read_char_from_utf8_string()
+{
+	int code;
+	bool is_valid;
+
+	for( ; ; )
+	{
+		pchar_ascii += utf8_to_int(pchar_ascii, code, is_valid);
+
+		if( is_valid || *pchar_ascii == 0 )
+			break;
+	}
+
+	lastc = is_valid ? code : -1;
+	check_new_line();
+
+	return lastc;
+}
+
+
+/*
+ * read one raw byte from the null terminated string pointed to by pchar_ascii
+ * (no UTF-8 decoding)
+ *
+ * the byte is returned as a value from the range 0..255, the same way
+ * file.get() reports bytes in read_ascii_char(); at the end of the string
+ * the pointer is not advanced and -1 is returned
+ */
+int BaseParser::read_char_from_ascii_string()
+{
+	if( *pchar_ascii == 0 )
+	{
+		lastc = -1;
+	}
+	else
+	{
+		// go through unsigned char: where plain char is signed a byte >= 0x80
+		// would otherwise become a negative int, and 0xFF would become -1
+		// which is indistinguishable from the end-of-input marker
+		lastc = static_cast<unsigned char>(*pchar_ascii);
+		++pchar_ascii;
+	}
+
+	check_new_line();
+
+	return lastc;
+}
+
+
+/*
+ * read one character through the WTextStream iterator pointed to by wtext_stream_iterator
+ *
+ * the iterator is advanced only if it differs from the end iterator,
+ * otherwise -1 is returned
+ */
+int BaseParser::read_char_from_wtext_stream()
+{
+	lastc = -1;
+
+	if( (*wtext_stream_iterator) != (*wtext_stream_iterator_end) )
+	{
+		lastc = *(*wtext_stream_iterator);
+		++(*wtext_stream_iterator);
+	}
+
+	check_new_line();
+
+	return lastc;
+}
+
+
+/*
+ * read one UTF-8 encoded character through the TextStream iterator
+ * pointed to by text_stream_iterator
+ *
+ * utf8_to_int() is called until it reports a correct character
+ * or the iterator is equal to the end iterator; -1 is returned if nothing was decoded
+ */
+int BaseParser::read_char_from_utf8_text_stream()
+{
+	int code;
+	bool is_valid;
+
+	for( ; ; )
+	{
+		utf8_to_int(*text_stream_iterator, *text_stream_iterator_end, code, is_valid);
+
+		if( is_valid )
+			break;
+
+		if( !((*text_stream_iterator) != (*text_stream_iterator_end)) )
+			break;
+	}
+
+	lastc = is_valid ? code : -1;
+	check_new_line();
+
+	return lastc;
+}
+
+
+/*
+ * read one item through the TextStream iterator pointed to by text_stream_iterator
+ * (no UTF-8 decoding)
+ *
+ * the iterator is advanced only if it differs from the end iterator,
+ * otherwise -1 is returned
+ */
+int BaseParser::read_char_from_ascii_text_stream()
+{
+	lastc = -1;
+
+	if( (*text_stream_iterator) != (*text_stream_iterator_end) )
+	{
+		lastc = *(*text_stream_iterator);
+		++(*text_stream_iterator);
+	}
+
+	check_new_line();
+
+	return lastc;
+}
+
+
+/*
+ * read one character from the currently selected input source
+ *
+ * the sources are checked in this order:
+ *  1. the file (if reading_from_file is set)
+ *  2. the char string (pchar_ascii)
+ *  3. the wide char string (pchar_unicode)
+ *  4. the WTextStream iterators
+ *  5. the TextStream iterators
+ * for byte oriented sources input_as_utf8 selects between UTF-8 decoding
+ * and reading raw bytes; if no source is set then -1 is returned
+ */
+int BaseParser::read_char_no_escape()
+{
+	if( reading_from_file )
+		return input_as_utf8 ? read_utf8_char() : read_ascii_char();
+
+	if( pchar_ascii )
+		return input_as_utf8 ? read_char_from_utf8_string() : read_char_from_ascii_string();
+
+	if( pchar_unicode )
+		return read_char_from_wchar_string();
+
+	if( wtext_stream_iterator && wtext_stream_iterator_end )
+		return read_char_from_wtext_stream();
+
+	if( text_stream_iterator && text_stream_iterator_end )
+		return input_as_utf8 ? read_char_from_utf8_text_stream() : read_char_from_ascii_text_stream();
+
+	// there is no input source at all
+	lastc = -1;
+
+	return lastc;
+}
+
+
+/*
+ * read one character from the input
+ * in this class it only forwards to read_char_no_escape()
+ */
+int BaseParser::read_char()
+{
+	const int c = read_char_no_escape();
+
+	return c;
+}
+
+
+
+
+}
+
@@ -0,0 +1,141 @@
+/*
+ * This file is a part of PikoTools
+ * and is distributed under the (new) BSD licence.
+ * Author: Tomasz Sowa <t.sowa@ttmath.org>
+ */
+
+/*
+ * Copyright (c) 2021-2022, Tomasz Sowa
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ *  * Neither the name Tomasz Sowa nor the names of contributors to this
+ *    project may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef headerfile_picotools_convert_baseparser
+#define headerfile_picotools_convert_baseparser
+
+#include <string>
+#include <fstream>
+#include "textstream/textstream.h"
+
+
+namespace pt
+{
+
+/*
+	a base class with the input handling shared by parsers:
+	it keeps the state of the current input source (a file, a char string,
+	a wide char string or TextStream/WTextStream iterators) and provides
+	methods reading the input one character at a time
+
+	all members are protected so the class is meant to be used only through derived classes
+*/
+class BaseParser
+{
+protected:
+
+	/*
+		calls clear_input_flags()
+	*/
+	BaseParser();
+
+	/*
+		reset the position counters and lastc, forget every input source,
+		close the file if it is open and set input_as_utf8 to true
+	*/
+	virtual void clear_input_flags();
+
+	/*
+		if lastc is a new line character then increment the line counter
+		and reset the column counter
+	*/
+	virtual void check_new_line();
+
+	/*
+		methods reading one character from a particular input source,
+		each of them stores the character in lastc and returns it
+		(or -1 if there is nothing more to read)
+	*/
+	virtual int read_utf8_char();
+	virtual int read_ascii_char();
+	virtual int read_char_from_wchar_string();
+	virtual int read_char_from_utf8_string();
+	virtual int read_char_from_ascii_string();
+	virtual int read_char_from_wtext_stream();
+	virtual int read_char_from_utf8_text_stream();
+	virtual int read_char_from_ascii_text_stream();
+
+	/*
+		read one character from the input source which is currently set,
+		read_char() in this class only calls read_char_no_escape()
+	*/
+	virtual int read_char_no_escape();
+	virtual int read_char();
+
+
+
+	/*
+		the number of the line in which there is a syntax error
+	*/
+	int line;
+
+	/*
+		the number of the column in which there is a syntax error
+	*/
+	int column;
+
+
+	/*
+		true if parse() method was called
+		false if ParseString() was called
+	*/
+	bool reading_from_file;
+
+
+	/*
+		pointers to the current character
+		if ParseString() is in use
+	*/
+	const char    * pchar_ascii;
+	const wchar_t * pchar_unicode;
+
+
+	/*
+		pointers to WTextStream iterators
+		if set then both of them should be set
+	 */
+	WTextStream::const_iterator * wtext_stream_iterator;
+	WTextStream::const_iterator * wtext_stream_iterator_end;
+
+
+	/*
+		pointers to TextStream iterators
+		if set then both of them should be set
+	 */
+	TextStream::const_iterator * text_stream_iterator;
+	TextStream::const_iterator * text_stream_iterator_end;
+
+
+	/*
+		the last character read
+		or -1 if the end of the input was reached
+	*/
+	int lastc;
+
+
+	/*
+		the current file
+
+		maybe it would be better to make it a pointer?
+		if we parse only a string then there is no sense in having such an object
+	*/
+	std::ifstream file;
+
+
+	/*
+		the input is in UTF-8
+		default: true
+	*/
+	bool input_as_utf8;
+
+
+
+};
+
+}
+
+#endif
@@ -5,7 +5,7 @@
 */

 /*
- * Copyright (c) 2021, Tomasz Sowa
+ * Copyright (c) 2021-2022, Tomasz Sowa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -41,114 +41,114 @@
 namespace pt
 {

-std::string to_str(unsigned long long value, int base)
+std::string to_str(unsigned long long value, int base, size_t min_width)
 {
 	std::string res;
-	Toa(value, res, false, base);
+	Toa(value, res, false, base, min_width);

 	return res;
 }


-std::string to_str(long long value, int base)
+std::string to_str(long long value, int base, size_t min_width)
 {
 	std::string res;
-	Toa(value, res, false, base);
+	Toa(value, res, false, base, min_width);

 	return res;
 }


-std::string to_str(unsigned long value, int base)
+std::string to_str(unsigned long value, int base, size_t min_width)
 {
-	return to_str(static_cast<unsigned long long>(value), base);
+	return to_str(static_cast<unsigned long long>(value), base, min_width);
 }


-std::string to_str(long value, int base)
+std::string to_str(long value, int base, size_t min_width)
 {
-	return to_str(static_cast<long long>(value), base);
+	return to_str(static_cast<long long>(value), base, min_width);
 }


-std::string to_str(unsigned int value, int base)
+std::string to_str(unsigned int value, int base, size_t min_width)
 {
-	return to_str(static_cast<unsigned long long>(value), base);
+	return to_str(static_cast<unsigned long long>(value), base, min_width);
 }


-std::string to_str(int value, int base)
+std::string to_str(int value, int base, size_t min_width)
 {
-	return to_str(static_cast<long long>(value), base);
+	return to_str(static_cast<long long>(value), base, min_width);
 }


-std::string to_str(unsigned short value, int base)
+std::string to_str(unsigned short value, int base, size_t min_width)
 {
-	return to_str(static_cast<unsigned long long>(value), base);
+	return to_str(static_cast<unsigned long long>(value), base, min_width);
 }


-std::string to_str(short value, int base)
+std::string to_str(short value, int base, size_t min_width)
 {
-	return to_str(static_cast<long long>(value), base);
+	return to_str(static_cast<long long>(value), base, min_width);
 }





-std::wstring to_wstr(unsigned long long value, int base)
+std::wstring to_wstr(unsigned long long value, int base, size_t min_width)
 {
 	std::wstring res;
-	Toa(value, res, false, base);
+	Toa(value, res, false, base, min_width);

 	return res;
 }


-std::wstring to_wstr(long long value, int base)
+std::wstring to_wstr(long long value, int base, size_t min_width)
 {
 	std::wstring res;
-	Toa(value, res, false, base);
+	Toa(value, res, false, base, min_width);

 	return res;
 }


-std::wstring to_wstr(unsigned long value, int base)
+std::wstring to_wstr(unsigned long value, int base, size_t min_width)
 {
-	return to_wstr(static_cast<unsigned long long>(value), base);
+	return to_wstr(static_cast<unsigned long long>(value), base, min_width);
 }


-std::wstring to_wstr(long value, int base)
+std::wstring to_wstr(long value, int base, size_t min_width)
 {
-	return to_wstr(static_cast<long long>(value), base);
+	return to_wstr(static_cast<long long>(value), base, min_width);
 }


-std::wstring to_wstr(unsigned int value, int base)
+std::wstring to_wstr(unsigned int value, int base, size_t min_width)
 {
-	return to_wstr(static_cast<unsigned long long>(value), base);
+	return to_wstr(static_cast<unsigned long long>(value), base, min_width);
 }


-std::wstring to_wstr(int value, int base)
+std::wstring to_wstr(int value, int base, size_t min_width)
 {
-	return to_wstr(static_cast<long long>(value), base);
+	return to_wstr(static_cast<long long>(value), base, min_width);
 }


-std::wstring to_wstr(unsigned short value, int base)
+std::wstring to_wstr(unsigned short value, int base, size_t min_width)
 {
-	return to_wstr(static_cast<unsigned long long>(value), base);
+	return to_wstr(static_cast<unsigned long long>(value), base, min_width);
 }


-std::wstring to_wstr(short value, int base)
+std::wstring to_wstr(short value, int base, size_t min_width)
 {
-	return to_wstr(static_cast<long long>(value), base);
+	return to_wstr(static_cast<long long>(value), base, min_width);
 }


@@ -5,7 +5,7 @@
 */

 /* 
- * Copyright (c) 2012-2021, Tomasz Sowa
+ * Copyright (c) 2012-2022, Tomasz Sowa
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
@@ -52,8 +52,9 @@ namespace pt

 // if the buffer is too small it will be terminated at the beginning (empty string)
 // and the function returns false
+// min_width - if greater than zero then it is used for zero padding
 template<class CharType>
-bool Toa(unsigned long long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
+bool Toa(unsigned long long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = nullptr, size_t min_width = 0)
 {
 size_t i1, i2;
 long rest;
@@ -77,6 +78,14 @@ long rest;
 	}
 	while(value != 0 && i2 < buf_len);

+	if( min_width > 0 )
+	{
+		for( ; i2 < min_width && i2 < buf_len ; ++i2)
+		{
+			buffer[i2] = '0';
+		}
+	}
+
 	if( i2 >= buf_len )
 	{
 		buffer[0] = 0; // ops, the buffer was too small
@@ -106,7 +115,7 @@ return true;
 // if the buffer is too small it will be terminated at the beginning (empty string)
 // and the function returns false
 template<class CharType>
-bool Toa(long long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
+bool Toa(long long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = nullptr, size_t min_width = 0)
 {
 	if( len_out )
 		*len_out = 0;
@@ -126,7 +135,7 @@ bool Toa(long long value, CharType * buffer, size_t buf_len, int base = 10, size
 		is_sign   = true;
 	}

-	bool res = Toa(static_cast<unsigned long long>(value), buf, buf_len, base, len_out);
+	bool res = Toa(static_cast<unsigned long long>(value), buf, buf_len, base, len_out, min_width);

 	if( res )
 	{
@@ -146,44 +155,44 @@ return res;


 template<class CharType>
-bool Toa(unsigned long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
+bool Toa(unsigned long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0, size_t min_width = 0)
 {
-	return Toa(static_cast<unsigned long long>(value), buffer, buf_len, base, len_out);
+	return Toa(static_cast<unsigned long long>(value), buffer, buf_len, base, len_out, min_width);
 }

 template<class CharType>
-bool Toa(long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
+bool Toa(long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0, size_t min_width = 0)
 {
-	return Toa(static_cast<long long>(value), buffer, buf_len, base, len_out);
+	return Toa(static_cast<long long>(value), buffer, buf_len, base, len_out, min_width);
 }



 template<class CharType>
-bool Toa(unsigned int value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
+bool Toa(unsigned int value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0, size_t min_width = 0)
 {
-	return Toa(static_cast<unsigned long long>(value), buffer, buf_len, base, len_out);
+	return Toa(static_cast<unsigned long long>(value), buffer, buf_len, base, len_out, min_width);
 }


 template<class CharType>
-bool Toa(int value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
+bool Toa(int value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0, size_t min_width = 0)
 {
-	return Toa(static_cast<long long>(value), buffer, buf_len, base, len_out);
+	return Toa(static_cast<long long>(value), buffer, buf_len, base, len_out, min_width);
 }


 template<class CharType>
-bool Toa(unsigned short value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
+bool Toa(unsigned short value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0, size_t min_width = 0)
 {
-	return Toa(static_cast<unsigned long long>(value), buffer, buf_len, base, len_out);
+	return Toa(static_cast<unsigned long long>(value), buffer, buf_len, base, len_out, min_width);
 }


 template<class CharType>
-bool Toa(short value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
+bool Toa(short value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0, size_t min_width = 0)
 {
-	return Toa(static_cast<long long>(value), buffer, buf_len, base, len_out);
+	return Toa(static_cast<long long>(value), buffer, buf_len, base, len_out, min_width);
 }


@@ -192,7 +201,7 @@ bool Toa(short value, CharType * buffer, size_t buf_len, int base = 10, size_t *


 template<class StringType>
-void Toa(unsigned long long value, StringType & res, bool clear_string = true, int base = 10)
+void Toa(unsigned long long value, StringType & res, bool clear_string = true, int base = 10, size_t min_width = 0)
 {
 	typename StringType::value_type buffer[50];
 	size_t buffer_len = sizeof(buffer) / sizeof(wchar_t);
@@ -204,13 +213,13 @@ void Toa(unsigned long long value, StringType & res, bool clear_string = true, i
 	 * the size of the buffer is sufficient so the status should always be true
 	 */
 	size_t len_out;
-	Toa(value, buffer, buffer_len, base, &len_out);
+	Toa(value, buffer, buffer_len, base, &len_out, min_width);
 	res.append(buffer, len_out);
 }


 template<class StringType>
-void Toa(long long value, StringType & res, bool clear_string = true, int base = 10)
+void Toa(long long value, StringType & res, bool clear_string = true, int base = 10, size_t min_width = 0)
 {
 	typename StringType::value_type buffer[50];
 	size_t buffer_len = sizeof(buffer) / sizeof(wchar_t);
@@ -222,71 +231,71 @@ void Toa(long long value, StringType & res, bool clear_string = true, int base =
 	 * the size of the buffer is sufficient so the status should always be true
 	 */
 	size_t len_out;
-	Toa(value, buffer, buffer_len, base, &len_out);
+	Toa(value, buffer, buffer_len, base, &len_out, min_width);
 	res.append(buffer, len_out);
 }


 template<class StringType>
-void Toa(unsigned long value, StringType & res, bool clear_string = true, int base = 10)
+void Toa(unsigned long value, StringType & res, bool clear_string = true, int base = 10, size_t min_width = 0)
 {
-	Toa(static_cast<unsigned long long>(value), res, clear_string, base);
+	Toa(static_cast<unsigned long long>(value), res, clear_string, base, min_width);
 }


 template<class StringType>
-void Toa(long value, StringType & res, bool clear_string = true, int base = 10)
+void Toa(long value, StringType & res, bool clear_string = true, int base = 10, size_t min_width = 0)
 {
-	Toa(static_cast<long long>(value), res, clear_string, base);
+	Toa(static_cast<long long>(value), res, clear_string, base, min_width);
 }


 template<class StringType>
-void Toa(unsigned int value, StringType & res, bool clear_string = true, int base = 10)
+void Toa(unsigned int value, StringType & res, bool clear_string = true, int base = 10, size_t min_width = 0)
 {
-	Toa(static_cast<unsigned long long>(value), res, clear_string, base);
+	Toa(static_cast<unsigned long long>(value), res, clear_string, base, min_width);
 }


 template<class StringType>
-void Toa(int value, StringType & res, bool clear_string = true, int base = 10)
+void Toa(int value, StringType & res, bool clear_string = true, int base = 10, size_t min_width = 0)
 {
-	Toa(static_cast<long long>(value), res, clear_string, base);
+	Toa(static_cast<long long>(value), res, clear_string, base, min_width);
 }


 template<class StringType>
-void Toa(unsigned short value, StringType & res, bool clear_string = true, int base = 10)
+void Toa(unsigned short value, StringType & res, bool clear_string = true, int base = 10, size_t min_width = 0)
 {
-	Toa(static_cast<unsigned long long>(value), res, clear_string, base);
+	Toa(static_cast<unsigned long long>(value), res, clear_string, base, min_width);
 }


 template<class StringType>
-void Toa(short value, StringType & res, bool clear_string = true, int base = 10)
+void Toa(short value, StringType & res, bool clear_string = true, int base = 10, size_t min_width = 0)
 {
-	Toa(static_cast<long long>(value), res, clear_string, base);
+	Toa(static_cast<long long>(value), res, clear_string, base, min_width);
 }



-std::string to_str(unsigned long long value, int base = 10);
-std::string to_str(long long value, int base = 10);
-std::string to_str(unsigned long value, int base = 10);
-std::string to_str(long value, int base = 10);
-std::string to_str(unsigned int value, int base = 10);
-std::string to_str(int value, int base = 10);
-std::string to_str(unsigned short value, int base = 10);
-std::string to_str(short value, int base = 10);
+std::string to_str(unsigned long long value, int base = 10, size_t min_width = 0);
+std::string to_str(long long value, int base = 10, size_t min_width = 0);
+std::string to_str(unsigned long value, int base = 10, size_t min_width = 0);
+std::string to_str(long value, int base = 10, size_t min_width = 0);
+std::string to_str(unsigned int value, int base = 10, size_t min_width = 0);
+std::string to_str(int value, int base = 10, size_t min_width = 0);
+std::string to_str(unsigned short value, int base = 10, size_t min_width = 0);
+std::string to_str(short value, int base = 10, size_t min_width = 0);

-std::wstring to_wstr(unsigned long long value, int base = 10);
-std::wstring to_wstr(long long value, int base = 10);
-std::wstring to_wstr(unsigned long value, int base = 10);
-std::wstring to_wstr(long value, int base = 10);
-std::wstring to_wstr(unsigned int value, int base = 10);
-std::wstring to_wstr(int value, int base = 10);
-std::wstring to_wstr(unsigned short value, int base = 10);
-std::wstring to_wstr(short value, int base = 10);
+std::wstring to_wstr(unsigned long long value, int base = 10, size_t min_width = 0);
+std::wstring to_wstr(long long value, int base = 10, size_t min_width = 0);
+std::wstring to_wstr(unsigned long value, int base = 10, size_t min_width = 0);
+std::wstring to_wstr(long value, int base = 10, size_t min_width = 0);
+std::wstring to_wstr(unsigned int value, int base = 10, size_t min_width = 0);
+std::wstring to_wstr(int value, int base = 10, size_t min_width = 0);
+std::wstring to_wstr(unsigned short value, int base = 10, size_t min_width = 0);
+std::wstring to_wstr(short value, int base = 10, size_t min_width = 0);



@@ -5,7 +5,7 @@
 */

 /*
- * Copyright (c) 2017, Tomasz Sowa
+ * Copyright (c) 2017-2022, Tomasz Sowa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -36,6 +36,8 @@
 */

 #include "misc.h"
+#include "inttostr.h"
+#include "utf8/utf8.h"


 namespace pt
@@ -50,6 +52,363 @@ void SetOverflow(bool * was_overflow, bool val)



+/*
+ * put val to the out stream in the JSON \uXXXX form
+ * the digits come from Toa() called with base 16 and are left-padded with '0'
+ * up to four characters
+ */
+void esc_to_json_uformat(wchar_t val, Stream & out)
+{
+	char hex_digits[10];
+	size_t digits_len;
+
+	Toa((unsigned long)val, hex_digits, sizeof(hex_digits), 16, &digits_len);
+
+	out << "\\u";
+
+	// padding, nothing is added when there are already four or more digits
+	for(size_t pad = digits_len ; pad < 4 ; ++pad)
+	{
+		out << '0';
+	}
+
+	out << hex_digits;
+}
+
+
+/*
+ * return true if the val character had to be escaped, its escaped form
+ * has been put to the out stream in such a case
+ * return false (and put nothing) if the character needs no escaping
+ *
+ * characters with a short JSON form: \r \n \t \b \f \\ \"
+ * other values lower than 32 are written with esc_to_json_uformat()
+ */
+bool try_esc_to_json(wchar_t val, Stream & out)
+{
+	switch( val )
+	{
+	case '\r':
+		out << '\\' << 'r';
+		return true;
+
+	case '\n':
+		out << '\\' << 'n';
+		return true;
+
+	case '\t':
+		out << '\\' << 't';
+		return true;
+
+	case 0x08:
+		out << '\\' << 'b';
+		return true;
+
+	case 0x0c:
+		out << '\\' << 'f';
+		return true;
+
+	case '\\':
+		out << '\\' << '\\';
+		return true;
+
+	case '"':
+		out << '\\' << '\"';
+		return true;
+
+	default:
+		break;
+	}
+
+	if( val < 32 )
+	{
+		esc_to_json_uformat(val, out);
+		return true;
+	}
+
+	return false;
+}
+
+
+// put val to the out stream: escaped if try_esc_to_json() handles it, unchanged otherwise
+void esc_to_json(wchar_t val, Stream & out)
+{
+	const bool escaped = try_esc_to_json(val, out);
+
+	if( !escaped )
+		out << val;
+}
+
+
+// char version: the value goes through unsigned char first,
+// so a byte with the highest bit set is passed to try_esc_to_json() as 128-255
+void esc_to_json(char val, Stream & out)
+{
+	const bool escaped = try_esc_to_json((wchar_t)(unsigned char)val, out);
+
+	if( !escaped )
+		out << val;
+}
+
+
+
+// escape a null terminated string of chars, character by character
+void esc_to_json(const char * c, pt::Stream & out)
+{
+	for( ; *c != 0 ; ++c)
+		esc_to_json(*c, out);
+}
+
+
+// escape exactly len chars starting at c (null characters do not stop the loop)
+void esc_to_json(const char * c, std::size_t len, pt::Stream & out)
+{
+	for(const char * last = c + len ; c != last ; ++c)
+		esc_to_json(*c, out);
+}
+
+
+// escape a null terminated string of wide characters, character by character
+void esc_to_json(const wchar_t * c, pt::Stream & out)
+{
+	for( ; *c != 0 ; ++c)
+		esc_to_json(*c, out);
+}
+
+
+// escape exactly len wide characters starting at c (null characters do not stop the loop)
+void esc_to_json(const wchar_t * c, size_t len, pt::Stream & out)
+{
+	for(const wchar_t * last = c + len ; c != last ; ++c)
+		esc_to_json(*c, out);
+}
+
+
+// escape the whole std::string (all in.size() characters)
+void esc_to_json(const std::string & in, Stream & out)
+{
+	const char * first = in.c_str();
+	esc_to_json(first, in.size(), out);
+}
+
+
+// escape the whole std::wstring (all in.size() characters)
+void esc_to_json(const std::wstring & in, Stream & out)
+{
+	const wchar_t * first = in.c_str();
+	esc_to_json(first, in.size(), out);
+}
+
+
+
+/*
+ * return true if the val character was handled here:
+ *  - '<', '>', '&' and '"' are put to the out stream as &lt; &gt; &amp; &quot;
+ *  - the null character is only reported as handled, nothing is put to the stream
+ * return false (and put nothing) for any other character
+ */
+bool try_esc_to_xml(wchar_t val, Stream & out)
+{
+	switch( val )
+	{
+	case 0:
+		// null character is invalid in XML 1.0 and 1.1
+		// https://en.wikipedia.org/wiki/Valid_characters_in_XML
+		// return true but not put the char to the out stream
+		return true;
+
+	case '<':
+		out << "&lt;";
+		return true;
+
+	case '>':
+		out << "&gt;";
+		return true;
+
+	case '&':
+		out << "&amp;";
+		return true;
+
+	case '"':
+		out << "&quot;";
+		return true;
+
+	default:
+		break;
+	}
+
+	return false;
+}
+
+
+// put val to the out stream unless try_esc_to_xml() has already handled it
+void esc_to_xml(wchar_t val, Stream & out)
+{
+	const bool handled = try_esc_to_xml(val, out);
+
+	if( !handled )
+		out << val;
+}
+
+
+// char version: the value goes through unsigned char first,
+// so a byte with the highest bit set is passed to try_esc_to_xml() as 128-255
+void esc_to_xml(char val, Stream & out)
+{
+	const bool handled = try_esc_to_xml((wchar_t)(unsigned char)val, out);
+
+	if( !handled )
+		out << val;
+}
+
+
+// escape a null terminated string of chars, character by character
+void esc_to_xml(const char * c, pt::Stream & out)
+{
+	for( ; *c != 0 ; ++c)
+		esc_to_xml(*c, out);
+}
+
+
+// escape exactly len chars starting at c
+void esc_to_xml(const char * c, std::size_t len, pt::Stream & out)
+{
+	for(const char * last = c + len ; c != last ; ++c)
+		esc_to_xml(*c, out);
+}
+
+
+// escape a null terminated string of wide characters, character by character
+void esc_to_xml(const wchar_t * c, pt::Stream & out)
+{
+	for( ; *c != 0 ; ++c)
+		esc_to_xml(*c, out);
+}
+
+
+// escape exactly len wide characters starting at c
+void esc_to_xml(const wchar_t * c, size_t len, pt::Stream & out)
+{
+	for(const wchar_t * last = c + len ; c != last ; ++c)
+		esc_to_xml(*c, out);
+}
+
+
+// escape the whole std::string (all in.size() characters)
+void esc_to_xml(const std::string & in, Stream & out)
+{
+	const char * first = in.c_str();
+	esc_to_xml(first, in.size(), out);
+}
+
+
+// escape the whole std::wstring (all in.size() characters)
+void esc_to_xml(const std::wstring & in, Stream & out)
+{
+	const wchar_t * first = in.c_str();
+	esc_to_xml(first, in.size(), out);
+}
+
+
+
+
+
+/*
+ * return true if the val character was handled here:
+ *  - a double quote is put to the out stream as two double quotes
+ *  - the null character is only reported as handled, nothing is put to the stream
+ * return false (and put nothing) for any other character
+ */
+bool try_esc_to_csv(wchar_t val, pt::Stream & out)
+{
+	if( val == 0 )
+	{
+		// null characters are invalid in text files
+		// return true but not put to the out stream
+		return true;
+	}
+
+	if( val == '"' )
+	{
+		out << "\"\"";
+		return true;
+	}
+
+	return false;
+}
+
+
+// put val to the out stream unless try_esc_to_csv() has already handled it
+void esc_to_csv(wchar_t val, pt::Stream & out)
+{
+	const bool handled = try_esc_to_csv(val, out);
+
+	if( !handled )
+		out << val;
+}
+
+
+// char version: the value goes through unsigned char first,
+// so a byte with the highest bit set is passed to try_esc_to_csv() as 128-255
+void esc_to_csv(char val, Stream & out)
+{
+	const bool handled = try_esc_to_csv((wchar_t)(unsigned char)val, out);
+
+	if( !handled )
+		out << val;
+}
+
+
+// escape a null terminated string of chars, character by character
+void esc_to_csv(const char * c, pt::Stream & out)
+{
+	for( ; *c != 0 ; ++c)
+		esc_to_csv(*c, out);
+}
+
+
+// escape exactly len chars starting at c
+void esc_to_csv(const char * c, std::size_t len, pt::Stream & out)
+{
+	for(const char * last = c + len ; c != last ; ++c)
+		esc_to_csv(*c, out);
+}
+
+
+// escape a null terminated string of wide characters, character by character
+void esc_to_csv(const wchar_t * c, pt::Stream & out)
+{
+	for( ; *c != 0 ; ++c)
+		esc_to_csv(*c, out);
+}
+
+
+// escape exactly len wide characters starting at c
+void esc_to_csv(const wchar_t * c, size_t len, pt::Stream & out)
+{
+	for(const wchar_t * last = c + len ; c != last ; ++c)
+		esc_to_csv(*c, out);
+}
+
+
+// escape the whole std::string (all in.size() characters)
+void esc_to_csv(const std::string & in, Stream & out)
+{
+	const char * first = in.c_str();
+	esc_to_csv(first, in.size(), out);
+}
+
+
+// escape the whole std::wstring (all in.size() characters)
+void esc_to_csv(const std::wstring & in, Stream & out)
+{
+	const wchar_t * first = in.c_str();
+	esc_to_csv(first, in.size(), out);
+}
+
+
+

 }

@@ -5,7 +5,7 @@
 */

 /*
- * Copyright (c) 2017, Tomasz Sowa
+ * Copyright (c) 2017-2022, Tomasz Sowa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -40,6 +40,9 @@

 #include <limits>
 #include "text.h"
+#include "textstream/stream.h"
+#include "textstream/types.h"
+#include "utf8/utf8_stream.h"


 namespace pt
@@ -47,6 +50,78 @@ namespace pt

 void SetOverflow(bool * was_overflow, bool val);

+/*
+ * escaping for JSON
+ *
+ * try_esc_to_json() returns true if val had to be escaped (the escaped form is put to out),
+ * the esc_to_json() overloads put every input character to out, escaped where needed
+ */
+bool try_esc_to_json(wchar_t val, Stream & out);
+void esc_to_json(wchar_t val, Stream & out);
+void esc_to_json(char val, Stream & out);
+void esc_to_json(const char * c, pt::Stream & out);
+void esc_to_json(const char * c, std::size_t len, Stream & out);
+void esc_to_json(const wchar_t * c, Stream & out);
+void esc_to_json(const wchar_t * c, size_t len, pt::Stream & out);
+void esc_to_json(const std::string & in, Stream & out);
+void esc_to_json(const std::wstring & in, Stream & out);
+
+/*
+ * escaping for XML
+ *
+ * try_esc_to_xml() returns true if val was handled (escaped, or skipped in the case
+ * of the null character), the esc_to_xml() overloads put the remaining characters
+ * to out unchanged
+ */
+bool try_esc_to_xml(wchar_t val, Stream & out);
+void esc_to_xml(wchar_t c, pt::Stream & out);
+void esc_to_xml(char c, pt::Stream & out);
+void esc_to_xml(const char * c, pt::Stream & out);
+void esc_to_xml(const char * c, std::size_t len, pt::Stream & out);
+void esc_to_xml(const wchar_t * c, pt::Stream & out);
+void esc_to_xml(const wchar_t * c, size_t len, pt::Stream & out);
+void esc_to_xml(const std::string & in, Stream & out);
+void esc_to_xml(const std::wstring & in, Stream & out);
+
+/*
+ * escaping for CSV
+ *
+ * try_esc_to_csv() returns true if val was handled (a double quote is doubled,
+ * the null character is skipped), the esc_to_csv() overloads put the remaining
+ * characters to out unchanged
+ *
+ * the std::wstring overload has to be declared here, otherwise a call with
+ * a std::wstring would select the esc_to_csv() template for streams
+ */
+bool try_esc_to_csv(wchar_t val, pt::Stream & out);
+void esc_to_csv(wchar_t val, Stream & out);
+void esc_to_csv(char c, pt::Stream & out);
+void esc_to_csv(const char * c, pt::Stream & out);
+void esc_to_csv(const char * c, std::size_t len, pt::Stream & out);
+void esc_to_csv(const wchar_t * c, pt::Stream & out);
+void esc_to_csv(const wchar_t * c, size_t len, pt::Stream & out);
+void esc_to_csv(const std::string & in, Stream & out);
+void esc_to_csv(const std::wstring & in, Stream & out);
+
+
+
+/*
+ * escape the content of a stream for JSON
+ *
+ * StreamType has to provide begin(), end() and a const_iterator
+ * with the get_unicode_and_advance(end) method
+ */
+template<typename StreamType>
+void esc_to_json(const StreamType & in, Stream & out)
+{
+	typename StreamType::const_iterator pos  = in.begin();
+	typename StreamType::const_iterator last = in.end();
+
+	for( ; pos != last ; )
+	{
+		// the iterator is moved forward by get_unicode_and_advance()
+		const wchar_t unicode_char = pos.get_unicode_and_advance(last);
+		esc_to_json(unicode_char, out);
+	}
+}
+
+
+/*
+ * escape the content of a stream for XML
+ *
+ * StreamType has to provide begin(), end() and a const_iterator
+ * with the get_unicode_and_advance(end) method
+ */
+template<typename StreamType>
+void esc_to_xml(const StreamType & in, Stream & out)
+{
+	typename StreamType::const_iterator pos  = in.begin();
+	typename StreamType::const_iterator last = in.end();
+
+	for( ; pos != last ; )
+	{
+		// the iterator is moved forward by get_unicode_and_advance()
+		const wchar_t unicode_char = pos.get_unicode_and_advance(last);
+		esc_to_xml(unicode_char, out);
+	}
+}
+
+
+/*
+ * escape the content of a stream for CSV
+ *
+ * StreamType has to provide begin(), end() and a const_iterator
+ * with the get_unicode_and_advance(end) method
+ */
+template<typename StreamType>
+void esc_to_csv(const StreamType & in, Stream & out)
+{
+	typename StreamType::const_iterator pos  = in.begin();
+	typename StreamType::const_iterator last = in.end();
+
+	for( ; pos != last ; )
+	{
+		// the iterator is moved forward by get_unicode_and_advance()
+		const wchar_t unicode_char = pos.get_unicode_and_advance(last);
+		esc_to_csv(unicode_char, out);
+	}
+}

 }

@@ -44,10 +44,17 @@ namespace pt
 {


+// the constructor only sets the input_as_utf8 flag to true
+// NOTE(review): input_as_utf8 is no longer declared in CSVParser,
+// presumably it comes from the BaseParser base class -- confirm
+CSVParser::CSVParser()
+{
+	input_as_utf8    = true;
+}
+


 CSVParser::Status CSVParser::parse_file(const char * file_name, Space & out_space)
 {
+	clear_input_flags();
+
 	reading_from_file = true;
 	space = &out_space;

@@ -98,10 +105,9 @@ CSVParser::Status CSVParser::parse_file(const std::wstring & file_name, Space &

 CSVParser::Status CSVParser::parse(const char * str, Space & out_space)
 {
-	reading_from_file         = false;
-	reading_from_wchar_string = false;
+	clear_input_flags();
+
 	pchar_ascii = str;
-	pchar_unicode             = 0;
 	space       = &out_space;

 	parse();
@@ -119,10 +125,9 @@ CSVParser::Status CSVParser::parse(const std::string & str, Space & out_space)

 CSVParser::Status CSVParser::parse(const wchar_t * str, Space & out_space)
 {
-	reading_from_file         = false;
-	reading_from_wchar_string = true;
+	clear_input_flags();
+
 	pchar_unicode = str;
-	pchar_ascii               = 0;
 	space         = &out_space;

 	parse();
@@ -285,132 +290,6 @@ bool CSVParser::read_non_quoted_value_to(std::wstring & value)



-
-int CSVParser::read_utf8_char()
-{
-int c;
-bool correct;
-
-	lastc = -1;
-
-	do
-	{
-		utf8_to_int(file, c, correct);
-
-		if( !file )
-			return lastc;
-	}
-	while( !correct );
-
-	lastc = c;
-
-	if( lastc == '\n' )
-		++line;
-
-return lastc;
-}
-
-
-
-int CSVParser::read_ascii_char()
-{
-	lastc = file.get();
-
-	if( lastc == '\n' )
-		++line;
-
-return lastc;
-}
-
-
-
-
-int CSVParser::read_char_from_wchar_string()
-{
-	if( *pchar_unicode == 0 )
-		lastc = -1;
-	else
-		lastc = *(pchar_unicode++);
-
-	if( lastc == '\n' )
-		++line;
-
-return lastc;
-}
-
-
-int CSVParser::read_char_from_utf8_string()
-{
-int c;
-bool correct;
-
-	lastc = -1;
-
-	do
-	{
-		size_t len = utf8_to_int(pchar_ascii, c, correct);
-		pchar_ascii += len;
-	}
-	while( *pchar_ascii && !correct );
-
-	if( correct )
-		lastc = c;
-
-	if( lastc == '\n' )
-		++line;
-
-return lastc;
-}
-
-
-int CSVParser::read_char_from_ascii_string()
-{
-	if( *pchar_ascii == 0 )
-		lastc = -1;
-	else
-		lastc = *(pchar_ascii++);
-
-	if( lastc == '\n' )
-		++line;
-
-return lastc;
-}
-
-
-int CSVParser::read_char_no_escape()
-{
-	if( reading_from_file )
-	{
-		if( input_as_utf8 )
-			return read_utf8_char();
-		else
-			return read_ascii_char();
-	}
-	else
-	{
-		if( reading_from_wchar_string )
-		{
-			return read_char_from_wchar_string();
-		}
-		else
-		{
-			if( input_as_utf8 )
-				return read_char_from_utf8_string();
-			else
-				return read_char_from_ascii_string();
-		}
-	}
-}
-
-
-
-
-int CSVParser::read_char()
-{
-	return read_char_no_escape();
-}
-
-
 }


@@ -38,9 +38,11 @@
 #ifndef headerfile_picotools_csv_csvparser
 #define headerfile_picotools_csv_csvparser

-#include "space/space.h"
 #include <string>
 #include <fstream>
+#include "space/space.h"
+#include "convert/baseparser.h"
+


 namespace pt
@@ -51,10 +53,12 @@ namespace pt
 * https://datatracker.ietf.org/doc/html/rfc4180
 *
 */
-class CSVParser
+class CSVParser : public BaseParser
 {
 public:

+	CSVParser();
+
 	enum Status
 	{
 		ok,
@@ -85,53 +89,6 @@ protected:

 	Space * space;

-	/*
-		true if parse_file() method was called
-		false if parse() was called
-	*/
-	bool reading_from_file;
-
-	/*
-		true if parse(wchar_t *) or parse(std::wstring&) was called
-	*/
-	bool reading_from_wchar_string;
-
-	/*
-		pointers to the current character
-		if parse() is being used
-	*/
-	const char    * pchar_ascii;
-	const wchar_t * pchar_unicode;
-
-
-	/*
-		last read char
-		or -1 if the end
-	*/
-	int lastc;
-
-
-
-	/*
-		a number of a line in which there is a syntax_error
-	*/
-	int line;
-
-	/*
-		current file
-
-		may it would be better to make a pointer?
-		if we parse only a string then there is no sense to have such an object
-	*/
-	std::ifstream file;
-
-	/*
-		input file is in UTF-8
-		default: true
-	*/
-	bool input_as_utf8;
-
-


 	void parse();
@@ -142,19 +99,6 @@ protected:
 	bool read_non_quoted_value_to(std::wstring & value);


-
-	/*
-	 * copied from SpaceParser
-	 * may it would be better to have a class with those methods and inherit from it?
-	 */
-	int read_utf8_char();
-	int read_ascii_char();
-	int read_char_from_wchar_string();
-	int read_char_from_utf8_string();
-	int read_char_from_ascii_string();
-	int read_char_no_escape();
-
-	int read_char();
 };

 }
@@ -0,0 +1,645 @@
+/*
+ * This file is a part of PikoTools
+ * and is distributed under the (new) BSD licence.
+ * Author: Tomasz Sowa <t.sowa@ttmath.org>
+ */
+
+/* 
+ * Copyright (c) 2008-2021, Tomasz Sowa
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ *  * Neither the name Tomasz Sowa nor the names of contributors to this
+ *    project may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "bbcodeparser.h"
+
+
+namespace pt
+{
+
+
+
+
+// return true if both null terminated strings are the same (case sensitive)
+bool BBCODEParser::Equal(const wchar_t * str1, const wchar_t * str2)
+{
+	for( ; *str1 == *str2 ; ++str1, ++str2)
+	{
+		// both strings ended at the same position
+		if( *str1 == 0 )
+			return true;
+	}
+
+	return false;
+}
+
+
+
+
+// a tag name can consist of ascii letters and the '*' and '_' characters
+bool BBCODEParser::IsValidCharForName(int c)
+{
+	const bool is_small_letter = (c >= 'a' && c <= 'z');
+	const bool is_big_letter   = (c >= 'A' && c <= 'Z');
+
+	return is_small_letter || is_big_letter || c == '*' || c == '_';
+}
+
+
+// a bbcode tag starts with '['
+bool BBCODEParser::IsOpeningTagMark(wchar_t c)
+{
+	const bool is_opening_mark = (c == '[');
+	return is_opening_mark;
+}
+
+
+// a bbcode tag ends with ']'
+bool BBCODEParser::IsClosingTagMark(wchar_t c)
+{
+	const bool is_closing_mark = (c == ']');
+	return is_closing_mark;
+}
+
+// always returns false, no character is treated as an xml simple tag closing mark
+bool BBCODEParser::IsClosingXmlSimpleTagMark(wchar_t c)
+{
+	return false;
+}
+
+
+
+// there are no commentaries in bbcode
+// so this method always returns false, the argument is ignored
+bool BBCODEParser::IsOpeningCommentaryTagMark(const wchar_t *)
+{
+	return false;
+}
+
+
+// always returns zero
+// (IsOpeningCommentaryTagMark() never reports a commentary in bbcode)
+size_t BBCODEParser::OpeningCommentaryTagMarkSize()
+{
+	return 0;
+}
+
+
+
+// always returns false and does not read anything from the input
+bool BBCODEParser::SkipCommentaryTagIfExists()
+{
+	return false;
+}
+
+
+
+
+
+
+
+
+
+
+/*
+ * put a normal text (the [str, end) range) to the output
+ *
+ * one enter will generate one <br>
+ * two enters or more will generate only two br (<br><br>)
+ * <br> is not generated when there is an open <ol>, <ul> or <li> tag
+ * other characters are printed with PrintEscape()
+ *
+ * white characters and new lines at the end of the range are removed
+ * only when the whole input has been read (lastc is -1 at the end of the input),
+ * the previous condition (lastc != -1) was inverted: it trimmed every part
+ * of the text except the last one
+ */
+void BBCODEParser::PutNormalText(const wchar_t * str, const wchar_t * end)
+{
+int br_len;
+
+	if( lastc == -1 )
+	{
+		// trimming last white characters at end of the user text
+		while( str<end && (IsWhite(*(end-1)) || *(end-1)==10) )
+			--end;
+	}
+
+
+	while( str < end )
+	{
+		if( *str == 10 )
+		{
+			++str;
+			br_len = 1;
+
+			// skipping white characters without a new line character
+			while( str < end && IsWhite(*str) )
+				++str;
+
+			if( str < end && *str == 10 )
+			{
+				br_len = 2;
+
+				// skipping white characters with new line characters
+				while( str < end && (IsWhite(*str) || *str==10) )
+					++str;
+			}
+
+			if( !has_open_ol_tag && !has_open_ul_tag && !has_open_li_tag )
+			{
+				for(int i=0 ; i < br_len ; ++i)
+					(*out_string) += L"<br>\n";
+			}
+		}
+		else
+		{
+			PrintEscape(*str);
+			++str;
+		}
+	}
+}
+
+
+
+// intentionally empty: start and last_non_white are left unchanged
+void BBCODEParser::ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white)
+{
+}
+
+
+// if the two last items on the stack are opening [*] tags
+// then the last one is removed from the stack
+void BBCODEParser::CheckExceptions()
+{
+	if( stack_len < 2 )
+		return;
+
+	const bool both_opening = pstack[stack_len-1].type == Item::opening &&
+							  pstack[stack_len-2].type == Item::opening;
+
+	if( both_opening &&
+		IsNameEqual(L"*", pstack[stack_len-1].name) &&
+		IsNameEqual(L"*", pstack[stack_len-2].name) )
+	{
+		// removing the last [*] from the stack
+		// </li> was put automatically
+		PopStack();
+	}
+}
+
+
+
+
+/*
+	return a description of the given bbcode tag
+	or a null pointer if the tag is not known (the name is compared with Equal())
+
+	bbcode format:
+	[bbcodetag=value]some text[/bbcodetag]
+	the value can be quoted, e.g.
+	[bbcodetag="value"]some text[/bbcodetag], or
+	[bbcodetag='value']some text[/bbcodetag]
+
+	the third string below (in tags table) is 'html_argument' from Tags,
+	it can contain a special character % followed by a string which means:
+	 %1  - "value" escaped as for html
+	 %2  - "some text" escaped as for html
+	 %u1 - "value" trimmed and escaped as for urls
+	 %u2 - "some text" trimmed and escaped as for urls
+	 %%  - one %
+
+	 if you are using %2 or %u2 then "some text" is not treated as bbcode, e.g.
+	 [bbcodetag=value]some [b]text[/b][/bbcodetag] will produce:
+	 <htmltag arg="value">some [b]text[/b]</htmltag> (the inner tags [b][/b] were not parsed)
+
+	 also when using %2 or %u2 the closing bbcode tag is skipped
+	 (if you want this tag then you can put it in 'html_argument')
+
+	 and when using u (%u1 or %u2) the argument is trimmed from whitespaces and new lines
+	 at the beginning and at the end
+	 (because otherwise a space would be changed to %20 and this is probably not what you really want)
+*/
+const BBCODEParser::Tags * BBCODEParser::FindTag(const wchar_t * tag)
+{
+	static Tags tags[] = {
+		{L"*",     L"li",    L">",                        false},
+		{L"b",     L"em",    L">",                        true},
+		{L"i",     L"span",  L" class=\"bbitalic\">",     true},
+		{L"u",     L"span",  L" class=\"bbunderline\">",  true},
+		{L"s",     L"span",  L" class=\"bbstrike\">",     true},
+		{L"code",  L"code",  L" class=\"bbcode\">",       false},
+		{L"list",  L"ul",    L" class=\"bblist\">",       false},
+		{L"color", L"span",  L" class=\"bbcol%1\">",      true},
+		{L"url",   L"a",     L" href=\"%u1\">",           true},
+		{L"img",   L"img",   L" alt=\"%1\" src=\"%u2\">", true},
+		{L"quote", L"div",   L" class=\"bbquote\">\n<span class=\"bbquotewho\">%1</span><br>\n", false},
+	};
+
+	const Tags * const tags_end = tags + sizeof(tags) / sizeof(Tags);
+
+	for(const Tags * current = tags ; current != tags_end ; ++current)
+	{
+		if( Equal(tag, current->bbcode) )
+			return current;
+	}
+
+	return 0;
+}
+
+// std::wstring version, forwards to FindTag(const wchar_t *)
+const BBCODEParser::Tags * BBCODEParser::FindTag(const std::wstring & tag)
+{
+	const wchar_t * tag_name = tag.c_str();
+
+	return FindTag(tag_name);
+}
+
+
+
+/*
+ * narrow the [start, end) range of a tag argument to the value itself:
+ * white characters around the value, one leading '=' and an opening quote
+ * (' or ") are skipped, the closing quote is skipped if it is the same character
+ * as the opening one
+ * both pointers are changed in place
+ */
+void BBCODEParser::PrintArgumentCheckQuotes(const wchar_t * & start, const wchar_t * & end)
+{
+	// white characters before '='
+	for( ; start < end && IsWhite(*start) ; ++start)
+	{
+	}
+
+	// first '=' character if exists
+	if( start < end && *start == '=' )
+		++start;
+
+	// white characters at the beginning of the value
+	for( ; start < end && IsWhite(*start) ; ++start)
+	{
+	}
+
+	// and at the end
+	for( ; start < end && IsWhite(*(end-1)) ; --end)
+	{
+	}
+
+	if( start >= end )
+		return;
+
+	const wchar_t quote = *start;
+
+	if( quote != '\'' && quote != '\"' )
+		return;
+
+	++start;
+
+	if( start < end && *(end-1) == quote )
+		--end;
+
+	// skipping white characters after a first quote char [url  =  "   ww...."]
+	for( ; start < end && IsWhite(*start) ; ++start)
+	{
+	}
+}
+
+
+
+/*
+ * put the c character to the output, encoded for an url
+ *
+ * '&' is written as &amp;
+ * ascii letters, digits and the characters _ ? . , / - + * ( ) = : are copied unchanged
+ * any other character is converted to UTF-8 and each byte is written as %XX
+ * (two uppercase hex digits), percent-encoding works on bytes so a wide character
+ * cannot be printed as a single %XXXX sequence
+ *
+ * values which are not valid code points (negative, surrogates, above 0x10FFFF)
+ * are replaced with U+FFFD (replacement character)
+ */
+void BBCODEParser::PrintEncode(int c)
+{
+	if( c == '&' )
+	{
+		(*out_string) += L"&amp;";
+		return;
+	}
+
+	if( (c>='a' && c<='z') ||
+		(c>='A' && c<='Z') ||
+		(c>='0' && c<='9') ||
+		(c=='_' || c=='?' || c=='.' || c==',' || c=='/' || c=='-' ||
+		 c=='+' || c=='*' || c=='(' || c==')' || c=='=' || c==':')
+		)
+	{
+		(*out_string) += c;
+		return;
+	}
+
+	unsigned long code = 0xFFFD;
+
+	if( c >= 0 && c <= 0x10FFFF && !(c >= 0xD800 && c <= 0xDFFF) )
+		code = (unsigned long)c;
+
+	unsigned char utf8[4];
+	size_t utf8_len;
+
+	if( code < 0x80 )
+	{
+		utf8[0] = (unsigned char)code;
+		utf8_len = 1;
+	}
+	else
+	if( code < 0x800 )
+	{
+		utf8[0] = (unsigned char)(0xC0 | (code >> 6));
+		utf8[1] = (unsigned char)(0x80 | (code & 0x3F));
+		utf8_len = 2;
+	}
+	else
+	if( code < 0x10000 )
+	{
+		utf8[0] = (unsigned char)(0xE0 | (code >> 12));
+		utf8[1] = (unsigned char)(0x80 | ((code >> 6) & 0x3F));
+		utf8[2] = (unsigned char)(0x80 | (code & 0x3F));
+		utf8_len = 3;
+	}
+	else
+	{
+		utf8[0] = (unsigned char)(0xF0 | (code >> 18));
+		utf8[1] = (unsigned char)(0x80 | ((code >> 12) & 0x3F));
+		utf8[2] = (unsigned char)(0x80 | ((code >> 6) & 0x3F));
+		utf8[3] = (unsigned char)(0x80 | (code & 0x3F));
+		utf8_len = 4;
+	}
+
+	static const wchar_t hex_digits[] = L"0123456789ABCDEF";
+
+	for(size_t i=0 ; i < utf8_len ; ++i)
+	{
+		(*out_string) += '%';
+		(*out_string) += hex_digits[utf8[i] >> 4];
+		(*out_string) += hex_digits[utf8[i] & 0x0F];
+	}
+}
+
+
+/*
+ * put the c character to the output, escaped for html:
+ * < > & are written as &lt; &gt; &amp;
+ * a double quote is written as &quot; only if change_quote is true
+ * other characters are copied unchanged
+ */
+void BBCODEParser::PrintEscape(int c, bool change_quote)
+{
+	switch( c )
+	{
+	case '<':
+		(*out_string) += L"&lt;";
+		break;
+
+	case '>':
+		(*out_string) += L"&gt;";
+		break;
+
+	case '&':
+		(*out_string) += L"&amp;";
+		break;
+
+	case '\"':
+		if( change_quote )
+			(*out_string) += L"&quot;";
+		else
+			(*out_string) += c;
+
+		break;
+
+	default:
+		(*out_string) += c;
+		break;
+	}
+}
+
+
+// print a tag argument encoded for an url:
+// the '=' character, quotes, white characters and new lines around the value are removed first
+void BBCODEParser::PrintArgumentEncode(const wchar_t * start, const wchar_t * end)
+{
+	PrintArgumentCheckQuotes(start, end);
+	TrimWhiteWithNewLines(start, end);
+
+	// the range version calls PrintEncode() for each character
+	PrintEncode(start, end);
+}
+
+
+// print a tag argument escaped for html:
+// the '=' character, quotes and white characters around the value are removed first
+void BBCODEParser::PrintArgumentEscape(const wchar_t * start, const wchar_t * end)
+{
+	PrintArgumentCheckQuotes(start, end);
+
+	// quotes are escaped as well here
+	PrintEscape(start, end, true);
+}
+
+
+/*
+ * called when the tag is going to be opened
+ * nothing is done if the html name of the tag is different from tag_name,
+ * otherwise: if condition is already true (such a tag is open) the closing tag
+ * and a new line are put first, and then condition is set to true
+ */
+void BBCODEParser::CheckOpeningTag(const Tags * tag, const wchar_t * tag_name, bool & condition)
+{
+	if( !Equal(tag->html_tag, tag_name) )
+		return;
+
+	if( condition )
+	{
+		PutClosingTag(tag);
+		(*out_string) += '\n';
+	}
+
+	condition = true;
+}
+
+
+/*
+ * update the has_open_li_tag, has_open_ul_tag and has_open_ol_tag flags
+ * for the tag which is going to be opened
+ * if there is an open <li> and no list was open before this call
+ * then <ul> is put to the output
+ */
+void BBCODEParser::CheckOpeningTag(const Tags * tag)
+{
+	// remembered before the flags are changed below
+	const bool list_was_open = has_open_ul_tag || has_open_ol_tag;
+
+	CheckOpeningTag(tag, L"li", has_open_li_tag);
+	CheckOpeningTag(tag, L"ul", has_open_ul_tag);
+	CheckOpeningTag(tag, L"ol", has_open_ol_tag);
+
+	if( list_was_open || !has_open_li_tag )
+		return;
+
+	(*out_string) += L"<ul>\n";
+	has_open_ul_tag = true;
+}
+
+
+
+
+
+// print each character from the [start, end) range with PrintEscape(int, bool)
+void BBCODEParser::PrintEscape(const wchar_t * start, const wchar_t * end, bool change_quote)
+{
+	while( start < end )
+	{
+		PrintEscape(*start, change_quote);
+		++start;
+	}
+}
+
+
+
+// print each character from the [start, end) range with PrintEncode(int)
+void BBCODEParser::PrintEncode(const wchar_t * start, const wchar_t * end)
+{
+	while( start < end )
+	{
+		PrintEncode(*start);
+		++start;
+	}
+}
+
+
+
+/*
+ * called by PutOpeningTag() for a tag which is not in the bbcode tags table
+ * currently only '[' and the name of the last item are put to the output,
+ * the code copying the rest of the tag (up to ']') is disabled, see FIXME below
+ */
+void BBCODEParser::PutOpeningTagFromEzc()
+{
+	// this can be a tag from Ezc templates system
+	(*out_string) += '[';
+	(*out_string) += LastItem().name;
+
+
+// FIXME
+//	const wchar_t * start = pchar;
+//
+//	while( *pchar && *pchar!=']' )
+//		++pchar;
+//
+//	if( *pchar == ']' )
+//		++pchar;
+//
+//	Put(start, pchar);
+}
+
+
+
+
+
+// print the value of a tag argument (%1 or %u1 from a pattern):
+// encoded for an url if has_u is true, escaped for html otherwise
+void BBCODEParser::PutHtmlArgument1(const wchar_t * arg_start, const wchar_t * arg_end, bool has_u)
+{
+	if( !has_u )
+	{
+		PrintArgumentEscape(arg_start, arg_end);
+		return;
+	}
+
+	PrintArgumentEncode(arg_start, arg_end);
+}
+
+
+
+// narrow the [start, end) range: white characters and new lines (10)
+// are skipped at the beginning and at the end, both pointers are changed in place
+void BBCODEParser::TrimWhiteWithNewLines(const wchar_t * & start, const wchar_t * & end)
+{
+	for( ; start < end ; ++start)
+	{
+		if( !IsWhite(*start) && *start != 10 )
+			break;
+	}
+
+	for( ; start < end ; --end)
+	{
+		if( !IsWhite(*(end-1)) && *(end-1) != 10 )
+			break;
+	}
+}
+
+
+
+/*
+ * handle %2 and %u2 from a tag pattern ("some text" between the opening
+ * and the closing bbcode tag)
+ *
+ * this method is not finished after moving the parser to read_char():
+ * looking for the closing tag and printing the text are disabled (see FIXME below),
+ * so currently the input is read up to its end and the opening tag is removed
+ * from the stack
+ *
+ * read_char() is called in every iteration, previously the loop did not
+ * change lastc when lastc was an opening tag mark and it never ended
+ * NOTE(review): it is assumed that read_char() sets lastc to -1 at the end of the input -- confirm
+ */
+void BBCODEParser::PutHtmlArgument2(const Tags * tag, bool has_u)
+{
+//const wchar_t * start = pchar;
+//const wchar_t * end   = pchar;
+bool first_tag_removed = false;
+
+	while( lastc != -1 )
+	{
+		if( IsOpeningTagMark(lastc) )
+		{
+			// FIXME
+//			if( IsClosingTagForLastItem() )
+//			{
+//				// the last tag is skipped when using patterns with %2 or %u2
+//
+//				PopStack(); // removing opening tag from the stack
+//				first_tag_removed = true;
+//				break;
+//			}
+		}
+
+		// the current character is always consumed so the loop always ends
+		read_char();
+		//end = pchar;
+	}
+
+	if( !first_tag_removed )
+		PopStack(); // user has forgotten to close the tag
+
+	if( has_u )
+	{
+// FIXME
+//		TrimWhiteWithNewLines(start, end);
+//		PrintEncode(start, end);
+	}
+	else
+	{
+		// FIXME
+//		PrintEscape(start, end);
+	}
+}
+
+
+
+/*
+ * print tag->html_argument to the output, replacing the special sequences:
+ *  %1 and %u1 - printed by PutHtmlArgument1() from the [arg_start, arg_end) range
+ *  %2 and %u2 - printed by PutHtmlArgument2()
+ *  %%         - one %
+ * other characters of the pattern are copied unchanged
+ */
+void BBCODEParser::PutHtmlArgument(const Tags * tag, const wchar_t * arg_start, const wchar_t * arg_end)
+{
+	for(const wchar_t * pattern = tag->html_argument ; *pattern ; )
+	{
+		if( *pattern != '%' )
+		{
+			(*out_string) += *pattern;
+			++pattern;
+			continue;
+		}
+
+		++pattern; // skipping '%'
+
+		const bool has_u = (*pattern == 'u');
+
+		if( has_u )
+			++pattern;
+
+		switch( *pattern )
+		{
+		case '1':
+			++pattern;
+			PutHtmlArgument1(arg_start, arg_end, has_u);
+			break;
+
+		case '2':
+			++pattern;
+			PutHtmlArgument2(tag, has_u);
+			break;
+
+		case '%':
+			(*out_string) += '%';
+			++pattern;
+			break;
+
+		default:
+			// unrecognized, will be printed next time as a normal character
+			break;
+		}
+	}
+}
+
+
+/*
+ * put the html opening tag for a known bbcode tag
+ * printing of the tag argument is disabled for now (see FIXME below)
+ * for a tag which is not inline a new line is put and SkipWhiteLines() is called
+ */
+void BBCODEParser::PutOpeningTagFromBBCode(const Tags * tag)
+{
+	CheckOpeningTag(tag);
+	PutOpeningTagMark();
+	Put(tag->html_tag);
+
+// FIXME
+//	const wchar_t * start = pchar;
+//
+//	while( *pchar && *pchar != ']' )
+//		++pchar;
+//
+//	PutHtmlArgument(tag, start, pchar);
+//
+//	if( *pchar == ']' )
+//		++pchar;
+
+	if( tag->inline_tag )
+		return;
+
+	Put(10);
+	SkipWhiteLines();
+}
+
+
+// put the opening tag for the last item:
+// a known bbcode tag is printed as html, an unknown one is treated as an Ezc tag
+// this method always returns false
+bool BBCODEParser::PutOpeningTag()
+{
+	const Tags * tag = FindTag(LastItem().name);
+
+	if( tag )
+		PutOpeningTagFromBBCode(tag);
+	else
+		PutOpeningTagFromEzc();
+
+	return false;
+}
+
+
+/*
+ * put the html closing tag for the given bbcode tag
+ * a null tag is skipped
+ * for a tag which is not inline a new line is put and SkipWhiteLines() is called
+ * the has_open_li_tag, has_open_ol_tag or has_open_ul_tag flag is cleared
+ * when li, ol or ul is closed
+ */
+void BBCODEParser::PutClosingTag(const Tags * tag)
+{
+	if( tag == 0 )
+		return; // skipping the tag
+
+	const wchar_t * html_name = tag->html_tag;
+
+	PutOpeningTagMark();
+	(*out_string) += '/';
+	(*out_string) += html_name;
+	PutClosingTagMark();
+
+	if( !tag->inline_tag )
+	{
+		(*out_string) += L"\n";
+		SkipWhiteLines();
+	}
+
+	if( Equal(html_name, L"li") )
+		has_open_li_tag = false;
+
+	if( Equal(html_name, L"ol") )
+		has_open_ol_tag = false;
+
+	if( Equal(html_name, L"ul") )
+		has_open_ul_tag = false;
+}
+
+
+// put the closing tag for a bbcode tag given by its name,
+// nothing is printed for an unknown name (FindTag() returns a null pointer then)
+void BBCODEParser::PutClosingTag(const wchar_t * tag_name)
+{
+	PutClosingTag(FindTag(tag_name));
+}
+
+
+
+// clear the flags of open list tags and call SkipWhiteLines()
+void BBCODEParser::Init()
+{
+	has_open_li_tag = has_open_ol_tag = has_open_ul_tag = false;
+
+	SkipWhiteLines();
+}
+
+
+// put closing tags for list tags which are still open
+// the order is: </li>, </ol>, </ul>
+// the flags are not cleared here
+void BBCODEParser::Uninit()
+{
+	if( has_open_li_tag )
+		(*out_string) += L"</li>\n";
+
+	if( has_open_ol_tag )
+		(*out_string) += L"</ol>\n";
+
+	if( has_open_ul_tag )
+		(*out_string) += L"</ul>\n";
+}
+
+
+
+}
+
@@ -0,0 +1,128 @@
+/*
+ * This file is a part of PikoTools
+ * and is distributed under the (new) BSD licence.
+ * Author: Tomasz Sowa <t.sowa@ttmath.org>
+ */
+
+/* 
+ * Copyright (c) 2008-2021, Tomasz Sowa
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ *  * Neither the name Tomasz Sowa nor the names of contributors to this
+ *    project may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef headerfile_winix_core_bbcodeparser
+#define headerfile_winix_core_bbcodeparser
+
+#include "htmlparser.h"
+
+namespace pt
+{
+
+
+/*
+ * a parser changing bbcode tags such as [b]text[/b] into html
+ * it is built on HTMLParser: the virtual methods below are overridden
+ * so that '[' and ']' are used as tag marks
+ * all members declared here are private
+ */
+class BBCODEParser : public HTMLParser
+{
+
+	/*
+		a description of one bbcode tag (see the table in FindTag())
+	*/
+	struct Tags
+	{
+		const wchar_t * bbcode;        // a name of the bbcode tag, compared in FindTag()
+		const wchar_t * html_tag;      // a name of the html tag put to the output
+		const wchar_t * html_argument; // with closing '>'
+		bool inline_tag;               // if false then a new line is put after the tag
+	};
+
+
+	/*
+		virtual methods
+		(from HTMLParser class)
+	*/
+	virtual void Init();
+	virtual void Uninit();
+
+	virtual bool IsOpeningTagMark(wchar_t c);
+	virtual bool IsClosingTagMark(wchar_t c);
+	virtual bool IsClosingXmlSimpleTagMark(wchar_t c);
+
+	virtual bool IsOpeningCommentaryTagMark(const wchar_t *);
+	virtual size_t OpeningCommentaryTagMarkSize();
+
+	virtual bool IsValidCharForName(int c);
+	virtual void CheckExceptions();
+	virtual bool SkipCommentaryTagIfExists();
+
+	virtual bool PutOpeningTag();
+	virtual void PutClosingTag(const wchar_t * tag);
+
+	virtual void PutNormalText(const wchar_t * str, const wchar_t * end);
+	virtual void ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white);
+
+
+
+	/*
+		others
+	*/
+	bool Equal(const wchar_t * str1, const wchar_t * str2);
+
+	void PutHtmlArgument1(const wchar_t * arg_start, const wchar_t * arg_end, bool has_u);
+	void PutHtmlArgument2(const Tags * tag, bool has_u);
+	void PutHtmlArgument(const Tags * tag, const wchar_t * arg_start, const wchar_t * arg_end);
+
+	void PutOpeningTagFromEzc();
+	void PutOpeningTagFromBBCode(const Tags * tag);
+
+	const Tags * FindTag(const wchar_t * tag);
+	const Tags * FindTag(const std::wstring & tag);
+	void PrintArgumentCheckQuotes(const wchar_t * & start, const wchar_t * & end);
+
+	// PrintEscape - escaping for html, PrintEncode - encoding for urls
+	void PrintEscape(int c, bool change_quote = false);
+	void PrintEncode(int c);
+
+	void PrintEscape(const wchar_t * start, const wchar_t * end, bool change_quote = false);
+	void PrintEncode(const wchar_t * start, const wchar_t * end);
+
+	void PrintArgumentEncode(const wchar_t * start, const wchar_t * end);
+	void PrintArgumentEscape(const wchar_t * start, const wchar_t * end);
+
+	void PutClosingTag(const Tags * tag);
+
+	void CheckOpeningTag(const Tags * tag, const wchar_t * tag_name, bool & condition);
+	void CheckOpeningTag(const Tags * tag);
+
+	void TrimWhiteWithNewLines(const wchar_t * & start, const wchar_t * & end);
+
+
+
+	bool has_open_ol_tag; // has open html <ol> tag
+	bool has_open_ul_tag; // has open html <ul> tag
+	bool has_open_li_tag; // has open html <li> tag
+};
+
+
+}
+
+
+#endif
@@ -0,0 +1,490 @@
+/*
+ * This file is a part of PikoTools
+ * and is distributed under the (new) BSD licence.
+ * Author: Tomasz Sowa <t.sowa@ttmath.org>
+ */
+
+/* 
+ * Copyright (c) 2008-2022, Tomasz Sowa
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ *  * Neither the name Tomasz Sowa nor the names of contributors to this
+ *    project may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef headerfile_picotools_html_htmlfilter
+#define headerfile_picotools_html_htmlfilter
+
+#include <string>
+#include <map>
+#include <vector>
+#include <algorithm>
+#include "convert/baseparser.h"
+#include "space/space.h"
+#include "textstream/stream.h"
+
+
+namespace pt
+{
+
+
+
+// max length of a name of a html tag (with terminating null)
+#define WINIX_HTMLFILTER_ITEM_NAME_MAXLEN	30
+
+// max length of a html lang attribute (e.g. "en", "pl")
+#define WINIX_HTMLFILTER_ITEM_LANG_MAXLEN	10
+
+
+#define WINIX_HTMLFILTER_ATTR_NAME_MAXLEN	40
+
+
+#define WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN	500
+
+
+// depth of the html tree
+#define WINIX_HTMLFILTER_STACK_MAXLEN		100
+
+// length of a buffer used for printing
+// it should be at least: WINIX_HTMLFILTER_ITEM_NAME_MAXLEN+3
+#define WINIX_HTMLFILTER_BUFFER_MAXLEN	2048
+
+
+
+
+/*!
+	very lightweight filter for html
+	(without using any dynamic memory - some memory is allocated only at the beginning - in ctors)
+	this filter has O(n) complexity over the whole html string
+
+	such tags as: <script> <pre> <textarea> are treated in a special way
+	all characters between the opening and closing tag (<script>....</script>) are untouched
+
+	if the filter finds that there are not closed tags it will close them,
+	if the filter finds a closing tag which doesn't have an opening tag - it will skip it
+
+	tags which don't need to be closed: meta, input, br, img, link
+	look at CheckExceptions() method
+
+	the filter recognizes xml simple tags (with / at the end) such as: <br />
+*/
+class HTMLParser : public BaseParser
+{
+public:
+
+
+	/*
+		status of parsing
+	*/
+	enum Status { ok, cant_open_file, syntax_error };
+
+
+	enum OrphanMode
+	{
+		orphan_nbsp,		// put the "&nbsp;" entity string
+		orphan_160space		// put a single character with code 160 (a non-breaking space, not an ASCII code)
+	};
+
+
+	// orphans for one language
+	struct Orphans
+	{
+		std::vector<std::wstring> tab;
+		size_t max_len;
+	};
+
+
+	struct Item
+	{
+		std::wstring name; // name of the tag, max size: WINIX_HTMLFILTER_ITEM_NAME_MAXLEN
+
+		enum Type
+		{
+			opening,		/* sample:  <h1>		*/
+			closing,		/* sample:  </h1>		*/
+			simple,			/* sample:  <br/>		*/
+			special,		/* sample:  <!doctype>	*/
+			none
+		} type;
+
+		bool is_commentary; // presumably set when the item is a commentary - TODO confirm
+
+		bool is_cdata; // presumably set when the item is a CDATA section - TODO confirm
+
+		// is there a new line before this tag (or just a new line and some white characters)
+		bool new_line_before;
+
+		// is there a new line after this tag (or just some white characters and a new line)
+		bool new_line_after;
+
+		// is there a new line in the middle after this tag and before the next tag
+		bool new_line_in_the_middle;
+
+		// is there a white char (but not a new line) before this tag
+		bool white_char_before;
+
+		// current orphans table
+		// (will be propagated)
+		Orphans * porphans;
+
+		// this item or one of its parents is a 'body' html tag
+		// (will be propagated)
+		bool has_body_tag;
+
+		size_t tree_index; // NOTE(review): meaning is not documented in this header - confirm
+
+		Space * space; // raw pointer, NOTE(review): ownership is not visible in this header - confirm
+
+		void Clear();
+		Item();
+	};
+
+
+	class ItemParsedListener
+	{
+	public:
+		// listener interface, an object of a derived class is passed to set_item_parsed_listener()
+		ItemParsedListener() = default;
+		virtual ~ItemParsedListener() = default;
+
+		// the default implementation ignores the item and returns true
+		virtual bool item_parsed(const Item &) { return true; }
+	};
+
+
+	/*
+		the last status of parsing, set by parse() methods
+	*/
+	Status status;
+
+	HTMLParser();
+	HTMLParser(const HTMLParser & f);
+	HTMLParser & operator=(const HTMLParser & f);
+	virtual ~HTMLParser();
+
+	void set_item_parsed_listener(ItemParsedListener * listener);
+
+
+	void parse_html(const wchar_t * in, Space & space, bool compact_mode = false);
+
+	Status parse_xml_file(const char * file_name,         Space & out_space, bool compact_mode = false, bool clear_space = true);
+	Status parse_xml_file(const std::string & file_name,  Space & out_space, bool compact_mode = false, bool clear_space = true);
+	Status parse_xml_file(const wchar_t * file_name,      Space & out_space, bool compact_mode = false, bool clear_space = true);
+	Status parse_xml_file(const std::wstring & file_name, Space & out_space, bool compact_mode = false, bool clear_space = true);
+
+
+
+	// main methods used for filtering
+	void filter(const wchar_t * in, std::wstring & out, bool clear_out_string = true);
+	void filter(const std::wstring & in, std::wstring & out, bool clear_out_string = true);
+
+	void filter(const WTextStream & in, Stream & out, bool clear_out_stream = true);
+
+	HTMLParser::Status filter_file(const char * file_name, std::wstring & out, bool clear_out_stream = true);
+	HTMLParser::Status filter_file(const std::string & file_name, std::wstring & out, bool clear_out_stream = true);
+	HTMLParser::Status filter_file(const wchar_t * file_name, std::wstring & out, bool clear_out_stream = true);
+	HTMLParser::Status filter_file(const std::wstring & file_name, std::wstring & out, bool clear_out_stream = true);
+
+
+	/*
+	 *
+	 * returns a number of a last parsed line
+	 * can be used to obtain the line in which there was a syntax error
+	 *
+	 */
+	int get_last_parsed_line();
+
+
+
+
+	const static int WHITE_MODE_ORIGIN = 0;
+	const static int WHITE_MODE_SINGLE_LINE = 1;
+	const static int WHITE_MODE_TREE = 2;
+
+
+	// white chars mode
+	//
+	void white_chars_mode(int mode);
+
+	// if the line is wrap_line_ length (or longer) then insert a new line character (in a place of a white char)
+	// (only between html tags and only in <body> subtree)
+	// skipped in such tags: script, pre, textarea
+	// 0 - off
+	// lines are wrapped only in 'body' tag (useful for text in 'title' tag which is in 'head' section)
+	void WrapLine(size_t wrap_line_);
+
+	// first tabs in a tree
+	// default: 2 (spaces)
+	// set 0 to turn off
+	void InsertTabs(size_t tabsize);
+
+	// set a name of a html tag which will be used as 'nofilter' tag
+	// elements between such tags are not filtered (similarly as in <pre> and <textarea>)
+	// these tags (opening and closing) will no be placed in the html output
+	void SetNoFilterTag(const std::wstring & tag_name);
+
+	// orphans are checked only in 'body' tag
+	void AssignOrphans(const wchar_t * lang_code,      const std::vector<std::wstring> & otab);
+	void AssignOrphans(const std::wstring & lang_code, const std::vector<std::wstring> & otab);
+	void ClearOrphans();
+
+	// check 'orphans' for the specific language
+	// if an orphan is detected then a non-breaking space ("&nbsp;" or the character with code 160) will be put
+	// default disable (lang_none)
+	void OrphansMode(const std::wstring & orphan_mode);
+
+	// skipping some unsafe tags
+	// (script, iframe, frame, frameset, applet, head, meta, html, link, body, ...)
+	void SafeMode(bool safe_mode_);
+
+	// skip all html tags
+	// gives only text without markup
+	// but there can be commentaries
+	void SkipTags(bool skip_tags);
+
+	// skip commentaries
+	void SkipCommentaries(bool skip_commentaries);
+
+	// if true then entities such as &nbsp; are skipped
+	// this automatically turns on AnalyzeEntities
+	// in such a case FoundEntity callbacks are sent
+	void SkipEntities(bool skip_entities);
+
+	// analyze html entities such as &nbsp;
+	// virtual method: FoundEntity is called
+	// entities are analyzed in normal text and in attribute values such as <p class="a&nbsp;">
+	void AnalyzeEntities(bool analyze_entities);
+
+
+protected:
+
+	/*
+	 * true when parsing html input, false for parsing xml
+	 */
+	bool parsing_html;
+
+
+	bool xml_compact_mode;
+
+
+
+	// orphans for all languages
+	// map<language_code, Orphans>
+	typedef std::map<std::wstring, Orphans> OrphansTab;
+	OrphansTab orphans_tab;
+
+	// html <nofilter> tag name
+	std::wstring no_filter_tag;
+
+	ItemParsedListener * item_parsed_listener;
+
+	/*
+		true if the lastc was escaped (with a backslash)
+		we have to know if the last sequence was \" or just "
+	*/
+	bool char_was_escaped;
+
+	std::wstring escaped_chars_buffer;
+	size_t escaped_char_index;
+
+	/*
+	 * filter mode, a method filter(...) was called
+	 * in filter mode we do not unescape xml sequences such as &lt; &gt; ...
+	 */
+	bool filter_mode;
+
+
+	void clear_input_flags();
+
+
+	/*
+		virtual methods
+	*/
+	virtual void Init();
+	virtual void Uninit();
+
+	virtual bool IsOpeningTagMark(wchar_t c);
+	virtual bool IsClosingTagMark(wchar_t c);
+	virtual bool IsClosingTagIndicator(wchar_t c);
+	virtual bool IsSpecialTagIndicator(wchar_t c);
+	virtual bool IsXMLSpecialTagIndicator(wchar_t c);
+	virtual bool IsAttributeAssignmentMark(wchar_t c);
+	virtual bool IsClosingXmlSimpleTagMark(wchar_t c);
+	virtual bool IsStartingEntityMark(wchar_t c);
+	virtual bool IsEndingEntityMark(wchar_t c);
+
+	virtual bool IsValidCharForName(int c);
+	virtual bool IsValidCharForAttrName(int c);
+	virtual bool IsValidCharForEntityName(int c);
+
+	virtual void CheckSingleItemExceptions();
+	virtual void CheckWhiteCharsExceptions(Item & item);
+	virtual void CheckDifferentContentExceptions(Item & item);
+
+	virtual void Put(wchar_t c);
+	virtual void Put(const wchar_t * str, const wchar_t * end);
+	virtual void Put(const std::wstring & str);
+	virtual void AnalyzeEntitiesAndPut(const wchar_t * str, const wchar_t * end, std::wstring * out);
+
+	virtual void PutOpeningTagMark();
+	virtual void PutClosingTagMark();
+	virtual bool PutOpeningTag();
+	virtual void PutClosingTag(const Item & item);
+
+	virtual void ItemFound();
+	virtual void EntityFound(const wchar_t * str, const wchar_t * end);
+
+	/*
+		others
+	*/
+	void SetSomeDefaults();
+
+	Item & GetItem(size_t i);
+	Item & LastItem();
+
+	wchar_t ToLower(wchar_t c);
+	void    ToLower(std::wstring & str);
+
+	bool IsNameEqual(const wchar_t * name1,      const wchar_t * name2);
+	bool IsNameEqual(const wchar_t * name1,      const std::wstring & name2);
+	bool IsNameEqual(const std::wstring & name1, const wchar_t * name2);
+	bool IsNameEqual(const std::wstring & name1, const std::wstring & name2);
+
+	bool IsNameEqual(const wchar_t * name1,      const wchar_t * name2,      size_t len);
+	bool IsNameEqual(const wchar_t * name1,      const std::wstring & name2, size_t len);
+	bool IsNameEqual(const std::wstring & name1, const wchar_t * name2,      size_t len);
+	bool IsNameEqual(const std::wstring & name1, const std::wstring & name2, size_t len);
+
+	bool IsLastTag(const wchar_t * name);
+	bool IsLastTag(const std::wstring & name);
+	bool IsTagSafe(const wchar_t * tag);
+	bool IsTagSafe(const std::wstring & tag);
+
+	int  CheckOrphan(const wchar_t * str, const wchar_t * end, const std::wstring & orphan_str);
+	bool CheckOrphan(const wchar_t * str, const wchar_t * end, const std::vector<std::wstring> & orphans);
+	bool CheckOrphan(const wchar_t * str, const wchar_t * end);
+
+	bool IsWhite(int c);
+	void SkipWhite(std::wstring * out_string = nullptr);
+	void SkipWhiteLines(std::wstring * out_string = nullptr);
+	void SkipWhiteWithFirstNewLine();
+
+	int current_white_char_mode();
+
+	void ReadTextUntilClosingCommentary();
+	bool IsClosingTagForLastItem(bool put_closing_tag_as_well);
+	void ReadTextUntilClosingTag(bool put_closing_tag_as_well);
+	void SkipAndCheckClosingTag(std::wstring * remember_text = nullptr);
+
+	void PopStack();
+	bool PushStack();
+	void CheckStackPrintRest();
+	void AddForgottenTags();
+	void CheckClosingTags();
+	void ReadText(bool is_cdata);
+	bool PrintRest();
+	bool PrintOpeningItem();
+	void ReadItemName(std::wstring & name, bool clear_name = true);
+	void ReadItemAttrName();
+	void ReadItemAttrValueAdd(const std::wstring & str);
+	void ReadItemAttrValue(bool has_quote, wchar_t quote_char);
+	void ReadXMLItemAttrValue(bool has_quote, wchar_t quote_char);
+
+	bool ReadItemAttr();
+	void CheckItemLangAttr();
+	void PrintItemAttr();
+	void PutItemAttrToSpace();
+
+	void ReadItemClosing();
+	void ReadItemSpecial();
+	void ReadItemOpening();
+	bool ReadItem();
+	void ReadLoop();
+	void Read();
+
+	void CheckChar(wchar_t c);
+
+	bool PutNormalNonWhite(std::wstring & str, bool allow_put_new_line, bool allow_put_space, bool is_cdata);
+	void PutNormalWhite(bool & was_white_char, bool & was_new_line, std::wstring * result_text = nullptr);
+
+	void PutTabs(size_t len);
+	void PutNonBreakingSpace();
+	void CalcOrphansMaxLen(Orphans & orphans);
+
+	void AddItemToSpace();
+	void RemoveLastSpace(size_t index);
+	void AddSpaceToSpaceTree(const Space & space);
+
+	bool RemoveIfNeeded(size_t index);
+
+	bool check_escape_sequentions();
+	void read_xml_entity();
+	void read_char_from_entity_buffer();
+	int read_char() override;
+
+	Item empty;
+	Item * pstack;			// stack pointer
+	size_t stack_len;		// length of the stack
+	wchar_t * buffer;		// buffer used when printing
+	std::wstring * out_string;
+	Stream * out_stream;
+	Space * out_space;
+	Space text_space_tmp;
+
+	std::vector<int> white_char_mode_tab;
+
+	//bool last_new_line;
+	bool new_item_has_new_line_before;
+	bool new_item_has_white_char_before;
+	int white_mode;
+	bool is_first_item;
+	size_t wrap_line;		// insert a new line character into long lines
+	size_t tab_size;
+	OrphanMode orphan_mode;
+	std::wstring attr_name;
+	std::vector<std::wstring> attr_value;
+	std::wstring attr_value_temp;
+	std::wstring attr_value_lower;
+	bool attr_has_value;
+	std::wstring lang_code_lower;
+	size_t line_len;		//length of the current line (without first spaces which create the html tree)
+	bool safe_mode;			// skipping some unsafe tags
+	Orphans orphans_temp;
+	bool skip_tags;
+	bool skip_commentaries;
+	bool skip_entities;
+	bool analyze_entities;
+	std::wstring tmp_text;
+	std::wstring tmp_name;
+};
+
+
+
+}
+
+
+
+#endif
@@ -112,6 +112,7 @@ void FileLog::save_log(WTextStream * buffer)
 			if( log_stdout )
 			{
 				wide_stream_to_utf8(*buffer, std::cout);
+				std::cout.flush();
 			}

 			if( !log_file.empty() )
@@ -5,7 +5,7 @@
 */

 /*
- * Copyright (c) 2018-2021, Tomasz Sowa
+ * Copyright (c) 2018-2022, Tomasz Sowa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -684,5 +684,51 @@ void Log::save_log()
 }


+// put_multiline() overloads: each of them passes plain character pointers to put_multiline_generic()
+Log & Log::put_multiline(const char * prefix, const char * msg)
+{
+	put_multiline_generic(prefix, msg);
+	return *this;
+}
+
+
+Log & Log::put_multiline(const wchar_t * prefix, const wchar_t * msg)
+{
+	put_multiline_generic(prefix, msg);
+	return *this;
+}
+
+// the message is read up to the first null character (the generic method stops at it)
+Log & Log::put_multiline(const char * prefix, const std::string & msg)
+{
+	put_multiline_generic(prefix, msg.c_str());
+	return *this;
+}
+
+// wide character version of the overload above
+Log & Log::put_multiline(const wchar_t * prefix, const std::wstring & msg)
+{
+	put_multiline_generic(prefix, msg.c_str());
+	return *this;
+}
+
+// both the prefix and the message are given as strings
+Log & Log::put_multiline(const std::string & prefix, const std::string & msg)
+{
+	put_multiline_generic(prefix.c_str(), msg.c_str());
+	return *this;
+}
+
+// wide character version of the overload above
+Log & Log::put_multiline(const std::wstring & prefix, const std::wstring & msg)
+{
+	put_multiline_generic(prefix.c_str(), msg.c_str());
+	return *this;
+}
+
+
+
+
+
 } // namespace

@@ -5,7 +5,7 @@
 */

 /*
- * Copyright (c) 2018-2021, Tomasz Sowa
+ * Copyright (c) 2018-2022, Tomasz Sowa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -156,6 +156,17 @@ public:
 	virtual Log & put_binary_blob(const char * blob, size_t blob_len);
 	virtual Log & put_binary_blob(const std::string & blob);

+	/*
+	 * put a multiline message
+	 * leading and trailing new line characters are trimmed, empty lines are skipped
+	 * a prefix is inserted at the beginning of each printed line
+	 */
+	virtual Log & put_multiline(const char * prefix, 		const char * msg);
+	virtual Log & put_multiline(const wchar_t * prefix, 	const wchar_t * msg);
+	virtual Log & put_multiline(const char * prefix, 		const std::string & msg);
+	virtual Log & put_multiline(const wchar_t * prefix, 	const std::wstring & msg);
+	virtual Log & put_multiline(const std::string & prefix, const std::string & msg);
+	virtual Log & put_multiline(const std::wstring & prefix, const std::wstring & msg);


 protected:
@@ -184,6 +195,10 @@ protected:
 	virtual void save_log();
 	virtual void save_log_and_clear();

+	template<typename CharType>
+	void put_multiline_generic(const CharType * prefix, const CharType * msg);
+
+
 };


@@ -221,11 +236,61 @@ Log & Log::log_string_generic(const StringType & value, size_t max_size)



+template<typename CharType>
+void Log::put_multiline_generic(const CharType * prefix, const CharType * msg)
+{
+	/*
+	 * prints msg line by line: every non-empty line starts with the prefix and ends with logend,
+	 * empty lines (also the leading and trailing ones) produce no output
+	 */
+	if( !msg )
+		return; // nothing to print, a null pointer must not be dereferenced below
+
+	bool put_prefix = true;             // the next printed character starts a new line
+	bool was_new_line = false;          // a new line was read but its logend is not sent yet
+	bool was_something_printed = false;
+
+	for( ; *msg ; ++msg )
+	{
+		if( *msg == static_cast<CharType>('\n') )
+		{
+			was_new_line = true;
+			put_prefix = true;
+			continue;
+		}
+
+		// the previous line is closed only if something was put into it
+		if( was_new_line && was_something_printed )
+			operator<<(logend);
+
+		was_new_line = false;
+
+		if( put_prefix )
+		{
+			operator<<(prefix);
+			put_prefix = false;
+		}
+
+		operator<<(*msg);
+		was_something_printed = true;
+	}
+
+	if( was_something_printed )
+	{
+		operator<<(logend);
+	}
+}
+
+


 } // namespace


+
+
+
+
 #endif


@@ -5,7 +5,7 @@
 */

 /* 
- * Copyright (c) 2008-2021, Tomasz Sowa
+ * Copyright (c) 2008-2022, Tomasz Sowa
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
@@ -188,6 +188,12 @@ Space::Space(const Space * space)
 	set(space);
 }

+Space::Space(const Date & date)
+{
+	initialize();
+	set(date); // the work is done by set(const Date &)
+}
+

 void Space::clear()
 {
@@ -427,6 +433,13 @@ void Space::set(Space && space)
 	move_from(std::move(space));
 }

+void Space::set(const Date & date)
+{
+	initialize_value_wstring_if_needed(); // the date is kept as a wide string
+	WTextStream iso_text;
+	date.SerializeISO(iso_text);
+	iso_text.to_str(value.value_wstring);
+}


 Space & Space::add(bool val)
@@ -528,6 +541,12 @@ Space & Space::add(Space && space)
 }


+Space & Space::add(const Date & date)
+{
+	return add_generic(date); // same path as the other add(value) overloads
+}
+
+
 Space & Space::add_empty_space()
 {
 	return add_generic(static_cast<Space*>(nullptr));
@@ -643,6 +662,13 @@ Space & Space::add(const wchar_t * field, Space && space)
 	return *(insert_res.first->second);
 }

+
+Space & Space::add(const wchar_t * field, const Date & date)
+{
+	return add_generic(field, date); // same path as the other add(field, value) overloads
+}
+
+
 Space & Space::add_empty_space(const wchar_t * field)
 {
 	return add_generic(field, static_cast<Space*>(nullptr));
@@ -746,6 +772,11 @@ Space & Space::add(const std::wstring & field, Space && space)
 	return add(field.c_str(), std::move(space));
 }

+Space & Space::add(const std::wstring & field, const Date & date)
+{
+	return add_generic(field, date); // same path as the other add(field, value) overloads
+}
+
 Space & Space::add_empty_space(const std::wstring & field)
 {
 	return add_generic(field, static_cast<Space*>(nullptr));
@@ -827,8 +858,31 @@ bool Space::to_bool() const
 	if( type == type_bool )
 		return value.value_bool;

-	long long val = to_long_long();
-	return (val != 0) ? true : false;
+	if( type == type_long )
+		return value.value_long != 0;
+
+	if( type == type_float )
+		return value.value_float != 0.0f;
+
+	if( type == type_double )
+		return value.value_double != 0.0;
+
+	if( type == type_long_double )
+		return value.value_long_double != 0.0L;
+
+	if( type == type_string )
+		return !value.value_string.empty();
+
+	if( type == type_wstring )
+		return !value.value_wstring.empty();
+
+	if( type == type_table )
+		return !value.value_table.empty();
+
+	if( type == type_object )
+		return !value.value_object.empty();
+
+	return false;
 }

 short Space::to_short() const
@@ -1474,35 +1528,35 @@ void Space::serialize_to_space_to(std::wstring & str, bool pretty_print) const



-std::string Space::serialize_to_json_str() const
+std::string Space::serialize_to_json_str(bool pretty_print) const
 {
 	std::string str;
-	serialize_to_json_to(str);
+	serialize_to_json_to(str, pretty_print);
 	return str;
 }


-std::wstring Space::serialize_to_json_wstr() const
+std::wstring Space::serialize_to_json_wstr(bool pretty_print) const
 {
 	std::wstring str;
-	serialize_to_json_to(str);
+	serialize_to_json_to(str, pretty_print);
 	return str;
 }


-void Space::serialize_to_json_to(std::string & str) const
+void Space::serialize_to_json_to(std::string & str, bool pretty_print) const
 {
 	TextStream stream;
-	serialize_to_json_stream(stream);
+	serialize_to_json_stream(stream, pretty_print);

 	stream.to_str(str);
 }


-void Space::serialize_to_json_to(std::wstring & str) const
+void Space::serialize_to_json_to(std::wstring & str, bool pretty_print) const
 {
 	WTextStream stream;
-	serialize_to_json_stream(stream);
+	serialize_to_json_stream(stream, pretty_print);

 	stream.to_str(str);
 }
@@ -2523,6 +2577,18 @@ void Space::remove_value_table(bool only_clear)
 }


+Space * Space::get_table_item(size_t index)
+{
+	// a null pointer is returned if this is not a table or the index is out of range
+	if( !is_table() || index >= table_size() )
+	{
+		return nullptr;
+	}
+	return value.value_table[index];
+}
+
+
+

 } // namespace

@@ -5,7 +5,7 @@
 */

 /* 
- * Copyright (c) 2010-2021, Tomasz Sowa
+ * Copyright (c) 2010-2022, Tomasz Sowa
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
@@ -207,6 +207,7 @@ public:
 	Space(const std::string & str);
 	Space(const std::wstring & str);
 	Space(const Space * space);
+	Space(const Date & date);


 	void clear();
@@ -243,6 +244,7 @@ public:
 	void set(const Space & space);
 	void set(const Space * space);
 	void set(Space && space);
+	void set(const Date & date);


 	// add a value to the table, change to table if needed, return the reference to the new inserted item
@@ -265,6 +267,7 @@ public:
 	Space & add(const Space & space);
 	Space & add(const Space * space);
 	Space & add(Space && space);
+	Space & add(const Date & date);
 	Space & add_empty_space(); // IMPROVEME rename me to something better


@@ -289,6 +292,7 @@ public:
 	Space & add(const wchar_t * field, const Space & space);
 	Space & add(const wchar_t * field, const Space * space);
 	Space & add(const wchar_t * field, Space && space);
+	Space & add(const wchar_t * field, const Date & date);
 	Space & add_empty_space(const wchar_t * field); // IMPROVEME rename me to something better

 	Space & add(const std::wstring & field, bool val);
@@ -310,6 +314,7 @@ public:
 	Space & add(const std::wstring & field, const Space & space);
 	Space & add(const std::wstring & field, const Space * space);
 	Space & add(const std::wstring & field, Space && space);
+	Space & add(const std::wstring & field, const Date & date);
 	Space & add_empty_space(const std::wstring & field); // IMPROVEME rename me to something better


@@ -555,10 +560,10 @@ public:



-	std::string  serialize_to_json_str() const;
-	std::wstring serialize_to_json_wstr() const;
-	void serialize_to_json_to(std::string & str) const;
-	void serialize_to_json_to(std::wstring & str) const;
+	std::string  serialize_to_json_str(bool pretty_print = false) const;
+	std::wstring serialize_to_json_wstr(bool pretty_print = false) const;
+	void serialize_to_json_to(std::string & str, bool pretty_print = false) const;
+	void serialize_to_json_to(std::wstring & str, bool pretty_print = false) const;


 	template<typename StreamType>
@@ -630,7 +635,7 @@ public:
 	bool has_value(const wchar_t * field, const std::wstring & val) const;


-
+	Space * get_table_item(size_t index);


 protected:
@@ -5,7 +5,7 @@
 */

 /* 
- * Copyright (c) 2012-2021, Tomasz Sowa
+ * Copyright (c) 2012-2022, Tomasz Sowa
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
@@ -55,6 +55,10 @@ SpaceParser::SpaceParser()
 	space_end		 = '}';
 	option_delimiter = ',';
 	input_as_utf8    = true;
+	object_items_limit          = 0;
+	table_items_limit          = 0;
+	all_items_limit          = 0;
+	nested_levels_limit          = 0;
 }


@@ -71,14 +75,78 @@ int SpaceParser::get_last_parsed_line()
 }


+int SpaceParser::get_last_parsed_column()
+{
+	return column; // NOTE(review): 'column' is maintained outside of this file's visible part - presumably by the base parser, confirm
+}
+
+// limits checked while parsing, the value 0 turns a limit off (0 is also set by the constructor)
+void SpaceParser::set_object_items_limit(size_t val)
+{
+	this->object_items_limit = val; // compared with the size of a single object
+}
+
+
+void SpaceParser::set_table_items_limit(size_t val)
+{
+	this->table_items_limit = val; // compared with the size of a single table
+}
+
+
+void SpaceParser::set_all_items_limit(size_t val)
+{
+	this->all_items_limit = val; // compared with the counter of all items created during one parsing
+}
+
+// NOTE(review): the method is called ..._level_limit but the field is nested_levels_limit
+void SpaceParser::set_nested_level_limit(size_t val)
+{
+	this->nested_levels_limit = val; // compared with the current nesting level of spaces and tables
+}
+
+// getters for the limits set above
+size_t SpaceParser::get_object_items_limit()
+{
+	return object_items_limit;
+}
+
+
+size_t SpaceParser::get_table_items_limit()
+{
+	return table_items_limit;
+}
+
+
+size_t SpaceParser::get_all_items_limit()
+{
+	return all_items_limit;
+}
+
+
+size_t SpaceParser::get_nested_level_limit()
+{
+	return nested_levels_limit;
+}
+
+
+
+void SpaceParser::prepare_to_parsing()
+{
+	clear_input_flags();
+	// the counters below are compared with all_items_limit and nested_levels_limit while parsing
+	current_items_counter = 0;
+	current_nested_level = 0;
+}
+

 SpaceParser::Status SpaceParser::parse_json_file(const char * file_name, Space & out_space, bool clear_space)
 {
+	prepare_to_parsing();
+
 	reading_from_file = true;
 	parsing_space = false;
 	root_space = &out_space;

-	file.clear();
 	file.open(file_name, std::ios_base::binary | std::ios_base::in);
 	
 	if( file )
@@ -125,11 +193,12 @@ SpaceParser::Status SpaceParser::parse_json_file(const std::wstring & file_name,

 SpaceParser::Status SpaceParser::parse_space_file(const char * file_name, Space & out_space, bool clear_space)
 {
+	prepare_to_parsing();
+
 	reading_from_file = true;
 	parsing_space = true;
 	root_space = &out_space;

-	file.clear();
 	file.open(file_name, std::ios_base::binary | std::ios_base::in);

 	if( file )
@@ -174,10 +243,9 @@ SpaceParser::Status SpaceParser::parse_space_file(const std::wstring & file_name

 SpaceParser::Status SpaceParser::parse_json(const char * str, Space & out_space, bool clear_space)
 {
-	reading_from_file         = false;
-	reading_from_wchar_string = false;
+	prepare_to_parsing();
+
 	pchar_ascii               = str;
-	pchar_unicode             = 0;
 	parsing_space             = false;
 	root_space                = &out_space;

@@ -195,10 +263,9 @@ SpaceParser::Status SpaceParser::parse_json(const std::string & str, Space & out

 SpaceParser::Status SpaceParser::parse_json(const wchar_t * str, Space & out_space, bool clear_space)
 {
-	reading_from_file         = false;
-	reading_from_wchar_string = true;
+	prepare_to_parsing();
+
 	pchar_unicode             = str;
-	pchar_ascii               = 0;
 	parsing_space             = false;
 	root_space                = &out_space;

@@ -215,14 +282,50 @@ SpaceParser::Status SpaceParser::parse_json(const std::wstring & str, Space & ou



+SpaceParser::Status SpaceParser::parse_json(const pt::TextStream & str, Space & out_space, bool clear_space)
+{
+	// parses the json text from str into out_space and returns the status of parsing
+	prepare_to_parsing();
+	pt::TextStream::const_iterator start = str.begin();
+	pt::TextStream::const_iterator end = str.end();
+	text_stream_iterator      = &start;
+	text_stream_iterator_end  = &end;
+	parsing_space             = false;
+	root_space                = &out_space;
+	parse_root_space(clear_space);
+
+	// start and end are local objects, the parser must not keep pointers to them after the return
+	text_stream_iterator = text_stream_iterator_end = nullptr;
+	return status;
+}
+
+
+SpaceParser::Status SpaceParser::parse_json(const pt::WTextStream & str, Space & out_space, bool clear_space)
+{
+	// parses the json text from the wide stream str into out_space and returns the status of parsing
+	prepare_to_parsing();
+	pt::WTextStream::const_iterator start = str.begin();
+	pt::WTextStream::const_iterator end = str.end();
+	wtext_stream_iterator     = &start;
+	wtext_stream_iterator_end = &end;
+	parsing_space             = false;
+	root_space                = &out_space;
+	parse_root_space(clear_space);
+
+	// start and end are local objects, the parser must not keep pointers to them after the return
+	wtext_stream_iterator = wtext_stream_iterator_end = nullptr;
+	return status;
+}
+
+
+


 SpaceParser::Status SpaceParser::parse_space(const char * str, Space & out_space, bool clear_space)
 {
-	reading_from_file         = false;
-	reading_from_wchar_string = false;
+	prepare_to_parsing();
+
 	pchar_ascii               = str;
-	pchar_unicode             = 0;
 	parsing_space             = true;
 	root_space                = &out_space;

@@ -240,10 +343,9 @@ SpaceParser::Status SpaceParser::parse_space(const std::string & str, Space & ou

 SpaceParser::Status SpaceParser::parse_space(const wchar_t * str, Space & out_space, bool clear_space)
 {
-	reading_from_file         = false;
-	reading_from_wchar_string = true;
+	prepare_to_parsing();
+
 	pchar_unicode             = str;
-	pchar_ascii               = 0;
 	parsing_space             = true;
 	root_space                = &out_space;

@@ -259,6 +361,41 @@ SpaceParser::Status SpaceParser::parse_space(const std::wstring & str, Space & o
 }


+SpaceParser::Status SpaceParser::parse_space(const pt::TextStream & str, Space & out_space, bool clear_space)
+{
+	// parses the text in Space format from str into out_space and returns the status of parsing
+	prepare_to_parsing();
+	pt::TextStream::const_iterator start = str.begin();
+	pt::TextStream::const_iterator end = str.end();
+	text_stream_iterator      = &start;
+	text_stream_iterator_end  = &end;
+	parsing_space             = true;
+	root_space                = &out_space;
+	parse_root_space(clear_space);
+
+	// start and end are local objects, the parser must not keep pointers to them after the return
+	text_stream_iterator = text_stream_iterator_end = nullptr;
+	return status;
+}
+
+
+SpaceParser::Status SpaceParser::parse_space(const pt::WTextStream & str, Space & out_space, bool clear_space)
+{
+	// parses the text in Space format from the wide stream str into out_space and returns the status of parsing
+	prepare_to_parsing();
+	pt::WTextStream::const_iterator start = str.begin();
+	pt::WTextStream::const_iterator end = str.end();
+	wtext_stream_iterator     = &start;
+	wtext_stream_iterator_end = &end;
+	parsing_space             = true;
+	root_space                = &out_space;
+	parse_root_space(clear_space);
+
+	// start and end are local objects, the parser must not keep pointers to them after the return
+	wtext_stream_iterator = wtext_stream_iterator_end = nullptr;
+	return status;
+}
+



@@ -289,10 +426,13 @@ void SpaceParser::parse_root_space(bool clear_root_space)
 		parse(root_space, false, false);
 	}

+	if( status == ok )
+	{
 		skip_white();

 		if( lastc != -1 )
 			status = syntax_error;
+	}

 	token.clear();
 }
@@ -364,6 +504,8 @@ void SpaceParser::parse(Space * space, bool is_object_value, bool is_table_value

 void SpaceParser::parse_space(Space * space)
 {
+	if( nested_levels_limit == 0 || current_nested_level++ < nested_levels_limit )
+	{
 		/*
 		 * in Space format in global namespace the space start character is not required
 		 */
@@ -379,6 +521,8 @@ void SpaceParser::parse_space(Space * space)

 		parse_key_value_pairs(space);

+		if( status == ok )
+		{
 			if( need_space_start_character )
 			{
 				if( lastc == space_end )
@@ -390,6 +534,15 @@ void SpaceParser::parse_space(Space * space)
 					status = syntax_error;
 				}
 			}
+		}
+	}
+	else
+	{
+		status = limit_nested_level_exceeded;
+	}
+
+	if( current_nested_level > 0 )
+		current_nested_level -= 1;
 }


@@ -465,10 +618,14 @@ void SpaceParser::parse_floating_point_value(Space * space)

 void SpaceParser::parse_table(Space * space)
 {
+	if( nested_levels_limit == 0 || current_nested_level++ < nested_levels_limit )
+	{
 		read_char(); // inserting a next character after the table_start char to lastc
 		space->set_empty_table();
 		parse_values_list(space);

+		if( status == ok )
+		{
 			if( lastc == table_end )
 			{
 				read_char();
@@ -477,6 +634,15 @@ void SpaceParser::parse_table(Space * space)
 			{
 				status = syntax_error;
 			}
+		}
+	}
+	else
+	{
+		status = limit_nested_level_exceeded;
+	}
+
+	if( current_nested_level > 0 )
+		current_nested_level -= 1;
 }


@@ -526,10 +692,25 @@ void SpaceParser::parse_key_value_pairs(Space * space)
 				{
 					read_char(); // inserting a next character after the separator to lastc

+					if( object_items_limit == 0 || !space->is_object() || (space->object_size() < object_items_limit) )
+					{
 						Space & new_space = space->add(token.c_str(), new Space());
+
+						if( all_items_limit == 0 || current_items_counter++ < all_items_limit )
+						{
 							parse(&new_space, true, false);
 						}
 						else
+						{
+							status = limit_all_items_exceeded;
+						}
+					}
+					else
+					{
+						status = limit_object_items_exceeded;
+					}
+				}
+				else
 				{
 					status = syntax_error;
 				}
@@ -537,8 +718,12 @@ void SpaceParser::parse_key_value_pairs(Space * space)
 		}

 		is_first = false;
+
+		if( status == ok )
+		{
 			skip_white();
 		}
+	}
 }


@@ -577,14 +762,33 @@ void SpaceParser::parse_values_list(Space * space)
 		}

 		if( status == ok )
+		{
+			if( table_items_limit == 0 || !space->is_table() || (space->table_size() < table_items_limit) )
 			{
 				Space * new_space = &space->add(new Space());
+
+				if( all_items_limit == 0 || current_items_counter++ < all_items_limit )
+				{
 					parse(new_space, false, true);
 				}
+				else
+				{
+					status = limit_all_items_exceeded;
+				}
+			}
+			else
+			{
+				status = limit_table_items_exceeded;
+			}
+		}

 		is_first = false;
+
+		if( status == ok )
+		{
 			skip_white();
 		}
+	}
 }


@@ -891,122 +1095,6 @@ void SpaceParser::read_key()



-int SpaceParser::read_utf8_char()
-{
-int c;
-bool correct;
-
-	lastc = -1;
-
-	do
-	{
-		utf8_to_int(file, c, correct);
-
-		if( !file )
-			return lastc;
-	}
-	while( !correct );
-
-	lastc = c;
-
-	if( lastc == '\n' )
-		++line;
-	
-return lastc;
-}
-
-
-
-int SpaceParser::read_ascii_char()
-{
-	lastc = file.get();
-
-	if( lastc == '\n' )
-		++line;
-	
-return lastc;
-}
-
-
-
-
-int SpaceParser::read_char_from_wchar_string()
-{
-	if( *pchar_unicode == 0 )
-		lastc = -1;
-	else
-		lastc = *(pchar_unicode++);
-
-	if( lastc == '\n' )
-		++line;
-
-return lastc;
-}
-
-
-int SpaceParser::read_char_from_utf8_string()
-{
-int c;
-bool correct;
-
-	lastc = -1;
-
-	do
-	{
-		size_t len = utf8_to_int(pchar_ascii, c, correct);
-		pchar_ascii += len;
-	}
-	while( *pchar_ascii && !correct );
-
-	if( correct )
-		lastc = c;
-
-	if( lastc == '\n' )
-		++line;
-	
-return lastc;
-	
-}
-
-
-int SpaceParser::read_char_from_ascii_string()
-{
-	if( *pchar_ascii == 0 )
-		lastc = -1;
-	else
-		lastc = *(pchar_ascii++);
-
-	if( lastc == '\n' )
-		++line;
-
-return lastc;
-}
-
-
-int SpaceParser::read_char_no_escape()
-{
-	if( reading_from_file )
-	{
-		if( input_as_utf8 )
-			return read_utf8_char();
-		else
-			return read_ascii_char();
-	}
-	else
-	{
-		if( reading_from_wchar_string )
-		{
-			return read_char_from_wchar_string();
-		}
-		else
-		{
-			if( input_as_utf8 )
-				return read_char_from_utf8_string();
-			else
-				return read_char_from_ascii_string();
-		}
-	}
-}

 bool SpaceParser::is_hex_digit(wchar_t c)
 {
@@ -5,7 +5,7 @@
 */

 /* 
- * Copyright (c) 2012-2021, Tomasz Sowa
+ * Copyright (c) 2012-2022, Tomasz Sowa
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
@@ -40,6 +40,7 @@

 #include <fstream>
 #include "space.h"
+#include "convert/baseparser.h"



@@ -49,7 +50,7 @@ namespace pt



-class SpaceParser
+class SpaceParser : public BaseParser
 {
 public:

@@ -62,8 +63,23 @@ public:

 	/*
 		status of parsing
+		ok - input stream has been parsed correctly
+		cant_open_file - the file cannot be opened (returned only when parsing a file)
+		syntax_error - a syntax error in the input stream
+		limit_object_items_exceeded - limit of object items has been exceeded
+		limit_table_items_exceeded - limit of table items has been exceeded
+		limit_all_items_exceeded - limit of items (key/value pairs of objects or table items) throughout the whole tree has been exceeded
+		limit_nested_level_exceeded - limit of nested objects/tables has been exceeded
 	*/
-	enum Status { ok, cant_open_file, syntax_error };
+	enum Status {
+		ok,
+		cant_open_file,
+		syntax_error,
+		limit_object_items_exceeded,
+		limit_table_items_exceeded,
+		limit_all_items_exceeded,
+		limit_nested_level_exceeded
+	};


 	/*
@@ -108,6 +124,8 @@ public:
 	Status parse_json(const wchar_t * str,      Space & out_space, bool clear_space = true);
 	Status parse_json(const std::wstring & str, Space & out_space, bool clear_space = true);

+	Status parse_json(const pt::TextStream & str, Space & out_space, bool clear_space = true);
+	Status parse_json(const pt::WTextStream & str, Space & out_space, bool clear_space = true);


 	Status parse_space(const char * str,         Space & out_space, bool clear_space = true);
@@ -115,6 +133,8 @@ public:
 	Status parse_space(const wchar_t * str,      Space & out_space, bool clear_space = true);
 	Status parse_space(const std::wstring & str, Space & out_space, bool clear_space = true);

+	Status parse_space(const pt::TextStream & str, Space & out_space, bool clear_space = true);
+	Status parse_space(const pt::WTextStream & str, Space & out_space, bool clear_space = true);


 	/*
@@ -143,6 +163,45 @@ public:
 	 *
 	 */
 	int get_last_parsed_line();
+	int get_last_parsed_column();
+
+
+	/*
+	 * get/set limit of object items in one object
+	 * default: 0 (disabled)
+	 */
+	void set_object_items_limit(size_t val);
+	size_t get_object_items_limit();
+
+
+	/*
+	 * get/set limit of items in one table
+	 * default: 0 (disabled)
+	 *
+	 */
+	void set_table_items_limit(size_t val);
+	size_t get_table_items_limit();
+
+
+	/*
+	 * get/set limit of all items (object items and table items) throughout the whole tree
+	 * default: 0 (disabled)
+	 *
+	 */
+	void set_all_items_limit(size_t val);
+	size_t get_all_items_limit();
+
+
+	/*
+	 * get/set nested level limit
+	 * limit of nested objects and tables
+	 * default: 0 (disabled)
+	 *
+	 */
+	void set_nested_level_limit(size_t val);
+	size_t get_nested_level_limit();
+
+


 private:
@@ -154,32 +213,6 @@ private:
 	Space * root_space;


-	/*
-		a number of a line in which there is a syntax_error
-	*/
-	int line;
-
-	/*
-		true if parse() method was called
-		false if ParseString() was called
-	*/
-	bool reading_from_file;
-
-
-	/*
-		pointers to the current character
-		if ParseString() is in used
-	*/
-	const char    * pchar_ascii;
-	const wchar_t * pchar_unicode;
-
-
-	/*
-		true if ParseString(wchar_t *) or ParseString(std::wstring&) was called
-	*/
-	bool reading_from_wchar_string;
-
-
 	/*
 		last read token
 	*/
@@ -222,13 +255,6 @@ private:
 	int option_delimiter;


-	/*
-		last read char
-		or -1 if the end
-	*/
-	int lastc;
-
-
 	/*
 		true if the lastc was escaped (with a backslash)
 		we have to know if the last sequence was \" or just "
@@ -236,22 +262,6 @@ private:
 	bool char_was_escaped;


-	/*
-		current file
-
-		may it would be better to make a pointer?
-		if we parse only a string then there is no sense to have such an object
-	*/
-	std::ifstream file;
-
-
-	/*
-		input file is in UTF-8
-		default: true
-	*/
-	bool input_as_utf8;
-
-
 	/*
 	 * if parsing_space is false then it means we are parsing JSON format
 	 *
@@ -259,6 +269,23 @@ private:
 	bool parsing_space;


+	/*
+	 * object_items_limit - limit of key/value pairs of one object
+	 * table_items_limit - limit of items of one table
+	 * all_items_limit - limit of all items of all objects and all tables
+	 * nested_levels_limit - limit of nested objects/tables
+	 */
+	size_t object_items_limit;
+	size_t table_items_limit;
+	size_t all_items_limit;
+	size_t nested_levels_limit;
+
+	/*
+	 * current_items_counter - how many items (key/value pairs of objects or table items) throughout the whole tree
+	 * current_nested_level - current nested level of objects and tables
+	 */
+	size_t current_items_counter;
+	size_t current_nested_level;

 	void parse_root_space(bool clear_root_space);
 	void parse(Space * space, bool is_object_value, bool is_table_value);
@@ -287,12 +314,6 @@ private:
 	void read_token_quoted(std::wstring & token);
 	void read_multiline_token_quoted(std::wstring & token);

-	int  read_utf8_char();
-	int  read_ascii_char();
-	int  read_char_from_wchar_string();
-	int  read_char_from_utf8_string();
-	int  read_char_from_ascii_string();
-	int  read_char_no_escape();
 	int  read_char();
 	bool is_white(int c);
 	void skip_line();
@@ -306,6 +327,8 @@ private:
 	void read_unicode_floating_format();
 	void read_unicode_code_point();

+	void prepare_to_parsing();
+
 };


@@ -5,7 +5,7 @@
 */

 /* 
- * Copyright (c) 2012-2021, Tomasz Sowa
+ * Copyright (c) 2012-2022, Tomasz Sowa
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
@@ -46,6 +46,7 @@
 #include "membuffer/membuffer.h"
 #include "types.h"
 #include "utf8/utf8.h"
+#include "utf8/utf8_stream.h"

 // for snprintf
 #include <cstdio>
@@ -71,8 +72,67 @@ public:
 	typedef CharT char_type;

 	typedef MemBuffer<char_type, stack_size, heap_block_size> buffer_type;
-	typedef typename buffer_type::iterator iterator;
-	typedef typename buffer_type::const_iterator const_iterator;
+
+	// mutable iterator of the stream, a wrapper around the iterator of the underlying MemBuffer
+	class iterator
+	{
+	public:
+
+		typename buffer_type::iterator membuffer_iterator; // the wrapped MemBuffer iterator
+
+		iterator();
+		iterator(const iterator & i);
+		iterator & operator=(const iterator & i);
+		// construction/assignment directly from a MemBuffer iterator
+		iterator(const typename buffer_type::iterator & i);
+		iterator & operator=(const typename buffer_type::iterator & i);
+		// comparisons are forwarded to the wrapped MemBuffer iterators
+		bool operator==(const iterator & i) const;
+		bool operator!=(const iterator & i) const;
+
+		iterator & operator++(); 		// prefix  ++
+		iterator   operator++(int); 	// postfix ++
+
+		iterator & operator--(); 		// prefix  --
+		iterator   operator--(int); 	// postfix --
+		// reference to the character at the current position
+		CharT & operator*();
+		// reads one character (decoding UTF-8 in char streams), moves the iterator, returns 0 when *this == end
+		wchar_t get_unicode_and_advance(const iterator & end);
+	};
+
+	// read-only iterator of the stream, a wrapper around the const_iterator of the underlying MemBuffer
+	class const_iterator
+	{
+	public:
+
+		typename buffer_type::const_iterator membuffer_const_iterator; // the wrapped MemBuffer const_iterator
+
+		const_iterator();
+		const_iterator(const const_iterator & i);
+		const_iterator(const iterator & i);
+		const_iterator & operator=(const const_iterator & i);
+		const_iterator & operator=(const iterator & i);
+		// construction/assignment directly from MemBuffer iterators
+		const_iterator(const typename buffer_type::const_iterator & i);
+		const_iterator(const typename buffer_type::iterator & i);
+		const_iterator & operator=(const typename buffer_type::const_iterator & i);
+		const_iterator & operator=(const typename buffer_type::iterator & i);
+		// comparisons are forwarded to the wrapped MemBuffer const_iterators
+		bool operator==(const const_iterator & i) const;
+		bool operator!=(const const_iterator & i) const;
+
+		const_iterator & operator++(); 		// prefix  ++
+		const_iterator   operator++(int); 	// postfix ++
+
+		const_iterator & operator--(); 		// prefix  --
+		const_iterator   operator--(int); 	// postfix --
+		// the character at the current position, returned by value
+		CharT operator*();
+		// reads one character (decoding UTF-8 in char streams), moves the iterator, returns 0 when *this == end
+		wchar_t get_unicode_and_advance(const const_iterator & end);
+
+	};


 	bool is_char_stream() const;
@@ -112,7 +172,7 @@ public:

 	TextStreamBase & operator<<(char);
 	TextStreamBase & operator<<(unsigned char);
-	TextStreamBase & operator<<(wchar_t);
+	TextStreamBase & operator<<(wchar_t); // no surrogate pairs are used
 	TextStreamBase & operator<<(bool);
 	TextStreamBase & operator<<(short);
 	TextStreamBase & operator<<(int);
@@ -173,6 +233,272 @@ TextStreamBase<char_type, stack_size, heap_block_size>::TextStreamBase()
 }


+
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+TextStreamBase<char_type, stack_size, heap_block_size>::iterator::iterator() // membuffer_iterator is default-constructed
+{
+}
+
+// copy constructor: copies the wrapped MemBuffer iterator of 'i' (the wrapper itself is not convertible to it)
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+TextStreamBase<char_type, stack_size, heap_block_size>::iterator::iterator(const iterator & i) : membuffer_iterator(i.membuffer_iterator)
+{
+}
+
+
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+typename TextStreamBase<char_type, stack_size, heap_block_size>::iterator &
+TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator=(const iterator & i)
+{
+	membuffer_iterator = i.membuffer_iterator; // assign the wrapped MemBuffer iterator, not the wrapper
+	return *this; // was missing: flowing off the end of a non-void function is undefined behavior
+}
+
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+TextStreamBase<char_type, stack_size, heap_block_size>::iterator::iterator(const typename buffer_type::iterator & i) : membuffer_iterator(i)
+{
+}
+
+// assignment from a raw MemBuffer iterator: replaces the wrapped iterator and returns this object
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+typename TextStreamBase<char_type, stack_size, heap_block_size>::iterator &
+TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator=(const typename buffer_type::iterator & i)
+{
+	membuffer_iterator = i;
+	return *this; // was missing: flowing off the end of a non-void function is undefined behavior
+}
+
+
+// equality is decided by comparing the wrapped MemBuffer iterators
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+bool TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator==(const iterator & i) const
+{
+	return membuffer_iterator == i.membuffer_iterator;
+}
+// inequality is forwarded to operator!= of the wrapped MemBuffer iterators
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+bool TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator!=(const iterator & i) const
+{
+	return membuffer_iterator != i.membuffer_iterator;
+}
+
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+typename TextStreamBase<char_type, stack_size, heap_block_size>::iterator &
+TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator++()
+{
+	++membuffer_iterator;
+	return *this;
+}
+// postfix ++: moves forward and returns the previous position (an iterator, there is no const_iterator -> iterator conversion)
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+typename TextStreamBase<char_type, stack_size, heap_block_size>::iterator
+TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator++(int)
+{
+	typename buffer_type::iterator previous(membuffer_iterator);
+	membuffer_iterator++;
+	return iterator(previous);
+}
+// prefix --: moves one position back and returns this object
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+typename TextStreamBase<char_type, stack_size, heap_block_size>::iterator &
+TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator--()
+{
+	--membuffer_iterator;
+	return *this;
+}
+// postfix --: moves back and returns the previous position (an iterator, there is no const_iterator -> iterator conversion)
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+typename TextStreamBase<char_type, stack_size, heap_block_size>::iterator
+TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator--(int)
+{
+	typename buffer_type::iterator previous(membuffer_iterator);
+	membuffer_iterator--;
+	return iterator(previous);
+}
+// dereference: a mutable reference to whatever the wrapped MemBuffer iterator points at
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+char_type & TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator*()
+{
+	return *membuffer_iterator;
+}
+
+
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+wchar_t TextStreamBase<char_type, stack_size, heap_block_size>::iterator::get_unicode_and_advance(const iterator & end)
+{
+	// nothing left to read: report 0 and leave the iterator untouched
+	if( !(*this != end) )
+		return 0;
+
+	if constexpr (sizeof(char_type) == sizeof(char) )
+	{
+		// char stream: decode one UTF-8 sequence,
+		// utf8_to_int() takes this iterator by reference and moves it forward
+		int code_point;
+		bool is_valid;
+		utf8_to_int(*this, end, code_point, is_valid);
+
+		// a malformed sequence is reported as U+FFFD "replacement character"
+		return static_cast<wchar_t>(is_valid ? code_point : 0xFFFD);
+	}
+	else
+	{
+		// wide stream: one element is returned as one character
+		wchar_t current = operator*();
+		++membuffer_iterator;
+		return current;
+	}
+}
+
+
+
+
+// default constructor: membuffer_const_iterator is default-constructed
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator()
+{
+}
+// copy constructor: copies the wrapped MemBuffer const_iterator
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator(const const_iterator & i) : membuffer_const_iterator(i.membuffer_const_iterator)
+{
+}
+// conversion from the mutable iterator: built from its wrapped MemBuffer iterator
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator(const iterator & i) : membuffer_const_iterator(i.membuffer_iterator)
+{
+}
+// copy assignment: takes over the wrapped MemBuffer const_iterator of 'i'
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
+TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator=(const const_iterator & i)
+{
+	membuffer_const_iterator = i.membuffer_const_iterator;
+	 return *this;
+}
+// assignment from the mutable iterator: takes over its wrapped MemBuffer iterator
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
+TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator=(const iterator & i)
+{
+	membuffer_const_iterator = i.membuffer_iterator;
+	 return *this;
+}
+
+
+// construction and assignment directly from the MemBuffer iterators (const and non-const)
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator(const typename buffer_type::const_iterator & i) : membuffer_const_iterator(i)
+{
+}
+// NOTE(review): assumes buffer_type::const_iterator can be constructed from buffer_type::iterator -- confirm in MemBuffer
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator(const typename buffer_type::iterator & i) : membuffer_const_iterator(i)
+{
+}
+// assignment from a MemBuffer const_iterator: replaces the wrapped iterator and returns this object
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
+TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator=(const typename buffer_type::const_iterator & i)
+{
+	 membuffer_const_iterator = i;
+	 return *this;
+}
+// assignment from a MemBuffer iterator: replaces the wrapped iterator and returns this object
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
+TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator=(const typename buffer_type::iterator & i)
+{
+	 membuffer_const_iterator = i;
+	 return *this;
+}
+
+
+// equality is decided by comparing the wrapped MemBuffer const_iterators
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+bool TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator==(const const_iterator & i) const
+{
+	return membuffer_const_iterator == i.membuffer_const_iterator;
+}
+// inequality is forwarded to operator!= of the wrapped MemBuffer const_iterators
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+bool TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator!=(const const_iterator & i) const
+{
+	return membuffer_const_iterator != i.membuffer_const_iterator;
+}
+// prefix ++: moves the wrapped iterator one position forward and returns this object
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
+TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator++()
+{
+	++membuffer_const_iterator;
+	return *this;
+}
+// postfix ++: moves forward and returns a copy made before the move
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator
+TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator++(int)
+{
+	const_iterator old(*this);
+	membuffer_const_iterator++;
+	return old;
+}
+// prefix --: moves the wrapped iterator one position back and returns this object
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
+TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator--()
+{
+	--membuffer_const_iterator;
+	return *this;
+}
+// postfix --: moves back and returns a copy made before the move
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator
+TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator--(int)
+{
+	const_iterator old(*this);
+	membuffer_const_iterator--;
+	return old;
+}
+// dereference: the current character is returned by value, so it cannot be modified through this iterator
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+char_type TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator*()
+{
+	return *membuffer_const_iterator;
+}
+
+
+template<typename char_type, size_t stack_size, size_t heap_block_size>
+wchar_t TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::get_unicode_and_advance(const const_iterator & end)
+{
+	// nothing left to read: report 0 and leave the iterator untouched
+	if( !(*this != end) )
+		return 0;
+
+	if constexpr (sizeof(char_type) == sizeof(char) )
+	{
+		// char stream: decode one UTF-8 sequence,
+		// pt::utf8_to_int() takes this iterator by reference and moves it forward
+		int code_point;
+		bool is_valid;
+		pt::utf8_to_int(*this, end, code_point, is_valid);
+
+		// a malformed sequence is reported as U+FFFD "replacement character"
+		return static_cast<wchar_t>(is_valid ? code_point : 0xFFFD);
+	}
+	else
+	{
+		// wide stream: one element is returned as one character
+		wchar_t current = operator*();
+		++membuffer_const_iterator;
+		return current;
+	}
+}
+
+
+
+
+
 template<typename char_type, size_t stack_size, size_t heap_block_size>
 bool TextStreamBase<char_type, stack_size, heap_block_size>::is_char_stream() const
 {
@@ -433,10 +759,14 @@ template<typename char_type, size_t stack_size, size_t heap_block_size>
 TextStreamBase<char_type, stack_size, heap_block_size> &
 TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(char v)
 {
-	// IMPROVEME
-	// if char_type == 1 then if v <= 127 then put that char but if (unsigned)v > 127 put replacement character
-	// if char_type > 1 then simply put that character
-	buffer.append(static_cast<char_type>(v));
+	if constexpr (sizeof(char_type) == sizeof(wchar_t) )
+	{
+		buffer.append(static_cast<char_type>(static_cast<unsigned char>(v)));
+	}
+	else
+	{
+		buffer.append(v);
+	}

 return *this;
 }
@@ -446,9 +776,6 @@ template<typename char_type, size_t stack_size, size_t heap_block_size>
 TextStreamBase<char_type, stack_size, heap_block_size> &
 TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(unsigned char v)
 {
-	// IMPROVEME
-	// if char_type == 1 then if v <= 127 then put that char but if v > 127 put replacement character
-	// if char_type > 1 then simply put that character
 	buffer.append(static_cast<char_type>(v));

 return *this;
@@ -459,8 +786,14 @@ template<typename char_type, size_t stack_size, size_t heap_block_size>
 TextStreamBase<char_type, stack_size, heap_block_size> &
 TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(wchar_t v)
 {
-	// IMPROVEME add utf8/wide conversion, if v is from surrogate pair we can skip it
-	buffer.append(static_cast<char_type>(v));
+	if constexpr (sizeof(char_type) == sizeof(wchar_t) )
+	{
+		buffer.append(v);
+	}
+	else
+	{
+		pt::int_to_utf8(static_cast<int>(v), *this);
+	}

 return *this;
 }
@@ -45,6 +45,12 @@
 namespace pt
 {

+/*
+ * public methods are also defined in utf8_stream.h
+ *
+ */
+
+
 /*!
 	UTF-8, a transformation format of ISO 10646
 	http://tools.ietf.org/html/rfc3629
@@ -213,9 +219,7 @@ template<typename StreamType>
 bool wide_stream_to_utf8(const Stream & stream, StreamType & utf8, bool clear = true, int mode = 1);

 template<typename StreamTypeIn, typename StreamTypeOut>
-void wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, int mode = 1); // not tested, IMPROVE ME add clear parameter, mode parameter is not used
-
-
+void wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, bool clear = true, int mode = 1); // not tested, IMPROVE ME mode parameter is not used


 } // namespace
@@ -0,0 +1,104 @@
+/*
+ * This file is a part of PikoTools
+ * and is distributed under the (new) BSD licence.
+ * Author: Tomasz Sowa <t.sowa@ttmath.org>
+ */
+
+/*
+ * Copyright (c) 2021-2022, Tomasz Sowa
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ *  * Neither the name Tomasz Sowa nor the names of contributors to this
+ *    project may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef headerfile_picotools_utf8_utf8_stream
+#define headerfile_picotools_utf8_utf8_stream
+
+#include "textstream/textstream.h"
+
+namespace pt
+{
+
+
+/*!
+	this function converts one UTF-8 character into one wide-character
+
+	input:
+		iterator_in - a TextStream iterator to read from, it is moved past the octets which have been used
+		iterator_end - an end iterator (can be returned by end() method from TextStream)
+
+	output:
+		res - an output character
+		correct - true if it is a correct character
+		an octet which breaks a multi-octet sequence is not used, it is left for the next call
+		the function returns how many characters have been used from the input stream
+*/
+template<typename StreamIteratorType>
+size_t utf8_to_int(
+		StreamIteratorType & iterator_in,
+		const StreamIteratorType & iterator_end,
+		int & res,
+		bool & correct)
+{
+size_t i, len;
+unsigned char uz;
+
+	res = 0;
+	correct = false;
+
+	if( iterator_in == iterator_end )
+		return 0;
+
+	uz = *iterator_in;
+	++iterator_in; // the first octet is always used, so every call on a non-empty range makes progress
+
+	if( !private_namespace::utf8_to_int_first_octet(uz, len, res) )
+		return 1;
+
+	for(i=1 ; i<len ; ++i)
+	{
+		if( iterator_in == iterator_end )
+			return i;
+
+		uz = *iterator_in;
+		// check the octet before using it: a rejected octet may start the next character
+		if( !private_namespace::utf8_to_int_add_next_octet(uz, res) )
+			return i; // exactly i octets have been used, the iterator agrees with the returned value
+		++iterator_in;
+	}
+
+	if( utf8_check_range(res, len) )
+		correct = true;
+
+return len;
+}
+
+
+
+}
+
+#endif
@@ -47,6 +47,7 @@ namespace pt
 {


+
 template<typename StreamType>
 void int_to_wide(int c, StreamType & res)
 {
@@ -65,6 +66,7 @@ void int_to_wide(int c, StreamType & res)



+
 /*!
 	converting UTF-8 string to a TextStreamBase<wchar_t,...> stream
 	(need to be tested)
@@ -376,8 +378,11 @@ bool wide_stream_to_utf8(const Stream & stream, StreamType & utf8, bool clear, i

 // not tested
 template<typename StreamTypeIn, typename StreamTypeOut>
-void wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, int mode)
+void wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, bool clear, int mode)
 {
+	if( clear )
+		utf8.clear();
+
 	private_namespace::wide_to_utf8_generic(buffer, mode, [&utf8](const char * utf8_buffer, std::size_t buffer_len){
 		utf8.write(utf8_buffer, buffer_len);
 	});
@@ -385,8 +390,6 @@ void wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, int mode)



-
-
 } // namespace pt

 #endif
@@ -1,6 +1,5 @@
 # DO NOT DELETE

-./main.o: convert.h mainoptionsparser.h csvparser.h
 ./convert.o: convert.h test.h ../src/convert/convert.h
 ./convert.o: ../src/convert/inttostr.h ../src/convert/patternreplacer.h
 ./convert.o: ../src/textstream/textstream.h ../src/textstream/stream.h
@@ -9,8 +8,18 @@
 ./convert.o: ../src/textstream/stream.h ../src/utf8/utf8_templates.h
 ./convert.o: ../src/utf8/utf8_private.h ../src/date/date.h
 ./convert.o: ../src/membuffer/membuffer.h ../src/textstream/types.h
-./convert.o: ../src/convert/strtoint.h ../src/convert/text.h
-./convert.o: ../src/convert/misc.h ../src/convert/double.h
+./convert.o: ../src/utf8/utf8_stream.h ../src/convert/strtoint.h
+./convert.o: ../src/convert/text.h ../src/convert/misc.h
+./convert.o: ../src/convert/double.h
+./csvparser.o: csvparser.h ../src/csv/csvparser.h ../src/space/space.h
+./csvparser.o: ../src/textstream/types.h ../src/convert/inttostr.h
+./csvparser.o: ../src/utf8/utf8.h ../src/textstream/stream.h
+./csvparser.o: ../src/utf8/utf8_templates.h ../src/utf8/utf8_private.h
+./csvparser.o: ../src/convert/baseparser.h ../src/textstream/textstream.h
+./csvparser.o: ../src/textstream/stream.h ../src/date/date.h
+./csvparser.o: ../src/membuffer/membuffer.h ../src/textstream/types.h
+./csvparser.o: ../src/utf8/utf8_stream.h test.h
+./main.o: convert.h mainoptionsparser.h csvparser.h
 ./test.o: test.h
 ./mainoptionsparser.o: mainoptionsparser.h test.h
 ./mainoptionsparser.o: ../src/mainoptions/mainoptionsparser.h
@@ -24,9 +33,6 @@
 ./mainoptionsparser.o: ../src/textstream/textstream.h
 ./mainoptionsparser.o: ../src/textstream/stream.h ../src/date/date.h
 ./mainoptionsparser.o: ../src/membuffer/membuffer.h ../src/textstream/types.h
-./mainoptionsparser.o: ../src/convert/strtoint.h ../src/convert/text.h
-./mainoptionsparser.o: ../src/convert/misc.h ../src/convert/double.h
-./csvparser.o: csvparser.h ../src/csv/csvparser.h ../src/space/space.h
-./csvparser.o: ../src/textstream/types.h ../src/convert/inttostr.h
-./csvparser.o: ../src/utf8/utf8.h ../src/textstream/stream.h
-./csvparser.o: ../src/utf8/utf8_templates.h ../src/utf8/utf8_private.h test.h
+./mainoptionsparser.o: ../src/utf8/utf8_stream.h ../src/convert/strtoint.h
+./mainoptionsparser.o: ../src/convert/text.h ../src/convert/misc.h
+./mainoptionsparser.o: ../src/convert/double.h