From 4f8ae6ce291d7bc535c39ef102e5bf85b351080e Mon Sep 17 00:00:00 2001
From: Tomasz Sowa <t.sowa@ttmath.org>
Date: Tue, 20 Jul 2021 20:48:01 +0200
Subject: [PATCH] some work in HTMLFilter - instead of directly using pchar
 pointer now we use pointers/streams from BaseParser - removed support for
 putting a white char in long words: removed BreakWord(size_t break_after_)
 method - changed the way how white characters are treated: added
 white_chars_mode(int mode) method   mode 0: WHITE_MODE_ORIGIN   mode 1:
 WHITE_MODE_SINGLE_LINE   mode 2: WHITE_MODE_TREE

---
 src/html/bbcodeparser.cpp |  59 +--
 src/html/htmlfilter.cpp   | 755 +++++++++++++++++++++-----------------
 src/html/htmlfilter.h     |  75 ++--
 3 files changed, 478 insertions(+), 411 deletions(-)

diff --git a/src/html/bbcodeparser.cpp b/src/html/bbcodeparser.cpp
index 0a60273..ec39de6 100644
--- a/src/html/bbcodeparser.cpp
+++ b/src/html/bbcodeparser.cpp
@@ -121,7 +121,7 @@ void BBCODEParser::PutNormalText(const wchar_t * str, const wchar_t * end)
 {
 int br_len;
 
-	if( *pchar == 0 )
+	if( lastc == -1 ) // end of input reached (the equivalent of the old '*pchar == 0' test)
 	{
 		// trimming last white characters at end of the user text
 		while( str<end && (IsWhite(*(end-1)) || *(end-1)==10) )
@@ -415,15 +415,17 @@ void BBCODEParser::PutOpeningTagFromEzc()
 	(*out_string) += '[';
 	(*out_string) += LastItem().name;
 
-	const wchar_t * start = pchar;
 
-	while( *pchar && *pchar!=']' )
-		++pchar;
-
-	if( *pchar == ']' )
-		++pchar;
-
-	Put(start, pchar);
+// FIXME
+//	const wchar_t * start = pchar;
+//
+//	while( *pchar && *pchar!=']' )
+//		++pchar;
+//
+//	if( *pchar == ']' )
+//		++pchar;
+//
+//	Put(start, pchar);
 }
 
 
@@ -453,13 +455,13 @@ void BBCODEParser::TrimWhiteWithNewLines(const wchar_t * & start, const wchar_t
 
 void BBCODEParser::PutHtmlArgument2(const Tags * tag, bool has_u)
 {
-const wchar_t * start = pchar;
-const wchar_t * end   = pchar;
+//const wchar_t * start = pchar;
+//const wchar_t * end   = pchar;
 bool first_tag_removed = false;
 
-	while( *pchar != 0 )
+	while( lastc != -1 )
 	{
-		if( IsOpeningTagMark(*pchar) )
+		if( IsOpeningTagMark(lastc) )
 		{
 			if( IsClosingTagForLastItem() )
 			{
@@ -472,8 +474,8 @@ bool first_tag_removed = false;
 		}
 		else
 		{
-			pchar += 1;
-			end = pchar;
+			read_char();
+			//end = pchar;
 		}
 	}
 
@@ -482,12 +484,14 @@ bool first_tag_removed = false;
 
 	if( has_u )
 	{
-		TrimWhiteWithNewLines(start, end);
-		PrintEncode(start, end);
+// FIXME
+//		TrimWhiteWithNewLines(start, end);
+//		PrintEncode(start, end);
 	}
 	else
 	{
-		PrintEscape(start, end);
+		// FIXME
+//		PrintEscape(start, end);
 	}
 }
 
@@ -545,15 +549,16 @@ void BBCODEParser::PutOpeningTagFromBBCode(const Tags * tag)
 	PutOpeningTagMark();
 	Put(tag->html_tag);
 
-	const wchar_t * start = pchar;
-
-	while( *pchar && *pchar != ']' )
-		++pchar;
-
-	PutHtmlArgument(tag, start, pchar);
-
-	if( *pchar == ']' )
-		++pchar;
+// FIXME
+//	const wchar_t * start = pchar;
+//
+//	while( *pchar && *pchar != ']' )
+//		++pchar;
+//
+//	PutHtmlArgument(tag, start, pchar);
+//
+//	if( *pchar == ']' )
+//		++pchar;
 
 	if( !tag->inline_tag )
 	{
diff --git a/src/html/htmlfilter.cpp b/src/html/htmlfilter.cpp
index d103b9e..5274950 100644
--- a/src/html/htmlfilter.cpp
+++ b/src/html/htmlfilter.cpp
@@ -48,10 +48,13 @@ namespace pt
 void HTMLFilter::Item::Clear()
 {
 	name.clear();
-	type         = none;
-	porphans     = 0;
-	new_line     = false;
-	has_body_tag = false;
+	type          = none;
+	is_commentary = false;
+	porphans      = nullptr;
+	new_line      = false;
+	new_line_in_the_middle = false;
+	has_body_tag  = false;
+	tree_index    = 0;
 }
 
 
@@ -64,10 +67,15 @@ HTMLFilter::Item::Item()
 
 void HTMLFilter::Filter(const wchar_t * in, std::wstring & out)
 {
-	pchar         = in;
+	reading_from_file         = false;
+	reading_from_wchar_string = true;
+	pchar_unicode             = in;
+	pchar_ascii               = 0;
+
 	stack_len     = 0;
 	out_string    = &out;
 	last_new_line = false;
+	was_ending_commentary = false;
 	line_len      = 0;
 	out_string->clear();
 
@@ -108,9 +116,9 @@ void HTMLFilter::Filter(const std::wstring & in, std::wstring & out)
 
 void HTMLFilter::SetSomeDefaults()
 {
+	white_mode  = WHITE_MODE_ORIGIN;
+
 	tab_size    = 2;
-	trim_white  = false;
-	break_after = 0;
 	wrap_line   = 0;
 	orphan_mode = orphan_nbsp;
 	safe_mode   = false;
@@ -160,16 +168,15 @@ HTMLFilter::~HTMLFilter()
 
 
 
-
-void HTMLFilter::BreakWord(size_t break_after_)
+void HTMLFilter::white_chars_mode(int mode)
 {
-	break_after = break_after_;
-
-	if( break_after > 10000 )
-		break_after = 10000;
+	if( mode >= WHITE_MODE_ORIGIN && mode <= WHITE_MODE_TREE )
+		white_mode = mode;
 }
 
 
+
+
 void HTMLFilter::WrapLine(size_t wrap_line_)
 {
 	wrap_line = wrap_line_;
@@ -180,12 +187,6 @@ void HTMLFilter::WrapLine(size_t wrap_line_)
 
 
 
-void HTMLFilter::TrimWhite(bool trim)
-{
-	trim_white = trim;
-}
-
-
 void HTMLFilter::InsertTabs(size_t tabsize)
 {
 	tab_size = tabsize;
@@ -322,9 +323,10 @@ bool HTMLFilter::PushStack()
 
 	if( stack_len > 0 )
 	{
-		// 'porphans' and 'has_body_tag' attributes are propagated
+		// 'porphans', 'has_body_tag' and 'tree_index' attributes are propagated
 		pstack[stack_len].porphans     = pstack[stack_len-1].porphans;
 		pstack[stack_len].has_body_tag = pstack[stack_len-1].has_body_tag;
+		pstack[stack_len].tree_index   = pstack[stack_len-1].tree_index;
 	}
 
 	stack_len += 1;
@@ -356,15 +358,15 @@ return false;
 
 void HTMLFilter::SkipWhite()
 {
-	while( IsWhite(*pchar) )
-		++pchar;
+	while( IsWhite(lastc) )
+		read_char();
 }
 
 
 void HTMLFilter::SkipWhiteLines()
 {
-	while( *pchar==10 || IsWhite(*pchar) )
-		++pchar;
+	while( lastc==10 || IsWhite(lastc) )
+		read_char();
 }
 
 
@@ -372,29 +374,22 @@ void HTMLFilter::SkipWhiteWithFirstNewLine()
 {
 	SkipWhite();
 
-	if( *pchar == 10 )
+	if( lastc == 10 )
 	{
-		pchar += 1;
+		read_char();
 		SkipWhite();
 	}
 }
 
 
-void HTMLFilter::SkipWhiteLines(const wchar_t * & str, const wchar_t * end)
-{
-	while( str < end && (*str==10 || IsWhite(*str)) )
-		++str;
-}
-
-
 void HTMLFilter::CheckNewLine()
 {
-const wchar_t * start = pchar;
+	if( white_mode == WHITE_MODE_TREE )
+	{
+		SkipWhite();
+	}
 
-	SkipWhite();
-	last_new_line = (*pchar==10);
-
-	pchar = start;
+	last_new_line = (lastc==10);
 }
 
 
@@ -402,22 +397,23 @@ const wchar_t * start = pchar;
 
 bool HTMLFilter::IsClosingTagForLastItem()
 {
-	pchar += 1;
+	read_char();
 	SkipWhite();
 
-	if( *pchar == '/' )
+	if( lastc == '/' )
 	{
-		pchar += 1;
+		read_char();
 		SkipWhite();
 
-		if( IsNameEqual(pchar, LastItem().name, LastItem().name.size()) )
+		ReadItemName(tmp_name);
+
+		if( IsNameEqual(tmp_name, LastItem().name) )
 		{
-			pchar += LastItem().name.size();
 			SkipWhite();
 
-			if( IsClosingTagMark(*pchar) )
+			if( IsClosingTagMark(lastc) )
 			{
-				pchar += 1;
+				read_char();
 				return true;
 			}
 		}
@@ -432,17 +428,16 @@ return false;
 // used for such tags as: script, pre, textarea
 void HTMLFilter::PutEverythingUntilClosingTag(bool put_closing_tag_as_well)
 {
-const wchar_t * start = pchar;
-const wchar_t * end = pchar;
+	bool was_closing_tag = false; // set when the closing tag of the last item has been consumed
+	tmp_text.clear(); // collects the raw text read before the closing tag
 
-	while( *pchar != 0 )
+	while( lastc != -1 )
 	{
-		if( IsOpeningTagMark(*pchar) )
+		if( IsOpeningTagMark(lastc) ) // FIXME(review): chars consumed by a failed IsClosingTagForLastItem() seem never to reach tmp_text (the old code printed them) - verify
 		{
 			if( IsClosingTagForLastItem() )
 			{
-				if( put_closing_tag_as_well )
-					end = pchar;
+				was_closing_tag = true;
 
 				PopStack();
 				CheckNewLine();
@@ -451,29 +446,37 @@ const wchar_t * end = pchar;
 		}
 		else
 		{
-			pchar += 1;
-			end = pchar;
+			tmp_text += lastc;
+			read_char();
 		}
 	}
 
-	Put(start, end);
+	Put(tmp_text);
+
+	if( was_closing_tag && put_closing_tag_as_well )
+	{
+		Put('<');
+		Put('/');
+		Put(tmp_name);
+		Put('>');
+	}
 }
 
 
 
 
-void HTMLFilter::SkipAndCheckClosingTag()
+void HTMLFilter::SkipAndCheckClosingTag(std::wstring * remember_text)
 {
 	bool is_quoted = false;
 	wchar_t quote_char = 0;
 
-	for( ; *pchar ; ++pchar )
+	while( lastc != -1 )
 	{
-		if( *pchar == '"' || *pchar == '\'' )
+		if( lastc == '"' || lastc == '\'' )
 		{
 			if( is_quoted )
 			{
-				if( *pchar == quote_char )
+				if( lastc == quote_char )
 				{
 					is_quoted = false;
 				}
@@ -481,20 +484,25 @@ void HTMLFilter::SkipAndCheckClosingTag()
 			else
 			{
 				is_quoted = true;
-				quote_char = *pchar;
+				quote_char = lastc;
 			}
 		}
 		else
-		if( !is_quoted && LastItem().type == Item::opening && IsClosingXmlSimpleTagMark(*pchar) ) // closing xml tag: default '/'
+		if( !is_quoted && LastItem().type == Item::opening && IsClosingXmlSimpleTagMark(lastc) ) // closing xml tag: default '/'
 		{
 			LastItem().type = Item::simple;
 		}
 		else
-		if( !is_quoted && IsClosingTagMark(*pchar) )
+		if( !is_quoted && IsClosingTagMark(lastc) )
 		{
-			++pchar;
+			read_char();
 			break;
 		}
+
+		if( remember_text )
+			(*remember_text) += lastc;
+
+		read_char();
 	}
 }
 
@@ -505,7 +513,7 @@ bool HTMLFilter::IsValidCharForName(int c)
 	if( (c>='a' && c<='z') ||
 		(c>='A' && c<='Z') ||
 		(c>='0' && c<='9') ||
-		c=='-' || c=='!' || c==':') // : for namespace character
+		c=='-' || c=='!' || c==':') // : is for a namespace character, - is for a commentary
 		return true;
 
 return false;
@@ -536,16 +544,28 @@ return false;
 }
 
 
-void HTMLFilter::ReadItemName()
+void HTMLFilter::ReadItemName(std::wstring & name, bool clear_name)
 {
 size_t i;
 
-	for( i=0 ; IsValidCharForName(*pchar) ; ++i )
+	if( clear_name )
+		name.clear();
+
+	for(i=0 ; IsValidCharForName(lastc) ; ++i)
 	{
 		if( i < WINIX_HTMLFILTER_ITEM_NAME_MAXLEN )
-			LastItem().name += *pchar;
+		{
+			name += lastc;
 
-		++pchar;
+			if( LastItem().type == Item::special && name == L"!--" )
+			{
+				LastItem().is_commentary = true;
+				read_char();
+				break;
+			}
+		}
+
+		read_char();
 	}
 }
 
@@ -557,71 +577,69 @@ size_t i;
 
 	attr_name.clear();
 
-	for( i=0 ; *pchar && IsValidCharForAttrName(*pchar) ; ++i )
+	for( i=0 ; lastc != -1 && IsValidCharForAttrName(lastc) ; ++i )
 	{
 		if( i < WINIX_HTMLFILTER_ATTR_NAME_MAXLEN )
-			attr_name += *pchar;
+			attr_name += lastc;
 
-		++pchar;
+		read_char();
 	}
 }
 
 
 
-void HTMLFilter::ReadItemAttrValueAdd(const wchar_t * value_start, const wchar_t * value_end)
+void HTMLFilter::ReadItemAttrValueAdd(const std::wstring & str)
 {
-	attr_value.push_back(std::wstring());
-
 	if( analyze_entities )
 	{
-		AnalyzeEntitiesAndPut(value_start, value_end, &attr_value.back());
+		attr_value.push_back(std::wstring());
+		AnalyzeEntitiesAndPut(str.c_str(), str.c_str() + str.size(), &attr_value.back());
 	}
 	else
 	{
-		attr_value.back().append(value_start, value_end);
+		attr_value.push_back(str);
 	}
 }
 
 
 void HTMLFilter::ReadItemAttrValue(bool has_quote, wchar_t quote_char)
 {
-size_t i;
-
 	attr_value.clear();
-	const wchar_t * value_start = pchar;
-	size_t value_len = 0; // how many non white characters
+	tmp_text.clear(); // the current white-separated token of the attribute value
 
-	for(i=0 ; *pchar ; ++i, ++pchar )
+	while( lastc != -1 )
 	{
 		if( has_quote )
 		{
-			if( *pchar == quote_char )
+			if( lastc == quote_char )
 				break;
 		}
 		else
 		{
-			if( IsClosingTagMark(*pchar) || *pchar == 10 || IsWhite(*pchar) )
+			if( IsClosingTagMark(lastc) || lastc == 10 || IsWhite(lastc) )
 				break;
 		}
 
-		if( *pchar==10 || IsWhite(*pchar) )
+		if( lastc==10 || IsWhite(lastc) )
 		{
-			if( value_len > 0 && value_len <= WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN )
-				ReadItemAttrValueAdd(value_start, pchar);
+			if( tmp_text.size() > 0 && tmp_text.size() <= WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN )
+				ReadItemAttrValueAdd(tmp_text);
 
-			value_len = 0;
+			tmp_text.clear();
 		}
 		else
 		{
-			if( value_len == 0 )
-				value_start = pchar;
+			// an overlong token stops growing at MAXLEN+1 chars so the size checks reject it as a whole
+			if( tmp_text.size() <= WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN )
+				tmp_text += lastc;
 
-			value_len += 1;
 		}
+
+		read_char();
 	}
 
-	if( value_len > 0 && value_len <= WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN )
-		ReadItemAttrValueAdd(value_start, pchar);
+	if( tmp_text.size() > 0 && tmp_text.size() <= WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN )
+		ReadItemAttrValueAdd(tmp_text);
 }
 
 
@@ -641,15 +659,6 @@ void HTMLFilter::Put(wchar_t c)
 }
 
 
-void HTMLFilter::Put(const wchar_t * str)
-{
-	out_string->append(str);
-
-	for( ; *str ; ++str)
-		CheckChar(*str);
-}
-
-
 void HTMLFilter::Put(const wchar_t * str, const wchar_t * end)
 {
 	if( str >= end )
@@ -663,12 +672,16 @@ void HTMLFilter::Put(const wchar_t * str, const wchar_t * end)
 }
 
 
+
 void HTMLFilter::Put(const std::wstring & str)
 {
-	out_string->append(str);
+	if( !str.empty() )
+	{
+		out_string->append(str);
 
-	for(size_t i=0 ; i<str.size() ; ++i)
-		CheckChar(str[i]);
+		for(size_t i=0 ; i < str.size() ; ++i)
+			CheckChar(str[i]);
+	}
 }
 
 
@@ -808,118 +821,75 @@ return CheckOrphan(str, end, LastItem().porphans->tab);
 }
 
 
-// if there is a semicolon nearby then we break the line after it
-// (useful in html entities)
-// !! dodac sprawdzanie czy dlugosc stringu nie jest mala tez (end-str)
-// i wtedy tez nie dodajemy zadnego znaku
-bool HTMLFilter::HasEntityEndAround(const wchar_t * str, const wchar_t * end)
+void HTMLFilter::PutNormalNonWhite(std::wstring & str, bool allow_put_new_line, bool allow_put_space)
 {
-size_t i, epsilon = 8;// !! IMPROVE ME put as a constant
-
-	for(i=0 ; str < end && i<epsilon ; ++i, ++str)
-		if( IsEndingEntityMark(*str) )
-			return true;
-
-return false;
-}
-
-
-void HTMLFilter::CheckLineWrap()
-{
-	if( wrap_line != 0 && LastItem().has_body_tag && line_len > wrap_line )
+	while( lastc != -1 && lastc != 10 && !IsWhite(lastc) && !IsOpeningTagMark(lastc) )
 	{
-		Put(10);
-		PutTabs(stack_len);
-	}
-}
+		str += lastc;
+		read_char();
 
-
-void HTMLFilter::PutNormalNonWhite(const wchar_t * & str, const wchar_t * end)
-{
-const wchar_t * word = str;
-size_t non_whites = 0;
-bool was_entity_end = false;
-
-	for( ; str < end && *str!=10 && !IsWhite(*str) ; ++str, ++non_whites )
-	{
-		if( break_after != 0 && non_whites >= break_after && (was_entity_end || !HasEntityEndAround(str, end)) )
+		if( IsEndingCommentaryTagMarkAtEndOfString(str) )
 		{
-			Put(word, str);
-			word           = str;
-			non_whites     = 0;
-			Put(' ');
-			CheckLineWrap();
+			str.erase(str.size() - 3); // IMPROVEME define a function or what
+			was_ending_commentary = true;
+			break;
 		}
+	}
 
-		was_entity_end = (IsEndingEntityMark(*str));
+	if( !str.empty() )
+	{
+		if( allow_put_new_line )
+		{
+			Put(10);
+			PutTabs(LastItem().tree_index + 1);
+		}
+		else
+		if( allow_put_space )
+		{
+			Put(' ');
+		}
 	}
 
 	if( analyze_entities )
-		AnalyzeEntitiesAndPut(word, str, nullptr);
+		AnalyzeEntitiesAndPut(str.c_str(), str.c_str() + str.size(), nullptr);
 	else
-		Put(word, str);
+		Put(str);
 }
 
 
-void HTMLFilter::PutNormalWhite(const wchar_t * & str, const wchar_t * end)
+bool HTMLFilter::PutNormalWhite()
 {
-	if( str < end )
+	bool was_white_char = false;
+	bool was_new_line = false;
+
+	while( lastc == 10 || IsWhite(lastc) )
 	{
-		if( trim_white )
-		{
-			Put(' ');
-			SkipWhiteLines(str, end);
-		}
-		else
-		{
-			while( str < end && (*str==10 || IsWhite(*str)) )
-			{
-				Put(*str);
+		was_white_char = true; // any white character, including a new line
 
-				if( *str == 10 )
-					PutTabs(stack_len);
+		if( lastc == 10 )
+			was_new_line = true;
 
-				++str;
-			}
+		if( white_mode == WHITE_MODE_ORIGIN )
+		{
+			Put(lastc);
 		}
+
+		read_char();
 	}
-}
 
-
-void HTMLFilter::PutNormalText(const wchar_t * str, const wchar_t * end)
-{
-const wchar_t * word, * white;
-
-	if( str < end )
-		CheckLineWrap();
-
-	while( str < end )
+	if( white_mode == WHITE_MODE_SINGLE_LINE && was_white_char )
 	{
-		word = str;
-		PutNormalNonWhite(str, end);
-
-		if( CheckOrphan(word, str) )
-		{
-			white = str;
-			SkipWhiteLines(str, end);
-
-			if( white < str )
-				PutNonBreakingSpace();
-		}
-		else
-		{
-			PutNormalWhite(str, end);
-
-			if( str < end ) // !! lub moze podobnie jak jest na gorze tutaj? juz nie mam sily myslec :(
-				CheckLineWrap();
-		}
-
-		// for safety (if str was not incremented then there is an infinite loop)
-		if( word == str )
-			break;
+		Put(' ');
 	}
-}
 
+	if( white_mode == WHITE_MODE_TREE && was_new_line )
+	{
+		// in WHITE_MODE_TREE white characters are written at the beginning of a <tag> or text
+	}
+
+	last_new_line = was_new_line;
+	return was_white_char;
+}
 
 
 
@@ -985,6 +955,12 @@ bool HTMLFilter::PutOpeningTag()
 		return false;
 	}
 
+	if( white_mode == WHITE_MODE_TREE && last_new_line )
+	{
+		Put(10);
+		PutTabs(LastItem().tree_index);
+	}
+
 	PutOpeningTagMark();
 	Put(LastItem().name);
 
@@ -993,14 +969,18 @@ return true;
 
 
 
-void HTMLFilter::PutClosingTag(const wchar_t * tag)
+void HTMLFilter::PutClosingTag(const Item & item)
 {
-	if( skip_tags || !IsTagSafe(tag) )
+	if( skip_tags || !IsTagSafe(item.name) )
 		return;
 
-	PutOpeningTagMark();
-	Put('/');
-	Put(tag);
+	if( !item.is_commentary )
+	{
+		PutOpeningTagMark();
+		Put('/');
+	}
+
+	Put(item.name);
 	PutClosingTagMark();
 }
 
@@ -1011,7 +991,7 @@ void HTMLFilter::PutTabs(size_t len)
 	if( len > 30 )
 		len = 30;
 
-	for(size_t i=0 ; i < (len*tab_size) ; ++i)
+	for(size_t i=0, spaces=len*tab_size ; i < spaces ; ++i) // unsigned index: no signed/unsigned comparison
 		(*out_string) += ' '; // we do not add them to 'line_len'
 }
 
 
@@ -1031,12 +1011,12 @@ void HTMLFilter::PutNonBreakingSpace()
 
 
 
-void HTMLFilter::PutNewLine()
-{
-	buffer[0] = 10;
-	Put(buffer, buffer+1);
-	line_len = 0;
-}
+//void HTMLFilter::PutNewLine()
+//{
+//	buffer[0] = 10; // CHECKME for what purpose is this buffer?
+//	Put(10);
+//	line_len = 0;
+//}
 
 
 // we assume the size of the opening mark to be one
@@ -1053,6 +1033,28 @@ bool HTMLFilter::IsClosingTagMark(wchar_t c)
 }
 
 
+// the slash in the closing tag mark e.g. </p>
+bool HTMLFilter::IsClosingTagIndicator(wchar_t c)
+{
+	return (c == '/');
+}
+
+
+// the exclamation mark at the beginning of a special tag e.g. <!DOCTYPE html>
+bool HTMLFilter::IsSpecialTagIndicator(wchar_t c)
+{
+	return (c == '!');
+}
+
+
+// the '=' operator e.g. class="value"
+bool HTMLFilter::IsAttributeAssignmentMark(wchar_t c)
+{
+	return (c == '=');
+}
+
+
+
 // the slash at the end <img src=".." /> (without '>' character)
 // we assume the size of the mark to be one
 bool HTMLFilter::IsClosingXmlSimpleTagMark(wchar_t c)
@@ -1061,18 +1063,33 @@ bool HTMLFilter::IsClosingXmlSimpleTagMark(wchar_t c)
 }
 
 
-bool HTMLFilter::IsOpeningCommentaryTagMark(const wchar_t * str)
+//bool HTMLFilter::IsOpeningCommentaryTagMark(const wchar_t * str)
+//{
+//static wchar_t comm_open[] = L"<!--";
+//size_t comm_open_len = sizeof(comm_open) / sizeof(wchar_t) - 1;
+//
+//	//return IsNameEqual(pchar, comm_open, comm_open_len);
+//	return false;
+//}
+//
+//
+//size_t HTMLFilter::OpeningCommentaryTagMarkSize()
+//{
+//	return 4; // size of "<!--"
+//}
+
+
+bool HTMLFilter::IsEndingCommentaryTagMarkAtEndOfString(const std::wstring & str)
 {
-static wchar_t comm_open[] = L"<!--";
-size_t comm_open_len = sizeof(comm_open) / sizeof(wchar_t) - 1;
+	static wchar_t comm_end[] = L"-->";
+	size_t comm_end_len = sizeof(comm_end) / sizeof(wchar_t) - 1;
 
-	return IsNameEqual(pchar, comm_open, comm_open_len);
-}
+	if( str.size() >= comm_end_len )
+	{
+		return IsNameEqual(str.c_str() + str.size() - comm_end_len, comm_end);
+	}
 
-
-size_t HTMLFilter::OpeningCommentaryTagMarkSize()
-{
-	return 4; // size of "<!--"
+	return false;
 }
 
 
@@ -1092,9 +1109,9 @@ bool HTMLFilter::IsEndingEntityMark(wchar_t c)
 // skipping the commentary tag if exists
 bool HTMLFilter::SkipCommentaryTagIfExists()
 {
-static wchar_t comm_close[] = L"-->";
+wchar_t comm_close[] = L"-->";
 size_t comm_close_len = sizeof(comm_close) / sizeof(wchar_t) - 1;
-
+/*
 	if( !IsOpeningCommentaryTagMark(pchar) )
 		return false;
 
@@ -1108,86 +1125,81 @@ size_t comm_close_len = sizeof(comm_close) / sizeof(wchar_t) - 1;
 		pchar += comm_close_len;
 
 	CheckNewLine();
+*/
+
 
 return true;
 }
 
 
-void HTMLFilter::ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white)
-{
-	if( trim_white )
-	{
-		// skipping all white chars (with new lines)
-		// but with remembering the last non white character
-		for( ; *pchar==10 || IsWhite(*pchar) ; ++pchar)
-			if( *pchar == 10 )
-				last_non_white = pchar;
-	}
-	else
-	{
-		// skipping first white chars with only one line between them
-		SkipWhite();
-		last_non_white = pchar;
-
-		if( *pchar == 10 )
-		{
-			++pchar;
-			SkipWhite();
-		}
-	}
-
-	start = pchar;
-
-	// exception for the commentary tag
-	if( IsOpeningCommentaryTagMark(pchar) || !IsOpeningTagMark(*pchar) )
-	{
-		PutNewLine();
-		PutTabs(stack_len);
-	}
-}
-
-
-
 // reading text between html tags
 void HTMLFilter::ReadNormalText()
 {
-const wchar_t * start = pchar;
-const wchar_t * last_non_white = pchar;
+	bool was_non_white_text = false;
 
-	if( last_new_line )
-		ReadNormalTextSkipWhite(start, last_non_white);
+	was_ending_commentary = false;
 
+	bool allow_put_new_line = false;
+	bool allow_put_space = false;
 
-	while( *pchar != 0 )
+	if( white_mode == WHITE_MODE_TREE )
 	{
-		const wchar_t * commentary_start = pchar;
-
-		if( SkipCommentaryTagIfExists() )
+		if( LastItem().new_line || (wrap_line != 0 && LastItem().has_body_tag && line_len >= wrap_line) )
 		{
-			last_non_white = pchar - 1; // pointing at the last '>' from a commentary
-			PutNormalText(start, commentary_start);
-
-			if( !skip_commentaries )
-			{
-				PutNormalText(commentary_start, pchar);
-			}
-
-			start = pchar;
-		}
-		else
-		{
-			if( IsOpeningTagMark(*pchar) )
-				break;
-
-			if( !IsWhite(*pchar) )
-				last_non_white = pchar;
-
-			pchar += 1;
+			allow_put_new_line = true;
 		}
 	}
 
-	last_new_line = (*last_non_white == 10);
-	PutNormalText(start, pchar);
+	while( lastc != -1 && !IsOpeningTagMark(lastc) )
+	{
+		tmp_text.clear();
+		PutNormalNonWhite(tmp_text, allow_put_new_line, allow_put_space);
+
+		if( !tmp_text.empty() )
+		{
+			allow_put_new_line = false;
+			allow_put_space = false;
+			was_non_white_text = true;
+		}
+
+		if( CheckOrphan(tmp_text.c_str(), tmp_text.c_str() + tmp_text.size()) )
+		{
+			if( lastc == 10 || IsWhite(lastc) )
+			{
+				SkipWhiteLines();
+				PutNonBreakingSpace();
+			}
+		}
+		else
+		{
+			if( was_ending_commentary )
+				break;
+
+			if( PutNormalWhite() && white_mode == WHITE_MODE_TREE )
+			{
+				if( last_new_line )
+				{
+					allow_put_new_line = true;
+					allow_put_space = false;
+
+					LastItem().new_line_in_the_middle = true;
+
+					if( !was_non_white_text )
+						LastItem().new_line = true;
+				}
+				else
+				{
+					allow_put_new_line = false;
+					allow_put_space = true;
+				}
+
+				if( wrap_line != 0 && LastItem().has_body_tag && line_len >= wrap_line )
+				{
+					allow_put_new_line = true;
+				}
+			}
+		}
+	}
 }
 
 
@@ -1197,15 +1209,7 @@ bool HTMLFilter::PrintOpeningItem()
 	if( skip_tags || IsNameEqual(no_filter_tag, LastItem().name) )
 		return true;
 
-	if( last_new_line )
-	{
-		PutNewLine();
-
-		if( stack_len > 1 )
-			PutTabs(stack_len-1);
-	}
-
-return PutOpeningTag();
+	return PutOpeningTag();
 }
 
 
@@ -1226,34 +1230,34 @@ bool HTMLFilter::ReadItemAttr()
 
 	SkipWhiteLines();
 
-	if( *pchar != '=' )
+	if( !IsAttributeAssignmentMark(lastc) ) // '='
 		return true;
 	
 	attr_has_value = true;
-	pchar += 1;				// skipping '='
+	read_char();				// skipping '='
 	SkipWhiteLines();
 
-	bool has_quote = (*pchar == '\"' || *pchar == '\'');
-	wchar_t quote_char = *pchar;
+	bool has_quote = (lastc == '\"' || lastc == '\'');
+	wchar_t quote_char = lastc;
 
 	if( has_quote )
-		pchar += 1;			// skipping the first quote mark
+		read_char();			// skipping the first quote mark
 
 	ReadItemAttrValue(has_quote, quote_char);
 
-	if( has_quote && *pchar == quote_char )
-		pchar += 1;			// skipping the last quote mark
+	if( has_quote && lastc == quote_char )
+		read_char();			// skipping the last quote mark
 
 return true;
 }
 
 
 
-bool HTMLFilter::CheckItemAttr()
+void HTMLFilter::CheckItemLangAttr()
 {
 	if( attr_has_value && IsNameEqual(L"lang", attr_name) )
 	{
-		LastItem().porphans = 0;
+		LastItem().porphans = nullptr;
 
 		if( !attr_value.empty() )
 		{
@@ -1267,8 +1271,6 @@ bool HTMLFilter::CheckItemAttr()
 				LastItem().porphans = &i->second;
 		}
 	}
-
-return true;
 }
 
 
@@ -1301,9 +1303,9 @@ size_t i;
 
 void HTMLFilter::ReadItemClosing()
 {
-	pchar += 1; // skipping '/'
+	read_char(); // skipping '/'
 	SkipWhiteLines();
-	ReadItemName();
+	ReadItemName(LastItem().name);
 	LastItem().type = Item::closing;
 	SkipAndCheckClosingTag();
 
@@ -1316,32 +1318,55 @@ void HTMLFilter::ReadItemSpecial()
 	LastItem().type = Item::special;
 
 	if( !skip_tags )
+	{
+		if( white_mode == WHITE_MODE_TREE && last_new_line )
+		{
+			Put(10);
+			PutTabs(LastItem().tree_index);
+		}
+
 		PutOpeningTagMark();
+	}
 
-	const wchar_t * start = pchar;
-	pchar += 1; // skipping '!'
+	read_char(); // skipping '!'
+	LastItem().name = '!';
+	ReadItemName(LastItem().name, false);
 
-	ReadItemName();
-	SkipAndCheckClosingTag();
-
-	if( !skip_tags && pchar > start )
-		Put(start, pchar);
-
-	// closing tag mark is printed directly from the source
+	if( skip_tags )
+	{
+		SkipAndCheckClosingTag();
+	}
+	else
+	{
+		if( LastItem().is_commentary )
+		{
+			Put(LastItem().name);
+		}
+		else
+		{
+			tmp_text.clear();
+			SkipWhiteLines();
+			SkipAndCheckClosingTag(&tmp_text);
+			Put(LastItem().name);
+			Put(' ');
+			Put(tmp_text);
+			Put('>');
+		}
+	}
 }
 
 
 void HTMLFilter::ReadItemOpening()
 {
 	LastItem().type = Item::opening;
-	ReadItemName();
+	ReadItemName(LastItem().name);
 	
 	if( PrintOpeningItem() )
 	{
 		while( ReadItemAttr() )
 		{
-			if( CheckItemAttr() )
-				PrintItemAttr();
+			CheckItemLangAttr();
+			PrintItemAttr();
 		}
 
 		SkipAndCheckClosingTag(); // here LastItem().type can be changed to 'simple'
@@ -1368,25 +1393,35 @@ void HTMLFilter::EntityFound(const wchar_t * str, const wchar_t * end)
 
 bool HTMLFilter::ReadItem()
 {
-	if( *pchar == 0 )
+	if( lastc == -1 )
 		return false;
 
 	if( !PushStack() )
 		return false;
 
-	pchar += 1;	// skipping the first '<'
-	SkipWhiteLines();
+	if( stack_len > 1 && pstack[stack_len-2].new_line_in_the_middle )
+		LastItem().tree_index += 1;
 
-	if( *pchar == '!' )
-		ReadItemSpecial();
+	if( was_ending_commentary )
+	{
+		LastItem().type = Item::closing;
+		LastItem().is_commentary = true;
+		LastItem().name = L"--";
+		was_ending_commentary = false;
+	}
 	else
-	if( *pchar == '/' ) // we have a closing tag (dodac jako metode wirtualna) !!
-		ReadItemClosing();
-	else
-		ReadItemOpening();
+	{
+		read_char();	// skipping the first opening tag mark '<'
+		SkipWhiteLines();
 
-	CheckNewLine();
-	LastItem().new_line = last_new_line;
+		if( IsSpecialTagIndicator(lastc) )
+			ReadItemSpecial();
+		else
+		if( IsClosingTagIndicator(lastc) )
+			ReadItemClosing();
+		else
+			ReadItemOpening();
+	}
 
 	ItemFound();
 
@@ -1556,11 +1591,14 @@ int i;
 	{
 		if( !skip_tags && pstack[z].new_line )
 		{
-			PutNewLine();
-			PutTabs(z);
+			if( white_mode == WHITE_MODE_TREE )
+			{
+				Put(10);
+				PutTabs(pstack[z].tree_index);
+			}
 		}
 
-		PutClosingTag(pstack[z].name.c_str());
+		PutClosingTag(pstack[z]);
 		pstack[z].Clear();
 	}
 
@@ -1576,10 +1614,19 @@ void HTMLFilter::CheckStackPrintRest()
 	while( stack_len-- > 0 )
 	{
 		if( stack_len==0 || pstack[stack_len-1].new_line )
-			PutNewLine();
+		{
+			if( white_mode == WHITE_MODE_TREE )
+			{
+				Put(10);
+				PutTabs(pstack[stack_len].tree_index); // indent of the tag being closed; [stack_len-1] would underflow when stack_len is 0
+			}
+			else
+			{
+				Put(' ');
+			}
+		}
 
-		PutTabs(stack_len);
-		PutClosingTag(pstack[stack_len].name.c_str());
+		PutClosingTag(pstack[stack_len]);
 	}
 }
 
@@ -1601,16 +1648,19 @@ void HTMLFilter::CheckClosingTags()
 	}
 
 	// there are more than one tag 
-	if( IsNameEqual(pstack[stack_len-1].name, pstack[stack_len-2].name) )
+	if( (pstack[stack_len-1].is_commentary && pstack[stack_len-2].is_commentary) || IsNameEqual(pstack[stack_len-1].name, pstack[stack_len-2].name) )
 	{
 		// last closing tag is from the previous one
 		if( !skip_tags && pstack[stack_len-2].new_line )
 		{
-			PutNewLine();
-			PutTabs(stack_len-2);
+			if( white_mode == WHITE_MODE_TREE )
+			{
+				Put(10);
+				PutTabs(pstack[stack_len-2].tree_index);
+			}
 		}
 
-		PutClosingTag(pstack[stack_len-1].name.c_str());
+		PutClosingTag(pstack[stack_len-1]);
 		last_new_line = pstack[stack_len-1].new_line;
 		PopStack();
 		PopStack();
@@ -1624,22 +1674,30 @@ void HTMLFilter::CheckClosingTags()
 
 bool HTMLFilter::PrintRest()
 {
-const wchar_t * start = pchar;
+//const wchar_t * start = pchar;
 
 	// in safe mode we do not print the rest html code
 	if( safe_mode || skip_tags )
 		return false;
 
-	while( *pchar )
-		++pchar;
+	bool was_chars = false;
 
-	if( pchar > start )
+	while( lastc != -1 )
 	{
-		Put(start, pchar);
-		return true;
+		Put(lastc);
+		read_char();
+		was_chars = true;
 	}
 
-return false;
+	return was_chars;
+
+//	if( pchar > start )
+//	{
+//		Put(start, pchar);
+//		return true;
+//	}
+
+//return false;
 }
 
 
@@ -1660,15 +1718,18 @@ void HTMLFilter::ReadLoop()
 				//pstack[stack_len-2].new_line = LastItem().new_line;
 			}
 			else
-			if( trim_white )
+			if( white_mode == WHITE_MODE_TREE )
 			{
 				// one new line after a simple or special tag
 				// (if the tag has level 0 in the tree - it not means that this is a first tag)
-				// for example can be DOCTYPE 
-				PutNewLine(); 
+				// for example can be DOCTYPE
+
+				if( !LastItem().is_commentary )
+					Put(10);
 			}
 
-			PopStack();
+			if( !LastItem().is_commentary )
+				PopStack();
 		}
 		else
 		if( LastItem().type == Item::closing )
@@ -1688,7 +1749,9 @@ void HTMLFilter::ReadLoop()
 
 void HTMLFilter::Read()
 {
-	if( trim_white )
+	read_char(); // put first character to lastc
+
+	if( white_mode != WHITE_MODE_ORIGIN )
 		SkipWhiteLines();
 
 	// it can be some text or white lines before the first html tag (we print it)
diff --git a/src/html/htmlfilter.h b/src/html/htmlfilter.h
index 35710d3..6407e0e 100644
--- a/src/html/htmlfilter.h
+++ b/src/html/htmlfilter.h
@@ -42,7 +42,7 @@
 #include <map>
 #include <vector>
 #include <algorithm>
-
+#include "convert/baseparser.h"
 
 
 namespace pt
@@ -90,7 +90,7 @@ namespace pt
 
 	the filter recognizes xml simple tags (with / at the end) such as: <br />
 */
-class HTMLFilter
+class HTMLFilter : public BaseParser
 {
 public:
 
@@ -111,27 +111,22 @@ public:
 	void Filter(const std::wstring & in, std::wstring & out);
 
 
-	// insert a white space into long words
-	// (only between html tags)
-	// skipped in such tags: script, pre, textarea
-	// break_after - after how many characters insert a space (0 - off)
-	void BreakWord(size_t break_after_);
+	const static int WHITE_MODE_ORIGIN = 0;
+	const static int WHITE_MODE_SINGLE_LINE = 1;
+	const static int WHITE_MODE_TREE = 2;
 
-	// insert a new line character into long lines
-	// (only between html tags)
+
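+	// selects how white characters are treated: WHITE_MODE_ORIGIN,
+	// WHITE_MODE_SINGLE_LINE or WHITE_MODE_TREE (any other value is ignored)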
+	void white_chars_mode(int mode);
+
+	// if the line is wrap_line_ characters long (or longer) then insert a new line character (in place of a white char)
+	// (only between html tags and only in <body> subtree)
 	// skipped in such tags: script, pre, textarea
-	// wrap_line - after how many characters wrap a line (0 - off)
+	// 0 - off
 	// lines are wrapped only in 'body' tag (useful for text in 'title' tag which is in 'head' section)
 	void WrapLine(size_t wrap_line_);
 
-	// trimming white characters (with new lines)
-	// at the beginning, at the end and in the middle of a string
-	// only between html tags
-	// at the beginning and at the end only one space is left
-	// skipped in such tags: script, pre, textarea
-	// false by default
-	void TrimWhite(bool trim);
-
 	// first tabs in a tree
 	// default: 2 (spaces)
 	// set 0 to turn off
@@ -207,9 +202,14 @@ protected:
 			none
 		} type;
 
+		bool is_commentary;
+
 		// is there a new line after this tag
 		bool new_line;
 
+		// is there a new line inside the text of this tag (set by ReadNormalText in WHITE_MODE_TREE)
+		bool new_line_in_the_middle;
+
 		// current orphans table
 		// (will be propagated)
 		Orphans * porphans;
@@ -218,6 +218,8 @@ protected:
 		// (will be propagated)
 		bool has_body_tag;
 
+		size_t tree_index;
+
 		void Clear();
 		Item();
 	};
@@ -235,12 +237,16 @@ protected:
 
 	virtual bool IsOpeningTagMark(wchar_t c);
 	virtual bool IsClosingTagMark(wchar_t c);
+	virtual bool IsClosingTagIndicator(wchar_t c);
+	virtual bool IsSpecialTagIndicator(wchar_t c);
+	virtual bool IsAttributeAssignmentMark(wchar_t c);
 	virtual bool IsClosingXmlSimpleTagMark(wchar_t c);
 	virtual bool IsStartingEntityMark(wchar_t c);
 	virtual bool IsEndingEntityMark(wchar_t c);
 
-	virtual bool   IsOpeningCommentaryTagMark(const wchar_t * str);
-	virtual size_t OpeningCommentaryTagMarkSize();
+//	virtual bool   IsOpeningCommentaryTagMark(const wchar_t * str);
+//	virtual size_t OpeningCommentaryTagMarkSize();
+	virtual bool IsEndingCommentaryTagMarkAtEndOfString(const std::wstring & str);
 
 	virtual bool IsValidCharForName(int c);
 	virtual bool IsValidCharForAttrName(int c);
@@ -249,7 +255,6 @@ protected:
 	virtual bool SkipCommentaryTagIfExists();
 
 	virtual void Put(wchar_t c);
-	virtual void Put(const wchar_t * str);
 	virtual void Put(const wchar_t * str, const wchar_t * end);
 	virtual void Put(const std::wstring & str);
 	virtual void AnalyzeEntitiesAndPut(const wchar_t * str, const wchar_t * end, std::wstring * out);
@@ -257,10 +262,7 @@ protected:
 	virtual void PutOpeningTagMark();
 	virtual void PutClosingTagMark();
 	virtual bool PutOpeningTag();
-	virtual void PutClosingTag(const wchar_t * tag);
-
-	virtual void PutNormalText(const wchar_t * str, const wchar_t * end);
-	virtual void ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white);
+	virtual void PutClosingTag(const Item & item);
 
 	virtual void ItemFound();
 	virtual void EntityFound(const wchar_t * str, const wchar_t * end);
@@ -299,9 +301,8 @@ protected:
 	void SkipWhite();
 	void SkipWhiteLines();
 	void SkipWhiteWithFirstNewLine();
-	void SkipWhiteLines(const wchar_t * & str, const wchar_t * end);
 	bool IsClosingTagForLastItem();
-	void SkipAndCheckClosingTag();
+	void SkipAndCheckClosingTag(std::wstring * remember_text = nullptr);
 
 	void PopStack();
 	bool PushStack();
@@ -312,13 +313,13 @@ protected:
 	void ReadNormalText();
 	bool PrintRest();
 	bool PrintOpeningItem();
-	void ReadItemName();
+	void ReadItemName(std::wstring & name, bool clear_name = true);
 	void ReadItemAttrName();
-	void ReadItemAttrValueAdd(const wchar_t * value_start, const wchar_t * value_end);
+	void ReadItemAttrValueAdd(const std::wstring & str);
 	void ReadItemAttrValue(bool has_quote, wchar_t quote_char);
 
 	bool ReadItemAttr();
-	bool CheckItemAttr();
+	void CheckItemLangAttr();
 	void PrintItemAttr();
 
 	void ReadItemClosing();
@@ -330,27 +331,23 @@ protected:
 
 	void CheckChar(wchar_t c);
 
-	void CheckLineWrap();
-	bool HasEntityEndAround(const wchar_t * str, const wchar_t * end);
-	void PutNormalNonWhite(const wchar_t * & str, const wchar_t * end);
-	void PutNormalWhite(const wchar_t * & str, const wchar_t * end);
+	void PutNormalNonWhite(std::wstring & str, bool allow_put_new_line, bool allow_put_space);
+	bool PutNormalWhite();
 	void PutEverythingUntilClosingTag(bool put_closing_tag_as_well);
 	void PutTabs(size_t len);
 	void PutNonBreakingSpace();
-	void PutNewLine();
 	void CalcOrphansMaxLen(Orphans & orphans);
 
-	const wchar_t * pchar;
 	Item empty;
 	Item * pstack;			// stack pointer
 	size_t stack_len;		// length of the stack
 	wchar_t * buffer;		// buffer used when printing
 	std::wstring * out_string;
 	bool last_new_line;
-	size_t break_after;		// insert a space into long words after 'break_after' characters
+	int white_mode;
 	size_t wrap_line;		// insert a new line character into long lines
-	bool trim_white;		// trimming white characters
 	size_t tab_size;
+	bool was_ending_commentary;
 	OrphanMode orphan_mode;
 	std::wstring attr_name;
 	std::vector<std::wstring> attr_value;
@@ -365,6 +362,8 @@ protected:
 	bool skip_commentaries;
 	bool skip_entities;
 	bool analyze_entities;
+	std::wstring tmp_text;
+	std::wstring tmp_name;
 };