diff --git a/src/html/htmlfilter.cpp b/src/html/htmlfilter.cpp
index 5274950..04888c3 100644
--- a/src/html/htmlfilter.cpp
+++ b/src/html/htmlfilter.cpp
@@ -36,7 +36,7 @@
*/
#include "htmlfilter.h"
-
+#include "convert/text.h"
namespace pt
@@ -48,13 +48,14 @@ namespace pt
void HTMLFilter::Item::Clear()
{
name.clear();
- type = none;
- is_commentary = false;
- porphans = nullptr;
- new_line = false;
+ type = none;
+ is_commentary = false;
+ porphans = nullptr;
+ new_line_before = false;
+ new_line = false;
new_line_in_the_middle = false;
- has_body_tag = false;
- tree_index = 0;
+ has_body_tag = false;
+ tree_index = 0;
}
@@ -74,7 +75,7 @@ void HTMLFilter::Filter(const wchar_t * in, std::wstring & out)
stack_len = 0;
out_string = &out;
- last_new_line = false;
+ //last_new_line = false;
was_ending_commentary = false;
line_len = 0;
out_string->clear();
@@ -382,15 +383,15 @@ void HTMLFilter::SkipWhiteWithFirstNewLine()
}
-void HTMLFilter::CheckNewLine()
-{
- if( white_mode == WHITE_MODE_TREE )
- {
- SkipWhite();
- }
-
- last_new_line = (lastc==10);
-}
+//void HTMLFilter::CheckNewLine()
+//{
+// if( white_mode == WHITE_MODE_TREE )
+// {
+// SkipWhite();
+// }
+//
+// last_new_line = (lastc==10);
+//}
@@ -440,7 +441,7 @@ void HTMLFilter::PutEverythingUntilClosingTag(bool put_closing_tag_as_well)
was_closing_tag = true;
PopStack();
- CheckNewLine();
+ //CheckNewLine();
break;
}
}
@@ -857,17 +858,17 @@ void HTMLFilter::PutNormalNonWhite(std::wstring & str, bool allow_put_new_line,
}
-bool HTMLFilter::PutNormalWhite()
+void HTMLFilter::PutNormalWhite(bool & was_white_char, bool & was_new_line)
{
- bool was_white_char = false;
- bool was_new_line = false;
+ was_white_char = false;
+ was_new_line = false;
while( lastc == 10 || IsWhite(lastc) )
{
- was_white_char = true; // anyone white char even new line
-
if( lastc == 10 )
was_new_line = true;
+ else
+ was_white_char = true;
if( white_mode == WHITE_MODE_ORIGIN )
{
@@ -877,18 +878,12 @@ bool HTMLFilter::PutNormalWhite()
read_char();
}
- if( white_mode == WHITE_MODE_SINGLE_LINE && was_white_char )
+ if( white_mode == WHITE_MODE_SINGLE_LINE && (was_white_char || was_new_line) )
{
Put(' ');
}
- if( white_mode == WHITE_MODE_TREE && was_new_line )
- {
- // in WHITE_MODE_TREE white characters are written at the beginning of a or text
- }
-
- last_new_line = was_new_line;
- return was_white_char;
+ // in WHITE_MODE_TREE white characters are written at the beginning of a tag or text
}
@@ -955,7 +950,7 @@ bool HTMLFilter::PutOpeningTag()
return false;
}
- if( white_mode == WHITE_MODE_TREE && last_new_line )
+ if( white_mode == WHITE_MODE_TREE && LastItem().new_line_before )
{
Put(10);
PutTabs(LastItem().tree_index);
@@ -991,7 +986,7 @@ void HTMLFilter::PutTabs(size_t len)
if( len > 30 )
len = 30;
- for(int i=0 ; i < (len*tab_size) ; ++i)
+ for(size_t i=0 ; i < (len*tab_size) ; ++i)
(*out_string) += ' '; // we do not add them to 'line_len'
}
@@ -1010,15 +1005,6 @@ void HTMLFilter::PutNonBreakingSpace()
-
-//void HTMLFilter::PutNewLine()
-//{
-// buffer[0] = 10; // CHECKME for what purpose is this buffer?
-// Put(10);
-// line_len = 0;
-//}
-
-
// we assume the size of the opening mark to be one
bool HTMLFilter::IsOpeningTagMark(wchar_t c)
{
@@ -1063,22 +1049,6 @@ bool HTMLFilter::IsClosingXmlSimpleTagMark(wchar_t c)
}
-//bool HTMLFilter::IsOpeningCommentaryTagMark(const wchar_t * str)
-//{
-//static wchar_t comm_open[] = L"<!--";
@@ -1106,35 +1076,12 @@ bool HTMLFilter::IsEndingEntityMark(wchar_t c)
-// skipping the commentary tag if exists
-bool HTMLFilter::SkipCommentaryTagIfExists()
-{
-wchar_t comm_close[] = L"-->";
-size_t comm_close_len = sizeof(comm_close) / sizeof(wchar_t) - 1;
-/*
- if( !IsOpeningCommentaryTagMark(pchar) )
- return false;
-
- pchar += OpeningCommentaryTagMarkSize();
-
- // looking for "-->"
- while( *pchar!=0 && !IsNameEqual(pchar, comm_close, comm_close_len) )
- ++pchar;
-
- if( *pchar!= 0 )
- pchar += comm_close_len;
-
- CheckNewLine();
-*/
-
-
-return true;
-}
-
-
// reading text between html tags
-void HTMLFilter::ReadNormalText()
+void HTMLFilter::ReadText()
{
+ bool was_white_char = false;
+ bool was_new_line = false;
+
bool was_non_white_text = false;
was_ending_commentary = false;
@@ -1175,13 +1122,16 @@ void HTMLFilter::ReadNormalText()
if( was_ending_commentary )
break;
- if( PutNormalWhite() && white_mode == WHITE_MODE_TREE )
+ PutNormalWhite(was_white_char, was_new_line);
+
+ if( (was_white_char || was_new_line) && white_mode == WHITE_MODE_TREE )
{
- if( last_new_line )
+ allow_put_new_line = false;
+ allow_put_space = false;
+
+ if( was_new_line )
{
allow_put_new_line = true;
- allow_put_space = false;
-
LastItem().new_line_in_the_middle = true;
if( !was_non_white_text )
@@ -1189,7 +1139,6 @@ void HTMLFilter::ReadNormalText()
}
else
{
- allow_put_new_line = false;
allow_put_space = true;
}
@@ -1200,6 +1149,8 @@ void HTMLFilter::ReadNormalText()
}
}
}
+
+ new_item_has_new_line_before = was_new_line;
}
@@ -1319,7 +1270,7 @@ void HTMLFilter::ReadItemSpecial()
if( !skip_tags )
{
- if( white_mode == WHITE_MODE_TREE && last_new_line )
+ if( white_mode == WHITE_MODE_TREE && LastItem().new_line_before )
{
Put(10);
PutTabs(LastItem().tree_index);
@@ -1351,6 +1302,13 @@ void HTMLFilter::ReadItemSpecial()
Put(' ');
Put(tmp_text);
Put('>');
+
+ if( is_first_item && white_mode == WHITE_MODE_TREE && is_equal_nc(LastItem().name.c_str(), L"!doctype") )
+ {
+ Put(10);
+ Put(10);
+ SkipWhiteLines();
+ }
}
}
}
@@ -1399,6 +1357,8 @@ bool HTMLFilter::ReadItem()
if( !PushStack() )
return false;
+ LastItem().new_line_before = new_item_has_new_line_before; // new_item_has_new_line_before is set by ReadText() method
+
if( stack_len > 1 && pstack[stack_len-2].new_line_in_the_middle )
LastItem().tree_index += 1;
@@ -1602,7 +1562,7 @@ int i;
pstack[z].Clear();
}
- last_new_line = pstack[stack_len-1].new_line;
+ //last_new_line = pstack[stack_len-1].new_line;
// invalidate tags
stack_len = i;
@@ -1661,7 +1621,7 @@ void HTMLFilter::CheckClosingTags()
}
PutClosingTag(pstack[stack_len-1]);
- last_new_line = pstack[stack_len-1].new_line;
+ //last_new_line = pstack[stack_len-1].new_line;
PopStack();
PopStack();
}
@@ -1711,27 +1671,17 @@ void HTMLFilter::ReadLoop()
CheckExceptions();
}
else
- if( LastItem().type == Item::special || LastItem().type == Item::simple )
+ if( LastItem().type == Item::special )
{
- if( stack_len > 1 )
- {
- //pstack[stack_len-2].new_line = LastItem().new_line;
- }
- else
- if( white_mode == WHITE_MODE_TREE )
- {
- // one new line after a simple or special tag
- // (if the tag has level 0 in the tree - it not means that this is a first tag)
- // for example can be DOCTYPE
-
- if( !LastItem().is_commentary )
- Put(10);
- }
-
if( !LastItem().is_commentary )
PopStack();
}
else
+ if( LastItem().type == Item::simple )
+ {
+ PopStack();
+ }
+ else
if( LastItem().type == Item::closing )
{
CheckClosingTags();
@@ -1741,7 +1691,8 @@ void HTMLFilter::ReadLoop()
PopStack();
}
- ReadNormalText();
+ ReadText();
+ is_first_item = false;
}
}
@@ -1750,12 +1701,13 @@ void HTMLFilter::ReadLoop()
void HTMLFilter::Read()
{
read_char(); // put first character to lastc
+ is_first_item = true;
if( white_mode != WHITE_MODE_ORIGIN )
SkipWhiteLines();
// it can be some text or white lines before the first html tag (we print it)
- ReadNormalText();
+ ReadText();
// reading the whole html source
ReadLoop();
diff --git a/src/html/htmlfilter.h b/src/html/htmlfilter.h
index 6407e0e..4b20ef4 100644
--- a/src/html/htmlfilter.h
+++ b/src/html/htmlfilter.h
@@ -204,6 +204,8 @@ protected:
bool is_commentary;
+ bool new_line_before;
+
// is there a new line after this tag
bool new_line;
@@ -244,15 +246,12 @@ protected:
virtual bool IsStartingEntityMark(wchar_t c);
virtual bool IsEndingEntityMark(wchar_t c);
-// virtual bool IsOpeningCommentaryTagMark(const wchar_t * str);
-// virtual size_t OpeningCommentaryTagMarkSize();
virtual bool IsEndingCommentaryTagMarkAtEndOfString(const std::wstring & str);
virtual bool IsValidCharForName(int c);
virtual bool IsValidCharForAttrName(int c);
virtual bool IsValidCharForEntityName(int c);
virtual void CheckExceptions();
- virtual bool SkipCommentaryTagIfExists();
virtual void Put(wchar_t c);
virtual void Put(const wchar_t * str, const wchar_t * end);
@@ -306,11 +305,10 @@ protected:
void PopStack();
bool PushStack();
- void CheckNewLine();
void CheckStackPrintRest();
void AddForgottenTags();
void CheckClosingTags();
- void ReadNormalText();
+ void ReadText();
bool PrintRest();
bool PrintOpeningItem();
void ReadItemName(std::wstring & name, bool clear_name = true);
@@ -332,7 +330,7 @@ protected:
void CheckChar(wchar_t c);
void PutNormalNonWhite(std::wstring & str, bool allow_put_new_line, bool allow_put_space);
- bool PutNormalWhite();
+ void PutNormalWhite(bool & was_white_char, bool & was_new_line);
void PutEverythingUntilClosingTag(bool put_closing_tag_as_well);
void PutTabs(size_t len);
void PutNonBreakingSpace();
@@ -343,8 +341,10 @@ protected:
size_t stack_len; // length of the stack
wchar_t * buffer; // buffer used when printing
std::wstring * out_string;
- bool last_new_line;
+ //bool last_new_line;
+ bool new_item_has_new_line_before;
int white_mode;
+ bool is_first_item;
size_t wrap_line; // insert a new line character into long lines
size_t tab_size;
bool was_ending_commentary;