HTMLFilter: added a std::vector<int> stack holding the current white-character mode - the mode can be changed by the following tags: <textarea>, <pre>, <script>, <nofilter>

This commit is contained in:
Tomasz Sowa 2021-07-21 15:57:46 +02:00
parent c0e940c500
commit f6df8bc1bc
3 changed files with 89 additions and 105 deletions

View File

@ -463,14 +463,15 @@ bool first_tag_removed = false;
{ {
if( IsOpeningTagMark(lastc) ) if( IsOpeningTagMark(lastc) )
{ {
if( IsClosingTagForLastItem() ) // FIXME
{ // if( IsClosingTagForLastItem() )
// the last tag is skipped when using patterns with %2 or %u2 // {
// // the last tag is skipped when using patterns with %2 or %u2
PopStack(); // removing opening tag from the stack //
first_tag_removed = true; // PopStack(); // removing opening tag from the stack
break; // first_tag_removed = true;
} // break;
// }
} }
else else
{ {

View File

@ -41,7 +41,9 @@
namespace pt namespace pt
{ {
// Out-of-class (namespace scope) definitions of the HTMLFilter white mode constants.
// No initializers are given here, so the values have to come from the in-class declarations.
const int HTMLFilter::WHITE_MODE_ORIGIN;
const int HTMLFilter::WHITE_MODE_SINGLE_LINE;
const int HTMLFilter::WHITE_MODE_TREE;
@ -197,6 +199,15 @@ void HTMLFilter::InsertTabs(size_t tabsize)
} }
// Returns the white char mode currently in effect.
// This is the most recently pushed entry of white_char_mode_tab;
// when the table holds no entries WHITE_MODE_ORIGIN is returned.
int HTMLFilter::current_white_char_mode()
{
	if( white_char_mode_tab.empty() )
	{
		return WHITE_MODE_ORIGIN;
	}

	return white_char_mode_tab.back();
}
void HTMLFilter::CalcOrphansMaxLen(Orphans & orphans) void HTMLFilter::CalcOrphansMaxLen(Orphans & orphans)
{ {
size_t i; size_t i;
@ -396,76 +407,6 @@ void HTMLFilter::SkipWhiteWithFirstNewLine()
// Checks whether the input at the current position is a closing tag
// for the item returned by LastItem().
//
// The function advances the input: it reads the next character, skips
// white characters, expects '/', reads a name into tmp_name, compares it
// with LastItem().name, skips white characters again and expects a closing
// tag mark, which is consumed as well.
//
// Returns true only when the whole sequence matched. When false is returned
// the characters read so far are not put back by this function.
bool HTMLFilter::IsClosingTagForLastItem()
{
	// move past the current character and any following white characters
	read_char();
	SkipWhite();

	if( lastc != '/' )
		return false;

	read_char();
	SkipWhite();
	ReadItemName(tmp_name);

	if( !IsNameEqual(tmp_name, LastItem().name) )
		return false;

	SkipWhite();

	if( !IsClosingTagMark(lastc) )
		return false;

	// consume the closing tag mark
	read_char();

	return true;
}
// Used for such tags as: script, pre, textarea.
//
// Collects input characters into tmp_text until either the end of the input
// (lastc == -1) or a closing tag for the last item is found, then prints the
// collected text with Put(). When the closing tag was found the last item is
// removed with PopStack(), and if put_closing_tag_as_well is true the closing
// tag is printed as "</" + tmp_name + ">" (tmp_name is filled in by
// IsClosingTagForLastItem()).
//
// NOTE(review): when an opening tag mark is seen but IsClosingTagForLastItem()
// returns false, nothing is appended to tmp_text in that iteration, so the
// characters consumed by that call appear to be dropped from the output - confirm.
void HTMLFilter::PutEverythingUntilClosingTag(bool put_closing_tag_as_well)
{
	bool closing_tag_found = false;

	tmp_text.clear();

	while( !closing_tag_found && lastc != -1 )
	{
		if( !IsOpeningTagMark(lastc) )
		{
			// an ordinary character, remember it and go on
			tmp_text += lastc;
			read_char();
			continue;
		}

		if( IsClosingTagForLastItem() )
		{
			closing_tag_found = true;
			PopStack();
		}
	}

	Put(tmp_text);

	if( !closing_tag_found || !put_closing_tag_as_well )
		return;

	Put('<');
	Put('/');
	Put(tmp_name);
	Put('>');
}
void HTMLFilter::SkipAndCheckClosingTag(std::wstring * remember_text) void HTMLFilter::SkipAndCheckClosingTag(std::wstring * remember_text)
{ {
bool is_quoted = false; bool is_quoted = false;
@ -870,7 +811,7 @@ void HTMLFilter::PutNormalWhite(bool & was_white_char, bool & was_new_line)
else else
was_white_char = true; was_white_char = true;
if( white_mode == WHITE_MODE_ORIGIN ) if( current_white_char_mode() == WHITE_MODE_ORIGIN )
{ {
Put(lastc); Put(lastc);
} }
@ -878,7 +819,7 @@ void HTMLFilter::PutNormalWhite(bool & was_white_char, bool & was_new_line)
read_char(); read_char();
} }
if( white_mode == WHITE_MODE_SINGLE_LINE && (was_white_char || was_new_line) ) if( current_white_char_mode() == WHITE_MODE_SINGLE_LINE && (was_white_char || was_new_line) )
{ {
Put(' '); Put(' ');
} }
@ -950,7 +891,7 @@ bool HTMLFilter::PutOpeningTag()
return false; return false;
} }
if( white_mode == WHITE_MODE_TREE && LastItem().new_line_before ) if( current_white_char_mode() == WHITE_MODE_TREE && LastItem().new_line_before )
{ {
Put(10); Put(10);
PutTabs(LastItem().tree_index); PutTabs(LastItem().tree_index);
@ -1089,7 +1030,7 @@ void HTMLFilter::ReadText()
bool allow_put_new_line = false; bool allow_put_new_line = false;
bool allow_put_space = false; bool allow_put_space = false;
if( white_mode == WHITE_MODE_TREE ) if( current_white_char_mode() == WHITE_MODE_TREE )
{ {
if( LastItem().new_line || (wrap_line != 0 && LastItem().has_body_tag && line_len >= wrap_line) ) if( LastItem().new_line || (wrap_line != 0 && LastItem().has_body_tag && line_len >= wrap_line) )
{ {
@ -1124,7 +1065,7 @@ void HTMLFilter::ReadText()
PutNormalWhite(was_white_char, was_new_line); PutNormalWhite(was_white_char, was_new_line);
if( (was_white_char || was_new_line) && white_mode == WHITE_MODE_TREE ) if( (was_white_char || was_new_line) && current_white_char_mode() == WHITE_MODE_TREE )
{ {
allow_put_new_line = false; allow_put_new_line = false;
allow_put_space = false; allow_put_space = false;
@ -1270,7 +1211,7 @@ void HTMLFilter::ReadItemSpecial()
if( !skip_tags ) if( !skip_tags )
{ {
if( white_mode == WHITE_MODE_TREE && LastItem().new_line_before ) if( current_white_char_mode() == WHITE_MODE_TREE && LastItem().new_line_before )
{ {
Put(10); Put(10);
PutTabs(LastItem().tree_index); PutTabs(LastItem().tree_index);
@ -1303,7 +1244,7 @@ void HTMLFilter::ReadItemSpecial()
Put(tmp_text); Put(tmp_text);
Put('>'); Put('>');
if( is_first_item && white_mode == WHITE_MODE_TREE && is_equal_nc(LastItem().name.c_str(), L"!doctype") ) if( is_first_item && current_white_char_mode() == WHITE_MODE_TREE && is_equal_nc(LastItem().name.c_str(), L"!doctype") )
{ {
Put(10); Put(10);
Put(10); Put(10);
@ -1383,6 +1324,7 @@ bool HTMLFilter::ReadItem()
ReadItemOpening(); ReadItemOpening();
} }
// IMPROVE ME later CheckSingleItemExceptions() can change opening to single type
ItemFound(); ItemFound();
return true; return true;
@ -1491,7 +1433,7 @@ bool HTMLFilter::IsLastTag(const std::wstring & name)
// checking exceptions for opening tags // checking exceptions for opening tags
void HTMLFilter::CheckExceptions() void HTMLFilter::CheckSingleItemExceptions()
{ {
if( IsLastTag(L"meta") || if( IsLastTag(L"meta") ||
IsLastTag(L"input") || IsLastTag(L"input") ||
@ -1508,21 +1450,47 @@ void HTMLFilter::CheckExceptions()
return; return;
} }
// in safe_mode the script tag is ignored // move me to a better place
if( !safe_mode && IsLastTag(L"script") )
PutEverythingUntilClosingTag(!skip_tags);
if( IsLastTag(L"pre") || IsLastTag(L"textarea") )
PutEverythingUntilClosingTag(!skip_tags);
if( IsLastTag(no_filter_tag) )
PutEverythingUntilClosingTag(false);
if( IsLastTag(L"body") ) if( IsLastTag(L"body") )
LastItem().has_body_tag = true; LastItem().has_body_tag = true;
} }
// Updates the white char mode stack (white_char_mode_tab) for tags inside
// which white characters are handled differently: script (only when
// safe_mode is off, in safe_mode the script tag is ignored), pre, textarea
// and the tag named by no_filter_tag.
//
// For such an item of type Item::opening WHITE_MODE_ORIGIN is pushed on the
// stack; for any other item type the top entry is popped (if there is one).
// Items with other names leave the stack untouched.
void HTMLFilter::CheckWhiteCharsExceptions(Item & item)
{
	const bool is_script        = !safe_mode && IsNameEqual(item.name, L"script");
	const bool is_preformatted  = IsNameEqual(item.name, L"pre") || IsNameEqual(item.name, L"textarea");
	const bool is_no_filter_tag = IsNameEqual(item.name, no_filter_tag);

	if( !(is_script || is_preformatted || is_no_filter_tag) )
		return;

	if( item.type == Item::opening )
	{
		white_char_mode_tab.push_back(WHITE_MODE_ORIGIN);
		return;
	}

	if( !white_char_mode_tab.empty() )
	{
		white_char_mode_tab.pop_back();
	}
}
void HTMLFilter::AddForgottenTags() void HTMLFilter::AddForgottenTags()
@ -1549,9 +1517,11 @@ int i;
for(int z=(int)stack_len-2 ; z>=i ; --z) for(int z=(int)stack_len-2 ; z>=i ; --z)
{ {
CheckWhiteCharsExceptions(pstack[z]);
if( !skip_tags && pstack[z].new_line ) if( !skip_tags && pstack[z].new_line )
{ {
if( white_mode == WHITE_MODE_TREE ) if( current_white_char_mode() == WHITE_MODE_TREE )
{ {
Put(10); Put(10);
PutTabs(pstack[z].tree_index); PutTabs(pstack[z].tree_index);
@ -1575,7 +1545,7 @@ void HTMLFilter::CheckStackPrintRest()
{ {
if( stack_len==0 || pstack[stack_len-1].new_line ) if( stack_len==0 || pstack[stack_len-1].new_line )
{ {
if( white_mode == WHITE_MODE_TREE ) if( current_white_char_mode() == WHITE_MODE_TREE )
{ {
Put(10); Put(10);
PutTabs(pstack[stack_len-1].tree_index); PutTabs(pstack[stack_len-1].tree_index);
@ -1610,10 +1580,12 @@ void HTMLFilter::CheckClosingTags()
// there are more than one tag // there are more than one tag
if( (pstack[stack_len-1].is_commentary && pstack[stack_len-2].is_commentary) || IsNameEqual(pstack[stack_len-1].name, pstack[stack_len-2].name) ) if( (pstack[stack_len-1].is_commentary && pstack[stack_len-2].is_commentary) || IsNameEqual(pstack[stack_len-1].name, pstack[stack_len-2].name) )
{ {
CheckWhiteCharsExceptions(pstack[stack_len-1]);
// last closing tag is from the previous one // last closing tag is from the previous one
if( !skip_tags && pstack[stack_len-2].new_line ) if( !skip_tags && pstack[stack_len-2].new_line )
{ {
if( white_mode == WHITE_MODE_TREE ) if( current_white_char_mode() == WHITE_MODE_TREE )
{ {
Put(10); Put(10);
PutTabs(pstack[stack_len-2].tree_index); PutTabs(pstack[stack_len-2].tree_index);
@ -1668,7 +1640,8 @@ void HTMLFilter::ReadLoop()
{ {
if( LastItem().type == Item::opening ) if( LastItem().type == Item::opening )
{ {
CheckExceptions(); CheckSingleItemExceptions();
CheckWhiteCharsExceptions(LastItem());
} }
else else
if( LastItem().type == Item::special ) if( LastItem().type == Item::special )
@ -1703,7 +1676,10 @@ void HTMLFilter::Read()
read_char(); // put first character to lastc read_char(); // put first character to lastc
is_first_item = true; is_first_item = true;
if( white_mode != WHITE_MODE_ORIGIN ) white_char_mode_tab.clear();
white_char_mode_tab.push_back(white_mode);
if( current_white_char_mode() != WHITE_MODE_ORIGIN )
SkipWhiteLines(); SkipWhiteLines();
// it can be some text or white lines before the first html tag (we print it) // it can be some text or white lines before the first html tag (we print it)

View File

@ -251,7 +251,9 @@ protected:
virtual bool IsValidCharForName(int c); virtual bool IsValidCharForName(int c);
virtual bool IsValidCharForAttrName(int c); virtual bool IsValidCharForAttrName(int c);
virtual bool IsValidCharForEntityName(int c); virtual bool IsValidCharForEntityName(int c);
virtual void CheckExceptions();
virtual void CheckSingleItemExceptions();
virtual void CheckWhiteCharsExceptions(Item & item);
virtual void Put(wchar_t c); virtual void Put(wchar_t c);
virtual void Put(const wchar_t * str, const wchar_t * end); virtual void Put(const wchar_t * str, const wchar_t * end);
@ -300,7 +302,9 @@ protected:
void SkipWhite(); void SkipWhite();
void SkipWhiteLines(); void SkipWhiteLines();
void SkipWhiteWithFirstNewLine(); void SkipWhiteWithFirstNewLine();
bool IsClosingTagForLastItem();
int current_white_char_mode();
void SkipAndCheckClosingTag(std::wstring * remember_text = nullptr); void SkipAndCheckClosingTag(std::wstring * remember_text = nullptr);
void PopStack(); void PopStack();
@ -331,7 +335,7 @@ protected:
void PutNormalNonWhite(std::wstring & str, bool allow_put_new_line, bool allow_put_space); void PutNormalNonWhite(std::wstring & str, bool allow_put_new_line, bool allow_put_space);
void PutNormalWhite(bool & was_white_char, bool & was_new_line); void PutNormalWhite(bool & was_white_char, bool & was_new_line);
void PutEverythingUntilClosingTag(bool put_closing_tag_as_well);
void PutTabs(size_t len); void PutTabs(size_t len);
void PutNonBreakingSpace(); void PutNonBreakingSpace();
void CalcOrphansMaxLen(Orphans & orphans); void CalcOrphansMaxLen(Orphans & orphans);
@ -341,6 +345,9 @@ protected:
size_t stack_len; // length of the stack size_t stack_len; // length of the stack
wchar_t * buffer; // buffer used when printing wchar_t * buffer; // buffer used when printing
std::wstring * out_string; std::wstring * out_string;
std::vector<int> white_char_mode_tab;
//bool last_new_line; //bool last_new_line;
bool new_item_has_new_line_before; bool new_item_has_new_line_before;
int white_mode; int white_mode;