HTMLFilter: added a std::vector<int> stack holding the current white-char mode - the white-char mode can be changed by the following tags: <textarea>, <pre>, <script>, <nofilter>

This commit is contained in:
Tomasz Sowa 2021-07-21 15:57:46 +02:00
parent c0e940c500
commit f6df8bc1bc
3 changed files with 89 additions and 105 deletions

View File

@ -463,14 +463,15 @@ bool first_tag_removed = false;
{
if( IsOpeningTagMark(lastc) )
{
if( IsClosingTagForLastItem() )
{
// the last tag is skipped when using patterns with %2 or %u2
PopStack(); // removing opening tag from the stack
first_tag_removed = true;
break;
}
// FIXME
// if( IsClosingTagForLastItem() )
// {
// // the last tag is skipped when using patterns with %2 or %u2
//
// PopStack(); // removing opening tag from the stack
// first_tag_removed = true;
// break;
// }
}
else
{

View File

@ -41,7 +41,9 @@
namespace pt
{
// Namespace-scope definitions of the HTMLFilter white mode constants.
// No initializers are given here, so the values are presumably supplied
// at the in-class declarations (not visible in this hunk).
const int HTMLFilter::WHITE_MODE_ORIGIN;
const int HTMLFilter::WHITE_MODE_SINGLE_LINE;
const int HTMLFilter::WHITE_MODE_TREE;
@ -197,6 +199,15 @@ void HTMLFilter::InsertTabs(size_t tabsize)
}
// Returns the white-char mode currently in effect:
// the most recently pushed entry of white_char_mode_tab,
// or WHITE_MODE_ORIGIN when the stack holds no entries.
int HTMLFilter::current_white_char_mode()
{
	return white_char_mode_tab.empty() ? WHITE_MODE_ORIGIN : white_char_mode_tab.back();
}
void HTMLFilter::CalcOrphansMaxLen(Orphans & orphans)
{
size_t i;
@ -396,76 +407,6 @@ void HTMLFilter::SkipWhiteWithFirstNewLine()
// Checks whether the input continues with a closing tag for the last item.
//
// The current character is skipped first; then, with white characters
// skipped between the parts, the method expects: a '/' character, a name
// equal to LastItem().name and a closing tag mark.
//
// Returns true only when all three parts were found (the closing tag mark
// is then consumed as well). Characters read along the way are not put back
// when the method returns false. tmp_name is passed to ReadItemName(), so it
// presumably holds the name which was read.
bool HTMLFilter::IsClosingTagForLastItem()
{
	read_char();
	SkipWhite();

	if( lastc != '/' )
		return false;

	read_char();
	SkipWhite();
	ReadItemName(tmp_name);

	if( !IsNameEqual(tmp_name, LastItem().name) )
		return false;

	SkipWhite();

	if( !IsClosingTagMark(lastc) )
		return false;

	read_char();
	return true;
}
// Used for such tags as: script, pre, textarea.
//
// Collects the following characters into tmp_text until either a closing tag
// for the last item is found or lastc becomes -1 (presumably the end of the
// input), and then prints the collected text with Put().
//
// When the closing tag was found the last item is removed from the stack
// and, if put_closing_tag_as_well is true, "</" + tmp_name + ">" is printed
// after the text.
void HTMLFilter::PutEverythingUntilClosingTag(bool put_closing_tag_as_well)
{
	bool closing_tag_found = false;
	tmp_text.clear();

	while( lastc != -1 && !closing_tag_found )
	{
		if( !IsOpeningTagMark(lastc) )
		{
			// an ordinary character - just remember it
			tmp_text += lastc;
			read_char();
			continue;
		}

		// IsClosingTagForLastItem() consumes the characters it inspects
		if( IsClosingTagForLastItem() )
		{
			PopStack();
			closing_tag_found = true;
		}
	}

	Put(tmp_text);

	if( closing_tag_found && put_closing_tag_as_well )
	{
		Put('<');
		Put('/');
		Put(tmp_name);
		Put('>');
	}
}
void HTMLFilter::SkipAndCheckClosingTag(std::wstring * remember_text)
{
bool is_quoted = false;
@ -870,7 +811,7 @@ void HTMLFilter::PutNormalWhite(bool & was_white_char, bool & was_new_line)
else
was_white_char = true;
if( white_mode == WHITE_MODE_ORIGIN )
if( current_white_char_mode() == WHITE_MODE_ORIGIN )
{
Put(lastc);
}
@ -878,7 +819,7 @@ void HTMLFilter::PutNormalWhite(bool & was_white_char, bool & was_new_line)
read_char();
}
if( white_mode == WHITE_MODE_SINGLE_LINE && (was_white_char || was_new_line) )
if( current_white_char_mode() == WHITE_MODE_SINGLE_LINE && (was_white_char || was_new_line) )
{
Put(' ');
}
@ -950,7 +891,7 @@ bool HTMLFilter::PutOpeningTag()
return false;
}
if( white_mode == WHITE_MODE_TREE && LastItem().new_line_before )
if( current_white_char_mode() == WHITE_MODE_TREE && LastItem().new_line_before )
{
Put(10);
PutTabs(LastItem().tree_index);
@ -1089,7 +1030,7 @@ void HTMLFilter::ReadText()
bool allow_put_new_line = false;
bool allow_put_space = false;
if( white_mode == WHITE_MODE_TREE )
if( current_white_char_mode() == WHITE_MODE_TREE )
{
if( LastItem().new_line || (wrap_line != 0 && LastItem().has_body_tag && line_len >= wrap_line) )
{
@ -1124,7 +1065,7 @@ void HTMLFilter::ReadText()
PutNormalWhite(was_white_char, was_new_line);
if( (was_white_char || was_new_line) && white_mode == WHITE_MODE_TREE )
if( (was_white_char || was_new_line) && current_white_char_mode() == WHITE_MODE_TREE )
{
allow_put_new_line = false;
allow_put_space = false;
@ -1270,7 +1211,7 @@ void HTMLFilter::ReadItemSpecial()
if( !skip_tags )
{
if( white_mode == WHITE_MODE_TREE && LastItem().new_line_before )
if( current_white_char_mode() == WHITE_MODE_TREE && LastItem().new_line_before )
{
Put(10);
PutTabs(LastItem().tree_index);
@ -1303,7 +1244,7 @@ void HTMLFilter::ReadItemSpecial()
Put(tmp_text);
Put('>');
if( is_first_item && white_mode == WHITE_MODE_TREE && is_equal_nc(LastItem().name.c_str(), L"!doctype") )
if( is_first_item && current_white_char_mode() == WHITE_MODE_TREE && is_equal_nc(LastItem().name.c_str(), L"!doctype") )
{
Put(10);
Put(10);
@ -1383,6 +1324,7 @@ bool HTMLFilter::ReadItem()
ReadItemOpening();
}
// IMPROVE ME later CheckSingleItemExceptions() can change opening to single type
ItemFound();
return true;
@ -1491,7 +1433,7 @@ bool HTMLFilter::IsLastTag(const std::wstring & name)
// checking exceptions for opening tags
void HTMLFilter::CheckExceptions()
void HTMLFilter::CheckSingleItemExceptions()
{
if( IsLastTag(L"meta") ||
IsLastTag(L"input") ||
@ -1508,21 +1450,47 @@ void HTMLFilter::CheckExceptions()
return;
}
// in safe_mode the script tag is ignored
if( !safe_mode && IsLastTag(L"script") )
PutEverythingUntilClosingTag(!skip_tags);
if( IsLastTag(L"pre") || IsLastTag(L"textarea") )
PutEverythingUntilClosingTag(!skip_tags);
if( IsLastTag(no_filter_tag) )
PutEverythingUntilClosingTag(false);
// move me to a better place
if( IsLastTag(L"body") )
LastItem().has_body_tag = true;
}
// Updates the white-char mode stack for tags which change the white-char mode:
// script (only when safe_mode is off - in safe_mode the script tag is ignored),
// pre, textarea and the tag named by no_filter_tag.
//
// For an opening item WHITE_MODE_ORIGIN is pushed on white_char_mode_tab,
// for any other item type the top entry is removed (if there is one).
// Items with other names leave the stack untouched.
void HTMLFilter::CheckWhiteCharsExceptions(Item & item)
{
	const bool is_script       = !safe_mode && IsNameEqual(item.name, L"script");
	const bool is_preformatted = IsNameEqual(item.name, L"pre") || IsNameEqual(item.name, L"textarea");
	const bool is_no_filter    = IsNameEqual(item.name, no_filter_tag);

	if( !(is_script || is_preformatted || is_no_filter) )
		return;

	if( item.type == Item::opening )
	{
		white_char_mode_tab.push_back(WHITE_MODE_ORIGIN);
	}
	else
	if( !white_char_mode_tab.empty() )
	{
		white_char_mode_tab.pop_back();
	}
}
void HTMLFilter::AddForgottenTags()
@ -1549,9 +1517,11 @@ int i;
for(int z=(int)stack_len-2 ; z>=i ; --z)
{
CheckWhiteCharsExceptions(pstack[z]);
if( !skip_tags && pstack[z].new_line )
{
if( white_mode == WHITE_MODE_TREE )
if( current_white_char_mode() == WHITE_MODE_TREE )
{
Put(10);
PutTabs(pstack[z].tree_index);
@ -1575,7 +1545,7 @@ void HTMLFilter::CheckStackPrintRest()
{
if( stack_len==0 || pstack[stack_len-1].new_line )
{
if( white_mode == WHITE_MODE_TREE )
if( current_white_char_mode() == WHITE_MODE_TREE )
{
Put(10);
PutTabs(pstack[stack_len-1].tree_index);
@ -1610,10 +1580,12 @@ void HTMLFilter::CheckClosingTags()
// there are more than one tag
if( (pstack[stack_len-1].is_commentary && pstack[stack_len-2].is_commentary) || IsNameEqual(pstack[stack_len-1].name, pstack[stack_len-2].name) )
{
CheckWhiteCharsExceptions(pstack[stack_len-1]);
// last closing tag is from the previous one
if( !skip_tags && pstack[stack_len-2].new_line )
{
if( white_mode == WHITE_MODE_TREE )
if( current_white_char_mode() == WHITE_MODE_TREE )
{
Put(10);
PutTabs(pstack[stack_len-2].tree_index);
@ -1668,7 +1640,8 @@ void HTMLFilter::ReadLoop()
{
if( LastItem().type == Item::opening )
{
CheckExceptions();
CheckSingleItemExceptions();
CheckWhiteCharsExceptions(LastItem());
}
else
if( LastItem().type == Item::special )
@ -1703,7 +1676,10 @@ void HTMLFilter::Read()
read_char(); // put first character to lastc
is_first_item = true;
if( white_mode != WHITE_MODE_ORIGIN )
white_char_mode_tab.clear();
white_char_mode_tab.push_back(white_mode);
if( current_white_char_mode() != WHITE_MODE_ORIGIN )
SkipWhiteLines();
// it can be some text or white lines before the first html tag (we print it)

View File

@ -251,7 +251,9 @@ protected:
virtual bool IsValidCharForName(int c);
virtual bool IsValidCharForAttrName(int c);
virtual bool IsValidCharForEntityName(int c);
virtual void CheckExceptions();
virtual void CheckSingleItemExceptions();
virtual void CheckWhiteCharsExceptions(Item & item);
virtual void Put(wchar_t c);
virtual void Put(const wchar_t * str, const wchar_t * end);
@ -300,7 +302,9 @@ protected:
void SkipWhite();
void SkipWhiteLines();
void SkipWhiteWithFirstNewLine();
bool IsClosingTagForLastItem();
int current_white_char_mode();
void SkipAndCheckClosingTag(std::wstring * remember_text = nullptr);
void PopStack();
@ -331,7 +335,7 @@ protected:
void PutNormalNonWhite(std::wstring & str, bool allow_put_new_line, bool allow_put_space);
void PutNormalWhite(bool & was_white_char, bool & was_new_line);
void PutEverythingUntilClosingTag(bool put_closing_tag_as_well);
void PutTabs(size_t len);
void PutNonBreakingSpace();
void CalcOrphansMaxLen(Orphans & orphans);
@ -341,6 +345,9 @@ protected:
size_t stack_len; // length of the stack
wchar_t * buffer; // buffer used when printing
std::wstring * out_string;
std::vector<int> white_char_mode_tab;
//bool last_new_line;
bool new_item_has_new_line_before;
int white_mode;