HTMLFilter: added a std::vector<int> stack that holds the current white-char mode - the mode can be changed by the following tags: <textarea>, <pre>, <script>, <nofilter>
This commit is contained in:
parent
c0e940c500
commit
f6df8bc1bc
|
@ -463,14 +463,15 @@ bool first_tag_removed = false;
|
|||
{
|
||||
if( IsOpeningTagMark(lastc) )
|
||||
{
|
||||
if( IsClosingTagForLastItem() )
|
||||
{
|
||||
// the last tag is skipped when using patterns with %2 or %u2
|
||||
|
||||
PopStack(); // removing opening tag from the stack
|
||||
first_tag_removed = true;
|
||||
break;
|
||||
}
|
||||
// FIXME
|
||||
// if( IsClosingTagForLastItem() )
|
||||
// {
|
||||
// // the last tag is skipped when using patterns with %2 or %u2
|
||||
//
|
||||
// PopStack(); // removing opening tag from the stack
|
||||
// first_tag_removed = true;
|
||||
// break;
|
||||
// }
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -41,7 +41,9 @@
|
|||
|
||||
namespace pt
|
||||
{
|
||||
|
||||
const int HTMLFilter::WHITE_MODE_ORIGIN;
|
||||
const int HTMLFilter::WHITE_MODE_SINGLE_LINE;
|
||||
const int HTMLFilter::WHITE_MODE_TREE;
|
||||
|
||||
|
||||
|
||||
|
@ -197,6 +199,15 @@ void HTMLFilter::InsertTabs(size_t tabsize)
|
|||
}
|
||||
|
||||
|
||||
int HTMLFilter::current_white_char_mode()
|
||||
{
|
||||
if( !white_char_mode_tab.empty() )
|
||||
return white_char_mode_tab.back();
|
||||
|
||||
return WHITE_MODE_ORIGIN;
|
||||
}
|
||||
|
||||
|
||||
void HTMLFilter::CalcOrphansMaxLen(Orphans & orphans)
|
||||
{
|
||||
size_t i;
|
||||
|
@ -396,76 +407,6 @@ void HTMLFilter::SkipWhiteWithFirstNewLine()
|
|||
|
||||
|
||||
|
||||
bool HTMLFilter::IsClosingTagForLastItem()
|
||||
{
|
||||
read_char();
|
||||
SkipWhite();
|
||||
|
||||
if( lastc == '/' )
|
||||
{
|
||||
read_char();
|
||||
SkipWhite();
|
||||
|
||||
ReadItemName(tmp_name);
|
||||
|
||||
if( IsNameEqual(tmp_name, LastItem().name) )
|
||||
{
|
||||
SkipWhite();
|
||||
|
||||
if( IsClosingTagMark(lastc) )
|
||||
{
|
||||
read_char();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// used for such tags as: script, pre, textarea
|
||||
void HTMLFilter::PutEverythingUntilClosingTag(bool put_closing_tag_as_well)
|
||||
{
|
||||
bool was_closing_tag = false;
|
||||
tmp_text.clear();
|
||||
|
||||
while( lastc != -1 )
|
||||
{
|
||||
if( IsOpeningTagMark(lastc) )
|
||||
{
|
||||
if( IsClosingTagForLastItem() )
|
||||
{
|
||||
was_closing_tag = true;
|
||||
|
||||
PopStack();
|
||||
//CheckNewLine();
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
tmp_text += lastc;
|
||||
read_char();
|
||||
}
|
||||
}
|
||||
|
||||
Put(tmp_text);
|
||||
|
||||
if( was_closing_tag && put_closing_tag_as_well )
|
||||
{
|
||||
Put('<');
|
||||
Put('/');
|
||||
Put(tmp_name);
|
||||
Put('>');
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void HTMLFilter::SkipAndCheckClosingTag(std::wstring * remember_text)
|
||||
{
|
||||
bool is_quoted = false;
|
||||
|
@ -870,7 +811,7 @@ void HTMLFilter::PutNormalWhite(bool & was_white_char, bool & was_new_line)
|
|||
else
|
||||
was_white_char = true;
|
||||
|
||||
if( white_mode == WHITE_MODE_ORIGIN )
|
||||
if( current_white_char_mode() == WHITE_MODE_ORIGIN )
|
||||
{
|
||||
Put(lastc);
|
||||
}
|
||||
|
@ -878,7 +819,7 @@ void HTMLFilter::PutNormalWhite(bool & was_white_char, bool & was_new_line)
|
|||
read_char();
|
||||
}
|
||||
|
||||
if( white_mode == WHITE_MODE_SINGLE_LINE && (was_white_char || was_new_line) )
|
||||
if( current_white_char_mode() == WHITE_MODE_SINGLE_LINE && (was_white_char || was_new_line) )
|
||||
{
|
||||
Put(' ');
|
||||
}
|
||||
|
@ -950,7 +891,7 @@ bool HTMLFilter::PutOpeningTag()
|
|||
return false;
|
||||
}
|
||||
|
||||
if( white_mode == WHITE_MODE_TREE && LastItem().new_line_before )
|
||||
if( current_white_char_mode() == WHITE_MODE_TREE && LastItem().new_line_before )
|
||||
{
|
||||
Put(10);
|
||||
PutTabs(LastItem().tree_index);
|
||||
|
@ -1089,7 +1030,7 @@ void HTMLFilter::ReadText()
|
|||
bool allow_put_new_line = false;
|
||||
bool allow_put_space = false;
|
||||
|
||||
if( white_mode == WHITE_MODE_TREE )
|
||||
if( current_white_char_mode() == WHITE_MODE_TREE )
|
||||
{
|
||||
if( LastItem().new_line || (wrap_line != 0 && LastItem().has_body_tag && line_len >= wrap_line) )
|
||||
{
|
||||
|
@ -1124,7 +1065,7 @@ void HTMLFilter::ReadText()
|
|||
|
||||
PutNormalWhite(was_white_char, was_new_line);
|
||||
|
||||
if( (was_white_char || was_new_line) && white_mode == WHITE_MODE_TREE )
|
||||
if( (was_white_char || was_new_line) && current_white_char_mode() == WHITE_MODE_TREE )
|
||||
{
|
||||
allow_put_new_line = false;
|
||||
allow_put_space = false;
|
||||
|
@ -1270,7 +1211,7 @@ void HTMLFilter::ReadItemSpecial()
|
|||
|
||||
if( !skip_tags )
|
||||
{
|
||||
if( white_mode == WHITE_MODE_TREE && LastItem().new_line_before )
|
||||
if( current_white_char_mode() == WHITE_MODE_TREE && LastItem().new_line_before )
|
||||
{
|
||||
Put(10);
|
||||
PutTabs(LastItem().tree_index);
|
||||
|
@ -1303,7 +1244,7 @@ void HTMLFilter::ReadItemSpecial()
|
|||
Put(tmp_text);
|
||||
Put('>');
|
||||
|
||||
if( is_first_item && white_mode == WHITE_MODE_TREE && is_equal_nc(LastItem().name.c_str(), L"!doctype") )
|
||||
if( is_first_item && current_white_char_mode() == WHITE_MODE_TREE && is_equal_nc(LastItem().name.c_str(), L"!doctype") )
|
||||
{
|
||||
Put(10);
|
||||
Put(10);
|
||||
|
@ -1383,6 +1324,7 @@ bool HTMLFilter::ReadItem()
|
|||
ReadItemOpening();
|
||||
}
|
||||
|
||||
// IMPROVE ME later CheckSingleItemExceptions() can change opening to single type
|
||||
ItemFound();
|
||||
|
||||
return true;
|
||||
|
@ -1491,7 +1433,7 @@ bool HTMLFilter::IsLastTag(const std::wstring & name)
|
|||
|
||||
|
||||
// checking exceptions for opening tags
|
||||
void HTMLFilter::CheckExceptions()
|
||||
void HTMLFilter::CheckSingleItemExceptions()
|
||||
{
|
||||
if( IsLastTag(L"meta") ||
|
||||
IsLastTag(L"input") ||
|
||||
|
@ -1508,21 +1450,47 @@ void HTMLFilter::CheckExceptions()
|
|||
return;
|
||||
}
|
||||
|
||||
// in safe_mode the script tag is ignored
|
||||
if( !safe_mode && IsLastTag(L"script") )
|
||||
PutEverythingUntilClosingTag(!skip_tags);
|
||||
|
||||
if( IsLastTag(L"pre") || IsLastTag(L"textarea") )
|
||||
PutEverythingUntilClosingTag(!skip_tags);
|
||||
|
||||
if( IsLastTag(no_filter_tag) )
|
||||
PutEverythingUntilClosingTag(false);
|
||||
|
||||
// move me to a better place
|
||||
if( IsLastTag(L"body") )
|
||||
LastItem().has_body_tag = true;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Updates white_char_mode_tab for items which change the white-character mode.
 *
 * Such items are those whose name is equal (IsNameEqual) to:
 *  - script (only when safe_mode is off - in safe_mode the script tag is ignored),
 *  - pre or textarea,
 *  - no_filter_tag.
 *
 * For such an item:
 *  - if item.type is Item::opening then WHITE_MODE_ORIGIN is pushed,
 *  - for any other type the last entry is popped (if there is one).
 *
 * For all other items the stack is left untouched.
 */
void HTMLFilter::CheckWhiteCharsExceptions(Item & item)
{
	const bool is_script       = !safe_mode && IsNameEqual(item.name, L"script");
	const bool is_preformatted = IsNameEqual(item.name, L"pre") || IsNameEqual(item.name, L"textarea");
	const bool is_no_filter    = IsNameEqual(item.name, no_filter_tag);

	if( !(is_script || is_preformatted || is_no_filter) )
		return;

	if( item.type == Item::opening )
	{
		white_char_mode_tab.push_back(WHITE_MODE_ORIGIN);
		return;
	}

	// not an opening item: drop the last mode, but never pop an empty stack
	if( !white_char_mode_tab.empty() )
		white_char_mode_tab.pop_back();
}
|
||||
|
||||
|
||||
|
||||
|
||||
void HTMLFilter::AddForgottenTags()
|
||||
|
@ -1549,9 +1517,11 @@ int i;
|
|||
|
||||
for(int z=(int)stack_len-2 ; z>=i ; --z)
|
||||
{
|
||||
CheckWhiteCharsExceptions(pstack[z]);
|
||||
|
||||
if( !skip_tags && pstack[z].new_line )
|
||||
{
|
||||
if( white_mode == WHITE_MODE_TREE )
|
||||
if( current_white_char_mode() == WHITE_MODE_TREE )
|
||||
{
|
||||
Put(10);
|
||||
PutTabs(pstack[z].tree_index);
|
||||
|
@ -1575,7 +1545,7 @@ void HTMLFilter::CheckStackPrintRest()
|
|||
{
|
||||
if( stack_len==0 || pstack[stack_len-1].new_line )
|
||||
{
|
||||
if( white_mode == WHITE_MODE_TREE )
|
||||
if( current_white_char_mode() == WHITE_MODE_TREE )
|
||||
{
|
||||
Put(10);
|
||||
PutTabs(pstack[stack_len-1].tree_index);
|
||||
|
@ -1610,10 +1580,12 @@ void HTMLFilter::CheckClosingTags()
|
|||
// there are more than one tag
|
||||
if( (pstack[stack_len-1].is_commentary && pstack[stack_len-2].is_commentary) || IsNameEqual(pstack[stack_len-1].name, pstack[stack_len-2].name) )
|
||||
{
|
||||
CheckWhiteCharsExceptions(pstack[stack_len-1]);
|
||||
|
||||
// last closing tag is from the previous one
|
||||
if( !skip_tags && pstack[stack_len-2].new_line )
|
||||
{
|
||||
if( white_mode == WHITE_MODE_TREE )
|
||||
if( current_white_char_mode() == WHITE_MODE_TREE )
|
||||
{
|
||||
Put(10);
|
||||
PutTabs(pstack[stack_len-2].tree_index);
|
||||
|
@ -1668,7 +1640,8 @@ void HTMLFilter::ReadLoop()
|
|||
{
|
||||
if( LastItem().type == Item::opening )
|
||||
{
|
||||
CheckExceptions();
|
||||
CheckSingleItemExceptions();
|
||||
CheckWhiteCharsExceptions(LastItem());
|
||||
}
|
||||
else
|
||||
if( LastItem().type == Item::special )
|
||||
|
@ -1703,7 +1676,10 @@ void HTMLFilter::Read()
|
|||
read_char(); // put first character to lastc
|
||||
is_first_item = true;
|
||||
|
||||
if( white_mode != WHITE_MODE_ORIGIN )
|
||||
white_char_mode_tab.clear();
|
||||
white_char_mode_tab.push_back(white_mode);
|
||||
|
||||
if( current_white_char_mode() != WHITE_MODE_ORIGIN )
|
||||
SkipWhiteLines();
|
||||
|
||||
// it can be some text or white lines before the first html tag (we print it)
|
||||
|
|
|
@ -251,7 +251,9 @@ protected:
|
|||
virtual bool IsValidCharForName(int c);
|
||||
virtual bool IsValidCharForAttrName(int c);
|
||||
virtual bool IsValidCharForEntityName(int c);
|
||||
virtual void CheckExceptions();
|
||||
|
||||
virtual void CheckSingleItemExceptions();
|
||||
virtual void CheckWhiteCharsExceptions(Item & item);
|
||||
|
||||
virtual void Put(wchar_t c);
|
||||
virtual void Put(const wchar_t * str, const wchar_t * end);
|
||||
|
@ -300,7 +302,9 @@ protected:
|
|||
void SkipWhite();
|
||||
void SkipWhiteLines();
|
||||
void SkipWhiteWithFirstNewLine();
|
||||
bool IsClosingTagForLastItem();
|
||||
|
||||
int current_white_char_mode();
|
||||
|
||||
void SkipAndCheckClosingTag(std::wstring * remember_text = nullptr);
|
||||
|
||||
void PopStack();
|
||||
|
@ -331,7 +335,7 @@ protected:
|
|||
|
||||
void PutNormalNonWhite(std::wstring & str, bool allow_put_new_line, bool allow_put_space);
|
||||
void PutNormalWhite(bool & was_white_char, bool & was_new_line);
|
||||
void PutEverythingUntilClosingTag(bool put_closing_tag_as_well);
|
||||
|
||||
void PutTabs(size_t len);
|
||||
void PutNonBreakingSpace();
|
||||
void CalcOrphansMaxLen(Orphans & orphans);
|
||||
|
@ -341,6 +345,9 @@ protected:
|
|||
size_t stack_len; // length of the stack
|
||||
wchar_t * buffer; // buffer used when printing
|
||||
std::wstring * out_string;
|
||||
|
||||
std::vector<int> white_char_mode_tab;
|
||||
|
||||
//bool last_new_line;
|
||||
bool new_item_has_new_line_before;
|
||||
int white_mode;
|
||||
|
|
Loading…
Reference in New Issue