HTMLFilter: added a std::vector<int> stack that holds the current white-char mode - the mode can be changed by the following tags: <textarea>, <pre>, <script>, <nofilter>
This commit is contained in:
parent
c0e940c500
commit
f6df8bc1bc
|
@ -463,14 +463,15 @@ bool first_tag_removed = false;
|
||||||
{
|
{
|
||||||
if( IsOpeningTagMark(lastc) )
|
if( IsOpeningTagMark(lastc) )
|
||||||
{
|
{
|
||||||
if( IsClosingTagForLastItem() )
|
// FIXME
|
||||||
{
|
// if( IsClosingTagForLastItem() )
|
||||||
// the last tag is skipped when using patterns with %2 or %u2
|
// {
|
||||||
|
// // the last tag is skipped when using patterns with %2 or %u2
|
||||||
PopStack(); // removing opening tag from the stack
|
//
|
||||||
first_tag_removed = true;
|
// PopStack(); // removing opening tag from the stack
|
||||||
break;
|
// first_tag_removed = true;
|
||||||
}
|
// break;
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
@ -41,7 +41,9 @@
|
||||||
|
|
||||||
namespace pt
|
namespace pt
|
||||||
{
|
{
|
||||||
|
const int HTMLFilter::WHITE_MODE_ORIGIN;
|
||||||
|
const int HTMLFilter::WHITE_MODE_SINGLE_LINE;
|
||||||
|
const int HTMLFilter::WHITE_MODE_TREE;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -197,6 +199,15 @@ void HTMLFilter::InsertTabs(size_t tabsize)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int HTMLFilter::current_white_char_mode()
|
||||||
|
{
|
||||||
|
if( !white_char_mode_tab.empty() )
|
||||||
|
return white_char_mode_tab.back();
|
||||||
|
|
||||||
|
return WHITE_MODE_ORIGIN;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void HTMLFilter::CalcOrphansMaxLen(Orphans & orphans)
|
void HTMLFilter::CalcOrphansMaxLen(Orphans & orphans)
|
||||||
{
|
{
|
||||||
size_t i;
|
size_t i;
|
||||||
|
@ -396,76 +407,6 @@ void HTMLFilter::SkipWhiteWithFirstNewLine()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
bool HTMLFilter::IsClosingTagForLastItem()
|
|
||||||
{
|
|
||||||
read_char();
|
|
||||||
SkipWhite();
|
|
||||||
|
|
||||||
if( lastc == '/' )
|
|
||||||
{
|
|
||||||
read_char();
|
|
||||||
SkipWhite();
|
|
||||||
|
|
||||||
ReadItemName(tmp_name);
|
|
||||||
|
|
||||||
if( IsNameEqual(tmp_name, LastItem().name) )
|
|
||||||
{
|
|
||||||
SkipWhite();
|
|
||||||
|
|
||||||
if( IsClosingTagMark(lastc) )
|
|
||||||
{
|
|
||||||
read_char();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// used for such tags as: script, pre, textarea
|
|
||||||
void HTMLFilter::PutEverythingUntilClosingTag(bool put_closing_tag_as_well)
|
|
||||||
{
|
|
||||||
bool was_closing_tag = false;
|
|
||||||
tmp_text.clear();
|
|
||||||
|
|
||||||
while( lastc != -1 )
|
|
||||||
{
|
|
||||||
if( IsOpeningTagMark(lastc) )
|
|
||||||
{
|
|
||||||
if( IsClosingTagForLastItem() )
|
|
||||||
{
|
|
||||||
was_closing_tag = true;
|
|
||||||
|
|
||||||
PopStack();
|
|
||||||
//CheckNewLine();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
tmp_text += lastc;
|
|
||||||
read_char();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Put(tmp_text);
|
|
||||||
|
|
||||||
if( was_closing_tag && put_closing_tag_as_well )
|
|
||||||
{
|
|
||||||
Put('<');
|
|
||||||
Put('/');
|
|
||||||
Put(tmp_name);
|
|
||||||
Put('>');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void HTMLFilter::SkipAndCheckClosingTag(std::wstring * remember_text)
|
void HTMLFilter::SkipAndCheckClosingTag(std::wstring * remember_text)
|
||||||
{
|
{
|
||||||
bool is_quoted = false;
|
bool is_quoted = false;
|
||||||
|
@ -870,7 +811,7 @@ void HTMLFilter::PutNormalWhite(bool & was_white_char, bool & was_new_line)
|
||||||
else
|
else
|
||||||
was_white_char = true;
|
was_white_char = true;
|
||||||
|
|
||||||
if( white_mode == WHITE_MODE_ORIGIN )
|
if( current_white_char_mode() == WHITE_MODE_ORIGIN )
|
||||||
{
|
{
|
||||||
Put(lastc);
|
Put(lastc);
|
||||||
}
|
}
|
||||||
|
@ -878,7 +819,7 @@ void HTMLFilter::PutNormalWhite(bool & was_white_char, bool & was_new_line)
|
||||||
read_char();
|
read_char();
|
||||||
}
|
}
|
||||||
|
|
||||||
if( white_mode == WHITE_MODE_SINGLE_LINE && (was_white_char || was_new_line) )
|
if( current_white_char_mode() == WHITE_MODE_SINGLE_LINE && (was_white_char || was_new_line) )
|
||||||
{
|
{
|
||||||
Put(' ');
|
Put(' ');
|
||||||
}
|
}
|
||||||
|
@ -950,7 +891,7 @@ bool HTMLFilter::PutOpeningTag()
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( white_mode == WHITE_MODE_TREE && LastItem().new_line_before )
|
if( current_white_char_mode() == WHITE_MODE_TREE && LastItem().new_line_before )
|
||||||
{
|
{
|
||||||
Put(10);
|
Put(10);
|
||||||
PutTabs(LastItem().tree_index);
|
PutTabs(LastItem().tree_index);
|
||||||
|
@ -1089,7 +1030,7 @@ void HTMLFilter::ReadText()
|
||||||
bool allow_put_new_line = false;
|
bool allow_put_new_line = false;
|
||||||
bool allow_put_space = false;
|
bool allow_put_space = false;
|
||||||
|
|
||||||
if( white_mode == WHITE_MODE_TREE )
|
if( current_white_char_mode() == WHITE_MODE_TREE )
|
||||||
{
|
{
|
||||||
if( LastItem().new_line || (wrap_line != 0 && LastItem().has_body_tag && line_len >= wrap_line) )
|
if( LastItem().new_line || (wrap_line != 0 && LastItem().has_body_tag && line_len >= wrap_line) )
|
||||||
{
|
{
|
||||||
|
@ -1124,7 +1065,7 @@ void HTMLFilter::ReadText()
|
||||||
|
|
||||||
PutNormalWhite(was_white_char, was_new_line);
|
PutNormalWhite(was_white_char, was_new_line);
|
||||||
|
|
||||||
if( (was_white_char || was_new_line) && white_mode == WHITE_MODE_TREE )
|
if( (was_white_char || was_new_line) && current_white_char_mode() == WHITE_MODE_TREE )
|
||||||
{
|
{
|
||||||
allow_put_new_line = false;
|
allow_put_new_line = false;
|
||||||
allow_put_space = false;
|
allow_put_space = false;
|
||||||
|
@ -1270,7 +1211,7 @@ void HTMLFilter::ReadItemSpecial()
|
||||||
|
|
||||||
if( !skip_tags )
|
if( !skip_tags )
|
||||||
{
|
{
|
||||||
if( white_mode == WHITE_MODE_TREE && LastItem().new_line_before )
|
if( current_white_char_mode() == WHITE_MODE_TREE && LastItem().new_line_before )
|
||||||
{
|
{
|
||||||
Put(10);
|
Put(10);
|
||||||
PutTabs(LastItem().tree_index);
|
PutTabs(LastItem().tree_index);
|
||||||
|
@ -1303,7 +1244,7 @@ void HTMLFilter::ReadItemSpecial()
|
||||||
Put(tmp_text);
|
Put(tmp_text);
|
||||||
Put('>');
|
Put('>');
|
||||||
|
|
||||||
if( is_first_item && white_mode == WHITE_MODE_TREE && is_equal_nc(LastItem().name.c_str(), L"!doctype") )
|
if( is_first_item && current_white_char_mode() == WHITE_MODE_TREE && is_equal_nc(LastItem().name.c_str(), L"!doctype") )
|
||||||
{
|
{
|
||||||
Put(10);
|
Put(10);
|
||||||
Put(10);
|
Put(10);
|
||||||
|
@ -1383,6 +1324,7 @@ bool HTMLFilter::ReadItem()
|
||||||
ReadItemOpening();
|
ReadItemOpening();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IMPROVE ME later CheckSingleItemExceptions() can change opening to single type
|
||||||
ItemFound();
|
ItemFound();
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -1491,7 +1433,7 @@ bool HTMLFilter::IsLastTag(const std::wstring & name)
|
||||||
|
|
||||||
|
|
||||||
// checking exceptions for opening tags
|
// checking exceptions for opening tags
|
||||||
void HTMLFilter::CheckExceptions()
|
void HTMLFilter::CheckSingleItemExceptions()
|
||||||
{
|
{
|
||||||
if( IsLastTag(L"meta") ||
|
if( IsLastTag(L"meta") ||
|
||||||
IsLastTag(L"input") ||
|
IsLastTag(L"input") ||
|
||||||
|
@ -1508,21 +1450,47 @@ void HTMLFilter::CheckExceptions()
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// in safe_mode the script tag is ignored
|
// move me to a better place
|
||||||
if( !safe_mode && IsLastTag(L"script") )
|
|
||||||
PutEverythingUntilClosingTag(!skip_tags);
|
|
||||||
|
|
||||||
if( IsLastTag(L"pre") || IsLastTag(L"textarea") )
|
|
||||||
PutEverythingUntilClosingTag(!skip_tags);
|
|
||||||
|
|
||||||
if( IsLastTag(no_filter_tag) )
|
|
||||||
PutEverythingUntilClosingTag(false);
|
|
||||||
|
|
||||||
if( IsLastTag(L"body") )
|
if( IsLastTag(L"body") )
|
||||||
LastItem().has_body_tag = true;
|
LastItem().has_body_tag = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void HTMLFilter::CheckWhiteCharsExceptions(Item & item)
|
||||||
|
{
|
||||||
|
bool change_white_mode = false;
|
||||||
|
|
||||||
|
// in safe_mode the script tag is ignored
|
||||||
|
if( !safe_mode && IsNameEqual(item.name, L"script") )
|
||||||
|
{
|
||||||
|
change_white_mode = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( IsNameEqual(item.name, L"pre") || IsNameEqual(item.name, L"textarea") )
|
||||||
|
{
|
||||||
|
change_white_mode = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( IsNameEqual(item.name, no_filter_tag) )
|
||||||
|
{
|
||||||
|
change_white_mode = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( change_white_mode )
|
||||||
|
{
|
||||||
|
if( item.type == Item::opening )
|
||||||
|
{
|
||||||
|
white_char_mode_tab.push_back(WHITE_MODE_ORIGIN);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if( !white_char_mode_tab.empty() )
|
||||||
|
white_char_mode_tab.pop_back();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void HTMLFilter::AddForgottenTags()
|
void HTMLFilter::AddForgottenTags()
|
||||||
|
@ -1549,9 +1517,11 @@ int i;
|
||||||
|
|
||||||
for(int z=(int)stack_len-2 ; z>=i ; --z)
|
for(int z=(int)stack_len-2 ; z>=i ; --z)
|
||||||
{
|
{
|
||||||
|
CheckWhiteCharsExceptions(pstack[z]);
|
||||||
|
|
||||||
if( !skip_tags && pstack[z].new_line )
|
if( !skip_tags && pstack[z].new_line )
|
||||||
{
|
{
|
||||||
if( white_mode == WHITE_MODE_TREE )
|
if( current_white_char_mode() == WHITE_MODE_TREE )
|
||||||
{
|
{
|
||||||
Put(10);
|
Put(10);
|
||||||
PutTabs(pstack[z].tree_index);
|
PutTabs(pstack[z].tree_index);
|
||||||
|
@ -1575,7 +1545,7 @@ void HTMLFilter::CheckStackPrintRest()
|
||||||
{
|
{
|
||||||
if( stack_len==0 || pstack[stack_len-1].new_line )
|
if( stack_len==0 || pstack[stack_len-1].new_line )
|
||||||
{
|
{
|
||||||
if( white_mode == WHITE_MODE_TREE )
|
if( current_white_char_mode() == WHITE_MODE_TREE )
|
||||||
{
|
{
|
||||||
Put(10);
|
Put(10);
|
||||||
PutTabs(pstack[stack_len-1].tree_index);
|
PutTabs(pstack[stack_len-1].tree_index);
|
||||||
|
@ -1610,10 +1580,12 @@ void HTMLFilter::CheckClosingTags()
|
||||||
// there are more than one tag
|
// there are more than one tag
|
||||||
if( (pstack[stack_len-1].is_commentary && pstack[stack_len-2].is_commentary) || IsNameEqual(pstack[stack_len-1].name, pstack[stack_len-2].name) )
|
if( (pstack[stack_len-1].is_commentary && pstack[stack_len-2].is_commentary) || IsNameEqual(pstack[stack_len-1].name, pstack[stack_len-2].name) )
|
||||||
{
|
{
|
||||||
|
CheckWhiteCharsExceptions(pstack[stack_len-1]);
|
||||||
|
|
||||||
// last closing tag is from the previous one
|
// last closing tag is from the previous one
|
||||||
if( !skip_tags && pstack[stack_len-2].new_line )
|
if( !skip_tags && pstack[stack_len-2].new_line )
|
||||||
{
|
{
|
||||||
if( white_mode == WHITE_MODE_TREE )
|
if( current_white_char_mode() == WHITE_MODE_TREE )
|
||||||
{
|
{
|
||||||
Put(10);
|
Put(10);
|
||||||
PutTabs(pstack[stack_len-2].tree_index);
|
PutTabs(pstack[stack_len-2].tree_index);
|
||||||
|
@ -1668,7 +1640,8 @@ void HTMLFilter::ReadLoop()
|
||||||
{
|
{
|
||||||
if( LastItem().type == Item::opening )
|
if( LastItem().type == Item::opening )
|
||||||
{
|
{
|
||||||
CheckExceptions();
|
CheckSingleItemExceptions();
|
||||||
|
CheckWhiteCharsExceptions(LastItem());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
if( LastItem().type == Item::special )
|
if( LastItem().type == Item::special )
|
||||||
|
@ -1703,7 +1676,10 @@ void HTMLFilter::Read()
|
||||||
read_char(); // put first character to lastc
|
read_char(); // put first character to lastc
|
||||||
is_first_item = true;
|
is_first_item = true;
|
||||||
|
|
||||||
if( white_mode != WHITE_MODE_ORIGIN )
|
white_char_mode_tab.clear();
|
||||||
|
white_char_mode_tab.push_back(white_mode);
|
||||||
|
|
||||||
|
if( current_white_char_mode() != WHITE_MODE_ORIGIN )
|
||||||
SkipWhiteLines();
|
SkipWhiteLines();
|
||||||
|
|
||||||
// it can be some text or white lines before the first html tag (we print it)
|
// it can be some text or white lines before the first html tag (we print it)
|
||||||
|
|
|
@ -251,7 +251,9 @@ protected:
|
||||||
virtual bool IsValidCharForName(int c);
|
virtual bool IsValidCharForName(int c);
|
||||||
virtual bool IsValidCharForAttrName(int c);
|
virtual bool IsValidCharForAttrName(int c);
|
||||||
virtual bool IsValidCharForEntityName(int c);
|
virtual bool IsValidCharForEntityName(int c);
|
||||||
virtual void CheckExceptions();
|
|
||||||
|
virtual void CheckSingleItemExceptions();
|
||||||
|
virtual void CheckWhiteCharsExceptions(Item & item);
|
||||||
|
|
||||||
virtual void Put(wchar_t c);
|
virtual void Put(wchar_t c);
|
||||||
virtual void Put(const wchar_t * str, const wchar_t * end);
|
virtual void Put(const wchar_t * str, const wchar_t * end);
|
||||||
|
@ -300,7 +302,9 @@ protected:
|
||||||
void SkipWhite();
|
void SkipWhite();
|
||||||
void SkipWhiteLines();
|
void SkipWhiteLines();
|
||||||
void SkipWhiteWithFirstNewLine();
|
void SkipWhiteWithFirstNewLine();
|
||||||
bool IsClosingTagForLastItem();
|
|
||||||
|
int current_white_char_mode();
|
||||||
|
|
||||||
void SkipAndCheckClosingTag(std::wstring * remember_text = nullptr);
|
void SkipAndCheckClosingTag(std::wstring * remember_text = nullptr);
|
||||||
|
|
||||||
void PopStack();
|
void PopStack();
|
||||||
|
@ -331,7 +335,7 @@ protected:
|
||||||
|
|
||||||
void PutNormalNonWhite(std::wstring & str, bool allow_put_new_line, bool allow_put_space);
|
void PutNormalNonWhite(std::wstring & str, bool allow_put_new_line, bool allow_put_space);
|
||||||
void PutNormalWhite(bool & was_white_char, bool & was_new_line);
|
void PutNormalWhite(bool & was_white_char, bool & was_new_line);
|
||||||
void PutEverythingUntilClosingTag(bool put_closing_tag_as_well);
|
|
||||||
void PutTabs(size_t len);
|
void PutTabs(size_t len);
|
||||||
void PutNonBreakingSpace();
|
void PutNonBreakingSpace();
|
||||||
void CalcOrphansMaxLen(Orphans & orphans);
|
void CalcOrphansMaxLen(Orphans & orphans);
|
||||||
|
@ -341,6 +345,9 @@ protected:
|
||||||
size_t stack_len; // length of the stack
|
size_t stack_len; // length of the stack
|
||||||
wchar_t * buffer; // buffer used when printing
|
wchar_t * buffer; // buffer used when printing
|
||||||
std::wstring * out_string;
|
std::wstring * out_string;
|
||||||
|
|
||||||
|
std::vector<int> white_char_mode_tab;
|
||||||
|
|
||||||
//bool last_new_line;
|
//bool last_new_line;
|
||||||
bool new_item_has_new_line_before;
|
bool new_item_has_new_line_before;
|
||||||
int white_mode;
|
int white_mode;
|
||||||
|
|
Loading…
Reference in New Issue