fixed improper new line character after <single/> items, added Item::new_line_before flag
parent
4f8ae6ce29
commit
c0e940c500
|
@ -36,7 +36,7 @@
|
|||
*/
|
||||
|
||||
#include "htmlfilter.h"
|
||||
|
||||
#include "convert/text.h"
|
||||
|
||||
|
||||
namespace pt
|
||||
|
@ -48,13 +48,14 @@ namespace pt
|
|||
void HTMLFilter::Item::Clear()
|
||||
{
|
||||
name.clear();
|
||||
type = none;
|
||||
is_commentary = false;
|
||||
porphans = nullptr;
|
||||
new_line = false;
|
||||
type = none;
|
||||
is_commentary = false;
|
||||
porphans = nullptr;
|
||||
new_line_before = false;
|
||||
new_line = false;
|
||||
new_line_in_the_middle = false;
|
||||
has_body_tag = false;
|
||||
tree_index = 0;
|
||||
has_body_tag = false;
|
||||
tree_index = 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -74,7 +75,7 @@ void HTMLFilter::Filter(const wchar_t * in, std::wstring & out)
|
|||
|
||||
stack_len = 0;
|
||||
out_string = &out;
|
||||
last_new_line = false;
|
||||
//last_new_line = false;
|
||||
was_ending_commentary = false;
|
||||
line_len = 0;
|
||||
out_string->clear();
|
||||
|
@ -382,15 +383,15 @@ void HTMLFilter::SkipWhiteWithFirstNewLine()
|
|||
}
|
||||
|
||||
|
||||
void HTMLFilter::CheckNewLine()
|
||||
{
|
||||
if( white_mode == WHITE_MODE_TREE )
|
||||
{
|
||||
SkipWhite();
|
||||
}
|
||||
|
||||
last_new_line = (lastc==10);
|
||||
}
|
||||
//void HTMLFilter::CheckNewLine()
|
||||
//{
|
||||
// if( white_mode == WHITE_MODE_TREE )
|
||||
// {
|
||||
// SkipWhite();
|
||||
// }
|
||||
//
|
||||
// last_new_line = (lastc==10);
|
||||
//}
|
||||
|
||||
|
||||
|
||||
|
@ -440,7 +441,7 @@ void HTMLFilter::PutEverythingUntilClosingTag(bool put_closing_tag_as_well)
|
|||
was_closing_tag = true;
|
||||
|
||||
PopStack();
|
||||
CheckNewLine();
|
||||
//CheckNewLine();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -857,17 +858,17 @@ void HTMLFilter::PutNormalNonWhite(std::wstring & str, bool allow_put_new_line,
|
|||
}
|
||||
|
||||
|
||||
bool HTMLFilter::PutNormalWhite()
|
||||
void HTMLFilter::PutNormalWhite(bool & was_white_char, bool & was_new_line)
|
||||
{
|
||||
bool was_white_char = false;
|
||||
bool was_new_line = false;
|
||||
was_white_char = false;
|
||||
was_new_line = false;
|
||||
|
||||
while( lastc == 10 || IsWhite(lastc) )
|
||||
{
|
||||
was_white_char = true; // anyone white char even new line
|
||||
|
||||
if( lastc == 10 )
|
||||
was_new_line = true;
|
||||
else
|
||||
was_white_char = true;
|
||||
|
||||
if( white_mode == WHITE_MODE_ORIGIN )
|
||||
{
|
||||
|
@ -877,18 +878,12 @@ bool HTMLFilter::PutNormalWhite()
|
|||
read_char();
|
||||
}
|
||||
|
||||
if( white_mode == WHITE_MODE_SINGLE_LINE && was_white_char )
|
||||
if( white_mode == WHITE_MODE_SINGLE_LINE && (was_white_char || was_new_line) )
|
||||
{
|
||||
Put(' ');
|
||||
}
|
||||
|
||||
if( white_mode == WHITE_MODE_TREE && was_new_line )
|
||||
{
|
||||
// in WHITE_MODE_TREE white characters are written at the beginning of a <tag> or text
|
||||
}
|
||||
|
||||
last_new_line = was_new_line;
|
||||
return was_white_char;
|
||||
// in WHITE_MODE_TREE white characters are written at the beginning of a <tag> or text
|
||||
}
|
||||
|
||||
|
||||
|
@ -955,7 +950,7 @@ bool HTMLFilter::PutOpeningTag()
|
|||
return false;
|
||||
}
|
||||
|
||||
if( white_mode == WHITE_MODE_TREE && last_new_line )
|
||||
if( white_mode == WHITE_MODE_TREE && LastItem().new_line_before )
|
||||
{
|
||||
Put(10);
|
||||
PutTabs(LastItem().tree_index);
|
||||
|
@ -991,7 +986,7 @@ void HTMLFilter::PutTabs(size_t len)
|
|||
if( len > 30 )
|
||||
len = 30;
|
||||
|
||||
for(int i=0 ; i < (len*tab_size) ; ++i)
|
||||
for(size_t i=0 ; i < (len*tab_size) ; ++i)
|
||||
(*out_string) += ' '; // we do not add them to 'line_len'
|
||||
}
|
||||
|
||||
|
@ -1010,15 +1005,6 @@ void HTMLFilter::PutNonBreakingSpace()
|
|||
|
||||
|
||||
|
||||
|
||||
//void HTMLFilter::PutNewLine()
|
||||
//{
|
||||
// buffer[0] = 10; // CHECKME for what purpose is this buffer?
|
||||
// Put(10);
|
||||
// line_len = 0;
|
||||
//}
|
||||
|
||||
|
||||
// we assume the size of the opening mark to be one
|
||||
bool HTMLFilter::IsOpeningTagMark(wchar_t c)
|
||||
{
|
||||
|
@ -1063,22 +1049,6 @@ bool HTMLFilter::IsClosingXmlSimpleTagMark(wchar_t c)
|
|||
}
|
||||
|
||||
|
||||
//bool HTMLFilter::IsOpeningCommentaryTagMark(const wchar_t * str)
|
||||
//{
|
||||
//static wchar_t comm_open[] = L"<!--";
|
||||
//size_t comm_open_len = sizeof(comm_open) / sizeof(wchar_t) - 1;
|
||||
//
|
||||
// //return IsNameEqual(pchar, comm_open, comm_open_len);
|
||||
// return false;
|
||||
//}
|
||||
//
|
||||
//
|
||||
//size_t HTMLFilter::OpeningCommentaryTagMarkSize()
|
||||
//{
|
||||
// return 4; // size of "<!--"
|
||||
//}
|
||||
|
||||
|
||||
bool HTMLFilter::IsEndingCommentaryTagMarkAtEndOfString(const std::wstring & str)
|
||||
{
|
||||
static wchar_t comm_end[] = L"-->";
|
||||
|
@ -1106,35 +1076,12 @@ bool HTMLFilter::IsEndingEntityMark(wchar_t c)
|
|||
|
||||
|
||||
|
||||
// skipping the commentary tag if exists
|
||||
bool HTMLFilter::SkipCommentaryTagIfExists()
|
||||
{
|
||||
wchar_t comm_close[] = L"-->";
|
||||
size_t comm_close_len = sizeof(comm_close) / sizeof(wchar_t) - 1;
|
||||
/*
|
||||
if( !IsOpeningCommentaryTagMark(pchar) )
|
||||
return false;
|
||||
|
||||
pchar += OpeningCommentaryTagMarkSize();
|
||||
|
||||
// looking for "-->"
|
||||
while( *pchar!=0 && !IsNameEqual(pchar, comm_close, comm_close_len) )
|
||||
++pchar;
|
||||
|
||||
if( *pchar!= 0 )
|
||||
pchar += comm_close_len;
|
||||
|
||||
CheckNewLine();
|
||||
*/
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// reading text between html tags
|
||||
void HTMLFilter::ReadNormalText()
|
||||
void HTMLFilter::ReadText()
|
||||
{
|
||||
bool was_white_char = false;
|
||||
bool was_new_line = false;
|
||||
|
||||
bool was_non_white_text = false;
|
||||
|
||||
was_ending_commentary = false;
|
||||
|
@ -1175,13 +1122,16 @@ void HTMLFilter::ReadNormalText()
|
|||
if( was_ending_commentary )
|
||||
break;
|
||||
|
||||
if( PutNormalWhite() && white_mode == WHITE_MODE_TREE )
|
||||
PutNormalWhite(was_white_char, was_new_line);
|
||||
|
||||
if( (was_white_char || was_new_line) && white_mode == WHITE_MODE_TREE )
|
||||
{
|
||||
if( last_new_line )
|
||||
allow_put_new_line = false;
|
||||
allow_put_space = false;
|
||||
|
||||
if( was_new_line )
|
||||
{
|
||||
allow_put_new_line = true;
|
||||
allow_put_space = false;
|
||||
|
||||
LastItem().new_line_in_the_middle = true;
|
||||
|
||||
if( !was_non_white_text )
|
||||
|
@ -1189,7 +1139,6 @@ void HTMLFilter::ReadNormalText()
|
|||
}
|
||||
else
|
||||
{
|
||||
allow_put_new_line = false;
|
||||
allow_put_space = true;
|
||||
}
|
||||
|
||||
|
@ -1200,6 +1149,8 @@ void HTMLFilter::ReadNormalText()
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
new_item_has_new_line_before = was_new_line;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1319,7 +1270,7 @@ void HTMLFilter::ReadItemSpecial()
|
|||
|
||||
if( !skip_tags )
|
||||
{
|
||||
if( white_mode == WHITE_MODE_TREE && last_new_line )
|
||||
if( white_mode == WHITE_MODE_TREE && LastItem().new_line_before )
|
||||
{
|
||||
Put(10);
|
||||
PutTabs(LastItem().tree_index);
|
||||
|
@ -1351,6 +1302,13 @@ void HTMLFilter::ReadItemSpecial()
|
|||
Put(' ');
|
||||
Put(tmp_text);
|
||||
Put('>');
|
||||
|
||||
if( is_first_item && white_mode == WHITE_MODE_TREE && is_equal_nc(LastItem().name.c_str(), L"!doctype") )
|
||||
{
|
||||
Put(10);
|
||||
Put(10);
|
||||
SkipWhiteLines();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1399,6 +1357,8 @@ bool HTMLFilter::ReadItem()
|
|||
if( !PushStack() )
|
||||
return false;
|
||||
|
||||
LastItem().new_line_before = new_item_has_new_line_before; // new_item_has_new_line_before is set by ReadText() method
|
||||
|
||||
if( stack_len > 1 && pstack[stack_len-2].new_line_in_the_middle )
|
||||
LastItem().tree_index += 1;
|
||||
|
||||
|
@ -1602,7 +1562,7 @@ int i;
|
|||
pstack[z].Clear();
|
||||
}
|
||||
|
||||
last_new_line = pstack[stack_len-1].new_line;
|
||||
//last_new_line = pstack[stack_len-1].new_line;
|
||||
|
||||
// invalidate tags
|
||||
stack_len = i;
|
||||
|
@ -1661,7 +1621,7 @@ void HTMLFilter::CheckClosingTags()
|
|||
}
|
||||
|
||||
PutClosingTag(pstack[stack_len-1]);
|
||||
last_new_line = pstack[stack_len-1].new_line;
|
||||
//last_new_line = pstack[stack_len-1].new_line;
|
||||
PopStack();
|
||||
PopStack();
|
||||
}
|
||||
|
@ -1711,27 +1671,17 @@ void HTMLFilter::ReadLoop()
|
|||
CheckExceptions();
|
||||
}
|
||||
else
|
||||
if( LastItem().type == Item::special || LastItem().type == Item::simple )
|
||||
if( LastItem().type == Item::special )
|
||||
{
|
||||
if( stack_len > 1 )
|
||||
{
|
||||
//pstack[stack_len-2].new_line = LastItem().new_line;
|
||||
}
|
||||
else
|
||||
if( white_mode == WHITE_MODE_TREE )
|
||||
{
|
||||
// one new line after a simple or special tag
|
||||
// (if the tag has level 0 in the tree - it not means that this is a first tag)
|
||||
// for example can be DOCTYPE
|
||||
|
||||
if( !LastItem().is_commentary )
|
||||
Put(10);
|
||||
}
|
||||
|
||||
if( !LastItem().is_commentary )
|
||||
PopStack();
|
||||
}
|
||||
else
|
||||
if( LastItem().type == Item::simple )
|
||||
{
|
||||
PopStack();
|
||||
}
|
||||
else
|
||||
if( LastItem().type == Item::closing )
|
||||
{
|
||||
CheckClosingTags();
|
||||
|
@ -1741,7 +1691,8 @@ void HTMLFilter::ReadLoop()
|
|||
PopStack();
|
||||
}
|
||||
|
||||
ReadNormalText();
|
||||
ReadText();
|
||||
is_first_item = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1750,12 +1701,13 @@ void HTMLFilter::ReadLoop()
|
|||
void HTMLFilter::Read()
|
||||
{
|
||||
read_char(); // put first character to lastc
|
||||
is_first_item = true;
|
||||
|
||||
if( white_mode != WHITE_MODE_ORIGIN )
|
||||
SkipWhiteLines();
|
||||
|
||||
// it can be some text or white lines before the first html tag (we print it)
|
||||
ReadNormalText();
|
||||
ReadText();
|
||||
|
||||
// reading the whole html source
|
||||
ReadLoop();
|
||||
|
|
|
@ -204,6 +204,8 @@ protected:
|
|||
|
||||
bool is_commentary;
|
||||
|
||||
bool new_line_before;
|
||||
|
||||
// is there a new line after this tag
|
||||
bool new_line;
|
||||
|
||||
|
@ -244,15 +246,12 @@ protected:
|
|||
virtual bool IsStartingEntityMark(wchar_t c);
|
||||
virtual bool IsEndingEntityMark(wchar_t c);
|
||||
|
||||
// virtual bool IsOpeningCommentaryTagMark(const wchar_t * str);
|
||||
// virtual size_t OpeningCommentaryTagMarkSize();
|
||||
virtual bool IsEndingCommentaryTagMarkAtEndOfString(const std::wstring & str);
|
||||
|
||||
virtual bool IsValidCharForName(int c);
|
||||
virtual bool IsValidCharForAttrName(int c);
|
||||
virtual bool IsValidCharForEntityName(int c);
|
||||
virtual void CheckExceptions();
|
||||
virtual bool SkipCommentaryTagIfExists();
|
||||
|
||||
virtual void Put(wchar_t c);
|
||||
virtual void Put(const wchar_t * str, const wchar_t * end);
|
||||
|
@ -306,11 +305,10 @@ protected:
|
|||
|
||||
void PopStack();
|
||||
bool PushStack();
|
||||
void CheckNewLine();
|
||||
void CheckStackPrintRest();
|
||||
void AddForgottenTags();
|
||||
void CheckClosingTags();
|
||||
void ReadNormalText();
|
||||
void ReadText();
|
||||
bool PrintRest();
|
||||
bool PrintOpeningItem();
|
||||
void ReadItemName(std::wstring & name, bool clear_name = true);
|
||||
|
@ -332,7 +330,7 @@ protected:
|
|||
void CheckChar(wchar_t c);
|
||||
|
||||
void PutNormalNonWhite(std::wstring & str, bool allow_put_new_line, bool allow_put_space);
|
||||
bool PutNormalWhite();
|
||||
void PutNormalWhite(bool & was_white_char, bool & was_new_line);
|
||||
void PutEverythingUntilClosingTag(bool put_closing_tag_as_well);
|
||||
void PutTabs(size_t len);
|
||||
void PutNonBreakingSpace();
|
||||
|
@ -343,8 +341,10 @@ protected:
|
|||
size_t stack_len; // length of the stack
|
||||
wchar_t * buffer; // buffer used when printing
|
||||
std::wstring * out_string;
|
||||
bool last_new_line;
|
||||
//bool last_new_line;
|
||||
bool new_item_has_new_line_before;
|
||||
int white_mode;
|
||||
bool is_first_item;
|
||||
size_t wrap_line; // insert a new line character into long lines
|
||||
size_t tab_size;
|
||||
bool was_ending_commentary;
|
||||
|
|
Loading…
Reference in New Issue