From 4f8ae6ce291d7bc535c39ef102e5bf85b351080e Mon Sep 17 00:00:00 2001
From: Tomasz Sowa
Date: Tue, 20 Jul 2021 20:48:01 +0200
Subject: [PATCH] some work in HTMLFilter - instead of directly using the pchar
pointer we now use pointers/streams from BaseParser - removed support for
putting a white char in long words: removed the BreakWord(size_t break_after_)
method - changed the way white characters are treated: added the
white_chars_mode(int mode) method; mode 0: WHITE_MODE_ORIGIN, mode 1:
WHITE_MODE_SINGLE_LINE, mode 2: WHITE_MODE_TREE
---
src/html/bbcodeparser.cpp | 59 +--
src/html/htmlfilter.cpp | 755 +++++++++++++++++++++-----------------
src/html/htmlfilter.h | 75 ++--
3 files changed, 478 insertions(+), 411 deletions(-)
diff --git a/src/html/bbcodeparser.cpp b/src/html/bbcodeparser.cpp
index 0a60273..ec39de6 100644
--- a/src/html/bbcodeparser.cpp
+++ b/src/html/bbcodeparser.cpp
@@ -121,7 +121,7 @@ void BBCODEParser::PutNormalText(const wchar_t * str, const wchar_t * end)
{
int br_len;
- if( *pchar == 0 )
+ if( lastc != -1 )
{
// trimming last white characters at end of the user text
while( strhtml_tag);
- const wchar_t * start = pchar;
-
- while( *pchar && *pchar != ']' )
- ++pchar;
-
- PutHtmlArgument(tag, start, pchar);
-
- if( *pchar == ']' )
- ++pchar;
+// FIXME
+// const wchar_t * start = pchar;
+//
+// while( *pchar && *pchar != ']' )
+// ++pchar;
+//
+// PutHtmlArgument(tag, start, pchar);
+//
+// if( *pchar == ']' )
+// ++pchar;
if( !tag->inline_tag )
{
diff --git a/src/html/htmlfilter.cpp b/src/html/htmlfilter.cpp
index d103b9e..5274950 100644
--- a/src/html/htmlfilter.cpp
+++ b/src/html/htmlfilter.cpp
@@ -48,10 +48,13 @@ namespace pt
void HTMLFilter::Item::Clear()
{
name.clear();
- type = none;
- porphans = 0;
- new_line = false;
- has_body_tag = false;
+ type = none;
+ is_commentary = false;
+ porphans = nullptr;
+ new_line = false;
+ new_line_in_the_middle = false;
+ has_body_tag = false;
+ tree_index = 0;
}
@@ -64,10 +67,15 @@ HTMLFilter::Item::Item()
void HTMLFilter::Filter(const wchar_t * in, std::wstring & out)
{
- pchar = in;
+ reading_from_file = false;
+ reading_from_wchar_string = true;
+ pchar_unicode = in;
+ pchar_ascii = 0;
+
stack_len = 0;
out_string = &out;
last_new_line = false;
+ was_ending_commentary = false;
line_len = 0;
out_string->clear();
@@ -108,9 +116,9 @@ void HTMLFilter::Filter(const std::wstring & in, std::wstring & out)
void HTMLFilter::SetSomeDefaults()
{
+ white_mode = WHITE_MODE_ORIGIN;
+
tab_size = 2;
- trim_white = false;
- break_after = 0;
wrap_line = 0;
orphan_mode = orphan_nbsp;
safe_mode = false;
@@ -160,16 +168,15 @@ HTMLFilter::~HTMLFilter()
-
-void HTMLFilter::BreakWord(size_t break_after_)
+void HTMLFilter::white_chars_mode(int mode)
{
- break_after = break_after_;
-
- if( break_after > 10000 )
- break_after = 10000;
+ if( mode >= WHITE_MODE_ORIGIN && mode <= WHITE_MODE_TREE )
+ white_mode = mode;
}
+
+
void HTMLFilter::WrapLine(size_t wrap_line_)
{
wrap_line = wrap_line_;
@@ -180,12 +187,6 @@ void HTMLFilter::WrapLine(size_t wrap_line_)
-void HTMLFilter::TrimWhite(bool trim)
-{
- trim_white = trim;
-}
-
-
void HTMLFilter::InsertTabs(size_t tabsize)
{
tab_size = tabsize;
@@ -322,9 +323,10 @@ bool HTMLFilter::PushStack()
if( stack_len > 0 )
{
- // 'porphans' and 'has_body_tag' attributes are propagated
+ // 'porphans', 'has_body_tag' and 'tree_index' attributes are propagated
pstack[stack_len].porphans = pstack[stack_len-1].porphans;
pstack[stack_len].has_body_tag = pstack[stack_len-1].has_body_tag;
+ pstack[stack_len].tree_index = pstack[stack_len-1].tree_index;
}
stack_len += 1;
@@ -356,15 +358,15 @@ return false;
void HTMLFilter::SkipWhite()
{
- while( IsWhite(*pchar) )
- ++pchar;
+ while( IsWhite(lastc) )
+ read_char();
}
void HTMLFilter::SkipWhiteLines()
{
- while( *pchar==10 || IsWhite(*pchar) )
- ++pchar;
+ while( lastc==10 || IsWhite(lastc) )
+ read_char();
}
@@ -372,29 +374,22 @@ void HTMLFilter::SkipWhiteWithFirstNewLine()
{
SkipWhite();
- if( *pchar == 10 )
+ if( lastc == 10 )
{
- pchar += 1;
+ read_char();
SkipWhite();
}
}
-void HTMLFilter::SkipWhiteLines(const wchar_t * & str, const wchar_t * end)
-{
- while( str < end && (*str==10 || IsWhite(*str)) )
- ++str;
-}
-
-
void HTMLFilter::CheckNewLine()
{
-const wchar_t * start = pchar;
+ if( white_mode == WHITE_MODE_TREE )
+ {
+ SkipWhite();
+ }
- SkipWhite();
- last_new_line = (*pchar==10);
-
- pchar = start;
+ last_new_line = (lastc==10);
}
@@ -402,22 +397,23 @@ const wchar_t * start = pchar;
bool HTMLFilter::IsClosingTagForLastItem()
{
- pchar += 1;
+ read_char();
SkipWhite();
- if( *pchar == '/' )
+ if( lastc == '/' )
{
- pchar += 1;
+ read_char();
SkipWhite();
- if( IsNameEqual(pchar, LastItem().name, LastItem().name.size()) )
+ ReadItemName(tmp_name);
+
+ if( IsNameEqual(tmp_name, LastItem().name) )
{
- pchar += LastItem().name.size();
SkipWhite();
- if( IsClosingTagMark(*pchar) )
+ if( IsClosingTagMark(lastc) )
{
- pchar += 1;
+ read_char();
return true;
}
}
@@ -432,17 +428,16 @@ return false;
// used for such tags as: script, pre, textarea
void HTMLFilter::PutEverythingUntilClosingTag(bool put_closing_tag_as_well)
{
-const wchar_t * start = pchar;
-const wchar_t * end = pchar;
+ bool was_closing_tag = false;
+ tmp_text.clear();
- while( *pchar != 0 )
+ while( lastc != -1 )
{
- if( IsOpeningTagMark(*pchar) )
+ if( IsOpeningTagMark(lastc) )
{
if( IsClosingTagForLastItem() )
{
- if( put_closing_tag_as_well )
- end = pchar;
+ was_closing_tag = true;
PopStack();
CheckNewLine();
@@ -451,29 +446,37 @@ const wchar_t * end = pchar;
}
else
{
- pchar += 1;
- end = pchar;
+ tmp_text += lastc;
+ read_char();
}
}
- Put(start, end);
+ Put(tmp_text);
+
+ if( was_closing_tag && put_closing_tag_as_well )
+ {
+ Put('<');
+ Put('/');
+ Put(tmp_name);
+ Put('>');
+ }
}
-void HTMLFilter::SkipAndCheckClosingTag()
+void HTMLFilter::SkipAndCheckClosingTag(std::wstring * remember_text)
{
bool is_quoted = false;
wchar_t quote_char = 0;
- for( ; *pchar ; ++pchar )
+ while( lastc != -1 )
{
- if( *pchar == '"' || *pchar == '\'' )
+ if( lastc == '"' || lastc == '\'' )
{
if( is_quoted )
{
- if( *pchar == quote_char )
+ if( lastc == quote_char )
{
is_quoted = false;
}
@@ -481,20 +484,25 @@ void HTMLFilter::SkipAndCheckClosingTag()
else
{
is_quoted = true;
- quote_char = *pchar;
+ quote_char = lastc;
}
}
else
- if( !is_quoted && LastItem().type == Item::opening && IsClosingXmlSimpleTagMark(*pchar) ) // closing xml tag: default '/'
+ if( !is_quoted && LastItem().type == Item::opening && IsClosingXmlSimpleTagMark(lastc) ) // closing xml tag: default '/'
{
LastItem().type = Item::simple;
}
else
- if( !is_quoted && IsClosingTagMark(*pchar) )
+ if( !is_quoted && IsClosingTagMark(lastc) )
{
- ++pchar;
+ read_char();
break;
}
+
+ if( remember_text )
+ (*remember_text) += lastc;
+
+ read_char();
}
}
@@ -505,7 +513,7 @@ bool HTMLFilter::IsValidCharForName(int c)
if( (c>='a' && c<='z') ||
(c>='A' && c<='Z') ||
(c>='0' && c<='9') ||
- c=='-' || c=='!' || c==':') // : for namespace character
+ c=='-' || c=='!' || c==':' || c=='-') // : is for a namespace character, - is for a commentary
return true;
return false;
@@ -536,16 +544,28 @@ return false;
}
-void HTMLFilter::ReadItemName()
+void HTMLFilter::ReadItemName(std::wstring & name, bool clear_name)
{
size_t i;
- for( i=0 ; IsValidCharForName(*pchar) ; ++i )
+ if( clear_name )
+ name.clear();
+
+ for(i=0 ; IsValidCharForName(lastc) ; ++i)
{
if( i < WINIX_HTMLFILTER_ITEM_NAME_MAXLEN )
- LastItem().name += *pchar;
+ {
+ name += lastc;
- ++pchar;
+ if( LastItem().type == Item::special && name == L"!--" )
+ {
+ LastItem().is_commentary = true;
+ read_char();
+ break;
+ }
+ }
+
+ read_char();
}
}
@@ -557,71 +577,69 @@ size_t i;
attr_name.clear();
- for( i=0 ; *pchar && IsValidCharForAttrName(*pchar) ; ++i )
+ for( i=0 ; lastc != -1 && IsValidCharForAttrName(lastc) ; ++i )
{
if( i < WINIX_HTMLFILTER_ATTR_NAME_MAXLEN )
- attr_name += *pchar;
+ attr_name += lastc;
- ++pchar;
+ read_char();
}
}
-void HTMLFilter::ReadItemAttrValueAdd(const wchar_t * value_start, const wchar_t * value_end)
+void HTMLFilter::ReadItemAttrValueAdd(const std::wstring & str)
{
- attr_value.push_back(std::wstring());
-
if( analyze_entities )
{
- AnalyzeEntitiesAndPut(value_start, value_end, &attr_value.back());
+ attr_value.push_back(std::wstring());
+ AnalyzeEntitiesAndPut(str.c_str(), str.c_str() + str.size(), &attr_value.back());
}
else
{
- attr_value.back().append(value_start, value_end);
+ attr_value.push_back(str);
}
}
void HTMLFilter::ReadItemAttrValue(bool has_quote, wchar_t quote_char)
{
-size_t i;
-
attr_value.clear();
- const wchar_t * value_start = pchar;
- size_t value_len = 0; // how many non white characters
+ tmp_text.clear();
- for(i=0 ; *pchar ; ++i, ++pchar )
+ while( lastc != -1 )
{
if( has_quote )
{
- if( *pchar == quote_char )
+ if( lastc == quote_char )
break;
}
else
{
- if( IsClosingTagMark(*pchar) || *pchar == 10 || IsWhite(*pchar) )
+ if( IsClosingTagMark(lastc) || lastc == 10 || IsWhite(lastc) )
break;
}
- if( *pchar==10 || IsWhite(*pchar) )
+ if( lastc==10 || IsWhite(lastc) )
{
- if( value_len > 0 && value_len <= WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN )
- ReadItemAttrValueAdd(value_start, pchar);
+ if( tmp_text.size() > 0 && tmp_text.size() <= WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN )
+ ReadItemAttrValueAdd(tmp_text);
- value_len = 0;
+ tmp_text.clear();
}
else
{
- if( value_len == 0 )
- value_start = pchar;
+ if( tmp_text.size() > WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN )
+ tmp_text.clear();
- value_len += 1;
+ tmp_text += lastc;
}
+
+ read_char();
}
- if( value_len > 0 && value_len <= WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN )
- ReadItemAttrValueAdd(value_start, pchar);
+ if( tmp_text.size() > 0 && tmp_text.size() <= WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN )
+ ReadItemAttrValueAdd(tmp_text);
}
@@ -641,15 +659,6 @@ void HTMLFilter::Put(wchar_t c)
}
-void HTMLFilter::Put(const wchar_t * str)
-{
- out_string->append(str);
-
- for( ; *str ; ++str)
- CheckChar(*str);
-}
-
-
void HTMLFilter::Put(const wchar_t * str, const wchar_t * end)
{
if( str >= end )
@@ -663,12 +672,16 @@ void HTMLFilter::Put(const wchar_t * str, const wchar_t * end)
}
+
void HTMLFilter::Put(const std::wstring & str)
{
- out_string->append(str);
+ if( !str.empty() )
+ {
+ out_string->append(str);
- for(size_t i=0 ; itab);
}
-// if there is a semicolon nearby then we break the line after it
-// (useful in html entities)
-// !! dodac sprawdzanie czy dlugosc stringu nie jest mala tez (end-str)
-// i wtedy tez nie dodajemy zadnego znaku
-bool HTMLFilter::HasEntityEndAround(const wchar_t * str, const wchar_t * end)
+void HTMLFilter::PutNormalNonWhite(std::wstring & str, bool allow_put_new_line, bool allow_put_space)
{
-size_t i, epsilon = 8;// !! IMPROVE ME put as a constant
-
- for(i=0 ; str < end && i wrap_line )
+ while( lastc != -1 && lastc != 10 && !IsWhite(lastc) && !IsOpeningTagMark(lastc) )
{
- Put(10);
- PutTabs(stack_len);
- }
-}
+ str += lastc;
+ read_char();
-
-void HTMLFilter::PutNormalNonWhite(const wchar_t * & str, const wchar_t * end)
-{
-const wchar_t * word = str;
-size_t non_whites = 0;
-bool was_entity_end = false;
-
- for( ; str < end && *str!=10 && !IsWhite(*str) ; ++str, ++non_whites )
- {
- if( break_after != 0 && non_whites >= break_after && (was_entity_end || !HasEntityEndAround(str, end)) )
+ if( IsEndingCommentaryTagMarkAtEndOfString(str) )
{
- Put(word, str);
- word = str;
- non_whites = 0;
- Put(' ');
- CheckLineWrap();
+ str.erase(str.size() - 3); // IMPROVEME define a function or what
+ was_ending_commentary = true;
+ break;
}
+ }
- was_entity_end = (IsEndingEntityMark(*str));
+ if( !str.empty() )
+ {
+ if( allow_put_new_line )
+ {
+ Put(10);
+ PutTabs(LastItem().tree_index + 1);
+ }
+ else
+ if( allow_put_space )
+ {
+ Put(' ');
+ }
}
if( analyze_entities )
- AnalyzeEntitiesAndPut(word, str, nullptr);
+ AnalyzeEntitiesAndPut(str.c_str(), str.c_str() + str.size(), nullptr);
else
- Put(word, str);
+ Put(str);
}
-void HTMLFilter::PutNormalWhite(const wchar_t * & str, const wchar_t * end)
+bool HTMLFilter::PutNormalWhite()
{
- if( str < end )
+ bool was_white_char = false;
+ bool was_new_line = false;
+
+ while( lastc == 10 || IsWhite(lastc) )
{
- if( trim_white )
- {
- Put(' ');
- SkipWhiteLines(str, end);
- }
- else
- {
- while( str < end && (*str==10 || IsWhite(*str)) )
- {
- Put(*str);
+ was_white_char = true; // anyone white char even new line
- if( *str == 10 )
- PutTabs(stack_len);
+ if( lastc == 10 )
+ was_new_line = true;
- ++str;
- }
+ if( white_mode == WHITE_MODE_ORIGIN )
+ {
+ Put(lastc);
}
+
+ read_char();
}
-}
-
-void HTMLFilter::PutNormalText(const wchar_t * str, const wchar_t * end)
-{
-const wchar_t * word, * white;
-
- if( str < end )
- CheckLineWrap();
-
- while( str < end )
+ if( white_mode == WHITE_MODE_SINGLE_LINE && was_white_char )
{
- word = str;
- PutNormalNonWhite(str, end);
-
- if( CheckOrphan(word, str) )
- {
- white = str;
- SkipWhiteLines(str, end);
-
- if( white < str )
- PutNonBreakingSpace();
- }
- else
- {
- PutNormalWhite(str, end);
-
- if( str < end ) // !! lub moze podobnie jak jest na gorze tutaj? juz nie mam sily myslec :(
- CheckLineWrap();
- }
-
- // for safety (if str was not incremented then there is an infinite loop)
- if( word == str )
- break;
+ Put(' ');
}
-}
+ if( white_mode == WHITE_MODE_TREE && was_new_line )
+ {
+ // in WHITE_MODE_TREE white characters are written at the beginning of a tag or text
+ }
+
+ last_new_line = was_new_line;
+ return was_white_char;
+}
@@ -985,6 +955,12 @@ bool HTMLFilter::PutOpeningTag()
return false;
}
+ if( white_mode == WHITE_MODE_TREE && last_new_line )
+ {
+ Put(10);
+ PutTabs(LastItem().tree_index);
+ }
+
PutOpeningTagMark();
Put(LastItem().name);
@@ -993,14 +969,18 @@ return true;
-void HTMLFilter::PutClosingTag(const wchar_t * tag)
+void HTMLFilter::PutClosingTag(const Item & item)
{
- if( skip_tags || !IsTagSafe(tag) )
+ if( skip_tags || !IsTagSafe(item.name) )
return;
- PutOpeningTagMark();
- Put('/');
- Put(tag);
+ if( !item.is_commentary )
+ {
+ PutOpeningTagMark();
+ Put('/');
+ }
+
+ Put(item.name);
PutClosingTagMark();
}
@@ -1011,7 +991,7 @@ void HTMLFilter::PutTabs(size_t len)
if( len > 30 )
len = 30;
- for(size_t i=0 ; i < (len*tab_size) ; ++i)
+ for(int i=0 ; i < (len*tab_size) ; ++i)
(*out_string) += ' '; // we do not add them to 'line_len'
}
@@ -1031,12 +1011,12 @@ void HTMLFilter::PutNonBreakingSpace()
-void HTMLFilter::PutNewLine()
-{
- buffer[0] = 10;
- Put(buffer, buffer+1);
- line_len = 0;
-}
+//void HTMLFilter::PutNewLine()
+//{
+// buffer[0] = 10; // CHECKME for what purpose is this buffer?
+// Put(10);
+// line_len = 0;
+//}
// we assume the size of the opening mark to be one
@@ -1053,6 +1033,28 @@ bool HTMLFilter::IsClosingTagMark(wchar_t c)
}
+// the slash in the closing tag mark e.g. </p>
+bool HTMLFilter::IsClosingTagIndicator(wchar_t c)
+{
+ return (c == '/');
+}
+
+
+// the exclamation mark at the beginning of a special tag e.g. <!DOCTYPE html>
+bool HTMLFilter::IsSpecialTagIndicator(wchar_t c)
+{
+ return (c == '!');
+}
+
+
+// the '=' operator e.g. class="value"
+bool HTMLFilter::IsAttributeAssignmentMark(wchar_t c)
+{
+ return (c == '=');
+}
+
+
+
// the slash at the end (without '>' character)
// we assume the size of the mark to be one
bool HTMLFilter::IsClosingXmlSimpleTagMark(wchar_t c)
@@ -1061,18 +1063,33 @@ bool HTMLFilter::IsClosingXmlSimpleTagMark(wchar_t c)
}
-bool HTMLFilter::IsOpeningCommentaryTagMark(const wchar_t * str)
+//bool HTMLFilter::IsOpeningCommentaryTagMark(const wchar_t * str)
+//{
+//static wchar_t comm_open[] = L"";
+ size_t comm_end_len = sizeof(comm_end) / sizeof(wchar_t) - 1;
- return IsNameEqual(pchar, comm_open, comm_open_len);
-}
+ if( str.size() >= comm_end_len )
+ {
+ return IsNameEqual(str.c_str() + str.size() - comm_end_len, comm_end);
+ }
-
-size_t HTMLFilter::OpeningCommentaryTagMarkSize()
-{
- return 4; // size of "";
+wchar_t comm_close[] = L"-->";
size_t comm_close_len = sizeof(comm_close) / sizeof(wchar_t) - 1;
-
+/*
if( !IsOpeningCommentaryTagMark(pchar) )
return false;
@@ -1108,86 +1125,81 @@ size_t comm_close_len = sizeof(comm_close) / sizeof(wchar_t) - 1;
pchar += comm_close_len;
CheckNewLine();
+*/
+
return true;
}
-void HTMLFilter::ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white)
-{
- if( trim_white )
- {
- // skipping all white chars (with new lines)
- // but with remembering the last non white character
- for( ; *pchar==10 || IsWhite(*pchar) ; ++pchar)
- if( *pchar == 10 )
- last_non_white = pchar;
- }
- else
- {
- // skipping first white chars with only one line between them
- SkipWhite();
- last_non_white = pchar;
-
- if( *pchar == 10 )
- {
- ++pchar;
- SkipWhite();
- }
- }
-
- start = pchar;
-
- // exception for the commentary tag
- if( IsOpeningCommentaryTagMark(pchar) || !IsOpeningTagMark(*pchar) )
- {
- PutNewLine();
- PutTabs(stack_len);
- }
-}
-
-
-
// reading text between html tags
void HTMLFilter::ReadNormalText()
{
-const wchar_t * start = pchar;
-const wchar_t * last_non_white = pchar;
+ bool was_non_white_text = false;
- if( last_new_line )
- ReadNormalTextSkipWhite(start, last_non_white);
+ was_ending_commentary = false;
+ bool allow_put_new_line = false;
+ bool allow_put_space = false;
- while( *pchar != 0 )
+ if( white_mode == WHITE_MODE_TREE )
{
- const wchar_t * commentary_start = pchar;
-
- if( SkipCommentaryTagIfExists() )
+ if( LastItem().new_line || (wrap_line != 0 && LastItem().has_body_tag && line_len >= wrap_line) )
{
- last_non_white = pchar - 1; // pointing at the last '>' from a commentary
- PutNormalText(start, commentary_start);
-
- if( !skip_commentaries )
- {
- PutNormalText(commentary_start, pchar);
- }
-
- start = pchar;
- }
- else
- {
- if( IsOpeningTagMark(*pchar) )
- break;
-
- if( !IsWhite(*pchar) )
- last_non_white = pchar;
-
- pchar += 1;
+ allow_put_new_line = true;
}
}
- last_new_line = (*last_non_white == 10);
- PutNormalText(start, pchar);
+ while( lastc != -1 && !IsOpeningTagMark(lastc) )
+ {
+ tmp_text.clear();
+ PutNormalNonWhite(tmp_text, allow_put_new_line, allow_put_space);
+
+ if( !tmp_text.empty() )
+ {
+ allow_put_new_line = false;
+ allow_put_space = false;
+ was_non_white_text = true;
+ }
+
+ if( CheckOrphan(tmp_text.c_str(), tmp_text.c_str() + tmp_text.size()) )
+ {
+ if( lastc == 10 || IsWhite(lastc) )
+ {
+ SkipWhiteLines();
+ PutNonBreakingSpace();
+ }
+ }
+ else
+ {
+ if( was_ending_commentary )
+ break;
+
+ if( PutNormalWhite() && white_mode == WHITE_MODE_TREE )
+ {
+ if( last_new_line )
+ {
+ allow_put_new_line = true;
+ allow_put_space = false;
+
+ LastItem().new_line_in_the_middle = true;
+
+ if( !was_non_white_text )
+ LastItem().new_line = true;
+ }
+ else
+ {
+ allow_put_new_line = false;
+ allow_put_space = true;
+ }
+
+ if( wrap_line != 0 && LastItem().has_body_tag && line_len >= wrap_line )
+ {
+ allow_put_new_line = true;
+ }
+ }
+ }
+ }
}
@@ -1197,15 +1209,7 @@ bool HTMLFilter::PrintOpeningItem()
if( skip_tags || IsNameEqual(no_filter_tag, LastItem().name) )
return true;
- if( last_new_line )
- {
- PutNewLine();
-
- if( stack_len > 1 )
- PutTabs(stack_len-1);
- }
-
-return PutOpeningTag();
+ return PutOpeningTag();
}
@@ -1226,34 +1230,34 @@ bool HTMLFilter::ReadItemAttr()
SkipWhiteLines();
- if( *pchar != '=' )
+ if( !IsAttributeAssignmentMark(lastc) ) // '='
return true;
attr_has_value = true;
- pchar += 1; // skipping '='
+ read_char(); // skipping '='
SkipWhiteLines();
- bool has_quote = (*pchar == '\"' || *pchar == '\'');
- wchar_t quote_char = *pchar;
+ bool has_quote = (lastc == '\"' || lastc == '\'');
+ wchar_t quote_char = lastc;
if( has_quote )
- pchar += 1; // skipping the first quote mark
+ read_char(); // skipping the first quote mark
ReadItemAttrValue(has_quote, quote_char);
- if( has_quote && *pchar == quote_char )
- pchar += 1; // skipping the last quote mark
+ if( has_quote && lastc == quote_char )
+ read_char(); // skipping the last quote mark
return true;
}
-bool HTMLFilter::CheckItemAttr()
+void HTMLFilter::CheckItemLangAttr()
{
if( attr_has_value && IsNameEqual(L"lang", attr_name) )
{
- LastItem().porphans = 0;
+ LastItem().porphans = nullptr;
if( !attr_value.empty() )
{
@@ -1267,8 +1271,6 @@ bool HTMLFilter::CheckItemAttr()
LastItem().porphans = &i->second;
}
}
-
-return true;
}
@@ -1301,9 +1303,9 @@ size_t i;
void HTMLFilter::ReadItemClosing()
{
- pchar += 1; // skipping '/'
+ read_char(); // skipping '/'
SkipWhiteLines();
- ReadItemName();
+ ReadItemName(LastItem().name);
LastItem().type = Item::closing;
SkipAndCheckClosingTag();
@@ -1316,32 +1318,55 @@ void HTMLFilter::ReadItemSpecial()
LastItem().type = Item::special;
if( !skip_tags )
+ {
+ if( white_mode == WHITE_MODE_TREE && last_new_line )
+ {
+ Put(10);
+ PutTabs(LastItem().tree_index);
+ }
+
PutOpeningTagMark();
+ }
- const wchar_t * start = pchar;
- pchar += 1; // skipping '!'
+ read_char(); // skipping '!'
+ LastItem().name = '!';
+ ReadItemName(LastItem().name, false);
- ReadItemName();
- SkipAndCheckClosingTag();
-
- if( !skip_tags && pchar > start )
- Put(start, pchar);
-
- // closing tag mark is printed directly from the source
+ if( skip_tags )
+ {
+ SkipAndCheckClosingTag();
+ }
+ else
+ {
+ if( LastItem().is_commentary )
+ {
+ Put(LastItem().name);
+ }
+ else
+ {
+ tmp_text.clear();
+ SkipWhiteLines();
+ SkipAndCheckClosingTag(&tmp_text);
+ Put(LastItem().name);
+ Put(' ');
+ Put(tmp_text);
+ Put('>');
+ }
+ }
}
void HTMLFilter::ReadItemOpening()
{
LastItem().type = Item::opening;
- ReadItemName();
+ ReadItemName(LastItem().name);
if( PrintOpeningItem() )
{
while( ReadItemAttr() )
{
- if( CheckItemAttr() )
- PrintItemAttr();
+ CheckItemLangAttr();
+ PrintItemAttr();
}
SkipAndCheckClosingTag(); // here LastItem().type can be changed to 'simple'
@@ -1368,25 +1393,35 @@ void HTMLFilter::EntityFound(const wchar_t * str, const wchar_t * end)
bool HTMLFilter::ReadItem()
{
- if( *pchar == 0 )
+ if( lastc == -1 )
return false;
if( !PushStack() )
return false;
- pchar += 1; // skipping the first '<'
- SkipWhiteLines();
+ if( stack_len > 1 && pstack[stack_len-2].new_line_in_the_middle )
+ LastItem().tree_index += 1;
- if( *pchar == '!' )
- ReadItemSpecial();
+ if( was_ending_commentary )
+ {
+ LastItem().type = Item::closing;
+ LastItem().is_commentary = true;
+ LastItem().name = L"--";
+ was_ending_commentary = false;
+ }
else
- if( *pchar == '/' ) // we have a closing tag (dodac jako metode wirtualna) !!
- ReadItemClosing();
- else
- ReadItemOpening();
+ {
+ read_char(); // skipping the first opening tag mark '<'
+ SkipWhiteLines();
- CheckNewLine();
- LastItem().new_line = last_new_line;
+ if( IsSpecialTagIndicator(lastc) )
+ ReadItemSpecial();
+ else
+ if( IsClosingTagIndicator(lastc) )
+ ReadItemClosing();
+ else
+ ReadItemOpening();
+ }
ItemFound();
@@ -1556,11 +1591,14 @@ int i;
{
if( !skip_tags && pstack[z].new_line )
{
- PutNewLine();
- PutTabs(z);
+ if( white_mode == WHITE_MODE_TREE )
+ {
+ Put(10);
+ PutTabs(pstack[z].tree_index);
+ }
}
- PutClosingTag(pstack[z].name.c_str());
+ PutClosingTag(pstack[z]);
pstack[z].Clear();
}
@@ -1576,10 +1614,19 @@ void HTMLFilter::CheckStackPrintRest()
while( stack_len-- > 0 )
{
if( stack_len==0 || pstack[stack_len-1].new_line )
- PutNewLine();
+ {
+ if( white_mode == WHITE_MODE_TREE )
+ {
+ Put(10);
+ PutTabs(pstack[stack_len-1].tree_index);
+ }
+ else
+ {
+ Put(' ');
+ }
+ }
- PutTabs(stack_len);
- PutClosingTag(pstack[stack_len].name.c_str());
+ PutClosingTag(pstack[stack_len]);
}
}
@@ -1601,16 +1648,19 @@ void HTMLFilter::CheckClosingTags()
}
// there are more than one tag
- if( IsNameEqual(pstack[stack_len-1].name, pstack[stack_len-2].name) )
+ if( (pstack[stack_len-1].is_commentary && pstack[stack_len-2].is_commentary) || IsNameEqual(pstack[stack_len-1].name, pstack[stack_len-2].name) )
{
// last closing tag is from the previous one
if( !skip_tags && pstack[stack_len-2].new_line )
{
- PutNewLine();
- PutTabs(stack_len-2);
+ if( white_mode == WHITE_MODE_TREE )
+ {
+ Put(10);
+ PutTabs(pstack[stack_len-2].tree_index);
+ }
}
- PutClosingTag(pstack[stack_len-1].name.c_str());
+ PutClosingTag(pstack[stack_len-1]);
last_new_line = pstack[stack_len-1].new_line;
PopStack();
PopStack();
@@ -1624,22 +1674,30 @@ void HTMLFilter::CheckClosingTags()
bool HTMLFilter::PrintRest()
{
-const wchar_t * start = pchar;
+//const wchar_t * start = pchar;
// in safe mode we do not print the rest html code
if( safe_mode || skip_tags )
return false;
- while( *pchar )
- ++pchar;
+ bool was_chars = false;
- if( pchar > start )
+ while( lastc != -1 )
{
- Put(start, pchar);
- return true;
+ Put(lastc);
+ read_char();
+ was_chars = true;
}
-return false;
+ return was_chars;
+
+// if( pchar > start )
+// {
+// Put(start, pchar);
+// return true;
+// }
+
+//return false;
}
@@ -1660,15 +1718,18 @@ void HTMLFilter::ReadLoop()
//pstack[stack_len-2].new_line = LastItem().new_line;
}
else
- if( trim_white )
+ if( white_mode == WHITE_MODE_TREE )
{
// one new line after a simple or special tag
// (if the tag has level 0 in the tree - it not means that this is a first tag)
- // for example can be DOCTYPE
- PutNewLine();
+ // for example can be DOCTYPE
+
+ if( !LastItem().is_commentary )
+ Put(10);
}
- PopStack();
+ if( !LastItem().is_commentary )
+ PopStack();
}
else
if( LastItem().type == Item::closing )
@@ -1688,7 +1749,9 @@ void HTMLFilter::ReadLoop()
void HTMLFilter::Read()
{
- if( trim_white )
+ read_char(); // put first character to lastc
+
+ if( white_mode != WHITE_MODE_ORIGIN )
SkipWhiteLines();
// it can be some text or white lines before the first html tag (we print it)
diff --git a/src/html/htmlfilter.h b/src/html/htmlfilter.h
index 35710d3..6407e0e 100644
--- a/src/html/htmlfilter.h
+++ b/src/html/htmlfilter.h
@@ -42,7 +42,7 @@
#include