some work in HTMLFilter

- instead of using the pchar pointer directly, we now use the pointers/streams from BaseParser
- removed support for inserting a white character into long words: removed the BreakWord(size_t break_after_) method
- changed how white characters are treated: added the white_chars_mode(int mode) method
  mode 0: WHITE_MODE_ORIGIN
  mode 1: WHITE_MODE_SINGLE_LINE
  mode 2: WHITE_MODE_TREE
This commit is contained in:
Tomasz Sowa 2021-07-20 20:48:01 +02:00
parent 7ce07c57f5
commit 4f8ae6ce29
3 changed files with 478 additions and 411 deletions

View File

@ -121,7 +121,7 @@ void BBCODEParser::PutNormalText(const wchar_t * str, const wchar_t * end)
{ {
int br_len; int br_len;
if( *pchar == 0 ) if( lastc != -1 )
{ {
// trimming last white characters at end of the user text // trimming last white characters at end of the user text
while( str<end && (IsWhite(*(end-1)) || *(end-1)==10) ) while( str<end && (IsWhite(*(end-1)) || *(end-1)==10) )
@ -415,15 +415,17 @@ void BBCODEParser::PutOpeningTagFromEzc()
(*out_string) += '['; (*out_string) += '[';
(*out_string) += LastItem().name; (*out_string) += LastItem().name;
const wchar_t * start = pchar;
while( *pchar && *pchar!=']' ) // FIXME
++pchar; // const wchar_t * start = pchar;
//
if( *pchar == ']' ) // while( *pchar && *pchar!=']' )
++pchar; // ++pchar;
//
Put(start, pchar); // if( *pchar == ']' )
// ++pchar;
//
// Put(start, pchar);
} }
@ -453,13 +455,13 @@ void BBCODEParser::TrimWhiteWithNewLines(const wchar_t * & start, const wchar_t
void BBCODEParser::PutHtmlArgument2(const Tags * tag, bool has_u) void BBCODEParser::PutHtmlArgument2(const Tags * tag, bool has_u)
{ {
const wchar_t * start = pchar; //const wchar_t * start = pchar;
const wchar_t * end = pchar; //const wchar_t * end = pchar;
bool first_tag_removed = false; bool first_tag_removed = false;
while( *pchar != 0 ) while( lastc != -1 )
{ {
if( IsOpeningTagMark(*pchar) ) if( IsOpeningTagMark(lastc) )
{ {
if( IsClosingTagForLastItem() ) if( IsClosingTagForLastItem() )
{ {
@ -472,8 +474,8 @@ bool first_tag_removed = false;
} }
else else
{ {
pchar += 1; read_char();
end = pchar; //end = pchar;
} }
} }
@ -482,12 +484,14 @@ bool first_tag_removed = false;
if( has_u ) if( has_u )
{ {
TrimWhiteWithNewLines(start, end); // FIXME
PrintEncode(start, end); // TrimWhiteWithNewLines(start, end);
// PrintEncode(start, end);
} }
else else
{ {
PrintEscape(start, end); // FIXME
// PrintEscape(start, end);
} }
} }
@ -545,15 +549,16 @@ void BBCODEParser::PutOpeningTagFromBBCode(const Tags * tag)
PutOpeningTagMark(); PutOpeningTagMark();
Put(tag->html_tag); Put(tag->html_tag);
const wchar_t * start = pchar; // FIXME
// const wchar_t * start = pchar;
while( *pchar && *pchar != ']' ) //
++pchar; // while( *pchar && *pchar != ']' )
// ++pchar;
PutHtmlArgument(tag, start, pchar); //
// PutHtmlArgument(tag, start, pchar);
if( *pchar == ']' ) //
++pchar; // if( *pchar == ']' )
// ++pchar;
if( !tag->inline_tag ) if( !tag->inline_tag )
{ {

File diff suppressed because it is too large Load Diff

View File

@ -42,7 +42,7 @@
#include <map> #include <map>
#include <vector> #include <vector>
#include <algorithm> #include <algorithm>
#include "convert/baseparser.h"
namespace pt namespace pt
@ -90,7 +90,7 @@ namespace pt
the filter recognizes xml simple tags (with / at the end) such as: <br /> the filter recognizes xml simple tags (with / at the end) such as: <br />
*/ */
class HTMLFilter class HTMLFilter : public BaseParser
{ {
public: public:
@ -111,27 +111,22 @@ public:
void Filter(const std::wstring & in, std::wstring & out); void Filter(const std::wstring & in, std::wstring & out);
// insert a white space into long words const static int WHITE_MODE_ORIGIN = 0;
// (only between html tags) const static int WHITE_MODE_SINGLE_LINE = 1;
// skipped in such tags: script, pre, textarea const static int WHITE_MODE_TREE = 2;
// break_after - after how many characters insert a space (0 - off)
void BreakWord(size_t break_after_);
// insert a new line character into long lines
// (only between html tags) // white chars mode
//
void white_chars_mode(int mode);
// if the line is wrap_line_ characters long (or longer) then insert a new line character (in place of a white char)
// (only between html tags and only in <body> subtree)
// skipped in such tags: script, pre, textarea // skipped in such tags: script, pre, textarea
// wrap_line - after how many characters wrap a line (0 - off) // 0 - off
// lines are wrapped only in 'body' tag (useful for text in 'title' tag which is in 'head' section) // lines are wrapped only in 'body' tag (useful for text in 'title' tag which is in 'head' section)
void WrapLine(size_t wrap_line_); void WrapLine(size_t wrap_line_);
// trimming white characters (with new lines)
// at the beginning, at the end and in the middle of a string
// only between html tags
// at the beginning and at the end only one space is left
// skipped in such tags: script, pre, textarea
// false by default
void TrimWhite(bool trim);
// first tabs in a tree // first tabs in a tree
// default: 2 (spaces) // default: 2 (spaces)
// set 0 to turn off // set 0 to turn off
@ -207,9 +202,14 @@ protected:
none none
} type; } type;
bool is_commentary;
// is there a new line after this tag // is there a new line after this tag
bool new_line; bool new_line;
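// is there a new line in the middle (NOTE(review): original comment was truncated; wording inferred from the field name below - confirm)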
bool new_line_in_the_middle;
// current orphans table // current orphans table
// (will be propagated) // (will be propagated)
Orphans * porphans; Orphans * porphans;
@ -218,6 +218,8 @@ protected:
// (will be propagated) // (will be propagated)
bool has_body_tag; bool has_body_tag;
size_t tree_index;
void Clear(); void Clear();
Item(); Item();
}; };
@ -235,12 +237,16 @@ protected:
virtual bool IsOpeningTagMark(wchar_t c); virtual bool IsOpeningTagMark(wchar_t c);
virtual bool IsClosingTagMark(wchar_t c); virtual bool IsClosingTagMark(wchar_t c);
virtual bool IsClosingTagIndicator(wchar_t c);
virtual bool IsSpecialTagIndicator(wchar_t c);
virtual bool IsAttributeAssignmentMark(wchar_t c);
virtual bool IsClosingXmlSimpleTagMark(wchar_t c); virtual bool IsClosingXmlSimpleTagMark(wchar_t c);
virtual bool IsStartingEntityMark(wchar_t c); virtual bool IsStartingEntityMark(wchar_t c);
virtual bool IsEndingEntityMark(wchar_t c); virtual bool IsEndingEntityMark(wchar_t c);
virtual bool IsOpeningCommentaryTagMark(const wchar_t * str); // virtual bool IsOpeningCommentaryTagMark(const wchar_t * str);
virtual size_t OpeningCommentaryTagMarkSize(); // virtual size_t OpeningCommentaryTagMarkSize();
virtual bool IsEndingCommentaryTagMarkAtEndOfString(const std::wstring & str);
virtual bool IsValidCharForName(int c); virtual bool IsValidCharForName(int c);
virtual bool IsValidCharForAttrName(int c); virtual bool IsValidCharForAttrName(int c);
@ -249,7 +255,6 @@ protected:
virtual bool SkipCommentaryTagIfExists(); virtual bool SkipCommentaryTagIfExists();
virtual void Put(wchar_t c); virtual void Put(wchar_t c);
virtual void Put(const wchar_t * str);
virtual void Put(const wchar_t * str, const wchar_t * end); virtual void Put(const wchar_t * str, const wchar_t * end);
virtual void Put(const std::wstring & str); virtual void Put(const std::wstring & str);
virtual void AnalyzeEntitiesAndPut(const wchar_t * str, const wchar_t * end, std::wstring * out); virtual void AnalyzeEntitiesAndPut(const wchar_t * str, const wchar_t * end, std::wstring * out);
@ -257,10 +262,7 @@ protected:
virtual void PutOpeningTagMark(); virtual void PutOpeningTagMark();
virtual void PutClosingTagMark(); virtual void PutClosingTagMark();
virtual bool PutOpeningTag(); virtual bool PutOpeningTag();
virtual void PutClosingTag(const wchar_t * tag); virtual void PutClosingTag(const Item & item);
virtual void PutNormalText(const wchar_t * str, const wchar_t * end);
virtual void ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white);
virtual void ItemFound(); virtual void ItemFound();
virtual void EntityFound(const wchar_t * str, const wchar_t * end); virtual void EntityFound(const wchar_t * str, const wchar_t * end);
@ -299,9 +301,8 @@ protected:
void SkipWhite(); void SkipWhite();
void SkipWhiteLines(); void SkipWhiteLines();
void SkipWhiteWithFirstNewLine(); void SkipWhiteWithFirstNewLine();
void SkipWhiteLines(const wchar_t * & str, const wchar_t * end);
bool IsClosingTagForLastItem(); bool IsClosingTagForLastItem();
void SkipAndCheckClosingTag(); void SkipAndCheckClosingTag(std::wstring * remember_text = nullptr);
void PopStack(); void PopStack();
bool PushStack(); bool PushStack();
@ -312,13 +313,13 @@ protected:
void ReadNormalText(); void ReadNormalText();
bool PrintRest(); bool PrintRest();
bool PrintOpeningItem(); bool PrintOpeningItem();
void ReadItemName(); void ReadItemName(std::wstring & name, bool clear_name = true);
void ReadItemAttrName(); void ReadItemAttrName();
void ReadItemAttrValueAdd(const wchar_t * value_start, const wchar_t * value_end); void ReadItemAttrValueAdd(const std::wstring & str);
void ReadItemAttrValue(bool has_quote, wchar_t quote_char); void ReadItemAttrValue(bool has_quote, wchar_t quote_char);
bool ReadItemAttr(); bool ReadItemAttr();
bool CheckItemAttr(); void CheckItemLangAttr();
void PrintItemAttr(); void PrintItemAttr();
void ReadItemClosing(); void ReadItemClosing();
@ -330,27 +331,23 @@ protected:
void CheckChar(wchar_t c); void CheckChar(wchar_t c);
void CheckLineWrap(); void PutNormalNonWhite(std::wstring & str, bool allow_put_new_line, bool allow_put_space);
bool HasEntityEndAround(const wchar_t * str, const wchar_t * end); bool PutNormalWhite();
void PutNormalNonWhite(const wchar_t * & str, const wchar_t * end);
void PutNormalWhite(const wchar_t * & str, const wchar_t * end);
void PutEverythingUntilClosingTag(bool put_closing_tag_as_well); void PutEverythingUntilClosingTag(bool put_closing_tag_as_well);
void PutTabs(size_t len); void PutTabs(size_t len);
void PutNonBreakingSpace(); void PutNonBreakingSpace();
void PutNewLine();
void CalcOrphansMaxLen(Orphans & orphans); void CalcOrphansMaxLen(Orphans & orphans);
const wchar_t * pchar;
Item empty; Item empty;
Item * pstack; // stack pointer Item * pstack; // stack pointer
size_t stack_len; // length of the stack size_t stack_len; // length of the stack
wchar_t * buffer; // buffer used when printing wchar_t * buffer; // buffer used when printing
std::wstring * out_string; std::wstring * out_string;
bool last_new_line; bool last_new_line;
size_t break_after; // insert a space into long words after 'break_after' characters int white_mode;
size_t wrap_line; // insert a new line character into long lines size_t wrap_line; // insert a new line character into long lines
bool trim_white; // trimming white characters
size_t tab_size; size_t tab_size;
bool was_ending_commentary;
OrphanMode orphan_mode; OrphanMode orphan_mode;
std::wstring attr_name; std::wstring attr_name;
std::vector<std::wstring> attr_value; std::vector<std::wstring> attr_value;
@ -365,6 +362,8 @@ protected:
bool skip_commentaries; bool skip_commentaries;
bool skip_entities; bool skip_entities;
bool analyze_entities; bool analyze_entities;
std::wstring tmp_text;
std::wstring tmp_name;
}; };