some work in HTMLFilter
- instead of using the pchar pointer directly, we now use the pointers/streams from BaseParser
- removed support for putting a white char in long words: removed the BreakWord(size_t break_after_) method
- changed how white characters are treated: added the white_chars_mode(int mode) method
  mode 0: WHITE_MODE_ORIGIN
  mode 1: WHITE_MODE_SINGLE_LINE
  mode 2: WHITE_MODE_TREE
This commit is contained in:
parent
7ce07c57f5
commit
4f8ae6ce29
|
@ -121,7 +121,7 @@ void BBCODEParser::PutNormalText(const wchar_t * str, const wchar_t * end)
|
||||||
{
|
{
|
||||||
int br_len;
|
int br_len;
|
||||||
|
|
||||||
if( *pchar == 0 )
|
if( lastc != -1 )
|
||||||
{
|
{
|
||||||
// trimming last white characters at end of the user text
|
// trimming last white characters at end of the user text
|
||||||
while( str<end && (IsWhite(*(end-1)) || *(end-1)==10) )
|
while( str<end && (IsWhite(*(end-1)) || *(end-1)==10) )
|
||||||
|
@ -415,15 +415,17 @@ void BBCODEParser::PutOpeningTagFromEzc()
|
||||||
(*out_string) += '[';
|
(*out_string) += '[';
|
||||||
(*out_string) += LastItem().name;
|
(*out_string) += LastItem().name;
|
||||||
|
|
||||||
const wchar_t * start = pchar;
|
|
||||||
|
|
||||||
while( *pchar && *pchar!=']' )
|
// FIXME
|
||||||
++pchar;
|
// const wchar_t * start = pchar;
|
||||||
|
//
|
||||||
if( *pchar == ']' )
|
// while( *pchar && *pchar!=']' )
|
||||||
++pchar;
|
// ++pchar;
|
||||||
|
//
|
||||||
Put(start, pchar);
|
// if( *pchar == ']' )
|
||||||
|
// ++pchar;
|
||||||
|
//
|
||||||
|
// Put(start, pchar);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -453,13 +455,13 @@ void BBCODEParser::TrimWhiteWithNewLines(const wchar_t * & start, const wchar_t
|
||||||
|
|
||||||
void BBCODEParser::PutHtmlArgument2(const Tags * tag, bool has_u)
|
void BBCODEParser::PutHtmlArgument2(const Tags * tag, bool has_u)
|
||||||
{
|
{
|
||||||
const wchar_t * start = pchar;
|
//const wchar_t * start = pchar;
|
||||||
const wchar_t * end = pchar;
|
//const wchar_t * end = pchar;
|
||||||
bool first_tag_removed = false;
|
bool first_tag_removed = false;
|
||||||
|
|
||||||
while( *pchar != 0 )
|
while( lastc != -1 )
|
||||||
{
|
{
|
||||||
if( IsOpeningTagMark(*pchar) )
|
if( IsOpeningTagMark(lastc) )
|
||||||
{
|
{
|
||||||
if( IsClosingTagForLastItem() )
|
if( IsClosingTagForLastItem() )
|
||||||
{
|
{
|
||||||
|
@ -472,8 +474,8 @@ bool first_tag_removed = false;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
pchar += 1;
|
read_char();
|
||||||
end = pchar;
|
//end = pchar;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -482,12 +484,14 @@ bool first_tag_removed = false;
|
||||||
|
|
||||||
if( has_u )
|
if( has_u )
|
||||||
{
|
{
|
||||||
TrimWhiteWithNewLines(start, end);
|
// FIXME
|
||||||
PrintEncode(start, end);
|
// TrimWhiteWithNewLines(start, end);
|
||||||
|
// PrintEncode(start, end);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
PrintEscape(start, end);
|
// FIXME
|
||||||
|
// PrintEscape(start, end);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -545,15 +549,16 @@ void BBCODEParser::PutOpeningTagFromBBCode(const Tags * tag)
|
||||||
PutOpeningTagMark();
|
PutOpeningTagMark();
|
||||||
Put(tag->html_tag);
|
Put(tag->html_tag);
|
||||||
|
|
||||||
const wchar_t * start = pchar;
|
// FIXME
|
||||||
|
// const wchar_t * start = pchar;
|
||||||
while( *pchar && *pchar != ']' )
|
//
|
||||||
++pchar;
|
// while( *pchar && *pchar != ']' )
|
||||||
|
// ++pchar;
|
||||||
PutHtmlArgument(tag, start, pchar);
|
//
|
||||||
|
// PutHtmlArgument(tag, start, pchar);
|
||||||
if( *pchar == ']' )
|
//
|
||||||
++pchar;
|
// if( *pchar == ']' )
|
||||||
|
// ++pchar;
|
||||||
|
|
||||||
if( !tag->inline_tag )
|
if( !tag->inline_tag )
|
||||||
{
|
{
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -42,7 +42,7 @@
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include "convert/baseparser.h"
|
||||||
|
|
||||||
|
|
||||||
namespace pt
|
namespace pt
|
||||||
|
@ -90,7 +90,7 @@ namespace pt
|
||||||
|
|
||||||
the filter recognizes xml simple tags (with / at the end) such as: <br />
|
the filter recognizes xml simple tags (with / at the end) such as: <br />
|
||||||
*/
|
*/
|
||||||
class HTMLFilter
|
class HTMLFilter : public BaseParser
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
@ -111,27 +111,22 @@ public:
|
||||||
void Filter(const std::wstring & in, std::wstring & out);
|
void Filter(const std::wstring & in, std::wstring & out);
|
||||||
|
|
||||||
|
|
||||||
// insert a white space into long words
|
const static int WHITE_MODE_ORIGIN = 0;
|
||||||
// (only between html tags)
|
const static int WHITE_MODE_SINGLE_LINE = 1;
|
||||||
// skipped in such tags: script, pre, textarea
|
const static int WHITE_MODE_TREE = 2;
|
||||||
// break_after - after how many characters insert a space (0 - off)
|
|
||||||
void BreakWord(size_t break_after_);
|
|
||||||
|
|
||||||
// insert a new line character into long lines
|
|
||||||
// (only between html tags)
|
// white chars mode
|
||||||
|
//
|
||||||
|
void white_chars_mode(int mode);
|
||||||
|
|
||||||
|
// if a line is wrap_line_ characters long (or longer) then insert a new line character (in place of a white char)
|
||||||
|
// (only between html tags and only in <body> subtree)
|
||||||
// skipped in such tags: script, pre, textarea
|
// skipped in such tags: script, pre, textarea
|
||||||
// wrap_line - after how many characters wrap a line (0 - off)
|
// 0 - off
|
||||||
// lines are wrapped only in 'body' tag (useful for text in 'title' tag which is in 'head' section)
|
// lines are wrapped only in 'body' tag (useful for text in 'title' tag which is in 'head' section)
|
||||||
void WrapLine(size_t wrap_line_);
|
void WrapLine(size_t wrap_line_);
|
||||||
|
|
||||||
// trimming white characters (with new lines)
|
|
||||||
// at the beginning, at the end and in the middle of a string
|
|
||||||
// only between html tags
|
|
||||||
// at the beginning and at the end only one space is left
|
|
||||||
// skipped in such tags: script, pre, textarea
|
|
||||||
// false by default
|
|
||||||
void TrimWhite(bool trim);
|
|
||||||
|
|
||||||
// first tabs in a tree
|
// first tabs in a tree
|
||||||
// default: 2 (spaces)
|
// default: 2 (spaces)
|
||||||
// set 0 to turn off
|
// set 0 to turn off
|
||||||
|
@ -207,9 +202,14 @@ protected:
|
||||||
none
|
none
|
||||||
} type;
|
} type;
|
||||||
|
|
||||||
|
bool is_commentary;
|
||||||
|
|
||||||
// is there a new line after this tag
|
// is there a new line after this tag
|
||||||
bool new_line;
|
bool new_line;
|
||||||
|
|
||||||
|
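// is there a new line in the middle (NOTE(review): the original comment was cut off after "a new"; wording inferred from the new_line_in_the_middle field below, please confirm)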
|
||||||
|
bool new_line_in_the_middle;
|
||||||
|
|
||||||
// current orphans table
|
// current orphans table
|
||||||
// (will be propagated)
|
// (will be propagated)
|
||||||
Orphans * porphans;
|
Orphans * porphans;
|
||||||
|
@ -218,6 +218,8 @@ protected:
|
||||||
// (will be propagated)
|
// (will be propagated)
|
||||||
bool has_body_tag;
|
bool has_body_tag;
|
||||||
|
|
||||||
|
size_t tree_index;
|
||||||
|
|
||||||
void Clear();
|
void Clear();
|
||||||
Item();
|
Item();
|
||||||
};
|
};
|
||||||
|
@ -235,12 +237,16 @@ protected:
|
||||||
|
|
||||||
virtual bool IsOpeningTagMark(wchar_t c);
|
virtual bool IsOpeningTagMark(wchar_t c);
|
||||||
virtual bool IsClosingTagMark(wchar_t c);
|
virtual bool IsClosingTagMark(wchar_t c);
|
||||||
|
virtual bool IsClosingTagIndicator(wchar_t c);
|
||||||
|
virtual bool IsSpecialTagIndicator(wchar_t c);
|
||||||
|
virtual bool IsAttributeAssignmentMark(wchar_t c);
|
||||||
virtual bool IsClosingXmlSimpleTagMark(wchar_t c);
|
virtual bool IsClosingXmlSimpleTagMark(wchar_t c);
|
||||||
virtual bool IsStartingEntityMark(wchar_t c);
|
virtual bool IsStartingEntityMark(wchar_t c);
|
||||||
virtual bool IsEndingEntityMark(wchar_t c);
|
virtual bool IsEndingEntityMark(wchar_t c);
|
||||||
|
|
||||||
virtual bool IsOpeningCommentaryTagMark(const wchar_t * str);
|
// virtual bool IsOpeningCommentaryTagMark(const wchar_t * str);
|
||||||
virtual size_t OpeningCommentaryTagMarkSize();
|
// virtual size_t OpeningCommentaryTagMarkSize();
|
||||||
|
virtual bool IsEndingCommentaryTagMarkAtEndOfString(const std::wstring & str);
|
||||||
|
|
||||||
virtual bool IsValidCharForName(int c);
|
virtual bool IsValidCharForName(int c);
|
||||||
virtual bool IsValidCharForAttrName(int c);
|
virtual bool IsValidCharForAttrName(int c);
|
||||||
|
@ -249,7 +255,6 @@ protected:
|
||||||
virtual bool SkipCommentaryTagIfExists();
|
virtual bool SkipCommentaryTagIfExists();
|
||||||
|
|
||||||
virtual void Put(wchar_t c);
|
virtual void Put(wchar_t c);
|
||||||
virtual void Put(const wchar_t * str);
|
|
||||||
virtual void Put(const wchar_t * str, const wchar_t * end);
|
virtual void Put(const wchar_t * str, const wchar_t * end);
|
||||||
virtual void Put(const std::wstring & str);
|
virtual void Put(const std::wstring & str);
|
||||||
virtual void AnalyzeEntitiesAndPut(const wchar_t * str, const wchar_t * end, std::wstring * out);
|
virtual void AnalyzeEntitiesAndPut(const wchar_t * str, const wchar_t * end, std::wstring * out);
|
||||||
|
@ -257,10 +262,7 @@ protected:
|
||||||
virtual void PutOpeningTagMark();
|
virtual void PutOpeningTagMark();
|
||||||
virtual void PutClosingTagMark();
|
virtual void PutClosingTagMark();
|
||||||
virtual bool PutOpeningTag();
|
virtual bool PutOpeningTag();
|
||||||
virtual void PutClosingTag(const wchar_t * tag);
|
virtual void PutClosingTag(const Item & item);
|
||||||
|
|
||||||
virtual void PutNormalText(const wchar_t * str, const wchar_t * end);
|
|
||||||
virtual void ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white);
|
|
||||||
|
|
||||||
virtual void ItemFound();
|
virtual void ItemFound();
|
||||||
virtual void EntityFound(const wchar_t * str, const wchar_t * end);
|
virtual void EntityFound(const wchar_t * str, const wchar_t * end);
|
||||||
|
@ -299,9 +301,8 @@ protected:
|
||||||
void SkipWhite();
|
void SkipWhite();
|
||||||
void SkipWhiteLines();
|
void SkipWhiteLines();
|
||||||
void SkipWhiteWithFirstNewLine();
|
void SkipWhiteWithFirstNewLine();
|
||||||
void SkipWhiteLines(const wchar_t * & str, const wchar_t * end);
|
|
||||||
bool IsClosingTagForLastItem();
|
bool IsClosingTagForLastItem();
|
||||||
void SkipAndCheckClosingTag();
|
void SkipAndCheckClosingTag(std::wstring * remember_text = nullptr);
|
||||||
|
|
||||||
void PopStack();
|
void PopStack();
|
||||||
bool PushStack();
|
bool PushStack();
|
||||||
|
@ -312,13 +313,13 @@ protected:
|
||||||
void ReadNormalText();
|
void ReadNormalText();
|
||||||
bool PrintRest();
|
bool PrintRest();
|
||||||
bool PrintOpeningItem();
|
bool PrintOpeningItem();
|
||||||
void ReadItemName();
|
void ReadItemName(std::wstring & name, bool clear_name = true);
|
||||||
void ReadItemAttrName();
|
void ReadItemAttrName();
|
||||||
void ReadItemAttrValueAdd(const wchar_t * value_start, const wchar_t * value_end);
|
void ReadItemAttrValueAdd(const std::wstring & str);
|
||||||
void ReadItemAttrValue(bool has_quote, wchar_t quote_char);
|
void ReadItemAttrValue(bool has_quote, wchar_t quote_char);
|
||||||
|
|
||||||
bool ReadItemAttr();
|
bool ReadItemAttr();
|
||||||
bool CheckItemAttr();
|
void CheckItemLangAttr();
|
||||||
void PrintItemAttr();
|
void PrintItemAttr();
|
||||||
|
|
||||||
void ReadItemClosing();
|
void ReadItemClosing();
|
||||||
|
@ -330,27 +331,23 @@ protected:
|
||||||
|
|
||||||
void CheckChar(wchar_t c);
|
void CheckChar(wchar_t c);
|
||||||
|
|
||||||
void CheckLineWrap();
|
void PutNormalNonWhite(std::wstring & str, bool allow_put_new_line, bool allow_put_space);
|
||||||
bool HasEntityEndAround(const wchar_t * str, const wchar_t * end);
|
bool PutNormalWhite();
|
||||||
void PutNormalNonWhite(const wchar_t * & str, const wchar_t * end);
|
|
||||||
void PutNormalWhite(const wchar_t * & str, const wchar_t * end);
|
|
||||||
void PutEverythingUntilClosingTag(bool put_closing_tag_as_well);
|
void PutEverythingUntilClosingTag(bool put_closing_tag_as_well);
|
||||||
void PutTabs(size_t len);
|
void PutTabs(size_t len);
|
||||||
void PutNonBreakingSpace();
|
void PutNonBreakingSpace();
|
||||||
void PutNewLine();
|
|
||||||
void CalcOrphansMaxLen(Orphans & orphans);
|
void CalcOrphansMaxLen(Orphans & orphans);
|
||||||
|
|
||||||
const wchar_t * pchar;
|
|
||||||
Item empty;
|
Item empty;
|
||||||
Item * pstack; // stack pointer
|
Item * pstack; // stack pointer
|
||||||
size_t stack_len; // length of the stack
|
size_t stack_len; // length of the stack
|
||||||
wchar_t * buffer; // buffer used when printing
|
wchar_t * buffer; // buffer used when printing
|
||||||
std::wstring * out_string;
|
std::wstring * out_string;
|
||||||
bool last_new_line;
|
bool last_new_line;
|
||||||
size_t break_after; // insert a space into long words after 'break_after' characters
|
int white_mode;
|
||||||
size_t wrap_line; // insert a new line character into long lines
|
size_t wrap_line; // insert a new line character into long lines
|
||||||
bool trim_white; // trimming white characters
|
|
||||||
size_t tab_size;
|
size_t tab_size;
|
||||||
|
bool was_ending_commentary;
|
||||||
OrphanMode orphan_mode;
|
OrphanMode orphan_mode;
|
||||||
std::wstring attr_name;
|
std::wstring attr_name;
|
||||||
std::vector<std::wstring> attr_value;
|
std::vector<std::wstring> attr_value;
|
||||||
|
@ -365,6 +362,8 @@ protected:
|
||||||
bool skip_commentaries;
|
bool skip_commentaries;
|
||||||
bool skip_entities;
|
bool skip_entities;
|
||||||
bool analyze_entities;
|
bool analyze_entities;
|
||||||
|
std::wstring tmp_text;
|
||||||
|
std::wstring tmp_name;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue