some work in HTMLFilter
- instead of using the pchar pointer directly, we now use the pointers/streams from BaseParser - removed support for inserting a white character into long words: removed the BreakWord(size_t break_after_) method - changed how white characters are treated: added the white_chars_mode(int mode) method, where mode 0 is WHITE_MODE_ORIGIN, mode 1 is WHITE_MODE_SINGLE_LINE and mode 2 is WHITE_MODE_TREE
This commit is contained in:
parent
7ce07c57f5
commit
4f8ae6ce29
|
@ -121,7 +121,7 @@ void BBCODEParser::PutNormalText(const wchar_t * str, const wchar_t * end)
|
|||
{
|
||||
int br_len;
|
||||
|
||||
if( *pchar == 0 )
|
||||
if( lastc != -1 )
|
||||
{
|
||||
// trimming last white characters at end of the user text
|
||||
while( str<end && (IsWhite(*(end-1)) || *(end-1)==10) )
|
||||
|
@ -415,15 +415,17 @@ void BBCODEParser::PutOpeningTagFromEzc()
|
|||
(*out_string) += '[';
|
||||
(*out_string) += LastItem().name;
|
||||
|
||||
const wchar_t * start = pchar;
|
||||
|
||||
while( *pchar && *pchar!=']' )
|
||||
++pchar;
|
||||
|
||||
if( *pchar == ']' )
|
||||
++pchar;
|
||||
|
||||
Put(start, pchar);
|
||||
// FIXME
|
||||
// const wchar_t * start = pchar;
|
||||
//
|
||||
// while( *pchar && *pchar!=']' )
|
||||
// ++pchar;
|
||||
//
|
||||
// if( *pchar == ']' )
|
||||
// ++pchar;
|
||||
//
|
||||
// Put(start, pchar);
|
||||
}
|
||||
|
||||
|
||||
|
@ -453,13 +455,13 @@ void BBCODEParser::TrimWhiteWithNewLines(const wchar_t * & start, const wchar_t
|
|||
|
||||
void BBCODEParser::PutHtmlArgument2(const Tags * tag, bool has_u)
|
||||
{
|
||||
const wchar_t * start = pchar;
|
||||
const wchar_t * end = pchar;
|
||||
//const wchar_t * start = pchar;
|
||||
//const wchar_t * end = pchar;
|
||||
bool first_tag_removed = false;
|
||||
|
||||
while( *pchar != 0 )
|
||||
while( lastc != -1 )
|
||||
{
|
||||
if( IsOpeningTagMark(*pchar) )
|
||||
if( IsOpeningTagMark(lastc) )
|
||||
{
|
||||
if( IsClosingTagForLastItem() )
|
||||
{
|
||||
|
@ -472,8 +474,8 @@ bool first_tag_removed = false;
|
|||
}
|
||||
else
|
||||
{
|
||||
pchar += 1;
|
||||
end = pchar;
|
||||
read_char();
|
||||
//end = pchar;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -482,12 +484,14 @@ bool first_tag_removed = false;
|
|||
|
||||
if( has_u )
|
||||
{
|
||||
TrimWhiteWithNewLines(start, end);
|
||||
PrintEncode(start, end);
|
||||
// FIXME
|
||||
// TrimWhiteWithNewLines(start, end);
|
||||
// PrintEncode(start, end);
|
||||
}
|
||||
else
|
||||
{
|
||||
PrintEscape(start, end);
|
||||
// FIXME
|
||||
// PrintEscape(start, end);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -545,15 +549,16 @@ void BBCODEParser::PutOpeningTagFromBBCode(const Tags * tag)
|
|||
PutOpeningTagMark();
|
||||
Put(tag->html_tag);
|
||||
|
||||
const wchar_t * start = pchar;
|
||||
|
||||
while( *pchar && *pchar != ']' )
|
||||
++pchar;
|
||||
|
||||
PutHtmlArgument(tag, start, pchar);
|
||||
|
||||
if( *pchar == ']' )
|
||||
++pchar;
|
||||
// FIXME
|
||||
// const wchar_t * start = pchar;
|
||||
//
|
||||
// while( *pchar && *pchar != ']' )
|
||||
// ++pchar;
|
||||
//
|
||||
// PutHtmlArgument(tag, start, pchar);
|
||||
//
|
||||
// if( *pchar == ']' )
|
||||
// ++pchar;
|
||||
|
||||
if( !tag->inline_tag )
|
||||
{
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -42,7 +42,7 @@
|
|||
#include <map>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
#include "convert/baseparser.h"
|
||||
|
||||
|
||||
namespace pt
|
||||
|
@ -90,7 +90,7 @@ namespace pt
|
|||
|
||||
the filter recognizes xml simple tags (with / at the end) such as: <br />
|
||||
*/
|
||||
class HTMLFilter
|
||||
class HTMLFilter : public BaseParser
|
||||
{
|
||||
public:
|
||||
|
||||
|
@ -111,27 +111,22 @@ public:
|
|||
void Filter(const std::wstring & in, std::wstring & out);
|
||||
|
||||
|
||||
// insert a white space into long words
|
||||
// (only between html tags)
|
||||
// skipped in such tags: script, pre, textarea
|
||||
// break_after - after how many characters insert a space (0 - off)
|
||||
void BreakWord(size_t break_after_);
|
||||
const static int WHITE_MODE_ORIGIN = 0;
|
||||
const static int WHITE_MODE_SINGLE_LINE = 1;
|
||||
const static int WHITE_MODE_TREE = 2;
|
||||
|
||||
// insert a new line character into long lines
|
||||
// (only between html tags)
|
||||
|
||||
// white chars mode: one of WHITE_MODE_ORIGIN, WHITE_MODE_SINGLE_LINE or WHITE_MODE_TREE
|
||||
//
|
||||
void white_chars_mode(int mode);
|
||||
|
||||
// if the line is wrap_line_ characters long (or longer) then insert a new line character (in place of a white char)
|
||||
// (only between html tags and only in <body> subtree)
|
||||
// skipped in such tags: script, pre, textarea
|
||||
// wrap_line - after how many characters wrap a line (0 - off)
|
||||
// 0 - off
|
||||
// lines are wrapped only in 'body' tag (useful for text in 'title' tag which is in 'head' section)
|
||||
void WrapLine(size_t wrap_line_);
|
||||
|
||||
// trimming white characters (with new lines)
|
||||
// at the beginning, at the end and in the middle of a string
|
||||
// only between html tags
|
||||
// at the beginning and at the end only one space is left
|
||||
// skipped in such tags: script, pre, textarea
|
||||
// false by default
|
||||
void TrimWhite(bool trim);
|
||||
|
||||
// first tabs in a tree
|
||||
// default: 2 (spaces)
|
||||
// set 0 to turn off
|
||||
|
@ -207,9 +202,14 @@ protected:
|
|||
none
|
||||
} type;
|
||||
|
||||
bool is_commentary;
|
||||
|
||||
// is there a new line after this tag
|
||||
bool new_line;
|
||||
|
||||
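// is there a new line in the middle (NOTE(review): the original comment was cut off after "new" - confirm the exact meaning)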
|
||||
bool new_line_in_the_middle;
|
||||
|
||||
// current orphans table
|
||||
// (will be propagated)
|
||||
Orphans * porphans;
|
||||
|
@ -218,6 +218,8 @@ protected:
|
|||
// (will be propagated)
|
||||
bool has_body_tag;
|
||||
|
||||
size_t tree_index;
|
||||
|
||||
void Clear();
|
||||
Item();
|
||||
};
|
||||
|
@ -235,12 +237,16 @@ protected:
|
|||
|
||||
virtual bool IsOpeningTagMark(wchar_t c);
|
||||
virtual bool IsClosingTagMark(wchar_t c);
|
||||
virtual bool IsClosingTagIndicator(wchar_t c);
|
||||
virtual bool IsSpecialTagIndicator(wchar_t c);
|
||||
virtual bool IsAttributeAssignmentMark(wchar_t c);
|
||||
virtual bool IsClosingXmlSimpleTagMark(wchar_t c);
|
||||
virtual bool IsStartingEntityMark(wchar_t c);
|
||||
virtual bool IsEndingEntityMark(wchar_t c);
|
||||
|
||||
virtual bool IsOpeningCommentaryTagMark(const wchar_t * str);
|
||||
virtual size_t OpeningCommentaryTagMarkSize();
|
||||
// virtual bool IsOpeningCommentaryTagMark(const wchar_t * str);
|
||||
// virtual size_t OpeningCommentaryTagMarkSize();
|
||||
virtual bool IsEndingCommentaryTagMarkAtEndOfString(const std::wstring & str);
|
||||
|
||||
virtual bool IsValidCharForName(int c);
|
||||
virtual bool IsValidCharForAttrName(int c);
|
||||
|
@ -249,7 +255,6 @@ protected:
|
|||
virtual bool SkipCommentaryTagIfExists();
|
||||
|
||||
virtual void Put(wchar_t c);
|
||||
virtual void Put(const wchar_t * str);
|
||||
virtual void Put(const wchar_t * str, const wchar_t * end);
|
||||
virtual void Put(const std::wstring & str);
|
||||
virtual void AnalyzeEntitiesAndPut(const wchar_t * str, const wchar_t * end, std::wstring * out);
|
||||
|
@ -257,10 +262,7 @@ protected:
|
|||
virtual void PutOpeningTagMark();
|
||||
virtual void PutClosingTagMark();
|
||||
virtual bool PutOpeningTag();
|
||||
virtual void PutClosingTag(const wchar_t * tag);
|
||||
|
||||
virtual void PutNormalText(const wchar_t * str, const wchar_t * end);
|
||||
virtual void ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white);
|
||||
virtual void PutClosingTag(const Item & item);
|
||||
|
||||
virtual void ItemFound();
|
||||
virtual void EntityFound(const wchar_t * str, const wchar_t * end);
|
||||
|
@ -299,9 +301,8 @@ protected:
|
|||
void SkipWhite();
|
||||
void SkipWhiteLines();
|
||||
void SkipWhiteWithFirstNewLine();
|
||||
void SkipWhiteLines(const wchar_t * & str, const wchar_t * end);
|
||||
bool IsClosingTagForLastItem();
|
||||
void SkipAndCheckClosingTag();
|
||||
void SkipAndCheckClosingTag(std::wstring * remember_text = nullptr);
|
||||
|
||||
void PopStack();
|
||||
bool PushStack();
|
||||
|
@ -312,13 +313,13 @@ protected:
|
|||
void ReadNormalText();
|
||||
bool PrintRest();
|
||||
bool PrintOpeningItem();
|
||||
void ReadItemName();
|
||||
void ReadItemName(std::wstring & name, bool clear_name = true);
|
||||
void ReadItemAttrName();
|
||||
void ReadItemAttrValueAdd(const wchar_t * value_start, const wchar_t * value_end);
|
||||
void ReadItemAttrValueAdd(const std::wstring & str);
|
||||
void ReadItemAttrValue(bool has_quote, wchar_t quote_char);
|
||||
|
||||
bool ReadItemAttr();
|
||||
bool CheckItemAttr();
|
||||
void CheckItemLangAttr();
|
||||
void PrintItemAttr();
|
||||
|
||||
void ReadItemClosing();
|
||||
|
@ -330,27 +331,23 @@ protected:
|
|||
|
||||
void CheckChar(wchar_t c);
|
||||
|
||||
void CheckLineWrap();
|
||||
bool HasEntityEndAround(const wchar_t * str, const wchar_t * end);
|
||||
void PutNormalNonWhite(const wchar_t * & str, const wchar_t * end);
|
||||
void PutNormalWhite(const wchar_t * & str, const wchar_t * end);
|
||||
void PutNormalNonWhite(std::wstring & str, bool allow_put_new_line, bool allow_put_space);
|
||||
bool PutNormalWhite();
|
||||
void PutEverythingUntilClosingTag(bool put_closing_tag_as_well);
|
||||
void PutTabs(size_t len);
|
||||
void PutNonBreakingSpace();
|
||||
void PutNewLine();
|
||||
void CalcOrphansMaxLen(Orphans & orphans);
|
||||
|
||||
const wchar_t * pchar;
|
||||
Item empty;
|
||||
Item * pstack; // stack pointer
|
||||
size_t stack_len; // length of the stack
|
||||
wchar_t * buffer; // buffer used when printing
|
||||
std::wstring * out_string;
|
||||
bool last_new_line;
|
||||
size_t break_after; // insert a space into long words after 'break_after' characters
|
||||
int white_mode;
|
||||
size_t wrap_line; // insert a new line character into long lines
|
||||
bool trim_white; // trimming white characters
|
||||
size_t tab_size;
|
||||
bool was_ending_commentary;
|
||||
OrphanMode orphan_mode;
|
||||
std::wstring attr_name;
|
||||
std::vector<std::wstring> attr_value;
|
||||
|
@ -365,6 +362,8 @@ protected:
|
|||
bool skip_commentaries;
|
||||
bool skip_entities;
|
||||
bool analyze_entities;
|
||||
std::wstring tmp_text;
|
||||
std::wstring tmp_name;
|
||||
};
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue