Browse Source

some work in HTMLFilter

- instead of using the pchar pointer directly, we now use the pointers/streams from BaseParser
- removed support for inserting a white character into long words: removed the BreakWord(size_t break_after_) method
- changed how white characters are treated: added the white_chars_mode(int mode) method
  mode 0: WHITE_MODE_ORIGIN
  mode 1: WHITE_MODE_SINGLE_LINE
  mode 2: WHITE_MODE_TREE
htmlparserlistener
Tomasz Sowa 1 year ago
parent
commit
4f8ae6ce29
  1. 59
      src/html/bbcodeparser.cpp
  2. 727
      src/html/htmlfilter.cpp
  3. 75
      src/html/htmlfilter.h

59
src/html/bbcodeparser.cpp

@ -121,7 +121,7 @@ void BBCODEParser::PutNormalText(const wchar_t * str, const wchar_t * end)
{
int br_len;
if( *pchar == 0 )
if( lastc != -1 )
{
// trimming last white characters at end of the user text
while( str<end && (IsWhite(*(end-1)) || *(end-1)==10) )
@ -415,15 +415,17 @@ void BBCODEParser::PutOpeningTagFromEzc()
(*out_string) += '[';
(*out_string) += LastItem().name;
const wchar_t * start = pchar;
while( *pchar && *pchar!=']' )
++pchar;
if( *pchar == ']' )
++pchar;
Put(start, pchar);
// FIXME
// const wchar_t * start = pchar;
//
// while( *pchar && *pchar!=']' )
// ++pchar;
//
// if( *pchar == ']' )
// ++pchar;
//
// Put(start, pchar);
}
@ -453,13 +455,13 @@ void BBCODEParser::TrimWhiteWithNewLines(const wchar_t * & start, const wchar_t
void BBCODEParser::PutHtmlArgument2(const Tags * tag, bool has_u)
{
const wchar_t * start = pchar;
const wchar_t * end = pchar;
//const wchar_t * start = pchar;
//const wchar_t * end = pchar;
bool first_tag_removed = false;
while( *pchar != 0 )
while( lastc != -1 )
{
if( IsOpeningTagMark(*pchar) )
if( IsOpeningTagMark(lastc) )
{
if( IsClosingTagForLastItem() )
{
@ -472,8 +474,8 @@ bool first_tag_removed = false;
}
else
{
pchar += 1;
end = pchar;
read_char();
//end = pchar;
}
}
@ -482,12 +484,14 @@ bool first_tag_removed = false;
if( has_u )
{
TrimWhiteWithNewLines(start, end);
PrintEncode(start, end);
// FIXME
// TrimWhiteWithNewLines(start, end);
// PrintEncode(start, end);
}
else
{
PrintEscape(start, end);
// FIXME
// PrintEscape(start, end);
}
}
@ -545,15 +549,16 @@ void BBCODEParser::PutOpeningTagFromBBCode(const Tags * tag)
PutOpeningTagMark();
Put(tag->html_tag);
const wchar_t * start = pchar;
while( *pchar && *pchar != ']' )
++pchar;
PutHtmlArgument(tag, start, pchar);
if( *pchar == ']' )
++pchar;
// FIXME
// const wchar_t * start = pchar;
//
// while( *pchar && *pchar != ']' )
// ++pchar;
//
// PutHtmlArgument(tag, start, pchar);
//
// if( *pchar == ']' )
// ++pchar;
if( !tag->inline_tag )
{

727
src/html/htmlfilter.cpp

File diff suppressed because it is too large Load Diff

75
src/html/htmlfilter.h

@ -42,7 +42,7 @@
#include <map>
#include <vector>
#include <algorithm>
#include "convert/baseparser.h"
namespace pt
@ -90,7 +90,7 @@ namespace pt
the filter recognizes xml simple tags (with / at the end) such as: <br />
*/
class HTMLFilter
class HTMLFilter : public BaseParser
{
public:
@ -111,27 +111,22 @@ public:
void Filter(const std::wstring & in, std::wstring & out);
// insert a white space into long words
// (only between html tags)
// skipped in such tags: script, pre, textarea
// break_after - after how many characters insert a space (0 - off)
void BreakWord(size_t break_after_);
const static int WHITE_MODE_ORIGIN = 0;
const static int WHITE_MODE_SINGLE_LINE = 1;
const static int WHITE_MODE_TREE = 2;
// insert a new line character into long lines
// (only between html tags)
// white chars mode
//
void white_chars_mode(int mode);
// if a line is wrap_line_ characters long (or longer) then insert a new line character (in place of a white char)
// (only between html tags and only in <body> subtree)
// skipped in such tags: script, pre, textarea
// wrap_line - after how many characters wrap a line (0 - off)
// 0 - off
// lines are wrapped only in 'body' tag (useful for text in 'title' tag which is in 'head' section)
void WrapLine(size_t wrap_line_);
// trimming white characters (with new lines)
// at the beginning, at the end and in the middle of a string
// only between html tags
// at the beginning and at the end only one space is left
// skipped in such tags: script, pre, textarea
// false by default
void TrimWhite(bool trim);
// first tabs in a tree
// default: 2 (spaces)
// set 0 to turn off
@ -207,9 +202,14 @@ protected:
none
} type;
bool is_commentary;
// is there a new line after this tag
bool new_line;
// is there a new line in the middle (NOTE(review): the original comment was truncated -- confirm the exact meaning)
bool new_line_in_the_middle;
// current orphans table
// (will be propagated)
Orphans * porphans;
@ -218,6 +218,8 @@ protected:
// (will be propagated)
bool has_body_tag;
size_t tree_index;
void Clear();
Item();
};
@ -235,12 +237,16 @@ protected:
virtual bool IsOpeningTagMark(wchar_t c);
virtual bool IsClosingTagMark(wchar_t c);
virtual bool IsClosingTagIndicator(wchar_t c);
virtual bool IsSpecialTagIndicator(wchar_t c);
virtual bool IsAttributeAssignmentMark(wchar_t c);
virtual bool IsClosingXmlSimpleTagMark(wchar_t c);
virtual bool IsStartingEntityMark(wchar_t c);
virtual bool IsEndingEntityMark(wchar_t c);
virtual bool IsOpeningCommentaryTagMark(const wchar_t * str);
virtual size_t OpeningCommentaryTagMarkSize();
// virtual bool IsOpeningCommentaryTagMark(const wchar_t * str);
// virtual size_t OpeningCommentaryTagMarkSize();
virtual bool IsEndingCommentaryTagMarkAtEndOfString(const std::wstring & str);
virtual bool IsValidCharForName(int c);
virtual bool IsValidCharForAttrName(int c);
@ -249,7 +255,6 @@ protected:
virtual bool SkipCommentaryTagIfExists();
virtual void Put(wchar_t c);
virtual void Put(const wchar_t * str);
virtual void Put(const wchar_t * str, const wchar_t * end);
virtual void Put(const std::wstring & str);
virtual void AnalyzeEntitiesAndPut(const wchar_t * str, const wchar_t * end, std::wstring * out);
@ -257,10 +262,7 @@ protected:
virtual void PutOpeningTagMark();
virtual void PutClosingTagMark();
virtual bool PutOpeningTag();
virtual void PutClosingTag(const wchar_t * tag);
virtual void PutNormalText(const wchar_t * str, const wchar_t * end);
virtual void ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white);
virtual void PutClosingTag(const Item & item);
virtual void ItemFound();
virtual void EntityFound(const wchar_t * str, const wchar_t * end);
@ -299,9 +301,8 @@ protected:
void SkipWhite();
void SkipWhiteLines();
void SkipWhiteWithFirstNewLine();
void SkipWhiteLines(const wchar_t * & str, const wchar_t * end);
bool IsClosingTagForLastItem();
void SkipAndCheckClosingTag();
void SkipAndCheckClosingTag(std::wstring * remember_text = nullptr);
void PopStack();
bool PushStack();
@ -312,13 +313,13 @@ protected:
void ReadNormalText();
bool PrintRest();
bool PrintOpeningItem();
void ReadItemName();
void ReadItemName(std::wstring & name, bool clear_name = true);
void ReadItemAttrName();
void ReadItemAttrValueAdd(const wchar_t * value_start, const wchar_t * value_end);
void ReadItemAttrValueAdd(const std::wstring & str);
void ReadItemAttrValue(bool has_quote, wchar_t quote_char);
bool ReadItemAttr();
bool CheckItemAttr();
void CheckItemLangAttr();
void PrintItemAttr();
void ReadItemClosing();
@ -330,27 +331,23 @@ protected:
void CheckChar(wchar_t c);
void CheckLineWrap();
bool HasEntityEndAround(const wchar_t * str, const wchar_t * end);
void PutNormalNonWhite(const wchar_t * & str, const wchar_t * end);
void PutNormalWhite(const wchar_t * & str, const wchar_t * end);
void PutNormalNonWhite(std::wstring & str, bool allow_put_new_line, bool allow_put_space);
bool PutNormalWhite();
void PutEverythingUntilClosingTag(bool put_closing_tag_as_well);
void PutTabs(size_t len);
void PutNonBreakingSpace();
void PutNewLine();
void CalcOrphansMaxLen(Orphans & orphans);
const wchar_t * pchar;
Item empty;
Item * pstack; // stack pointer
size_t stack_len; // length of the stack
wchar_t * buffer; // buffer used when printing
std::wstring * out_string;
bool last_new_line;
size_t break_after; // insert a space into long words after 'break_after' characters
int white_mode;
size_t wrap_line; // insert a new line character into long lines
bool trim_white; // trimming white characters
size_t tab_size;
bool was_ending_commentary;
OrphanMode orphan_mode;
std::wstring attr_name;
std::vector<std::wstring> attr_value;
@ -365,6 +362,8 @@ protected:
bool skip_commentaries;
bool skip_entities;
bool analyze_entities;
std::wstring tmp_text;
std::wstring tmp_name;
};

Loading…
Cancel
Save