added to HTMLFilter:
- possibility to remove html entities, method: SkipEntities(bool) git-svn-id: svn://ttmath.org/publicrep/winix/trunk@1132 e52654a7-88a9-db11-a3e9-0013d4bc506e
This commit is contained in:
parent
027a8ec428
commit
1b8f5dc673
File diff suppressed because one or more lines are too long
|
@ -5,7 +5,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2008-2014, Tomasz Sowa
|
||||
* Copyright (c) 2008-2018, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -66,36 +66,46 @@ return false;
|
|||
}
|
||||
|
||||
|
||||
bool BBCODEParser::IsOpeningTagMark()
|
||||
bool BBCODEParser::IsOpeningTagMark(wchar_t c)
|
||||
{
|
||||
return (*pchar == '[');
|
||||
return (c == '[');
|
||||
}
|
||||
|
||||
|
||||
// there are no commentaries in bbcode
|
||||
bool BBCODEParser::IsOpeningCommentaryTagMark()
|
||||
// a bbcode tag is closed with the ']' character
// c - the character to test
bool BBCODEParser::IsClosingTagMark(wchar_t c)
{
	const bool is_closing_mark = (c == L']');

	return is_closing_mark;
}
|
||||
|
||||
// no character is reported as an xml 'simple tag' closing mark,
// the answer is always false (the argument is not inspected)
bool BBCODEParser::IsClosingXmlSimpleTagMark(wchar_t c)
{
	return false;
}
|
||||
|
||||
|
||||
|
||||
// there are no commentaries in bbcode
|
||||
bool BBCODEParser::IsOpeningCommentaryTagMark(const wchar_t *)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// the size reported for the opening commentary mark is always zero
size_t BBCODEParser::OpeningCommentaryTagMarkSize()
{
	const size_t mark_size = 0;

	return mark_size;
}
|
||||
|
||||
|
||||
|
||||
// nothing is skipped here: the method always returns false
bool BBCODEParser::SkipCommentaryTagIfExists()
{
	return false;
}
|
||||
|
||||
|
||||
bool BBCODEParser::IsClosingTagMark()
|
||||
{
|
||||
return (*pchar == ']');
|
||||
}
|
||||
|
||||
|
||||
bool BBCODEParser::IsClosingXmlSimpleTagMark()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -446,7 +456,7 @@ bool first_tag_removed = false;
|
|||
|
||||
while( *pchar != 0 )
|
||||
{
|
||||
if( IsOpeningTagMark() )
|
||||
if( IsOpeningTagMark(*pchar) )
|
||||
{
|
||||
if( IsClosingTagForLastItem() )
|
||||
{
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2008-2014, Tomasz Sowa
|
||||
* Copyright (c) 2008-2018, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -62,10 +62,12 @@ class BBCODEParser : public HTMLFilter
|
|||
virtual void Init();
|
||||
virtual void Uninit();
|
||||
|
||||
virtual bool IsOpeningTagMark();
|
||||
virtual bool IsOpeningCommentaryTagMark();
|
||||
virtual bool IsClosingTagMark();
|
||||
virtual bool IsClosingXmlSimpleTagMark();
|
||||
virtual bool IsOpeningTagMark(wchar_t c);
|
||||
virtual bool IsClosingTagMark(wchar_t c);
|
||||
virtual bool IsClosingXmlSimpleTagMark(wchar_t c);
|
||||
|
||||
virtual bool IsOpeningCommentaryTagMark(const wchar_t *);
|
||||
virtual size_t OpeningCommentaryTagMarkSize();
|
||||
|
||||
virtual bool IsValidCharForName(int c);
|
||||
virtual void CheckExceptions();
|
||||
|
|
|
@ -88,7 +88,13 @@ void HTMLFilter::Uninit()
|
|||
|
||||
void HTMLFilter::Filter(const std::wstring & in, std::wstring & out)
|
||||
{
|
||||
size_t out_projected_len = in.size() * 2 + 1;
|
||||
if( &in == &out )
|
||||
{
|
||||
// out cannot be the same string as in
|
||||
return;
|
||||
}
|
||||
|
||||
size_t out_projected_len = in.size() * 2 + 1;
|
||||
|
||||
if( out.capacity() < out_projected_len )
|
||||
out.reserve(out_projected_len);
|
||||
|
@ -105,8 +111,10 @@ void HTMLFilter::SetSomeDefaults()
|
|||
wrap_line = 0;
|
||||
orphan_mode = orphan_nbsp;
|
||||
safe_mode = false;
|
||||
skip_all_tags = false;
|
||||
skip_tags = false;
|
||||
skip_commentaries = false;
|
||||
skip_entities = false;
|
||||
analyze_entities = false;
|
||||
}
|
||||
|
||||
|
||||
|
@ -238,13 +246,34 @@ void HTMLFilter::SafeMode(bool safe_mode_)
|
|||
}
|
||||
|
||||
|
||||
void HTMLFilter::SkipAllTags(bool skip_all_tags, bool skip_commentaries)
|
||||
// stores the skip_tags flag in the filter
// (the parameter shadows the member, hence the explicit qualification)
void HTMLFilter::SkipTags(bool skip_tags)
{
	HTMLFilter::skip_tags = skip_tags;
}
|
||||
|
||||
void HTMLFilter::SkipCommentaries(bool skip_commentaries)
|
||||
{
|
||||
this->skip_all_tags = skip_all_tags;
|
||||
this->skip_commentaries = skip_commentaries;
|
||||
}
|
||||
|
||||
|
||||
void HTMLFilter::SkipEntities(bool skip_entities)
|
||||
{
|
||||
this->skip_entities = skip_entities;
|
||||
|
||||
if( this->skip_entities )
|
||||
{
|
||||
this->analyze_entities = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// stores the analyze_entities flag in the filter
// (the parameter shadows the member, hence the explicit qualification)
void HTMLFilter::AnalyzeEntities(bool analyze_entities)
{
	HTMLFilter::analyze_entities = analyze_entities;
}
|
||||
|
||||
|
||||
void HTMLFilter::SetNoFilterTag(const std::wstring & tag_name)
|
||||
{
|
||||
no_filter_tag = tag_name;
|
||||
|
@ -380,7 +409,7 @@ bool HTMLFilter::IsClosingTagForLastItem()
|
|||
pchar += LastItem().name.size();
|
||||
SkipWhite();
|
||||
|
||||
if( IsClosingTagMark() )
|
||||
if( IsClosingTagMark(*pchar) )
|
||||
{
|
||||
pchar += 1;
|
||||
return true;
|
||||
|
@ -402,7 +431,7 @@ const wchar_t * end = pchar;
|
|||
|
||||
while( *pchar != 0 )
|
||||
{
|
||||
if( IsOpeningTagMark() )
|
||||
if( IsOpeningTagMark(*pchar) )
|
||||
{
|
||||
if( IsClosingTagForLastItem() )
|
||||
{
|
||||
|
@ -450,12 +479,12 @@ void HTMLFilter::SkipAndCheckClosingTag()
|
|||
}
|
||||
}
|
||||
else
|
||||
if( !is_quoted && LastItem().type == Item::opening && IsClosingXmlSimpleTagMark() ) // closing xml tag: default '/'
|
||||
if( !is_quoted && LastItem().type == Item::opening && IsClosingXmlSimpleTagMark(*pchar) ) // closing xml tag: default '/'
|
||||
{
|
||||
LastItem().type = Item::simple;
|
||||
}
|
||||
else
|
||||
if( !is_quoted && IsClosingTagMark() )
|
||||
if( !is_quoted && IsClosingTagMark(*pchar) )
|
||||
{
|
||||
++pchar;
|
||||
break;
|
||||
|
@ -470,7 +499,7 @@ bool HTMLFilter::IsValidCharForName(int c)
|
|||
if( (c>='a' && c<='z') ||
|
||||
(c>='A' && c<='Z') ||
|
||||
(c>='0' && c<='9') ||
|
||||
c=='-' || c=='!' )
|
||||
c=='-' || c=='!' || c==':') // : for namespace character
|
||||
return true;
|
||||
|
||||
return false;
|
||||
|
@ -489,6 +518,18 @@ return false;
|
|||
}
|
||||
|
||||
|
||||
// returns true if c can be a part of an entity name:
// an ascii letter, an ascii digit or the '#' character
bool HTMLFilter::IsValidCharForEntityName(int c)
{
	const bool is_lower = (c >= 'a' && c <= 'z');
	const bool is_upper = (c >= 'A' && c <= 'Z');
	const bool is_digit = (c >= '0' && c <= '9');
	const bool is_hash  = (c == '#');

	return is_lower || is_upper || is_digit || is_hash;
}
|
||||
|
||||
|
||||
void HTMLFilter::ReadItemName()
|
||||
{
|
||||
size_t i;
|
||||
|
@ -521,12 +562,28 @@ size_t i;
|
|||
|
||||
|
||||
|
||||
void HTMLFilter::ReadItemAttrValueAdd(const wchar_t * value_start, const wchar_t * value_end)
|
||||
{
|
||||
attr_value.push_back(std::wstring());
|
||||
|
||||
if( analyze_entities )
|
||||
{
|
||||
AnalyzeEntitiesAndPut(value_start, value_end, &attr_value.back());
|
||||
}
|
||||
else
|
||||
{
|
||||
attr_value.back().append(value_start, value_end);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void HTMLFilter::ReadItemAttrValue(bool has_quote, wchar_t quote_char)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
attr_value.clear();
|
||||
attr_value_temp.clear();
|
||||
const wchar_t * value_start = pchar;
|
||||
size_t value_len = 0; // how many non white characters
|
||||
|
||||
for(i=0 ; *pchar ; ++i, ++pchar )
|
||||
{
|
||||
|
@ -537,30 +594,28 @@ size_t i;
|
|||
}
|
||||
else
|
||||
{
|
||||
if( IsClosingTagMark() || *pchar == 10 || IsWhite(*pchar) )
|
||||
if( IsClosingTagMark(*pchar) || *pchar == 10 || IsWhite(*pchar) )
|
||||
break;
|
||||
}
|
||||
|
||||
if( *pchar==10 || IsWhite(*pchar) )
|
||||
{
|
||||
if( !attr_value_temp.empty() )
|
||||
{
|
||||
attr_value.push_back(attr_value_temp);
|
||||
attr_value_temp.clear();
|
||||
}
|
||||
if( value_len > 0 && value_len <= WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN )
|
||||
ReadItemAttrValueAdd(value_start, pchar);
|
||||
|
||||
value_len = 0;
|
||||
}
|
||||
else
|
||||
if( i < WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN )
|
||||
{
|
||||
attr_value_temp += *pchar;
|
||||
if( value_len == 0 )
|
||||
value_start = pchar;
|
||||
|
||||
value_len += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if( !attr_value_temp.empty() )
|
||||
{
|
||||
attr_value.push_back(attr_value_temp);
|
||||
attr_value_temp.clear();
|
||||
}
|
||||
if( value_len > 0 && value_len <= WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN )
|
||||
ReadItemAttrValueAdd(value_start, pchar);
|
||||
}
|
||||
|
||||
|
||||
|
@ -611,6 +666,57 @@ void HTMLFilter::Put(const std::wstring & str)
|
|||
}
|
||||
|
||||
|
||||
// out can be null
|
||||
void HTMLFilter::AnalyzeEntitiesAndPut(const wchar_t * str, const wchar_t * end, std::wstring * out)
|
||||
{
|
||||
size_t epsilon = 8; // !! IMPROVE ME put as a constant
|
||||
const wchar_t * old_str = str;
|
||||
|
||||
while( str < end )
|
||||
{
|
||||
if( IsStartingEntityMark(*str) )
|
||||
{
|
||||
const wchar_t * entity_start = str;
|
||||
str += 1; // skip &
|
||||
|
||||
for(size_t i=0 ; *str && IsValidCharForEntityName(*str) && i < epsilon ; ++i, ++str)
|
||||
{
|
||||
}
|
||||
|
||||
if( IsEndingEntityMark(*str) && str - entity_start > 1 ) // at least one character in entity name
|
||||
{
|
||||
if( out )
|
||||
out->append(old_str, entity_start);
|
||||
else
|
||||
Put(old_str, entity_start);
|
||||
|
||||
str += 1; // skip ;
|
||||
|
||||
if( !skip_entities )
|
||||
{
|
||||
if( out )
|
||||
out->append(entity_start, str);
|
||||
else
|
||||
Put(entity_start, str);
|
||||
}
|
||||
|
||||
EntityFound(entity_start + 1, str - 1); // without & and ;
|
||||
old_str = str;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
str += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if( out )
|
||||
out->append(old_str, end);
|
||||
else
|
||||
Put(old_str, end);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
int HTMLFilter::CheckOrphan(const wchar_t * str, const wchar_t * end, const std::wstring & orphan_str)
|
||||
|
@ -700,12 +806,12 @@ return CheckOrphan(str, end, LastItem().porphans->tab);
|
|||
// (useful in html entities)
|
||||
// !! also add a check whether the length of the string (end-str) is not too small
|
||||
// and in such a case we do not add any character either
|
||||
bool HTMLFilter::HasSemiloconAround(const wchar_t * str, const wchar_t * end)
|
||||
bool HTMLFilter::HasEntityEndAround(const wchar_t * str, const wchar_t * end)
|
||||
{
|
||||
size_t i, epsilon = 8;
|
||||
size_t i, epsilon = 8;// !! IMPROVE ME put as a constant
|
||||
|
||||
for(i=0 ; str < end && i<epsilon ; ++i, ++str)
|
||||
if( *str == ';' )
|
||||
if( IsEndingEntityMark(*str) )
|
||||
return true;
|
||||
|
||||
return false;
|
||||
|
@ -726,11 +832,11 @@ void HTMLFilter::PutNormalNonWhite(const wchar_t * & str, const wchar_t * end)
|
|||
{
|
||||
const wchar_t * word = str;
|
||||
size_t non_whites = 0;
|
||||
bool was_semicolon = false;
|
||||
bool was_entity_end = false;
|
||||
|
||||
for( ; str < end && *str!=10 && !IsWhite(*str) ; ++str, ++non_whites )
|
||||
{
|
||||
if( break_after != 0 && non_whites >= break_after && (was_semicolon || !HasSemiloconAround(str, end)) )
|
||||
if( break_after != 0 && non_whites >= break_after && (was_entity_end || !HasEntityEndAround(str, end)) )
|
||||
{
|
||||
Put(word, str);
|
||||
word = str;
|
||||
|
@ -739,10 +845,13 @@ bool was_semicolon = false;
|
|||
CheckLineWrap();
|
||||
}
|
||||
|
||||
was_semicolon = (*str == ';');
|
||||
was_entity_end = (IsEndingEntityMark(*str));
|
||||
}
|
||||
|
||||
Put(word, str);
|
||||
if( analyze_entities )
|
||||
AnalyzeEntitiesAndPut(word, str, nullptr);
|
||||
else
|
||||
Put(word, str);
|
||||
}
|
||||
|
||||
|
||||
|
@ -880,7 +989,7 @@ return true;
|
|||
|
||||
void HTMLFilter::PutClosingTag(const wchar_t * tag)
|
||||
{
|
||||
if( skip_all_tags || !IsTagSafe(tag) )
|
||||
if( skip_tags || !IsTagSafe(tag) )
|
||||
return;
|
||||
|
||||
PutOpeningTagMark();
|
||||
|
@ -925,28 +1034,28 @@ void HTMLFilter::PutNewLine()
|
|||
|
||||
|
||||
// we assume the size of the opening mark to be one
|
||||
bool HTMLFilter::IsOpeningTagMark()
|
||||
bool HTMLFilter::IsOpeningTagMark(wchar_t c)
|
||||
{
|
||||
return (*pchar == '<');
|
||||
return (c == '<');
|
||||
}
|
||||
|
||||
|
||||
// we assume the size of the closing mark to be one
|
||||
bool HTMLFilter::IsClosingTagMark()
|
||||
bool HTMLFilter::IsClosingTagMark(wchar_t c)
|
||||
{
|
||||
return (*pchar == '>');
|
||||
return (c == '>');
|
||||
}
|
||||
|
||||
|
||||
// the slash at the end <img src=".." /> (without '>' character)
|
||||
// we assume the size of the mark to be one
|
||||
bool HTMLFilter::IsClosingXmlSimpleTagMark()
|
||||
bool HTMLFilter::IsClosingXmlSimpleTagMark(wchar_t c)
|
||||
{
|
||||
return (*pchar == '/');
|
||||
return (c == '/');
|
||||
}
|
||||
|
||||
|
||||
bool HTMLFilter::IsOpeningCommentaryTagMark()
|
||||
bool HTMLFilter::IsOpeningCommentaryTagMark(const wchar_t * str)
|
||||
{
|
||||
static wchar_t comm_open[] = L"<!--";
|
||||
size_t comm_open_len = sizeof(comm_open) / sizeof(wchar_t) - 1;
|
||||
|
@ -961,6 +1070,18 @@ size_t HTMLFilter::OpeningCommentaryTagMarkSize()
|
|||
}
|
||||
|
||||
|
||||
// an entity starts with the '&' character
// we assume the size of the mark to be one
bool HTMLFilter::IsStartingEntityMark(wchar_t c)
{
	const bool is_entity_start = (c == L'&');

	return is_entity_start;
}
|
||||
|
||||
|
||||
// an entity ends with the ';' character
// we assume the size of the mark to be one
bool HTMLFilter::IsEndingEntityMark(wchar_t c)
{
	const bool is_entity_end = (c == L';');

	return is_entity_end;
}
|
||||
|
||||
|
||||
|
||||
// skipping the commentary tag if exists
|
||||
bool HTMLFilter::SkipCommentaryTagIfExists()
|
||||
|
@ -968,7 +1089,7 @@ bool HTMLFilter::SkipCommentaryTagIfExists()
|
|||
static wchar_t comm_close[] = L"-->";
|
||||
size_t comm_close_len = sizeof(comm_close) / sizeof(wchar_t) - 1;
|
||||
|
||||
if( !IsOpeningCommentaryTagMark() )
|
||||
if( !IsOpeningCommentaryTagMark(pchar) )
|
||||
return false;
|
||||
|
||||
pchar += OpeningCommentaryTagMarkSize();
|
||||
|
@ -1012,7 +1133,7 @@ void HTMLFilter::ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t
|
|||
start = pchar;
|
||||
|
||||
// exception for the commentary tag
|
||||
if( IsOpeningCommentaryTagMark() || !IsOpeningTagMark() )
|
||||
if( IsOpeningCommentaryTagMark(pchar) || !IsOpeningTagMark(*pchar) )
|
||||
{
|
||||
PutNewLine();
|
||||
PutTabs(stack_len);
|
||||
|
@ -1049,7 +1170,7 @@ const wchar_t * last_non_white = pchar;
|
|||
}
|
||||
else
|
||||
{
|
||||
if( IsOpeningTagMark() )
|
||||
if( IsOpeningTagMark(*pchar) )
|
||||
break;
|
||||
|
||||
if( !IsWhite(*pchar) )
|
||||
|
@ -1067,7 +1188,7 @@ const wchar_t * last_non_white = pchar;
|
|||
|
||||
bool HTMLFilter::PrintOpeningItem()
|
||||
{
|
||||
if( skip_all_tags || IsNameEqual(no_filter_tag, LastItem().name) )
|
||||
if( skip_tags || IsNameEqual(no_filter_tag, LastItem().name) )
|
||||
return true;
|
||||
|
||||
if( last_new_line )
|
||||
|
@ -1149,7 +1270,7 @@ void HTMLFilter::PrintItemAttr()
|
|||
{
|
||||
size_t i;
|
||||
|
||||
if( skip_all_tags || IsNameEqual(no_filter_tag, LastItem().name) )
|
||||
if( skip_tags || IsNameEqual(no_filter_tag, LastItem().name) )
|
||||
return;
|
||||
|
||||
Put(' ');
|
||||
|
@ -1188,7 +1309,7 @@ void HTMLFilter::ReadItemSpecial()
|
|||
{
|
||||
LastItem().type = Item::special;
|
||||
|
||||
if( !skip_all_tags )
|
||||
if( !skip_tags )
|
||||
PutOpeningTagMark();
|
||||
|
||||
const wchar_t * start = pchar;
|
||||
|
@ -1197,7 +1318,7 @@ void HTMLFilter::ReadItemSpecial()
|
|||
ReadItemName();
|
||||
SkipAndCheckClosingTag();
|
||||
|
||||
if( !skip_all_tags && pchar > start )
|
||||
if( !skip_tags && pchar > start )
|
||||
Put(start, pchar);
|
||||
|
||||
// closing tag mark is printed directly from the source
|
||||
|
@ -1219,7 +1340,7 @@ void HTMLFilter::ReadItemOpening()
|
|||
|
||||
SkipAndCheckClosingTag(); // here LastItem().type can be changed to 'simple'
|
||||
|
||||
if( !skip_all_tags && !IsNameEqual(no_filter_tag, LastItem().name) )
|
||||
if( !skip_tags && !IsNameEqual(no_filter_tag, LastItem().name) )
|
||||
{
|
||||
if( LastItem().type == Item::simple )
|
||||
Put(L" /");
|
||||
|
@ -1234,6 +1355,10 @@ void HTMLFilter::ItemFound()
|
|||
{
|
||||
}
|
||||
|
||||
// called by AnalyzeEntitiesAndPut() for each entity found
// [str, end) is the entity name without the starting and ending marks
// the default implementation does nothing
void HTMLFilter::EntityFound(const wchar_t * str, const wchar_t * end)
{
	// intentionally empty
}
|
||||
|
||||
|
||||
bool HTMLFilter::ReadItem()
|
||||
{
|
||||
|
@ -1315,6 +1440,8 @@ bool HTMLFilter::IsNameEqual(const std::wstring & name1, const std::wstring & na
|
|||
|
||||
|
||||
// len characters from both strings must be equal
|
||||
// IMPROVE ME change name to something like IsBeginningNameEqual
|
||||
// and move to text.h (pikotools)
|
||||
bool HTMLFilter::IsNameEqual(const wchar_t * name1, const wchar_t * name2, size_t len)
|
||||
{
|
||||
for( ; *name1!=0 && *name2!=0 && len>0 ; ++name1, ++name2, --len )
|
||||
|
@ -1382,10 +1509,10 @@ void HTMLFilter::CheckExceptions()
|
|||
|
||||
// in safe_mode the script tag is ignored
|
||||
if( !safe_mode && IsLastTag(L"script") )
|
||||
PutEverythingUntilClosingTag(!skip_all_tags);
|
||||
PutEverythingUntilClosingTag(!skip_tags);
|
||||
|
||||
if( IsLastTag(L"pre") || IsLastTag(L"textarea") )
|
||||
PutEverythingUntilClosingTag(!skip_all_tags);
|
||||
PutEverythingUntilClosingTag(!skip_tags);
|
||||
|
||||
if( IsLastTag(no_filter_tag) )
|
||||
PutEverythingUntilClosingTag(false);
|
||||
|
@ -1421,7 +1548,7 @@ int i;
|
|||
|
||||
for(int z=(int)stack_len-2 ; z>=i ; --z)
|
||||
{
|
||||
if( !skip_all_tags && pstack[z].new_line )
|
||||
if( !skip_tags && pstack[z].new_line )
|
||||
{
|
||||
PutNewLine();
|
||||
PutTabs(z);
|
||||
|
@ -1471,7 +1598,7 @@ void HTMLFilter::CheckClosingTags()
|
|||
if( IsNameEqual(pstack[stack_len-1].name, pstack[stack_len-2].name) )
|
||||
{
|
||||
// last closing tag is from the previous one
|
||||
if( !skip_all_tags && pstack[stack_len-2].new_line )
|
||||
if( !skip_tags && pstack[stack_len-2].new_line )
|
||||
{
|
||||
PutNewLine();
|
||||
PutTabs(stack_len-2);
|
||||
|
@ -1494,7 +1621,7 @@ bool HTMLFilter::PrintRest()
|
|||
const wchar_t * start = pchar;
|
||||
|
||||
// in safe mode we do not print the rest html code
|
||||
if( safe_mode || skip_all_tags )
|
||||
if( safe_mode || skip_tags )
|
||||
return false;
|
||||
|
||||
while( *pchar )
|
||||
|
|
|
@ -128,13 +128,11 @@ public:
|
|||
// false by default
|
||||
void TrimWhite(bool trim);
|
||||
|
||||
|
||||
// first tabs in a tree
|
||||
// default: 2 (spaces)
|
||||
// set 0 to turn off
|
||||
void InsertTabs(size_t tabsize);
|
||||
|
||||
|
||||
// set a name of a html tag which will be used as 'nofilter' tag
|
||||
// elements between such tags are not filtered (similarly as in <pre> and <textarea>)
|
||||
// these tags (opening and closing) will not be placed in the html output
|
||||
|
@ -145,20 +143,32 @@ public:
|
|||
void AssignOrphans(const std::wstring & lang_code, const std::vector<std::wstring> & otab);
|
||||
void ClearOrphans();
|
||||
|
||||
|
||||
// check 'orphans' for the specific language
|
||||
// if an orphan is detected then the non-break space ("&nbsp;" or ascii 160 code) will be put
|
||||
// default disable (lang_none)
|
||||
void OrphansMode(OrphanMode mode = orphan_nbsp);
|
||||
|
||||
|
||||
// skipping some unsafe tags
|
||||
// (script, iframe, frame, frameset, applet, head, meta, html, link, body, ...)
|
||||
void SafeMode(bool safe_mode_);
|
||||
|
||||
// skip all html tags
|
||||
// gives only text without markup
|
||||
void SkipAllTags(bool skip_all_tags, bool skip_commentaries);
|
||||
// but there can be commentaries
|
||||
void SkipTags(bool skip_tags);
|
||||
|
||||
// skip commentaries
|
||||
void SkipCommentaries(bool skip_commentaries);
|
||||
|
||||
// if true then entities such as &nbsp; are skipped
|
||||
// this automatically turns on AnalyzeEntities
|
||||
// in such a case EntityFound callbacks are sent
|
||||
void SkipEntities(bool skip_entities);
|
||||
|
||||
// analyze html entities such as &nbsp;
|
||||
// virtual method: EntityFound is called
|
||||
// entities are analyzed in normal text and in attribute values such as <p class="a&nbsp;">
|
||||
void AnalyzeEntities(bool analyze_entities);
|
||||
|
||||
|
||||
protected:
|
||||
|
@ -219,13 +229,18 @@ protected:
|
|||
virtual void Init();
|
||||
virtual void Uninit();
|
||||
|
||||
virtual bool IsOpeningTagMark();
|
||||
virtual bool IsOpeningCommentaryTagMark();
|
||||
virtual bool IsClosingTagMark();
|
||||
virtual bool IsClosingXmlSimpleTagMark();
|
||||
virtual bool IsOpeningTagMark(wchar_t c);
|
||||
virtual bool IsClosingTagMark(wchar_t c);
|
||||
virtual bool IsClosingXmlSimpleTagMark(wchar_t c);
|
||||
virtual bool IsStartingEntityMark(wchar_t c);
|
||||
virtual bool IsEndingEntityMark(wchar_t c);
|
||||
|
||||
virtual bool IsOpeningCommentaryTagMark(const wchar_t * str);
|
||||
virtual size_t OpeningCommentaryTagMarkSize();
|
||||
|
||||
virtual bool IsValidCharForName(int c);
|
||||
virtual bool IsValidCharForAttrName(int c);
|
||||
virtual bool IsValidCharForEntityName(int c);
|
||||
virtual void CheckExceptions();
|
||||
virtual bool SkipCommentaryTagIfExists();
|
||||
|
||||
|
@ -233,6 +248,7 @@ protected:
|
|||
virtual void Put(const wchar_t * str);
|
||||
virtual void Put(const wchar_t * str, const wchar_t * end);
|
||||
virtual void Put(const std::wstring & str);
|
||||
virtual void AnalyzeEntitiesAndPut(const wchar_t * str, const wchar_t * end, std::wstring * out);
|
||||
|
||||
virtual void PutOpeningTagMark();
|
||||
virtual void PutClosingTagMark();
|
||||
|
@ -243,6 +259,7 @@ protected:
|
|||
virtual void ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white);
|
||||
|
||||
virtual void ItemFound();
|
||||
virtual void EntityFound(const wchar_t * str, const wchar_t * end);
|
||||
|
||||
/*
|
||||
others
|
||||
|
@ -280,7 +297,6 @@ protected:
|
|||
void SkipWhiteWithFirstNewLine();
|
||||
void SkipWhiteLines(const wchar_t * & str, const wchar_t * end);
|
||||
bool IsClosingTagForLastItem();
|
||||
size_t OpeningCommentaryTagMarkSize();
|
||||
void SkipAndCheckClosingTag();
|
||||
|
||||
void PopStack();
|
||||
|
@ -294,6 +310,7 @@ protected:
|
|||
bool PrintOpeningItem();
|
||||
void ReadItemName();
|
||||
void ReadItemAttrName();
|
||||
void ReadItemAttrValueAdd(const wchar_t * value_start, const wchar_t * value_end);
|
||||
void ReadItemAttrValue(bool has_quote, wchar_t quote_char);
|
||||
|
||||
bool ReadItemAttr();
|
||||
|
@ -310,7 +327,7 @@ protected:
|
|||
void CheckChar(wchar_t c);
|
||||
|
||||
void CheckLineWrap();
|
||||
bool HasSemiloconAround(const wchar_t * str, const wchar_t * end);
|
||||
bool HasEntityEndAround(const wchar_t * str, const wchar_t * end);
|
||||
void PutNormalNonWhite(const wchar_t * & str, const wchar_t * end);
|
||||
void PutNormalWhite(const wchar_t * & str, const wchar_t * end);
|
||||
void PutEverythingUntilClosingTag(bool put_closing_tag_as_well);
|
||||
|
@ -340,8 +357,10 @@ protected:
|
|||
size_t line_len; //length of the current line (without first spaces which create the html tree)
|
||||
bool safe_mode; // skipping some unsafe tags
|
||||
Orphans orphans_temp;
|
||||
bool skip_all_tags;
|
||||
bool skip_tags;
|
||||
bool skip_commentaries;
|
||||
bool skip_entities;
|
||||
bool analyze_entities;
|
||||
};
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue