added to HTMLFilter:
- possibility to remove html entities (method: SkipEntities(bool))
git-svn-id: svn://ttmath.org/publicrep/winix/trunk@1132 e52654a7-88a9-db11-a3e9-0013d4bc506e
This commit is contained in:
parent
027a8ec428
commit
1b8f5dc673
File diff suppressed because one or more lines are too long
|
@ -5,7 +5,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2008-2014, Tomasz Sowa
|
* Copyright (c) 2008-2018, Tomasz Sowa
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -66,36 +66,46 @@ return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool BBCODEParser::IsOpeningTagMark()
|
bool BBCODEParser::IsOpeningTagMark(wchar_t c)
|
||||||
{
|
{
|
||||||
return (*pchar == '[');
|
return (c == '[');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// there are no commentaries in bbcode
|
bool BBCODEParser::IsClosingTagMark(wchar_t c)
|
||||||
bool BBCODEParser::IsOpeningCommentaryTagMark()
|
{
|
||||||
|
return (c == ']');
|
||||||
|
}
|
||||||
|
|
||||||
|
bool BBCODEParser::IsClosingXmlSimpleTagMark(wchar_t c)
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// there are no commentaries in bbcode
|
||||||
|
bool BBCODEParser::IsOpeningCommentaryTagMark(const wchar_t *)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
size_t BBCODEParser::OpeningCommentaryTagMarkSize()
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
bool BBCODEParser::SkipCommentaryTagIfExists()
|
bool BBCODEParser::SkipCommentaryTagIfExists()
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool BBCODEParser::IsClosingTagMark()
|
|
||||||
{
|
|
||||||
return (*pchar == ']');
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
bool BBCODEParser::IsClosingXmlSimpleTagMark()
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -446,7 +456,7 @@ bool first_tag_removed = false;
|
||||||
|
|
||||||
while( *pchar != 0 )
|
while( *pchar != 0 )
|
||||||
{
|
{
|
||||||
if( IsOpeningTagMark() )
|
if( IsOpeningTagMark(*pchar) )
|
||||||
{
|
{
|
||||||
if( IsClosingTagForLastItem() )
|
if( IsClosingTagForLastItem() )
|
||||||
{
|
{
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2008-2014, Tomasz Sowa
|
* Copyright (c) 2008-2018, Tomasz Sowa
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -62,10 +62,12 @@ class BBCODEParser : public HTMLFilter
|
||||||
virtual void Init();
|
virtual void Init();
|
||||||
virtual void Uninit();
|
virtual void Uninit();
|
||||||
|
|
||||||
virtual bool IsOpeningTagMark();
|
virtual bool IsOpeningTagMark(wchar_t c);
|
||||||
virtual bool IsOpeningCommentaryTagMark();
|
virtual bool IsClosingTagMark(wchar_t c);
|
||||||
virtual bool IsClosingTagMark();
|
virtual bool IsClosingXmlSimpleTagMark(wchar_t c);
|
||||||
virtual bool IsClosingXmlSimpleTagMark();
|
|
||||||
|
virtual bool IsOpeningCommentaryTagMark(const wchar_t *);
|
||||||
|
virtual size_t OpeningCommentaryTagMarkSize();
|
||||||
|
|
||||||
virtual bool IsValidCharForName(int c);
|
virtual bool IsValidCharForName(int c);
|
||||||
virtual void CheckExceptions();
|
virtual void CheckExceptions();
|
||||||
|
|
|
@ -88,7 +88,13 @@ void HTMLFilter::Uninit()
|
||||||
|
|
||||||
void HTMLFilter::Filter(const std::wstring & in, std::wstring & out)
|
void HTMLFilter::Filter(const std::wstring & in, std::wstring & out)
|
||||||
{
|
{
|
||||||
size_t out_projected_len = in.size() * 2 + 1;
|
if( &in == &out )
|
||||||
|
{
|
||||||
|
// out cannot be the same string as in
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t out_projected_len = in.size() * 2 + 1;
|
||||||
|
|
||||||
if( out.capacity() < out_projected_len )
|
if( out.capacity() < out_projected_len )
|
||||||
out.reserve(out_projected_len);
|
out.reserve(out_projected_len);
|
||||||
|
@ -105,8 +111,10 @@ void HTMLFilter::SetSomeDefaults()
|
||||||
wrap_line = 0;
|
wrap_line = 0;
|
||||||
orphan_mode = orphan_nbsp;
|
orphan_mode = orphan_nbsp;
|
||||||
safe_mode = false;
|
safe_mode = false;
|
||||||
skip_all_tags = false;
|
skip_tags = false;
|
||||||
skip_commentaries = false;
|
skip_commentaries = false;
|
||||||
|
skip_entities = false;
|
||||||
|
analyze_entities = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -238,13 +246,34 @@ void HTMLFilter::SafeMode(bool safe_mode_)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void HTMLFilter::SkipAllTags(bool skip_all_tags, bool skip_commentaries)
|
void HTMLFilter::SkipTags(bool skip_tags)
|
||||||
|
{
|
||||||
|
this->skip_tags = skip_tags;
|
||||||
|
}
|
||||||
|
|
||||||
|
void HTMLFilter::SkipCommentaries(bool skip_commentaries)
|
||||||
{
|
{
|
||||||
this->skip_all_tags = skip_all_tags;
|
|
||||||
this->skip_commentaries = skip_commentaries;
|
this->skip_commentaries = skip_commentaries;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void HTMLFilter::SkipEntities(bool skip_entities)
|
||||||
|
{
|
||||||
|
this->skip_entities = skip_entities;
|
||||||
|
|
||||||
|
if( this->skip_entities )
|
||||||
|
{
|
||||||
|
this->analyze_entities = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void HTMLFilter::AnalyzeEntities(bool analyze_entities)
|
||||||
|
{
|
||||||
|
this->analyze_entities = analyze_entities;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void HTMLFilter::SetNoFilterTag(const std::wstring & tag_name)
|
void HTMLFilter::SetNoFilterTag(const std::wstring & tag_name)
|
||||||
{
|
{
|
||||||
no_filter_tag = tag_name;
|
no_filter_tag = tag_name;
|
||||||
|
@ -380,7 +409,7 @@ bool HTMLFilter::IsClosingTagForLastItem()
|
||||||
pchar += LastItem().name.size();
|
pchar += LastItem().name.size();
|
||||||
SkipWhite();
|
SkipWhite();
|
||||||
|
|
||||||
if( IsClosingTagMark() )
|
if( IsClosingTagMark(*pchar) )
|
||||||
{
|
{
|
||||||
pchar += 1;
|
pchar += 1;
|
||||||
return true;
|
return true;
|
||||||
|
@ -402,7 +431,7 @@ const wchar_t * end = pchar;
|
||||||
|
|
||||||
while( *pchar != 0 )
|
while( *pchar != 0 )
|
||||||
{
|
{
|
||||||
if( IsOpeningTagMark() )
|
if( IsOpeningTagMark(*pchar) )
|
||||||
{
|
{
|
||||||
if( IsClosingTagForLastItem() )
|
if( IsClosingTagForLastItem() )
|
||||||
{
|
{
|
||||||
|
@ -450,12 +479,12 @@ void HTMLFilter::SkipAndCheckClosingTag()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
if( !is_quoted && LastItem().type == Item::opening && IsClosingXmlSimpleTagMark() ) // closing xml tag: default '/'
|
if( !is_quoted && LastItem().type == Item::opening && IsClosingXmlSimpleTagMark(*pchar) ) // closing xml tag: default '/'
|
||||||
{
|
{
|
||||||
LastItem().type = Item::simple;
|
LastItem().type = Item::simple;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
if( !is_quoted && IsClosingTagMark() )
|
if( !is_quoted && IsClosingTagMark(*pchar) )
|
||||||
{
|
{
|
||||||
++pchar;
|
++pchar;
|
||||||
break;
|
break;
|
||||||
|
@ -470,7 +499,7 @@ bool HTMLFilter::IsValidCharForName(int c)
|
||||||
if( (c>='a' && c<='z') ||
|
if( (c>='a' && c<='z') ||
|
||||||
(c>='A' && c<='Z') ||
|
(c>='A' && c<='Z') ||
|
||||||
(c>='0' && c<='9') ||
|
(c>='0' && c<='9') ||
|
||||||
c=='-' || c=='!' )
|
c=='-' || c=='!' || c==':') // : for namespace character
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
|
@ -489,6 +518,18 @@ return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool HTMLFilter::IsValidCharForEntityName(int c)
|
||||||
|
{
|
||||||
|
if( (c>='a' && c<='z') ||
|
||||||
|
(c>='A' && c<='Z') ||
|
||||||
|
(c>='0' && c<='9') ||
|
||||||
|
c=='#' )
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void HTMLFilter::ReadItemName()
|
void HTMLFilter::ReadItemName()
|
||||||
{
|
{
|
||||||
size_t i;
|
size_t i;
|
||||||
|
@ -521,12 +562,28 @@ size_t i;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void HTMLFilter::ReadItemAttrValueAdd(const wchar_t * value_start, const wchar_t * value_end)
|
||||||
|
{
|
||||||
|
attr_value.push_back(std::wstring());
|
||||||
|
|
||||||
|
if( analyze_entities )
|
||||||
|
{
|
||||||
|
AnalyzeEntitiesAndPut(value_start, value_end, &attr_value.back());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
attr_value.back().append(value_start, value_end);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void HTMLFilter::ReadItemAttrValue(bool has_quote, wchar_t quote_char)
|
void HTMLFilter::ReadItemAttrValue(bool has_quote, wchar_t quote_char)
|
||||||
{
|
{
|
||||||
size_t i;
|
size_t i;
|
||||||
|
|
||||||
attr_value.clear();
|
attr_value.clear();
|
||||||
attr_value_temp.clear();
|
const wchar_t * value_start = pchar;
|
||||||
|
size_t value_len = 0; // how many non white characters
|
||||||
|
|
||||||
for(i=0 ; *pchar ; ++i, ++pchar )
|
for(i=0 ; *pchar ; ++i, ++pchar )
|
||||||
{
|
{
|
||||||
|
@ -537,30 +594,28 @@ size_t i;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if( IsClosingTagMark() || *pchar == 10 || IsWhite(*pchar) )
|
if( IsClosingTagMark(*pchar) || *pchar == 10 || IsWhite(*pchar) )
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( *pchar==10 || IsWhite(*pchar) )
|
if( *pchar==10 || IsWhite(*pchar) )
|
||||||
{
|
{
|
||||||
if( !attr_value_temp.empty() )
|
if( value_len > 0 && value_len <= WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN )
|
||||||
{
|
ReadItemAttrValueAdd(value_start, pchar);
|
||||||
attr_value.push_back(attr_value_temp);
|
|
||||||
attr_value_temp.clear();
|
value_len = 0;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
if( i < WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN )
|
|
||||||
{
|
{
|
||||||
attr_value_temp += *pchar;
|
if( value_len == 0 )
|
||||||
|
value_start = pchar;
|
||||||
|
|
||||||
|
value_len += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if( !attr_value_temp.empty() )
|
if( value_len > 0 && value_len <= WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN )
|
||||||
{
|
ReadItemAttrValueAdd(value_start, pchar);
|
||||||
attr_value.push_back(attr_value_temp);
|
|
||||||
attr_value_temp.clear();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -611,6 +666,57 @@ void HTMLFilter::Put(const std::wstring & str)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// out can be null
|
||||||
|
void HTMLFilter::AnalyzeEntitiesAndPut(const wchar_t * str, const wchar_t * end, std::wstring * out)
|
||||||
|
{
|
||||||
|
size_t epsilon = 8; // !! IMPROVE ME put as a constant
|
||||||
|
const wchar_t * old_str = str;
|
||||||
|
|
||||||
|
while( str < end )
|
||||||
|
{
|
||||||
|
if( IsStartingEntityMark(*str) )
|
||||||
|
{
|
||||||
|
const wchar_t * entity_start = str;
|
||||||
|
str += 1; // skip &
|
||||||
|
|
||||||
|
for(size_t i=0 ; *str && IsValidCharForEntityName(*str) && i < epsilon ; ++i, ++str)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
if( IsEndingEntityMark(*str) && str - entity_start > 1 ) // at least one character in entity name
|
||||||
|
{
|
||||||
|
if( out )
|
||||||
|
out->append(old_str, entity_start);
|
||||||
|
else
|
||||||
|
Put(old_str, entity_start);
|
||||||
|
|
||||||
|
str += 1; // skip ;
|
||||||
|
|
||||||
|
if( !skip_entities )
|
||||||
|
{
|
||||||
|
if( out )
|
||||||
|
out->append(entity_start, str);
|
||||||
|
else
|
||||||
|
Put(entity_start, str);
|
||||||
|
}
|
||||||
|
|
||||||
|
EntityFound(entity_start + 1, str - 1); // without & and ;
|
||||||
|
old_str = str;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
str += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if( out )
|
||||||
|
out->append(old_str, end);
|
||||||
|
else
|
||||||
|
Put(old_str, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int HTMLFilter::CheckOrphan(const wchar_t * str, const wchar_t * end, const std::wstring & orphan_str)
|
int HTMLFilter::CheckOrphan(const wchar_t * str, const wchar_t * end, const std::wstring & orphan_str)
|
||||||
|
@ -700,12 +806,12 @@ return CheckOrphan(str, end, LastItem().porphans->tab);
|
||||||
// (useful in html entities)
|
// (useful in html entities)
|
||||||
// !! also add a check whether the string length (end-str) is too small
|
// !! also add a check whether the string length (end-str) is too small
|
||||||
// and in that case do not add any character either
|
// and in that case do not add any character either
|
||||||
bool HTMLFilter::HasSemiloconAround(const wchar_t * str, const wchar_t * end)
|
bool HTMLFilter::HasEntityEndAround(const wchar_t * str, const wchar_t * end)
|
||||||
{
|
{
|
||||||
size_t i, epsilon = 8;
|
size_t i, epsilon = 8;// !! IMPROVE ME put as a constant
|
||||||
|
|
||||||
for(i=0 ; str < end && i<epsilon ; ++i, ++str)
|
for(i=0 ; str < end && i<epsilon ; ++i, ++str)
|
||||||
if( *str == ';' )
|
if( IsEndingEntityMark(*str) )
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
|
@ -726,11 +832,11 @@ void HTMLFilter::PutNormalNonWhite(const wchar_t * & str, const wchar_t * end)
|
||||||
{
|
{
|
||||||
const wchar_t * word = str;
|
const wchar_t * word = str;
|
||||||
size_t non_whites = 0;
|
size_t non_whites = 0;
|
||||||
bool was_semicolon = false;
|
bool was_entity_end = false;
|
||||||
|
|
||||||
for( ; str < end && *str!=10 && !IsWhite(*str) ; ++str, ++non_whites )
|
for( ; str < end && *str!=10 && !IsWhite(*str) ; ++str, ++non_whites )
|
||||||
{
|
{
|
||||||
if( break_after != 0 && non_whites >= break_after && (was_semicolon || !HasSemiloconAround(str, end)) )
|
if( break_after != 0 && non_whites >= break_after && (was_entity_end || !HasEntityEndAround(str, end)) )
|
||||||
{
|
{
|
||||||
Put(word, str);
|
Put(word, str);
|
||||||
word = str;
|
word = str;
|
||||||
|
@ -739,10 +845,13 @@ bool was_semicolon = false;
|
||||||
CheckLineWrap();
|
CheckLineWrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
was_semicolon = (*str == ';');
|
was_entity_end = (IsEndingEntityMark(*str));
|
||||||
}
|
}
|
||||||
|
|
||||||
Put(word, str);
|
if( analyze_entities )
|
||||||
|
AnalyzeEntitiesAndPut(word, str, nullptr);
|
||||||
|
else
|
||||||
|
Put(word, str);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -880,7 +989,7 @@ return true;
|
||||||
|
|
||||||
void HTMLFilter::PutClosingTag(const wchar_t * tag)
|
void HTMLFilter::PutClosingTag(const wchar_t * tag)
|
||||||
{
|
{
|
||||||
if( skip_all_tags || !IsTagSafe(tag) )
|
if( skip_tags || !IsTagSafe(tag) )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
PutOpeningTagMark();
|
PutOpeningTagMark();
|
||||||
|
@ -925,28 +1034,28 @@ void HTMLFilter::PutNewLine()
|
||||||
|
|
||||||
|
|
||||||
// we assume the size of the opening mark to be one
|
// we assume the size of the opening mark to be one
|
||||||
bool HTMLFilter::IsOpeningTagMark()
|
bool HTMLFilter::IsOpeningTagMark(wchar_t c)
|
||||||
{
|
{
|
||||||
return (*pchar == '<');
|
return (c == '<');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// we assume the size of the closing mark to be one
|
// we assume the size of the closing mark to be one
|
||||||
bool HTMLFilter::IsClosingTagMark()
|
bool HTMLFilter::IsClosingTagMark(wchar_t c)
|
||||||
{
|
{
|
||||||
return (*pchar == '>');
|
return (c == '>');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// the slash at the end <img src=".." /> (without '>' character)
|
// the slash at the end <img src=".." /> (without '>' character)
|
||||||
// we assume the size of the mark to be one
|
// we assume the size of the mark to be one
|
||||||
bool HTMLFilter::IsClosingXmlSimpleTagMark()
|
bool HTMLFilter::IsClosingXmlSimpleTagMark(wchar_t c)
|
||||||
{
|
{
|
||||||
return (*pchar == '/');
|
return (c == '/');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool HTMLFilter::IsOpeningCommentaryTagMark()
|
bool HTMLFilter::IsOpeningCommentaryTagMark(const wchar_t * str)
|
||||||
{
|
{
|
||||||
static wchar_t comm_open[] = L"<!--";
|
static wchar_t comm_open[] = L"<!--";
|
||||||
size_t comm_open_len = sizeof(comm_open) / sizeof(wchar_t) - 1;
|
size_t comm_open_len = sizeof(comm_open) / sizeof(wchar_t) - 1;
|
||||||
|
@ -961,6 +1070,18 @@ size_t HTMLFilter::OpeningCommentaryTagMarkSize()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool HTMLFilter::IsStartingEntityMark(wchar_t c)
|
||||||
|
{
|
||||||
|
return (c == '&');
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool HTMLFilter::IsEndingEntityMark(wchar_t c)
|
||||||
|
{
|
||||||
|
return (c == ';');
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// skipping the commentary tag if exists
|
// skipping the commentary tag if exists
|
||||||
bool HTMLFilter::SkipCommentaryTagIfExists()
|
bool HTMLFilter::SkipCommentaryTagIfExists()
|
||||||
|
@ -968,7 +1089,7 @@ bool HTMLFilter::SkipCommentaryTagIfExists()
|
||||||
static wchar_t comm_close[] = L"-->";
|
static wchar_t comm_close[] = L"-->";
|
||||||
size_t comm_close_len = sizeof(comm_close) / sizeof(wchar_t) - 1;
|
size_t comm_close_len = sizeof(comm_close) / sizeof(wchar_t) - 1;
|
||||||
|
|
||||||
if( !IsOpeningCommentaryTagMark() )
|
if( !IsOpeningCommentaryTagMark(pchar) )
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
pchar += OpeningCommentaryTagMarkSize();
|
pchar += OpeningCommentaryTagMarkSize();
|
||||||
|
@ -1012,7 +1133,7 @@ void HTMLFilter::ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t
|
||||||
start = pchar;
|
start = pchar;
|
||||||
|
|
||||||
// exception for the commentary tag
|
// exception for the commentary tag
|
||||||
if( IsOpeningCommentaryTagMark() || !IsOpeningTagMark() )
|
if( IsOpeningCommentaryTagMark(pchar) || !IsOpeningTagMark(*pchar) )
|
||||||
{
|
{
|
||||||
PutNewLine();
|
PutNewLine();
|
||||||
PutTabs(stack_len);
|
PutTabs(stack_len);
|
||||||
|
@ -1049,7 +1170,7 @@ const wchar_t * last_non_white = pchar;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if( IsOpeningTagMark() )
|
if( IsOpeningTagMark(*pchar) )
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if( !IsWhite(*pchar) )
|
if( !IsWhite(*pchar) )
|
||||||
|
@ -1067,7 +1188,7 @@ const wchar_t * last_non_white = pchar;
|
||||||
|
|
||||||
bool HTMLFilter::PrintOpeningItem()
|
bool HTMLFilter::PrintOpeningItem()
|
||||||
{
|
{
|
||||||
if( skip_all_tags || IsNameEqual(no_filter_tag, LastItem().name) )
|
if( skip_tags || IsNameEqual(no_filter_tag, LastItem().name) )
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
if( last_new_line )
|
if( last_new_line )
|
||||||
|
@ -1149,7 +1270,7 @@ void HTMLFilter::PrintItemAttr()
|
||||||
{
|
{
|
||||||
size_t i;
|
size_t i;
|
||||||
|
|
||||||
if( skip_all_tags || IsNameEqual(no_filter_tag, LastItem().name) )
|
if( skip_tags || IsNameEqual(no_filter_tag, LastItem().name) )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
Put(' ');
|
Put(' ');
|
||||||
|
@ -1188,7 +1309,7 @@ void HTMLFilter::ReadItemSpecial()
|
||||||
{
|
{
|
||||||
LastItem().type = Item::special;
|
LastItem().type = Item::special;
|
||||||
|
|
||||||
if( !skip_all_tags )
|
if( !skip_tags )
|
||||||
PutOpeningTagMark();
|
PutOpeningTagMark();
|
||||||
|
|
||||||
const wchar_t * start = pchar;
|
const wchar_t * start = pchar;
|
||||||
|
@ -1197,7 +1318,7 @@ void HTMLFilter::ReadItemSpecial()
|
||||||
ReadItemName();
|
ReadItemName();
|
||||||
SkipAndCheckClosingTag();
|
SkipAndCheckClosingTag();
|
||||||
|
|
||||||
if( !skip_all_tags && pchar > start )
|
if( !skip_tags && pchar > start )
|
||||||
Put(start, pchar);
|
Put(start, pchar);
|
||||||
|
|
||||||
// closing tag mark is printed directly from the source
|
// closing tag mark is printed directly from the source
|
||||||
|
@ -1219,7 +1340,7 @@ void HTMLFilter::ReadItemOpening()
|
||||||
|
|
||||||
SkipAndCheckClosingTag(); // here LastItem().type can be changed to 'simple'
|
SkipAndCheckClosingTag(); // here LastItem().type can be changed to 'simple'
|
||||||
|
|
||||||
if( !skip_all_tags && !IsNameEqual(no_filter_tag, LastItem().name) )
|
if( !skip_tags && !IsNameEqual(no_filter_tag, LastItem().name) )
|
||||||
{
|
{
|
||||||
if( LastItem().type == Item::simple )
|
if( LastItem().type == Item::simple )
|
||||||
Put(L" /");
|
Put(L" /");
|
||||||
|
@ -1234,6 +1355,10 @@ void HTMLFilter::ItemFound()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void HTMLFilter::EntityFound(const wchar_t * str, const wchar_t * end)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
bool HTMLFilter::ReadItem()
|
bool HTMLFilter::ReadItem()
|
||||||
{
|
{
|
||||||
|
@ -1315,6 +1440,8 @@ bool HTMLFilter::IsNameEqual(const std::wstring & name1, const std::wstring & na
|
||||||
|
|
||||||
|
|
||||||
// len characters from both strings must be equal
|
// len characters from both strings must be equal
|
||||||
|
// IMPROVE ME change name to something like IsBeginningNameEqual
|
||||||
|
// and move to text.h (pikotools)
|
||||||
bool HTMLFilter::IsNameEqual(const wchar_t * name1, const wchar_t * name2, size_t len)
|
bool HTMLFilter::IsNameEqual(const wchar_t * name1, const wchar_t * name2, size_t len)
|
||||||
{
|
{
|
||||||
for( ; *name1!=0 && *name2!=0 && len>0 ; ++name1, ++name2, --len )
|
for( ; *name1!=0 && *name2!=0 && len>0 ; ++name1, ++name2, --len )
|
||||||
|
@ -1382,10 +1509,10 @@ void HTMLFilter::CheckExceptions()
|
||||||
|
|
||||||
// in safe_mode the script tag is ignored
|
// in safe_mode the script tag is ignored
|
||||||
if( !safe_mode && IsLastTag(L"script") )
|
if( !safe_mode && IsLastTag(L"script") )
|
||||||
PutEverythingUntilClosingTag(!skip_all_tags);
|
PutEverythingUntilClosingTag(!skip_tags);
|
||||||
|
|
||||||
if( IsLastTag(L"pre") || IsLastTag(L"textarea") )
|
if( IsLastTag(L"pre") || IsLastTag(L"textarea") )
|
||||||
PutEverythingUntilClosingTag(!skip_all_tags);
|
PutEverythingUntilClosingTag(!skip_tags);
|
||||||
|
|
||||||
if( IsLastTag(no_filter_tag) )
|
if( IsLastTag(no_filter_tag) )
|
||||||
PutEverythingUntilClosingTag(false);
|
PutEverythingUntilClosingTag(false);
|
||||||
|
@ -1421,7 +1548,7 @@ int i;
|
||||||
|
|
||||||
for(int z=(int)stack_len-2 ; z>=i ; --z)
|
for(int z=(int)stack_len-2 ; z>=i ; --z)
|
||||||
{
|
{
|
||||||
if( !skip_all_tags && pstack[z].new_line )
|
if( !skip_tags && pstack[z].new_line )
|
||||||
{
|
{
|
||||||
PutNewLine();
|
PutNewLine();
|
||||||
PutTabs(z);
|
PutTabs(z);
|
||||||
|
@ -1471,7 +1598,7 @@ void HTMLFilter::CheckClosingTags()
|
||||||
if( IsNameEqual(pstack[stack_len-1].name, pstack[stack_len-2].name) )
|
if( IsNameEqual(pstack[stack_len-1].name, pstack[stack_len-2].name) )
|
||||||
{
|
{
|
||||||
// last closing tag is from the previous one
|
// last closing tag is from the previous one
|
||||||
if( !skip_all_tags && pstack[stack_len-2].new_line )
|
if( !skip_tags && pstack[stack_len-2].new_line )
|
||||||
{
|
{
|
||||||
PutNewLine();
|
PutNewLine();
|
||||||
PutTabs(stack_len-2);
|
PutTabs(stack_len-2);
|
||||||
|
@ -1494,7 +1621,7 @@ bool HTMLFilter::PrintRest()
|
||||||
const wchar_t * start = pchar;
|
const wchar_t * start = pchar;
|
||||||
|
|
||||||
// in safe mode we do not print the rest html code
|
// in safe mode we do not print the rest html code
|
||||||
if( safe_mode || skip_all_tags )
|
if( safe_mode || skip_tags )
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
while( *pchar )
|
while( *pchar )
|
||||||
|
|
|
@ -128,13 +128,11 @@ public:
|
||||||
// false by default
|
// false by default
|
||||||
void TrimWhite(bool trim);
|
void TrimWhite(bool trim);
|
||||||
|
|
||||||
|
|
||||||
// first tabs in a tree
|
// first tabs in a tree
|
||||||
// default: 2 (spaces)
|
// default: 2 (spaces)
|
||||||
// set 0 to turn off
|
// set 0 to turn off
|
||||||
void InsertTabs(size_t tabsize);
|
void InsertTabs(size_t tabsize);
|
||||||
|
|
||||||
|
|
||||||
// set a name of a html tag which will be used as 'nofilter' tag
|
// set a name of a html tag which will be used as 'nofilter' tag
|
||||||
// elements between such tags are not filtered (similarly as in <pre> and <textarea>)
|
// elements between such tags are not filtered (similarly as in <pre> and <textarea>)
|
||||||
// these tags (opening and closing) will not be placed in the html output
|
// these tags (opening and closing) will not be placed in the html output
|
||||||
|
@ -145,20 +143,32 @@ public:
|
||||||
void AssignOrphans(const std::wstring & lang_code, const std::vector<std::wstring> & otab);
|
void AssignOrphans(const std::wstring & lang_code, const std::vector<std::wstring> & otab);
|
||||||
void ClearOrphans();
|
void ClearOrphans();
|
||||||
|
|
||||||
|
|
||||||
// check 'orphans' for the specific language
|
// check 'orphans' for the specific language
|
||||||
// if an orphan is detected then the non-break space ("&nbsp;" or ascii 160 code) will be put
|
// if an orphan is detected then the non-break space ("&nbsp;" or ascii 160 code) will be put
|
||||||
// default disable (lang_none)
|
// default disable (lang_none)
|
||||||
void OrphansMode(OrphanMode mode = orphan_nbsp);
|
void OrphansMode(OrphanMode mode = orphan_nbsp);
|
||||||
|
|
||||||
|
|
||||||
// skipping some unsafe tags
|
// skipping some unsafe tags
|
||||||
// (script, iframe, frame, frameset, applet, head, meta, html, link, body, ...)
|
// (script, iframe, frame, frameset, applet, head, meta, html, link, body, ...)
|
||||||
void SafeMode(bool safe_mode_);
|
void SafeMode(bool safe_mode_);
|
||||||
|
|
||||||
// skip all html tags
|
// skip all html tags
|
||||||
// gives only text without markup
|
// gives only text without markup
|
||||||
void SkipAllTags(bool skip_all_tags, bool skip_commentaries);
|
// but there can be commentaries
|
||||||
|
void SkipTags(bool skip_tags);
|
||||||
|
|
||||||
|
// skip commentaries
|
||||||
|
void SkipCommentaries(bool skip_commentaries);
|
||||||
|
|
||||||
|
// if true then entities such as &nbsp; are skipped
|
||||||
|
// this automatically turns on AnalyzeEntities
|
||||||
|
// in such a case EntityFound callbacks are sent
|
||||||
|
void SkipEntities(bool skip_entities);
|
||||||
|
|
||||||
|
// analyze html entities such as &nbsp;
|
||||||
|
// virtual method: EntityFound is called
|
||||||
|
// entities are analyzed in normal text and in attribute values such as <p class="a ">
|
||||||
|
void AnalyzeEntities(bool analyze_entities);
|
||||||
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
@ -219,13 +229,18 @@ protected:
|
||||||
virtual void Init();
|
virtual void Init();
|
||||||
virtual void Uninit();
|
virtual void Uninit();
|
||||||
|
|
||||||
virtual bool IsOpeningTagMark();
|
virtual bool IsOpeningTagMark(wchar_t c);
|
||||||
virtual bool IsOpeningCommentaryTagMark();
|
virtual bool IsClosingTagMark(wchar_t c);
|
||||||
virtual bool IsClosingTagMark();
|
virtual bool IsClosingXmlSimpleTagMark(wchar_t c);
|
||||||
virtual bool IsClosingXmlSimpleTagMark();
|
virtual bool IsStartingEntityMark(wchar_t c);
|
||||||
|
virtual bool IsEndingEntityMark(wchar_t c);
|
||||||
|
|
||||||
|
virtual bool IsOpeningCommentaryTagMark(const wchar_t * str);
|
||||||
|
virtual size_t OpeningCommentaryTagMarkSize();
|
||||||
|
|
||||||
virtual bool IsValidCharForName(int c);
|
virtual bool IsValidCharForName(int c);
|
||||||
virtual bool IsValidCharForAttrName(int c);
|
virtual bool IsValidCharForAttrName(int c);
|
||||||
|
virtual bool IsValidCharForEntityName(int c);
|
||||||
virtual void CheckExceptions();
|
virtual void CheckExceptions();
|
||||||
virtual bool SkipCommentaryTagIfExists();
|
virtual bool SkipCommentaryTagIfExists();
|
||||||
|
|
||||||
|
@ -233,6 +248,7 @@ protected:
|
||||||
virtual void Put(const wchar_t * str);
|
virtual void Put(const wchar_t * str);
|
||||||
virtual void Put(const wchar_t * str, const wchar_t * end);
|
virtual void Put(const wchar_t * str, const wchar_t * end);
|
||||||
virtual void Put(const std::wstring & str);
|
virtual void Put(const std::wstring & str);
|
||||||
|
virtual void AnalyzeEntitiesAndPut(const wchar_t * str, const wchar_t * end, std::wstring * out);
|
||||||
|
|
||||||
virtual void PutOpeningTagMark();
|
virtual void PutOpeningTagMark();
|
||||||
virtual void PutClosingTagMark();
|
virtual void PutClosingTagMark();
|
||||||
|
@ -243,6 +259,7 @@ protected:
|
||||||
virtual void ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white);
|
virtual void ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white);
|
||||||
|
|
||||||
virtual void ItemFound();
|
virtual void ItemFound();
|
||||||
|
virtual void EntityFound(const wchar_t * str, const wchar_t * end);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
others
|
others
|
||||||
|
@ -280,7 +297,6 @@ protected:
|
||||||
void SkipWhiteWithFirstNewLine();
|
void SkipWhiteWithFirstNewLine();
|
||||||
void SkipWhiteLines(const wchar_t * & str, const wchar_t * end);
|
void SkipWhiteLines(const wchar_t * & str, const wchar_t * end);
|
||||||
bool IsClosingTagForLastItem();
|
bool IsClosingTagForLastItem();
|
||||||
size_t OpeningCommentaryTagMarkSize();
|
|
||||||
void SkipAndCheckClosingTag();
|
void SkipAndCheckClosingTag();
|
||||||
|
|
||||||
void PopStack();
|
void PopStack();
|
||||||
|
@ -294,6 +310,7 @@ protected:
|
||||||
bool PrintOpeningItem();
|
bool PrintOpeningItem();
|
||||||
void ReadItemName();
|
void ReadItemName();
|
||||||
void ReadItemAttrName();
|
void ReadItemAttrName();
|
||||||
|
void ReadItemAttrValueAdd(const wchar_t * value_start, const wchar_t * value_end);
|
||||||
void ReadItemAttrValue(bool has_quote, wchar_t quote_char);
|
void ReadItemAttrValue(bool has_quote, wchar_t quote_char);
|
||||||
|
|
||||||
bool ReadItemAttr();
|
bool ReadItemAttr();
|
||||||
|
@ -310,7 +327,7 @@ protected:
|
||||||
void CheckChar(wchar_t c);
|
void CheckChar(wchar_t c);
|
||||||
|
|
||||||
void CheckLineWrap();
|
void CheckLineWrap();
|
||||||
bool HasSemiloconAround(const wchar_t * str, const wchar_t * end);
|
bool HasEntityEndAround(const wchar_t * str, const wchar_t * end);
|
||||||
void PutNormalNonWhite(const wchar_t * & str, const wchar_t * end);
|
void PutNormalNonWhite(const wchar_t * & str, const wchar_t * end);
|
||||||
void PutNormalWhite(const wchar_t * & str, const wchar_t * end);
|
void PutNormalWhite(const wchar_t * & str, const wchar_t * end);
|
||||||
void PutEverythingUntilClosingTag(bool put_closing_tag_as_well);
|
void PutEverythingUntilClosingTag(bool put_closing_tag_as_well);
|
||||||
|
@ -340,8 +357,10 @@ protected:
|
||||||
size_t line_len; //length of the current line (without first spaces which create the html tree)
|
size_t line_len; //length of the current line (without first spaces which create the html tree)
|
||||||
bool safe_mode; // skipping some unsafe tags
|
bool safe_mode; // skipping some unsafe tags
|
||||||
Orphans orphans_temp;
|
Orphans orphans_temp;
|
||||||
bool skip_all_tags;
|
bool skip_tags;
|
||||||
bool skip_commentaries;
|
bool skip_commentaries;
|
||||||
|
bool skip_entities;
|
||||||
|
bool analyze_entities;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue