renamed: HTMLFilter -> HTMLParser
This commit is contained in:
parent
f6df8bc1bc
commit
fdfd0b1385
|
@ -46,5 +46,6 @@
|
|||
./mainoptions/mainoptionsparser.o: convert/inttostr.h utf8/utf8.h
|
||||
./mainoptions/mainoptionsparser.o: textstream/stream.h utf8/utf8_templates.h
|
||||
./mainoptions/mainoptionsparser.o: utf8/utf8_private.h
|
||||
./html/htmlfilter.o: ./html/htmlfilter.h
|
||||
./html/bbcodeparser.o: ./html/bbcodeparser.h ./html/htmlfilter.h
|
||||
./html/htmlparser.o: ./html/htmlparser.h convert/baseparser.h convert/text.h
|
||||
./html/bbcodeparser.o: ./html/bbcodeparser.h ./html/htmlparser.h
|
||||
./html/bbcodeparser.o: convert/baseparser.h
|
||||
|
|
|
@ -38,13 +38,13 @@
|
|||
#ifndef headerfile_winix_core_bbcodeparser
|
||||
#define headerfile_winix_core_bbcodeparser
|
||||
|
||||
#include "htmlfilter.h"
|
||||
#include "htmlparser.h"
|
||||
|
||||
namespace pt
|
||||
{
|
||||
|
||||
|
||||
class BBCODEParser : public HTMLFilter
|
||||
class BBCODEParser : public HTMLParser
|
||||
{
|
||||
|
||||
struct Tags
|
||||
|
@ -58,7 +58,7 @@ class BBCODEParser : public HTMLFilter
|
|||
|
||||
/*
|
||||
virtual methods
|
||||
(from HTMLFilter class)
|
||||
(from HTMLParser class)
|
||||
*/
|
||||
virtual void Init();
|
||||
virtual void Uninit();
|
||||
|
|
|
@ -35,19 +35,20 @@
|
|||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "htmlfilter.h"
|
||||
#include "htmlparser.h"
|
||||
|
||||
#include "convert/text.h"
|
||||
|
||||
|
||||
namespace pt
|
||||
{
|
||||
const int HTMLFilter::WHITE_MODE_ORIGIN;
|
||||
const int HTMLFilter::WHITE_MODE_SINGLE_LINE;
|
||||
const int HTMLFilter::WHITE_MODE_TREE;
|
||||
const int HTMLParser::WHITE_MODE_ORIGIN;
|
||||
const int HTMLParser::WHITE_MODE_SINGLE_LINE;
|
||||
const int HTMLParser::WHITE_MODE_TREE;
|
||||
|
||||
|
||||
|
||||
void HTMLFilter::Item::Clear()
|
||||
void HTMLParser::Item::Clear()
|
||||
{
|
||||
name.clear();
|
||||
type = none;
|
||||
|
@ -61,14 +62,14 @@ void HTMLFilter::Item::Clear()
|
|||
}
|
||||
|
||||
|
||||
HTMLFilter::Item::Item()
|
||||
HTMLParser::Item::Item()
|
||||
{
|
||||
Clear();
|
||||
}
|
||||
|
||||
|
||||
|
||||
void HTMLFilter::Filter(const wchar_t * in, std::wstring & out)
|
||||
void HTMLParser::Filter(const wchar_t * in, std::wstring & out)
|
||||
{
|
||||
reading_from_file = false;
|
||||
reading_from_wchar_string = true;
|
||||
|
@ -89,18 +90,18 @@ void HTMLFilter::Filter(const wchar_t * in, std::wstring & out)
|
|||
|
||||
|
||||
|
||||
void HTMLFilter::Init()
|
||||
void HTMLParser::Init()
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
void HTMLFilter::Uninit()
|
||||
void HTMLParser::Uninit()
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
|
||||
void HTMLFilter::Filter(const std::wstring & in, std::wstring & out)
|
||||
void HTMLParser::Filter(const std::wstring & in, std::wstring & out)
|
||||
{
|
||||
if( &in == &out )
|
||||
{
|
||||
|
@ -117,7 +118,7 @@ void HTMLFilter::Filter(const std::wstring & in, std::wstring & out)
|
|||
}
|
||||
|
||||
|
||||
void HTMLFilter::SetSomeDefaults()
|
||||
void HTMLParser::SetSomeDefaults()
|
||||
{
|
||||
white_mode = WHITE_MODE_ORIGIN;
|
||||
|
||||
|
@ -132,7 +133,7 @@ void HTMLFilter::SetSomeDefaults()
|
|||
}
|
||||
|
||||
|
||||
HTMLFilter::HTMLFilter()
|
||||
HTMLParser::HTMLParser()
|
||||
{
|
||||
pstack = new Item[WINIX_HTMLFILTER_STACK_MAXLEN];
|
||||
buffer = new wchar_t[WINIX_HTMLFILTER_BUFFER_MAXLEN];
|
||||
|
@ -141,7 +142,7 @@ HTMLFilter::HTMLFilter()
|
|||
}
|
||||
|
||||
|
||||
HTMLFilter::HTMLFilter(const HTMLFilter & f)
|
||||
HTMLParser::HTMLParser(const HTMLParser & f)
|
||||
{
|
||||
// don't need to copy the stack
|
||||
pstack = new Item[WINIX_HTMLFILTER_STACK_MAXLEN];
|
||||
|
@ -151,7 +152,7 @@ HTMLFilter::HTMLFilter(const HTMLFilter & f)
|
|||
}
|
||||
|
||||
|
||||
HTMLFilter & HTMLFilter::operator=(const HTMLFilter & f)
|
||||
HTMLParser & HTMLParser::operator=(const HTMLParser & f)
|
||||
{
|
||||
// don't need to copy the stack
|
||||
pstack = new Item[WINIX_HTMLFILTER_STACK_MAXLEN];
|
||||
|
@ -163,7 +164,7 @@ return *this;
|
|||
}
|
||||
|
||||
|
||||
HTMLFilter::~HTMLFilter()
|
||||
HTMLParser::~HTMLParser()
|
||||
{
|
||||
delete [] pstack;
|
||||
delete [] buffer;
|
||||
|
@ -171,7 +172,7 @@ HTMLFilter::~HTMLFilter()
|
|||
|
||||
|
||||
|
||||
void HTMLFilter::white_chars_mode(int mode)
|
||||
void HTMLParser::white_chars_mode(int mode)
|
||||
{
|
||||
if( mode >= WHITE_MODE_ORIGIN && mode <= WHITE_MODE_TREE )
|
||||
white_mode = mode;
|
||||
|
@ -180,7 +181,7 @@ void HTMLFilter::white_chars_mode(int mode)
|
|||
|
||||
|
||||
|
||||
void HTMLFilter::WrapLine(size_t wrap_line_)
|
||||
void HTMLParser::WrapLine(size_t wrap_line_)
|
||||
{
|
||||
wrap_line = wrap_line_;
|
||||
|
||||
|
@ -190,7 +191,7 @@ void HTMLFilter::WrapLine(size_t wrap_line_)
|
|||
|
||||
|
||||
|
||||
void HTMLFilter::InsertTabs(size_t tabsize)
|
||||
void HTMLParser::InsertTabs(size_t tabsize)
|
||||
{
|
||||
tab_size = tabsize;
|
||||
|
||||
|
@ -199,7 +200,7 @@ void HTMLFilter::InsertTabs(size_t tabsize)
|
|||
}
|
||||
|
||||
|
||||
int HTMLFilter::current_white_char_mode()
|
||||
int HTMLParser::current_white_char_mode()
|
||||
{
|
||||
if( !white_char_mode_tab.empty() )
|
||||
return white_char_mode_tab.back();
|
||||
|
@ -208,7 +209,7 @@ int HTMLFilter::current_white_char_mode()
|
|||
}
|
||||
|
||||
|
||||
void HTMLFilter::CalcOrphansMaxLen(Orphans & orphans)
|
||||
void HTMLParser::CalcOrphansMaxLen(Orphans & orphans)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
|
@ -222,7 +223,7 @@ size_t i;
|
|||
}
|
||||
|
||||
|
||||
void HTMLFilter::AssignOrphans(const wchar_t * lang_code, const std::vector<std::wstring> & otab)
|
||||
void HTMLParser::AssignOrphans(const wchar_t * lang_code, const std::vector<std::wstring> & otab)
|
||||
{
|
||||
lang_code_lower = lang_code;
|
||||
ToLower(lang_code_lower);
|
||||
|
@ -236,13 +237,13 @@ void HTMLFilter::AssignOrphans(const wchar_t * lang_code, const std::vector<std:
|
|||
|
||||
|
||||
|
||||
void HTMLFilter::AssignOrphans(const std::wstring & lang_code, const std::vector<std::wstring> & otab)
|
||||
void HTMLParser::AssignOrphans(const std::wstring & lang_code, const std::vector<std::wstring> & otab)
|
||||
{
|
||||
AssignOrphans(lang_code.c_str(), otab);
|
||||
}
|
||||
|
||||
|
||||
void HTMLFilter::ClearOrphans()
|
||||
void HTMLParser::ClearOrphans()
|
||||
{
|
||||
orphans_tab.clear();
|
||||
}
|
||||
|
@ -250,7 +251,7 @@ void HTMLFilter::ClearOrphans()
|
|||
|
||||
|
||||
|
||||
void HTMLFilter::OrphansMode(const std::wstring & orphan_mode_str)
|
||||
void HTMLParser::OrphansMode(const std::wstring & orphan_mode_str)
|
||||
{
|
||||
if( orphan_mode_str == L"160" )
|
||||
orphan_mode = orphan_160space;
|
||||
|
@ -259,24 +260,24 @@ void HTMLFilter::OrphansMode(const std::wstring & orphan_mode_str)
|
|||
}
|
||||
|
||||
|
||||
void HTMLFilter::SafeMode(bool safe_mode_)
|
||||
void HTMLParser::SafeMode(bool safe_mode_)
|
||||
{
|
||||
safe_mode = safe_mode_;
|
||||
}
|
||||
|
||||
|
||||
void HTMLFilter::SkipTags(bool skip_tags)
|
||||
void HTMLParser::SkipTags(bool skip_tags)
|
||||
{
|
||||
this->skip_tags = skip_tags;
|
||||
}
|
||||
|
||||
void HTMLFilter::SkipCommentaries(bool skip_commentaries)
|
||||
void HTMLParser::SkipCommentaries(bool skip_commentaries)
|
||||
{
|
||||
this->skip_commentaries = skip_commentaries;
|
||||
}
|
||||
|
||||
|
||||
void HTMLFilter::SkipEntities(bool skip_entities)
|
||||
void HTMLParser::SkipEntities(bool skip_entities)
|
||||
{
|
||||
this->skip_entities = skip_entities;
|
||||
|
||||
|
@ -287,13 +288,13 @@ void HTMLFilter::SkipEntities(bool skip_entities)
|
|||
}
|
||||
|
||||
|
||||
void HTMLFilter::AnalyzeEntities(bool analyze_entities)
|
||||
void HTMLParser::AnalyzeEntities(bool analyze_entities)
|
||||
{
|
||||
this->analyze_entities = analyze_entities;
|
||||
}
|
||||
|
||||
|
||||
void HTMLFilter::SetNoFilterTag(const std::wstring & tag_name)
|
||||
void HTMLParser::SetNoFilterTag(const std::wstring & tag_name)
|
||||
{
|
||||
no_filter_tag = tag_name;
|
||||
}
|
||||
|
@ -301,7 +302,7 @@ void HTMLFilter::SetNoFilterTag(const std::wstring & tag_name)
|
|||
|
||||
|
||||
|
||||
HTMLFilter::Item & HTMLFilter::GetItem(size_t i)
|
||||
HTMLParser::Item & HTMLParser::GetItem(size_t i)
|
||||
{
|
||||
if( i >= stack_len )
|
||||
{
|
||||
|
@ -313,7 +314,7 @@ return pstack[i];
|
|||
}
|
||||
|
||||
|
||||
HTMLFilter::Item & HTMLFilter::LastItem()
|
||||
HTMLParser::Item & HTMLParser::LastItem()
|
||||
{
|
||||
if( stack_len == 0 )
|
||||
{
|
||||
|
@ -325,7 +326,7 @@ return pstack[stack_len-1];
|
|||
}
|
||||
|
||||
|
||||
bool HTMLFilter::PushStack()
|
||||
bool HTMLParser::PushStack()
|
||||
{
|
||||
if( stack_len == WINIX_HTMLFILTER_STACK_MAXLEN )
|
||||
// oops, too many items
|
||||
|
@ -346,7 +347,7 @@ bool HTMLFilter::PushStack()
|
|||
return true;
|
||||
}
|
||||
|
||||
void HTMLFilter::PopStack()
|
||||
void HTMLParser::PopStack()
|
||||
{
|
||||
if( stack_len == 0 )
|
||||
// oops
|
||||
|
@ -357,7 +358,7 @@ void HTMLFilter::PopStack()
|
|||
}
|
||||
|
||||
|
||||
bool HTMLFilter::IsWhite(int c)
|
||||
bool HTMLParser::IsWhite(int c)
|
||||
{
|
||||
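// don't test c==10 (line feed) here; callers that also want to skip new lines check for it separately (see SkipWhiteLines)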
|
||||
|
||||
|
@ -368,21 +369,21 @@ return false;
|
|||
}
|
||||
|
||||
|
||||
void HTMLFilter::SkipWhite()
|
||||
void HTMLParser::SkipWhite()
|
||||
{
|
||||
while( IsWhite(lastc) )
|
||||
read_char();
|
||||
}
|
||||
|
||||
|
||||
void HTMLFilter::SkipWhiteLines()
|
||||
void HTMLParser::SkipWhiteLines()
|
||||
{
|
||||
while( lastc==10 || IsWhite(lastc) )
|
||||
read_char();
|
||||
}
|
||||
|
||||
|
||||
void HTMLFilter::SkipWhiteWithFirstNewLine()
|
||||
void HTMLParser::SkipWhiteWithFirstNewLine()
|
||||
{
|
||||
SkipWhite();
|
||||
|
||||
|
@ -394,7 +395,7 @@ void HTMLFilter::SkipWhiteWithFirstNewLine()
|
|||
}
|
||||
|
||||
|
||||
//void HTMLFilter::CheckNewLine()
|
||||
//void HTMLParser::CheckNewLine()
|
||||
//{
|
||||
// if( white_mode == WHITE_MODE_TREE )
|
||||
// {
|
||||
|
@ -407,7 +408,7 @@ void HTMLFilter::SkipWhiteWithFirstNewLine()
|
|||
|
||||
|
||||
|
||||
void HTMLFilter::SkipAndCheckClosingTag(std::wstring * remember_text)
|
||||
void HTMLParser::SkipAndCheckClosingTag(std::wstring * remember_text)
|
||||
{
|
||||
bool is_quoted = false;
|
||||
wchar_t quote_char = 0;
|
||||
|
@ -450,7 +451,7 @@ void HTMLFilter::SkipAndCheckClosingTag(std::wstring * remember_text)
|
|||
|
||||
|
||||
|
||||
bool HTMLFilter::IsValidCharForName(int c)
|
||||
bool HTMLParser::IsValidCharForName(int c)
|
||||
{
|
||||
if( (c>='a' && c<='z') ||
|
||||
(c>='A' && c<='Z') ||
|
||||
|
@ -462,7 +463,7 @@ return false;
|
|||
}
|
||||
|
||||
|
||||
bool HTMLFilter::IsValidCharForAttrName(int c)
|
||||
bool HTMLParser::IsValidCharForAttrName(int c)
|
||||
{
|
||||
if( (c>='a' && c<='z') ||
|
||||
(c>='A' && c<='Z') ||
|
||||
|
@ -474,7 +475,7 @@ return false;
|
|||
}
|
||||
|
||||
|
||||
bool HTMLFilter::IsValidCharForEntityName(int c)
|
||||
bool HTMLParser::IsValidCharForEntityName(int c)
|
||||
{
|
||||
if( (c>='a' && c<='z') ||
|
||||
(c>='A' && c<='Z') ||
|
||||
|
@ -486,7 +487,7 @@ return false;
|
|||
}
|
||||
|
||||
|
||||
void HTMLFilter::ReadItemName(std::wstring & name, bool clear_name)
|
||||
void HTMLParser::ReadItemName(std::wstring & name, bool clear_name)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
|
@ -513,7 +514,7 @@ size_t i;
|
|||
|
||||
|
||||
|
||||
void HTMLFilter::ReadItemAttrName()
|
||||
void HTMLParser::ReadItemAttrName()
|
||||
{
|
||||
size_t i;
|
||||
|
||||
|
@ -530,7 +531,7 @@ size_t i;
|
|||
|
||||
|
||||
|
||||
void HTMLFilter::ReadItemAttrValueAdd(const std::wstring & str)
|
||||
void HTMLParser::ReadItemAttrValueAdd(const std::wstring & str)
|
||||
{
|
||||
if( analyze_entities )
|
||||
{
|
||||
|
@ -544,7 +545,7 @@ void HTMLFilter::ReadItemAttrValueAdd(const std::wstring & str)
|
|||
}
|
||||
|
||||
|
||||
void HTMLFilter::ReadItemAttrValue(bool has_quote, wchar_t quote_char)
|
||||
void HTMLParser::ReadItemAttrValue(bool has_quote, wchar_t quote_char)
|
||||
{
|
||||
attr_value.clear();
|
||||
tmp_text.clear();
|
||||
|
@ -585,7 +586,7 @@ void HTMLFilter::ReadItemAttrValue(bool has_quote, wchar_t quote_char)
|
|||
}
|
||||
|
||||
|
||||
void HTMLFilter::CheckChar(wchar_t c)
|
||||
void HTMLParser::CheckChar(wchar_t c)
|
||||
{
|
||||
if( c == 10 )
|
||||
line_len = 0;
|
||||
|
@ -594,14 +595,14 @@ void HTMLFilter::CheckChar(wchar_t c)
|
|||
}
|
||||
|
||||
|
||||
void HTMLFilter::Put(wchar_t c)
|
||||
void HTMLParser::Put(wchar_t c)
|
||||
{
|
||||
(*out_string) += c;
|
||||
CheckChar(c);
|
||||
}
|
||||
|
||||
|
||||
void HTMLFilter::Put(const wchar_t * str, const wchar_t * end)
|
||||
void HTMLParser::Put(const wchar_t * str, const wchar_t * end)
|
||||
{
|
||||
if( str >= end )
|
||||
return;
|
||||
|
@ -615,7 +616,7 @@ void HTMLFilter::Put(const wchar_t * str, const wchar_t * end)
|
|||
|
||||
|
||||
|
||||
void HTMLFilter::Put(const std::wstring & str)
|
||||
void HTMLParser::Put(const std::wstring & str)
|
||||
{
|
||||
if( !str.empty() )
|
||||
{
|
||||
|
@ -628,7 +629,7 @@ void HTMLFilter::Put(const std::wstring & str)
|
|||
|
||||
|
||||
// out can be null
|
||||
void HTMLFilter::AnalyzeEntitiesAndPut(const wchar_t * str, const wchar_t * end, std::wstring * out)
|
||||
void HTMLParser::AnalyzeEntitiesAndPut(const wchar_t * str, const wchar_t * end, std::wstring * out)
|
||||
{
|
||||
size_t epsilon = 8; // !! IMPROVE ME put as a constant
|
||||
const wchar_t * old_str = str;
|
||||
|
@ -680,7 +681,7 @@ void HTMLFilter::AnalyzeEntitiesAndPut(const wchar_t * str, const wchar_t * end,
|
|||
|
||||
|
||||
|
||||
int HTMLFilter::CheckOrphan(const wchar_t * str, const wchar_t * end, const std::wstring & orphan_str)
|
||||
int HTMLParser::CheckOrphan(const wchar_t * str, const wchar_t * end, const std::wstring & orphan_str)
|
||||
{
|
||||
size_t res;
|
||||
|
||||
|
@ -704,7 +705,7 @@ return -int(ToLower(*orphan));
|
|||
|
||||
|
||||
// binary search in table (table should be sorted)
|
||||
bool HTMLFilter::CheckOrphan(const wchar_t * str, const wchar_t * end, const std::vector<std::wstring> & table)
|
||||
bool HTMLParser::CheckOrphan(const wchar_t * str, const wchar_t * end, const std::vector<std::wstring> & table)
|
||||
{
|
||||
int res;
|
||||
|
||||
|
@ -749,7 +750,7 @@ return false;
|
|||
}
|
||||
|
||||
|
||||
bool HTMLFilter::CheckOrphan(const wchar_t * str, const wchar_t * end)
|
||||
bool HTMLParser::CheckOrphan(const wchar_t * str, const wchar_t * end)
|
||||
{
|
||||
if( str==end || !LastItem().has_body_tag || !LastItem().porphans )
|
||||
return false;
|
||||
|
@ -763,7 +764,7 @@ return CheckOrphan(str, end, LastItem().porphans->tab);
|
|||
}
|
||||
|
||||
|
||||
void HTMLFilter::PutNormalNonWhite(std::wstring & str, bool allow_put_new_line, bool allow_put_space)
|
||||
void HTMLParser::PutNormalNonWhite(std::wstring & str, bool allow_put_new_line, bool allow_put_space)
|
||||
{
|
||||
while( lastc != -1 && lastc != 10 && !IsWhite(lastc) && !IsOpeningTagMark(lastc) )
|
||||
{
|
||||
|
@ -799,7 +800,7 @@ void HTMLFilter::PutNormalNonWhite(std::wstring & str, bool allow_put_new_line,
|
|||
}
|
||||
|
||||
|
||||
void HTMLFilter::PutNormalWhite(bool & was_white_char, bool & was_new_line)
|
||||
void HTMLParser::PutNormalWhite(bool & was_white_char, bool & was_new_line)
|
||||
{
|
||||
was_white_char = false;
|
||||
was_new_line = false;
|
||||
|
@ -829,13 +830,13 @@ void HTMLFilter::PutNormalWhite(bool & was_white_char, bool & was_new_line)
|
|||
|
||||
|
||||
|
||||
void HTMLFilter::PutOpeningTagMark()
|
||||
void HTMLParser::PutOpeningTagMark()
|
||||
{
|
||||
Put('<');
|
||||
}
|
||||
|
||||
|
||||
void HTMLFilter::PutClosingTagMark()
|
||||
void HTMLParser::PutClosingTagMark()
|
||||
{
|
||||
Put('>');
|
||||
}
|
||||
|
@ -845,7 +846,7 @@ void HTMLFilter::PutClosingTagMark()
|
|||
|
||||
// !! IMPROVE ME change to a better name
|
||||
// this function does not return true when the tag is safe
|
||||
bool HTMLFilter::IsTagSafe(const wchar_t * tag)
|
||||
bool HTMLParser::IsTagSafe(const wchar_t * tag)
|
||||
{
|
||||
if( !safe_mode )
|
||||
return true;
|
||||
|
@ -874,7 +875,7 @@ return true;
|
|||
}
|
||||
|
||||
|
||||
bool HTMLFilter::IsTagSafe(const std::wstring & tag)
|
||||
bool HTMLParser::IsTagSafe(const std::wstring & tag)
|
||||
{
|
||||
return IsTagSafe(tag.c_str());
|
||||
}
|
||||
|
@ -883,7 +884,7 @@ bool HTMLFilter::IsTagSafe(const std::wstring & tag)
|
|||
|
||||
|
||||
|
||||
bool HTMLFilter::PutOpeningTag()
|
||||
bool HTMLParser::PutOpeningTag()
|
||||
{
|
||||
if( !IsTagSafe(LastItem().name) )
|
||||
{
|
||||
|
@ -905,7 +906,7 @@ return true;
|
|||
|
||||
|
||||
|
||||
void HTMLFilter::PutClosingTag(const Item & item)
|
||||
void HTMLParser::PutClosingTag(const Item & item)
|
||||
{
|
||||
if( skip_tags || !IsTagSafe(item.name) )
|
||||
return;
|
||||
|
@ -922,7 +923,7 @@ void HTMLFilter::PutClosingTag(const Item & item)
|
|||
|
||||
|
||||
|
||||
void HTMLFilter::PutTabs(size_t len)
|
||||
void HTMLParser::PutTabs(size_t len)
|
||||
{
|
||||
if( len > 30 )
|
||||
len = 30;
|
||||
|
@ -932,7 +933,7 @@ void HTMLFilter::PutTabs(size_t len)
|
|||
}
|
||||
|
||||
|
||||
void HTMLFilter::PutNonBreakingSpace()
|
||||
void HTMLParser::PutNonBreakingSpace()
|
||||
{
|
||||
if( orphan_mode == orphan_nbsp )
|
||||
{
|
||||
|
@ -947,35 +948,35 @@ void HTMLFilter::PutNonBreakingSpace()
|
|||
|
||||
|
||||
// we assume the size of the opening mark to be one
|
||||
bool HTMLFilter::IsOpeningTagMark(wchar_t c)
|
||||
bool HTMLParser::IsOpeningTagMark(wchar_t c)
|
||||
{
|
||||
return (c == '<');
|
||||
}
|
||||
|
||||
|
||||
// we assume the size of the closing mark to be one
|
||||
bool HTMLFilter::IsClosingTagMark(wchar_t c)
|
||||
bool HTMLParser::IsClosingTagMark(wchar_t c)
|
||||
{
|
||||
return (c == '>');
|
||||
}
|
||||
|
||||
|
||||
// the slash in the closing tag mark e.g. </p>
|
||||
bool HTMLFilter::IsClosingTagIndicator(wchar_t c)
|
||||
bool HTMLParser::IsClosingTagIndicator(wchar_t c)
|
||||
{
|
||||
return (c == '/');
|
||||
}
|
||||
|
||||
|
||||
// the exclamation mark right after the opening mark in a special tag e.g. <!doctype html> or <!-- comment -->
|
||||
bool HTMLFilter::IsSpecialTagIndicator(wchar_t c)
|
||||
bool HTMLParser::IsSpecialTagIndicator(wchar_t c)
|
||||
{
|
||||
return (c == '!');
|
||||
}
|
||||
|
||||
|
||||
// the '=' operator e.g. class="value"
|
||||
bool HTMLFilter::IsAttributeAssignmentMark(wchar_t c)
|
||||
bool HTMLParser::IsAttributeAssignmentMark(wchar_t c)
|
||||
{
|
||||
return (c == '=');
|
||||
}
|
||||
|
@ -984,13 +985,13 @@ bool HTMLFilter::IsAttributeAssignmentMark(wchar_t c)
|
|||
|
||||
// the slash at the end <img src=".." /> (without '>' character)
|
||||
// we assume the size of the mark to be one
|
||||
bool HTMLFilter::IsClosingXmlSimpleTagMark(wchar_t c)
|
||||
bool HTMLParser::IsClosingXmlSimpleTagMark(wchar_t c)
|
||||
{
|
||||
return (c == '/');
|
||||
}
|
||||
|
||||
|
||||
bool HTMLFilter::IsEndingCommentaryTagMarkAtEndOfString(const std::wstring & str)
|
||||
bool HTMLParser::IsEndingCommentaryTagMarkAtEndOfString(const std::wstring & str)
|
||||
{
|
||||
static wchar_t comm_end[] = L"-->";
|
||||
size_t comm_end_len = sizeof(comm_end) / sizeof(wchar_t) - 1;
|
||||
|
@ -1004,13 +1005,13 @@ bool HTMLFilter::IsEndingCommentaryTagMarkAtEndOfString(const std::wstring & str
|
|||
}
|
||||
|
||||
|
||||
bool HTMLFilter::IsStartingEntityMark(wchar_t c)
|
||||
bool HTMLParser::IsStartingEntityMark(wchar_t c)
|
||||
{
|
||||
return (c == '&');
|
||||
}
|
||||
|
||||
|
||||
bool HTMLFilter::IsEndingEntityMark(wchar_t c)
|
||||
bool HTMLParser::IsEndingEntityMark(wchar_t c)
|
||||
{
|
||||
return (c == ';');
|
||||
}
|
||||
|
@ -1018,7 +1019,7 @@ bool HTMLFilter::IsEndingEntityMark(wchar_t c)
|
|||
|
||||
|
||||
// reading text between html tags
|
||||
void HTMLFilter::ReadText()
|
||||
void HTMLParser::ReadText()
|
||||
{
|
||||
bool was_white_char = false;
|
||||
bool was_new_line = false;
|
||||
|
@ -1096,7 +1097,7 @@ void HTMLFilter::ReadText()
|
|||
|
||||
|
||||
|
||||
bool HTMLFilter::PrintOpeningItem()
|
||||
bool HTMLParser::PrintOpeningItem()
|
||||
{
|
||||
if( skip_tags || IsNameEqual(no_filter_tag, LastItem().name) )
|
||||
return true;
|
||||
|
@ -1108,7 +1109,7 @@ bool HTMLFilter::PrintOpeningItem()
|
|||
|
||||
|
||||
|
||||
bool HTMLFilter::ReadItemAttr()
|
||||
bool HTMLParser::ReadItemAttr()
|
||||
{
|
||||
attr_has_value = false;
|
||||
attr_name.clear();
|
||||
|
@ -1145,7 +1146,7 @@ return true;
|
|||
|
||||
|
||||
|
||||
void HTMLFilter::CheckItemLangAttr()
|
||||
void HTMLParser::CheckItemLangAttr()
|
||||
{
|
||||
if( attr_has_value && IsNameEqual(L"lang", attr_name) )
|
||||
{
|
||||
|
@ -1166,7 +1167,7 @@ void HTMLFilter::CheckItemLangAttr()
|
|||
}
|
||||
|
||||
|
||||
void HTMLFilter::PrintItemAttr()
|
||||
void HTMLParser::PrintItemAttr()
|
||||
{
|
||||
size_t i;
|
||||
|
||||
|
@ -1193,7 +1194,7 @@ size_t i;
|
|||
}
|
||||
|
||||
|
||||
void HTMLFilter::ReadItemClosing()
|
||||
void HTMLParser::ReadItemClosing()
|
||||
{
|
||||
read_char(); // skipping '/'
|
||||
SkipWhiteLines();
|
||||
|
@ -1205,7 +1206,7 @@ void HTMLFilter::ReadItemClosing()
|
|||
}
|
||||
|
||||
|
||||
void HTMLFilter::ReadItemSpecial()
|
||||
void HTMLParser::ReadItemSpecial()
|
||||
{
|
||||
LastItem().type = Item::special;
|
||||
|
||||
|
@ -1255,7 +1256,7 @@ void HTMLFilter::ReadItemSpecial()
|
|||
}
|
||||
|
||||
|
||||
void HTMLFilter::ReadItemOpening()
|
||||
void HTMLParser::ReadItemOpening()
|
||||
{
|
||||
LastItem().type = Item::opening;
|
||||
ReadItemName(LastItem().name);
|
||||
|
@ -1281,16 +1282,16 @@ void HTMLFilter::ReadItemOpening()
|
|||
}
|
||||
|
||||
|
||||
void HTMLFilter::ItemFound()
|
||||
void HTMLParser::ItemFound()
|
||||
{
|
||||
}
|
||||
|
||||
void HTMLFilter::EntityFound(const wchar_t * str, const wchar_t * end)
|
||||
void HTMLParser::EntityFound(const wchar_t * str, const wchar_t * end)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
bool HTMLFilter::ReadItem()
|
||||
bool HTMLParser::ReadItem()
|
||||
{
|
||||
if( lastc == -1 )
|
||||
return false;
|
||||
|
@ -1332,7 +1333,7 @@ return true;
|
|||
|
||||
|
||||
|
||||
wchar_t HTMLFilter::ToLower(wchar_t c)
|
||||
wchar_t HTMLParser::ToLower(wchar_t c)
|
||||
{
|
||||
if( c>='A' && c<='Z' )
|
||||
return c - 'A' + 'a';
|
||||
|
@ -1341,7 +1342,7 @@ return c;
|
|||
}
|
||||
|
||||
|
||||
void HTMLFilter::ToLower(std::wstring & str)
|
||||
void HTMLParser::ToLower(std::wstring & str)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
|
@ -1350,7 +1351,7 @@ size_t i;
|
|||
}
|
||||
|
||||
|
||||
bool HTMLFilter::IsNameEqual(const wchar_t * name1, const wchar_t * name2)
|
||||
bool HTMLParser::IsNameEqual(const wchar_t * name1, const wchar_t * name2)
|
||||
{
|
||||
for( ; *name1!=0 && *name2!=0 ; ++name1, ++name2 )
|
||||
if( ToLower(*name1) != ToLower(*name2) )
|
||||
|
@ -1363,19 +1364,19 @@ return false;
|
|||
}
|
||||
|
||||
|
||||
bool HTMLFilter::IsNameEqual(const wchar_t * name1, const std::wstring & name2)
|
||||
bool HTMLParser::IsNameEqual(const wchar_t * name1, const std::wstring & name2)
|
||||
{
|
||||
return IsNameEqual(name1, name2.c_str());
|
||||
}
|
||||
|
||||
|
||||
bool HTMLFilter::IsNameEqual(const std::wstring & name1, const wchar_t * name2)
|
||||
bool HTMLParser::IsNameEqual(const std::wstring & name1, const wchar_t * name2)
|
||||
{
|
||||
return IsNameEqual(name1.c_str(), name2);
|
||||
}
|
||||
|
||||
|
||||
bool HTMLFilter::IsNameEqual(const std::wstring & name1, const std::wstring & name2)
|
||||
bool HTMLParser::IsNameEqual(const std::wstring & name1, const std::wstring & name2)
|
||||
{
|
||||
return IsNameEqual(name1.c_str(), name2.c_str());
|
||||
}
|
||||
|
@ -1385,7 +1386,7 @@ bool HTMLFilter::IsNameEqual(const std::wstring & name1, const std::wstring & na
|
|||
// len characters from both strings must be equal
|
||||
// IMPROVE ME change name to something like IsBeginningNameEqual
|
||||
// and move to text.h (pikotools)
|
||||
bool HTMLFilter::IsNameEqual(const wchar_t * name1, const wchar_t * name2, size_t len)
|
||||
bool HTMLParser::IsNameEqual(const wchar_t * name1, const wchar_t * name2, size_t len)
|
||||
{
|
||||
for( ; *name1!=0 && *name2!=0 && len>0 ; ++name1, ++name2, --len )
|
||||
if( ToLower(*name1) != ToLower(*name2) )
|
||||
|
@ -1399,19 +1400,19 @@ return false;
|
|||
|
||||
|
||||
|
||||
bool HTMLFilter::IsNameEqual(const wchar_t * name1, const std::wstring & name2, size_t len)
|
||||
bool HTMLParser::IsNameEqual(const wchar_t * name1, const std::wstring & name2, size_t len)
|
||||
{
|
||||
return IsNameEqual(name1, name2.c_str(), len);
|
||||
}
|
||||
|
||||
|
||||
bool HTMLFilter::IsNameEqual(const std::wstring & name1, const wchar_t * name2, size_t len)
|
||||
bool HTMLParser::IsNameEqual(const std::wstring & name1, const wchar_t * name2, size_t len)
|
||||
{
|
||||
return IsNameEqual(name1.c_str(), name2, len);
|
||||
}
|
||||
|
||||
|
||||
bool HTMLFilter::IsNameEqual(const std::wstring & name1, const std::wstring & name2, size_t len)
|
||||
bool HTMLParser::IsNameEqual(const std::wstring & name1, const std::wstring & name2, size_t len)
|
||||
{
|
||||
return IsNameEqual(name1.c_str(), name2.c_str(), len);
|
||||
}
|
||||
|
@ -1420,20 +1421,20 @@ bool HTMLFilter::IsNameEqual(const std::wstring & name1, const std::wstring & na
|
|||
|
||||
|
||||
|
||||
bool HTMLFilter::IsLastTag(const wchar_t * name)
|
||||
bool HTMLParser::IsLastTag(const wchar_t * name)
|
||||
{
|
||||
return IsNameEqual(name, LastItem().name);
|
||||
}
|
||||
|
||||
|
||||
bool HTMLFilter::IsLastTag(const std::wstring & name)
|
||||
bool HTMLParser::IsLastTag(const std::wstring & name)
|
||||
{
|
||||
return IsNameEqual(name, LastItem().name);
|
||||
}
|
||||
|
||||
|
||||
// checking exceptions for opening tags
|
||||
void HTMLFilter::CheckSingleItemExceptions()
|
||||
void HTMLParser::CheckSingleItemExceptions()
|
||||
{
|
||||
if( IsLastTag(L"meta") ||
|
||||
IsLastTag(L"input") ||
|
||||
|
@ -1456,7 +1457,7 @@ void HTMLFilter::CheckSingleItemExceptions()
|
|||
}
|
||||
|
||||
|
||||
void HTMLFilter::CheckWhiteCharsExceptions(Item & item)
|
||||
void HTMLParser::CheckWhiteCharsExceptions(Item & item)
|
||||
{
|
||||
bool change_white_mode = false;
|
||||
|
||||
|
@ -1493,7 +1494,7 @@ void HTMLFilter::CheckWhiteCharsExceptions(Item & item)
|
|||
|
||||
|
||||
|
||||
void HTMLFilter::AddForgottenTags()
|
||||
void HTMLParser::AddForgottenTags()
|
||||
{
|
||||
int i;
|
||||
|
||||
|
@ -1539,7 +1540,7 @@ int i;
|
|||
}
|
||||
|
||||
|
||||
void HTMLFilter::CheckStackPrintRest()
|
||||
void HTMLParser::CheckStackPrintRest()
|
||||
{
|
||||
while( stack_len-- > 0 )
|
||||
{
|
||||
|
@ -1561,7 +1562,7 @@ void HTMLFilter::CheckStackPrintRest()
|
|||
}
|
||||
|
||||
|
||||
void HTMLFilter::CheckClosingTags()
|
||||
void HTMLParser::CheckClosingTags()
|
||||
{
|
||||
if( stack_len == 0 )
|
||||
return;
|
||||
|
@ -1604,7 +1605,7 @@ void HTMLFilter::CheckClosingTags()
|
|||
}
|
||||
|
||||
|
||||
bool HTMLFilter::PrintRest()
|
||||
bool HTMLParser::PrintRest()
|
||||
{
|
||||
//const wchar_t * start = pchar;
|
||||
|
||||
|
@ -1634,7 +1635,7 @@ bool HTMLFilter::PrintRest()
|
|||
|
||||
|
||||
|
||||
void HTMLFilter::ReadLoop()
|
||||
void HTMLParser::ReadLoop()
|
||||
{
|
||||
while( ReadItem() )
|
||||
{
|
||||
|
@ -1671,7 +1672,7 @@ void HTMLFilter::ReadLoop()
|
|||
|
||||
|
||||
|
||||
void HTMLFilter::Read()
|
||||
void HTMLParser::Read()
|
||||
{
|
||||
read_char(); // put first character to lastc
|
||||
is_first_item = true;
|
|
@ -90,7 +90,7 @@ namespace pt
|
|||
|
||||
the filter recognizes xml simple tags (with / at the end) such as: <br />
|
||||
*/
|
||||
class HTMLFilter : public BaseParser
|
||||
class HTMLParser : public BaseParser
|
||||
{
|
||||
public:
|
||||
|
||||
|
@ -100,10 +100,10 @@ public:
|
|||
orphan_160space // putting 160 ascii code
|
||||
};
|
||||
|
||||
HTMLFilter();
|
||||
HTMLFilter(const HTMLFilter & f);
|
||||
HTMLFilter & operator=(const HTMLFilter & f);
|
||||
virtual ~HTMLFilter();
|
||||
HTMLParser();
|
||||
HTMLParser(const HTMLParser & f);
|
||||
HTMLParser & operator=(const HTMLParser & f);
|
||||
virtual ~HTMLParser();
|
||||
|
||||
|
||||
// main methods used for filtering
|
Loading…
Reference in New Issue