renamed: HTMLFilter -> HTMLParser

This commit is contained in:
Tomasz Sowa 2021-08-06 17:10:19 +02:00
parent f6df8bc1bc
commit fdfd0b1385
4 changed files with 117 additions and 115 deletions

View File

@ -46,5 +46,6 @@
./mainoptions/mainoptionsparser.o: convert/inttostr.h utf8/utf8.h ./mainoptions/mainoptionsparser.o: convert/inttostr.h utf8/utf8.h
./mainoptions/mainoptionsparser.o: textstream/stream.h utf8/utf8_templates.h ./mainoptions/mainoptionsparser.o: textstream/stream.h utf8/utf8_templates.h
./mainoptions/mainoptionsparser.o: utf8/utf8_private.h ./mainoptions/mainoptionsparser.o: utf8/utf8_private.h
./html/htmlfilter.o: ./html/htmlfilter.h ./html/htmlparser.o: ./html/htmlparser.h convert/baseparser.h convert/text.h
./html/bbcodeparser.o: ./html/bbcodeparser.h ./html/htmlfilter.h ./html/bbcodeparser.o: ./html/bbcodeparser.h ./html/htmlparser.h
./html/bbcodeparser.o: convert/baseparser.h

View File

@ -38,13 +38,13 @@
#ifndef headerfile_winix_core_bbcodeparser #ifndef headerfile_winix_core_bbcodeparser
#define headerfile_winix_core_bbcodeparser #define headerfile_winix_core_bbcodeparser
#include "htmlfilter.h" #include "htmlparser.h"
namespace pt namespace pt
{ {
class BBCODEParser : public HTMLFilter class BBCODEParser : public HTMLParser
{ {
struct Tags struct Tags
@ -58,7 +58,7 @@ class BBCODEParser : public HTMLFilter
/* /*
virtual methods virtual methods
(from HTMLFilter class) (from HTMLParser class)
*/ */
virtual void Init(); virtual void Init();
virtual void Uninit(); virtual void Uninit();

View File

@ -35,19 +35,20 @@
* THE POSSIBILITY OF SUCH DAMAGE. * THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include "htmlfilter.h" #include "htmlparser.h"
#include "convert/text.h" #include "convert/text.h"
namespace pt namespace pt
{ {
const int HTMLFilter::WHITE_MODE_ORIGIN; const int HTMLParser::WHITE_MODE_ORIGIN;
const int HTMLFilter::WHITE_MODE_SINGLE_LINE; const int HTMLParser::WHITE_MODE_SINGLE_LINE;
const int HTMLFilter::WHITE_MODE_TREE; const int HTMLParser::WHITE_MODE_TREE;
void HTMLFilter::Item::Clear() void HTMLParser::Item::Clear()
{ {
name.clear(); name.clear();
type = none; type = none;
@ -61,14 +62,14 @@ void HTMLFilter::Item::Clear()
} }
HTMLFilter::Item::Item() HTMLParser::Item::Item()
{ {
Clear(); Clear();
} }
void HTMLFilter::Filter(const wchar_t * in, std::wstring & out) void HTMLParser::Filter(const wchar_t * in, std::wstring & out)
{ {
reading_from_file = false; reading_from_file = false;
reading_from_wchar_string = true; reading_from_wchar_string = true;
@ -89,18 +90,18 @@ void HTMLFilter::Filter(const wchar_t * in, std::wstring & out)
void HTMLFilter::Init() void HTMLParser::Init()
{ {
} }
void HTMLFilter::Uninit() void HTMLParser::Uninit()
{ {
} }
void HTMLFilter::Filter(const std::wstring & in, std::wstring & out) void HTMLParser::Filter(const std::wstring & in, std::wstring & out)
{ {
if( &in == &out ) if( &in == &out )
{ {
@ -117,7 +118,7 @@ void HTMLFilter::Filter(const std::wstring & in, std::wstring & out)
} }
void HTMLFilter::SetSomeDefaults() void HTMLParser::SetSomeDefaults()
{ {
white_mode = WHITE_MODE_ORIGIN; white_mode = WHITE_MODE_ORIGIN;
@ -132,7 +133,7 @@ void HTMLFilter::SetSomeDefaults()
} }
HTMLFilter::HTMLFilter() HTMLParser::HTMLParser()
{ {
pstack = new Item[WINIX_HTMLFILTER_STACK_MAXLEN]; pstack = new Item[WINIX_HTMLFILTER_STACK_MAXLEN];
buffer = new wchar_t[WINIX_HTMLFILTER_BUFFER_MAXLEN]; buffer = new wchar_t[WINIX_HTMLFILTER_BUFFER_MAXLEN];
@ -141,7 +142,7 @@ HTMLFilter::HTMLFilter()
} }
HTMLFilter::HTMLFilter(const HTMLFilter & f) HTMLParser::HTMLParser(const HTMLParser & f)
{ {
// don't need to copy the stack // don't need to copy the stack
pstack = new Item[WINIX_HTMLFILTER_STACK_MAXLEN]; pstack = new Item[WINIX_HTMLFILTER_STACK_MAXLEN];
@ -151,7 +152,7 @@ HTMLFilter::HTMLFilter(const HTMLFilter & f)
} }
HTMLFilter & HTMLFilter::operator=(const HTMLFilter & f) HTMLParser & HTMLParser::operator=(const HTMLParser & f)
{ {
// don't need to copy the stack // don't need to copy the stack
pstack = new Item[WINIX_HTMLFILTER_STACK_MAXLEN]; pstack = new Item[WINIX_HTMLFILTER_STACK_MAXLEN];
@ -163,7 +164,7 @@ return *this;
} }
HTMLFilter::~HTMLFilter() HTMLParser::~HTMLParser()
{ {
delete [] pstack; delete [] pstack;
delete [] buffer; delete [] buffer;
@ -171,7 +172,7 @@ HTMLFilter::~HTMLFilter()
void HTMLFilter::white_chars_mode(int mode) void HTMLParser::white_chars_mode(int mode)
{ {
if( mode >= WHITE_MODE_ORIGIN && mode <= WHITE_MODE_TREE ) if( mode >= WHITE_MODE_ORIGIN && mode <= WHITE_MODE_TREE )
white_mode = mode; white_mode = mode;
@ -180,7 +181,7 @@ void HTMLFilter::white_chars_mode(int mode)
void HTMLFilter::WrapLine(size_t wrap_line_) void HTMLParser::WrapLine(size_t wrap_line_)
{ {
wrap_line = wrap_line_; wrap_line = wrap_line_;
@ -190,7 +191,7 @@ void HTMLFilter::WrapLine(size_t wrap_line_)
void HTMLFilter::InsertTabs(size_t tabsize) void HTMLParser::InsertTabs(size_t tabsize)
{ {
tab_size = tabsize; tab_size = tabsize;
@ -199,7 +200,7 @@ void HTMLFilter::InsertTabs(size_t tabsize)
} }
int HTMLFilter::current_white_char_mode() int HTMLParser::current_white_char_mode()
{ {
if( !white_char_mode_tab.empty() ) if( !white_char_mode_tab.empty() )
return white_char_mode_tab.back(); return white_char_mode_tab.back();
@ -208,7 +209,7 @@ int HTMLFilter::current_white_char_mode()
} }
void HTMLFilter::CalcOrphansMaxLen(Orphans & orphans) void HTMLParser::CalcOrphansMaxLen(Orphans & orphans)
{ {
size_t i; size_t i;
@ -222,7 +223,7 @@ size_t i;
} }
void HTMLFilter::AssignOrphans(const wchar_t * lang_code, const std::vector<std::wstring> & otab) void HTMLParser::AssignOrphans(const wchar_t * lang_code, const std::vector<std::wstring> & otab)
{ {
lang_code_lower = lang_code; lang_code_lower = lang_code;
ToLower(lang_code_lower); ToLower(lang_code_lower);
@ -236,13 +237,13 @@ void HTMLFilter::AssignOrphans(const wchar_t * lang_code, const std::vector<std:
void HTMLFilter::AssignOrphans(const std::wstring & lang_code, const std::vector<std::wstring> & otab) void HTMLParser::AssignOrphans(const std::wstring & lang_code, const std::vector<std::wstring> & otab)
{ {
AssignOrphans(lang_code.c_str(), otab); AssignOrphans(lang_code.c_str(), otab);
} }
void HTMLFilter::ClearOrphans() void HTMLParser::ClearOrphans()
{ {
orphans_tab.clear(); orphans_tab.clear();
} }
@ -250,7 +251,7 @@ void HTMLFilter::ClearOrphans()
void HTMLFilter::OrphansMode(const std::wstring & orphan_mode_str) void HTMLParser::OrphansMode(const std::wstring & orphan_mode_str)
{ {
if( orphan_mode_str == L"160" ) if( orphan_mode_str == L"160" )
orphan_mode = orphan_160space; orphan_mode = orphan_160space;
@ -259,24 +260,24 @@ void HTMLFilter::OrphansMode(const std::wstring & orphan_mode_str)
} }
void HTMLFilter::SafeMode(bool safe_mode_) void HTMLParser::SafeMode(bool safe_mode_)
{ {
safe_mode = safe_mode_; safe_mode = safe_mode_;
} }
void HTMLFilter::SkipTags(bool skip_tags) void HTMLParser::SkipTags(bool skip_tags)
{ {
this->skip_tags = skip_tags; this->skip_tags = skip_tags;
} }
void HTMLFilter::SkipCommentaries(bool skip_commentaries) void HTMLParser::SkipCommentaries(bool skip_commentaries)
{ {
this->skip_commentaries = skip_commentaries; this->skip_commentaries = skip_commentaries;
} }
void HTMLFilter::SkipEntities(bool skip_entities) void HTMLParser::SkipEntities(bool skip_entities)
{ {
this->skip_entities = skip_entities; this->skip_entities = skip_entities;
@ -287,13 +288,13 @@ void HTMLFilter::SkipEntities(bool skip_entities)
} }
void HTMLFilter::AnalyzeEntities(bool analyze_entities) void HTMLParser::AnalyzeEntities(bool analyze_entities)
{ {
this->analyze_entities = analyze_entities; this->analyze_entities = analyze_entities;
} }
void HTMLFilter::SetNoFilterTag(const std::wstring & tag_name) void HTMLParser::SetNoFilterTag(const std::wstring & tag_name)
{ {
no_filter_tag = tag_name; no_filter_tag = tag_name;
} }
@ -301,7 +302,7 @@ void HTMLFilter::SetNoFilterTag(const std::wstring & tag_name)
HTMLFilter::Item & HTMLFilter::GetItem(size_t i) HTMLParser::Item & HTMLParser::GetItem(size_t i)
{ {
if( i >= stack_len ) if( i >= stack_len )
{ {
@ -313,7 +314,7 @@ return pstack[i];
} }
HTMLFilter::Item & HTMLFilter::LastItem() HTMLParser::Item & HTMLParser::LastItem()
{ {
if( stack_len == 0 ) if( stack_len == 0 )
{ {
@ -325,7 +326,7 @@ return pstack[stack_len-1];
} }
bool HTMLFilter::PushStack() bool HTMLParser::PushStack()
{ {
if( stack_len == WINIX_HTMLFILTER_STACK_MAXLEN ) if( stack_len == WINIX_HTMLFILTER_STACK_MAXLEN )
// oops, too many items // oops, too many items
@ -346,7 +347,7 @@ bool HTMLFilter::PushStack()
return true; return true;
} }
void HTMLFilter::PopStack() void HTMLParser::PopStack()
{ {
if( stack_len == 0 ) if( stack_len == 0 )
// oops // oops
@ -357,7 +358,7 @@ void HTMLFilter::PopStack()
} }
bool HTMLFilter::IsWhite(int c) bool HTMLParser::IsWhite(int c)
{ {
// don't use c==10 here // don't use c==10 here
@ -368,21 +369,21 @@ return false;
} }
void HTMLFilter::SkipWhite() void HTMLParser::SkipWhite()
{ {
while( IsWhite(lastc) ) while( IsWhite(lastc) )
read_char(); read_char();
} }
void HTMLFilter::SkipWhiteLines() void HTMLParser::SkipWhiteLines()
{ {
while( lastc==10 || IsWhite(lastc) ) while( lastc==10 || IsWhite(lastc) )
read_char(); read_char();
} }
void HTMLFilter::SkipWhiteWithFirstNewLine() void HTMLParser::SkipWhiteWithFirstNewLine()
{ {
SkipWhite(); SkipWhite();
@ -394,7 +395,7 @@ void HTMLFilter::SkipWhiteWithFirstNewLine()
} }
//void HTMLFilter::CheckNewLine() //void HTMLParser::CheckNewLine()
//{ //{
// if( white_mode == WHITE_MODE_TREE ) // if( white_mode == WHITE_MODE_TREE )
// { // {
@ -407,7 +408,7 @@ void HTMLFilter::SkipWhiteWithFirstNewLine()
void HTMLFilter::SkipAndCheckClosingTag(std::wstring * remember_text) void HTMLParser::SkipAndCheckClosingTag(std::wstring * remember_text)
{ {
bool is_quoted = false; bool is_quoted = false;
wchar_t quote_char = 0; wchar_t quote_char = 0;
@ -450,7 +451,7 @@ void HTMLFilter::SkipAndCheckClosingTag(std::wstring * remember_text)
bool HTMLFilter::IsValidCharForName(int c) bool HTMLParser::IsValidCharForName(int c)
{ {
if( (c>='a' && c<='z') || if( (c>='a' && c<='z') ||
(c>='A' && c<='Z') || (c>='A' && c<='Z') ||
@ -462,7 +463,7 @@ return false;
} }
bool HTMLFilter::IsValidCharForAttrName(int c) bool HTMLParser::IsValidCharForAttrName(int c)
{ {
if( (c>='a' && c<='z') || if( (c>='a' && c<='z') ||
(c>='A' && c<='Z') || (c>='A' && c<='Z') ||
@ -474,7 +475,7 @@ return false;
} }
bool HTMLFilter::IsValidCharForEntityName(int c) bool HTMLParser::IsValidCharForEntityName(int c)
{ {
if( (c>='a' && c<='z') || if( (c>='a' && c<='z') ||
(c>='A' && c<='Z') || (c>='A' && c<='Z') ||
@ -486,7 +487,7 @@ return false;
} }
void HTMLFilter::ReadItemName(std::wstring & name, bool clear_name) void HTMLParser::ReadItemName(std::wstring & name, bool clear_name)
{ {
size_t i; size_t i;
@ -513,7 +514,7 @@ size_t i;
void HTMLFilter::ReadItemAttrName() void HTMLParser::ReadItemAttrName()
{ {
size_t i; size_t i;
@ -530,7 +531,7 @@ size_t i;
void HTMLFilter::ReadItemAttrValueAdd(const std::wstring & str) void HTMLParser::ReadItemAttrValueAdd(const std::wstring & str)
{ {
if( analyze_entities ) if( analyze_entities )
{ {
@ -544,7 +545,7 @@ void HTMLFilter::ReadItemAttrValueAdd(const std::wstring & str)
} }
void HTMLFilter::ReadItemAttrValue(bool has_quote, wchar_t quote_char) void HTMLParser::ReadItemAttrValue(bool has_quote, wchar_t quote_char)
{ {
attr_value.clear(); attr_value.clear();
tmp_text.clear(); tmp_text.clear();
@ -585,7 +586,7 @@ void HTMLFilter::ReadItemAttrValue(bool has_quote, wchar_t quote_char)
} }
void HTMLFilter::CheckChar(wchar_t c) void HTMLParser::CheckChar(wchar_t c)
{ {
if( c == 10 ) if( c == 10 )
line_len = 0; line_len = 0;
@ -594,14 +595,14 @@ void HTMLFilter::CheckChar(wchar_t c)
} }
void HTMLFilter::Put(wchar_t c) void HTMLParser::Put(wchar_t c)
{ {
(*out_string) += c; (*out_string) += c;
CheckChar(c); CheckChar(c);
} }
void HTMLFilter::Put(const wchar_t * str, const wchar_t * end) void HTMLParser::Put(const wchar_t * str, const wchar_t * end)
{ {
if( str >= end ) if( str >= end )
return; return;
@ -615,7 +616,7 @@ void HTMLFilter::Put(const wchar_t * str, const wchar_t * end)
void HTMLFilter::Put(const std::wstring & str) void HTMLParser::Put(const std::wstring & str)
{ {
if( !str.empty() ) if( !str.empty() )
{ {
@ -628,7 +629,7 @@ void HTMLFilter::Put(const std::wstring & str)
// out can be null // out can be null
void HTMLFilter::AnalyzeEntitiesAndPut(const wchar_t * str, const wchar_t * end, std::wstring * out) void HTMLParser::AnalyzeEntitiesAndPut(const wchar_t * str, const wchar_t * end, std::wstring * out)
{ {
size_t epsilon = 8; // !! IMPROVE ME put as a constant size_t epsilon = 8; // !! IMPROVE ME put as a constant
const wchar_t * old_str = str; const wchar_t * old_str = str;
@ -680,7 +681,7 @@ void HTMLFilter::AnalyzeEntitiesAndPut(const wchar_t * str, const wchar_t * end,
int HTMLFilter::CheckOrphan(const wchar_t * str, const wchar_t * end, const std::wstring & orphan_str) int HTMLParser::CheckOrphan(const wchar_t * str, const wchar_t * end, const std::wstring & orphan_str)
{ {
size_t res; size_t res;
@ -704,7 +705,7 @@ return -int(ToLower(*orphan));
// binary search in table (table should be sorted) // binary search in table (table should be sorted)
bool HTMLFilter::CheckOrphan(const wchar_t * str, const wchar_t * end, const std::vector<std::wstring> & table) bool HTMLParser::CheckOrphan(const wchar_t * str, const wchar_t * end, const std::vector<std::wstring> & table)
{ {
int res; int res;
@ -749,7 +750,7 @@ return false;
} }
bool HTMLFilter::CheckOrphan(const wchar_t * str, const wchar_t * end) bool HTMLParser::CheckOrphan(const wchar_t * str, const wchar_t * end)
{ {
if( str==end || !LastItem().has_body_tag || !LastItem().porphans ) if( str==end || !LastItem().has_body_tag || !LastItem().porphans )
return false; return false;
@ -763,7 +764,7 @@ return CheckOrphan(str, end, LastItem().porphans->tab);
} }
void HTMLFilter::PutNormalNonWhite(std::wstring & str, bool allow_put_new_line, bool allow_put_space) void HTMLParser::PutNormalNonWhite(std::wstring & str, bool allow_put_new_line, bool allow_put_space)
{ {
while( lastc != -1 && lastc != 10 && !IsWhite(lastc) && !IsOpeningTagMark(lastc) ) while( lastc != -1 && lastc != 10 && !IsWhite(lastc) && !IsOpeningTagMark(lastc) )
{ {
@ -799,7 +800,7 @@ void HTMLFilter::PutNormalNonWhite(std::wstring & str, bool allow_put_new_line,
} }
void HTMLFilter::PutNormalWhite(bool & was_white_char, bool & was_new_line) void HTMLParser::PutNormalWhite(bool & was_white_char, bool & was_new_line)
{ {
was_white_char = false; was_white_char = false;
was_new_line = false; was_new_line = false;
@ -829,13 +830,13 @@ void HTMLFilter::PutNormalWhite(bool & was_white_char, bool & was_new_line)
void HTMLFilter::PutOpeningTagMark() void HTMLParser::PutOpeningTagMark()
{ {
Put('<'); Put('<');
} }
void HTMLFilter::PutClosingTagMark() void HTMLParser::PutClosingTagMark()
{ {
Put('>'); Put('>');
} }
@ -845,7 +846,7 @@ void HTMLFilter::PutClosingTagMark()
// !! IMPROVE ME change to a better name // !! IMPROVE ME change to a better name
// this function does not return true when the tag is safe // this function does not return true when the tag is safe
bool HTMLFilter::IsTagSafe(const wchar_t * tag) bool HTMLParser::IsTagSafe(const wchar_t * tag)
{ {
if( !safe_mode ) if( !safe_mode )
return true; return true;
@ -874,7 +875,7 @@ return true;
} }
bool HTMLFilter::IsTagSafe(const std::wstring & tag) bool HTMLParser::IsTagSafe(const std::wstring & tag)
{ {
return IsTagSafe(tag.c_str()); return IsTagSafe(tag.c_str());
} }
@ -883,7 +884,7 @@ bool HTMLFilter::IsTagSafe(const std::wstring & tag)
bool HTMLFilter::PutOpeningTag() bool HTMLParser::PutOpeningTag()
{ {
if( !IsTagSafe(LastItem().name) ) if( !IsTagSafe(LastItem().name) )
{ {
@ -905,7 +906,7 @@ return true;
void HTMLFilter::PutClosingTag(const Item & item) void HTMLParser::PutClosingTag(const Item & item)
{ {
if( skip_tags || !IsTagSafe(item.name) ) if( skip_tags || !IsTagSafe(item.name) )
return; return;
@ -922,7 +923,7 @@ void HTMLFilter::PutClosingTag(const Item & item)
void HTMLFilter::PutTabs(size_t len) void HTMLParser::PutTabs(size_t len)
{ {
if( len > 30 ) if( len > 30 )
len = 30; len = 30;
@ -932,7 +933,7 @@ void HTMLFilter::PutTabs(size_t len)
} }
void HTMLFilter::PutNonBreakingSpace() void HTMLParser::PutNonBreakingSpace()
{ {
if( orphan_mode == orphan_nbsp ) if( orphan_mode == orphan_nbsp )
{ {
@ -947,35 +948,35 @@ void HTMLFilter::PutNonBreakingSpace()
// we assume the size of the opening mark to be one // we assume the size of the opening mark to be one
bool HTMLFilter::IsOpeningTagMark(wchar_t c) bool HTMLParser::IsOpeningTagMark(wchar_t c)
{ {
return (c == '<'); return (c == '<');
} }
// we assume the size of the closing mark to be one // we assume the size of the closing mark to be one
bool HTMLFilter::IsClosingTagMark(wchar_t c) bool HTMLParser::IsClosingTagMark(wchar_t c)
{ {
return (c == '>'); return (c == '>');
} }
// the slash in the closing tag mark e.g. </p> // the slash in the closing tag mark e.g. </p>
bool HTMLFilter::IsClosingTagIndicator(wchar_t c) bool HTMLParser::IsClosingTagIndicator(wchar_t c)
{ {
return (c == '/'); return (c == '/');
} }
// the exclamation mark in a special tag e.g. <!doctype> // the exclamation mark in a special tag e.g. <!doctype>
bool HTMLFilter::IsSpecialTagIndicator(wchar_t c) bool HTMLParser::IsSpecialTagIndicator(wchar_t c)
{ {
return (c == '!'); return (c == '!');
} }
// the '=' operator e.g. class="value" // the '=' operator e.g. class="value"
bool HTMLFilter::IsAttributeAssignmentMark(wchar_t c) bool HTMLParser::IsAttributeAssignmentMark(wchar_t c)
{ {
return (c == '='); return (c == '=');
} }
@ -984,13 +985,13 @@ bool HTMLFilter::IsAttributeAssignmentMark(wchar_t c)
// the slash at the end <img src=".." /> (without '>' character) // the slash at the end <img src=".." /> (without '>' character)
// we assume the size of the mark to be one // we assume the size of the mark to be one
bool HTMLFilter::IsClosingXmlSimpleTagMark(wchar_t c) bool HTMLParser::IsClosingXmlSimpleTagMark(wchar_t c)
{ {
return (c == '/'); return (c == '/');
} }
bool HTMLFilter::IsEndingCommentaryTagMarkAtEndOfString(const std::wstring & str) bool HTMLParser::IsEndingCommentaryTagMarkAtEndOfString(const std::wstring & str)
{ {
static wchar_t comm_end[] = L"-->"; static wchar_t comm_end[] = L"-->";
size_t comm_end_len = sizeof(comm_end) / sizeof(wchar_t) - 1; size_t comm_end_len = sizeof(comm_end) / sizeof(wchar_t) - 1;
@ -1004,13 +1005,13 @@ bool HTMLFilter::IsEndingCommentaryTagMarkAtEndOfString(const std::wstring & str
} }
bool HTMLFilter::IsStartingEntityMark(wchar_t c) bool HTMLParser::IsStartingEntityMark(wchar_t c)
{ {
return (c == '&'); return (c == '&');
} }
bool HTMLFilter::IsEndingEntityMark(wchar_t c) bool HTMLParser::IsEndingEntityMark(wchar_t c)
{ {
return (c == ';'); return (c == ';');
} }
@ -1018,7 +1019,7 @@ bool HTMLFilter::IsEndingEntityMark(wchar_t c)
// reading text between html tags // reading text between html tags
void HTMLFilter::ReadText() void HTMLParser::ReadText()
{ {
bool was_white_char = false; bool was_white_char = false;
bool was_new_line = false; bool was_new_line = false;
@ -1096,7 +1097,7 @@ void HTMLFilter::ReadText()
bool HTMLFilter::PrintOpeningItem() bool HTMLParser::PrintOpeningItem()
{ {
if( skip_tags || IsNameEqual(no_filter_tag, LastItem().name) ) if( skip_tags || IsNameEqual(no_filter_tag, LastItem().name) )
return true; return true;
@ -1108,7 +1109,7 @@ bool HTMLFilter::PrintOpeningItem()
bool HTMLFilter::ReadItemAttr() bool HTMLParser::ReadItemAttr()
{ {
attr_has_value = false; attr_has_value = false;
attr_name.clear(); attr_name.clear();
@ -1145,7 +1146,7 @@ return true;
void HTMLFilter::CheckItemLangAttr() void HTMLParser::CheckItemLangAttr()
{ {
if( attr_has_value && IsNameEqual(L"lang", attr_name) ) if( attr_has_value && IsNameEqual(L"lang", attr_name) )
{ {
@ -1166,7 +1167,7 @@ void HTMLFilter::CheckItemLangAttr()
} }
void HTMLFilter::PrintItemAttr() void HTMLParser::PrintItemAttr()
{ {
size_t i; size_t i;
@ -1193,7 +1194,7 @@ size_t i;
} }
void HTMLFilter::ReadItemClosing() void HTMLParser::ReadItemClosing()
{ {
read_char(); // skipping '/' read_char(); // skipping '/'
SkipWhiteLines(); SkipWhiteLines();
@ -1205,7 +1206,7 @@ void HTMLFilter::ReadItemClosing()
} }
void HTMLFilter::ReadItemSpecial() void HTMLParser::ReadItemSpecial()
{ {
LastItem().type = Item::special; LastItem().type = Item::special;
@ -1255,7 +1256,7 @@ void HTMLFilter::ReadItemSpecial()
} }
void HTMLFilter::ReadItemOpening() void HTMLParser::ReadItemOpening()
{ {
LastItem().type = Item::opening; LastItem().type = Item::opening;
ReadItemName(LastItem().name); ReadItemName(LastItem().name);
@ -1281,16 +1282,16 @@ void HTMLFilter::ReadItemOpening()
} }
void HTMLFilter::ItemFound() void HTMLParser::ItemFound()
{ {
} }
void HTMLFilter::EntityFound(const wchar_t * str, const wchar_t * end) void HTMLParser::EntityFound(const wchar_t * str, const wchar_t * end)
{ {
} }
bool HTMLFilter::ReadItem() bool HTMLParser::ReadItem()
{ {
if( lastc == -1 ) if( lastc == -1 )
return false; return false;
@ -1332,7 +1333,7 @@ return true;
wchar_t HTMLFilter::ToLower(wchar_t c) wchar_t HTMLParser::ToLower(wchar_t c)
{ {
if( c>='A' && c<='Z' ) if( c>='A' && c<='Z' )
return c - 'A' + 'a'; return c - 'A' + 'a';
@ -1341,7 +1342,7 @@ return c;
} }
void HTMLFilter::ToLower(std::wstring & str) void HTMLParser::ToLower(std::wstring & str)
{ {
size_t i; size_t i;
@ -1350,7 +1351,7 @@ size_t i;
} }
bool HTMLFilter::IsNameEqual(const wchar_t * name1, const wchar_t * name2) bool HTMLParser::IsNameEqual(const wchar_t * name1, const wchar_t * name2)
{ {
for( ; *name1!=0 && *name2!=0 ; ++name1, ++name2 ) for( ; *name1!=0 && *name2!=0 ; ++name1, ++name2 )
if( ToLower(*name1) != ToLower(*name2) ) if( ToLower(*name1) != ToLower(*name2) )
@ -1363,19 +1364,19 @@ return false;
} }
bool HTMLFilter::IsNameEqual(const wchar_t * name1, const std::wstring & name2) bool HTMLParser::IsNameEqual(const wchar_t * name1, const std::wstring & name2)
{ {
return IsNameEqual(name1, name2.c_str()); return IsNameEqual(name1, name2.c_str());
} }
bool HTMLFilter::IsNameEqual(const std::wstring & name1, const wchar_t * name2) bool HTMLParser::IsNameEqual(const std::wstring & name1, const wchar_t * name2)
{ {
return IsNameEqual(name1.c_str(), name2); return IsNameEqual(name1.c_str(), name2);
} }
bool HTMLFilter::IsNameEqual(const std::wstring & name1, const std::wstring & name2) bool HTMLParser::IsNameEqual(const std::wstring & name1, const std::wstring & name2)
{ {
return IsNameEqual(name1.c_str(), name2.c_str()); return IsNameEqual(name1.c_str(), name2.c_str());
} }
@ -1385,7 +1386,7 @@ bool HTMLFilter::IsNameEqual(const std::wstring & name1, const std::wstring & na
// len characters from both strings must be equal // len characters from both strings must be equal
// IMPROVE ME change name to something like IsBeginningNameEqual // IMPROVE ME change name to something like IsBeginningNameEqual
// and move to text.h (pikotools) // and move to text.h (pikotools)
bool HTMLFilter::IsNameEqual(const wchar_t * name1, const wchar_t * name2, size_t len) bool HTMLParser::IsNameEqual(const wchar_t * name1, const wchar_t * name2, size_t len)
{ {
for( ; *name1!=0 && *name2!=0 && len>0 ; ++name1, ++name2, --len ) for( ; *name1!=0 && *name2!=0 && len>0 ; ++name1, ++name2, --len )
if( ToLower(*name1) != ToLower(*name2) ) if( ToLower(*name1) != ToLower(*name2) )
@ -1399,19 +1400,19 @@ return false;
bool HTMLFilter::IsNameEqual(const wchar_t * name1, const std::wstring & name2, size_t len) bool HTMLParser::IsNameEqual(const wchar_t * name1, const std::wstring & name2, size_t len)
{ {
return IsNameEqual(name1, name2.c_str(), len); return IsNameEqual(name1, name2.c_str(), len);
} }
bool HTMLFilter::IsNameEqual(const std::wstring & name1, const wchar_t * name2, size_t len) bool HTMLParser::IsNameEqual(const std::wstring & name1, const wchar_t * name2, size_t len)
{ {
return IsNameEqual(name1.c_str(), name2, len); return IsNameEqual(name1.c_str(), name2, len);
} }
bool HTMLFilter::IsNameEqual(const std::wstring & name1, const std::wstring & name2, size_t len) bool HTMLParser::IsNameEqual(const std::wstring & name1, const std::wstring & name2, size_t len)
{ {
return IsNameEqual(name1.c_str(), name2.c_str(), len); return IsNameEqual(name1.c_str(), name2.c_str(), len);
} }
@ -1420,20 +1421,20 @@ bool HTMLFilter::IsNameEqual(const std::wstring & name1, const std::wstring & na
bool HTMLFilter::IsLastTag(const wchar_t * name) bool HTMLParser::IsLastTag(const wchar_t * name)
{ {
return IsNameEqual(name, LastItem().name); return IsNameEqual(name, LastItem().name);
} }
bool HTMLFilter::IsLastTag(const std::wstring & name) bool HTMLParser::IsLastTag(const std::wstring & name)
{ {
return IsNameEqual(name, LastItem().name); return IsNameEqual(name, LastItem().name);
} }
// checking exceptions for opening tags // checking exceptions for opening tags
void HTMLFilter::CheckSingleItemExceptions() void HTMLParser::CheckSingleItemExceptions()
{ {
if( IsLastTag(L"meta") || if( IsLastTag(L"meta") ||
IsLastTag(L"input") || IsLastTag(L"input") ||
@ -1456,7 +1457,7 @@ void HTMLFilter::CheckSingleItemExceptions()
} }
void HTMLFilter::CheckWhiteCharsExceptions(Item & item) void HTMLParser::CheckWhiteCharsExceptions(Item & item)
{ {
bool change_white_mode = false; bool change_white_mode = false;
@ -1493,7 +1494,7 @@ void HTMLFilter::CheckWhiteCharsExceptions(Item & item)
void HTMLFilter::AddForgottenTags() void HTMLParser::AddForgottenTags()
{ {
int i; int i;
@ -1539,7 +1540,7 @@ int i;
} }
void HTMLFilter::CheckStackPrintRest() void HTMLParser::CheckStackPrintRest()
{ {
while( stack_len-- > 0 ) while( stack_len-- > 0 )
{ {
@ -1561,7 +1562,7 @@ void HTMLFilter::CheckStackPrintRest()
} }
void HTMLFilter::CheckClosingTags() void HTMLParser::CheckClosingTags()
{ {
if( stack_len == 0 ) if( stack_len == 0 )
return; return;
@ -1604,7 +1605,7 @@ void HTMLFilter::CheckClosingTags()
} }
bool HTMLFilter::PrintRest() bool HTMLParser::PrintRest()
{ {
//const wchar_t * start = pchar; //const wchar_t * start = pchar;
@ -1634,7 +1635,7 @@ bool HTMLFilter::PrintRest()
void HTMLFilter::ReadLoop() void HTMLParser::ReadLoop()
{ {
while( ReadItem() ) while( ReadItem() )
{ {
@ -1671,7 +1672,7 @@ void HTMLFilter::ReadLoop()
void HTMLFilter::Read() void HTMLParser::Read()
{ {
read_char(); // put first character to lastc read_char(); // put first character to lastc
is_first_item = true; is_first_item = true;

View File

@ -90,7 +90,7 @@ namespace pt
the filter recognizes xml simple tags (with / at the end) such as: <br /> the filter recognizes xml simple tags (with / at the end) such as: <br />
*/ */
class HTMLFilter : public BaseParser class HTMLParser : public BaseParser
{ {
public: public:
@ -100,10 +100,10 @@ public:
orphan_160space // putting the character with code 160 (non-breaking space) orphan_160space // putting the character with code 160 (non-breaking space)
}; };
HTMLFilter(); HTMLParser();
HTMLFilter(const HTMLFilter & f); HTMLParser(const HTMLParser & f);
HTMLFilter & operator=(const HTMLFilter & f); HTMLParser & operator=(const HTMLParser & f);
virtual ~HTMLFilter(); virtual ~HTMLParser();
// main methods used for filtering // main methods used for filtering