added to HTMLFilter:

- possibility to remove html entities, method: SkipEntities(bool) git-svn-id: svn://ttmath.org/publicrep/winix/trunk@1132 e52654a7-88a9-db11-a3e9-0013d4bc506e
2018-10-24 16:31:42 +00:00
parent 027a8ec428
commit 1b8f5dc673
5 changed files with 242 additions and 84 deletions
--- a/winixd/Makefile.dep
+++ b/winixd/Makefile.dep
--- a/winixd/core/bbcodeparser.cpp
+++ b/winixd/core/bbcodeparser.cpp
@@ -5,7 +5,7 @@
 */

 /* 
- * Copyright (c) 2008-2014, Tomasz Sowa
+ * Copyright (c) 2008-2018, Tomasz Sowa
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
@@ -66,36 +66,46 @@ return false;
 }


-bool BBCODEParser::IsOpeningTagMark()
+bool BBCODEParser::IsOpeningTagMark(wchar_t c)
 {
-	return (*pchar == '[');
+	return (c == '[');
 }


-// there are no commentaries in bbcode
-bool BBCODEParser::IsOpeningCommentaryTagMark()
+// returns true if c is the character closing a bbcode tag: ']'
+bool BBCODEParser::IsClosingTagMark(wchar_t c)
+{
+	return (c == ']');
+}
+
+bool BBCODEParser::IsClosingXmlSimpleTagMark(wchar_t c)
 {
 	return false;
 }


+
+// there are no commentaries in bbcode
+// so the given string is ignored and false is always returned
+bool BBCODEParser::IsOpeningCommentaryTagMark(const wchar_t *)
+{
+	return false;
+}
+
+
+// the size of the opening commentary mark
+// always zero here (IsOpeningCommentaryTagMark never reports such a mark in bbcode)
+size_t BBCODEParser::OpeningCommentaryTagMarkSize()
+{
+	return 0;
+}
+
+
+
 bool BBCODEParser::SkipCommentaryTagIfExists()
 {
 	return false;
 }


-bool BBCODEParser::IsClosingTagMark()
-{
-	return (*pchar == ']');
-}


-bool BBCODEParser::IsClosingXmlSimpleTagMark()
-{
-	return false;
-}
-



@@ -446,7 +456,7 @@ bool first_tag_removed = false;

 	while( *pchar != 0 )
 	{
-		if( IsOpeningTagMark() )
+		if( IsOpeningTagMark(*pchar) )
 		{
 			if( IsClosingTagForLastItem() )
 			{
--- a/winixd/core/bbcodeparser.h
+++ b/winixd/core/bbcodeparser.h
@@ -5,7 +5,7 @@
 */

 /* 
- * Copyright (c) 2008-2014, Tomasz Sowa
+ * Copyright (c) 2008-2018, Tomasz Sowa
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
@@ -62,10 +62,12 @@ class BBCODEParser : public HTMLFilter
 	virtual void Init();
 	virtual void Uninit();

-	virtual bool IsOpeningTagMark();
-	virtual bool IsOpeningCommentaryTagMark();
-	virtual bool IsClosingTagMark();
-	virtual bool IsClosingXmlSimpleTagMark();
+	virtual bool IsOpeningTagMark(wchar_t c);
+	virtual bool IsClosingTagMark(wchar_t c);
+	virtual bool IsClosingXmlSimpleTagMark(wchar_t c);
+
+	virtual bool IsOpeningCommentaryTagMark(const wchar_t *);
+	virtual size_t OpeningCommentaryTagMarkSize();

 	virtual bool IsValidCharForName(int c);
 	virtual void CheckExceptions();
--- a/winixd/core/htmlfilter.cpp
+++ b/winixd/core/htmlfilter.cpp
@@ -88,7 +88,13 @@ void HTMLFilter::Uninit()

 void HTMLFilter::Filter(const std::wstring & in, std::wstring & out)
 {
-size_t out_projected_len = in.size() * 2 + 1;
+	if( &in == &out )
+	{
+		// out cannot be the same string as in
+		return;
+	}
+
+	size_t out_projected_len = in.size() * 2 + 1;

 	if( out.capacity() < out_projected_len )
 		out.reserve(out_projected_len);
@@ -105,8 +111,10 @@ void HTMLFilter::SetSomeDefaults()
 	wrap_line   = 0;
 	orphan_mode = orphan_nbsp;
 	safe_mode   = false;
-	skip_all_tags = false;
+	skip_tags = false;
 	skip_commentaries = false;
+	skip_entities = false;
+	analyze_entities = false;
 }


@@ -238,13 +246,34 @@ void HTMLFilter::SafeMode(bool safe_mode_)
 }


-void HTMLFilter::SkipAllTags(bool skip_all_tags, bool skip_commentaries)
+void HTMLFilter::SkipTags(bool skip_tags)
+{
+	this->skip_tags = skip_tags;
+}
+
+void HTMLFilter::SkipCommentaries(bool skip_commentaries)
 {
-	this->skip_all_tags = skip_all_tags;
 	this->skip_commentaries = skip_commentaries;
 }


+// turns skipping of html entities (such as &nbsp;) on or off
+// turning it on also turns on the entity analysis (analyze_entities),
+// turning it off leaves analyze_entities unchanged
+void HTMLFilter::SkipEntities(bool skip_entities)
+{
+	this->skip_entities = skip_entities;
+
+	// the analysis flag is only ever switched on here, never off
+	if( skip_entities )
+		analyze_entities = true;
+}
+
+
+// turns the analysis of html entities on or off (stores the flag in analyze_entities)
+// NOTE(review): skip_entities is not touched here; in the visible code entities are skipped
+// only inside AnalyzeEntitiesAndPut(), which is called only when analyze_entities is set,
+// so AnalyzeEntities(false) after SkipEntities(true) seems to disable skipping as well -- confirm this is intended
+void HTMLFilter::AnalyzeEntities(bool analyze_entities)
+{
+	this->analyze_entities = analyze_entities;
+}
+
+
 void HTMLFilter::SetNoFilterTag(const std::wstring & tag_name)
 {
 	no_filter_tag = tag_name;
@@ -380,7 +409,7 @@ bool HTMLFilter::IsClosingTagForLastItem()
 			pchar += LastItem().name.size();
 			SkipWhite();

-			if( IsClosingTagMark() )
+			if( IsClosingTagMark(*pchar) )
 			{
 				pchar += 1;
 				return true;
@@ -402,7 +431,7 @@ const wchar_t * end = pchar;

 	while( *pchar != 0 )
 	{
-		if( IsOpeningTagMark() )
+		if( IsOpeningTagMark(*pchar) )
 		{
 			if( IsClosingTagForLastItem() )
 			{
@@ -450,12 +479,12 @@ void HTMLFilter::SkipAndCheckClosingTag()
 			}
 		}
 		else
-		if( !is_quoted && LastItem().type == Item::opening && IsClosingXmlSimpleTagMark() ) // closing xml tag: default '/'
+		if( !is_quoted && LastItem().type == Item::opening && IsClosingXmlSimpleTagMark(*pchar) ) // closing xml tag: default '/'
 		{
 			LastItem().type = Item::simple;
 		}
 		else
-		if( !is_quoted && IsClosingTagMark() )
+		if( !is_quoted && IsClosingTagMark(*pchar) )
 		{
 			++pchar;
 			break;
@@ -470,7 +499,7 @@ bool HTMLFilter::IsValidCharForName(int c)
 	if( (c>='a' && c<='z') ||
 		(c>='A' && c<='Z') ||
 		(c>='0' && c<='9') ||
-		c=='-' || c=='!' )
+		c=='-' || c=='!' || c==':') // : for namespace character
 		return true;

 return false;
@@ -489,6 +518,18 @@ return false;
 }


+// returns true if c can be a part of an html entity name:
+// an ascii letter, a digit or the '#' character (as in numeric entities e.g. &#160;)
+bool HTMLFilter::IsValidCharForEntityName(int c)
+{
+	const bool is_lower = (c >= 'a' && c <= 'z');
+	const bool is_upper = (c >= 'A' && c <= 'Z');
+	const bool is_digit = (c >= '0' && c <= '9');
+
+	return is_lower || is_upper || is_digit || c == '#';
+}
+
+
 void HTMLFilter::ReadItemName()
 {
 size_t i;
@@ -521,12 +562,28 @@ size_t i;



+// adds a new element to attr_value made from the characters [value_start, value_end)
+// if analyze_entities is set the characters are passed through AnalyzeEntitiesAndPut()
+// otherwise they are copied without any changes
+void HTMLFilter::ReadItemAttrValueAdd(const wchar_t * value_start, const wchar_t * value_end)
+{
+	attr_value.push_back(std::wstring());
+	std::wstring & new_value = attr_value.back();
+
+	if( !analyze_entities )
+	{
+		new_value.append(value_start, value_end);
+		return;
+	}
+
+	AnalyzeEntitiesAndPut(value_start, value_end, &new_value);
+}
+
+
 void HTMLFilter::ReadItemAttrValue(bool has_quote, wchar_t quote_char)
 {
 size_t i;

 	attr_value.clear();
-	attr_value_temp.clear();
+	const wchar_t * value_start = pchar;
+	size_t value_len = 0; // how many non white characters

 	for(i=0 ; *pchar ; ++i, ++pchar )
 	{
@@ -537,30 +594,28 @@ size_t i;
 		}
 		else
 		{
-			if( IsClosingTagMark() || *pchar == 10 || IsWhite(*pchar) )
+			if( IsClosingTagMark(*pchar) || *pchar == 10 || IsWhite(*pchar) )
 				break;
 		}

 		if( *pchar==10 || IsWhite(*pchar) )
 		{
-			if( !attr_value_temp.empty() )
-			{
-				attr_value.push_back(attr_value_temp);
-				attr_value_temp.clear();
-			}
+			if( value_len > 0 && value_len <= WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN )
+				ReadItemAttrValueAdd(value_start, pchar);
+
+			value_len = 0;
 		}
 		else
-		if( i < WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN )
 		{
-			attr_value_temp += *pchar;
+			if( value_len == 0 )
+				value_start = pchar;
+
+			value_len += 1;
 		}
 	}

-	if( !attr_value_temp.empty() )
-	{
-		attr_value.push_back(attr_value_temp);
-		attr_value_temp.clear();
-	}
+	if( value_len > 0 && value_len <= WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN )
+		ReadItemAttrValueAdd(value_start, pchar);
 }


@@ -611,6 +666,57 @@ void HTMLFilter::Put(const std::wstring & str)
 }


+// copies the characters [str, end) to the output and looks for html entities (e.g. &nbsp;)
+//
+// an entity consists of: the starting mark (IsStartingEntityMark), from one to epsilon
+// characters accepted by IsValidCharForEntityName and the ending mark (IsEndingEntityMark)
+// for each entity EntityFound() is called with the name only (without & and ;)
+// if skip_entities is set then the entity itself is not copied to the output
+//
+// out can be null: in such a case the text is sent through Put(),
+// otherwise the text is appended to *out
+//
+// the whole entity must be placed inside [str, end), characters at 'end' or after it
+// are never examined, thanks to that the last copy (old_str, end) never gets a reversed range
+void HTMLFilter::AnalyzeEntitiesAndPut(const wchar_t * str, const wchar_t * end, std::wstring * out)
+{
+	const size_t epsilon = 8; // !! IMPROVE ME put as a constant
+	const wchar_t * old_str = str; // beginning of the text which has not been copied yet
+
+	// sends [from, to) either to *out or to the filter output
+	auto put_range = [this, out](const wchar_t * from, const wchar_t * to)
+	{
+		if( out )
+			out->append(from, to);
+		else
+			Put(from, to);
+	};
+
+	while( str < end )
+	{
+		if( !IsStartingEntityMark(*str) )
+		{
+			str += 1;
+			continue;
+		}
+
+		const wchar_t * entity_start = str;
+		str += 1; // skip &
+
+		// read the entity name but never go beyond the end of the range
+		for(size_t i=0 ; str < end && *str && IsValidCharForEntityName(*str) && i < epsilon ; ++i, ++str)
+		{
+		}
+
+		// the ending mark must be inside the range and there must be at least one character in the entity name
+		if( str < end && IsEndingEntityMark(*str) && str - entity_start > 1 )
+		{
+			put_range(old_str, entity_start);
+			str += 1; // skip ;
+
+			if( !skip_entities )
+				put_range(entity_start, str);
+
+			EntityFound(entity_start + 1, str - 1); // without & and ;
+			old_str = str;
+		}
+	}
+
+	put_range(old_str, end);
+}
+
+


 int HTMLFilter::CheckOrphan(const wchar_t * str, const wchar_t * end, const std::wstring & orphan_str)
@@ -700,12 +806,12 @@ return CheckOrphan(str, end, LastItem().porphans->tab);
 // (useful in html entities)
 // !! also add a check whether the length of the string (end-str) is small
 // and in that case do not add any character either
-bool HTMLFilter::HasSemiloconAround(const wchar_t * str, const wchar_t * end)
+bool HTMLFilter::HasEntityEndAround(const wchar_t * str, const wchar_t * end)
 {
-size_t i, epsilon = 8;
+size_t i, epsilon = 8;// !! IMPROVE ME put as a constant

 	for(i=0 ; str < end && i<epsilon ; ++i, ++str)
-		if( *str == ';' )
+		if( IsEndingEntityMark(*str) )
 			return true;

 return false;
@@ -726,11 +832,11 @@ void HTMLFilter::PutNormalNonWhite(const wchar_t * & str, const wchar_t * end)
 {
 const wchar_t * word = str;
 size_t non_whites = 0;
-bool was_semicolon = false;
+bool was_entity_end = false;

 	for( ; str < end && *str!=10 && !IsWhite(*str) ; ++str, ++non_whites )
 	{
-		if( break_after != 0 && non_whites >= break_after && (was_semicolon || !HasSemiloconAround(str, end)) )
+		if( break_after != 0 && non_whites >= break_after && (was_entity_end || !HasEntityEndAround(str, end)) )
 		{
 			Put(word, str);
 			word           = str;
@@ -739,10 +845,13 @@ bool was_semicolon = false;
 			CheckLineWrap();
 		}

-		was_semicolon = (*str == ';');
+		was_entity_end = (IsEndingEntityMark(*str));
 	}

-	Put(word, str);
+	if( analyze_entities )
+		AnalyzeEntitiesAndPut(word, str, nullptr);
+	else
+		Put(word, str);
 }


@@ -880,7 +989,7 @@ return true;

 void HTMLFilter::PutClosingTag(const wchar_t * tag)
 {
-	if( skip_all_tags || !IsTagSafe(tag) )
+	if( skip_tags || !IsTagSafe(tag) )
 		return;

 	PutOpeningTagMark();
@@ -925,28 +1034,28 @@ void HTMLFilter::PutNewLine()


 // we assume the size of the opening mark to be one
-bool HTMLFilter::IsOpeningTagMark()
+bool HTMLFilter::IsOpeningTagMark(wchar_t c)
 {
-	return (*pchar == '<');
+	return (c == '<');
 }


 // we assume the size of the closing mark to be one
-bool HTMLFilter::IsClosingTagMark()
+bool HTMLFilter::IsClosingTagMark(wchar_t c)
 {
-	return (*pchar == '>');
+	return (c == '>');
 }


 // the slash at the end <img src=".." /> (without '>' character)
 // we assume the size of the mark to be one
-bool HTMLFilter::IsClosingXmlSimpleTagMark()
+bool HTMLFilter::IsClosingXmlSimpleTagMark(wchar_t c)
 {
-	return (*pchar == '/');
+	return (c == '/');
 }


-bool HTMLFilter::IsOpeningCommentaryTagMark()
+bool HTMLFilter::IsOpeningCommentaryTagMark(const wchar_t * str)
 {
 static wchar_t comm_open[] = L"<!--";
 size_t comm_open_len = sizeof(comm_open) / sizeof(wchar_t) - 1;
@@ -961,6 +1070,18 @@ size_t HTMLFilter::OpeningCommentaryTagMarkSize()
 }


+// returns true if c is the character which starts an html entity: '&'
+// we assume the size of the mark to be one
+bool HTMLFilter::IsStartingEntityMark(wchar_t c)
+{
+	return (c == '&');
+}
+
+
+// returns true if c is the character which ends an html entity: ';'
+// we assume the size of the mark to be one
+bool HTMLFilter::IsEndingEntityMark(wchar_t c)
+{
+	return (c == ';');
+}
+
+

 // skipping the commentary tag if exists
 bool HTMLFilter::SkipCommentaryTagIfExists()
@@ -968,7 +1089,7 @@ bool HTMLFilter::SkipCommentaryTagIfExists()
 static wchar_t comm_close[] = L"-->";
 size_t comm_close_len = sizeof(comm_close) / sizeof(wchar_t) - 1;

-	if( !IsOpeningCommentaryTagMark() )
+	if( !IsOpeningCommentaryTagMark(pchar) )
 		return false;

 	pchar += OpeningCommentaryTagMarkSize();
@@ -1012,7 +1133,7 @@ void HTMLFilter::ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t
 	start = pchar;

 	// exception for the commentary tag
-	if( IsOpeningCommentaryTagMark() || !IsOpeningTagMark() )
+	if( IsOpeningCommentaryTagMark(pchar) || !IsOpeningTagMark(*pchar) )
 	{
 		PutNewLine();
 		PutTabs(stack_len);
@@ -1049,7 +1170,7 @@ const wchar_t * last_non_white = pchar;
 		}
 		else
 		{
-			if( IsOpeningTagMark() )
+			if( IsOpeningTagMark(*pchar) )
 				break;

 			if( !IsWhite(*pchar) )
@@ -1067,7 +1188,7 @@ const wchar_t * last_non_white = pchar;

 bool HTMLFilter::PrintOpeningItem()
 {
-	if( skip_all_tags || IsNameEqual(no_filter_tag, LastItem().name) )
+	if( skip_tags || IsNameEqual(no_filter_tag, LastItem().name) )
 		return true;

 	if( last_new_line )
@@ -1149,7 +1270,7 @@ void HTMLFilter::PrintItemAttr()
 {
 size_t i;

-	if( skip_all_tags || IsNameEqual(no_filter_tag, LastItem().name) )
+	if( skip_tags || IsNameEqual(no_filter_tag, LastItem().name) )
 		return;

 	Put(' ');
@@ -1188,7 +1309,7 @@ void HTMLFilter::ReadItemSpecial()
 {
 	LastItem().type = Item::special;

-	if( !skip_all_tags )
+	if( !skip_tags )
 		PutOpeningTagMark();

 	const wchar_t * start = pchar;
@@ -1197,7 +1318,7 @@ void HTMLFilter::ReadItemSpecial()
 	ReadItemName();
 	SkipAndCheckClosingTag();

-	if( !skip_all_tags && pchar > start )
+	if( !skip_tags && pchar > start )
 		Put(start, pchar);

 	// closing tag mark is printed directly from the source
@@ -1219,7 +1340,7 @@ void HTMLFilter::ReadItemOpening()

 		SkipAndCheckClosingTag(); // here LastItem().type can be changed to 'simple'

-		if( !skip_all_tags && !IsNameEqual(no_filter_tag, LastItem().name) )
+		if( !skip_tags && !IsNameEqual(no_filter_tag, LastItem().name) )
 		{
 			if( LastItem().type == Item::simple )
 				Put(L" /");
@@ -1234,6 +1355,10 @@ void HTMLFilter::ItemFound()
 {
 }

+// called from AnalyzeEntitiesAndPut() for each html entity found
+// [str, end) is the entity name without the starting '&' and the ending ';'
+// the default implementation does nothing (the method is declared virtual in htmlfilter.h)
+void HTMLFilter::EntityFound(const wchar_t * str, const wchar_t * end)
+{
+}
+

 bool HTMLFilter::ReadItem()
 {
@@ -1315,6 +1440,8 @@ bool HTMLFilter::IsNameEqual(const std::wstring & name1, const std::wstring & na


 // len characters from both strings must be equal
+// IMPROVE ME change name to something like IsBeginningNameEqual
+// and move to text.h (pikotools)
 bool HTMLFilter::IsNameEqual(const wchar_t * name1, const wchar_t * name2, size_t len)
 {
 	for( ; *name1!=0 && *name2!=0 && len>0 ; ++name1, ++name2, --len )
@@ -1382,10 +1509,10 @@ void HTMLFilter::CheckExceptions()

 	// in safe_mode the script tag is ignored
 	if( !safe_mode && IsLastTag(L"script") )
-		PutEverythingUntilClosingTag(!skip_all_tags);
+		PutEverythingUntilClosingTag(!skip_tags);

 	if( IsLastTag(L"pre") || IsLastTag(L"textarea") )
-		PutEverythingUntilClosingTag(!skip_all_tags);
+		PutEverythingUntilClosingTag(!skip_tags);

 	if( IsLastTag(no_filter_tag) )
 		PutEverythingUntilClosingTag(false);
@@ -1421,7 +1548,7 @@ int i;

 	for(int z=(int)stack_len-2 ; z>=i ; --z)
 	{
-		if( !skip_all_tags && pstack[z].new_line )
+		if( !skip_tags && pstack[z].new_line )
 		{
 			PutNewLine();
 			PutTabs(z);
@@ -1471,7 +1598,7 @@ void HTMLFilter::CheckClosingTags()
 	if( IsNameEqual(pstack[stack_len-1].name, pstack[stack_len-2].name) )
 	{
 		// last closing tag is from the previous one
-		if( !skip_all_tags && pstack[stack_len-2].new_line )
+		if( !skip_tags && pstack[stack_len-2].new_line )
 		{
 			PutNewLine();
 			PutTabs(stack_len-2);
@@ -1494,7 +1621,7 @@ bool HTMLFilter::PrintRest()
 const wchar_t * start = pchar;

 	// in safe mode we do not print the rest html code
-	if( safe_mode || skip_all_tags )
+	if( safe_mode || skip_tags )
 		return false;

 	while( *pchar )
--- a/winixd/core/htmlfilter.h
+++ b/winixd/core/htmlfilter.h
@@ -128,13 +128,11 @@ public:
 	// false by default
 	void TrimWhite(bool trim);

-
 	// first tabs in a tree
 	// default: 2 (spaces)
 	// set 0 to turn off
 	void InsertTabs(size_t tabsize);

-
 	// set a name of a html tag which will be used as 'nofilter' tag
 	// elements between such tags are not filtered (similarly as in <pre> and <textarea>)
 	// these tags (opening and closing) will not be placed in the html output
@@ -145,20 +143,32 @@ public:
 	void AssignOrphans(const std::wstring & lang_code, const std::vector<std::wstring> & otab);
 	void ClearOrphans();

-
 	// check 'orphans' for the specific language
 	// if an orphan is detected then the non-break space ("&nbsp;" or ascii 160 code) will be put
 	// default disable (lang_none)
 	void OrphansMode(OrphanMode mode = orphan_nbsp);

-
 	// skipping some unsafe tags
 	// (script, iframe, frame, frameset, applet, head, meta, html, link, body, ...)
 	void SafeMode(bool safe_mode_);

 	// skip all html tags
 	// gives only text without markup
-	void SkipAllTags(bool skip_all_tags, bool skip_commentaries);
+	// but there can be commentaries
+	void SkipTags(bool skip_tags);
+
+	// skip commentaries
+	void SkipCommentaries(bool skip_commentaries);
+
+	// if true then entities such as &nbsp; are skipped
+	// this automatically turns on AnalyzeEntities
+	// in such a case EntityFound callbacks are sent
+	void SkipEntities(bool skip_entities);
+
+	// analyze html entities such as &nbsp;
+	// virtual method: EntityFound is called
+	// entities are analyzed in normal text and in attribute values such as <p class="a&nbsp;">
+	void AnalyzeEntities(bool analyze_entities);


 protected:
@@ -219,13 +229,18 @@ protected:
 	virtual void Init();
 	virtual void Uninit();

-	virtual bool IsOpeningTagMark();
-	virtual bool IsOpeningCommentaryTagMark();
-	virtual bool IsClosingTagMark();
-	virtual bool IsClosingXmlSimpleTagMark();
+	virtual bool IsOpeningTagMark(wchar_t c);
+	virtual bool IsClosingTagMark(wchar_t c);
+	virtual bool IsClosingXmlSimpleTagMark(wchar_t c);
+	virtual bool IsStartingEntityMark(wchar_t c);
+	virtual bool IsEndingEntityMark(wchar_t c);
+
+	virtual bool   IsOpeningCommentaryTagMark(const wchar_t * str);
+	virtual size_t OpeningCommentaryTagMarkSize();

 	virtual bool IsValidCharForName(int c);
 	virtual bool IsValidCharForAttrName(int c);
+	virtual bool IsValidCharForEntityName(int c);
 	virtual void CheckExceptions();
 	virtual bool SkipCommentaryTagIfExists();

@@ -233,6 +248,7 @@ protected:
 	virtual void Put(const wchar_t * str);
 	virtual void Put(const wchar_t * str, const wchar_t * end);
 	virtual void Put(const std::wstring & str);
+	virtual void AnalyzeEntitiesAndPut(const wchar_t * str, const wchar_t * end, std::wstring * out);

 	virtual void PutOpeningTagMark();
 	virtual void PutClosingTagMark();
@@ -243,6 +259,7 @@ protected:
 	virtual void ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white);

 	virtual void ItemFound();
+	virtual void EntityFound(const wchar_t * str, const wchar_t * end);

 	/*
 		others
@@ -280,7 +297,6 @@ protected:
 	void SkipWhiteWithFirstNewLine();
 	void SkipWhiteLines(const wchar_t * & str, const wchar_t * end);
 	bool IsClosingTagForLastItem();
-	size_t OpeningCommentaryTagMarkSize();
 	void SkipAndCheckClosingTag();

 	void PopStack();
@@ -294,6 +310,7 @@ protected:
 	bool PrintOpeningItem();
 	void ReadItemName();
 	void ReadItemAttrName();
+	void ReadItemAttrValueAdd(const wchar_t * value_start, const wchar_t * value_end);
 	void ReadItemAttrValue(bool has_quote, wchar_t quote_char);

 	bool ReadItemAttr();
@@ -310,7 +327,7 @@ protected:
 	void CheckChar(wchar_t c);

 	void CheckLineWrap();
-	bool HasSemiloconAround(const wchar_t * str, const wchar_t * end);
+	bool HasEntityEndAround(const wchar_t * str, const wchar_t * end);
 	void PutNormalNonWhite(const wchar_t * & str, const wchar_t * end);
 	void PutNormalWhite(const wchar_t * & str, const wchar_t * end);
 	void PutEverythingUntilClosingTag(bool put_closing_tag_as_well);
@@ -340,8 +357,10 @@ protected:
 	size_t line_len;		//length of the current line (without first spaces which create the html tree)
 	bool safe_mode;			// skipping some unsafe tags
 	Orphans orphans_temp;
-	bool skip_all_tags;
+	bool skip_tags;
 	bool skip_commentaries;
+	bool skip_entities;
+	bool analyze_entities;
 };