added BBCODEParser (html/bbcodeparser.h|cpp) - copied from winix project

2021-07-17 13:54:03 +02:00
parent bdb2616f32
commit 2a3f43c5c3
3 changed files with 768 additions and 0 deletions
@@ -43,3 +43,4 @@
 ./mainoptions/mainoptionsparser.o: textstream/stream.h utf8/utf8_templates.h
 ./mainoptions/mainoptionsparser.o: utf8/utf8_private.h
 ./html/htmlfilter.o: ./html/htmlfilter.h
+./html/bbcodeparser.o: ./html/bbcodeparser.h ./html/htmlfilter.h
@@ -0,0 +1,639 @@
+/*
+ * This file is a part of PikoTools
+ * and is distributed under the (new) BSD licence.
+ * Author: Tomasz Sowa <t.sowa@ttmath.org>
+ */
+
+/* 
+ * Copyright (c) 2008-2021, Tomasz Sowa
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ *  * Neither the name Tomasz Sowa nor the names of contributors to this
+ *    project may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "bbcodeparser.h"
+
+
+namespace pt
+{
+
+
+
+
+// Compares two null-terminated wide strings character by character.
+// Returns true only when both strings have the same length and content
+// (the comparison is case sensitive).
+bool BBCODEParser::Equal(const wchar_t * str1, const wchar_t * str2)
+{
+	for( ; *str1 != 0 ; ++str1, ++str2 )
+	{
+		if( *str1 != *str2 )
+			return false;
+	}
+
+	// str1 is exhausted, the strings match only if str2 ends here as well
+	return *str2 == 0;
+}
+
+
+
+
+// Tells whether 'c' may be a part of a bbcode tag name:
+// ASCII letters, an asterisk (used by the [*] list item tag) and an underscore.
+bool BBCODEParser::IsValidCharForName(int c)
+{
+	const bool is_lower_letter = (c >= 'a' && c <= 'z');
+	const bool is_upper_letter = (c >= 'A' && c <= 'Z');
+
+	return is_lower_letter || is_upper_letter || c == '*' || c == '_';
+}
+
+
+// a bbcode tag starts with a left square bracket
+bool BBCODEParser::IsOpeningTagMark(wchar_t c)
+{
+	return c == L'[';
+}
+
+
+// a bbcode tag ends with a right square bracket
+bool BBCODEParser::IsClosingTagMark(wchar_t c)
+{
+	return c == L']';
+}
+
+// Always returns false: no character is ever reported as
+// an xml simple tag closing mark by this parser (the argument is ignored).
+// NOTE(review): presumably the base filter uses this for the '/' in tags
+// like <br /> -- confirm against HTMLFilter.
+bool BBCODEParser::IsClosingXmlSimpleTagMark(wchar_t c)
+{
+	return false;
+}
+
+
+
+// there are no commentaries in bbcode,
+// so no input is ever recognized as the beginning of a commentary
+// (the argument is ignored and false is always returned)
+bool BBCODEParser::IsOpeningCommentaryTagMark(const wchar_t *)
+{
+	return false;
+}
+
+
+// the size of the opening commentary mark is always reported as zero
+// (IsOpeningCommentaryTagMark() never recognizes a commentary in this parser)
+size_t BBCODEParser::OpeningCommentaryTagMarkSize()
+{
+	return 0;
+}
+
+
+
+// nothing is skipped here and false is always returned
+// (this parser does not recognize commentaries)
+bool BBCODEParser::SkipCommentaryTagIfExists()
+{
+	return false;
+}
+
+
+
+
+
+
+
+
+
+
+// Prints the text between two tags: [str, end)
+//
+// one new line character (10) generates one <br>
+// two or more new lines (white characters between them are ignored)
+// generate exactly two br (<br><br>)
+// no <br> is generated while a list tag (ol, ul or li) is open
+//
+// all other characters are printed html-escaped with PrintEscape()
+void BBCODEParser::PutNormalText(const wchar_t * str, const wchar_t * end)
+{
+	if( *pchar == 0 )
+	{
+		// this is the last piece of the user text:
+		// white characters and new lines at its end are cut off
+		for( ; str < end ; --end )
+		{
+			if( !(IsWhite(*(end-1)) || *(end-1) == 10) )
+				break;
+		}
+	}
+
+	while( str < end )
+	{
+		if( *str != 10 )
+		{
+			PrintEscape(*str);
+			++str;
+			continue;
+		}
+
+		// we have a new line character
+		++str;
+
+		// skipping white characters without a new line character
+		while( str < end && IsWhite(*str) )
+			++str;
+
+		const bool second_new_line = (str < end && *str == 10);
+
+		if( second_new_line )
+		{
+			// skipping white characters with new line characters
+			while( str < end && (IsWhite(*str) || *str == 10) )
+				++str;
+		}
+
+		const bool inside_list = has_open_ol_tag || has_open_ul_tag || has_open_li_tag;
+
+		if( !inside_list )
+		{
+			(*out_string) += L"<br>\n";
+
+			if( second_new_line )
+				(*out_string) += L"<br>\n";
+		}
+	}
+}
+
+
+
+// intentionally does nothing: both arguments are left untouched
+// NOTE(review): presumably the base class skips leading white characters here
+// and bbcode wants them preserved -- confirm against HTMLFilter
+void BBCODEParser::ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white)
+{
+}
+
+
+// Handles the special case of two consecutive opening [*] tags
+// on the top of the stack: the newer one is removed from the stack.
+void BBCODEParser::CheckExceptions()
+{
+	if( stack_len < 2 )
+		return;
+
+	const bool two_opening_on_top =
+		pstack[stack_len-1].type == Item::opening &&
+		pstack[stack_len-2].type == Item::opening;
+
+	if( !two_opening_on_top )
+		return;
+
+	if( IsNameEqual(L"*", pstack[stack_len-1].name) &&
+		IsNameEqual(L"*", pstack[stack_len-2].name) )
+	{
+		// removing the last [*] from the stack
+		// </li> was put automatically
+		PopStack();
+	}
+}
+
+
+
+
+/*
+	bbcode format:
+	[bbcodetag=value]some text[/bbcodetag]
+	the value can be quoted, e.g.
+	[bbcodetag="value"]some text[/bbcodetag], or
+	[bbcodetag='value']some text[/bbcodetag]
+
+	the third string below (in tags table) is 'html_argument' from Tags,
+	it can contain a special character % followed by a string which means:
+	 %1  - "value" escaped as for html
+	 %2  - "some text" escaped as for html
+	 %u1 - "value" trimmed and escaped as for url-es
+	 %u2 - "some text" trimmed and escaped as for url-es
+	 %%  - one %
+
+	 if you are using %2 or %u2 then "some text" is not treated as bbcode, e.g.
+	 [bbcodetag=value]some [b]text[/b][/bbcodetag] will produce:
+	 <htmltag arg="value">some [b]text[/b]</htmltag> (the inner tags [b][/b] were not parsed)
+
+	 also when using %2 or %u2 the closing bbcode tag is skipped 
+	 (if you want this tag then you can put it in 'html_argument')
+
+	 and when using u (%u1 or %u2) white characters and new lines are trimmed
+	 from the beginning and from the end of the argument
+	 (because otherwise a space would be changed to %20, which is probably not what you really wanted)
+*/
+// Looks for a bbcode tag description by its name (case sensitive).
+// Returns a pointer to an entry of the static table below
+// or a null pointer if the name is not a known bbcode tag.
+const BBCODEParser::Tags * BBCODEParser::FindTag(const wchar_t * tag)
+{
+	// columns: bbcode name, html tag, html argument pattern (with closing '>'), inline tag
+	static const Tags tags[] = {
+		{L"*",     L"li",    L">",                        false},
+		{L"b",     L"em",    L">",                        true},
+		{L"i",     L"span",  L" class=\"bbitalic\">",     true},
+		{L"u",     L"span",  L" class=\"bbunderline\">",  true},
+		{L"s",     L"span",  L" class=\"bbstrike\">",     true},
+		{L"code",  L"code",  L" class=\"bbcode\">",       false},
+		{L"list",  L"ul",    L" class=\"bblist\">",       false},
+		{L"color", L"span",  L" class=\"bbcol%1\">",      true},
+		{L"url",   L"a",     L" href=\"%u1\">",           true},
+		{L"img",   L"img",   L" alt=\"%1\" src=\"%u2\">", true},
+		{L"quote", L"div",   L" class=\"bbquote\">\n<span class=\"bbquotewho\">%1</span><br>\n", false},
+	};
+
+	const Tags * const tags_end = tags + sizeof(tags) / sizeof(tags[0]);
+
+	for(const Tags * entry = tags ; entry != tags_end ; ++entry)
+	{
+		if( Equal(tag, entry->bbcode) )
+			return entry;
+	}
+
+	return 0;
+}
+
+// convenience overload: forwards the string's character buffer
+// to FindTag(const wchar_t *) and returns its result
+const BBCODEParser::Tags * BBCODEParser::FindTag(const std::wstring & tag)
+{
+	return FindTag(tag.c_str());
+}
+
+
+
+// Narrows the range [start, end) of a tag argument (the text after a tag name)
+// to the value itself. Nothing is printed here, only the two pointers are moved:
+//  - white characters before and after an optional leading '=' are skipped
+//  - white characters at the end are skipped
+//  - an opening quote (' or ") is skipped; the last character is skipped too
+//    if it is the same quote character
+//  - white characters directly after the opening quote are skipped
+void BBCODEParser::PrintArgumentCheckQuotes(const wchar_t * & start, const wchar_t * & end)
+{
+	// white characters before '='
+	for( ; start < end && IsWhite(*start) ; ++start )
+	{
+	}
+
+	// the first '=' character if exists
+	if( start < end && *start == '=' )
+		++start;
+
+	// white characters at the beginning of the value
+	for( ; start < end && IsWhite(*start) ; ++start )
+	{
+	}
+
+	// and at the end
+	for( ; start < end && IsWhite(*(end-1)) ; --end )
+	{
+	}
+
+	if( start >= end )
+		return;
+
+	const wchar_t quote = *start;
+
+	if( quote != '\'' && quote != '\"' )
+		return;
+
+	++start;
+
+	// the closing quote is removed only if it matches the opening one
+	if( start < end && quote == *(end-1) )
+		--end;
+
+	// skipping white characters after a first quote char [url  =  "   ww...."]
+	for( ; start < end && IsWhite(*start) ; ++start )
+	{
+	}
+}
+
+
+
+// Prints one character encoded for use in an url placed in an html attribute.
+//
+//  - '&' is printed as the html entity &amp;
+//  - ASCII letters, digits and the characters _ ? . , / - + * ( ) = :
+//    are printed unchanged
+//  - any other character is percent-encoded: the code point is converted
+//    to UTF-8 and each byte is printed as %XX (upper case hex digits)
+//
+// Previously the numeric value of the character was printed after a single '%'
+// which gave an incorrect sequence for code points above 0x7F
+// (e.g. U+0105 was printed as "%105").
+//
+// A value which is not a valid code point (negative or above 0x10FFFF)
+// is replaced with U+FFFD (replacement character).
+// NOTE(review): if wchar_t is 16-bit (UTF-16) each surrogate half arrives here
+// separately and is encoded on its own -- confirm whether callers need
+// surrogate pairs to be joined before calling this method.
+void BBCODEParser::PrintEncode(int c)
+{
+	static const wchar_t hex_digits[] = L"0123456789ABCDEF";
+
+	if( c == '&' )
+	{
+		(*out_string) += L"&amp;";
+		return;
+	}
+
+	if( (c>='a' && c<='z') ||
+		(c>='A' && c<='Z') ||
+		(c>='0' && c<='9') ||
+		(c=='_' || c=='?' || c=='.' || c==',' || c=='/' || c=='-' ||
+		 c=='+' || c=='*' || c=='(' || c==')' || c=='=' || c==':')
+		)
+	{
+		(*out_string) += c;
+		return;
+	}
+
+	unsigned long code = 0xFFFD;
+
+	if( c >= 0 && c <= 0x10FFFF )
+		code = static_cast<unsigned long>(c);
+
+	// converting the code point to UTF-8
+	unsigned char bytes[4];
+	int bytes_len;
+
+	if( code < 0x80 )
+	{
+		bytes[0] = static_cast<unsigned char>(code);
+		bytes_len = 1;
+	}
+	else
+	if( code < 0x800 )
+	{
+		bytes[0] = static_cast<unsigned char>(0xC0 | (code >> 6));
+		bytes[1] = static_cast<unsigned char>(0x80 | (code & 0x3F));
+		bytes_len = 2;
+	}
+	else
+	if( code < 0x10000 )
+	{
+		bytes[0] = static_cast<unsigned char>(0xE0 | (code >> 12));
+		bytes[1] = static_cast<unsigned char>(0x80 | ((code >> 6) & 0x3F));
+		bytes[2] = static_cast<unsigned char>(0x80 | (code & 0x3F));
+		bytes_len = 3;
+	}
+	else
+	{
+		bytes[0] = static_cast<unsigned char>(0xF0 | (code >> 18));
+		bytes[1] = static_cast<unsigned char>(0x80 | ((code >> 12) & 0x3F));
+		bytes[2] = static_cast<unsigned char>(0x80 | ((code >> 6) & 0x3F));
+		bytes[3] = static_cast<unsigned char>(0x80 | (code & 0x3F));
+		bytes_len = 4;
+	}
+
+	// at most four "%XX" groups and a terminating zero
+	wchar_t buffer[4 * 3 + 1];
+	int buffer_len = 0;
+
+	for(int i=0 ; i < bytes_len ; ++i)
+	{
+		buffer[buffer_len++] = L'%';
+		buffer[buffer_len++] = hex_digits[bytes[i] >> 4];
+		buffer[buffer_len++] = hex_digits[bytes[i] & 0x0F];
+	}
+
+	buffer[buffer_len] = 0;
+	(*out_string) += buffer;
+}
+
+
+// Prints one character escaped for html:
+// < > & are changed to &lt; &gt; &amp;
+// a double quote is changed to &quot; only if change_quote is true,
+// every other character is printed unchanged
+void BBCODEParser::PrintEscape(int c, bool change_quote)
+{
+	const wchar_t * entity = 0;
+
+	if( c == '<' )
+		entity = L"&lt;";
+	else
+	if( c == '>' )
+		entity = L"&gt;";
+	else
+	if( c == '&' )
+		entity = L"&amp;";
+	else
+	if( c == '\"' && change_quote )
+		entity = L"&quot;";
+
+	if( entity )
+		(*out_string) += entity;
+	else
+		(*out_string) += c;
+}
+
+
+// Prints a tag argument encoded for an url (used for %u1):
+// the '=' character and quotes are removed first, then white characters
+// and new lines are trimmed from both ends
+void BBCODEParser::PrintArgumentEncode(const wchar_t * start, const wchar_t * end)
+{
+	PrintArgumentCheckQuotes(start, end);
+	TrimWhiteWithNewLines(start, end);
+
+	// the range overload prints character by character
+	PrintEncode(start, end);
+}
+
+
+// Prints a tag argument escaped for html (used for %1):
+// the '=' character and quotes are removed first
+void BBCODEParser::PrintArgumentEscape(const wchar_t * start, const wchar_t * end)
+{
+	PrintArgumentCheckQuotes(start, end);
+
+	// quotes are escaped as well here
+	const bool change_quote = true;
+	PrintEscape(start, end, change_quote);
+}
+
+
+// If the html tag of 'tag' is 'tag_name' then 'condition' (a flag saying
+// that such a tag is already open) is set to true.
+// If the flag was set before, the previous tag is closed first
+// and a new line is printed.
+void BBCODEParser::CheckOpeningTag(const Tags * tag, const wchar_t * tag_name, bool & condition)
+{
+	if( !Equal(tag->html_tag, tag_name) )
+		return;
+
+	const bool already_open = condition;
+
+	if( already_open )
+	{
+		PutClosingTag(tag);
+		(*out_string) += '\n';
+	}
+
+	condition = true;
+}
+
+
+// Updates the list flags (li, ul, ol) before an opening tag is printed.
+// A still open tag of the same kind is closed first.
+// If a list item is open and there was no list open when this method
+// was entered then <ul> is printed automatically.
+void BBCODEParser::CheckOpeningTag(const Tags * tag)
+{
+	// the state from before the flags are changed below
+	const bool list_was_open = has_open_ul_tag || has_open_ol_tag;
+
+	CheckOpeningTag(tag, L"li", has_open_li_tag);
+	CheckOpeningTag(tag, L"ul", has_open_ul_tag);
+	CheckOpeningTag(tag, L"ol", has_open_ol_tag);
+
+	if( list_was_open || !has_open_li_tag )
+		return;
+
+	(*out_string) += L"<ul>\n";
+	has_open_ul_tag = true;
+}
+
+
+
+
+
+// prints all characters from [start, end) escaped for html,
+// change_quote is passed to the one character version
+void BBCODEParser::PrintEscape(const wchar_t * start, const wchar_t * end, bool change_quote)
+{
+	while( start < end )
+	{
+		PrintEscape(*start, change_quote);
+		++start;
+	}
+}
+
+
+
+// prints all characters from [start, end) encoded for an url
+void BBCODEParser::PrintEncode(const wchar_t * start, const wchar_t * end)
+{
+	while( start < end )
+	{
+		PrintEncode(*start);
+		++start;
+	}
+}
+
+
+
+// Prints a tag which is not a known bbcode tag without any changes:
+// '[' + the tag name + the rest of the input up to (and including)
+// the nearest ']' or up to the end of the input.
+void BBCODEParser::PutOpeningTagFromEzc()
+{
+	// this can be a tag from Ezc templates system
+	(*out_string) += '[';
+	(*out_string) += LastItem().name;
+
+	const wchar_t * rest_begin = pchar;
+
+	for( ; *pchar ; ++pchar )
+	{
+		if( *pchar == ']' )
+		{
+			// the closing mark is copied as well
+			++pchar;
+			break;
+		}
+	}
+
+	Put(rest_begin, pchar);
+}
+
+
+
+
+
+// Prints the value of a tag ("value" from [tag=value]):
+// encoded for an url if has_u is true (%u1), escaped for html otherwise (%1)
+void BBCODEParser::PutHtmlArgument1(const wchar_t * arg_start, const wchar_t * arg_end, bool has_u)
+{
+	if( !has_u )
+	{
+		PrintArgumentEscape(arg_start, arg_end);
+		return;
+	}
+
+	PrintArgumentEncode(arg_start, arg_end);
+}
+
+
+
+// Moves 'start' forward and 'end' backward so that the range [start, end)
+// neither begins nor ends with a white character or a new line (10).
+void BBCODEParser::TrimWhiteWithNewLines(const wchar_t * & start, const wchar_t * & end)
+{
+	for( ; start < end ; ++start )
+	{
+		if( !(IsWhite(*start) || *start == 10) )
+			break;
+	}
+
+	for( ; start < end ; --end )
+	{
+		if( !(IsWhite(*(end-1)) || *(end-1) == 10) )
+			break;
+	}
+}
+
+
+
+// Prints "some text" from [tag]some text[/tag] (patterns %2 and %u2).
+//
+// The text is collected from the current position (pchar) up to the closing
+// tag of the last item from the stack or up to the end of the input.
+// Inner tags are not parsed, the closing tag itself is not printed,
+// and the item is always removed from the stack.
+//
+// If has_u is true the text is trimmed and encoded for an url,
+// otherwise it is escaped for html.
+//
+// NOTE(review): the 'tag' parameter is not used here.
+// NOTE(review): this loop relies on IsClosingTagForLastItem() moving pchar
+// forward also when it returns false -- confirm against HTMLFilter.
+void BBCODEParser::PutHtmlArgument2(const Tags * tag, bool has_u)
+{
+	const wchar_t * text_start = pchar;
+	const wchar_t * text_end   = pchar;
+	bool closing_tag_found = false;
+
+	while( !closing_tag_found && *pchar != 0 )
+	{
+		if( !IsOpeningTagMark(*pchar) )
+		{
+			// a normal character, it belongs to the text
+			pchar += 1;
+			text_end = pchar;
+			continue;
+		}
+
+		// the last tag is skipped when using patterns with %2 or %u2
+		if( IsClosingTagForLastItem() )
+			closing_tag_found = true;
+	}
+
+	// removing the opening tag from the stack
+	// (also when the user has forgotten to close the tag)
+	PopStack();
+
+	if( !has_u )
+	{
+		PrintEscape(text_start, text_end);
+		return;
+	}
+
+	TrimWhiteWithNewLines(text_start, text_end);
+	PrintEncode(text_start, text_end);
+}
+
+
+
+// Prints 'html_argument' of the tag with its special sequences expanded:
+//  %1  %u1 - the value of the tag, [arg_start, arg_end), see PutHtmlArgument1()
+//  %2  %u2 - the text after the tag, see PutHtmlArgument2()
+//  %%      - one %
+// Other characters of the pattern are printed unchanged.
+// In an unrecognized sequence the '%' (and 'u' if present) is dropped
+// and the following character is printed as a normal one.
+void BBCODEParser::PutHtmlArgument(const Tags * tag, const wchar_t * arg_start, const wchar_t * arg_end)
+{
+	const wchar_t * p = tag->html_argument;
+
+	while( *p )
+	{
+		if( *p != '%' )
+		{
+			(*out_string) += *p;
+			++p;
+			continue;
+		}
+
+		// skipping '%'
+		++p;
+
+		const bool has_u = (*p == 'u');
+
+		if( has_u )
+			++p;
+
+		switch( *p )
+		{
+		case '1':
+			++p;
+			PutHtmlArgument1(arg_start, arg_end, has_u);
+			break;
+
+		case '2':
+			++p;
+			PutHtmlArgument2(tag, has_u);
+			break;
+
+		case '%':
+			(*out_string) += '%';
+			++p;
+			break;
+
+		default:
+			// unrecognized, will be printed next time as a normal character
+			break;
+		}
+	}
+}
+
+
+// Prints the html opening tag for a known bbcode tag.
+//
+// On entry pchar points just after the tag name. The rest of the tag
+// (up to the nearest ']' or the end of the input) is the tag argument
+// and is used for expanding 'html_argument' of the tag.
+// For a tag which is not inline a new line is printed after the tag
+// and white lines following it in the input are skipped.
+//
+// The closing ']' is skipped BEFORE the html argument is expanded.
+// A pattern with %2 or %u2 reads "some text" starting from pchar, so pchar
+// must already point after the tag there. Previously the ']' was skipped
+// afterwards: it was included as the first character of "some text"
+// (e.g. [img]x[/img] gave src="%5Dx") and a ']' character following
+// the closing tag could be removed from the input.
+void BBCODEParser::PutOpeningTagFromBBCode(const Tags * tag)
+{
+	CheckOpeningTag(tag);
+	PutOpeningTagMark();
+	Put(tag->html_tag);
+
+	const wchar_t * arg_start = pchar;
+
+	while( *pchar && *pchar != ']' )
+		++pchar;
+
+	// the argument does not contain the closing mark
+	const wchar_t * arg_end = pchar;
+
+	if( *pchar == ']' )
+		++pchar;
+
+	PutHtmlArgument(tag, arg_start, arg_end);
+
+	if( !tag->inline_tag )
+	{
+		Put(10);
+		SkipWhiteLines();
+	}
+}
+
+
+// Prints the opening tag for the last item from the stack:
+// a known bbcode tag is changed to html, any other tag is printed
+// unchanged (it can be a tag from Ezc templates).
+// Always returns false.
+bool BBCODEParser::PutOpeningTag()
+{
+	if( const Tags * tag = FindTag(LastItem().name) )
+	{
+		PutOpeningTagFromBBCode(tag);
+	}
+	else
+	{
+		PutOpeningTagFromEzc();
+	}
+
+	return false;
+}
+
+
+// Prints the html closing tag for a bbcode tag and clears the matching
+// list flag (li, ol, ul). For a tag which is not inline a new line is
+// printed and white lines following the tag in the input are skipped.
+// A null pointer means an unknown tag: nothing is printed then.
+void BBCODEParser::PutClosingTag(const Tags * tag)
+{
+	if( tag == 0 )
+		return; // skipping the tag
+
+	PutOpeningTagMark();
+	(*out_string) += '/';
+	(*out_string) += tag->html_tag;
+	PutClosingTagMark();
+
+	if( !tag->inline_tag )
+	{
+		(*out_string) += L"\n";
+		SkipWhiteLines();
+	}
+
+	// html_tag can match at most one of the names below
+	if( Equal(tag->html_tag, L"li") )
+	{
+		has_open_li_tag = false;
+	}
+	else
+	if( Equal(tag->html_tag, L"ol") )
+	{
+		has_open_ol_tag = false;
+	}
+	else
+	if( Equal(tag->html_tag, L"ul") )
+	{
+		has_open_ul_tag = false;
+	}
+}
+
+
+// Prints the html closing tag for a bbcode tag given by its name,
+// an unknown name prints nothing (FindTag() returns a null pointer then)
+void BBCODEParser::PutClosingTag(const wchar_t * tag_name)
+{
+	PutClosingTag(FindTag(tag_name));
+}
+
+
+
+// Prepares the parser for a new text:
+// no list tag is open and white lines at the beginning of the input are skipped
+void BBCODEParser::Init()
+{
+	has_open_ul_tag = false;
+	has_open_ol_tag = false;
+	has_open_li_tag = false;
+
+	SkipWhiteLines();
+}
+
+
+// Closes list tags which are still marked as open at the end of parsing:
+// first the list item, then the lists (ol, ul)
+void BBCODEParser::Uninit()
+{
+	if( has_open_li_tag )
+	{
+		(*out_string) += L"</li>\n";
+	}
+
+	if( has_open_ol_tag )
+	{
+		(*out_string) += L"</ol>\n";
+	}
+
+	if( has_open_ul_tag )
+	{
+		(*out_string) += L"</ul>\n";
+	}
+}
+
+
+
+}
+
@@ -0,0 +1,128 @@
+/*
+ * This file is a part of PikoTools
+ * and is distributed under the (new) BSD licence.
+ * Author: Tomasz Sowa <t.sowa@ttmath.org>
+ */
+
+/* 
+ * Copyright (c) 2008-2021, Tomasz Sowa
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ *  * Neither the name Tomasz Sowa nor the names of contributors to this
+ *    project may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef headerfile_winix_core_bbcodeparser
+#define headerfile_winix_core_bbcodeparser
+
+#include "htmlfilter.h"
+
+namespace pt
+{
+
+
+/*
+	a parser which changes bbcode tags ([b], [url=...], [list], [*] ...) to html
+
+	it is built on top of HTMLFilter: the methods below override the virtual
+	methods of the base class so that tags are recognized by square brackets
+	and are printed as html tags defined in the tags table (see FindTag())
+
+	all members declared here are private
+*/
+class BBCODEParser : public HTMLFilter
+{
+
+	// a description of one bbcode tag
+	struct Tags
+	{
+		const wchar_t * bbcode;        // the name of the bbcode tag, e.g. "url"
+		const wchar_t * html_tag;      // the name of the html tag printed instead, e.g. "a"
+		const wchar_t * html_argument; // with closing '>'
+		bool inline_tag;               // if false a new line is printed after the tag
+	};
+
+
+	/*
+		virtual methods
+		(from HTMLFilter class)
+	*/
+	virtual void Init();
+	virtual void Uninit();
+
+	virtual bool IsOpeningTagMark(wchar_t c);
+	virtual bool IsClosingTagMark(wchar_t c);
+	virtual bool IsClosingXmlSimpleTagMark(wchar_t c);
+
+	virtual bool IsOpeningCommentaryTagMark(const wchar_t *);
+	virtual size_t OpeningCommentaryTagMarkSize();
+
+	virtual bool IsValidCharForName(int c);
+	virtual void CheckExceptions();
+	virtual bool SkipCommentaryTagIfExists();
+
+	virtual bool PutOpeningTag();
+	virtual void PutClosingTag(const wchar_t * tag);
+
+	virtual void PutNormalText(const wchar_t * str, const wchar_t * end);
+	virtual void ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white);
+
+
+
+	/*
+		others
+	*/
+
+	// returns true if two null-terminated strings are the same
+	bool Equal(const wchar_t * str1, const wchar_t * str2);
+
+	// expanding the html_argument pattern of a tag (%1, %u1, %2, %u2, %%)
+	void PutHtmlArgument1(const wchar_t * arg_start, const wchar_t * arg_end, bool has_u);
+	void PutHtmlArgument2(const Tags * tag, bool has_u);
+	void PutHtmlArgument(const Tags * tag, const wchar_t * arg_start, const wchar_t * arg_end);
+
+	// printing an opening tag: an unknown one (unchanged) or a known bbcode tag (as html)
+	void PutOpeningTagFromEzc();
+	void PutOpeningTagFromBBCode(const Tags * tag);
+
+	// looking for a tag description by the bbcode name, a null pointer if not found
+	const Tags * FindTag(const wchar_t * tag);
+	const Tags * FindTag(const std::wstring & tag);
+
+	// narrows [start, end) to the value of an argument (without '=' and quotes), prints nothing
+	void PrintArgumentCheckQuotes(const wchar_t * & start, const wchar_t * & end);
+
+	// printing one character: escaped for html or encoded for an url
+	void PrintEscape(int c, bool change_quote = false);
+	void PrintEncode(int c);
+
+	// printing a range of characters: escaped for html or encoded for an url
+	void PrintEscape(const wchar_t * start, const wchar_t * end, bool change_quote = false);
+	void PrintEncode(const wchar_t * start, const wchar_t * end);
+
+	// printing a tag argument: encoded for an url or escaped for html
+	void PrintArgumentEncode(const wchar_t * start, const wchar_t * end);
+	void PrintArgumentEscape(const wchar_t * start, const wchar_t * end);
+
+	// printing an html closing tag, a null pointer is ignored
+	void PutClosingTag(const Tags * tag);
+
+	// updating the list flags below before an opening tag is printed
+	void CheckOpeningTag(const Tags * tag, const wchar_t * tag_name, bool & condition);
+	void CheckOpeningTag(const Tags * tag);
+
+	// trimming white characters and new lines from both ends of [start, end)
+	void TrimWhiteWithNewLines(const wchar_t * & start, const wchar_t * & end);
+
+
+
+	bool has_open_ol_tag; // has open html <ol> tag
+	bool has_open_ul_tag; // has open html <ul> tag
+	bool has_open_li_tag; // has open html <li> tag
+};
+
+
+}
+
+
+#endif