winix/core/bbcodeparser.cpp

/*
 * This file is a part of Winix
 * and is not publicly distributed
 *
 * Copyright (c) 2008-2010, Tomasz Sowa
 * All rights reserved.
 *
 */

#include "bbcodeparser.h"


// Compares two null-terminated wide strings character by character.
// Returns true only when both have the same length and the same content
// (the comparison is exact, so it is case-sensitive).
bool BBCODEParser::Equal(const wchar_t * str1, const wchar_t * str2)
{
	for( ; *str1 != 0 ; ++str1, ++str2 )
	{
		if( *str1 != *str2 )
			return false;
	}

	// str1 is exhausted: the strings are equal only if str2 ends here as well
	return *str2 == 0;
}


// Tells whether 'c' may be a part of a bbcode tag name.
// Accepted are ascii letters (both cases) and the two special
// characters '*' and '_' ('*' is needed for the list item tag [*]).
bool BBCODEParser::IsValidCharForName(int c)
{
	const bool is_lower_letter = (c >= 'a' && c <= 'z');
	const bool is_upper_letter = (c >= 'A' && c <= 'Z');
	const bool is_special      = (c == '*' || c == '_');

	return is_lower_letter || is_upper_letter || is_special;
}


// A bbcode tag begins with '['.
// Returns true when the character at the current position (pchar) is that bracket.
bool BBCODEParser::IsOpeningTagMark()
{
	const wchar_t current = *pchar;

	return current == '[';
}


// bbcode has no comment syntax, so the current position is never
// reported as the beginning of a commentary tag: this always returns false
bool BBCODEParser::IsOpeningCommentaryTagMark()
{
	return false;
}


// there are no commentaries in bbcode so there is nothing to skip:
// the parser position is not touched here and false is always returned
bool BBCODEParser::SkipCommentaryTagIfExists()
{
	return false;
}


// A bbcode tag ends with ']'.
// Returns true when the character at the current position (pchar) is that bracket.
bool BBCODEParser::IsClosingTagMark()
{
	const wchar_t current = *pchar;

	return current == ']';
}


// always returns false
// NOTE(review): presumably bbcode has no counterpart of the xml self-closing
// form (like <br />) so such a mark is never reported -- confirm against the base filter
bool BBCODEParser::IsClosingXmlSimpleTagMark()
{
	return false;
}


// Writes the plain text from the range [str, end) to the output.
// Each character is escaped by PrintEscape(), new line characters (code 10)
// are converted in the following way:
//  - a single new line produces one "<br>\n"
//  - two or more new lines in a row (white characters between them are ignored)
//    produce exactly two of them
// When one of the list tags (ol, ul, li) is open the new lines are consumed
// but no <br> is written.
// If the parser has reached the end of the input (*pchar == 0) then the white
// characters and new lines at the end of the range are cut off first.
void BBCODEParser::PutNormalText(const wchar_t * str, const wchar_t * end)
{
	if( *pchar == 0 )
	{
		// this is the last piece of the user text: drop its trailing white characters
		for( ; str < end ; --end )
		{
			const wchar_t last = *(end-1);

			if( !IsWhite(last) && last != 10 )
				break;
		}
	}

	while( str < end )
	{
		if( *str != 10 )
		{
			// an ordinary character
			PrintEscape(*str);
			++str;
			continue;
		}

		// a new line: consume it together with the white characters after it
		++str;
		int breaks = 1;

		while( str < end && IsWhite(*str) )
			++str;

		if( str < end && *str == 10 )
		{
			// a second new line follows: everything up to the next
			// non-white character is treated as one double break
			breaks = 2;

			while( str < end && (IsWhite(*str) || *str==10) )
				++str;
		}

		const bool inside_list = has_open_ol_tag || has_open_ul_tag || has_open_li_tag;

		if( !inside_list )
		{
			for(int i=0 ; i < breaks ; ++i)
				(*out_string) += L"<br>\n";
		}
	}
}


// Trimming variant of PutNormalText().
// The bbcode parser does not trim here: the range [str, end) is passed
// unchanged to PutNormalText().
void BBCODEParser::PutNormalTextTrim(const wchar_t * str, const wchar_t * end)
{
	// we don't use trimming in bbcode parser
	PutNormalText(str, end);
}


// Empty body: neither 'start' nor 'last_non_white' is modified
// and the parser position is left as it is.
// NOTE(review): presumably this replaces a white-characters-skipping step of the
// base filter so that the white characters are kept in bbcode -- confirm in the header
void BBCODEParser::ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white)
{
}


// Special case for list items: when the two topmost stack entries are both
// opening [*] tags, the newest one is removed from the stack so that
// consecutive items do not nest in each other.
void BBCODEParser::CheckExceptions()
{
	if( stack_len < 2 )
		return;

	// both topmost entries have to be opening tags
	if( pstack[stack_len-1].type != Item::opening )
		return;

	if( pstack[stack_len-2].type != Item::opening )
		return;

	// and both of them have to be list items
	if( !IsNameEqual(L"*", pstack[stack_len-1].name) )
		return;

	if( !IsNameEqual(L"*", pstack[stack_len-2].name) )
		return;

	// dropping the last [*] from the stack
	// (the closing </li> was put automatically)
	PopStack();
}


/*
	Looks for the description of a bbcode tag.
	Returns a pointer to the matching entry of the static table below or 0
	when 'tag' is not a known bbcode tag (names are compared exactly, case-sensitive).

	bbcode syntax:
	  [bbcodetag=value]some text[/bbcodetag]
	the value may be put in quotes:
	  [bbcodetag="value"]some text[/bbcodetag]
	  [bbcodetag='value']some text[/bbcodetag]

	The third string of each table entry is the 'html_argument' member of Tags.
	It is a pattern in which the character % starts a special sequence:
	  %1  - "value" escaped for html
	  %2  - "some text" escaped for html
	  %u1 - "value" trimmed and encoded for an url
	  %u2 - "some text" trimmed and encoded for an url
	  %%  - a single %

	When %2 or %u2 is used "some text" is not parsed as bbcode, e.g.
	  [bbcodetag=value]some [b]text[/b][/bbcodetag]
	gives:
	  <htmltag arg="value">some [b]text[/b]</htmltag>
	(the inner [b][/b] tags were left as they were)

	Also with %2 or %u2 the closing bbcode tag is skipped
	(if the closing html tag is needed it can be put directly in 'html_argument').

	With the 'u' variants (%u1, %u2) white characters and new lines are removed
	from the beginning and from the end of the argument, otherwise a space
	would be changed to %20 which is probably not what was intended.
*/
const BBCODEParser::Tags * BBCODEParser::FindTag(const wchar_t * tag)
{
	static Tags tags[] = {
		{L"*",     L"li",    L">",                        false},
		{L"b",     L"em",    L">",                        true},
		{L"i",     L"span",  L" class=\"bbitalic\">",     true},
		{L"u",     L"span",  L" class=\"bbunderline\">",  true},
		{L"s",     L"span",  L" class=\"bbstrike\">",     true},
		{L"code",  L"code",  L" class=\"bbcode\">",       false},
		{L"list",  L"ul",    L" class=\"bblist\">",       false},
		{L"color", L"span",  L" class=\"bbcol%1\">",      true},
		{L"url",   L"a",     L" href=\"%u1\">",           true},
		{L"img",   L"img",   L" alt=\"%1\" src=\"%u2\">", true},
		{L"quote", L"div",   L" class=\"bbquote\">\n<span class=\"bbquotewho\">%1</span><br>\n", false},
	};

	const size_t tags_count = sizeof(tags) / sizeof(tags[0]);
	const Tags * const table_end = tags + tags_count;

	for(const Tags * entry = tags ; entry != table_end ; ++entry)
	{
		if( Equal(tag, entry->bbcode) )
			return entry;
	}

	// not a bbcode tag
	return 0;
}


// Narrows the range [start, end) to the bare value of a tag argument.
// Nothing is printed here, only the two pointers (passed by reference) are moved:
//  - white characters at the beginning are skipped
//  - one leading '=' is skipped, together with white characters after it
//  - white characters at the end are cut off
//  - if the value begins with ' or " that quote is skipped, the same quote
//    character at the end (if present) is cut off, and white characters
//    directly after the opening quote are skipped
void BBCODEParser::PrintArgumentCheckQuotes(const wchar_t * & start, const wchar_t * & end)
{
	// white characters before the optional '='
	while( start<end && IsWhite(*start) )
		++start;

	if( start<end && *start == '=' )
		++start;

	// white characters around the value itself
	while( start<end && IsWhite(*start) )
		++start;

	while( start<end && IsWhite(*(end-1)) )
		--end;

	if( start >= end )
		return; // empty value

	const wchar_t quote = *start;

	if( quote != '\'' && quote != '\"' )
		return; // the value is not quoted

	++start;

	// the closing quote is removed only if it is the same character as the opening one
	if( start<end && quote == *(end-1) )
		--end;

	// white characters after the opening quote: [url  =  "   ww...."]
	while( start<end && IsWhite(*start) )
		++start;
}


// Appends one character to the output, encoded for an url which is placed
// in an html attribute:
//  - '&' is written as the html entity &amp;
//  - ascii letters, digits and the characters _ ? . , / - + * ( ) = :
//    are copied without any change
//  - every other character is converted to UTF-8 and each octet is written
//    as %XX (two uppercase hex digits)
//
// Percent-encoding needs exactly two hex digits per octet, so a character
// above 0x7F cannot be printed as one number after '%' (U+0142 would give
// "%142" which is read back as "%14" followed by "2"); this is why such
// a character is split into UTF-8 octets first (U+0142 gives "%C5%82").
//
// A value which is not a valid code point (negative or above 0x10FFFF)
// is replaced with U+FFFD.
// NOTE(review): with a 16-bit wchar_t the two halves of a surrogate pair come
// here one by one and are encoded separately -- joining them would have to be
// done by the caller.
void BBCODEParser::PrintEncode(int c)
{
	if( c == '&' )
	{
		(*out_string) += L"&amp;";
		return;
	}

	if( (c>='a' && c<='z') ||
		(c>='A' && c<='Z') ||
		(c>='0' && c<='9') ||
		(c=='_' || c=='?' || c=='.' || c==',' || c=='/' || c=='-' ||
		 c=='+' || c=='*' || c=='(' || c==')' || c=='=' || c==':')
		)
	{
		(*out_string) += c;
		return;
	}

	unsigned long code = 0xFFFD;

	if( c >= 0 && c <= 0x10FFFF )
		code = static_cast<unsigned long>(c);

	// converting the code point to UTF-8
	unsigned char octets[4];
	int octets_len;

	if( code < 0x80 )
	{
		octets[0] = static_cast<unsigned char>(code);
		octets_len = 1;
	}
	else
	if( code < 0x800 )
	{
		octets[0] = static_cast<unsigned char>(0xC0 | (code >> 6));
		octets[1] = static_cast<unsigned char>(0x80 | (code & 0x3F));
		octets_len = 2;
	}
	else
	if( code < 0x10000 )
	{
		octets[0] = static_cast<unsigned char>(0xE0 | (code >> 12));
		octets[1] = static_cast<unsigned char>(0x80 | ((code >> 6) & 0x3F));
		octets[2] = static_cast<unsigned char>(0x80 | (code & 0x3F));
		octets_len = 3;
	}
	else
	{
		octets[0] = static_cast<unsigned char>(0xF0 | (code >> 18));
		octets[1] = static_cast<unsigned char>(0x80 | ((code >> 12) & 0x3F));
		octets[2] = static_cast<unsigned char>(0x80 | ((code >> 6) & 0x3F));
		octets[3] = static_cast<unsigned char>(0x80 | (code & 0x3F));
		octets_len = 4;
	}

	static const wchar_t hex_digits[] = L"0123456789ABCDEF";

	for(int i=0 ; i < octets_len ; ++i)
	{
		(*out_string) += '%';
		(*out_string) += hex_digits[octets[i] >> 4];
		(*out_string) += hex_digits[octets[i] & 0x0F];
	}
}


// Appends one character to the output, escaped for html:
//  <  is written as &lt;
//  >  is written as &gt;
//  &  is written as &amp;
//  "  is written as &quot; but only when change_quote is true
// any other character is copied without a change.
void BBCODEParser::PrintEscape(int c, bool change_quote)
{
	switch( c )
	{
	case '<':
		(*out_string) += L"&lt;";
		break;

	case '>':
		(*out_string) += L"&gt;";
		break;

	case '&':
		(*out_string) += L"&amp;";
		break;

	case '\"':
		if( change_quote )
			(*out_string) += L"&quot;";
		else
			(*out_string) += c;
		break;

	default:
		(*out_string) += c;
		break;
	}
}


// Prints a tag argument encoded for an url.
// The '=' character, quotes and surrounding white characters are removed
// first (PrintArgumentCheckQuotes), then white characters and new lines are
// trimmed from both ends and each remaining character goes through PrintEncode().
void BBCODEParser::PrintArgumentEncode(const wchar_t * start, const wchar_t * end)
{
	PrintArgumentCheckQuotes(start, end);
	TrimWhiteWithNewLines(start, end);

	while( start < end )
	{
		PrintEncode(*start);
		++start;
	}
}


// Prints a tag argument escaped for html.
// The '=' character, quotes and surrounding white characters are removed
// first (PrintArgumentCheckQuotes), then each remaining character goes
// through PrintEscape() with quote escaping turned on.
void BBCODEParser::PrintArgumentEscape(const wchar_t * start, const wchar_t * end)
{
	PrintArgumentCheckQuotes(start, end);

	while( start < end )
	{
		// the value can be put in an html attribute so " is changed too
		PrintEscape(*start, true);
		++start;
	}
}


// Helper for the list tags: does something only if the html tag of 'tag'
// is 'tag_name'. In such a case, when 'condition' tells that the same tag is
// already open, the closing tag and a new line are printed first; at the end
// 'condition' is set to true (the tag is open from now on).
// 'condition' is a reference to one of the has_open_*_tag flags.
void BBCODEParser::CheckOpeningTag(const Tags * tag, const wchar_t * tag_name, bool & condition)
{
	if( !Equal(tag->html_tag, tag_name) )
		return;

	if( condition )
	{
		// the previous tag of this kind was not closed by the user
		PutClosingTag(tag);
		(*out_string) += '\n';
	}

	condition = true;
}


// Keeps the list structure consistent before an opening tag is printed:
// a still open li, ul or ol of the same kind is closed and the proper
// has_open_*_tag flag is set. If a list item ends up open while no list
// (ul or ol) was open before this call, "<ul>\n" is printed and
// has_open_ul_tag is set.
void BBCODEParser::CheckOpeningTag(const Tags * tag)
{
	// the state from before the calls below (they can change the flags)
	const bool list_was_open = has_open_ul_tag || has_open_ol_tag;

	CheckOpeningTag(tag, L"li", has_open_li_tag);
	CheckOpeningTag(tag, L"ul", has_open_ul_tag);
	CheckOpeningTag(tag, L"ol", has_open_ol_tag);

	const bool item_without_list = has_open_li_tag && !list_was_open;

	if( item_without_list )
	{
		(*out_string) += L"<ul>\n";
		has_open_ul_tag = true;
	}
}


// Prints the range [start, end) escaped for html, character by character;
// change_quote is passed on to the single character version.
void BBCODEParser::PrintEscape(const wchar_t * start, const wchar_t * end, bool change_quote)
{
	while( start < end )
	{
		PrintEscape(*start, change_quote);
		++start;
	}
}


// Prints the range [start, end) encoded for an url, character by character.
void BBCODEParser::PrintEncode(const wchar_t * start, const wchar_t * end)
{
	while( start < end )
	{
		PrintEncode(*start);
		++start;
	}
}


// Prints a tag in the same bracket form in which it was read: [name arguments].
// It is used for a name which is not a bbcode tag -- it can be a tag from
// the Ezc templates system. The name of the last stack item is written as it is,
// the arguments from [start, end) (if there are any) are preceded by one space
// and printed through PrintEscape().
void BBCODEParser::PutOpeningTagFromEzc(const wchar_t * start, const wchar_t * end)
{
	const bool has_arguments = (start != end);

	(*out_string) += '[';
	(*out_string) += LastItem().name;

	if( has_arguments )
	{
		(*out_string) += ' ';
		PrintEscape(start, end);
	}

	(*out_string) += ']';
}


// Handles %1 and %u1 from the html_argument pattern: prints the value of the
// tag (the range [arg_start, arg_end)) encoded for an url when has_u is true,
// escaped for html otherwise.
void BBCODEParser::PutHtmlArgument1(const wchar_t * arg_start, const wchar_t * arg_end, bool has_u)
{
	if( !has_u )
	{
		PrintArgumentEscape(arg_start, arg_end);
		return;
	}

	PrintArgumentEncode(arg_start, arg_end);
}


// Shrinks the range [start, end) from both sides as long as its first/last
// character is a white character or a new line (code 10).
// Both pointers are passed by reference and are modified in place.
void BBCODEParser::TrimWhiteWithNewLines(const wchar_t * & start, const wchar_t * & end)
{
	// the beginning
	for( ; start < end ; ++start )
	{
		const wchar_t first = *start;

		if( !IsWhite(first) && first != 10 )
			break;
	}

	// the end
	for( ; start < end ; --end )
	{
		const wchar_t last = *(end-1);

		if( !IsWhite(last) && last != 10 )
			break;
	}
}


// Handles %2 and %u2 from the html_argument pattern.
// Reads the input from the current position (pchar) up to the closing tag of
// the last stack item (or to the end of the input) and prints that text:
// trimmed and encoded for an url when has_u is true, escaped for html otherwise.
// The text is not parsed as bbcode. The last item is always removed from the
// stack here, no matter whether its closing tag has been found.
// The 'tag' parameter is not used in this body.
void BBCODEParser::PutHtmlArgument2(const Tags * tag, bool has_u)
{
// [start, end) is the text collected so far
const wchar_t * start = pchar;
const wchar_t * end   = pchar;
bool first_tag_removed = false;

	while( *pchar != 0 )
	{
		if( IsOpeningTagMark() )
		{
			// NOTE(review): pchar is not moved in this branch, so the loop can go on
			// only if IsClosingTagForLastItem() (defined elsewhere) advances pchar
			// also when it returns false -- confirm
			if( IsClosingTagForLastItem() )
			{
				// the last tag is skipped when using patterns with %2 or %u2

				PopStack(); // removing opening tag from the stack
				first_tag_removed = true;
				break;
			}
		}
		else
		{
			// an ordinary character: it becomes a part of the collected text
			pchar += 1;
			end = pchar;
		}
	}

	if( !first_tag_removed )
		PopStack(); // user has forgotten to close the tag

	if( has_u )
	{
		// %u2: white characters and new lines are cut from both ends before encoding
		TrimWhiteWithNewLines(start, end);
		PrintEncode(start, end);
	}
	else
	{
		// %2
		PrintEscape(start, end);
	}
}


// Prints the html_argument pattern of 'tag', expanding the % sequences:
//  %1, %u1 - the value of the tag, the range [arg_start, arg_end) (PutHtmlArgument1)
//  %2, %u2 - the text up to the closing bbcode tag (PutHtmlArgument2)
//  %%      - a single %
// All other characters of the pattern are copied as they are.
// For an unrecognized sequence the '%' (and the 'u' if it was there) is dropped
// and the character after it is printed as a normal one.
void BBCODEParser::PutHtmlArgument(const Tags * tag, const wchar_t * arg_start, const wchar_t * arg_end)
{
	const wchar_t * p = tag->html_argument;

	while( *p != 0 )
	{
		if( *p != '%' )
		{
			// a normal character of the pattern
			(*out_string) += *p;
			++p;
			continue;
		}

		// skipping '%'
		++p;

		// the optional 'u' selects the url encoding
		bool url_variant = false;

		if( *p == 'u' )
		{
			url_variant = true;
			++p;
		}

		switch( *p )
		{
		case '1':
			++p;
			PutHtmlArgument1(arg_start, arg_end, url_variant);
			break;

		case '2':
			++p;
			PutHtmlArgument2(tag, url_variant);
			break;

		case '%':
			(*out_string) += '%';
			++p;
			break;

		default:
			// unrecognized, this character will be printed
			// in the next iteration as a normal one
			break;
		}
	}
}


// Prints the html opening tag for a known bbcode tag: the list state is
// checked first (CheckOpeningTag), then the opening mark, the html tag name
// and the expanded html_argument pattern are written.
// After a tag which is not an inline one a new line is added and
// SkipWhiteLines() is called.
void BBCODEParser::PutOpeningTagFromBBCode(const Tags * tag, const wchar_t * start, const wchar_t * end)
{
	CheckOpeningTag(tag);

	PutOpeningTagMark();
	(*out_string) += tag->html_tag;
	PutHtmlArgument(tag, start, end);

	const bool block_tag = !tag->inline_tag;

	if( block_tag )
	{
		(*out_string) += L"\n";
		SkipWhiteLines();
	}
}


void BBCODEParser::PutOpeningTag(const wchar_t * start, const wchar_t * end)
{
	const Tags * tag = FindTag(LastItem().name);

	if( !tag )
	{
		PutOpeningTagFromEzc(start, end);
	}
	else
	{
		PutOpeningTagFromBBCode(tag, start, end);
	}
}


// Prints the html closing tag for 'tag'; a null pointer means that there is
// nothing to print. After a tag which is not an inline one a new line is added
// and SkipWhiteLines() is called. Closing li, ol or ul clears the
// corresponding has_open_*_tag flag.
void BBCODEParser::PutClosingTag(const Tags * tag)
{
	if( tag )
	{
		PutOpeningTagMark();
		(*out_string) += '/';
		(*out_string) += tag->html_tag;
		PutClosingTagMark();

		const bool block_tag = !tag->inline_tag;

		if( block_tag )
		{
			(*out_string) += L"\n";
			SkipWhiteLines();
		}

		// the list flags: the html tag can match at most one of the names
		const wchar_t * html = tag->html_tag;

		if( Equal(html, L"li") )
			has_open_li_tag = false;
		else
		if( Equal(html, L"ol") )
			has_open_ol_tag = false;
		else
		if( Equal(html, L"ul") )
			has_open_ul_tag = false;
	}
}


void BBCODEParser::PutClosingTag(const wchar_t * tag_name)
{
	const Tags * tag = FindTag(tag_name);
	PutClosingTag(tag);
}


// Prepares the parser state: no list tag (li, ol, ul) is open at the
// beginning, then SkipWhiteLines() is called.
void BBCODEParser::Init()
{
	has_open_li_tag = has_open_ol_tag = has_open_ul_tag = false;

	SkipWhiteLines();
}


// Closes the list tags which are still open at the end of the parsing.
// The closing tags are written in the order: </li>, </ol>, </ul>.
// The has_open_*_tag flags are only read here, they are not cleared.
void BBCODEParser::Deinit()
{
	const struct
	{
		bool is_open;
		const wchar_t * closing_html;
	}
	pending[] = {
		{ has_open_li_tag, L"</li>\n" },
		{ has_open_ol_tag, L"</ol>\n" },
		{ has_open_ul_tag, L"</ul>\n" },
	};

	const size_t pending_len = sizeof(pending) / sizeof(pending[0]);

	for(size_t i=0 ; i < pending_len ; ++i)
	{
		if( pending[i].is_open )
			(*out_string) += pending[i].closing_html;
	}
}