winix/core/bbcodeparser.cpp

578 lines
11 KiB
C++
Executable File

/*
* This file is a part of Winix
* and is not publicly distributed
*
* Copyright (c) 2008-2010, Tomasz Sowa
* All rights reserved.
*
*/
#include "bbcodeparser.h"
bool BBCODEParser::Equal(const wchar_t * str1, const wchar_t * str2)
{
while( *str1 == *str2 && *str1 != 0 )
{
str1 += 1;
str2 += 1;
}
return *str1 == *str2;
}
bool BBCODEParser::IsValidCharForName(int c)
{
if( (c>='a' && c<='z') ||
(c>='A' && c<='Z') ||
c=='*' || c=='_')
return true;
return false;
}
bool BBCODEParser::IsOpeningTagMark()
{
return (*pchar == '[');
}
// there are no commentaries in bbcode
bool BBCODEParser::IsOpeningCommentaryTagMark()
{
return false;
}
bool BBCODEParser::SkipCommentaryTagIfExists()
{
return false;
}
bool BBCODEParser::IsClosingTagMark()
{
return (*pchar == ']');
}
bool BBCODEParser::IsClosingXmlSimpleTagMark()
{
return false;
}
// one enter will generate one <br>
// two enters or more will generate only two br (<br><br>)
void BBCODEParser::PutNormalText(const wchar_t * str, const wchar_t * end)
{
int br_len;
if( *pchar == 0 )
{
// trimming last white characters at end of the user text
while( str<end && (IsWhite(*(end-1)) || *(end-1)==10) )
--end;
}
while( str < end )
{
if( *str == 10 )
{
++str;
br_len = 1;
// skipping white characters without a new line character
while( str < end && IsWhite(*str) )
++str;
if( str < end && *str == 10 )
{
br_len = 2;
// skipping white characters with new line characters
while( str < end && (IsWhite(*str) || *str==10) )
++str;
}
if( !has_open_ol_tag && !has_open_ul_tag && !has_open_li_tag )
{
for(int i=0 ; i < br_len ; ++i)
(*out_string) += L"<br>\n";
}
}
else
{
PrintEscape(*str);
++str;
}
}
}
void BBCODEParser::PutNormalTextTrim(const wchar_t * str, const wchar_t * end)
{
// we don't use trimming in bbcode parser
PutNormalText(str, end);
}
void BBCODEParser::ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white)
{
}
void BBCODEParser::CheckExceptions()
{
if( stack_len >= 2 )
{
if( pstack[stack_len-1].type == Item::opening &&
pstack[stack_len-2].type == Item::opening &&
IsNameEqual(L"*", pstack[stack_len-1].name) &&
IsNameEqual(L"*", pstack[stack_len-2].name) )
{
// removing the last [*] from the stack
// </li> was put automatically
PopStack();
}
}
}
/*
bbcode format:
[bbcodetag=value]some text[/bbcodetag]
the value can be quoted, e.g.
[bbcodetag="value"]some text[/bbcodetag], or
[bbcodetag='value']some text[/bbcodetag]
the third string below (in tags table) is 'html_argument' from Tags,
it can contain a special character % followed by a string which means:
%1 - "value" escaped as for html
%2 - "some text" escaped as for html
%u1 - "value" trimmed and escaped as for url-es
%u2 - "some text" trimmed and escaped as for url-es
%% - one %
if you are using %2 or %u2 then "some text" is not treated as bbcode, e.g.
[bbcodetag=value]some [b]text[/b][/bbcodetag] will produce:
<htmltag arg="value">some [b]text[/b]</htmltag> (the inner tags [b][/b] were not parsed)
also when using %2 or %u2 the closing bbcode tag is skipped
(if you want this tag then you can put it in 'html_argument')
and when using u (%u1 or %u2) the argument is trimmed from whitespaces and new lines
at the beginning and at the end
(because otherwise a space would be changed to %20 and this were probably not what you really wanted)
*/
const BBCODEParser::Tags * BBCODEParser::FindTag(const wchar_t * tag)
{
static Tags tags[] = {
{L"*", L"li", L">", false},
{L"b", L"em", L">", true},
{L"i", L"span", L" class=\"bbitalic\">", true},
{L"u", L"span", L" class=\"bbunderline\">", true},
{L"s", L"span", L" class=\"bbstrike\">", true},
{L"code", L"code", L" class=\"bbcode\">", false},
{L"list", L"ul", L" class=\"bblist\">", false},
{L"color", L"span", L" class=\"bbcol%1\">", true},
{L"url", L"a", L" href=\"%u1\">", true},
{L"img", L"img", L" alt=\"%1\" src=\"%u2\">", true},
{L"quote", L"div", L" class=\"bbquote\">\n<span class=\"bbquotewho\">%1</span><br>\n", false},
};
size_t i;
size_t len = sizeof(tags) / sizeof(Tags);
for(i=0 ; i<len ; ++i)
{
if( Equal(tag, tags[i].bbcode) )
return &tags[i];
}
return 0;
}
void BBCODEParser::PrintArgumentCheckQuotes(const wchar_t * & start, const wchar_t * & end)
{
// skipping white characters from the argument
while( start<end && IsWhite(*start) )
++start;
// skipping first '=' character if exists
if( start<end && *start == '=' )
++start;
// skipping white characters from the argument
// at the beginning
while( start<end && IsWhite(*start) )
++start;
// and at the end
while( start<end && IsWhite(*(end-1)) )
--end;
if( start<end && (*start=='\'' || *start=='\"') )
{
++start;
if( start<end && *(start-1) == *(end-1) )
--end;
// skipping white characters after a first quote char [url = " ww...."]
while( start<end && IsWhite(*start) )
++start;
}
}
void BBCODEParser::PrintEncode(int c)
{
if( c == '&' )
{
(*out_string) += L"&amp;";
}
else
if( (c>='a' && c<='z') ||
(c>='A' && c<='Z') ||
(c>='0' && c<='9') ||
(c=='_' || c=='?' || c=='.' || c==',' || c=='/' || c=='-' ||
c=='+' || c=='*' || c=='(' || c==')' || c=='=' || c==':')
)
{
(*out_string) += c;
}
else
{
wchar_t buffer[20];
swprintf(buffer, 20, L"%02X", c);
(*out_string) += '%';
(*out_string) += buffer;
}
}
void BBCODEParser::PrintEscape(int c, bool change_quote)
{
if( c == '<' )
{
(*out_string) += L"&lt;";
}
else
if( c == '>' )
{
(*out_string) += L"&gt;";
}
else
if( c == '&' )
{
(*out_string) += L"&amp;";
}
else
if( c == '\"' && change_quote )
{
(*out_string) += L"&quot;";
}
else
{
(*out_string) += c;
}
}
void BBCODEParser::PrintArgumentEncode(const wchar_t * start, const wchar_t * end)
{
PrintArgumentCheckQuotes(start, end);
TrimWhiteWithNewLines(start, end);
for( ; start<end ; ++start )
PrintEncode(*start);
}
void BBCODEParser::PrintArgumentEscape(const wchar_t * start, const wchar_t * end)
{
PrintArgumentCheckQuotes(start, end);
for( ; start<end ; ++start )
PrintEscape(*start, true); // quotes are escaped as well here
}
void BBCODEParser::CheckOpeningTag(const Tags * tag, const wchar_t * tag_name, bool & condition)
{
if( Equal(tag->html_tag, tag_name) )
{
if( condition )
{
PutClosingTag(tag);
(*out_string) += '\n';
}
condition = true;
}
}
void BBCODEParser::CheckOpeningTag(const Tags * tag)
{
bool has_list_tag = has_open_ul_tag || has_open_ol_tag;
CheckOpeningTag(tag, L"li", has_open_li_tag);
CheckOpeningTag(tag, L"ul", has_open_ul_tag);
CheckOpeningTag(tag, L"ol", has_open_ol_tag);
if( has_open_li_tag && !has_list_tag )
{
(*out_string) += L"<ul>\n";
has_open_ul_tag = true;
}
}
void BBCODEParser::PrintEscape(const wchar_t * start, const wchar_t * end, bool change_quote)
{
for( ; start < end ; ++start)
PrintEscape(*start, change_quote);
}
void BBCODEParser::PrintEncode(const wchar_t * start, const wchar_t * end)
{
for( ; start < end ; ++start)
PrintEncode(*start);
}
void BBCODEParser::PutOpeningTagFromEzc(const wchar_t * start, const wchar_t * end)
{
// this can be a tag from Ezc templates system
(*out_string) += '[';
(*out_string) += LastItem().name;
if( start != end )
{
(*out_string) += ' ';
PrintEscape(start, end);
}
(*out_string) += ']';
}
void BBCODEParser::PutHtmlArgument1(const wchar_t * arg_start, const wchar_t * arg_end, bool has_u)
{
if( has_u )
PrintArgumentEncode(arg_start, arg_end);
else
PrintArgumentEscape(arg_start, arg_end);
}
void BBCODEParser::TrimWhiteWithNewLines(const wchar_t * & start, const wchar_t * & end)
{
while( start < end && (IsWhite(*start) || *start==10) )
++start;
while( start < end && (IsWhite(*(end-1)) || *(end-1)==10) )
--end;
}
void BBCODEParser::PutHtmlArgument2(const Tags * tag, bool has_u)
{
const wchar_t * start = pchar;
const wchar_t * end = pchar;
bool first_tag_removed = false;
while( *pchar != 0 )
{
if( IsOpeningTagMark() )
{
if( IsClosingTagForLastItem() )
{
// the last tag is skipped when using patterns with %2 or %u2
PopStack(); // removing opening tag from the stack
first_tag_removed = true;
break;
}
}
else
{
pchar += 1;
end = pchar;
}
}
if( !first_tag_removed )
PopStack(); // user has forgotten to close the tag
if( has_u )
{
TrimWhiteWithNewLines(start, end);
PrintEncode(start, end);
}
else
{
PrintEscape(start, end);
}
}
void BBCODEParser::PutHtmlArgument(const Tags * tag, const wchar_t * arg_start, const wchar_t * arg_end)
{
const wchar_t * pattern = tag->html_argument;
bool has_u;
while( *pattern )
{
if( *pattern == '%' )
{
++pattern;
has_u = false;
if( *pattern == 'u' )
{
++pattern;
has_u = true;
}
if( *pattern == '1' )
{
++pattern;
PutHtmlArgument1(arg_start, arg_end, has_u);
}
else
if( *pattern == '2' )
{
++pattern;
PutHtmlArgument2(tag, has_u);
}
else
if( *pattern == '%' )
{
(*out_string) += '%';
++pattern;
}
// else unrecognized, will be printed next time as a normal character
}
else
{
(*out_string) += *pattern;
++pattern;
}
}
}
void BBCODEParser::PutOpeningTagFromBBCode(const Tags * tag, const wchar_t * start, const wchar_t * end)
{
CheckOpeningTag(tag);
PutOpeningTagMark();
(*out_string) += tag->html_tag;
PutHtmlArgument(tag, start, end);
if( !tag->inline_tag )
{
(*out_string) += L"\n";
SkipWhiteLines();
}
}
void BBCODEParser::PutOpeningTag(const wchar_t * start, const wchar_t * end)
{
const Tags * tag = FindTag(LastItem().name);
if( !tag )
{
PutOpeningTagFromEzc(start, end);
}
else
{
PutOpeningTagFromBBCode(tag, start, end);
}
}
void BBCODEParser::PutClosingTag(const Tags * tag)
{
if( !tag )
return; // skipping the tag
PutOpeningTagMark();
(*out_string) += '/';
(*out_string) += tag->html_tag;
PutClosingTagMark();
if( !tag->inline_tag )
{
(*out_string) += L"\n";
SkipWhiteLines();
}
if( Equal(tag->html_tag, L"li") )
has_open_li_tag = false;
if( Equal(tag->html_tag, L"ol") )
has_open_ol_tag = false;
if( Equal(tag->html_tag, L"ul") )
has_open_ul_tag = false;
}
void BBCODEParser::PutClosingTag(const wchar_t * tag_name)
{
const Tags * tag = FindTag(tag_name);
PutClosingTag(tag);
}
void BBCODEParser::Init()
{
has_open_li_tag = false;
has_open_ol_tag = false;
has_open_ul_tag = false;
SkipWhiteLines();
}
void BBCODEParser::Deinit()
{
if( has_open_li_tag )
(*out_string) += L"</li>\n";
if( has_open_ol_tag )
(*out_string) += L"</ol>\n";
if( has_open_ul_tag )
(*out_string) += L"</ul>\n";
}