added BBCODEParser (html/bbcodeparser.h|cpp) - copied from winix project

This commit is contained in:
Tomasz Sowa 2021-07-17 13:54:03 +02:00
parent bdb2616f32
commit 2a3f43c5c3
3 changed files with 768 additions and 0 deletions

View File

@ -43,3 +43,4 @@
./mainoptions/mainoptionsparser.o: textstream/stream.h utf8/utf8_templates.h
./mainoptions/mainoptionsparser.o: utf8/utf8_private.h
./html/htmlfilter.o: ./html/htmlfilter.h
./html/bbcodeparser.o: ./html/bbcodeparser.h ./html/htmlfilter.h

639
src/html/bbcodeparser.cpp Normal file
View File

@ -0,0 +1,639 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2008-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "bbcodeparser.h"
namespace pt
{
/*
	compares two null-terminated wide strings (case sensitive)

	returns true only if both strings have the same length
	and the same characters
*/
bool BBCODEParser::Equal(const wchar_t * str1, const wchar_t * str2)
{
	for( ; *str1 != 0 ; ++str1, ++str2 )
	{
		if( *str1 != *str2 )
			return false;
	}

	// str1 is exhausted here, the strings match only if str2 ends as well
	return *str2 == 0;
}
/*
	returns true if 'c' can be a part of a bbcode tag name:
	an ASCII letter, '*' (used by the [*] list item tag) or '_'
*/
bool BBCODEParser::IsValidCharForName(int c)
{
	const bool is_lower_letter = (c >= 'a' && c <= 'z');
	const bool is_upper_letter = (c >= 'A' && c <= 'Z');
	const bool is_special      = (c == '*' || c == '_');

	return is_lower_letter || is_upper_letter || is_special;
}
// a bbcode tag starts with '[' (instead of '<' known from html)
bool BBCODEParser::IsOpeningTagMark(wchar_t c)
{
	const wchar_t opening_mark = '[';

	return c == opening_mark;
}
// a bbcode tag ends with ']' (instead of '>' known from html)
bool BBCODEParser::IsClosingTagMark(wchar_t c)
{
	const wchar_t closing_mark = ']';

	return c == closing_mark;
}
// this implementation never reports a "simple tag" closing mark,
// the answer is 'false' for every character
bool BBCODEParser::IsClosingXmlSimpleTagMark(wchar_t c)
{
	(void)c; // the character is not inspected

	return false;
}
// bbcode has no commentaries
// so no input is ever recognized as the beginning of a commentary
bool BBCODEParser::IsOpeningCommentaryTagMark(const wchar_t *)
{
	const bool is_commentary = false;

	return is_commentary;
}
// there is no commentary opening mark in bbcode so its size is zero
size_t BBCODEParser::OpeningCommentaryTagMarkSize()
{
	const size_t mark_size = 0;

	return mark_size;
}
// nothing is skipped and 'false' is always returned
// (bbcode has no commentary tags)
bool BBCODEParser::SkipCommentaryTagIfExists()
{
	const bool commentary_skipped = false;

	return commentary_skipped;
}
/*
	prints a plain text fragment [str, end) to the output, html-escaped

	new line handling:
	- a single new line character (10) produces one <br>
	- two or more new lines (white characters between them are ignored)
	  produce exactly two <br>
	- no <br> is printed while an <ol>, <ul> or <li> html tag is open

	if the whole input has been consumed (*pchar == 0) then the trailing
	white characters and new lines of the fragment are dropped
*/
void BBCODEParser::PutNormalText(const wchar_t * str, const wchar_t * end)
{
	const wchar_t new_line = 10;

	if( *pchar == 0 )
	{
		// this is the last fragment of the user text: trim it at the end
		for( ; str < end ; --end )
		{
			const wchar_t last = *(end-1);

			if( !IsWhite(last) && last != new_line )
				break;
		}
	}

	while( str < end )
	{
		if( *str != new_line )
		{
			PrintEscape(*str);
			++str;
			continue;
		}

		// we are at a new line character
		++str;
		bool double_break = false;

		// white characters directly after the new line
		while( str < end && IsWhite(*str) )
			++str;

		if( str < end && *str == new_line )
		{
			// a second new line: collapse everything which follows
			// (white characters and further new lines) into two breaks
			double_break = true;

			while( str < end && (IsWhite(*str) || *str == new_line) )
				++str;
		}

		const bool inside_list = has_open_ol_tag || has_open_ul_tag || has_open_li_tag;

		if( !inside_list )
		{
			(*out_string) += L"<br>\n";

			if( double_break )
				(*out_string) += L"<br>\n";
		}
	}
}
// an override of a HTMLFilter virtual method which deliberately does nothing:
// neither 'start' nor 'last_non_white' is modified here
// NOTE(review): presumably this disables white characters skipping done by
// the base class so that PutNormalText() can see the new line characters -- confirm
void BBCODEParser::ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white)
{
}
/*
	special case for list items: [*] does not have to be closed by the user

	when the two topmost items on the stack are both opening [*] tags
	then the last one is removed from the stack
*/
void BBCODEParser::CheckExceptions()
{
	if( stack_len < 2 )
		return;

	const bool both_opening =
		pstack[stack_len-1].type == Item::opening &&
		pstack[stack_len-2].type == Item::opening;

	if( !both_opening )
		return;

	const bool both_list_items =
		IsNameEqual(L"*", pstack[stack_len-1].name) &&
		IsNameEqual(L"*", pstack[stack_len-2].name);

	if( both_list_items )
	{
		// removing the last [*] from the stack
		// (</li> was put automatically)
		PopStack();
	}
}
/*
	bbcode format:
	[bbcodetag=value]some text[/bbcodetag]

	the value can be quoted, e.g.
	[bbcodetag="value"]some text[/bbcodetag], or
	[bbcodetag='value']some text[/bbcodetag]

	the third string in the tags table below is 'html_argument' from Tags,
	it can contain a special character % followed by a string which means:
	%1  - "value" escaped as for html
	%2  - "some text" escaped as for html
	%u1 - "value" trimmed and encoded as for URLs
	%u2 - "some text" trimmed and encoded as for URLs
	%%  - one %

	if you are using %2 or %u2 then "some text" is not treated as bbcode, e.g.
	[bbcodetag=value]some [b]text[/b][/bbcodetag] will produce:
	<htmltag arg="value">some [b]text[/b]</htmltag> (the inner tags [b][/b] were not parsed)

	also when using %2 or %u2 the closing bbcode tag is skipped
	(if you want this tag then you can put it in 'html_argument')

	and when using u (%u1 or %u2) the argument is trimmed from white characters
	and new lines at the beginning and at the end
	(otherwise a space would be changed to %20 which is probably not what you want)

	NOTE(review): PrintEncode() passes ':' , '(' and ')' through unchanged and
	nothing here restricts the URL scheme, so [url=javascript:...] ends up
	in the href attribute -- consider validating the scheme for %u1/%u2


	FindTag:
	looks for a bbcode tag name (case sensitive, compared with Equal())
	and returns a pointer to its description from the static table
	or a null pointer if the name is not a known bbcode tag
*/
const BBCODEParser::Tags * BBCODEParser::FindTag(const wchar_t * tag)
{
	// the table is only read and is exposed through a pointer to const
	// so it is const itself
	static const Tags tags[] = {
		{L"*", L"li", L">", false},
		{L"b", L"em", L">", true},
		{L"i", L"span", L" class=\"bbitalic\">", true},
		{L"u", L"span", L" class=\"bbunderline\">", true},
		{L"s", L"span", L" class=\"bbstrike\">", true},
		{L"code", L"code", L" class=\"bbcode\">", false},
		{L"list", L"ul", L" class=\"bblist\">", false},
		{L"color", L"span", L" class=\"bbcol%1\">", true},
		{L"url", L"a", L" href=\"%u1\">", true},
		{L"img", L"img", L" alt=\"%1\" src=\"%u2\">", true},
		{L"quote", L"div", L" class=\"bbquote\">\n<span class=\"bbquotewho\">%1</span><br>\n", false},
	};

	const size_t len = sizeof(tags) / sizeof(tags[0]);

	for(size_t i=0 ; i<len ; ++i)
	{
		if( Equal(tag, tags[i].bbcode) )
			return &tags[i];
	}

	// an unknown tag
	return 0;
}
// a convenience overload: the same lookup as FindTag(const wchar_t *)
// but the tag name is given as std::wstring
const BBCODEParser::Tags * BBCODEParser::FindTag(const std::wstring & tag)
{
	const wchar_t * tag_name = tag.c_str();

	return FindTag(tag_name);
}
/*
	narrows the range [start, end) of a tag argument to the value itself

	in order:
	- leading white characters, one optional '=' and white characters
	  after it are skipped
	- trailing white characters are cut off
	- if the value begins with ' or " then the quote is skipped,
	  a matching quote at the end (if present) is cut off
	  and white characters directly after the opening quote are skipped

	both 'start' and 'end' are modified in place, nothing is printed here
*/
void BBCODEParser::PrintArgumentCheckQuotes(const wchar_t * & start, const wchar_t * & end)
{
	// white characters before '='
	for( ; start < end && IsWhite(*start) ; ++start )
	{
	}

	// the first '=' character if exists
	if( start < end && *start == '=' )
		++start;

	// white characters at the beginning of the value
	for( ; start < end && IsWhite(*start) ; ++start )
	{
	}

	// and at the end of the value
	for( ; start < end && IsWhite(*(end-1)) ; --end )
	{
	}

	const bool is_quoted = start < end && (*start == '\'' || *start == '\"');

	if( is_quoted )
	{
		const wchar_t quote = *start;
		++start;

		// the closing quote is removed only if it is the same as the opening one
		if( start < end && *(end-1) == quote )
			--end;

		// white characters after the first quote char: [url = " ww...."]
		for( ; start < end && IsWhite(*start) ; ++start )
		{
		}
	}
}
/*
	prints one character encoded for use in a URL placed in an html attribute

	- '&' is printed as the html entity &amp;
	- ASCII letters, digits and _ ? . , / - + * ( ) = : are printed unchanged
	- every other character is converted to UTF-8 and each byte is printed
	  as %XX (two upper case hex digits), as required by RFC 3986

	previously the code point itself was printed with "%02X" which gave
	invalid sequences such as "%105" for characters above 0xFF;
	the output for ASCII characters is the same as before

	a value which is not a valid Unicode scalar value (negative, above 0x10FFFF
	or a lone surrogate, e.g. a half of a surrogate pair on platforms with
	16-bit wchar_t) is printed as U+FFFD (replacement character)
*/
void BBCODEParser::PrintEncode(int c)
{
	if( c == '&' )
	{
		(*out_string) += L"&amp;";
		return;
	}

	const bool pass_through =
		(c>='a' && c<='z') ||
		(c>='A' && c<='Z') ||
		(c>='0' && c<='9') ||
		(c=='_' || c=='?' || c=='.' || c==',' || c=='/' || c=='-' ||
		 c=='+' || c=='*' || c=='(' || c==')' || c=='=' || c==':');

	if( pass_through )
	{
		(*out_string) += c;
		return;
	}

	// a negative value becomes a big number here and is rejected below
	unsigned long code = static_cast<unsigned int>(c);

	if( code > 0x10FFFF || (code >= 0xD800 && code <= 0xDFFF) )
		code = 0xFFFD;

	unsigned char utf8[4];
	size_t utf8_len;

	if( code < 0x80 )
	{
		utf8[0] = static_cast<unsigned char>(code);
		utf8_len = 1;
	}
	else
	if( code < 0x800 )
	{
		utf8[0] = static_cast<unsigned char>(0xC0 | (code >> 6));
		utf8[1] = static_cast<unsigned char>(0x80 | (code & 0x3F));
		utf8_len = 2;
	}
	else
	if( code < 0x10000 )
	{
		utf8[0] = static_cast<unsigned char>(0xE0 | (code >> 12));
		utf8[1] = static_cast<unsigned char>(0x80 | ((code >> 6) & 0x3F));
		utf8[2] = static_cast<unsigned char>(0x80 | (code & 0x3F));
		utf8_len = 3;
	}
	else
	{
		utf8[0] = static_cast<unsigned char>(0xF0 | (code >> 18));
		utf8[1] = static_cast<unsigned char>(0x80 | ((code >> 12) & 0x3F));
		utf8[2] = static_cast<unsigned char>(0x80 | ((code >> 6) & 0x3F));
		utf8[3] = static_cast<unsigned char>(0x80 | (code & 0x3F));
		utf8_len = 4;
	}

	static const wchar_t hex_digits[] = L"0123456789ABCDEF";

	for(size_t i=0 ; i<utf8_len ; ++i)
	{
		(*out_string) += '%';
		(*out_string) += hex_digits[utf8[i] >> 4];
		(*out_string) += hex_digits[utf8[i] & 0x0F];
	}
}
/*
	prints one character escaped for html:
	<  as &lt;
	>  as &gt;
	&  as &amp;
	"  as &quot; (only if change_quote is true)

	any other character is printed unchanged
*/
void BBCODEParser::PrintEscape(int c, bool change_quote)
{
	switch( c )
	{
	case '<':
		(*out_string) += L"&lt;";
		break;

	case '>':
		(*out_string) += L"&gt;";
		break;

	case '&':
		(*out_string) += L"&amp;";
		break;

	case '\"':
		if( change_quote )
			(*out_string) += L"&quot;";
		else
			(*out_string) += c;
		break;

	default:
		(*out_string) += c;
		break;
	}
}
/*
	prints a tag argument encoded as for a URL

	the optional '=', the surrounding quotes and the white characters
	(with new lines) around the value are removed first
*/
void BBCODEParser::PrintArgumentEncode(const wchar_t * start, const wchar_t * end)
{
	PrintArgumentCheckQuotes(start, end);
	TrimWhiteWithNewLines(start, end);

	// the range overload encodes the characters one by one
	PrintEncode(start, end);
}
/*
	prints a tag argument escaped as for html

	the optional '=' and the surrounding quotes are removed first,
	quote characters inside the value are escaped as well
	(the value is meant to be put into an html attribute)
*/
void BBCODEParser::PrintArgumentEscape(const wchar_t * start, const wchar_t * end)
{
	const bool change_quote = true;

	PrintArgumentCheckQuotes(start, end);
	PrintEscape(start, end, change_quote);
}
/*
	a helper for CheckOpeningTag(const Tags *)

	if the html tag generated for 'tag' is 'tag_name' then:
	- when 'condition' says that such a tag is already open, the closing
	  html tag and a new line character are printed first
	- 'condition' is set to true (the tag is open from now on)

	for any other html tag nothing happens
*/
void BBCODEParser::CheckOpeningTag(const Tags * tag, const wchar_t * tag_name, bool & condition)
{
	if( !Equal(tag->html_tag, tag_name) )
		return;

	if( condition )
	{
		// the previous tag of this kind has not been closed yet
		PutClosingTag(tag);
		(*out_string) += '\n';
	}

	condition = true;
}
void BBCODEParser::CheckOpeningTag(const Tags * tag)
{
bool has_list_tag = has_open_ul_tag || has_open_ol_tag;
CheckOpeningTag(tag, L"li", has_open_li_tag);
CheckOpeningTag(tag, L"ul", has_open_ul_tag);
CheckOpeningTag(tag, L"ol", has_open_ol_tag);
if( has_open_li_tag && !has_list_tag )
{
(*out_string) += L"<ul>\n";
has_open_ul_tag = true;
}
}
// prints all characters from [start, end) escaped as for html,
// see PrintEscape(int, bool) for the meaning of change_quote
void BBCODEParser::PrintEscape(const wchar_t * start, const wchar_t * end, bool change_quote)
{
	const wchar_t * current = start;

	while( current < end )
	{
		PrintEscape(*current, change_quote);
		++current;
	}
}
// prints all characters from [start, end) encoded as for a URL,
// each character is passed to PrintEncode(int)
void BBCODEParser::PrintEncode(const wchar_t * start, const wchar_t * end)
{
	const wchar_t * current = start;

	while( current < end )
	{
		PrintEncode(*current);
		++current;
	}
}
/*
	the tag name is not a known bbcode tag
	-- this can be a tag from the Ezc templates system

	the tag is copied to the output: '[' and the tag name are printed,
	then the rest of the input up to and including the nearest ']'
	(or up to the end of the input) is passed to Put()
*/
void BBCODEParser::PutOpeningTagFromEzc()
{
	(*out_string) += '[';
	(*out_string) += LastItem().name;

	const wchar_t * rest_begin = pchar;

	for( ; *pchar != 0 ; ++pchar )
	{
		if( *pchar == ']' )
		{
			// the closing mark belongs to the copied text
			++pchar;
			break;
		}
	}

	Put(rest_begin, pchar);
}
// prints the tag argument (%1 or %u1 from an html_argument pattern):
// has_u == true  - encoded as for a URL (%u1)
// has_u == false - escaped as for html (%1)
void BBCODEParser::PutHtmlArgument1(const wchar_t * arg_start, const wchar_t * arg_end, bool has_u)
{
	if( !has_u )
	{
		PrintArgumentEscape(arg_start, arg_end);
		return;
	}

	PrintArgumentEncode(arg_start, arg_end);
}
// narrows the range [start, end) so that it neither begins nor ends
// with a white character or a new line character (10),
// both 'start' and 'end' are modified in place
void BBCODEParser::TrimWhiteWithNewLines(const wchar_t * & start, const wchar_t * & end)
{
	// the front
	for( ; start < end ; ++start )
	{
		if( !IsWhite(*start) && *start != 10 )
			break;
	}

	// the back
	for( ; start < end ; --end )
	{
		if( !IsWhite(*(end-1)) && *(end-1) != 10 )
			break;
	}
}
/*
handles %2 / %u2 from an html_argument pattern: prints the text between
the opening bbcode tag and its closing tag without parsing it as bbcode

the text starts at the current input position (pchar) and ends before the
closing tag of the last item from the stack; the opening tag is always
removed from the stack here (also when the closing tag is missing)

has_u == true  - the text is trimmed (white characters and new lines)
and encoded as for a URL
has_u == false - the text is escaped as for html

the 'tag' parameter is not used in this method
*/
void BBCODEParser::PutHtmlArgument2(const Tags * tag, bool has_u)
{
const wchar_t * start = pchar;
const wchar_t * end = pchar;
bool first_tag_removed = false;
while( *pchar != 0 )
{
if( IsOpeningTagMark(*pchar) )
{
// NOTE(review): pchar is not advanced in this branch when
// IsClosingTagForLastItem() returns false, so the loop relies on that
// method (defined in the base class, not visible here) consuming input
// by itself -- confirm; 'end' is deliberately not moved here so that
// a matched closing tag is not a part of the printed text
if( IsClosingTagForLastItem() )
{
// the last tag is skipped when using patterns with %2 or %u2
PopStack(); // removing opening tag from the stack
first_tag_removed = true;
break;
}
}
else
{
// an ordinary character: it belongs to the printed text
pchar += 1;
end = pchar;
}
}
if( !first_tag_removed )
PopStack(); // user has forgotten to close the tag
if( has_u )
{
TrimWhiteWithNewLines(start, end);
PrintEncode(start, end);
}
else
{
// quotes are not changed here (change_quote has its default value)
PrintEscape(start, end);
}
}
/*
	prints the 'html_argument' pattern of a tag, expanding the special sequences:
	%1, %u1 - the tag argument from [arg_start, arg_end)
	%2, %u2 - the text between the opening and the closing bbcode tag
	%%      - one %

	for an unrecognized sequence the '%' (and the 'u' if present) is dropped
	and the character which follows is printed as a normal character
*/
void BBCODEParser::PutHtmlArgument(const Tags * tag, const wchar_t * arg_start, const wchar_t * arg_end)
{
	const wchar_t * p = tag->html_argument;

	while( *p != 0 )
	{
		if( *p != '%' )
		{
			// a normal character
			(*out_string) += *p;
			++p;
			continue;
		}

		// skipping '%'
		++p;

		const bool url_encode = (*p == 'u');

		if( url_encode )
			++p;

		switch( *p )
		{
		case '1':
			++p;
			PutHtmlArgument1(arg_start, arg_end, url_encode);
			break;

		case '2':
			++p;
			PutHtmlArgument2(tag, url_encode);
			break;

		case '%':
			(*out_string) += '%';
			++p;
			break;

		default:
			// unrecognized, the current character will be printed
			// in the next iteration as a normal character
			break;
		}
	}
}
/*
	prints the html opening tag for a known bbcode tag

	at the entry pchar points just after the bbcode tag name,
	the text up to the nearest ']' is the tag argument (e.g. =value)

	the closing ']' is skipped BEFORE the html_argument pattern is expanded:
	%2 / %u2 (PutHtmlArgument2) reads the tag content starting from pchar,
	so with pchar still at ']' the mark itself would become the first
	character of the content (e.g. src="%5D..." for [img]) and a ']' placed
	directly after the closing bbcode tag would be swallowed afterwards

	for tags which are not inline a new line is printed after the html tag
	and the following white lines from the input are skipped
*/
void BBCODEParser::PutOpeningTagFromBBCode(const Tags * tag)
{
	CheckOpeningTag(tag);
	PutOpeningTagMark();
	Put(tag->html_tag);

	const wchar_t * arg_start = pchar;

	while( *pchar && *pchar != ']' )
		++pchar;

	const wchar_t * arg_end = pchar;

	if( *pchar == ']' )
		++pchar;

	PutHtmlArgument(tag, arg_start, arg_end);

	if( !tag->inline_tag )
	{
		Put(10);
		SkipWhiteLines();
	}
}
/*
	prints the opening tag for the last item from the stack:
	a known bbcode tag is converted to html,
	any other tag is treated as a tag from the Ezc templates system

	always returns false
*/
bool BBCODEParser::PutOpeningTag()
{
	const Tags * tag = FindTag(LastItem().name);

	if( tag )
		PutOpeningTagFromBBCode(tag);
	else
		PutOpeningTagFromEzc();

	return false;
}
/*
	prints the html closing tag for a bbcode tag description,
	a null pointer means that the tag should be skipped

	for tags which are not inline a new line is printed
	and the following white lines from the input are skipped

	the has_open_li_tag / has_open_ol_tag / has_open_ul_tag flag
	is cleared if the respective html tag has just been closed
*/
void BBCODEParser::PutClosingTag(const Tags * tag)
{
	if( !tag )
		return; // skipping the tag

	const wchar_t * html_name = tag->html_tag;

	PutOpeningTagMark();
	(*out_string) += '/';
	(*out_string) += html_name;
	PutClosingTagMark();

	if( !tag->inline_tag )
	{
		(*out_string) += L"\n";
		SkipWhiteLines();
	}

	// at most one of the names can match
	if( Equal(html_name, L"li") )
		has_open_li_tag = false;
	else
	if( Equal(html_name, L"ol") )
		has_open_ol_tag = false;
	else
	if( Equal(html_name, L"ul") )
		has_open_ul_tag = false;
}
void BBCODEParser::PutClosingTag(const wchar_t * tag_name)
{
const Tags * tag = FindTag(tag_name);
PutClosingTag(tag);
}
// prepares the parser state: no html list tag is open
// and the white lines at the beginning of the input are skipped
void BBCODEParser::Init()
{
	has_open_li_tag = has_open_ol_tag = has_open_ul_tag = false;

	SkipWhiteLines();
}
// prints closing tags for the html list tags which are still marked as open,
// in the order: </li>, </ol>, </ul>
void BBCODEParser::Uninit()
{
	if( has_open_li_tag )
	{
		(*out_string) += L"</li>\n";
	}

	if( has_open_ol_tag )
	{
		(*out_string) += L"</ol>\n";
	}

	if( has_open_ul_tag )
	{
		(*out_string) += L"</ul>\n";
	}
}
}

128
src/html/bbcodeparser.h Normal file
View File

@ -0,0 +1,128 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2008-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_winix_core_bbcodeparser
#define headerfile_winix_core_bbcodeparser
#include "htmlfilter.h"
namespace pt
{
/*
a bbcode to html converter built on top of HTMLFilter:
the virtual methods of the base class are overridden so that
tags are delimited by [ and ] and the known bbcode tags
are printed as html tags

all members declared here are private (the default access of a class)
*/
class BBCODEParser : public HTMLFilter
{
// a description of one bbcode tag (one row of the table in FindTag())
struct Tags
{
const wchar_t * bbcode; // bbcode tag name, e.g. "url"
const wchar_t * html_tag; // html tag name printed instead, e.g. "a"
const wchar_t * html_argument; // with closing '>'
bool inline_tag; // if false then a new line is printed after the tag and the following white lines are skipped
};
/*
virtual methods
(from HTMLFilter class)
*/
virtual void Init();
virtual void Uninit();
virtual bool IsOpeningTagMark(wchar_t c);
virtual bool IsClosingTagMark(wchar_t c);
virtual bool IsClosingXmlSimpleTagMark(wchar_t c);
virtual bool IsOpeningCommentaryTagMark(const wchar_t *);
virtual size_t OpeningCommentaryTagMarkSize();
virtual bool IsValidCharForName(int c);
virtual void CheckExceptions();
virtual bool SkipCommentaryTagIfExists();
virtual bool PutOpeningTag();
virtual void PutClosingTag(const wchar_t * tag);
virtual void PutNormalText(const wchar_t * str, const wchar_t * end);
virtual void ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white);
/*
others
*/
// case sensitive comparison of two null-terminated strings
bool Equal(const wchar_t * str1, const wchar_t * str2);
// expanding the html_argument pattern (%1, %u1, %2, %u2, %%)
void PutHtmlArgument1(const wchar_t * arg_start, const wchar_t * arg_end, bool has_u);
void PutHtmlArgument2(const Tags * tag, bool has_u);
void PutHtmlArgument(const Tags * tag, const wchar_t * arg_start, const wchar_t * arg_end);
void PutOpeningTagFromEzc();
void PutOpeningTagFromBBCode(const Tags * tag);
// both return a null pointer for a name which is not a known bbcode tag
const Tags * FindTag(const wchar_t * tag);
const Tags * FindTag(const std::wstring & tag);
// narrows [start, end) to the argument value (without '=', quotes and surrounding white characters)
void PrintArgumentCheckQuotes(const wchar_t * & start, const wchar_t * & end);
// PrintEscape - escaping as for html, PrintEncode - encoding as for a URL
void PrintEscape(int c, bool change_quote = false);
void PrintEncode(int c);
void PrintEscape(const wchar_t * start, const wchar_t * end, bool change_quote = false);
void PrintEncode(const wchar_t * start, const wchar_t * end);
void PrintArgumentEncode(const wchar_t * start, const wchar_t * end);
void PrintArgumentEscape(const wchar_t * start, const wchar_t * end);
void PutClosingTag(const Tags * tag);
void CheckOpeningTag(const Tags * tag, const wchar_t * tag_name, bool & condition);
void CheckOpeningTag(const Tags * tag);
void TrimWhiteWithNewLines(const wchar_t * & start, const wchar_t * & end);
// state of the generated html list structure, reset in Init()
bool has_open_ol_tag; // has open html <ol> tag
bool has_open_ul_tag; // has open html <ul> tag
bool has_open_li_tag; // has open html <li> tag
};
}
#endif