diff --git a/src/Makefile.dep b/src/Makefile.dep index 2a8cf37..16e85d6 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep @@ -43,3 +43,4 @@ ./mainoptions/mainoptionsparser.o: textstream/stream.h utf8/utf8_templates.h ./mainoptions/mainoptionsparser.o: utf8/utf8_private.h ./html/htmlfilter.o: ./html/htmlfilter.h +./html/bbcodeparser.o: ./html/bbcodeparser.h ./html/htmlfilter.h diff --git a/src/html/bbcodeparser.cpp b/src/html/bbcodeparser.cpp new file mode 100644 index 0000000..0a60273 --- /dev/null +++ b/src/html/bbcodeparser.cpp @@ -0,0 +1,639 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2008-2021, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "bbcodeparser.h" + + +namespace pt +{ + + + + +bool BBCODEParser::Equal(const wchar_t * str1, const wchar_t * str2) +{ + while( *str1 == *str2 && *str1 != 0 ) + { + str1 += 1; + str2 += 1; + } + +return *str1 == *str2; +} + + + + +bool BBCODEParser::IsValidCharForName(int c) +{ + if( (c>='a' && c<='z') || + (c>='A' && c<='Z') || + c=='*' || c=='_') + return true; + +return false; +} + + +bool BBCODEParser::IsOpeningTagMark(wchar_t c) +{ + return (c == '['); +} + + +bool BBCODEParser::IsClosingTagMark(wchar_t c) +{ + return (c == ']'); +} + +bool BBCODEParser::IsClosingXmlSimpleTagMark(wchar_t c) +{ + return false; +} + + + +// there are no commentaries in bbcode +bool BBCODEParser::IsOpeningCommentaryTagMark(const wchar_t *) +{ + return false; +} + + +size_t BBCODEParser::OpeningCommentaryTagMarkSize() +{ + return 0; +} + + + +bool BBCODEParser::SkipCommentaryTagIfExists() +{ + return false; +} + + + + + + + + + + +// one enter will generate one
+// two enters or more will generate only two br (

) +void BBCODEParser::PutNormalText(const wchar_t * str, const wchar_t * end) +{ +int br_len; + + if( *pchar == 0 ) + { + // trimming last white characters at end of the user text + while( str\n"; + } + } + else + { + PrintEscape(*str); + ++str; + } + } +} + + + +void BBCODEParser::ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white) +{ +} + + +void BBCODEParser::CheckExceptions() +{ + if( stack_len >= 2 ) + { + if( pstack[stack_len-1].type == Item::opening && + pstack[stack_len-2].type == Item::opening && + IsNameEqual(L"*", pstack[stack_len-1].name) && + IsNameEqual(L"*", pstack[stack_len-2].name) ) + { + // removing the last [*] from the stack + // was put automatically + PopStack(); + } + } +} + + + + +/* + bbcode format: + [bbcodetag=value]some text[/bbcodetag] + the value can be quoted, e.g. + [bbcodetag="value"]some text[/bbcodetag], or + [bbcodetag='value']some text[/bbcodetag] + + the third string below (in tags table) is 'html_argument' from Tags, + it can contain a special character % followed by a string which means: + %1 - "value" escaped as for html + %2 - "some text" escaped as for html + %u1 - "value" trimmed and escaped as for url-es + %u2 - "some text" trimmed and escaped as for url-es + %% - one % + + if you are using %2 or %u2 then "some text" is not treated as bbcode, e.g. + [bbcodetag=value]some [b]text[/b][/bbcodetag] will produce: + some [b]text[/b] (the inner tags [b][/b] were not parsed) + + also when using %2 or %u2 the closing bbcode tag is skipped + (if you want this tag then you can put it in 'html_argument') + + and when using u (%u1 or %u2) the argument is trimmed from whitespaces and new lines + at the beginning and at the end + (because otherwise a space would be changed to %20 and this were probably not what you really wanted) +*/ +const BBCODEParser::Tags * BBCODEParser::FindTag(const wchar_t * tag) +{ + static Tags tags[] = { + {L"*", L"li", L">", false}, + {L"b", L"em", L">", true}, + {L"i", L"span", L" class=\"bbitalic\">", true}, + {L"u", L"span", L" class=\"bbunderline\">", true}, + {L"s", L"span", L" class=\"bbstrike\">", true}, + {L"code", L"code", L" class=\"bbcode\">", false}, + {L"list", L"ul", L" class=\"bblist\">", false}, + {L"color", L"span", L" class=\"bbcol%1\">", true}, + {L"url", L"a", L" href=\"%u1\">", true}, + {L"img", L"img", L" alt=\"%1\" src=\"%u2\">", true}, + {L"quote", L"div", L" class=\"bbquote\">\n%1
\n", false}, + }; + + size_t i; + size_t len = sizeof(tags) / sizeof(Tags); + + for(i=0 ; i='a' && c<='z') || + (c>='A' && c<='Z') || + (c>='0' && c<='9') || + (c=='_' || c=='?' || c=='.' || c==',' || c=='/' || c=='-' || + c=='+' || c=='*' || c=='(' || c==')' || c=='=' || c==':') + ) + { + (*out_string) += c; + } + else + { + wchar_t buffer[20]; + swprintf(buffer, 20, L"%02X", c); + + (*out_string) += '%'; + (*out_string) += buffer; + } +} + + +void BBCODEParser::PrintEscape(int c, bool change_quote) +{ + if( c == '<' ) + { + (*out_string) += L"<"; + } + else + if( c == '>' ) + { + (*out_string) += L">"; + } + else + if( c == '&' ) + { + (*out_string) += L"&"; + } + else + if( c == '\"' && change_quote ) + { + (*out_string) += L"""; + } + else + { + (*out_string) += c; + } +} + + +void BBCODEParser::PrintArgumentEncode(const wchar_t * start, const wchar_t * end) +{ + PrintArgumentCheckQuotes(start, end); + TrimWhiteWithNewLines(start, end); + + for( ; starthtml_tag, tag_name) ) + { + if( condition ) + { + PutClosingTag(tag); + (*out_string) += '\n'; + } + + condition = true; + } +} + + +void BBCODEParser::CheckOpeningTag(const Tags * tag) +{ + bool has_list_tag = has_open_ul_tag || has_open_ol_tag; + + CheckOpeningTag(tag, L"li", has_open_li_tag); + CheckOpeningTag(tag, L"ul", has_open_ul_tag); + CheckOpeningTag(tag, L"ol", has_open_ol_tag); + + if( has_open_li_tag && !has_list_tag ) + { + (*out_string) += L"
    \n"; + has_open_ul_tag = true; + } +} + + + + + +void BBCODEParser::PrintEscape(const wchar_t * start, const wchar_t * end, bool change_quote) +{ + for( ; start < end ; ++start) + PrintEscape(*start, change_quote); +} + + + +void BBCODEParser::PrintEncode(const wchar_t * start, const wchar_t * end) +{ + for( ; start < end ; ++start) + PrintEncode(*start); +} + + + +void BBCODEParser::PutOpeningTagFromEzc() +{ + // this can be a tag from Ezc templates system + (*out_string) += '['; + (*out_string) += LastItem().name; + + const wchar_t * start = pchar; + + while( *pchar && *pchar!=']' ) + ++pchar; + + if( *pchar == ']' ) + ++pchar; + + Put(start, pchar); +} + + + + + +void BBCODEParser::PutHtmlArgument1(const wchar_t * arg_start, const wchar_t * arg_end, bool has_u) +{ + if( has_u ) + PrintArgumentEncode(arg_start, arg_end); + else + PrintArgumentEscape(arg_start, arg_end); +} + + + +void BBCODEParser::TrimWhiteWithNewLines(const wchar_t * & start, const wchar_t * & end) +{ + while( start < end && (IsWhite(*start) || *start==10) ) + ++start; + + while( start < end && (IsWhite(*(end-1)) || *(end-1)==10) ) + --end; +} + + + +void BBCODEParser::PutHtmlArgument2(const Tags * tag, bool has_u) +{ +const wchar_t * start = pchar; +const wchar_t * end = pchar; +bool first_tag_removed = false; + + while( *pchar != 0 ) + { + if( IsOpeningTagMark(*pchar) ) + { + if( IsClosingTagForLastItem() ) + { + // the last tag is skipped when using patterns with %2 or %u2 + + PopStack(); // removing opening tag from the stack + first_tag_removed = true; + break; + } + } + else + { + pchar += 1; + end = pchar; + } + } + + if( !first_tag_removed ) + PopStack(); // user has forgotten to close the tag + + if( has_u ) + { + TrimWhiteWithNewLines(start, end); + PrintEncode(start, end); + } + else + { + PrintEscape(start, end); + } +} + + + +void BBCODEParser::PutHtmlArgument(const Tags * tag, const wchar_t * arg_start, const wchar_t * arg_end) +{ +const wchar_t * pattern = tag->html_argument; +bool has_u; + + while( *pattern ) + { + if( *pattern == '%' ) + { + ++pattern; + has_u = false; + + if( *pattern == 'u' ) + { + ++pattern; + has_u = true; + } + + if( *pattern == '1' ) + { + ++pattern; + PutHtmlArgument1(arg_start, arg_end, has_u); + } + else + if( *pattern == '2' ) + { + ++pattern; + PutHtmlArgument2(tag, has_u); + } + else + if( *pattern == '%' ) + { + (*out_string) += '%'; + ++pattern; + } + // else unrecognized, will be printed next time as a normal character + } + else + { + (*out_string) += *pattern; + ++pattern; + } + } +} + + +void BBCODEParser::PutOpeningTagFromBBCode(const Tags * tag) +{ + CheckOpeningTag(tag); + PutOpeningTagMark(); + Put(tag->html_tag); + + const wchar_t * start = pchar; + + while( *pchar && *pchar != ']' ) + ++pchar; + + PutHtmlArgument(tag, start, pchar); + + if( *pchar == ']' ) + ++pchar; + + if( !tag->inline_tag ) + { + Put(10); + SkipWhiteLines(); + } +} + + +bool BBCODEParser::PutOpeningTag() +{ + const Tags * tag = FindTag(LastItem().name); + + if( !tag ) + PutOpeningTagFromEzc(); + else + PutOpeningTagFromBBCode(tag); + +return false; +} + + +void BBCODEParser::PutClosingTag(const Tags * tag) +{ + if( !tag ) + return; // skipping the tag + + PutOpeningTagMark(); + (*out_string) += '/'; + (*out_string) += tag->html_tag; + PutClosingTagMark(); + + if( !tag->inline_tag ) + { + (*out_string) += L"\n"; + SkipWhiteLines(); + } + + if( Equal(tag->html_tag, L"li") ) + has_open_li_tag = false; + + if( Equal(tag->html_tag, L"ol") ) + has_open_ol_tag = false; + + if( Equal(tag->html_tag, L"ul") ) + has_open_ul_tag = false; +} + + +void BBCODEParser::PutClosingTag(const wchar_t * tag_name) +{ + const Tags * tag = FindTag(tag_name); + PutClosingTag(tag); +} + + + +void BBCODEParser::Init() +{ + has_open_li_tag = false; + has_open_ol_tag = false; + has_open_ul_tag = false; + + SkipWhiteLines(); +} + + +void BBCODEParser::Uninit() +{ + if( has_open_li_tag ) + (*out_string) += L"\n"; + + if( has_open_ol_tag ) + (*out_string) += L"\n"; + + if( has_open_ul_tag ) + (*out_string) += L"
\n"; +} + + + +} + diff --git a/src/html/bbcodeparser.h b/src/html/bbcodeparser.h new file mode 100644 index 0000000..bd36e4d --- /dev/null +++ b/src/html/bbcodeparser.h @@ -0,0 +1,128 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2008-2021, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef headerfile_winix_core_bbcodeparser +#define headerfile_winix_core_bbcodeparser + +#include "htmlfilter.h" + +namespace pt +{ + + +class BBCODEParser : public HTMLFilter +{ + + struct Tags + { + const wchar_t * bbcode; + const wchar_t * html_tag; + const wchar_t * html_argument; // with closing '>' + bool inline_tag; + }; + + + /* + virtual methods + (from HTMLFilter class) + */ + virtual void Init(); + virtual void Uninit(); + + virtual bool IsOpeningTagMark(wchar_t c); + virtual bool IsClosingTagMark(wchar_t c); + virtual bool IsClosingXmlSimpleTagMark(wchar_t c); + + virtual bool IsOpeningCommentaryTagMark(const wchar_t *); + virtual size_t OpeningCommentaryTagMarkSize(); + + virtual bool IsValidCharForName(int c); + virtual void CheckExceptions(); + virtual bool SkipCommentaryTagIfExists(); + + virtual bool PutOpeningTag(); + virtual void PutClosingTag(const wchar_t * tag); + + virtual void PutNormalText(const wchar_t * str, const wchar_t * end); + virtual void ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white); + + + + /* + others + */ + bool Equal(const wchar_t * str1, const wchar_t * str2); + + void PutHtmlArgument1(const wchar_t * arg_start, const wchar_t * arg_end, bool has_u); + void PutHtmlArgument2(const Tags * tag, bool has_u); + void PutHtmlArgument(const Tags * tag, const wchar_t * arg_start, const wchar_t * arg_end); + + void PutOpeningTagFromEzc(); + void PutOpeningTagFromBBCode(const Tags * tag); + + const Tags * FindTag(const wchar_t * tag); + const Tags * FindTag(const std::wstring & tag); + void PrintArgumentCheckQuotes(const wchar_t * & start, const wchar_t * & end); + + void PrintEscape(int c, bool change_quote = false); + void PrintEncode(int c); + + void PrintEscape(const wchar_t * start, const wchar_t * end, bool change_quote = false); + void PrintEncode(const wchar_t * start, const wchar_t * end); + + void PrintArgumentEncode(const wchar_t * start, const wchar_t * end); + void PrintArgumentEscape(const wchar_t * start, const wchar_t * end); + + void PutClosingTag(const Tags * tag); + + void CheckOpeningTag(const Tags * tag, const wchar_t * tag_name, bool & condition); + void CheckOpeningTag(const Tags * tag); + + void TrimWhiteWithNewLines(const wchar_t * & start, const wchar_t * & end); + + + + bool has_open_ol_tag; // has open html
    tag + bool has_open_ul_tag; // has open html
      tag + bool has_open_li_tag; // has open html
    • tag +}; + + +} + + +#endif