HTMLFilter has been moved to pikotools library
This commit is contained in:
parent
c5c02d7f44
commit
17bd48ece3
|
@ -35,7 +35,7 @@
|
||||||
#ifndef headerfile_winix_core_bbcodeparser
|
#ifndef headerfile_winix_core_bbcodeparser
|
||||||
#define headerfile_winix_core_bbcodeparser
|
#define headerfile_winix_core_bbcodeparser
|
||||||
|
|
||||||
#include "htmlfilter.h"
|
#include "html/htmlfilter.h"
|
||||||
|
|
||||||
|
|
||||||
namespace Winix
|
namespace Winix
|
||||||
|
@ -43,7 +43,7 @@ namespace Winix
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class BBCODEParser : public HTMLFilter
|
class BBCODEParser : public pt::HTMLFilter
|
||||||
{
|
{
|
||||||
|
|
||||||
struct Tags
|
struct Tags
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,373 +0,0 @@
|
||||||
/*
|
|
||||||
* This file is a part of Winix
|
|
||||||
* and is distributed under the 2-Clause BSD licence.
|
|
||||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2008-2018, Tomasz Sowa
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* 1. Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
*
|
|
||||||
* 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef headerfile_winix_core_htmlfilter
|
|
||||||
#define headerfile_winix_core_htmlfilter
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <map>
|
|
||||||
#include <vector>
|
|
||||||
#include <algorithm>
|
|
||||||
#include "core/winixbase.h"
|
|
||||||
|
|
||||||
|
|
||||||
namespace Winix
|
|
||||||
{
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// max length of a name of a html tag (with terminating null)
|
|
||||||
#define WINIX_HTMLFILTER_ITEM_NAME_MAXLEN 30
|
|
||||||
|
|
||||||
// max length of a html lang attribute (e.g. "en", "pl")
|
|
||||||
#define WINIX_HTMLFILTER_ITEM_LANG_MAXLEN 10
|
|
||||||
|
|
||||||
|
|
||||||
#define WINIX_HTMLFILTER_ATTR_NAME_MAXLEN 40
|
|
||||||
|
|
||||||
|
|
||||||
#define WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN 500
|
|
||||||
|
|
||||||
|
|
||||||
// depth of the html tree
|
|
||||||
#define WINIX_HTMLFILTER_STACK_MAXLEN 100
|
|
||||||
|
|
||||||
// length of a buffer used for printing
|
|
||||||
// it should be at least: WINIX_HTMLFILTER_ITEM_NAME_MAXLEN+3
|
|
||||||
#define WINIX_HTMLFILTER_BUFFER_MAXLEN 2048
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
|
||||||
very lightweight filter for html
|
|
||||||
(without using any dynamic memory - some memory is allocated only at the beginning - in ctors)
|
|
||||||
this filter has O(n) complexity over the whole html string
|
|
||||||
|
|
||||||
such tags as: <script> <pre> <textarea> are treated in a special way
|
|
||||||
all characters between the opening and closing tag (<script>....</script>) are untouched
|
|
||||||
|
|
||||||
if the filter finds tags which have not been closed it will close them,
|
|
||||||
if the filter finds a closing tag which doesn't have an opening tag - it will skip it
|
|
||||||
|
|
||||||
tags which don't need to be closed: meta, input, br, img, link
|
|
||||||
look at CheckExceptions() method
|
|
||||||
|
|
||||||
the filter recognizes xml simple tags (with / at the end) such as: <br />
|
|
||||||
*/
|
|
||||||
class HTMLFilter : public WinixBase
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
|
|
||||||
enum OrphanMode
|
|
||||||
{
|
|
||||||
orphan_nbsp, // putting "&nbsp;" string
|
|
||||||
orphan_160space // putting 160 ascii code
|
|
||||||
};
|
|
||||||
|
|
||||||
HTMLFilter();
|
|
||||||
HTMLFilter(const HTMLFilter & f);
|
|
||||||
HTMLFilter & operator=(const HTMLFilter & f);
|
|
||||||
virtual ~HTMLFilter();
|
|
||||||
|
|
||||||
|
|
||||||
// main methods used for filtering
|
|
||||||
void Filter(const wchar_t * in, std::wstring & out);
|
|
||||||
void Filter(const std::wstring & in, std::wstring & out);
|
|
||||||
|
|
||||||
|
|
||||||
// insert a white space into long words
|
|
||||||
// (only between html tags)
|
|
||||||
// skipped in such tags: script, pre, textarea
|
|
||||||
// break_after - after how many characters insert a space (0 - off)
|
|
||||||
void BreakWord(size_t break_after_);
|
|
||||||
|
|
||||||
// insert a new line character into long lines
|
|
||||||
// (only between html tags)
|
|
||||||
// skipped in such tags: script, pre, textarea
|
|
||||||
// wrap_line - after how many characters wrap a line (0 - off)
|
|
||||||
// lines are wrapped only in 'body' tag (useful for text in 'title' tag which is in 'head' section)
|
|
||||||
void WrapLine(size_t wrap_line_);
|
|
||||||
|
|
||||||
// trimming white characters (with new lines)
|
|
||||||
// at the beginning, at the end and in the middle of a string
|
|
||||||
// only between html tags
|
|
||||||
// at the beginning and at the end only one space is left
|
|
||||||
// skipped in such tags: script, pre, textarea
|
|
||||||
// false by default
|
|
||||||
void TrimWhite(bool trim);
|
|
||||||
|
|
||||||
// first tabs in a tree
|
|
||||||
// default: 2 (spaces)
|
|
||||||
// set 0 to turn off
|
|
||||||
void InsertTabs(size_t tabsize);
|
|
||||||
|
|
||||||
// set a name of a html tag which will be used as 'nofilter' tag
|
|
||||||
// elements between such tags are not filtered (similarly as in <pre> and <textarea>)
|
|
||||||
// these tags (opening and closing) will not be placed in the html output
|
|
||||||
void SetNoFilterTag(const std::wstring & tag_name);
|
|
||||||
|
|
||||||
// orphans are checked only in 'body' tag
|
|
||||||
void AssignOrphans(const wchar_t * lang_code, const std::vector<std::wstring> & otab);
|
|
||||||
void AssignOrphans(const std::wstring & lang_code, const std::vector<std::wstring> & otab);
|
|
||||||
void ClearOrphans();
|
|
||||||
|
|
||||||
// check 'orphans' for the specific language
|
|
||||||
// if an orphan is detected then the non-break space ("&nbsp;" or ascii 160 code) will be put
|
|
||||||
// default disable (lang_none)
|
|
||||||
void OrphansMode(const std::wstring & orphan_mode);
|
|
||||||
|
|
||||||
// skipping some unsafe tags
|
|
||||||
// (script, iframe, frame, frameset, applet, head, meta, html, link, body, ...)
|
|
||||||
void SafeMode(bool safe_mode_);
|
|
||||||
|
|
||||||
// skip all html tags
|
|
||||||
// gives only text without markup
|
|
||||||
// but there can be commentaries
|
|
||||||
void SkipTags(bool skip_tags);
|
|
||||||
|
|
||||||
// skip commentaries
|
|
||||||
void SkipCommentaries(bool skip_commentaries);
|
|
||||||
|
|
||||||
// if true then entities such as &nbsp; are skipped
|
|
||||||
// this automatically turns on AnalyzeEntities
|
|
||||||
// in such a case EntityFound callbacks are sent
|
|
||||||
void SkipEntities(bool skip_entities);
|
|
||||||
|
|
||||||
// analyze html entities such as &nbsp;
|
|
||||||
// virtual method: EntityFound is called
|
|
||||||
// entities are analyzed in normal text and in attribute values such as <p class="a&nbsp;">
|
|
||||||
void AnalyzeEntities(bool analyze_entities);
|
|
||||||
|
|
||||||
|
|
||||||
protected:
|
|
||||||
|
|
||||||
// orphans for one language
|
|
||||||
struct Orphans
|
|
||||||
{
|
|
||||||
std::vector<std::wstring> tab;
|
|
||||||
size_t max_len;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
// orphans for all languages
|
|
||||||
// map<language_code, Orphans>
|
|
||||||
typedef std::map<std::wstring, Orphans> OrphansTab;
|
|
||||||
OrphansTab orphans_tab;
|
|
||||||
|
|
||||||
// html <nofilter> tag name
|
|
||||||
std::wstring no_filter_tag;
|
|
||||||
|
|
||||||
|
|
||||||
struct Item
|
|
||||||
{
|
|
||||||
std::wstring name; // max size: WINIX_HTMLFILTER_ITEM_NAME_MAXLEN
|
|
||||||
|
|
||||||
enum Type
|
|
||||||
{
|
|
||||||
opening, /* sample: <h1> */
|
|
||||||
closing, /* sample: </h1> */
|
|
||||||
simple, /* sample: <br/> */
|
|
||||||
special, /* sample: <!doctype> */
|
|
||||||
none
|
|
||||||
} type;
|
|
||||||
|
|
||||||
// is there a new line after this tag
|
|
||||||
bool new_line;
|
|
||||||
|
|
||||||
// current orphans table
|
|
||||||
// (will be propagated)
|
|
||||||
Orphans * porphans;
|
|
||||||
|
|
||||||
// this item or one from its parents is a 'body' html tag
|
|
||||||
// (will be propagated)
|
|
||||||
bool has_body_tag;
|
|
||||||
|
|
||||||
void Clear();
|
|
||||||
Item();
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
virtual methods
|
|
||||||
*/
|
|
||||||
virtual void Init();
|
|
||||||
virtual void Uninit();
|
|
||||||
|
|
||||||
virtual bool IsOpeningTagMark(wchar_t c);
|
|
||||||
virtual bool IsClosingTagMark(wchar_t c);
|
|
||||||
virtual bool IsClosingXmlSimpleTagMark(wchar_t c);
|
|
||||||
virtual bool IsStartingEntityMark(wchar_t c);
|
|
||||||
virtual bool IsEndingEntityMark(wchar_t c);
|
|
||||||
|
|
||||||
virtual bool IsOpeningCommentaryTagMark(const wchar_t * str);
|
|
||||||
virtual size_t OpeningCommentaryTagMarkSize();
|
|
||||||
|
|
||||||
virtual bool IsValidCharForName(int c);
|
|
||||||
virtual bool IsValidCharForAttrName(int c);
|
|
||||||
virtual bool IsValidCharForEntityName(int c);
|
|
||||||
virtual void CheckExceptions();
|
|
||||||
virtual bool SkipCommentaryTagIfExists();
|
|
||||||
|
|
||||||
virtual void Put(wchar_t c);
|
|
||||||
virtual void Put(const wchar_t * str);
|
|
||||||
virtual void Put(const wchar_t * str, const wchar_t * end);
|
|
||||||
virtual void Put(const std::wstring & str);
|
|
||||||
virtual void AnalyzeEntitiesAndPut(const wchar_t * str, const wchar_t * end, std::wstring * out);
|
|
||||||
|
|
||||||
virtual void PutOpeningTagMark();
|
|
||||||
virtual void PutClosingTagMark();
|
|
||||||
virtual bool PutOpeningTag();
|
|
||||||
virtual void PutClosingTag(const wchar_t * tag);
|
|
||||||
|
|
||||||
virtual void PutNormalText(const wchar_t * str, const wchar_t * end);
|
|
||||||
virtual void ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white);
|
|
||||||
|
|
||||||
virtual void ItemFound();
|
|
||||||
virtual void EntityFound(const wchar_t * str, const wchar_t * end);
|
|
||||||
|
|
||||||
/*
|
|
||||||
others
|
|
||||||
*/
|
|
||||||
void SetSomeDefaults();
|
|
||||||
|
|
||||||
Item & GetItem(size_t i);
|
|
||||||
Item & LastItem();
|
|
||||||
|
|
||||||
wchar_t ToLower(wchar_t c);
|
|
||||||
void ToLower(std::wstring & str);
|
|
||||||
|
|
||||||
bool IsNameEqual(const wchar_t * name1, const wchar_t * name2);
|
|
||||||
bool IsNameEqual(const wchar_t * name1, const std::wstring & name2);
|
|
||||||
bool IsNameEqual(const std::wstring & name1, const wchar_t * name2);
|
|
||||||
bool IsNameEqual(const std::wstring & name1, const std::wstring & name2);
|
|
||||||
|
|
||||||
bool IsNameEqual(const wchar_t * name1, const wchar_t * name2, size_t len);
|
|
||||||
bool IsNameEqual(const wchar_t * name1, const std::wstring & name2, size_t len);
|
|
||||||
bool IsNameEqual(const std::wstring & name1, const wchar_t * name2, size_t len);
|
|
||||||
bool IsNameEqual(const std::wstring & name1, const std::wstring & name2, size_t len);
|
|
||||||
|
|
||||||
bool IsLastTag(const wchar_t * name);
|
|
||||||
bool IsLastTag(const std::wstring & name);
|
|
||||||
bool IsTagSafe(const wchar_t * tag);
|
|
||||||
bool IsTagSafe(const std::wstring & tag);
|
|
||||||
|
|
||||||
int CheckOrphan(const wchar_t * str, const wchar_t * end, const std::wstring & orphan_str);
|
|
||||||
bool CheckOrphan(const wchar_t * str, const wchar_t * end, const std::vector<std::wstring> & orphans);
|
|
||||||
bool CheckOrphan(const wchar_t * str, const wchar_t * end);
|
|
||||||
|
|
||||||
bool IsWhite(int c);
|
|
||||||
void SkipWhite();
|
|
||||||
void SkipWhiteLines();
|
|
||||||
void SkipWhiteWithFirstNewLine();
|
|
||||||
void SkipWhiteLines(const wchar_t * & str, const wchar_t * end);
|
|
||||||
bool IsClosingTagForLastItem();
|
|
||||||
void SkipAndCheckClosingTag();
|
|
||||||
|
|
||||||
void PopStack();
|
|
||||||
bool PushStack();
|
|
||||||
void CheckNewLine();
|
|
||||||
void CheckStackPrintRest();
|
|
||||||
void AddForgottenTags();
|
|
||||||
void CheckClosingTags();
|
|
||||||
void ReadNormalText();
|
|
||||||
bool PrintRest();
|
|
||||||
bool PrintOpeningItem();
|
|
||||||
void ReadItemName();
|
|
||||||
void ReadItemAttrName();
|
|
||||||
void ReadItemAttrValueAdd(const wchar_t * value_start, const wchar_t * value_end);
|
|
||||||
void ReadItemAttrValue(bool has_quote, wchar_t quote_char);
|
|
||||||
|
|
||||||
bool ReadItemAttr();
|
|
||||||
bool CheckItemAttr();
|
|
||||||
void PrintItemAttr();
|
|
||||||
|
|
||||||
void ReadItemClosing();
|
|
||||||
void ReadItemSpecial();
|
|
||||||
void ReadItemOpening();
|
|
||||||
bool ReadItem();
|
|
||||||
void ReadLoop();
|
|
||||||
void Read();
|
|
||||||
|
|
||||||
void CheckChar(wchar_t c);
|
|
||||||
|
|
||||||
void CheckLineWrap();
|
|
||||||
bool HasEntityEndAround(const wchar_t * str, const wchar_t * end);
|
|
||||||
void PutNormalNonWhite(const wchar_t * & str, const wchar_t * end);
|
|
||||||
void PutNormalWhite(const wchar_t * & str, const wchar_t * end);
|
|
||||||
void PutEverythingUntilClosingTag(bool put_closing_tag_as_well);
|
|
||||||
void PutTabs(size_t len);
|
|
||||||
void PutNonBreakingSpace();
|
|
||||||
void PutNewLine();
|
|
||||||
void CalcOrphansMaxLen(Orphans & orphans);
|
|
||||||
|
|
||||||
const wchar_t * pchar;
|
|
||||||
Item empty;
|
|
||||||
Item * pstack; // stack pointer
|
|
||||||
size_t stack_len; // length of the stack
|
|
||||||
wchar_t * buffer; // buffer used when printing
|
|
||||||
std::wstring * out_string;
|
|
||||||
bool last_new_line;
|
|
||||||
size_t break_after; // insert a space into long words after 'break_after' characters
|
|
||||||
size_t wrap_line; // insert a new line character into long lines
|
|
||||||
bool trim_white; // trimming white characters
|
|
||||||
size_t tab_size;
|
|
||||||
OrphanMode orphan_mode;
|
|
||||||
std::wstring attr_name;
|
|
||||||
std::vector<std::wstring> attr_value;
|
|
||||||
std::wstring attr_value_temp;
|
|
||||||
std::wstring attr_value_lower;
|
|
||||||
bool attr_has_value;
|
|
||||||
std::wstring lang_code_lower;
|
|
||||||
size_t line_len; //length of the current line (without first spaces which create the html tree)
|
|
||||||
bool safe_mode; // skipping some unsafe tags
|
|
||||||
Orphans orphans_temp;
|
|
||||||
bool skip_tags;
|
|
||||||
bool skip_commentaries;
|
|
||||||
bool skip_entities;
|
|
||||||
bool analyze_entities;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace Winix
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -83,7 +83,7 @@
|
||||||
#include "uptime.h"
|
#include "uptime.h"
|
||||||
#include "who.h"
|
#include "who.h"
|
||||||
#include "vim.h"
|
#include "vim.h"
|
||||||
#include "core/htmlfilter.h"
|
#include "html/htmlfilter.h"
|
||||||
#include "core/winixrequest.h"
|
#include "core/winixrequest.h"
|
||||||
|
|
||||||
|
|
||||||
|
@ -205,7 +205,7 @@ private:
|
||||||
SessionManager * session_manager;
|
SessionManager * session_manager;
|
||||||
|
|
||||||
std::wstring temp;
|
std::wstring temp;
|
||||||
HTMLFilter html_filter;
|
pt::HTMLFilter html_filter;
|
||||||
std::wstring link_to_temp;
|
std::wstring link_to_temp;
|
||||||
|
|
||||||
void CreateFunctions();
|
void CreateFunctions();
|
||||||
|
|
|
@ -64,7 +64,7 @@ Ezc::Blocks ezc_blocks;
|
||||||
Ezc::Objects<HtmlTextStream> ezc_objects;
|
Ezc::Objects<HtmlTextStream> ezc_objects;
|
||||||
Ezc::Vars ezc_vars;
|
Ezc::Vars ezc_vars;
|
||||||
LocaleFilter locale_filter;
|
LocaleFilter locale_filter;
|
||||||
HTMLFilter html_filter;
|
pt::HTMLFilter html_filter;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1070,7 +1070,6 @@ void Templates::set_dependency(WinixRequest * winix_request)
|
||||||
TemplatesFunctions::index_patterns.set_dependency(winix_request);
|
TemplatesFunctions::index_patterns.set_dependency(winix_request);
|
||||||
TemplatesFunctions::pattern_cacher.set_dependency(winix_request);
|
TemplatesFunctions::pattern_cacher.set_dependency(winix_request);
|
||||||
TemplatesFunctions::locale_filter.set_dependency(winix_request);
|
TemplatesFunctions::locale_filter.set_dependency(winix_request);
|
||||||
TemplatesFunctions::html_filter.set_dependency(winix_request);
|
|
||||||
|
|
||||||
TemplatesFunctions::log.SetDependency(&this->log);
|
TemplatesFunctions::log.SetDependency(&this->log);
|
||||||
TemplatesFunctions::plugin = winix_request->get_plugin();
|
TemplatesFunctions::plugin = winix_request->get_plugin();
|
||||||
|
|
|
@ -50,7 +50,7 @@
|
||||||
#include "core/cur.h"
|
#include "core/cur.h"
|
||||||
#include "core/system.h"
|
#include "core/system.h"
|
||||||
#include "core/sessionmanager.h"
|
#include "core/sessionmanager.h"
|
||||||
#include "core/htmlfilter.h"
|
#include "html/htmlfilter.h"
|
||||||
#include "db/db.h"
|
#include "db/db.h"
|
||||||
#include "core/winixrequest.h"
|
#include "core/winixrequest.h"
|
||||||
|
|
||||||
|
@ -79,7 +79,7 @@ namespace TemplatesFunctions
|
||||||
extern Locale locale;
|
extern Locale locale;
|
||||||
extern EzcFun ezc_functions;
|
extern EzcFun ezc_functions;
|
||||||
extern LocaleFilter locale_filter;
|
extern LocaleFilter locale_filter;
|
||||||
extern HTMLFilter html_filter;
|
extern pt::HTMLFilter html_filter;
|
||||||
extern Ezc::Stack empty_stack;
|
extern Ezc::Stack empty_stack;
|
||||||
|
|
||||||
extern Db * db;
|
extern Db * db;
|
||||||
|
|
Loading…
Reference in New Issue