HTMLFilter has been moved to pikotools library
This commit is contained in:
parent
c5c02d7f44
commit
17bd48ece3
|
@ -35,7 +35,7 @@
|
|||
#ifndef headerfile_winix_core_bbcodeparser
|
||||
#define headerfile_winix_core_bbcodeparser
|
||||
|
||||
#include "htmlfilter.h"
|
||||
#include "html/htmlfilter.h"
|
||||
|
||||
|
||||
namespace Winix
|
||||
|
@ -43,7 +43,7 @@ namespace Winix
|
|||
|
||||
|
||||
|
||||
class BBCODEParser : public HTMLFilter
|
||||
class BBCODEParser : public pt::HTMLFilter
|
||||
{
|
||||
|
||||
struct Tags
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,373 +0,0 @@
|
|||
/*
|
||||
* This file is a part of Winix
|
||||
* and is distributed under the 2-Clause BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2008-2018, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef headerfile_winix_core_htmlfilter
|
||||
#define headerfile_winix_core_htmlfilter
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include "core/winixbase.h"
|
||||
|
||||
|
||||
namespace Winix
|
||||
{
|
||||
|
||||
|
||||
|
||||
|
||||
// max length of a name of a html tag (with terminating null)
|
||||
#define WINIX_HTMLFILTER_ITEM_NAME_MAXLEN 30
|
||||
|
||||
// max length of a html lang attribute (e.g. "en", "pl")
|
||||
#define WINIX_HTMLFILTER_ITEM_LANG_MAXLEN 10
|
||||
|
||||
|
||||
#define WINIX_HTMLFILTER_ATTR_NAME_MAXLEN 40
|
||||
|
||||
|
||||
#define WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN 500
|
||||
|
||||
|
||||
// depth of the html tree
|
||||
#define WINIX_HTMLFILTER_STACK_MAXLEN 100
|
||||
|
||||
// length of a buffer used for printing
|
||||
// it should be at least: WINIX_HTMLFILTER_ITEM_NAME_MAXLEN+3
|
||||
#define WINIX_HTMLFILTER_BUFFER_MAXLEN 2048
|
||||
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
very lightweight filter for html
|
||||
(without using any dynamic memory - some memory is allocated only at the beginning - in ctors)
|
||||
this filter has O(n) complexity over the whole html string
|
||||
|
||||
such tags as: <script> <pre> <textarea> are treated in a special way
|
||||
all characters between the opening and closing tag (<script>....</script>) are untouched
|
||||
|
||||
if the filter finds that there are not closed tags it will close them,
|
||||
if the filter finds a closing tag which doesn't have an opening tag - it will skip it
|
||||
|
||||
tags which don't need to be closed: meta, input, br, img, link
|
||||
look at CheckExceptions() method
|
||||
|
||||
the filter recognizes xml simple tags (with / at the end) such as: <br />
|
||||
*/
|
||||
class HTMLFilter : public WinixBase
|
||||
{
|
||||
public:
|
||||
|
||||
// selects what is written to the output as the non-breaking space
// when an orphan is detected
enum OrphanMode
|
||||
{
|
||||
orphan_nbsp, // putting the "&nbsp;" html entity string
|
||||
orphan_160space // putting a single character with code 160 (non-breaking space)
|
||||
};
|
||||
|
||||
HTMLFilter();
|
||||
HTMLFilter(const HTMLFilter & f);
|
||||
HTMLFilter & operator=(const HTMLFilter & f);
|
||||
virtual ~HTMLFilter();
|
||||
|
||||
|
||||
// main methods used for filtering
|
||||
void Filter(const wchar_t * in, std::wstring & out);
|
||||
void Filter(const std::wstring & in, std::wstring & out);
|
||||
|
||||
|
||||
// insert a white space into long words
|
||||
// (only between html tags)
|
||||
// skipped in such tags: script, pre, textarea
|
||||
// break_after - after how many characters insert a space (0 - off)
|
||||
void BreakWord(size_t break_after_);
|
||||
|
||||
// insert a new line character into long lines
|
||||
// (only between html tags)
|
||||
// skipped in such tags: script, pre, textarea
|
||||
// wrap_line - after how many characters wrap a line (0 - off)
|
||||
// lines are wrapped only in 'body' tag (useful for text in 'title' tag which is in 'head' section)
|
||||
void WrapLine(size_t wrap_line_);
|
||||
|
||||
// trimming white characters (with new lines)
|
||||
// at the beginning, at the end and in the middle of a string
|
||||
// only between html tags
|
||||
// at the beginning and at the end only one space is left
|
||||
// skipped in such tags: script, pre, textarea
|
||||
// false by default
|
||||
void TrimWhite(bool trim);
|
||||
|
||||
// first tabs in a tree
|
||||
// default: 2 (spaces)
|
||||
// set 0 to turn off
|
||||
void InsertTabs(size_t tabsize);
|
||||
|
||||
// set a name of a html tag which will be used as 'nofilter' tag
|
||||
// elements between such tags are not filtered (similarly as in <pre> and <textarea>)
|
||||
// these tags (opening and closing) will not be placed in the html output
|
||||
void SetNoFilterTag(const std::wstring & tag_name);
|
||||
|
||||
// orphans are checked only in 'body' tag
|
||||
void AssignOrphans(const wchar_t * lang_code, const std::vector<std::wstring> & otab);
|
||||
void AssignOrphans(const std::wstring & lang_code, const std::vector<std::wstring> & otab);
|
||||
void ClearOrphans();
|
||||
|
||||
// check 'orphans' for the specific language
|
||||
// if an orphan is detected then the non-breaking space ("&nbsp;" or ascii 160 code) will be put
|
||||
// disabled by default (lang_none)
|
||||
void OrphansMode(const std::wstring & orphan_mode);
|
||||
|
||||
// skipping some unsafe tags
|
||||
// (script, iframe, frame, frameset, applet, head, meta, html, link, body, ...)
|
||||
void SafeMode(bool safe_mode_);
|
||||
|
||||
// skip all html tags
|
||||
// gives only text without markup
|
||||
// but there can be commentaries
|
||||
void SkipTags(bool skip_tags);
|
||||
|
||||
// skip commentaries
|
||||
void SkipCommentaries(bool skip_commentaries);
|
||||
|
||||
// if true then entities such as &nbsp; are skipped
|
||||
// this automatically turns on AnalyzeEntities
|
||||
// in such a case FoundEntity callbacks are sent
|
||||
void SkipEntities(bool skip_entities);
|
||||
|
||||
// analyze html entities such as &nbsp;
|
||||
// virtual method: FoundEntity is called
|
||||
// entities are analyzed in normal text and in attribute values such as <p class="a ">
|
||||
void AnalyzeEntities(bool analyze_entities);
|
||||
|
||||
|
||||
protected:
|
||||
|
||||
// orphans for one language
|
||||
struct Orphans
|
||||
{
|
||||
// table of orphan strings for this language
std::vector<std::wstring> tab;
|
||||
// NOTE(review): presumably the length of the longest string in 'tab',
// kept up to date by CalcOrphansMaxLen() - confirm against the implementation
size_t max_len;
|
||||
};
|
||||
|
||||
|
||||
// orphans for all languages
|
||||
// map<language_code, Orphans>
|
||||
typedef std::map<std::wstring, Orphans> OrphansTab;
|
||||
OrphansTab orphans_tab;
|
||||
|
||||
// html <nofilter> tag name
|
||||
std::wstring no_filter_tag;
|
||||
|
||||
|
||||
// description of a single html tag: its name, its kind and some state
// which is propagated from the parent tags
// NOTE(review): objects of this type appear to be the elements of the
// tag stack (see 'pstack') - confirm against the implementation
struct Item
|
||||
{
|
||||
// name of the html tag
std::wstring name; // max size: WINIX_HTMLFILTER_ITEM_NAME_MAXLEN
|
||||
|
||||
// kind of the tag
enum Type
|
||||
{
|
||||
opening, /* sample: <h1> */
|
||||
closing, /* sample: </h1> */
|
||||
simple, /* sample: <br/> */
|
||||
special, /* sample: <!doctype> */
|
||||
none
|
||||
} type;
|
||||
|
||||
// is there a new line after this tag
|
||||
bool new_line;
|
||||
|
||||
// current orphans table
|
||||
// (will be propagated)
|
||||
Orphans * porphans;
|
||||
|
||||
// this item or one from its parents is a 'body' html tag
|
||||
// (will be propagated)
|
||||
bool has_body_tag;
|
||||
|
||||
// NOTE(review): presumably resets the fields to their initial state
// (the definition is not visible here) - confirm
void Clear();
|
||||
Item();
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
virtual methods
|
||||
*/
|
||||
virtual void Init();
|
||||
virtual void Uninit();
|
||||
|
||||
virtual bool IsOpeningTagMark(wchar_t c);
|
||||
virtual bool IsClosingTagMark(wchar_t c);
|
||||
virtual bool IsClosingXmlSimpleTagMark(wchar_t c);
|
||||
virtual bool IsStartingEntityMark(wchar_t c);
|
||||
virtual bool IsEndingEntityMark(wchar_t c);
|
||||
|
||||
virtual bool IsOpeningCommentaryTagMark(const wchar_t * str);
|
||||
virtual size_t OpeningCommentaryTagMarkSize();
|
||||
|
||||
virtual bool IsValidCharForName(int c);
|
||||
virtual bool IsValidCharForAttrName(int c);
|
||||
virtual bool IsValidCharForEntityName(int c);
|
||||
virtual void CheckExceptions();
|
||||
virtual bool SkipCommentaryTagIfExists();
|
||||
|
||||
virtual void Put(wchar_t c);
|
||||
virtual void Put(const wchar_t * str);
|
||||
virtual void Put(const wchar_t * str, const wchar_t * end);
|
||||
virtual void Put(const std::wstring & str);
|
||||
virtual void AnalyzeEntitiesAndPut(const wchar_t * str, const wchar_t * end, std::wstring * out);
|
||||
|
||||
virtual void PutOpeningTagMark();
|
||||
virtual void PutClosingTagMark();
|
||||
virtual bool PutOpeningTag();
|
||||
virtual void PutClosingTag(const wchar_t * tag);
|
||||
|
||||
virtual void PutNormalText(const wchar_t * str, const wchar_t * end);
|
||||
virtual void ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white);
|
||||
|
||||
virtual void ItemFound();
|
||||
virtual void EntityFound(const wchar_t * str, const wchar_t * end);
|
||||
|
||||
/*
|
||||
others
|
||||
*/
|
||||
void SetSomeDefaults();
|
||||
|
||||
Item & GetItem(size_t i);
|
||||
Item & LastItem();
|
||||
|
||||
wchar_t ToLower(wchar_t c);
|
||||
void ToLower(std::wstring & str);
|
||||
|
||||
bool IsNameEqual(const wchar_t * name1, const wchar_t * name2);
|
||||
bool IsNameEqual(const wchar_t * name1, const std::wstring & name2);
|
||||
bool IsNameEqual(const std::wstring & name1, const wchar_t * name2);
|
||||
bool IsNameEqual(const std::wstring & name1, const std::wstring & name2);
|
||||
|
||||
bool IsNameEqual(const wchar_t * name1, const wchar_t * name2, size_t len);
|
||||
bool IsNameEqual(const wchar_t * name1, const std::wstring & name2, size_t len);
|
||||
bool IsNameEqual(const std::wstring & name1, const wchar_t * name2, size_t len);
|
||||
bool IsNameEqual(const std::wstring & name1, const std::wstring & name2, size_t len);
|
||||
|
||||
bool IsLastTag(const wchar_t * name);
|
||||
bool IsLastTag(const std::wstring & name);
|
||||
bool IsTagSafe(const wchar_t * tag);
|
||||
bool IsTagSafe(const std::wstring & tag);
|
||||
|
||||
int CheckOrphan(const wchar_t * str, const wchar_t * end, const std::wstring & orphan_str);
|
||||
bool CheckOrphan(const wchar_t * str, const wchar_t * end, const std::vector<std::wstring> & orphans);
|
||||
bool CheckOrphan(const wchar_t * str, const wchar_t * end);
|
||||
|
||||
bool IsWhite(int c);
|
||||
void SkipWhite();
|
||||
void SkipWhiteLines();
|
||||
void SkipWhiteWithFirstNewLine();
|
||||
void SkipWhiteLines(const wchar_t * & str, const wchar_t * end);
|
||||
bool IsClosingTagForLastItem();
|
||||
void SkipAndCheckClosingTag();
|
||||
|
||||
void PopStack();
|
||||
bool PushStack();
|
||||
void CheckNewLine();
|
||||
void CheckStackPrintRest();
|
||||
void AddForgottenTags();
|
||||
void CheckClosingTags();
|
||||
void ReadNormalText();
|
||||
bool PrintRest();
|
||||
bool PrintOpeningItem();
|
||||
void ReadItemName();
|
||||
void ReadItemAttrName();
|
||||
void ReadItemAttrValueAdd(const wchar_t * value_start, const wchar_t * value_end);
|
||||
void ReadItemAttrValue(bool has_quote, wchar_t quote_char);
|
||||
|
||||
bool ReadItemAttr();
|
||||
bool CheckItemAttr();
|
||||
void PrintItemAttr();
|
||||
|
||||
void ReadItemClosing();
|
||||
void ReadItemSpecial();
|
||||
void ReadItemOpening();
|
||||
bool ReadItem();
|
||||
void ReadLoop();
|
||||
void Read();
|
||||
|
||||
void CheckChar(wchar_t c);
|
||||
|
||||
void CheckLineWrap();
|
||||
bool HasEntityEndAround(const wchar_t * str, const wchar_t * end);
|
||||
void PutNormalNonWhite(const wchar_t * & str, const wchar_t * end);
|
||||
void PutNormalWhite(const wchar_t * & str, const wchar_t * end);
|
||||
void PutEverythingUntilClosingTag(bool put_closing_tag_as_well);
|
||||
void PutTabs(size_t len);
|
||||
void PutNonBreakingSpace();
|
||||
void PutNewLine();
|
||||
void CalcOrphansMaxLen(Orphans & orphans);
|
||||
|
||||
const wchar_t * pchar;
|
||||
Item empty;
|
||||
Item * pstack; // stack pointer
|
||||
size_t stack_len; // length of the stack
|
||||
wchar_t * buffer; // buffer used when printing
|
||||
std::wstring * out_string;
|
||||
bool last_new_line;
|
||||
size_t break_after; // insert a space into long words after 'break_after' characters
|
||||
size_t wrap_line; // insert a new line character into long lines
|
||||
bool trim_white; // trimming white characters
|
||||
size_t tab_size;
|
||||
OrphanMode orphan_mode;
|
||||
std::wstring attr_name;
|
||||
std::vector<std::wstring> attr_value;
|
||||
std::wstring attr_value_temp;
|
||||
std::wstring attr_value_lower;
|
||||
bool attr_has_value;
|
||||
std::wstring lang_code_lower;
|
||||
size_t line_len; //length of the current line (without first spaces which create the html tree)
|
||||
bool safe_mode; // skipping some unsafe tags
|
||||
Orphans orphans_temp;
|
||||
bool skip_tags;
|
||||
bool skip_commentaries;
|
||||
bool skip_entities;
|
||||
bool analyze_entities;
|
||||
};
|
||||
|
||||
|
||||
|
||||
} // namespace Winix
|
||||
|
||||
|
||||
|
||||
#endif
|
|
@ -83,7 +83,7 @@
|
|||
#include "uptime.h"
|
||||
#include "who.h"
|
||||
#include "vim.h"
|
||||
#include "core/htmlfilter.h"
|
||||
#include "html/htmlfilter.h"
|
||||
#include "core/winixrequest.h"
|
||||
|
||||
|
||||
|
@ -205,7 +205,7 @@ private:
|
|||
SessionManager * session_manager;
|
||||
|
||||
std::wstring temp;
|
||||
HTMLFilter html_filter;
|
||||
pt::HTMLFilter html_filter;
|
||||
std::wstring link_to_temp;
|
||||
|
||||
void CreateFunctions();
|
||||
|
|
|
@ -64,7 +64,7 @@ Ezc::Blocks ezc_blocks;
|
|||
Ezc::Objects<HtmlTextStream> ezc_objects;
|
||||
Ezc::Vars ezc_vars;
|
||||
LocaleFilter locale_filter;
|
||||
HTMLFilter html_filter;
|
||||
pt::HTMLFilter html_filter;
|
||||
|
||||
|
||||
|
||||
|
@ -1070,7 +1070,6 @@ void Templates::set_dependency(WinixRequest * winix_request)
|
|||
TemplatesFunctions::index_patterns.set_dependency(winix_request);
|
||||
TemplatesFunctions::pattern_cacher.set_dependency(winix_request);
|
||||
TemplatesFunctions::locale_filter.set_dependency(winix_request);
|
||||
TemplatesFunctions::html_filter.set_dependency(winix_request);
|
||||
|
||||
TemplatesFunctions::log.SetDependency(&this->log);
|
||||
TemplatesFunctions::plugin = winix_request->get_plugin();
|
||||
|
|
|
@ -50,7 +50,7 @@
|
|||
#include "core/cur.h"
|
||||
#include "core/system.h"
|
||||
#include "core/sessionmanager.h"
|
||||
#include "core/htmlfilter.h"
|
||||
#include "html/htmlfilter.h"
|
||||
#include "db/db.h"
|
||||
#include "core/winixrequest.h"
|
||||
|
||||
|
@ -79,7 +79,7 @@ namespace TemplatesFunctions
|
|||
extern Locale locale;
|
||||
extern EzcFun ezc_functions;
|
||||
extern LocaleFilter locale_filter;
|
||||
extern HTMLFilter html_filter;
|
||||
extern pt::HTMLFilter html_filter;
|
||||
extern Ezc::Stack empty_stack;
|
||||
|
||||
extern Db * db;
|
||||
|
|
Loading…
Reference in New Issue