renamed: config option 'html_filter_break_lines' to 'html_filter_break_word'
added: config option 'html_filter_wrap_line' -- this wraps the whole line (the line length is calculated with html tags as well); changed: orphans (for the html filter) are now read from locale files (the 'language_orphans' value); git-svn-id: svn://ttmath.org/publicrep/winix/trunk@728 e52654a7-88a9-db11-a3e9-0013d4bc506e
This commit is contained in:
@@ -114,12 +114,12 @@ misc.o: ../templates/patterncacher.h ../core/item.h
|
||||
misc.o: ../templates/ckeditorgetparser.h ../core/httpsimpleparser.h
|
||||
misc.o: ../core/log.h ../templates/indexpatterns.h
|
||||
misc.o: ../templates/localefilter.h ../templates/locale.h ../core/config.h
|
||||
misc.o: ../db/db.h ../db/dbbase.h ../db/dbconn.h ../db/dbtextstream.h
|
||||
misc.o: ../templates/htmltextstream.h ../core/cur.h ../core/system.h
|
||||
misc.o: ../core/sessionmanager.h ../core/htmlfilter.h ../db/db.h
|
||||
misc.o: ../db/dbbase.h ../db/dbconn.h ../db/dbtextstream.h
|
||||
misc.o: ../core/textstream.h ../core/error.h ../db/dbitemquery.h
|
||||
misc.o: ../db/dbitemcolumns.h ../core/user.h ../core/group.h
|
||||
misc.o: ../core/dircontainer.h ../core/ugcontainer.h
|
||||
misc.o: ../templates/htmltextstream.h ../core/cur.h ../core/system.h
|
||||
misc.o: ../core/sessionmanager.h
|
||||
mount.o: mount.h misc.h item.h
|
||||
mountparser.o: mountparser.h mount.h item.h error.h dirs.h dircontainer.h
|
||||
mountparser.o: ../db/db.h ../db/dbbase.h ../db/dbconn.h ../db/dbtextstream.h
|
||||
@@ -372,8 +372,8 @@ system.o: thumb.h basethread.h ../templates/templates.h
|
||||
system.o: ../templates/patterncacher.h ../templates/ckeditorgetparser.h
|
||||
system.o: ../core/httpsimpleparser.h ../core/log.h
|
||||
system.o: ../templates/indexpatterns.h ../core/cur.h ../core/system.h
|
||||
system.o: ../core/sessionmanager.h ../functions/functionbase.h
|
||||
system.o: ../core/request.h ../core/synchro.h
|
||||
system.o: ../core/sessionmanager.h ../core/htmlfilter.h
|
||||
system.o: ../functions/functionbase.h ../core/request.h ../core/synchro.h
|
||||
textstream.o: textstream.h misc.h item.h
|
||||
thumb.o: thumb.h basethread.h textstream.h ../db/db.h ../db/dbbase.h
|
||||
thumb.o: ../db/dbconn.h ../db/dbtextstream.h ../core/textstream.h misc.h
|
||||
|
13
core/app.cpp
13
core/app.cpp
@@ -604,16 +604,6 @@ void App::SendHeaders(bool compressing, int compress_encoding, Header header)
|
||||
|
||||
|
||||
|
||||
void App::SetHtmlFilterConf()
|
||||
{
|
||||
html_filter.TrimWhite(config.html_filter_trim_white);
|
||||
html_filter.BreakLines(config.html_filter_break_lines);
|
||||
html_filter.InsertTabs(config.html_filter_tabs);
|
||||
|
||||
if( config.html_filter_orphans )
|
||||
html_filter.CheckOrphans(config.html_filter_orphans_lang, config.html_filter_orphans_mode);
|
||||
}
|
||||
|
||||
|
||||
void App::FilterCompressSend(bool compressing, int compress_encoding, const std::wstring & source_ref)
|
||||
{
|
||||
@@ -624,8 +614,7 @@ void App::FilterCompressSend(bool compressing, int compress_encoding, const std:
|
||||
|
||||
if( config.html_filter && !raw )
|
||||
{
|
||||
SetHtmlFilterConf();
|
||||
html_filter.Filter(*source, clean_html);
|
||||
TemplatesFunctions::html_filter.Filter(*source, clean_html);
|
||||
AddDebugInfo(clean_html);
|
||||
source = &clean_html;
|
||||
}
|
||||
|
@@ -30,7 +30,6 @@
|
||||
#include "functions/functions.h"
|
||||
#include "templates/templates.h"
|
||||
#include "compress.h"
|
||||
#include "htmlfilter.h"
|
||||
#include "getparser.h"
|
||||
#include "postparser.h"
|
||||
#include "cookieparser.h"
|
||||
@@ -124,7 +123,6 @@ private:
|
||||
CookieParser cookie_parser;
|
||||
AcceptEncodingParser accept_encoding_parser;
|
||||
Compress compress;
|
||||
HTMLFilter html_filter;
|
||||
std::wstring clean_html, html_with_debug;
|
||||
FCGX_Request fcgi_request;
|
||||
int fcgi_socket;
|
||||
@@ -153,7 +151,6 @@ private:
|
||||
void CheckRequestMethod();
|
||||
void CheckFCGIRole();
|
||||
|
||||
void SetHtmlFilterConf();
|
||||
void PrepareSessionCookie();
|
||||
void AddDebugInfo(std::wstring & out);
|
||||
void FilterCompressSend(bool compressing, int compress_encoding, const std::wstring & source_ref);
|
||||
|
@@ -2,7 +2,7 @@
|
||||
* This file is a part of Winix
|
||||
* and is not publicly distributed
|
||||
*
|
||||
* Copyright (c) 2008-2010, Tomasz Sowa
|
||||
* Copyright (c) 2008-2011, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
*/
|
||||
@@ -201,6 +201,12 @@ const BBCODEParser::Tags * BBCODEParser::FindTag(const wchar_t * tag)
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convenience overload taking a std::wstring: it forwards to
// FindTag(const wchar_t *) with tag.c_str() and returns that result unchanged.
const BBCODEParser::Tags * BBCODEParser::FindTag(const std::wstring & tag)
|
||||
{
|
||||
return FindTag(tag.c_str());
|
||||
}
|
||||
|
||||
|
||||
|
||||
void BBCODEParser::PrintArgumentCheckQuotes(const wchar_t * & start, const wchar_t * & end)
|
||||
{
|
||||
|
@@ -2,7 +2,7 @@
|
||||
* This file is a part of Winix
|
||||
* and is not publicly distributed
|
||||
*
|
||||
* Copyright (c) 2008-2010, Tomasz Sowa
|
||||
* Copyright (c) 2008-2011, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
*/
|
||||
@@ -58,6 +58,7 @@ class BBCODEParser : public HTMLFilter
|
||||
virtual void PutClosingTag(const wchar_t * tag);
|
||||
|
||||
const Tags * FindTag(const wchar_t * tag);
|
||||
const Tags * FindTag(const std::wstring & tag);
|
||||
void PrintArgumentCheckQuotes(const wchar_t * & start, const wchar_t * & end);
|
||||
|
||||
|
||||
|
@@ -159,10 +159,10 @@ void Config::AssignValues(bool stdout_is_closed)
|
||||
|
||||
html_filter = Bool(L"html_filter", true);
|
||||
html_filter_trim_white = Bool(L"html_filter_trim_white", true);
|
||||
html_filter_break_lines = Int(L"html_filter_break_lines", 60);
|
||||
html_filter_break_word = Int(L"html_filter_break_word", 60);
|
||||
html_filter_wrap_line = Int(L"html_filter_wrap_line", 110);
|
||||
html_filter_tabs = Size(L"html_filter_tabs", 2);
|
||||
html_filter_orphans = Bool(L"html_filter_orphans", false);
|
||||
html_filter_orphans_lang_str = AText(L"html_filter_orphans_lang", L"pl");
|
||||
html_filter_orphans = Bool(L"html_filter_orphans", true);
|
||||
html_filter_orphans_mode_str = AText(L"html_filter_orphans_mode_str", L"nbsp");
|
||||
|
||||
locale_dir = Text(L"locale_dir");
|
||||
@@ -200,17 +200,6 @@ void Config::SetAdditionalVariables()
|
||||
{
|
||||
SetHttpHost(base_url, base_url_http_host);
|
||||
|
||||
if( html_filter_orphans_lang_str == "pl" )
|
||||
html_filter_orphans_lang = HTMLFilter::lang_pl;
|
||||
else
|
||||
if( html_filter_orphans_lang_str == "cz" )
|
||||
html_filter_orphans_lang = HTMLFilter::lang_cz;
|
||||
else
|
||||
if( html_filter_orphans_lang_str == "sk" )
|
||||
html_filter_orphans_lang = HTMLFilter::lang_sk;
|
||||
else
|
||||
html_filter_orphans_lang = HTMLFilter::lang_none;
|
||||
|
||||
if( html_filter_orphans_mode_str == "160" )
|
||||
html_filter_orphans_mode = HTMLFilter::orphan_160space;
|
||||
else
|
||||
|
@@ -167,25 +167,24 @@ public:
|
||||
// should white characters be trimmed
|
||||
bool html_filter_trim_white;
|
||||
|
||||
// when long lines (lines without a white character) should be break (inserted a space)
|
||||
// when long words should be broken (a space will be inserted)
|
||||
// default: 60 (a space will be inserted after 60 non-white characters)
|
||||
// set zero to turn off
|
||||
int html_filter_break_lines;
|
||||
size_t html_filter_break_word;
|
||||
|
||||
// when long lines should be broken (a new line character will be inserted)
|
||||
// default: 110
|
||||
// set zero to turn off
|
||||
size_t html_filter_wrap_line;
|
||||
|
||||
// how many spaces will be put at one tree level
|
||||
// default: 2
|
||||
size_t html_filter_tabs;
|
||||
|
||||
// use checking for 'orphans' for a specific language
|
||||
// default: false
|
||||
// default: true
|
||||
bool html_filter_orphans;
|
||||
|
||||
// language for html orphans
|
||||
// default: pl
|
||||
// can be either: "pl" or "cz" or "sk"
|
||||
std::string html_filter_orphans_lang_str;
|
||||
HTMLFilter::Lang html_filter_orphans_lang;
|
||||
|
||||
// orphans mode
|
||||
// either: "nbsp" or "160"
|
||||
// default: "nbsp"
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -2,7 +2,7 @@
|
||||
* This file is a part of Winix
|
||||
* and is not publicly distributed
|
||||
*
|
||||
* Copyright (c) 2008-2010, Tomasz Sowa
|
||||
* Copyright (c) 2008-2011, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
*/
|
||||
@@ -11,18 +11,31 @@
|
||||
#define headerfile_winix_core_htmlfilter
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
|
||||
|
||||
|
||||
// max length of a name of a html tag (with terminating null)
|
||||
#define WINIX_HTMLFILTER_ITEM_MAXLEN 30
|
||||
#define WINIX_HTMLFILTER_ITEM_NAME_MAXLEN 30
|
||||
|
||||
// max length of a html lang attribute (e.g. "en", "pl")
|
||||
#define WINIX_HTMLFILTER_ITEM_LANG_MAXLEN 10
|
||||
|
||||
|
||||
#define WINIX_HTMLFILTER_ATTR_NAME_MAXLEN 40
|
||||
|
||||
|
||||
#define WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN 500
|
||||
|
||||
|
||||
// depth of the html tree
|
||||
#define WINIX_HTMLFILTER_STACK_MAXLEN 100
|
||||
#define WINIX_HTMLFILTER_STACK_MAXLEN 100
|
||||
|
||||
// length of a buffer used for printing
|
||||
// it should be at least: WINIX_HTMLFILTER_ITEM_MAXLEN+3
|
||||
// it should be at least: WINIX_HTMLFILTER_ITEM_NAME_MAXLEN+3
|
||||
#define WINIX_HTMLFILTER_BUFFER_MAXLEN 2048
|
||||
|
||||
|
||||
@@ -48,23 +61,12 @@ class HTMLFilter
|
||||
{
|
||||
public:
|
||||
|
||||
|
||||
// for checking orphans
|
||||
enum Lang
|
||||
{
|
||||
lang_pl,
|
||||
lang_cz,
|
||||
lang_sk,
|
||||
lang_none
|
||||
};
|
||||
|
||||
// how a non-breaking space is written out after a detected orphan
enum OrphanMode
|
||||
{
|
||||
orphan_nbsp, // putting the "&nbsp;" string (html entity)
|
||||
orphan_160space // putting the character with code 160 (presumably the non-breaking space itself; 160 is outside plain 7-bit ASCII)
|
||||
};
|
||||
|
||||
|
||||
HTMLFilter();
|
||||
HTMLFilter(const HTMLFilter & f);
|
||||
HTMLFilter & operator=(const HTMLFilter & f);
|
||||
@@ -76,12 +78,18 @@ public:
|
||||
void Filter(const std::wstring & in, std::wstring & out);
|
||||
|
||||
|
||||
// insert a white space into long lines
|
||||
// only between html tags
|
||||
// insert a white space into long words
|
||||
// (only between html tags)
|
||||
// skipped in such tags: script, pre, textarea
|
||||
// break_after - after how many characters insert a space (0 - off)
|
||||
void BreakLines(size_t break_after_);
|
||||
void BreakWord(size_t break_after_);
|
||||
|
||||
// insert a new line character into long lines
|
||||
// (only between html tags)
|
||||
// skipped in such tags: script, pre, textarea
|
||||
// wrap_line - after how many characters wrap a line (0 - off)
|
||||
// lines are wrapped only in 'body' tag (useful for text in 'title' tag which is in 'head' section)
|
||||
void WrapLine(size_t wrap_line_);
|
||||
|
||||
// trimming white characters (with new lines)
|
||||
// at the beginning, at the end and in the middle of a string
|
||||
@@ -98,10 +106,16 @@ public:
|
||||
void InsertTabs(size_t tabsize);
|
||||
|
||||
|
||||
// orphans are checked only in 'body' tag
|
||||
void AssignOrphans(const wchar_t * lang_code, const std::vector<std::wstring> & otab);
|
||||
void AssignOrphans(const std::wstring & lang_code, const std::vector<std::wstring> & otab);
|
||||
void ClearOrphans();
|
||||
|
||||
|
||||
// check 'orphans' for the specific language
|
||||
// if an orphan is detected then a non-breaking space ("&nbsp;" or the character with code 160) will be put
|
||||
// default disable (lang_none)
|
||||
void CheckOrphans(Lang lang_, OrphanMode mode = orphan_nbsp);
|
||||
void OrphansMode(OrphanMode mode = orphan_nbsp);
|
||||
|
||||
|
||||
// skipping some unsafe tags
|
||||
@@ -112,52 +126,80 @@ public:
|
||||
|
||||
protected:
|
||||
|
||||
// orphans for one language
|
||||
// Set of orphan words for a single language; stored per language code in OrphansTab.
struct Orphans
|
||||
{
|
||||
std::vector<std::wstring> tab; // the orphan words themselves
|
||||
size_t max_len; // presumably the length of the longest entry in 'tab' (see CalcOrphansMaxLen) -- TODO confirm
|
||||
};
|
||||
|
||||
|
||||
// orphans for all languages
|
||||
// map<language_code, Orphans>
|
||||
typedef std::map<std::wstring, Orphans> OrphansTab;
|
||||
OrphansTab orphans_tab;
|
||||
|
||||
|
||||
struct Item
|
||||
{
|
||||
wchar_t name[WINIX_HTMLFILTER_ITEM_MAXLEN];
|
||||
size_t name_len;
|
||||
std::wstring name; // max size: WINIX_HTMLFILTER_ITEM_NAME_MAXLEN
|
||||
|
||||
enum Type
|
||||
{
|
||||
opening,
|
||||
closing,
|
||||
simple,
|
||||
special,
|
||||
opening, /* sample: <h1> */
|
||||
closing, /* sample: </h1> */
|
||||
simple, /* sample: <br/> */
|
||||
special, /* sample: <!doctype> */
|
||||
none
|
||||
} type;
|
||||
|
||||
// is there a new line after this tag
|
||||
bool new_line;
|
||||
|
||||
// current orphans table
|
||||
// (will be propagated)
|
||||
Orphans * porphans;
|
||||
|
||||
// this item or one from its parents is a 'body' html tag
|
||||
// (will be propagated)
|
||||
bool has_body_tag;
|
||||
|
||||
void Clear();
|
||||
Item();
|
||||
};
|
||||
|
||||
|
||||
// only this method has direct access to the output string
|
||||
// you can easily change the output from a std::wstring to something else
|
||||
virtual void Put(const wchar_t * str, const wchar_t * end);
|
||||
|
||||
|
||||
Item & GetItem(size_t i);
|
||||
Item & LastItem();
|
||||
|
||||
wchar_t ToLower(wchar_t c);
|
||||
bool IsNameEqual(const wchar_t * name1, const wchar_t * name2);
|
||||
bool IsNameEqual(const wchar_t * name1, const wchar_t * name2, size_t len);
|
||||
void ToLower(std::wstring & str);
|
||||
|
||||
bool IsNameEqual(const wchar_t * name1, const wchar_t * name2);
|
||||
bool IsNameEqual(const wchar_t * name1, const std::wstring & name2);
|
||||
bool IsNameEqual(const std::wstring & name1, const wchar_t * name2);
|
||||
bool IsNameEqual(const std::wstring & name1, const std::wstring & name2);
|
||||
|
||||
bool IsNameEqual(const wchar_t * name1, const wchar_t * name2, size_t len);
|
||||
bool IsNameEqual(const wchar_t * name1, const std::wstring & name2, size_t len);
|
||||
bool IsNameEqual(const std::wstring & name1, const wchar_t * name2, size_t len);
|
||||
bool IsNameEqual(const std::wstring & name1, const std::wstring & name2, size_t len);
|
||||
|
||||
bool IsLastTag(const wchar_t * name);
|
||||
bool IsTagSafe(const wchar_t * tag);
|
||||
bool IsTagSafe(const std::wstring & tag);
|
||||
|
||||
int CheckOrphan(const wchar_t * str, const wchar_t * end, const wchar_t * orphan);
|
||||
bool CheckOrphanTable(const wchar_t * str, const wchar_t * end, const wchar_t ** table, size_t o1, size_t o2);
|
||||
bool CheckOrphanLangPl(const wchar_t * str, const wchar_t * end);
|
||||
bool CheckOrphanLangCz(const wchar_t * str, const wchar_t * end);
|
||||
int CheckOrphan(const wchar_t * str, const wchar_t * end, const std::wstring & orphan_str);
|
||||
bool CheckOrphan(const wchar_t * str, const wchar_t * end, const std::vector<std::wstring> & orphans);
|
||||
bool CheckOrphan(const wchar_t * str, const wchar_t * end);
|
||||
|
||||
bool IsWhite(int c);
|
||||
void SkipWhite();
|
||||
void SkipWhiteLines();
|
||||
void SkipWhiteWithFirstNewLine();
|
||||
void SkipWhiteLines(const wchar_t * & str, const wchar_t * end);
|
||||
bool IsClosingTagForLastItem();
|
||||
virtual bool IsOpeningTagMark();
|
||||
virtual bool IsOpeningCommentaryTagMark();
|
||||
@@ -165,11 +207,12 @@ protected:
|
||||
virtual bool IsClosingTagMark();
|
||||
virtual bool IsClosingXmlSimpleTagMark();
|
||||
bool SkipCommentaryTagIfExists();
|
||||
const wchar_t * SkipItemCheckXmlSimple();
|
||||
void SkipAndCheckClosingTag();
|
||||
|
||||
void PopStack();
|
||||
bool PushStack();
|
||||
virtual bool IsValidCharForName(int c);
|
||||
virtual bool IsValidCharForAttrName(int c);
|
||||
void CheckNewLine();
|
||||
virtual void CheckExceptions();
|
||||
void CheckStackPrintRest();
|
||||
@@ -178,41 +221,68 @@ protected:
|
||||
virtual void ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white);
|
||||
void ReadNormalText();
|
||||
bool PrintRest();
|
||||
void PrintItem(const wchar_t * start, const wchar_t * end);
|
||||
void PrintOpeningItem();
|
||||
void ReadItemName();
|
||||
void ReadItemAttrName();
|
||||
void ReadItemAttrValue(bool has_quote);
|
||||
|
||||
bool ReadItemAttr();
|
||||
bool CheckItemAttr();
|
||||
void PrinItemAttr();
|
||||
|
||||
void ReadItemClosing();
|
||||
void ReadItemSpecial();
|
||||
void ReadItemOpening();
|
||||
bool ReadItem();
|
||||
virtual void Init();
|
||||
virtual void Deinit();
|
||||
void ReadLoop();
|
||||
void Read();
|
||||
|
||||
size_t PutNormalTextTrimFillBuffer(const wchar_t * & str, const wchar_t * & end);
|
||||
size_t PutNormalTextFillBuffer(const wchar_t * & str, const wchar_t * & end);
|
||||
void CheckChar(wchar_t c);
|
||||
|
||||
virtual void Put(wchar_t c);
|
||||
virtual void Put(const wchar_t * str);
|
||||
virtual void Put(const wchar_t * str, const wchar_t * end);
|
||||
virtual void Put(const std::wstring & str);
|
||||
|
||||
void CheckLineWrap();
|
||||
bool HasSemiloconAround(const wchar_t * str, const wchar_t * end);
|
||||
void PutNormalNonWhite(const wchar_t * & str, const wchar_t * end);
|
||||
void PutNormalWhite(const wchar_t * & str, const wchar_t * end);
|
||||
virtual void PutNormalText(const wchar_t * str, const wchar_t * end);
|
||||
virtual void PutNormalTextTrim(const wchar_t * str, const wchar_t * end);
|
||||
void PutLastTagWithClosingTag();
|
||||
virtual void PutOpeningTagMark();
|
||||
virtual void PutClosingTagMark();
|
||||
virtual void PutTagName(const wchar_t * name);
|
||||
virtual void PutOpeningTag(const wchar_t * start, const wchar_t * end);
|
||||
void PutTagName(const std::wstring & name);
|
||||
virtual void PutOpeningTag(bool close_tag = true);
|
||||
virtual void PutClosingTag(const wchar_t * tag);
|
||||
size_t PutTabsToBuffer(size_t index, size_t len);
|
||||
size_t PutNonBreakSpaceToBuffer(size_t index);
|
||||
void PutTabs(size_t len);
|
||||
void PutNonBreakingSpace();
|
||||
void PutNewLine();
|
||||
void CalcOrphansMaxLen(Orphans & orphans);
|
||||
|
||||
const wchar_t * pchar;
|
||||
Item empty;
|
||||
Item * pstack; // stack pointer
|
||||
size_t stack_len; // length of the stack
|
||||
Item * pstack; // stack pointer
|
||||
size_t stack_len; // length of the stack
|
||||
wchar_t * buffer; // buffer used when printing
|
||||
std::wstring * out_string;
|
||||
bool last_new_line;
|
||||
size_t break_after; // insert a space into long lines after break_after characters
|
||||
bool trim_white; // trimming white characters
|
||||
size_t break_after; // insert a space into long words after 'break_after' characters
|
||||
size_t wrap_line; // insert a new line character into long lines
|
||||
bool trim_white; // trimming white characters
|
||||
size_t tab_size;
|
||||
Lang lang; // current language for checking orphans
|
||||
OrphanMode orphan_mode;
|
||||
bool safe_mode; // skipping some unsafe tags
|
||||
std::wstring attr_name;
|
||||
std::wstring attr_value;
|
||||
std::wstring attr_value_lower;
|
||||
bool attr_has_value;
|
||||
std::wstring lang_code_lower;
|
||||
size_t line_len; //length of the current line (without first spaces which create the html tree)
|
||||
bool safe_mode; // skipping some unsafe tags
|
||||
Orphans orphans_temp;
|
||||
};
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user