/*
 * This file is a part of CMSLU -- Content Management System like Unix
 * and is not publicly distributed
 *
 * Copyright (c) 2008-2009, Tomasz Sowa
 * All rights reserved.
 *
 */

#ifndef headerfilecmslucorehtmlfilter
#define headerfilecmslucorehtmlfilter

#include <cstddef>
#include <string>



// max length of a name of a html tag (with terminating null)
#define CMSLU_HTMLFILTER_ITEM_MAXLEN 30

// depth of the html tree
#define CMSLU_HTMLFILTER_STACK_MAXLEN 100

// length of a buffer used for printing
// it should be at least: CMSLU_HTMLFILTER_ITEM_MAXLEN+3
#define CMSLU_HTMLFILTER_BUFFER_MAXLEN 1024

// enforce the requirement stated above at compile time, so that a later change
// of one of the values cannot silently make the printing buffer too small
#if CMSLU_HTMLFILTER_BUFFER_MAXLEN < (CMSLU_HTMLFILTER_ITEM_MAXLEN + 3)
#error "CMSLU_HTMLFILTER_BUFFER_MAXLEN should be at least CMSLU_HTMLFILTER_ITEM_MAXLEN+3"
#endif



/*!
|
|
very lightweight filter for html
|
|
(without using any dynamic memory - some memory is allocated only at the beginning - in ctors)
|
|
this filter has O(n) complexity over the whole html string
|
|
|
|
such tags as: <script> <pre> <textarea> are treated in a special way
|
|
all characters between the opening and closing tag (<script>....</script>) are untouched
|
|
|
|
if the filter finds that there are not closed tags it will close them,
|
|
if the filter finds a closing tag which doesn't have an opening tag - it will skip it
|
|
|
|
tags which don't need to be closed: meta, input, br, img, link
|
|
look at CheckExceptions() method
|
|
|
|
the filter recognizes xml simple tags (with / at the end) such as: <br />
|
|
*/
|
|
class HTMLFilter
|
|
{
|
|
public:
|
|
|
|
HTMLFilter();
|
|
HTMLFilter(const HTMLFilter & f);
|
|
HTMLFilter & operator=(const HTMLFilter & f);
|
|
~HTMLFilter();
|
|
|
|
// main methods used for filtering
|
|
void Filter(const char * in, std::string & out);
|
|
void Filter(const std::string & in, std::string & out);
|
|
|
|
// insert a white space into long lines
|
|
// only between html tags
|
|
// skipped in such tags: script, pre, textarea
|
|
// false by default
|
|
void BreakLongLines(bool break_lines);
|
|
|
|
// trimming white characters (with new lines)
|
|
// at the beginning, at the end and in the middle of a string
|
|
// only between html tags
|
|
// at the beginning and at the end only one space is left
|
|
// skipped in such tags: script, pre, textarea
|
|
// false by default
|
|
void TrimWhite(bool trim);
|
|
|
|
// first tabs in a tree
|
|
// default: 2 (spaces)
|
|
// set 0 to turn off
|
|
void InsertTabs(size_t tabsize);
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
struct Item
|
|
{
|
|
char name[CMSLU_HTMLFILTER_ITEM_MAXLEN];
|
|
size_t name_len;
|
|
|
|
enum Type
|
|
{
|
|
opening,
|
|
closing,
|
|
simple,
|
|
special,
|
|
none
|
|
} type;
|
|
|
|
// is there a new line after this tag
|
|
bool new_line;
|
|
|
|
void Clear();
|
|
Item();
|
|
};
|
|
|
|
|
|
// only this method have direct access to the output string
|
|
// you can easily change the output from a std::string to something else
|
|
void Put(const char * str, const char * end);
|
|
|
|
|
|
Item & GetItem(size_t i);
|
|
Item & LastItem();
|
|
|
|
int ToLower(int c);
|
|
bool IsNameEqual(const char * name1, const char * name2);
|
|
bool IsNameEqual(const char * name1, const char * name2, size_t len);
|
|
bool IsLastTag(const char * name);
|
|
|
|
bool IsWhite(int c);
|
|
void SkipWhite();
|
|
void SkipWhiteLines();
|
|
bool SkipTagCheck();
|
|
void SkipNormalText();
|
|
bool IsOpeningCommentaryTag();
|
|
bool SkipCommentaryTagIfExists();
|
|
void SkipItem();
|
|
void SkipItemCheckXmlSimple();
|
|
|
|
void PopStack();
|
|
bool PushStack();
|
|
bool IsValidCharForName(int c);
|
|
void CheckNewLine();
|
|
void CheckExceptions();
|
|
void AddForgottenTags();
|
|
void CheckClosingTags();
|
|
void ReadNormalText();
|
|
void PrintRest();
|
|
void PrintItem(const char * start, const char * end);
|
|
void ReadItemName();
|
|
bool ReadItem();
|
|
void Read();
|
|
|
|
size_t PutTrimFillBuffer(const char * & str, const char * & end);
|
|
void PutTrim(const char * str, const char * end);
|
|
void PutLastTagWithClosingTag();
|
|
void PutOpeningTag(const char * tag);
|
|
void PutClosingTag(const char * tag);
|
|
void PutTabs(size_t len);
|
|
void PutNewLine();
|
|
|
|
const char * pchar;
|
|
Item empty;
|
|
Item * pstack; // stack pointer
|
|
size_t stack_len; // length of the stack
|
|
char * buffer; // buffer used when printing
|
|
std::string * out_string;
|
|
bool last_new_line;
|
|
bool break_long_lines; // insert a space into long lines
|
|
bool trim_white; // trimming white characters
|
|
size_t tab_size;
|
|
};



#endif