/*
 * This file is a part of CMSLU -- Content Management System like Unix
 * and is not publicly distributed
 *
 * Copyright (c) 2008-2009, Tomasz Sowa
 * All rights reserved.
 *
 */

#ifndef headerfilecmslucorehtmlfilter
#define headerfilecmslucorehtmlfilter

// std::string is used by both Filter() overloads and by the out_string member
#include <string>


// max length of a name of a html tag (with terminating null)
#define CMSLU_HTMLFILTER_ITEM_MAXLEN 30

// depth of the html tree
#define CMSLU_HTMLFILTER_STACK_MAXLEN 100

// length of a buffer used for printing
// it should be at least: CMSLU_HTMLFILTER_ITEM_MAXLEN+3
#define CMSLU_HTMLFILTER_BUFFER_MAXLEN 1024


/*!
    very lightweight filter for html
    (without using any dynamic memory while filtering - some memory
    is allocated only at the beginning - in ctors)

    this filter has O(n) complexity over the whole html string

    some tags are left untouched
    (NOTE(review): the list of these tags is missing from this comment,
    confirm it against the implementation)

    if the filter finds that there are tags which were not closed, it will close them,
    if the filter finds a closing tag which doesn't have an opening tag, it will skip it

    tags which don't need to be closed: meta, input, br, img, link
    (look at the CheckExceptions() method)

    the filter recognizes xml simple tags (with / at the end),
    presumably such as: <br />
*/
class HTMLFilter
{
public:

    // the class declares its own copy constructor, copy assignment and destructor
    // (according to the class description memory is allocated in ctors)
    HTMLFilter();
    HTMLFilter(const HTMLFilter & f);
    HTMLFilter & operator=(const HTMLFilter & f);
    ~HTMLFilter();


    // main methods used for filtering
    // in  - html to be filtered
    // out - string which receives the result
    void Filter(const char * in, std::string & out);
    void Filter(const std::string & in, std::string & out);


    // insert a white space into long lines
    // only between html tags
    // skipped in such tags: script, pre, textarea
    // false by default
    void BreakLongLines(bool break_lines);


    // trimming white characters (with new lines)
    // at the beginning, at the end and in the middle of a string
    // only between html tags
    // at the beginning and at the end only one space is left
    // skipped in such tags: script, pre, textarea
    // false by default
    void TrimWhite(bool trim);


    // first tabs in a tree
    // default: 2 (spaces)
    // set 0 to turn off
    void InsertTabs(size_t tabsize);


protected:

    // one html tag
    struct Item
    {
        // name of the tag, at most CMSLU_HTMLFILTER_ITEM_MAXLEN chars
        // (the limit includes the terminating null)
        char name[CMSLU_HTMLFILTER_ITEM_MAXLEN];
        size_t name_len;

        enum Type
        {
            opening,
            closing,
            simple,
            special,
            none
        } type;

        // is there a new line after this tag
        bool new_line;

        void Clear();
        Item();
    };


    // only this method has direct access to the output string
    // you can easily change the output from a std::string to something else
    void Put(const char * str, const char * end);


    // internal helpers; they are defined in the implementation file
    // which is not visible from this header
    Item & GetItem(size_t i);
    Item & LastItem();

    int ToLower(int c);
    bool IsNameEqual(const char * name1, const char * name2);
    bool IsNameEqual(const char * name1, const char * name2, size_t len);
    bool IsLastTag(const char * name);
    bool IsWhite(int c);
    void SkipWhite();
    void SkipWhiteLines();
    bool SkipTagCheck();
    void SkipNormalText();
    bool IsOpeningCommentaryTag();
    bool SkipCommentaryTagIfExists();
    void SkipItem();
    void SkipItemCheckXmlSimple();
    void PopStack();
    bool PushStack();
    bool IsValidCharForName(int c);
    void CheckNewLine();
    void CheckExceptions();
    void AddForgottenTags();
    void CheckClosingTags();
    void ReadNormalText();
    void PrintRest();
    void PrintItem(const char * start, const char * end);
    void ReadItemName();
    bool ReadItem();
    void Read();

    size_t PutTrimFillBuffer(const char * & str, const char * & end);
    void PutTrim(const char * str, const char * end);
    void PutLastTagWithClosingTag();
    void PutOpeningTag(const char * tag);
    void PutClosingTag(const char * tag);
    void PutTabs(size_t len);
    void PutNewLine();


    const char * pchar;
    Item empty;
    Item * pstack;              // stack pointer
    size_t stack_len;           // length of the stack
    char * buffer;              // buffer used when printing
    std::string * out_string;
    bool last_new_line;
    bool break_long_lines;      // insert a space into long lines
    bool trim_white;            // trimming white characters
    size_t tab_size;
};


#endif