@ -5,7 +5,7 @@
*/
/*
* Copyright ( c ) 2008 - 201 4 , Tomasz Sowa
* Copyright ( c ) 2008 - 201 8 , Tomasz Sowa
* All rights reserved .
*
* Redistribution and use in source and binary forms , with or without
@ -97,17 +97,25 @@ size_t out_projected_len = in.size() * 2 + 1;
}
HTMLFilter : : HTMLFilter ( )
void HTMLFilter : : SetSomeDefaults ( )
{
pstack = new Item [ WINIX_HTMLFILTER_STACK_MAXLEN ] ;
buffer = new wchar_t [ WINIX_HTMLFILTER_BUFFER_MAXLEN ] ;
tab_size = 2 ;
trim_white = false ;
break_after = 0 ;
wrap_line = 0 ;
orphan_mode = orphan_nbsp ;
safe_mode = false ;
skip_all_tags = false ;
skip_commentaries = false ;
}
// Default constructor: allocates the item stack (pstack) and the wide
// character buffer, then applies the option defaults from SetSomeDefaults().
HTMLFilter::HTMLFilter()
{
	pstack = new Item[WINIX_HTMLFILTER_STACK_MAXLEN];
	buffer = new wchar_t[WINIX_HTMLFILTER_BUFFER_MAXLEN];

	SetSomeDefaults();
}
@ -116,6 +124,8 @@ HTMLFilter::HTMLFilter(const HTMLFilter & f)
// don't need to copy the stack
pstack = new Item [ WINIX_HTMLFILTER_STACK_MAXLEN ] ;
buffer = new wchar_t [ WINIX_HTMLFILTER_BUFFER_MAXLEN ] ;
SetSomeDefaults ( ) ;
}
@ -125,6 +135,8 @@ HTMLFilter & HTMLFilter::operator=(const HTMLFilter & f)
pstack = new Item [ WINIX_HTMLFILTER_STACK_MAXLEN ] ;
buffer = new wchar_t [ WINIX_HTMLFILTER_BUFFER_MAXLEN ] ;
// we can copy some fields from f
return * this ;
}
@ -136,6 +148,8 @@ HTMLFilter::~HTMLFilter()
}
void HTMLFilter : : BreakWord ( size_t break_after_ )
{
break_after = break_after_ ;
@ -224,6 +238,13 @@ void HTMLFilter::SafeMode(bool safe_mode_)
}
void HTMLFilter : : SkipAllTags ( bool skip_all_tags , bool skip_commentaries )
{
this - > skip_all_tags = skip_all_tags ;
this - > skip_commentaries = skip_commentaries ;
}
void HTMLFilter : : SetNoFilterTag ( const std : : wstring & tag_name )
{
no_filter_tag = tag_name ;
@ -374,9 +395,10 @@ return false;
// used for such tags as: script, pre, textarea
void HTMLFilter : : Put LastTagWithClosingTag( )
void HTMLFilter : : Put EverythingUntilClosingTag( bool put_closing_tag_as_well )
{
const wchar_t * start = pchar ;
const wchar_t * end = pchar ;
while ( * pchar ! = 0 )
{
@ -384,32 +406,9 @@ const wchar_t * start = pchar;
{
if ( IsClosingTagForLastItem ( ) )
{
PopStack ( ) ;
CheckNewLine ( ) ;
break ;
}
}
else
{
pchar + = 1 ;
}
}
Put ( start , pchar ) ;
}
// used with <nofilter> </nofilter> tags
void HTMLFilter : : PutTextBetweenLastTagWithClosingTag ( )
{
const wchar_t * start = pchar , * end = pchar ;
if ( put_closing_tag_as_well )
end = pchar ;
while ( * pchar ! = 0 )
{
if ( IsOpeningTagMark ( ) )
{
if ( IsClosingTagForLastItem ( ) )
{
PopStack ( ) ;
CheckNewLine ( ) ;
break ;
@ -427,16 +426,36 @@ const wchar_t * start = pchar, * end = pchar;
void HTMLFilter : : SkipAndCheckClosingTag ( )
{
bool is_quoted = false ;
wchar_t quote_char = 0 ;
for ( ; * pchar ; + + pchar )
{
if ( LastItem ( ) . type = = Item : : opening & & IsClosingXmlSimpleTagMark ( ) ) // closing xml tag: default '/'
if ( * pchar = = ' " ' | | * pchar = = ' \' ' )
{
if ( is_quoted )
{
if ( * pchar = = quote_char )
{
is_quoted = false ;
}
}
else
{
is_quoted = true ;
quote_char = * pchar ;
}
}
else
if ( ! is_quoted & & LastItem ( ) . type = = Item : : opening & & IsClosingXmlSimpleTagMark ( ) ) // closing xml tag: default '/'
{
LastItem ( ) . type = Item : : simple ;
}
if ( IsClosingTagMark ( ) )
else
if ( ! is_quoted & & IsClosingTagMark ( ) )
{
+ + pchar ;
break ;
@ -502,18 +521,26 @@ size_t i;
void HTMLFilter : : ReadItemAttrValue ( bool has_quote )
void HTMLFilter : : ReadItemAttrValue ( bool has_quote , wchar_t quote_char )
{
size_t i ;
// sprawdzic to wszedzie bo teraz jest tablica
attr_value . clear ( ) ;
attr_value_temp . clear ( ) ;
// !! dodac obsluge pojedynczego cudzyslowu
for ( i = 0 ; * pchar & & * pchar ! = ' \" ' & & ! IsClosingTagMark ( ) & & ( has_quote | | ( * pchar ! = 10 & & ! IsWhite ( * pchar ) ) ) ; + + i )
for ( i = 0 ; * pchar ; + + i , + + pchar )
{
if ( has_quote )
{
if ( * pchar = = quote_char )
break ;
}
else
{
if ( IsClosingTagMark ( ) | | * pchar = = 10 | | IsWhite ( * pchar ) )
break ;
}
if ( * pchar = = 10 | | IsWhite ( * pchar ) )
{
if ( ! attr_value_temp . empty ( ) )
@ -524,9 +551,9 @@ size_t i;
}
else
if ( i < WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN )
{
attr_value_temp + = * pchar ;
+ + pchar ;
}
}
if ( ! attr_value_temp . empty ( ) )
@ -795,8 +822,8 @@ void HTMLFilter::PutClosingTagMark()
// !! zmienic na lepsza nazw e
// bo to nie zwraca true jesli tag jest safe
// !! IMPROVE ME change to a better name
// this function does not return true when the tag is safe
bool HTMLFilter : : IsTagSafe ( const wchar_t * tag )
{
if ( ! safe_mode )
@ -838,9 +865,10 @@ bool HTMLFilter::IsTagSafe(const std::wstring & tag)
bool HTMLFilter : : PutOpeningTag ( )
{
if ( ! IsTagSafe ( LastItem ( ) . name ) )
// !! IMPROVE ME
// !! dodac tutaj skipniecie calego tagu
{
SkipAndCheckClosingTag ( ) ;
return false ;
}
PutOpeningTagMark ( ) ;
Put ( LastItem ( ) . name ) ;
@ -852,7 +880,7 @@ return true;
void HTMLFilter : : PutClosingTag ( const wchar_t * tag )
{
if ( ! IsTagSafe ( tag ) )
if ( skip_all_tags | | ! IsTagSafe ( tag ) )
return ;
PutOpeningTagMark ( ) ;
@ -1005,9 +1033,19 @@ const wchar_t * last_non_white = pchar;
while ( * pchar ! = 0 )
{
const wchar_t * commentary_start = pchar ;
if ( SkipCommentaryTagIfExists ( ) )
{
last_non_white = pchar - 1 ; // pointing at the last '>' from a commentary
PutNormalText ( start , commentary_start ) ;
if ( ! skip_commentaries )
{
PutNormalText ( commentary_start , pchar ) ;
}
start = pchar ;
}
else
{
@ -1029,7 +1067,7 @@ const wchar_t * last_non_white = pchar;
bool HTMLFilter : : PrintOpeningItem ( )
{
if ( IsNameEqual( no_filter_tag , LastItem ( ) . name ) )
if ( skip_all_tags | | IsNameEqual( no_filter_tag , LastItem ( ) . name ) )
return true ;
if ( last_new_line )
@ -1068,16 +1106,15 @@ bool HTMLFilter::ReadItemAttr()
pchar + = 1 ; // skipping '='
SkipWhiteLines ( ) ;
// !! dodac obsluge pojedynczego cudzyslowu
bool has_quote = ( * pchar = = ' \" ' ) ;
bool has_quote = ( * pchar = = ' \" ' | | * pchar = = ' \' ' ) ;
wchar_t quote_char = * pchar ;
if ( has_quote )
pchar + = 1 ; // skipping the first quote mark
ReadItemAttrValue ( has_quote );
ReadItemAttrValue ( has_quote , quote_char );
if ( * pchar = = ' \" ' )
if ( has_quote & & * pchar = = quote_char )
pchar + = 1 ; // skipping the last quote mark
return true ;
@ -1112,7 +1149,7 @@ void HTMLFilter::PrintItemAttr()
{
size_t i ;
if ( IsNameEqual( no_filter_tag , LastItem ( ) . name ) )
if ( skip_all_tags | | IsNameEqual( no_filter_tag , LastItem ( ) . name ) )
return ;
Put ( ' ' ) ;
@ -1150,11 +1187,17 @@ void HTMLFilter::ReadItemClosing()
void HTMLFilter : : ReadItemSpecial ( )
{
LastItem ( ) . type = Item : : special ;
PutOpeningTagMark ( ) ;
if ( ! skip_all_tags )
PutOpeningTagMark ( ) ;
const wchar_t * start = pchar ;
pchar + = 1 ; // skipping '!'
ReadItemName ( ) ;
SkipAndCheckClosingTag ( ) ;
if ( pchar > start )
if ( ! skip_all_tags & & pchar > start )
Put ( start , pchar ) ;
// closing tag mark is printed directly from the source
@ -1176,7 +1219,7 @@ void HTMLFilter::ReadItemOpening()
SkipAndCheckClosingTag ( ) ; // here LastItem().type can be changed to 'simple'
if ( ! IsNameEqual( no_filter_tag , LastItem ( ) . name ) )
if ( ! skip_all_tags & & ! IsNameEqual( no_filter_tag , LastItem ( ) . name ) )
{
if ( LastItem ( ) . type = = Item : : simple )
Put ( L " / " ) ;
@ -1187,6 +1230,11 @@ void HTMLFilter::ReadItemOpening()
}
void HTMLFilter : : ItemFound ( )
{
}
bool HTMLFilter : : ReadItem ( )
{
if ( * pchar = = 0 )
@ -1209,6 +1257,8 @@ bool HTMLFilter::ReadItem()
CheckNewLine ( ) ;
LastItem ( ) . new_line = last_new_line ;
ItemFound ( ) ;
return true ;
}
@ -1332,13 +1382,13 @@ void HTMLFilter::CheckExceptions()
// in safe_mode the script tag is ignored
if ( ! safe_mode & & IsLastTag ( L " script " ) )
Put LastTagWithClosingTag( ) ;
Put EverythingUntilClosingTag( ! skip_all_tags ) ;
if ( IsLastTag ( L " pre " ) | | IsLastTag ( L " textarea " ) )
Put LastTagWithClosingTag( ) ;
Put EverythingUntilClosingTag( ! skip_all_tags ) ;
if ( IsLastTag ( no_filter_tag ) )
Put TextBetweenLastTagWithClosingTag( ) ;
Put EverythingUntilClosingTag( false ) ;
if ( IsLastTag ( L " body " ) )
LastItem ( ) . has_body_tag = true ;
@ -1371,7 +1421,7 @@ int i;
for ( int z = ( int ) stack_len - 2 ; z > = i ; - - z )
{
if ( pstack [ z ] . new_line )
if ( ! skip_all_tags & & pstack [ z ] . new_line )
{
PutNewLine ( ) ;
PutTabs ( z ) ;
@ -1421,7 +1471,7 @@ void HTMLFilter::CheckClosingTags()
if ( IsNameEqual ( pstack [ stack_len - 1 ] . name , pstack [ stack_len - 2 ] . name ) )
{
// last closing tag is from the previous one
if ( pstack [ stack_len - 2 ] . new_line )
if ( ! skip_all_tags & & pstack [ stack_len - 2 ] . new_line )
{
PutNewLine ( ) ;
PutTabs ( stack_len - 2 ) ;
@ -1444,7 +1494,7 @@ bool HTMLFilter::PrintRest()
const wchar_t * start = pchar ;
// in safe mode we do not print the rest html code
if ( safe_mode )
if ( safe_mode | | skip_all_tags )
return false ;
while ( * pchar )
@ -1474,7 +1524,7 @@ void HTMLFilter::ReadLoop()
{
if ( stack_len > 1 )
{
pstack [ stack_len - 2 ] . new_line = LastItem ( ) . new_line ;
//pstack[stack_len-2].new_line = LastItem().new_line;
}
else
if ( trim_white )
@ -1492,6 +1542,10 @@ void HTMLFilter::ReadLoop()
{
CheckClosingTags ( ) ;
}
else
{
PopStack ( ) ;
}
ReadNormalText ( ) ;
}