diff --git a/src/html/htmlparser.cpp b/src/html/htmlparser.cpp index c5e37cf..f4b158e 100644 --- a/src/html/htmlparser.cpp +++ b/src/html/htmlparser.cpp @@ -63,6 +63,7 @@ void HTMLParser::clear_input_flags() char_was_escaped = false; escaped_chars_buffer.clear(); escaped_char_index = 0; + filter_mode = false; } @@ -173,6 +174,7 @@ void HTMLParser::filter(const wchar_t * in, std::wstring & out, bool clear_out_s pchar_unicode = in; out_string = &out; + filter_mode = true; if( clear_out_string ) out_string->clear(); @@ -209,8 +211,8 @@ void HTMLParser::filter(const WTextStream & in, Stream & out, bool clear_out_str wtext_stream_iterator = &begin; wtext_stream_iterator_end = &end; - out_stream = &out; + filter_mode = true; if( clear_out_stream ) out_stream->clear(); @@ -232,6 +234,7 @@ HTMLParser::Status HTMLParser::filter_file(const char * file_name, std::wstring file.open(file_name, std::ios_base::binary | std::ios_base::in); out_string = &out; + filter_mode = true; if( clear_out_stream ) out_string->clear(); @@ -2383,7 +2386,7 @@ int HTMLParser::read_char() { read_char_no_escape(); - if( lastc == '&' ) + if( !filter_mode && lastc == '&' ) { read_xml_entity(); diff --git a/src/html/htmlparser.h b/src/html/htmlparser.h index fb63809..15dd8db 100644 --- a/src/html/htmlparser.h +++ b/src/html/htmlparser.h @@ -310,6 +310,12 @@ protected: std::wstring escaped_chars_buffer; size_t escaped_char_index; + /* + * filter mode: set when filter(...) or filter_file(...) was called + * in filter mode we do not unescape xml sequences such as &lt; &gt; ... + */ + bool filter_mode; + void clear_input_flags();