- added some conversion methods: esc_to_json(...), esc_to_xml(...), esc_to_csv() (convert/misc.h)
- BaseParser: added the ability to read from TextStream and WTextStream
- HTMLParser: added filter(const WTextStream & in, Stream & out, ...) method
- added utf8_stream.h with one method: template<typename StreamIteratorType> size_t utf8_to_int(StreamIteratorType & iterator_in, StreamIteratorType & iterator_end, int & res, bool & correct)
This commit is contained in:
@@ -48,6 +48,24 @@ const int HTMLParser::WHITE_MODE_TREE;
|
||||
|
||||
|
||||
|
||||
// Resets the parser state that the parse_*/filter* entry points rely on.
// First delegates to BaseParser::clear_input_flags(), then restores this
// class's own defaults: HTML parsing on, compact XML mode on, status ok,
// line counter at 1, empty stack, no output target and zero line length.
// Callers override the individual fields they need (e.g. xml_compact_mode,
// out_string / out_stream / out_space) after calling this method.
void HTMLParser::clear_input_flags()
|
||||
{
|
||||
// reset the input-source state kept by the base class first
BaseParser::clear_input_flags();
|
||||
|
||||
// defaults; parse_xml_file() switches parsing_html off afterwards
parsing_html = true;
|
||||
xml_compact_mode = true;
|
||||
status = ok;
|
||||
line = 1;
|
||||
stack_len = 0;
|
||||
// no output target is selected here; each entry point sets the one it uses
out_string = nullptr;
|
||||
out_stream = nullptr;
|
||||
out_space = nullptr;
|
||||
line_len = 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void HTMLParser::Item::Clear()
|
||||
{
|
||||
name.clear();
|
||||
@@ -71,21 +89,11 @@ HTMLParser::Item::Item()
|
||||
|
||||
void HTMLParser::parse_html(const wchar_t * in, Space & space, bool compact_mode)
|
||||
{
|
||||
parsing_html = true;
|
||||
reading_from_file = false;
|
||||
reading_from_wchar_string = true;
|
||||
pchar_unicode = in;
|
||||
pchar_ascii = 0;
|
||||
xml_compact_mode = compact_mode;
|
||||
clear_input_flags();
|
||||
|
||||
status = ok;
|
||||
line = 1;
|
||||
|
||||
stack_len = 0;
|
||||
out_string = nullptr;
|
||||
out_space = &space;
|
||||
//last_new_line = false;
|
||||
line_len = 0;
|
||||
pchar_unicode = in;
|
||||
xml_compact_mode = compact_mode;
|
||||
out_space = &space;
|
||||
out_space->clear();
|
||||
|
||||
Init();
|
||||
@@ -96,16 +104,11 @@ void HTMLParser::parse_html(const wchar_t * in, Space & space, bool compact_mode
|
||||
|
||||
HTMLParser::Status HTMLParser::parse_xml_file(const char * file_name, Space & out_space, bool compact_mode, bool clear_space)
|
||||
{
|
||||
clear_input_flags();
|
||||
|
||||
parsing_html = false;
|
||||
reading_from_file = true;
|
||||
xml_compact_mode = compact_mode;
|
||||
|
||||
status = ok;
|
||||
line = 1;
|
||||
stack_len = 0;
|
||||
out_string = nullptr;
|
||||
line_len = 0;
|
||||
|
||||
xml_compact_mode = compact_mode;
|
||||
this->out_space = &out_space;
|
||||
|
||||
if( clear_space )
|
||||
@@ -153,20 +156,15 @@ HTMLParser::Status HTMLParser::parse_xml_file(const std::wstring & file_name, Sp
|
||||
|
||||
|
||||
|
||||
void HTMLParser::Filter(const wchar_t * in, std::wstring & out)
|
||||
void HTMLParser::filter(const wchar_t * in, std::wstring & out, bool clear_out_string)
|
||||
{
|
||||
parsing_html = true;
|
||||
reading_from_file = false;
|
||||
reading_from_wchar_string = true;
|
||||
pchar_unicode = in;
|
||||
pchar_ascii = 0;
|
||||
clear_input_flags();
|
||||
|
||||
stack_len = 0;
|
||||
pchar_unicode = in;
|
||||
out_string = &out;
|
||||
out_space = nullptr;
|
||||
//last_new_line = false;
|
||||
line_len = 0;
|
||||
out_string->clear();
|
||||
|
||||
if( clear_out_string )
|
||||
out_string->clear();
|
||||
|
||||
Init();
|
||||
Read();
|
||||
@@ -174,7 +172,7 @@ void HTMLParser::Filter(const wchar_t * in, std::wstring & out)
|
||||
}
|
||||
|
||||
|
||||
void HTMLParser::Filter(const std::wstring & in, std::wstring & out)
|
||||
void HTMLParser::filter(const std::wstring & in, std::wstring & out, bool clear_out_string)
|
||||
{
|
||||
if( &in == &out )
|
||||
{
|
||||
@@ -187,27 +185,45 @@ void HTMLParser::Filter(const std::wstring & in, std::wstring & out)
|
||||
if( out.capacity() < out_projected_len )
|
||||
out.reserve(out_projected_len);
|
||||
|
||||
Filter(in.c_str(), out);
|
||||
filter(in.c_str(), out, clear_out_string);
|
||||
}
|
||||
|
||||
|
||||
|
||||
HTMLParser::Status HTMLParser::filter_file(const char * file_name, std::wstring & out)
|
||||
// Filters the content of a WTextStream and writes the result to a Stream.
//
// in               - input stream, read through its const_iterator range
//                    [in.begin(), in.end())
// out              - output stream, becomes the current out_stream
// clear_out_stream - when true, 'out' is cleared before any output is written
void HTMLParser::filter(const WTextStream & in, Stream & out, bool clear_out_stream)
|
||||
{
|
||||
// clear_input_flags() below also sets parsing_html to true
parsing_html = true;
|
||||
clear_input_flags();
|
||||
|
||||
WTextStream::const_iterator begin = in.begin();
|
||||
WTextStream::const_iterator end = in.end();
|
||||
|
||||
// the members store the addresses of the two local iterators above
// NOTE(review): these pointers refer to locals and become dangling once this
// method returns, unless Uninit() or the next clear_input_flags() resets them - confirm
wtext_stream_iterator = &begin;
|
||||
wtext_stream_iterator_end = &end;
|
||||
|
||||
out_stream = &out;
|
||||
|
||||
if( clear_out_stream )
|
||||
out_stream->clear();
|
||||
|
||||
// same Init() / Read() / Uninit() sequence for the actual filtering
Init();
|
||||
Read();
|
||||
Uninit();
|
||||
}
|
||||
|
||||
|
||||
HTMLParser::Status HTMLParser::filter_file(const char * file_name, std::wstring & out, bool clear_out_stream)
|
||||
{
|
||||
clear_input_flags();
|
||||
|
||||
reading_from_file = true;
|
||||
|
||||
// open the file before clearing 'out' string, 'out' string can be the same string as the file_name
|
||||
file.clear();
|
||||
file.open(file_name, std::ios_base::binary | std::ios_base::in);
|
||||
|
||||
status = ok;
|
||||
line = 1;
|
||||
stack_len = 0;
|
||||
out_string = &out;
|
||||
out_space = nullptr;
|
||||
line_len = 0;
|
||||
out_string->clear();
|
||||
out_string = &out;
|
||||
|
||||
if( clear_out_stream )
|
||||
out_string->clear();
|
||||
|
||||
if( file )
|
||||
{
|
||||
@@ -226,24 +242,24 @@ HTMLParser::Status HTMLParser::filter_file(const char * file_name, std::wstring
|
||||
}
|
||||
|
||||
|
||||
HTMLParser::Status HTMLParser::filter_file(const std::string & file_name, std::wstring & out)
|
||||
HTMLParser::Status HTMLParser::filter_file(const std::string & file_name, std::wstring & out, bool clear_out_stream)
|
||||
{
|
||||
return filter_file(file_name.c_str(), out);
|
||||
return filter_file(file_name.c_str(), out, clear_out_stream);
|
||||
}
|
||||
|
||||
|
||||
HTMLParser::Status HTMLParser::filter_file(const wchar_t * file_name, std::wstring & out)
|
||||
HTMLParser::Status HTMLParser::filter_file(const wchar_t * file_name, std::wstring & out, bool clear_out_stream)
|
||||
{
|
||||
std::string file_name_utf8;
|
||||
pt::wide_to_utf8(file_name, file_name_utf8);
|
||||
|
||||
return filter_file(file_name_utf8, out);
|
||||
return filter_file(file_name_utf8, out, clear_out_stream);
|
||||
}
|
||||
|
||||
|
||||
HTMLParser::Status HTMLParser::filter_file(const std::wstring & file_name, std::wstring & out)
|
||||
HTMLParser::Status HTMLParser::filter_file(const std::wstring & file_name, std::wstring & out, bool clear_out_stream)
|
||||
{
|
||||
return filter_file(file_name.c_str(), out);
|
||||
return filter_file(file_name.c_str(), out, clear_out_stream);
|
||||
}
|
||||
|
||||
|
||||
@@ -792,6 +808,9 @@ void HTMLParser::Put(wchar_t c)
|
||||
if( out_string )
|
||||
(*out_string) += c;
|
||||
|
||||
if( out_stream )
|
||||
(*out_stream) << c;
|
||||
|
||||
CheckChar(c);
|
||||
}
|
||||
|
||||
@@ -806,6 +825,9 @@ void HTMLParser::Put(const wchar_t * str, const wchar_t * end)
|
||||
if( out_string )
|
||||
out_string->append(str, len);
|
||||
|
||||
if( out_stream )
|
||||
out_stream->write(str, len);
|
||||
|
||||
for( ; str < end ; ++str)
|
||||
CheckChar(*str);
|
||||
}
|
||||
@@ -819,6 +841,9 @@ void HTMLParser::Put(const std::wstring & str)
|
||||
if( out_string )
|
||||
out_string->append(str);
|
||||
|
||||
if( out_stream )
|
||||
out_stream->write(str.c_str(), str.size());
|
||||
|
||||
for(size_t i=0 ; i < str.size() ; ++i)
|
||||
CheckChar(str[i]);
|
||||
}
|
||||
@@ -1130,6 +1155,9 @@ void HTMLParser::PutTabs(size_t len)
|
||||
{
|
||||
if( out_string )
|
||||
(*out_string) += ' '; // we do not add them to 'line_len'
|
||||
|
||||
if( out_stream )
|
||||
(*out_stream) << ' ';
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -44,6 +44,7 @@
|
||||
#include <algorithm>
|
||||
#include "convert/baseparser.h"
|
||||
#include "space/space.h"
|
||||
#include "textstream/stream.h"
|
||||
|
||||
|
||||
namespace pt
|
||||
@@ -130,14 +131,15 @@ public:
|
||||
|
||||
|
||||
// main methods used for filtering
|
||||
void Filter(const wchar_t * in, std::wstring & out);
|
||||
void Filter(const std::wstring & in, std::wstring & out);
|
||||
void filter(const wchar_t * in, std::wstring & out, bool clear_out_string = true);
|
||||
void filter(const std::wstring & in, std::wstring & out, bool clear_out_string = true);
|
||||
|
||||
void filter(const WTextStream & in, Stream & out, bool clear_out_stream = true);
|
||||
|
||||
HTMLParser::Status filter_file(const char * file_name, std::wstring & out);
|
||||
HTMLParser::Status filter_file(const std::string & file_name, std::wstring & out);
|
||||
HTMLParser::Status filter_file(const wchar_t * file_name, std::wstring & out);
|
||||
HTMLParser::Status filter_file(const std::wstring & file_name, std::wstring & out);
|
||||
HTMLParser::Status filter_file(const char * file_name, std::wstring & out, bool clear_out_stream = true);
|
||||
HTMLParser::Status filter_file(const std::string & file_name, std::wstring & out, bool clear_out_stream = true);
|
||||
HTMLParser::Status filter_file(const wchar_t * file_name, std::wstring & out, bool clear_out_stream = true);
|
||||
HTMLParser::Status filter_file(const std::wstring & file_name, std::wstring & out, bool clear_out_stream = true);
|
||||
|
||||
|
||||
/*
|
||||
@@ -278,7 +280,7 @@ protected:
|
||||
|
||||
|
||||
|
||||
|
||||
void clear_input_flags();
|
||||
|
||||
|
||||
/*
|
||||
@@ -403,6 +405,7 @@ protected:
|
||||
size_t stack_len; // length of the stack
|
||||
wchar_t * buffer; // buffer used when printing
|
||||
std::wstring * out_string;
|
||||
Stream * out_stream;
|
||||
Space * out_space;
|
||||
Space text_space_tmp;
|
||||
|
||||
|
Reference in New Issue
Block a user