add HTMLParser::parse_xml(...) methods
This commit is contained in:
parent
cbaf57bec3
commit
7e92b5d9d7
|
@ -5,7 +5,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2008-2022, Tomasz Sowa
|
* Copyright (c) 2008-2023, Tomasz Sowa
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -110,18 +110,25 @@ void HTMLParser::set_item_parsed_listener(ItemParsedListener * listener)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void HTMLParser::prepare_to_parse_xml(Space & out_space, bool compact_mode, bool clear_space)
|
||||||
HTMLParser::Status HTMLParser::parse_xml_file(const char * file_name, Space & out_space, bool compact_mode, bool clear_space)
|
|
||||||
{
|
{
|
||||||
clear_input_flags();
|
clear_input_flags();
|
||||||
|
|
||||||
parsing_html = false;
|
parsing_html = false;
|
||||||
reading_from_file = true;
|
|
||||||
xml_compact_mode = compact_mode;
|
|
||||||
this->out_space = &out_space;
|
this->out_space = &out_space;
|
||||||
|
xml_compact_mode = compact_mode;
|
||||||
|
|
||||||
if( clear_space )
|
if( clear_space )
|
||||||
|
{
|
||||||
this->out_space->clear();
|
this->out_space->clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
HTMLParser::Status HTMLParser::parse_xml_file(const char * file_name, Space & out_space, bool compact_mode, bool clear_space)
|
||||||
|
{
|
||||||
|
prepare_to_parse_xml(out_space, compact_mode, clear_space);
|
||||||
|
reading_from_file = true;
|
||||||
|
|
||||||
file.clear();
|
file.clear();
|
||||||
file.open(file_name, std::ios_base::binary | std::ios_base::in);
|
file.open(file_name, std::ios_base::binary | std::ios_base::in);
|
||||||
|
@ -164,6 +171,79 @@ HTMLParser::Status HTMLParser::parse_xml_file(const std::wstring & file_name, Sp
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Parses XML text given as a narrow-character pointer and stores the result in out_space.
 *
 * str          - input text, assigned to the pchar_ascii member
 *                (presumably a null-terminated string, no length is passed -- confirm)
 * out_space    - destination Space, forwarded to prepare_to_parse_xml()
 * compact_mode - forwarded to prepare_to_parse_xml()
 * clear_space  - forwarded to prepare_to_parse_xml()
 *
 * returns the status member after the Init()/Read()/Uninit() sequence has run
 */
HTMLParser::Status HTMLParser::parse_xml(const char * str, Space & out_space, bool compact_mode, bool clear_space)
{
	// prepare_to_parse_xml() calls clear_input_flags(), so the input pointer
	// is assigned only afterwards (same ordering as before)
	prepare_to_parse_xml(out_space, compact_mode, clear_space);
	pchar_ascii = str;

	Init();
	Read();
	Uninit();

	return status;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Convenience overload for std::string input:
 * forwards the string's character buffer to the const char * overload of parse_xml().
 *
 * returns whatever the const char * overload returns
 */
HTMLParser::Status HTMLParser::parse_xml(const std::string & str, Space & out_space, bool compact_mode, bool clear_space)
{
	const char * raw_input = str.c_str();

	return parse_xml(raw_input, out_space, compact_mode, clear_space);
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Parses XML text given as a wide-character pointer and stores the result in out_space.
 *
 * str          - input text, assigned to the pchar_unicode member
 *                (presumably a null-terminated string, no length is passed -- confirm)
 * out_space    - destination Space, forwarded to prepare_to_parse_xml()
 * compact_mode - forwarded to prepare_to_parse_xml()
 * clear_space  - forwarded to prepare_to_parse_xml()
 *
 * returns the status member after the Init()/Read()/Uninit() sequence has run
 */
HTMLParser::Status HTMLParser::parse_xml(const wchar_t * str, Space & out_space, bool compact_mode, bool clear_space)
{
	// prepare_to_parse_xml() calls clear_input_flags(), so the input pointer
	// is assigned only afterwards (same ordering as before)
	prepare_to_parse_xml(out_space, compact_mode, clear_space);
	pchar_unicode = str;

	Init();
	Read();
	Uninit();

	return status;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Convenience overload for std::wstring input:
 * forwards the string's character buffer to the const wchar_t * overload of parse_xml().
 *
 * returns whatever the const wchar_t * overload returns
 */
HTMLParser::Status HTMLParser::parse_xml(const std::wstring & str, Space & out_space, bool compact_mode, bool clear_space)
{
	const wchar_t * raw_input = str.c_str();

	return parse_xml(raw_input, out_space, compact_mode, clear_space);
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Parses XML text held in a pt::TextStream and stores the result in out_space.
 *
 * The stream is consumed through a begin/end iterator pair which lives on this
 * function's stack; the parser only holds pointers to those iterators while
 * Init()/Read()/Uninit() run.
 *
 * str          - input stream, read through const iterators
 * out_space    - destination Space, forwarded to prepare_to_parse_xml()
 * compact_mode - forwarded to prepare_to_parse_xml()
 * clear_space  - forwarded to prepare_to_parse_xml()
 *
 * returns the status member after parsing
 */
HTMLParser::Status HTMLParser::parse_xml(const pt::TextStream & str, Space & out_space, bool compact_mode, bool clear_space)
{
	prepare_to_parse_xml(out_space, compact_mode, clear_space);

	pt::TextStream::const_iterator start = str.begin();
	pt::TextStream::const_iterator end = str.end();

	text_stream_iterator = &start;
	text_stream_iterator_end = &end;

	Init();
	Read();
	Uninit();

	// start/end are locals: do not leave the members pointing at them
	// once this function has returned
	text_stream_iterator = nullptr;
	text_stream_iterator_end = nullptr;

	return status;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Parses XML text held in a pt::WTextStream and stores the result in out_space.
 *
 * The stream is consumed through a begin/end iterator pair which lives on this
 * function's stack; the parser only holds pointers to those iterators while
 * Init()/Read()/Uninit() run.
 *
 * str          - input stream, read through const iterators
 * out_space    - destination Space, forwarded to prepare_to_parse_xml()
 * compact_mode - forwarded to prepare_to_parse_xml()
 * clear_space  - forwarded to prepare_to_parse_xml()
 *
 * returns the status member after parsing
 */
HTMLParser::Status HTMLParser::parse_xml(const pt::WTextStream & str, Space & out_space, bool compact_mode, bool clear_space)
{
	prepare_to_parse_xml(out_space, compact_mode, clear_space);

	pt::WTextStream::const_iterator start = str.begin();
	pt::WTextStream::const_iterator end = str.end();

	wtext_stream_iterator = &start;
	wtext_stream_iterator_end = &end;

	Init();
	Read();
	Uninit();

	// start/end are locals: do not leave the members pointing at them
	// once this function has returned
	wtext_stream_iterator = nullptr;
	wtext_stream_iterator_end = nullptr;

	return status;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void HTMLParser::filter(const wchar_t * in, std::wstring & out, bool clear_out_string)
|
void HTMLParser::filter(const wchar_t * in, std::wstring & out, bool clear_out_string)
|
||||||
{
|
{
|
||||||
|
@ -295,6 +375,12 @@ int HTMLParser::get_last_parsed_line()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int HTMLParser::get_last_parsed_column()
|
||||||
|
{
|
||||||
|
return column;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void HTMLParser::SetSomeDefaults()
|
void HTMLParser::SetSomeDefaults()
|
||||||
{
|
{
|
||||||
white_mode = WHITE_MODE_ORIGIN;
|
white_mode = WHITE_MODE_ORIGIN;
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2008-2022, Tomasz Sowa
|
* Copyright (c) 2008-2023, Tomasz Sowa
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -192,6 +192,14 @@ public:
|
||||||
Status parse_xml_file(const wchar_t * file_name, Space & out_space, bool compact_mode = false, bool clear_space = true);
|
Status parse_xml_file(const wchar_t * file_name, Space & out_space, bool compact_mode = false, bool clear_space = true);
|
||||||
Status parse_xml_file(const std::wstring & file_name, Space & out_space, bool compact_mode = false, bool clear_space = true);
|
Status parse_xml_file(const std::wstring & file_name, Space & out_space, bool compact_mode = false, bool clear_space = true);
|
||||||
|
|
||||||
|
Status parse_xml(const char * str, Space & out_space, bool compact_mode = false, bool clear_space = true);
|
||||||
|
Status parse_xml(const std::string & str, Space & out_space, bool compact_mode = false, bool clear_space = true);
|
||||||
|
|
||||||
|
Status parse_xml(const wchar_t * str, Space & out_space, bool compact_mode = false, bool clear_space = true);
|
||||||
|
Status parse_xml(const std::wstring & str, Space & out_space, bool compact_mode = false, bool clear_space = true);
|
||||||
|
|
||||||
|
Status parse_xml(const pt::TextStream & str, Space & out_space, bool compact_mode = false, bool clear_space = true);
|
||||||
|
Status parse_xml(const pt::WTextStream & str, Space & out_space, bool compact_mode = false, bool clear_space = true);
|
||||||
|
|
||||||
|
|
||||||
// main methods used for filtering
|
// main methods used for filtering
|
||||||
|
@ -208,12 +216,12 @@ public:
|
||||||
|
|
||||||
/*
|
/*
|
||||||
*
|
*
|
||||||
* returns a number of a last parsed line
|
* returns the number of the last parsed line/column
|
||||||
* can be used to obtain the line in which there was a syntax error
|
* can be used to obtain the line in which there was a syntax error
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
int get_last_parsed_line();
|
int get_last_parsed_line();
|
||||||
|
int get_last_parsed_column();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -323,6 +331,8 @@ protected:
|
||||||
virtual void Init();
|
virtual void Init();
|
||||||
virtual void Uninit();
|
virtual void Uninit();
|
||||||
|
|
||||||
|
void prepare_to_parse_xml(Space & out_space, bool compact_mode, bool clear_space);
|
||||||
|
|
||||||
virtual bool IsOpeningTagMark(wchar_t c);
|
virtual bool IsOpeningTagMark(wchar_t c);
|
||||||
virtual bool IsClosingTagMark(wchar_t c);
|
virtual bool IsClosingTagMark(wchar_t c);
|
||||||
virtual bool IsClosingTagIndicator(wchar_t c);
|
virtual bool IsClosingTagIndicator(wchar_t c);
|
||||||
|
|
|
@ -163,7 +163,7 @@ template<typename StreamType>
|
||||||
bool utf8_to_wide(const std::string & utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested
|
bool utf8_to_wide(const std::string & utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested
|
||||||
|
|
||||||
template<typename StreamType>
|
template<typename StreamType>
|
||||||
bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested
|
bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear = true, int mode = 1);
|
||||||
|
|
||||||
template<typename StreamOrStringType>
|
template<typename StreamOrStringType>
|
||||||
bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear = true, int mode = 1);
|
bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear = true, int mode = 1);
|
||||||
|
|
|
@ -155,7 +155,6 @@ bool utf8_to_wide(const std::string & utf8, StreamType & res, bool clear, int mo
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// need to be tested
|
|
||||||
template<typename StreamType>
|
template<typename StreamType>
|
||||||
bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear, int mode)
|
bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear, int mode)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue