add HTMLParser::parse_xml(...) methods

This commit is contained in:
Tomasz Sowa 2023-07-04 22:58:43 +02:00
parent cbaf57bec3
commit 7e92b5d9d7
Signed by: tomasz.sowa
GPG Key ID: 662CC1438638588B
4 changed files with 105 additions and 10 deletions

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2008-2022, Tomasz Sowa
* Copyright (c) 2008-2023, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -110,18 +110,25 @@ void HTMLParser::set_item_parsed_listener(ItemParsedListener * listener)
}
HTMLParser::Status HTMLParser::parse_xml_file(const char * file_name, Space & out_space, bool compact_mode, bool clear_space)
/*
 * common preparation shared by parse_xml_file() and all parse_xml() overloads
 *
 * - calls clear_input_flags() first, so the caller has to select its own
 *   input source (file, string pointer, stream iterators) AFTER this call
 * - switches the parser to xml mode (parsing_html = false)
 * - remembers compact_mode and the output space
 * - clears the output space when clear_space is true
 *
 * reading_from_file is deliberately not set here: parse_xml_file() sets it
 * itself right after calling this method and the in-memory parse_xml()
 * overloads do not read from a file
 */
void HTMLParser::prepare_to_parse_xml(Space & out_space, bool compact_mode, bool clear_space)
{
	clear_input_flags();

	parsing_html = false;
	xml_compact_mode = compact_mode;
	this->out_space = &out_space;

	if( clear_space )
	{
		this->out_space->clear();
	}
}
HTMLParser::Status HTMLParser::parse_xml_file(const char * file_name, Space & out_space, bool compact_mode, bool clear_space)
{
prepare_to_parse_xml(out_space, compact_mode, clear_space);
reading_from_file = true;
file.clear();
file.open(file_name, std::ios_base::binary | std::ios_base::in);
@ -164,6 +171,79 @@ HTMLParser::Status HTMLParser::parse_xml_file(const std::wstring & file_name, Sp
}
/*
 * parses an xml document passed as a pointer to char and puts the result to out_space
 * (only the pointer is passed, no length)
 *
 * compact_mode and clear_space are forwarded to prepare_to_parse_xml()
 * returns the parser status as it is after Read() and Uninit()
 */
HTMLParser::Status HTMLParser::parse_xml(const char * str, Space & out_space, bool compact_mode, bool clear_space)
{
	// has to be called before the input is selected: it calls clear_input_flags()
	this->prepare_to_parse_xml(out_space, compact_mode, clear_space);
	this->pchar_ascii = str;

	this->Init();
	this->Read();
	this->Uninit();

	return this->status;
}
/*
 * convenience overload: parses an xml document stored in a std::string
 *
 * delegates to the const char * overload, only str.c_str() is passed on
 * (the string length is not forwarded)
 */
HTMLParser::Status HTMLParser::parse_xml(const std::string & str, Space & out_space, bool compact_mode, bool clear_space)
{
	const char * raw_input = str.c_str();

	return parse_xml(raw_input, out_space, compact_mode, clear_space);
}
/*
 * parses an xml document passed as a pointer to wchar_t and puts the result to out_space
 * (only the pointer is passed, no length)
 *
 * compact_mode and clear_space are forwarded to prepare_to_parse_xml()
 * returns the parser status as it is after Read() and Uninit()
 */
HTMLParser::Status HTMLParser::parse_xml(const wchar_t * str, Space & out_space, bool compact_mode, bool clear_space)
{
	// has to be called before the input is selected: it calls clear_input_flags()
	this->prepare_to_parse_xml(out_space, compact_mode, clear_space);
	this->pchar_unicode = str;

	this->Init();
	this->Read();
	this->Uninit();

	return this->status;
}
/*
 * convenience overload: parses an xml document stored in a std::wstring
 *
 * delegates to the const wchar_t * overload, only str.c_str() is passed on
 * (the string length is not forwarded)
 */
HTMLParser::Status HTMLParser::parse_xml(const std::wstring & str, Space & out_space, bool compact_mode, bool clear_space)
{
	const wchar_t * raw_input = str.c_str();

	return parse_xml(raw_input, out_space, compact_mode, clear_space);
}
/*
 * parses an xml document stored in a pt::TextStream and puts the result to out_space
 *
 * the stream is read through a pair of local const iterators (begin/end),
 * the parser gets pointers to them for the time of parsing
 *
 * compact_mode and clear_space are forwarded to prepare_to_parse_xml()
 * returns the parser status as it is after Read() and Uninit()
 */
HTMLParser::Status HTMLParser::parse_xml(const pt::TextStream & str, Space & out_space, bool compact_mode, bool clear_space)
{
	// has to be called before the input is selected: it calls clear_input_flags()
	prepare_to_parse_xml(out_space, compact_mode, clear_space);

	pt::TextStream::const_iterator start = str.begin();
	pt::TextStream::const_iterator end = str.end();

	text_stream_iterator = &start;
	text_stream_iterator_end = &end;

	Init();
	Read();
	Uninit();

	// start and end are destroyed when this method returns,
	// do not leave the members pointing at dead stack objects
	text_stream_iterator = nullptr;
	text_stream_iterator_end = nullptr;

	return status;
}
/*
 * parses an xml document stored in a pt::WTextStream and puts the result to out_space
 *
 * the stream is read through a pair of local const iterators (begin/end),
 * the parser gets pointers to them for the time of parsing
 *
 * compact_mode and clear_space are forwarded to prepare_to_parse_xml()
 * returns the parser status as it is after Read() and Uninit()
 */
HTMLParser::Status HTMLParser::parse_xml(const pt::WTextStream & str, Space & out_space, bool compact_mode, bool clear_space)
{
	// has to be called before the input is selected: it calls clear_input_flags()
	prepare_to_parse_xml(out_space, compact_mode, clear_space);

	pt::WTextStream::const_iterator start = str.begin();
	pt::WTextStream::const_iterator end = str.end();

	wtext_stream_iterator = &start;
	wtext_stream_iterator_end = &end;

	Init();
	Read();
	Uninit();

	// start and end are destroyed when this method returns,
	// do not leave the members pointing at dead stack objects
	wtext_stream_iterator = nullptr;
	wtext_stream_iterator_end = nullptr;

	return status;
}
void HTMLParser::filter(const wchar_t * in, std::wstring & out, bool clear_out_string)
{
@ -295,6 +375,12 @@ int HTMLParser::get_last_parsed_line()
}
int HTMLParser::get_last_parsed_column()
{
return column;
}
void HTMLParser::SetSomeDefaults()
{
white_mode = WHITE_MODE_ORIGIN;

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2008-2022, Tomasz Sowa
* Copyright (c) 2008-2023, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -192,6 +192,14 @@ public:
Status parse_xml_file(const wchar_t * file_name, Space & out_space, bool compact_mode = false, bool clear_space = true);
Status parse_xml_file(const std::wstring & file_name, Space & out_space, bool compact_mode = false, bool clear_space = true);
/*
 * parsing an xml document from memory (counterparts of parse_xml_file(...))
 *
 * str          - the input: a char/wchar_t pointer, a std::string/std::wstring
 *                or a pt::TextStream/pt::WTextStream
 *                (for the string overloads only c_str() is passed on, without the length)
 * out_space    - the Space object the parser is given as its output
 * compact_mode - stored as the xml compact mode for this parsing
 * clear_space  - if true then out_space is cleared before parsing
 *
 * returns the parser status
 */
Status parse_xml(const char * str, Space & out_space, bool compact_mode = false, bool clear_space = true);
Status parse_xml(const std::string & str, Space & out_space, bool compact_mode = false, bool clear_space = true);
Status parse_xml(const wchar_t * str, Space & out_space, bool compact_mode = false, bool clear_space = true);
Status parse_xml(const std::wstring & str, Space & out_space, bool compact_mode = false, bool clear_space = true);
Status parse_xml(const pt::TextStream & str, Space & out_space, bool compact_mode = false, bool clear_space = true);
Status parse_xml(const pt::WTextStream & str, Space & out_space, bool compact_mode = false, bool clear_space = true);
// main methods used for filtering
@ -208,12 +216,12 @@ public:
/*
*
* returns the number of the last parsed line/column
* can be used to find the line/column at which a syntax error occurred
*
*/
int get_last_parsed_line();
int get_last_parsed_column();
@ -323,6 +331,8 @@ protected:
virtual void Init();
virtual void Uninit();
/*
 * common preparation used by the parse_xml_file() and parse_xml() methods:
 * calls clear_input_flags(), turns off html parsing, remembers out_space and compact_mode
 * and clears out_space if clear_space is true
 * the caller selects the input source after this call
 */
void prepare_to_parse_xml(Space & out_space, bool compact_mode, bool clear_space);
virtual bool IsOpeningTagMark(wchar_t c);
virtual bool IsClosingTagMark(wchar_t c);
virtual bool IsClosingTagIndicator(wchar_t c);

View File

@ -163,7 +163,7 @@ template<typename StreamType>
bool utf8_to_wide(const std::string & utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested
template<typename StreamType>
bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested
bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear = true, int mode = 1);
template<typename StreamOrStringType>
bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear = true, int mode = 1);

View File

@ -155,7 +155,6 @@ bool utf8_to_wide(const std::string & utf8, StreamType & res, bool clear, int mo
// need to be tested
template<typename StreamType>
bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear, int mode)
{