add HTMLParser::parse_xml(...) methods

This commit is contained in:
Tomasz Sowa 2023-07-04 22:58:43 +02:00
parent cbaf57bec3
commit 7e92b5d9d7
Signed by: tomasz.sowa
GPG Key ID: 662CC1438638588B
4 changed files with 105 additions and 10 deletions

View File

@ -5,7 +5,7 @@
*/ */
/* /*
* Copyright (c) 2008-2022, Tomasz Sowa * Copyright (c) 2008-2023, Tomasz Sowa
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
@ -110,18 +110,25 @@ void HTMLParser::set_item_parsed_listener(ItemParsedListener * listener)
} }
void HTMLParser::prepare_to_parse_xml(Space & out_space, bool compact_mode, bool clear_space)
HTMLParser::Status HTMLParser::parse_xml_file(const char * file_name, Space & out_space, bool compact_mode, bool clear_space)
{ {
clear_input_flags(); clear_input_flags();
parsing_html = false; parsing_html = false;
reading_from_file = true;
xml_compact_mode = compact_mode;
this->out_space = &out_space; this->out_space = &out_space;
xml_compact_mode = compact_mode;
if( clear_space ) if( clear_space )
{
this->out_space->clear(); this->out_space->clear();
}
}
HTMLParser::Status HTMLParser::parse_xml_file(const char * file_name, Space & out_space, bool compact_mode, bool clear_space)
{
prepare_to_parse_xml(out_space, compact_mode, clear_space);
reading_from_file = true;
file.clear(); file.clear();
file.open(file_name, std::ios_base::binary | std::ios_base::in); file.open(file_name, std::ios_base::binary | std::ios_base::in);
@ -164,6 +171,79 @@ HTMLParser::Status HTMLParser::parse_xml_file(const std::wstring & file_name, Sp
} }
/*
 * Parse an XML document given as a narrow character string.
 *
 * str           - input text; stored in pchar_ascii for the reader
 *                 (presumably a null terminated string -- TODO confirm against Read())
 * out_space     - the Space object which receives the parsed content
 * compact_mode  - forwarded to prepare_to_parse_xml()
 * clear_space   - forwarded to prepare_to_parse_xml()
 *
 * returns the value of the status member after the Init()/Read()/Uninit() sequence
 */
HTMLParser::Status HTMLParser::parse_xml(const char * str, Space & out_space, bool compact_mode, bool clear_space)
{
	// set up the parser state first, then select the input source
	prepare_to_parse_xml(out_space, compact_mode, clear_space);
	pchar_ascii = str;

	// run the parser
	Init();
	Read();
	Uninit();

	return status;
}
/*
 * Parse an XML document given as a std::string.
 *
 * This is a thin wrapper: the buffer returned by str.c_str() is passed to the
 * parse_xml(const char *, ...) overload, the rest of the arguments are forwarded unchanged.
 */
HTMLParser::Status HTMLParser::parse_xml(const std::string & str, Space & out_space, bool compact_mode, bool clear_space)
{
	const char * raw_input = str.c_str();

	return parse_xml(raw_input, out_space, compact_mode, clear_space);
}
/*
 * Parse an XML document given as a wide character string.
 *
 * str           - input text; stored in pchar_unicode for the reader
 *                 (presumably a null terminated string -- TODO confirm against Read())
 * out_space     - the Space object which receives the parsed content
 * compact_mode  - forwarded to prepare_to_parse_xml()
 * clear_space   - forwarded to prepare_to_parse_xml()
 *
 * returns the value of the status member after the Init()/Read()/Uninit() sequence
 */
HTMLParser::Status HTMLParser::parse_xml(const wchar_t * str, Space & out_space, bool compact_mode, bool clear_space)
{
	// set up the parser state first, then select the input source
	prepare_to_parse_xml(out_space, compact_mode, clear_space);
	pchar_unicode = str;

	// run the parser
	Init();
	Read();
	Uninit();

	return status;
}
/*
 * Parse an XML document given as a std::wstring.
 *
 * This is a thin wrapper: the buffer returned by str.c_str() is passed to the
 * parse_xml(const wchar_t *, ...) overload, the rest of the arguments are forwarded unchanged.
 */
HTMLParser::Status HTMLParser::parse_xml(const std::wstring & str, Space & out_space, bool compact_mode, bool clear_space)
{
	const wchar_t * raw_input = str.c_str();

	return parse_xml(raw_input, out_space, compact_mode, clear_space);
}
/*
 * Parse an XML document held in a pt::TextStream.
 *
 * str           - input stream; it is read through a pair of const iterators
 *                 (begin/end) which live only for the duration of this call
 * out_space     - the Space object which receives the parsed content
 * compact_mode  - forwarded to prepare_to_parse_xml()
 * clear_space   - forwarded to prepare_to_parse_xml()
 *
 * returns the value of the status member after the Init()/Read()/Uninit() sequence
 */
HTMLParser::Status HTMLParser::parse_xml(const pt::TextStream & str, Space & out_space, bool compact_mode, bool clear_space)
{
	prepare_to_parse_xml(out_space, compact_mode, clear_space);

	pt::TextStream::const_iterator start = str.begin();
	pt::TextStream::const_iterator end = str.end();

	// the reader gets the addresses of the two local iterators
	text_stream_iterator = &start;
	text_stream_iterator_end = &end;

	Init();
	Read();
	Uninit();

	// start and end are destroyed when this method returns,
	// so do not leave the members pointing at dead stack objects
	text_stream_iterator = nullptr;
	text_stream_iterator_end = nullptr;

	return status;
}
/*
 * Parse an XML document held in a pt::WTextStream.
 *
 * str           - input stream; it is read through a pair of const iterators
 *                 (begin/end) which live only for the duration of this call
 * out_space     - the Space object which receives the parsed content
 * compact_mode  - forwarded to prepare_to_parse_xml()
 * clear_space   - forwarded to prepare_to_parse_xml()
 *
 * returns the value of the status member after the Init()/Read()/Uninit() sequence
 */
HTMLParser::Status HTMLParser::parse_xml(const pt::WTextStream & str, Space & out_space, bool compact_mode, bool clear_space)
{
	prepare_to_parse_xml(out_space, compact_mode, clear_space);

	pt::WTextStream::const_iterator start = str.begin();
	pt::WTextStream::const_iterator end = str.end();

	// the reader gets the addresses of the two local iterators
	wtext_stream_iterator = &start;
	wtext_stream_iterator_end = &end;

	Init();
	Read();
	Uninit();

	// start and end are destroyed when this method returns,
	// so do not leave the members pointing at dead stack objects
	wtext_stream_iterator = nullptr;
	wtext_stream_iterator_end = nullptr;

	return status;
}
void HTMLParser::filter(const wchar_t * in, std::wstring & out, bool clear_out_string) void HTMLParser::filter(const wchar_t * in, std::wstring & out, bool clear_out_string)
{ {
@ -295,6 +375,12 @@ int HTMLParser::get_last_parsed_line()
} }
/*
 * returns the current value of the column counter
 * (companion of get_last_parsed_line())
 */
int HTMLParser::get_last_parsed_column()
{
	return column;
}
void HTMLParser::SetSomeDefaults() void HTMLParser::SetSomeDefaults()
{ {
white_mode = WHITE_MODE_ORIGIN; white_mode = WHITE_MODE_ORIGIN;

View File

@ -5,7 +5,7 @@
*/ */
/* /*
* Copyright (c) 2008-2022, Tomasz Sowa * Copyright (c) 2008-2023, Tomasz Sowa
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
@ -192,6 +192,14 @@ public:
Status parse_xml_file(const wchar_t * file_name, Space & out_space, bool compact_mode = false, bool clear_space = true); Status parse_xml_file(const wchar_t * file_name, Space & out_space, bool compact_mode = false, bool clear_space = true);
Status parse_xml_file(const std::wstring & file_name, Space & out_space, bool compact_mode = false, bool clear_space = true); Status parse_xml_file(const std::wstring & file_name, Space & out_space, bool compact_mode = false, bool clear_space = true);
/*
 * parse an XML document which is already in memory and put the result into out_space
 *
 * the input can be a narrow or wide C string, a std::string / std::wstring
 * or a pt::TextStream / pt::WTextStream
 *
 * compact_mode and clear_space are passed to prepare_to_parse_xml()
 * the returned Status is the parser status after reading the input
 */
Status parse_xml(const char * str, Space & out_space, bool compact_mode = false, bool clear_space = true);
Status parse_xml(const std::string & str, Space & out_space, bool compact_mode = false, bool clear_space = true);
Status parse_xml(const wchar_t * str, Space & out_space, bool compact_mode = false, bool clear_space = true);
Status parse_xml(const std::wstring & str, Space & out_space, bool compact_mode = false, bool clear_space = true);
Status parse_xml(const pt::TextStream & str, Space & out_space, bool compact_mode = false, bool clear_space = true);
Status parse_xml(const pt::WTextStream & str, Space & out_space, bool compact_mode = false, bool clear_space = true);
// main methods used for filtering // main methods used for filtering
@ -208,12 +216,12 @@ public:
/* /*
* *
* returns a number of a last parsed line * returns a number of a last parsed line/column
* can be used to obtain the line in which there was a syntax error * can be used to obtain the line in which there was a syntax error
* *
*/ */
int get_last_parsed_line(); int get_last_parsed_line();
int get_last_parsed_column();
@ -323,6 +331,8 @@ protected:
virtual void Init(); virtual void Init();
virtual void Uninit(); virtual void Uninit();
void prepare_to_parse_xml(Space & out_space, bool compact_mode, bool clear_space);
virtual bool IsOpeningTagMark(wchar_t c); virtual bool IsOpeningTagMark(wchar_t c);
virtual bool IsClosingTagMark(wchar_t c); virtual bool IsClosingTagMark(wchar_t c);
virtual bool IsClosingTagIndicator(wchar_t c); virtual bool IsClosingTagIndicator(wchar_t c);

View File

@ -163,7 +163,7 @@ template<typename StreamType>
bool utf8_to_wide(const std::string & utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested bool utf8_to_wide(const std::string & utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested
template<typename StreamType> template<typename StreamType>
bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear = true, int mode = 1);
template<typename StreamOrStringType> template<typename StreamOrStringType>
bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear = true, int mode = 1); bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear = true, int mode = 1);

View File

@ -155,7 +155,6 @@ bool utf8_to_wide(const std::string & utf8, StreamType & res, bool clear, int mo
// need to be tested
template<typename StreamType> template<typename StreamType>
bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear, int mode) bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear, int mode)
{ {