From 7e92b5d9d77b256a278071e654b04ae5a61db0ea Mon Sep 17 00:00:00 2001 From: Tomasz Sowa Date: Tue, 4 Jul 2023 22:58:43 +0200 Subject: [PATCH] add HTMLParser::parse_xml(...) methods --- src/html/htmlparser.cpp | 96 +++++++++++++++++++++++++++++++++++++-- src/html/htmlparser.h | 16 +++++-- src/utf8/utf8.h | 2 +- src/utf8/utf8_templates.h | 1 - 4 files changed, 105 insertions(+), 10 deletions(-) diff --git a/src/html/htmlparser.cpp b/src/html/htmlparser.cpp index 7d7ff36..0efeee0 100644 --- a/src/html/htmlparser.cpp +++ b/src/html/htmlparser.cpp @@ -5,7 +5,7 @@ */ /* - * Copyright (c) 2008-2022, Tomasz Sowa + * Copyright (c) 2008-2023, Tomasz Sowa * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -110,18 +110,25 @@ void HTMLParser::set_item_parsed_listener(ItemParsedListener * listener) } - -HTMLParser::Status HTMLParser::parse_xml_file(const char * file_name, Space & out_space, bool compact_mode, bool clear_space) +void HTMLParser::prepare_to_parse_xml(Space & out_space, bool compact_mode, bool clear_space) { clear_input_flags(); parsing_html = false; - reading_from_file = true; - xml_compact_mode = compact_mode; this->out_space = &out_space; + xml_compact_mode = compact_mode; if( clear_space ) + { this->out_space->clear(); + } +} + + +HTMLParser::Status HTMLParser::parse_xml_file(const char * file_name, Space & out_space, bool compact_mode, bool clear_space) +{ + prepare_to_parse_xml(out_space, compact_mode, clear_space); + reading_from_file = true; file.clear(); file.open(file_name, std::ios_base::binary | std::ios_base::in); @@ -164,6 +171,79 @@ HTMLParser::Status HTMLParser::parse_xml_file(const std::wstring & file_name, Sp } +HTMLParser::Status HTMLParser::parse_xml(const char * str, Space & out_space, bool compact_mode, bool clear_space) +{ + prepare_to_parse_xml(out_space, compact_mode, clear_space); + pchar_ascii = str; + + Init(); + Read(); + Uninit(); + + return status; +} + + +HTMLParser::Status 
HTMLParser::parse_xml(const std::string & str, Space & out_space, bool compact_mode, bool clear_space) +{ + return parse_xml(str.c_str(), out_space, compact_mode, clear_space); +} + + +HTMLParser::Status HTMLParser::parse_xml(const wchar_t * str, Space & out_space, bool compact_mode, bool clear_space) +{ + prepare_to_parse_xml(out_space, compact_mode, clear_space); + pchar_unicode = str; + + Init(); + Read(); + Uninit(); + + return status; +} + + +HTMLParser::Status HTMLParser::parse_xml(const std::wstring & str, Space & out_space, bool compact_mode, bool clear_space) +{ + return parse_xml(str.c_str(), out_space, compact_mode, clear_space); +} + + +HTMLParser::Status HTMLParser::parse_xml(const pt::TextStream & str, Space & out_space, bool compact_mode, bool clear_space) +{ + prepare_to_parse_xml(out_space, compact_mode, clear_space); + pt::TextStream::const_iterator start = str.begin(); + pt::TextStream::const_iterator end = str.end(); + + text_stream_iterator = &start; + text_stream_iterator_end = &end; + + Init(); + Read(); + Uninit(); + + return status; +} + + +HTMLParser::Status HTMLParser::parse_xml(const pt::WTextStream & str, Space & out_space, bool compact_mode, bool clear_space) +{ + prepare_to_parse_xml(out_space, compact_mode, clear_space); + pt::WTextStream::const_iterator start = str.begin(); + pt::WTextStream::const_iterator end = str.end(); + + wtext_stream_iterator = &start; + wtext_stream_iterator_end = &end; + + Init(); + Read(); + Uninit(); + + return status; +} + + + void HTMLParser::filter(const wchar_t * in, std::wstring & out, bool clear_out_string) { @@ -295,6 +375,12 @@ int HTMLParser::get_last_parsed_line() } +int HTMLParser::get_last_parsed_column() +{ + return column; +} + + void HTMLParser::SetSomeDefaults() { white_mode = WHITE_MODE_ORIGIN; diff --git a/src/html/htmlparser.h b/src/html/htmlparser.h index 502c245..cdbff50 100644 --- a/src/html/htmlparser.h +++ b/src/html/htmlparser.h @@ -5,7 +5,7 @@ */ /* - * Copyright (c) 2008-2022, 
Tomasz Sowa + * Copyright (c) 2008-2023, Tomasz Sowa * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -192,6 +192,14 @@ public: Status parse_xml_file(const wchar_t * file_name, Space & out_space, bool compact_mode = false, bool clear_space = true); Status parse_xml_file(const std::wstring & file_name, Space & out_space, bool compact_mode = false, bool clear_space = true); + Status parse_xml(const char * str, Space & out_space, bool compact_mode = false, bool clear_space = true); + Status parse_xml(const std::string & str, Space & out_space, bool compact_mode = false, bool clear_space = true); + + Status parse_xml(const wchar_t * str, Space & out_space, bool compact_mode = false, bool clear_space = true); + Status parse_xml(const std::wstring & str, Space & out_space, bool compact_mode = false, bool clear_space = true); + + Status parse_xml(const pt::TextStream & str, Space & out_space, bool compact_mode = false, bool clear_space = true); + Status parse_xml(const pt::WTextStream & str, Space & out_space, bool compact_mode = false, bool clear_space = true); // main methods used for filtering @@ -208,12 +216,12 @@ public: /* * - * returns a number of a last parsed line + * returns the number of the last parsed line/column * can be used to obtain the line in which there was a syntax error * */ int get_last_parsed_line(); - + int get_last_parsed_column(); @@ -323,6 +331,8 @@ protected: virtual void Init(); virtual void Uninit(); + void prepare_to_parse_xml(Space & out_space, bool compact_mode, bool clear_space); + virtual bool IsOpeningTagMark(wchar_t c); virtual bool IsClosingTagMark(wchar_t c); virtual bool IsClosingTagIndicator(wchar_t c); diff --git a/src/utf8/utf8.h b/src/utf8/utf8.h index 19c0cb6..1a479cc 100644 --- a/src/utf8/utf8.h +++ b/src/utf8/utf8.h @@ -163,7 +163,7 @@ template bool utf8_to_wide(const std::string & utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested template -bool
utf8_to_wide(std::istream & utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested +bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear = true, int mode = 1); template bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear = true, int mode = 1); diff --git a/src/utf8/utf8_templates.h b/src/utf8/utf8_templates.h index b70eef3..15d8383 100644 --- a/src/utf8/utf8_templates.h +++ b/src/utf8/utf8_templates.h @@ -155,7 +155,6 @@ bool utf8_to_wide(const std::string & utf8, StreamType & res, bool clear, int mo -// need to be tested template bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear, int mode) {