diff --git a/src/html/htmlparser.cpp b/src/html/htmlparser.cpp
index 7d7ff36..0efeee0 100644
--- a/src/html/htmlparser.cpp
+++ b/src/html/htmlparser.cpp
@@ -5,7 +5,7 @@
*/
/*
- * Copyright (c) 2008-2022, Tomasz Sowa
+ * Copyright (c) 2008-2023, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -110,18 +110,25 @@ void HTMLParser::set_item_parsed_listener(ItemParsedListener * listener)
}
-
-HTMLParser::Status HTMLParser::parse_xml_file(const char * file_name, Space & out_space, bool compact_mode, bool clear_space)
+void HTMLParser::prepare_to_parse_xml(Space & out_space, bool compact_mode, bool clear_space)
{
clear_input_flags();
parsing_html = false;
- reading_from_file = true;
- xml_compact_mode = compact_mode;
this->out_space = &out_space;
+ xml_compact_mode = compact_mode;
if( clear_space )
+ {
this->out_space->clear();
+ }
+}
+
+
+HTMLParser::Status HTMLParser::parse_xml_file(const char * file_name, Space & out_space, bool compact_mode, bool clear_space)
+{
+ prepare_to_parse_xml(out_space, compact_mode, clear_space);
+ reading_from_file = true;
file.clear();
file.open(file_name, std::ios_base::binary | std::ios_base::in);
@@ -164,6 +171,79 @@ HTMLParser::Status HTMLParser::parse_xml_file(const std::wstring & file_name, Sp
}
+HTMLParser::Status HTMLParser::parse_xml(const char * str, Space & out_space, bool compact_mode, bool clear_space)
+{
+ prepare_to_parse_xml(out_space, compact_mode, clear_space);
+ pchar_ascii = str;
+
+ Init();
+ Read();
+ Uninit();
+
+ return status;
+}
+
+
+HTMLParser::Status HTMLParser::parse_xml(const std::string & str, Space & out_space, bool compact_mode, bool clear_space)
+{
+ return parse_xml(str.c_str(), out_space, compact_mode, clear_space);
+}
+
+
+HTMLParser::Status HTMLParser::parse_xml(const wchar_t * str, Space & out_space, bool compact_mode, bool clear_space)
+{
+ prepare_to_parse_xml(out_space, compact_mode, clear_space);
+ pchar_unicode = str;
+
+ Init();
+ Read();
+ Uninit();
+
+ return status;
+}
+
+
+HTMLParser::Status HTMLParser::parse_xml(const std::wstring & str, Space & out_space, bool compact_mode, bool clear_space)
+{
+ return parse_xml(str.c_str(), out_space, compact_mode, clear_space);
+}
+
+
+HTMLParser::Status HTMLParser::parse_xml(const pt::TextStream & str, Space & out_space, bool compact_mode, bool clear_space)
+{
+ prepare_to_parse_xml(out_space, compact_mode, clear_space);
+ pt::TextStream::const_iterator start = str.begin();
+ pt::TextStream::const_iterator end = str.end();
+
+ text_stream_iterator = &start;
+ text_stream_iterator_end = &end;
+
+ Init();
+ Read();
+ Uninit();
+
+ return status;
+}
+
+
+HTMLParser::Status HTMLParser::parse_xml(const pt::WTextStream & str, Space & out_space, bool compact_mode, bool clear_space)
+{
+ prepare_to_parse_xml(out_space, compact_mode, clear_space);
+ pt::WTextStream::const_iterator start = str.begin();
+ pt::WTextStream::const_iterator end = str.end();
+
+ wtext_stream_iterator = &start;
+ wtext_stream_iterator_end = &end;
+
+ Init();
+ Read();
+ Uninit();
+
+ return status;
+}
+
+
+
void HTMLParser::filter(const wchar_t * in, std::wstring & out, bool clear_out_string)
{
@@ -295,6 +375,12 @@ int HTMLParser::get_last_parsed_line()
}
+int HTMLParser::get_last_parsed_column()
+{
+ return column;
+}
+
+
void HTMLParser::SetSomeDefaults()
{
white_mode = WHITE_MODE_ORIGIN;
diff --git a/src/html/htmlparser.h b/src/html/htmlparser.h
index 502c245..cdbff50 100644
--- a/src/html/htmlparser.h
+++ b/src/html/htmlparser.h
@@ -5,7 +5,7 @@
*/
/*
- * Copyright (c) 2008-2022, Tomasz Sowa
+ * Copyright (c) 2008-2023, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -192,6 +192,14 @@ public:
Status parse_xml_file(const wchar_t * file_name, Space & out_space, bool compact_mode = false, bool clear_space = true);
Status parse_xml_file(const std::wstring & file_name, Space & out_space, bool compact_mode = false, bool clear_space = true);
+ Status parse_xml(const char * str, Space & out_space, bool compact_mode = false, bool clear_space = true);
+ Status parse_xml(const std::string & str, Space & out_space, bool compact_mode = false, bool clear_space = true);
+
+ Status parse_xml(const wchar_t * str, Space & out_space, bool compact_mode = false, bool clear_space = true);
+ Status parse_xml(const std::wstring & str, Space & out_space, bool compact_mode = false, bool clear_space = true);
+
+ Status parse_xml(const pt::TextStream & str, Space & out_space, bool compact_mode = false, bool clear_space = true);
+ Status parse_xml(const pt::WTextStream & str, Space & out_space, bool compact_mode = false, bool clear_space = true);
// main methods used for filtering
@@ -208,12 +216,12 @@ public:
/*
*
- * returns a number of a last parsed line
+ * returns the number of the last parsed line/column
* can be used to obtain the line in which there was a syntax error
*
*/
int get_last_parsed_line();
-
+ int get_last_parsed_column();
@@ -323,6 +331,8 @@ protected:
virtual void Init();
virtual void Uninit();
+ void prepare_to_parse_xml(Space & out_space, bool compact_mode, bool clear_space);
+
virtual bool IsOpeningTagMark(wchar_t c);
virtual bool IsClosingTagMark(wchar_t c);
virtual bool IsClosingTagIndicator(wchar_t c);
diff --git a/src/utf8/utf8.h b/src/utf8/utf8.h
index 19c0cb6..1a479cc 100644
--- a/src/utf8/utf8.h
+++ b/src/utf8/utf8.h
@@ -163,7 +163,7 @@ template
bool utf8_to_wide(const std::string & utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested
template
-bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested
+bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear = true, int mode = 1);
template
bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear = true, int mode = 1);
diff --git a/src/utf8/utf8_templates.h b/src/utf8/utf8_templates.h
index b70eef3..15d8383 100644
--- a/src/utf8/utf8_templates.h
+++ b/src/utf8/utf8_templates.h
@@ -155,7 +155,6 @@ bool utf8_to_wide(const std::string & utf8, StreamType & res, bool clear, int mo
-// need to be tested
template
bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear, int mode)
{