diff --git a/src/html/htmlparser.cpp b/src/html/htmlparser.cpp
index 57f8d00..c5e37cf 100644
--- a/src/html/htmlparser.cpp
+++ b/src/html/htmlparser.cpp
@@ -5,7 +5,7 @@
*/
/*
- * Copyright (c) 2008-2021, Tomasz Sowa
+ * Copyright (c) 2008-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -71,16 +71,17 @@ void HTMLParser::clear_input_flags()
void HTMLParser::Item::Clear()
{
name.clear();
- type = none;
- is_commentary = false;
- is_cdata = false;
- porphans = nullptr;
- new_line_before = false;
- new_line = false;
+ type = none;
+ is_commentary = false;
+ is_cdata = false;
+ porphans = nullptr;
+ new_line_before = false;
+ new_line_after = false;
new_line_in_the_middle = false;
- has_body_tag = false;
- tree_index = 0;
- space = nullptr;
+ white_char_before = false;
+ has_body_tag = false;
+ tree_index = 0;
+ space = nullptr;
}
@@ -1175,10 +1176,18 @@ bool HTMLParser::PutOpeningTag()
return false;
}
- if( current_white_char_mode() == WHITE_MODE_TREE && LastItem().new_line_before )
+ if( current_white_char_mode() == WHITE_MODE_TREE )
{
- Put(10);
- PutTabs(LastItem().tree_index);
+ if( LastItem().new_line_before )
+ {
+ Put(10);
+ PutTabs(LastItem().tree_index);
+ }
+ else
+ if( LastItem().white_char_before )
+ {
+ Put(' ');
+ }
}
PutOpeningTagMark();
@@ -1414,6 +1423,8 @@ void HTMLParser::ReadTextUntilClosingTag(bool put_closing_tag_as_well)
void HTMLParser::ReadText(bool is_cdata)
{
new_item_has_new_line_before = false;
+ new_item_has_white_char_before = false;
+
bool was_white_char = false;
bool was_new_line = false;
@@ -1424,7 +1435,7 @@ void HTMLParser::ReadText(bool is_cdata)
if( current_white_char_mode() == WHITE_MODE_TREE )
{
- if( LastItem().new_line || (wrap_line != 0 && LastItem().has_body_tag && line_len >= wrap_line) )
+ if( LastItem().new_line_after || (wrap_line != 0 && LastItem().has_body_tag && line_len >= wrap_line) )
{
allow_put_new_line = true;
}
@@ -1450,7 +1461,10 @@ void HTMLParser::ReadText(bool is_cdata)
was_closing_tag = PutNormalNonWhite(tmp_text, allow_put_new_line, allow_put_space, is_cdata);
if( lastc == -1 || was_closing_tag )
+ {
new_item_has_new_line_before = was_new_line;
+ new_item_has_white_char_before = was_white_char;
+ }
if( !tmp_text.empty() )
{
@@ -1486,7 +1500,7 @@ void HTMLParser::ReadText(bool is_cdata)
LastItem().new_line_in_the_middle = true;
if( !was_non_white_text )
- LastItem().new_line = true;
+ LastItem().new_line_after = true;
}
else
{
@@ -1758,6 +1772,7 @@ bool HTMLParser::ReadItem()
return false;
LastItem().new_line_before = new_item_has_new_line_before; // new_item_has_new_line_before is set by ReadText() method
+ LastItem().white_char_before = new_item_has_white_char_before; // new_item_has_white_char_before is set by ReadText() method
if( stack_len > 1 && pstack[stack_len-2].new_line_in_the_middle )
LastItem().tree_index += 1;
@@ -1971,7 +1986,7 @@ void HTMLParser::CheckStackPrintRest()
{
while( stack_len-- > 0 )
{
- if( stack_len==0 || pstack[stack_len-1].new_line )
+ if( stack_len==0 || pstack[stack_len-1].new_line_after )
{
if( current_white_char_mode() == WHITE_MODE_TREE )
{
@@ -2030,7 +2045,7 @@ void HTMLParser::CheckClosingTags()
if( !skip_tags && IsTagSafe(LastItem().name) && !IsNameEqual(no_filter_tag, LastItem().name) )
{
- if( pstack[z].new_line )
+ if( pstack[z].new_line_after )
{
if( current_white_char_mode() == WHITE_MODE_TREE )
{
diff --git a/src/html/htmlparser.h b/src/html/htmlparser.h
index 7797b51..fb63809 100644
--- a/src/html/htmlparser.h
+++ b/src/html/htmlparser.h
@@ -5,7 +5,7 @@
*/
/*
- * Copyright (c) 2008-2021, Tomasz Sowa
+ * Copyright (c) 2008-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -134,14 +134,18 @@ public:
bool is_cdata;
+ // is there a new line before this tag (or just a new line and some white characters)
bool new_line_before;
- // is there a new line after this tag
- bool new_line;
+ // is there a new line after this tag (or just some white characters and a new line)
+ bool new_line_after;
- // is there a new
+ // is there a new line in the middle after this tag and before the next tag
bool new_line_in_the_middle;
+ // is there a white char (but not a new line) before this tag
+ bool white_char_before;
+
// current orphans table
// (will be propagated)
Orphans * porphans;
@@ -448,6 +452,7 @@ protected:
//bool last_new_line;
bool new_item_has_new_line_before;
+ bool new_item_has_white_char_before;
int white_mode;
bool is_first_item;
size_t wrap_line; // insert a new line character into long lines