From 21614a53093d6fb070e7a11b272d8ee8bad8935b Mon Sep 17 00:00:00 2001 From: Tomasz Sowa Date: Wed, 18 Oct 2023 18:33:52 +0200 Subject: [PATCH] fix: memory leak in the HTMLParser when a compact mode was used --- src/html/htmlparser.cpp | 76 +++++++++++++++++++++++++---------------- src/html/htmlparser.h | 2 +- 2 files changed, 47 insertions(+), 31 deletions(-) diff --git a/src/html/htmlparser.cpp b/src/html/htmlparser.cpp index 0efeee0..46b2ef8 100644 --- a/src/html/htmlparser.cpp +++ b/src/html/htmlparser.cpp @@ -1603,7 +1603,7 @@ void HTMLParser::ReadText(bool is_cdata) if( text_space_wstr && !text_space_wstr->empty() && was_non_white_text ) { - AddSpaceToSpaceTree(*text_space); + AddTextSpaceToSpaceTree(*text_space); } text_space_tmp.clear(); @@ -2195,26 +2195,34 @@ void HTMLParser::AddItemToSpace() if( xml_compact_mode ) { - Space * space = parent->get_space(pstack[stack_len-1].name); + bool has_parent_object_name = false; - if( space ) + if( parent->is_object() ) { - if( space->is_table() ) - { - Space & child = space->add_empty_space(); - pstack[stack_len-1].space = &child; - } - else - { - Space * tab = new Space(); - tab->add(space); - Space & child = tab->add_empty_space(); + Space::ObjectType::iterator i = parent->value.value_object.find(pstack[stack_len-1].name); - parent->value.value_object[pstack[stack_len-1].name] = tab; - pstack[stack_len-1].space = &child; + if( i != parent->value.value_object.end() ) + { + has_parent_object_name = true; + + if( i->second->is_table() ) + { + Space & child = i->second->add_empty_space(); + pstack[stack_len-1].space = &child; + } + else + { + Space * old_space = i->second; + i->second = new Space(); + i->second->set_empty_table(); + i->second->value.value_table.push_back(old_space); + Space & child = i->second->add_empty_space(); + pstack[stack_len-1].space = &child; + } } } - else + + if( !has_parent_object_name ) { Space & space = parent->add_empty_space(pstack[stack_len-1].name); pstack[stack_len-1].space = &space; @@ -2226,7 +2234,6 @@ void HTMLParser::AddItemToSpace() Space & child = childs_tab.add_empty_space(); pstack[stack_len-1].space = &child; } - } } @@ -2261,7 +2268,7 @@ void HTMLParser::RemoveLastSpace(size_t index) } } -void HTMLParser::AddSpaceToSpaceTree(const Space & space) +void HTMLParser::AddTextSpaceToSpaceTree(const Space & space) { const std::wstring * text = space.get_wstr(L"text"); @@ -2269,23 +2276,32 @@ void HTMLParser::AddSpaceToSpaceTree(const Space & space) { if( xml_compact_mode ) { - Space * child_text = LastItem().space->get_space(L"text"); + bool has_space_text = false; - if( child_text ) + if( LastItem().space->is_object() ) { - if( child_text->is_table() ) + Space::ObjectType::iterator i = LastItem().space->value.value_object.find(L"text"); + + if( i != LastItem().space->value.value_object.end() ) { - child_text->add(*text); - } - else - { - Space * tab = new Space(); - tab->add(*child_text); - tab->add(*text); - LastItem().space->value.value_object[L"text"] = tab; + has_space_text = true; + + if( i->second->is_table() ) + { + i->second->add(*text); + } + else + { + Space * old_space = i->second; + i->second = new Space(); + i->second->set_empty_table(); + i->second->value.value_table.push_back(old_space); + i->second->add(*text); + } } } - else + + if( !has_space_text ) { LastItem().space->add(L"text", *text); } diff --git a/src/html/htmlparser.h b/src/html/htmlparser.h index cdbff50..e5d5157 100644 --- a/src/html/htmlparser.h +++ b/src/html/htmlparser.h @@ -443,7 +443,7 @@ protected: void AddItemToSpace(); void RemoveLastSpace(size_t index); - void AddSpaceToSpaceTree(const Space & space); + void AddTextSpaceToSpaceTree(const Space & space); bool RemoveIfNeeded(size_t index);