From f02dd1093a1bac9b4a99465e256ce2e1504ff5ff Mon Sep 17 00:00:00 2001 From: Tomasz Sowa Date: Tue, 16 Apr 2024 09:35:47 +0200 Subject: [PATCH] fix(HtmlParser): correctly remove an item from the space struct when requested from a callback while here: - implement the removing algorithm for the compact_mode --- src/html/htmlparser.cpp | 54 +++++++++++++++++++++++++++++++---------- src/html/htmlparser.h | 2 +- src/space/space.cpp | 17 ++++++++++++- src/space/space.h | 3 ++- 4 files changed, 60 insertions(+), 16 deletions(-) diff --git a/src/html/htmlparser.cpp b/src/html/htmlparser.cpp index 46b2ef8..d08f09b 100644 --- a/src/html/htmlparser.cpp +++ b/src/html/htmlparser.cpp @@ -5,7 +5,7 @@ */ /* - * Copyright (c) 2008-2023, Tomasz Sowa + * Copyright (c) 2008-2024, Tomasz Sowa * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -2119,14 +2119,13 @@ void HTMLParser::CheckClosingTags() return; } - // CHECK ME - if( RemoveIfNeeded(stack_len - 2) ) - { - RemoveLastSpace(i); - } - for(int z=(int)stack_len-2 ; z >= i ; --z) { + if( RemoveIfNeeded(z) ) + { + RemoveLastSpace(z); + } + CheckWhiteCharsExceptions(pstack[z]); if( !skip_tags && IsTagSafe(LastItem().name) && !IsNameEqual(no_filter_tag, LastItem().name) ) @@ -2238,7 +2237,6 @@ void HTMLParser::AddItemToSpace() } - void HTMLParser::RemoveLastSpace(size_t index) { if( out_space ) @@ -2252,22 +2250,52 @@ void HTMLParser::RemoveLastSpace(size_t index) if( xml_compact_mode ) { - // IMPLEMENT ME + if( parent->is_object() ) + { + Space::ObjectType::iterator i = parent->value.value_object.find(pstack[index].name); + + if( i != parent->value.value_object.end() ) + { + if( i->second->is_table() ) + { + size_t len = i->second->table_size(); + + if( len > 0 && i->second->value.value_table[len-1] == pstack[index].space ) + { + i->second->remove(len - 1); + pstack[index].space = nullptr; + } + } + else + { + if( i->second == pstack[index].space ) + { + parent->remove(i); + pstack[index].space = nullptr; + } + } + } + } } else { Space * childs_tab = parent->get_space(L"childs"); - size_t len = childs_tab->table_size(); - if( childs_tab && childs_tab->is_table() && len > 0 && childs_tab->value.value_table[len-1] == pstack[stack_len-2].space ) + if( childs_tab && childs_tab->is_table() ) { - childs_tab->remove(len - 1); - pstack[stack_len-2].space = nullptr; + size_t len = childs_tab->table_size(); + + if( len > 0 && childs_tab->value.value_table[len-1] == pstack[index].space ) + { + childs_tab->remove(len - 1); + pstack[index].space = nullptr; + } } } } } + void HTMLParser::AddTextSpaceToSpaceTree(const Space & space) { const std::wstring * text = space.get_wstr(L"text"); diff --git a/src/html/htmlparser.h b/src/html/htmlparser.h index e5d5157..2f80e84 100644 --- a/src/html/htmlparser.h +++ b/src/html/htmlparser.h @@ -5,7 +5,7 @@ */ /* - * Copyright (c) 2008-2023, Tomasz Sowa + * Copyright (c) 2008-2024, Tomasz Sowa * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/src/space/space.cpp b/src/space/space.cpp index a9dddb8..8d7146d 100644 --- a/src/space/space.cpp +++ b/src/space/space.cpp @@ -5,7 +5,7 @@ */ /* - * Copyright (c) 2008-2023, Tomasz Sowa + * Copyright (c) 2008-2024, Tomasz Sowa * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -2223,6 +2223,21 @@ void Space::remove(const std::wstring & field) } +void Space::remove(ObjectType::iterator & iterator) +{ + if( type == type_object ) + { + if( iterator != value.value_object.end() ) + { + delete iterator->second; + iterator->second = nullptr; + + value.value_object.erase(iterator); + } + } +} + + void Space::remove(size_t table_index) { if( type == type_table && table_index < value.value_table.size() ) diff --git a/src/space/space.h b/src/space/space.h index de659a7..3964a09 100644 --- a/src/space/space.h +++ b/src/space/space.h @@ -5,7 +5,7 @@ */ /* - * Copyright (c) 2010-2023, Tomasz Sowa + * Copyright (c) 2010-2024, Tomasz Sowa * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -538,6 +538,7 @@ public: // remove a field from an object void remove(const wchar_t * field); void remove(const std::wstring & field); + void remove(ObjectType::iterator & iterator); // remove a table item void remove(size_t table_index);