Added a compact_mode option for creating the Space output

This commit is contained in:
Tomasz Sowa 2021-08-10 01:45:10 +02:00
parent b8a03bf852
commit b1cc64a29b
2 changed files with 71 additions and 10 deletions

View File

@ -69,13 +69,14 @@ HTMLParser::Item::Item()
} }
void HTMLParser::parse_html(const wchar_t * in, Space & space) void HTMLParser::parse_html(const wchar_t * in, Space & space, bool compact_mode)
{ {
parsing_html = true; parsing_html = true;
reading_from_file = false; reading_from_file = false;
reading_from_wchar_string = true; reading_from_wchar_string = true;
pchar_unicode = in; pchar_unicode = in;
pchar_ascii = 0; pchar_ascii = 0;
xml_compact_mode = compact_mode;
stack_len = 0; stack_len = 0;
out_string = nullptr; out_string = nullptr;
@ -1440,7 +1441,7 @@ void HTMLParser::ReadItemOpening()
AddItemToSpace(); AddItemToSpace();
Space * space = LastItem().space; Space * space = LastItem().space;
if( space ) if( !xml_compact_mode && space )
space->add(L"name", LastItem().name); space->add(L"name", LastItem().name);
if( PrintOpeningItem() ) if( PrintOpeningItem() )
@ -1837,27 +1838,85 @@ void HTMLParser::AddItemToSpace()
{ {
if( out_space && stack_len > 0 ) if( out_space && stack_len > 0 )
{ {
if( stack_len == 1 ) Space * parent = out_space;
if( stack_len > 1 )
{ {
pstack[stack_len-1].space = out_space; parent = pstack[stack_len-2].space;
}
if( xml_compact_mode )
{
Space * space = parent->get_space(pstack[stack_len-1].name);
if( space )
{
if( space->is_table() )
{
Space & child = space->add_empty_space();
pstack[stack_len-1].space = &child;
}
else
{
Space * tab = new Space();
tab->add(space);
Space & child = tab->add_empty_space();
parent->value.value_object[pstack[stack_len-1].name] = tab;
pstack[stack_len-1].space = &child;
}
}
else
{
Space & space = parent->add_empty_space(pstack[stack_len-1].name);
pstack[stack_len-1].space = &space;
}
} }
else else
{ {
// stack_len > 1 Space & childs_tab = parent->get_add_space(L"childs");
Space & childs_tab = pstack[stack_len-2].space->get_add_space(L"childs");
Space & child = childs_tab.add_empty_space(); Space & child = childs_tab.add_empty_space();
pstack[stack_len-1].space = &child; pstack[stack_len-1].space = &child;
} }
} }
} }
void HTMLParser::AddSpaceToSpaceTree(const Space & space) void HTMLParser::AddSpaceToSpaceTree(const Space & space)
{ {
if( out_space && stack_len > 0 ) const std::wstring * text = space.get_wstr(L"text");
if( out_space && stack_len > 0 && text )
{ {
Space & childs_tab = LastItem().space->get_add_space(L"childs"); if( xml_compact_mode )
childs_tab.add(space); {
Space * child_text = LastItem().space->get_space(L"text");
if( child_text )
{
if( child_text->is_table() )
{
child_text->add(*text);
}
else
{
Space * tab = new Space();
tab->add(*child_text);
tab->add(*text);
LastItem().space->value.value_object[L"text"] = tab;
}
}
else
{
LastItem().space->add(L"text", *text);
}
}
else
{
Space & childs_tab = LastItem().space->get_add_space(L"childs");
childs_tab.add(space);
}
} }
} }

View File

@ -107,7 +107,7 @@ public:
virtual ~HTMLParser(); virtual ~HTMLParser();
void parse_html(const wchar_t * in, Space & space); void parse_html(const wchar_t * in, Space & space, bool compact_mode = false);
// main methods used for filtering // main methods used for filtering
@ -182,6 +182,8 @@ protected:
bool parsing_html; bool parsing_html;
bool xml_compact_mode;
// orphans for one language // orphans for one language
struct Orphans struct Orphans
{ {