Browse Source

Added a compact_mode option to HTMLParser::parse_html for producing a compact Space output

htmlparserlistener
Tomasz Sowa 1 year ago
parent
commit
b1cc64a29b
  1. 77
      src/html/htmlparser.cpp
  2. 4
      src/html/htmlparser.h

77
src/html/htmlparser.cpp

@ -69,13 +69,14 @@ HTMLParser::Item::Item()
}
void HTMLParser::parse_html(const wchar_t * in, Space & space)
void HTMLParser::parse_html(const wchar_t * in, Space & space, bool compact_mode)
{
parsing_html = true;
reading_from_file = false;
reading_from_wchar_string = true;
pchar_unicode = in;
pchar_ascii = 0;
xml_compact_mode = compact_mode;
stack_len = 0;
out_string = nullptr;
@ -1440,7 +1441,7 @@ void HTMLParser::ReadItemOpening()
AddItemToSpace();
Space * space = LastItem().space;
if( space )
if( !xml_compact_mode && space )
space->add(L"name", LastItem().name);
if( PrintOpeningItem() )
@ -1837,27 +1838,85 @@ void HTMLParser::AddItemToSpace()
{
if( out_space && stack_len > 0 )
{
if( stack_len == 1 )
Space * parent = out_space;
if( stack_len > 1 )
{
parent = pstack[stack_len-2].space;
}
if( xml_compact_mode )
{
pstack[stack_len-1].space = out_space;
Space * space = parent->get_space(pstack[stack_len-1].name);
if( space )
{
if( space->is_table() )
{
Space & child = space->add_empty_space();
pstack[stack_len-1].space = &child;
}
else
{
Space * tab = new Space();
tab->add(space);
Space & child = tab->add_empty_space();
parent->value.value_object[pstack[stack_len-1].name] = tab;
pstack[stack_len-1].space = &child;
}
}
else
{
Space & space = parent->add_empty_space(pstack[stack_len-1].name);
pstack[stack_len-1].space = &space;
}
}
else
{
// stack_len > 1
Space & childs_tab = pstack[stack_len-2].space->get_add_space(L"childs");
Space & childs_tab = parent->get_add_space(L"childs");
Space & child = childs_tab.add_empty_space();
pstack[stack_len-1].space = &child;
}
}
}
void HTMLParser::AddSpaceToSpaceTree(const Space & space)
{
if( out_space && stack_len > 0 )
const std::wstring * text = space.get_wstr(L"text");
if( out_space && stack_len > 0 && text )
{
Space & childs_tab = LastItem().space->get_add_space(L"childs");
childs_tab.add(space);
if( xml_compact_mode )
{
Space * child_text = LastItem().space->get_space(L"text");
if( child_text )
{
if( child_text->is_table() )
{
child_text->add(*text);
}
else
{
Space * tab = new Space();
tab->add(*child_text);
tab->add(*text);
LastItem().space->value.value_object[L"text"] = tab;
}
}
else
{
LastItem().space->add(L"text", *text);
}
}
else
{
Space & childs_tab = LastItem().space->get_add_space(L"childs");
childs_tab.add(space);
}
}
}

4
src/html/htmlparser.h

@ -107,7 +107,7 @@ public:
virtual ~HTMLParser();
void parse_html(const wchar_t * in, Space & space);
void parse_html(const wchar_t * in, Space & space, bool compact_mode = false);
// main methods used for filtering
@ -182,6 +182,8 @@ protected:
bool parsing_html;
bool xml_compact_mode;
// orphans for one language
struct Orphans
{

Loading…
Cancel
Save