add limits when parsing a json/space format

while here:
- add a column index for reporting where an error occurred
- add parsing methods with pt::TextStream and pt::WTextStream arguments
This commit is contained in:
Tomasz Sowa 2022-05-30 01:01:14 +02:00
parent a40bab0445
commit 68fe25c8bf
4 changed files with 354 additions and 64 deletions

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2021, Tomasz Sowa
* Copyright (c) 2021-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -52,6 +52,7 @@ BaseParser::BaseParser()
void BaseParser::clear_input_flags()
{
line = 0;
column = 0;
reading_from_file = false;
pchar_ascii = nullptr;
pchar_unicode = nullptr;
@ -69,6 +70,16 @@ void BaseParser::clear_input_flags()
}
/*
 * update the line/column position after a character has been read into lastc
 *
 * a new line character starts a next line and resets the column counter,
 * any other character advances the column counter; the end-of-input
 * marker (-1) is not a character from the input so it does not move the position
 *
 * without advancing the column here the column counter would stay at zero
 * and get_last_parsed_column() would never report a useful value
 */
void BaseParser::check_new_line()
{
	if( lastc == '\n' )
	{
		++line;
		column = 0;
	}
	else
	if( lastc != -1 )
	{
		++column;
	}
}
int BaseParser::read_utf8_char()
{
int c;
@ -86,9 +97,7 @@ bool correct;
while( !correct );
lastc = c;
if( lastc == '\n' )
++line;
check_new_line();
return lastc;
}
@ -97,9 +106,7 @@ return lastc;
int BaseParser::read_ascii_char()
{
lastc = file.get();
if( lastc == '\n' )
++line;
check_new_line();
return lastc;
}
@ -112,8 +119,7 @@ int BaseParser::read_char_from_wchar_string()
else
lastc = *(pchar_unicode++);
if( lastc == '\n' )
++line;
check_new_line();
return lastc;
}
@ -136,8 +142,7 @@ bool correct;
if( correct )
lastc = c;
if( lastc == '\n' )
++line;
check_new_line();
return lastc;
}
@ -150,8 +155,7 @@ int BaseParser::read_char_from_ascii_string()
else
lastc = *(pchar_ascii++);
if( lastc == '\n' )
++line;
check_new_line();
return lastc;
}
@ -169,8 +173,7 @@ int BaseParser::read_char_from_wtext_stream()
lastc = -1;
}
if( lastc == '\n' )
++line;
check_new_line();
return lastc;
}
@ -192,8 +195,7 @@ int BaseParser::read_char_from_utf8_text_stream()
if( correct )
lastc = c;
if( lastc == '\n' )
++line;
check_new_line();
return lastc;
}
@ -211,8 +213,7 @@ int BaseParser::read_char_from_ascii_text_stream()
lastc = -1;
}
if( lastc == '\n' )
++line;
check_new_line();
return lastc;
}

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2021, Tomasz Sowa
* Copyright (c) 2021-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -54,6 +54,7 @@ protected:
virtual void clear_input_flags();
virtual void check_new_line();
virtual int read_utf8_char();
virtual int read_ascii_char();
virtual int read_char_from_wchar_string();
@ -72,6 +73,11 @@ protected:
*/
int line;
/*
the number of the column in which a syntax error occurred
*/
int column;
/*
true if parse() method was called

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2012-2021, Tomasz Sowa
* Copyright (c) 2012-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -55,6 +55,10 @@ SpaceParser::SpaceParser()
space_end = '}';
option_delimiter = ',';
input_as_utf8 = true;
object_items_limit = 0;
table_items_limit = 0;
all_items_limit = 0;
nested_levels_limit = 0;
}
@ -71,10 +75,73 @@ int SpaceParser::get_last_parsed_line()
}
int SpaceParser::get_last_parsed_column()
{
return column;
}
void SpaceParser::set_object_items_limit(size_t val)
{
this->object_items_limit = val;
}
void SpaceParser::set_table_items_limit(size_t val)
{
this->table_items_limit = val;
}
void SpaceParser::set_all_items_limit(size_t val)
{
this->all_items_limit = val;
}
void SpaceParser::set_nested_level_limit(size_t val)
{
this->nested_levels_limit = val;
}
size_t SpaceParser::get_object_items_limit()
{
return object_items_limit;
}
size_t SpaceParser::get_table_items_limit()
{
return table_items_limit;
}
size_t SpaceParser::get_all_items_limit()
{
return all_items_limit;
}
size_t SpaceParser::get_nested_level_limit()
{
return nested_levels_limit;
}
void SpaceParser::prepare_to_parsing()
{
clear_input_flags();
current_items_counter = 0;
current_nested_level = 0;
}
SpaceParser::Status SpaceParser::parse_json_file(const char * file_name, Space & out_space, bool clear_space)
{
clear_input_flags();
prepare_to_parsing();
reading_from_file = true;
parsing_space = false;
@ -126,7 +193,7 @@ SpaceParser::Status SpaceParser::parse_json_file(const std::wstring & file_name,
SpaceParser::Status SpaceParser::parse_space_file(const char * file_name, Space & out_space, bool clear_space)
{
clear_input_flags();
prepare_to_parsing();
reading_from_file = true;
parsing_space = true;
@ -176,7 +243,7 @@ SpaceParser::Status SpaceParser::parse_space_file(const std::wstring & file_name
SpaceParser::Status SpaceParser::parse_json(const char * str, Space & out_space, bool clear_space)
{
clear_input_flags();
prepare_to_parsing();
pchar_ascii = str;
parsing_space = false;
@ -196,7 +263,7 @@ SpaceParser::Status SpaceParser::parse_json(const std::string & str, Space & out
SpaceParser::Status SpaceParser::parse_json(const wchar_t * str, Space & out_space, bool clear_space)
{
clear_input_flags();
prepare_to_parsing();
pchar_unicode = str;
parsing_space = false;
@ -215,11 +282,48 @@ SpaceParser::Status SpaceParser::parse_json(const std::wstring & str, Space & ou
/*
 * parse an input in the JSON format read from a pt::TextStream
 *
 * str         - the input stream
 * out_space   - the space which becomes the root space of the parsed tree
 * clear_space - passed to parse_root_space()
 *
 * returns the status set while parsing
 */
SpaceParser::Status SpaceParser::parse_json(const pt::TextStream & str, Space & out_space, bool clear_space)
{
	prepare_to_parsing();

	parsing_space = false;
	root_space = &out_space;

	/*
	 * the parser reads the stream through pointers to these two local iterators
	 * NOTE(review): the pointers are still set when this method returns,
	 * presumably they are cleared at the next parsing -- confirm
	 */
	pt::TextStream::const_iterator stream_pos = str.begin();
	pt::TextStream::const_iterator stream_end = str.end();

	text_stream_iterator = &stream_pos;
	text_stream_iterator_end = &stream_end;

	parse_root_space(clear_space);

	return status;
}
/*
 * parse an input in the JSON format read from a pt::WTextStream
 *
 * str         - the input stream
 * out_space   - the space which becomes the root space of the parsed tree
 * clear_space - passed to parse_root_space()
 *
 * returns the status set while parsing
 */
SpaceParser::Status SpaceParser::parse_json(const pt::WTextStream & str, Space & out_space, bool clear_space)
{
	prepare_to_parsing();

	parsing_space = false;
	root_space = &out_space;

	/*
	 * the parser reads the stream through pointers to these two local iterators
	 * NOTE(review): the pointers are still set when this method returns,
	 * presumably they are cleared at the next parsing -- confirm
	 */
	pt::WTextStream::const_iterator stream_pos = str.begin();
	pt::WTextStream::const_iterator stream_end = str.end();

	wtext_stream_iterator = &stream_pos;
	wtext_stream_iterator_end = &stream_end;

	parse_root_space(clear_space);

	return status;
}
SpaceParser::Status SpaceParser::parse_space(const char * str, Space & out_space, bool clear_space)
{
clear_input_flags();
prepare_to_parsing();
pchar_ascii = str;
parsing_space = true;
@ -239,7 +343,7 @@ SpaceParser::Status SpaceParser::parse_space(const std::string & str, Space & ou
SpaceParser::Status SpaceParser::parse_space(const wchar_t * str, Space & out_space, bool clear_space)
{
clear_input_flags();
prepare_to_parsing();
pchar_unicode = str;
parsing_space = true;
@ -257,6 +361,41 @@ SpaceParser::Status SpaceParser::parse_space(const std::wstring & str, Space & o
}
/*
 * parse an input in the Space format read from a pt::TextStream
 *
 * str         - the input stream
 * out_space   - the space which becomes the root space of the parsed tree
 * clear_space - passed to parse_root_space()
 *
 * returns the status set while parsing
 */
SpaceParser::Status SpaceParser::parse_space(const pt::TextStream & str, Space & out_space, bool clear_space)
{
	prepare_to_parsing();

	parsing_space = true;
	root_space = &out_space;

	/*
	 * the parser reads the stream through pointers to these two local iterators
	 * NOTE(review): the pointers are still set when this method returns,
	 * presumably they are cleared at the next parsing -- confirm
	 */
	pt::TextStream::const_iterator stream_pos = str.begin();
	pt::TextStream::const_iterator stream_end = str.end();

	text_stream_iterator = &stream_pos;
	text_stream_iterator_end = &stream_end;

	parse_root_space(clear_space);

	return status;
}
/*
 * parse an input in the Space format read from a pt::WTextStream
 *
 * str         - the input stream
 * out_space   - the space which becomes the root space of the parsed tree
 * clear_space - passed to parse_root_space()
 *
 * returns the status set while parsing
 */
SpaceParser::Status SpaceParser::parse_space(const pt::WTextStream & str, Space & out_space, bool clear_space)
{
	prepare_to_parsing();

	parsing_space = true;
	root_space = &out_space;

	/*
	 * the parser reads the stream through pointers to these two local iterators
	 * NOTE(review): the pointers are still set when this method returns,
	 * presumably they are cleared at the next parsing -- confirm
	 */
	pt::WTextStream::const_iterator stream_pos = str.begin();
	pt::WTextStream::const_iterator stream_end = str.end();

	wtext_stream_iterator = &stream_pos;
	wtext_stream_iterator_end = &stream_end;

	parse_root_space(clear_space);

	return status;
}
@ -287,10 +426,13 @@ void SpaceParser::parse_root_space(bool clear_root_space)
parse(root_space, false, false);
}
skip_white();
if( status == ok )
{
skip_white();
if( lastc != -1 )
status = syntax_error;
if( lastc != -1 )
status = syntax_error;
}
token.clear();
}
@ -362,32 +504,45 @@ void SpaceParser::parse(Space * space, bool is_object_value, bool is_table_value
void SpaceParser::parse_space(Space * space)
{
/*
* in Space format in global namespace the space start character is not required
*/
bool need_space_start_character = !parsing_space || space != root_space;
if( need_space_start_character )
if( nested_levels_limit == 0 || current_nested_level++ < nested_levels_limit )
{
read_char(); // inserting a next character after the space_start char to lastc
}
/*
* in Space format in global namespace the space start character is not required
*/
bool need_space_start_character = !parsing_space || space != root_space;
if( !space->is_object() )
space->set_empty_object();
parse_key_value_pairs(space);
if( need_space_start_character )
{
if( lastc == space_end )
if( need_space_start_character )
{
read_char();
read_char(); // inserting a next character after the space_start char to lastc
}
else
if( !space->is_object() )
space->set_empty_object();
parse_key_value_pairs(space);
if( status == ok )
{
status = syntax_error;
if( need_space_start_character )
{
if( lastc == space_end )
{
read_char();
}
else
{
status = syntax_error;
}
}
}
}
else
{
status = limit_nested_level_exceeded;
}
if( current_nested_level > 0 )
current_nested_level -= 1;
}
@ -463,18 +618,31 @@ void SpaceParser::parse_floating_point_value(Space * space)
void SpaceParser::parse_table(Space * space)
{
read_char(); // inserting a next character after the table_start char to lastc
space->set_empty_table();
parse_values_list(space);
if( lastc == table_end )
if( nested_levels_limit == 0 || current_nested_level++ < nested_levels_limit )
{
read_char();
read_char(); // inserting a next character after the table_start char to lastc
space->set_empty_table();
parse_values_list(space);
if( status == ok )
{
if( lastc == table_end )
{
read_char();
}
else
{
status = syntax_error;
}
}
}
else
{
status = syntax_error;
status = limit_nested_level_exceeded;
}
if( current_nested_level > 0 )
current_nested_level -= 1;
}
@ -524,8 +692,23 @@ void SpaceParser::parse_key_value_pairs(Space * space)
{
read_char(); // inserting a next character after the separator to lastc
Space & new_space = space->add(token.c_str(), new Space());
parse(&new_space, true, false);
if( object_items_limit == 0 || !space->is_object() || (space->object_size() < object_items_limit) )
{
Space & new_space = space->add(token.c_str(), new Space());
if( all_items_limit == 0 || current_items_counter++ < all_items_limit )
{
parse(&new_space, true, false);
}
else
{
status = limit_all_items_exceeded;
}
}
else
{
status = limit_object_items_exceeded;
}
}
else
{
@ -535,7 +718,11 @@ void SpaceParser::parse_key_value_pairs(Space * space)
}
is_first = false;
skip_white();
if( status == ok )
{
skip_white();
}
}
}
@ -576,12 +763,31 @@ void SpaceParser::parse_values_list(Space * space)
if( status == ok )
{
Space * new_space = &space->add(new Space());
parse(new_space, false, true);
if( table_items_limit == 0 || !space->is_table() || (space->table_size() < table_items_limit) )
{
Space * new_space = &space->add(new Space());
if( all_items_limit == 0 || current_items_counter++ < all_items_limit )
{
parse(new_space, false, true);
}
else
{
status = limit_all_items_exceeded;
}
}
else
{
status = limit_table_items_exceeded;
}
}
is_first = false;
skip_white();
if( status == ok )
{
skip_white();
}
}
}

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2012-2021, Tomasz Sowa
* Copyright (c) 2012-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -63,8 +63,23 @@ public:
/*
status of parsing
ok - input stream has been parsed correctly
cant_open_file - I cannot open the file (returns only in a case when parsing a file)
syntax_error - a syntax error in the input stream
limit_object_items_exceeded - limit of object items has been exceeded
limit_table_items_exceeded - limit of table items has been exceeded
limit_all_items_exceeded - limit of items (key/value pairs of objects or table items) throughout the whole tree has been exceeded
limit_nested_level_exceeded - limit of nested objects/tables has been exceeded
*/
enum Status { ok, cant_open_file, syntax_error };
enum Status {
ok,
cant_open_file,
syntax_error,
limit_object_items_exceeded,
limit_table_items_exceeded,
limit_all_items_exceeded,
limit_nested_level_exceeded
};
/*
@ -109,6 +124,8 @@ public:
Status parse_json(const wchar_t * str, Space & out_space, bool clear_space = true);
Status parse_json(const std::wstring & str, Space & out_space, bool clear_space = true);
Status parse_json(const pt::TextStream & str, Space & out_space, bool clear_space = true);
Status parse_json(const pt::WTextStream & str, Space & out_space, bool clear_space = true);
Status parse_space(const char * str, Space & out_space, bool clear_space = true);
@ -116,6 +133,8 @@ public:
Status parse_space(const wchar_t * str, Space & out_space, bool clear_space = true);
Status parse_space(const std::wstring & str, Space & out_space, bool clear_space = true);
Status parse_space(const pt::TextStream & str, Space & out_space, bool clear_space = true);
Status parse_space(const pt::WTextStream & str, Space & out_space, bool clear_space = true);
/*
@ -144,6 +163,45 @@ public:
*
*/
int get_last_parsed_line();
int get_last_parsed_column();
/*
* get/set limit of object items in one object
* default: 0 (disabled)
*/
void set_object_items_limit(size_t val);
size_t get_object_items_limit();
/*
* get/set limit of items in one table
* default: 0 (disabled)
*
*/
void set_table_items_limit(size_t val);
size_t get_table_items_limit();
/*
* get/set limit of all items (object items and table items) throughout the whole tree
* default: 0 (disabled)
*
*/
void set_all_items_limit(size_t val);
size_t get_all_items_limit();
/*
* get/set nested level limit
* limit of nested objects and tables
* default: 0 (disabled)
*
*/
void set_nested_level_limit(size_t val);
size_t get_nested_level_limit();
private:
@ -211,6 +269,23 @@ private:
bool parsing_space;
/*
* object_items_limit - limit of key/value pairs of one object
* table_items_limit - limit of items of one table
* all_items_limit - limit of all items of all objects and all tables
* nested_levels_limit - limit of nested objects/tables
*/
size_t object_items_limit;
size_t table_items_limit;
size_t all_items_limit;
size_t nested_levels_limit;
/*
* current_items_counter - how many items (key/value pairs of objects or table items) throughout the whole tree
* current_nested_level - current nested level of objects and tables
*/
size_t current_items_counter;
size_t current_nested_level;
void parse_root_space(bool clear_root_space);
void parse(Space * space, bool is_object_value, bool is_table_value);
@ -252,6 +327,8 @@ private:
void read_unicode_floating_format();
void read_unicode_code_point();
void prepare_to_parsing();
};