JSONToSpaceParser: added the ability to parse the Space format

renamed: ParseFile() -> ParseJSONFile()
added: ParseSpaceFile(), ParseJSON(), ParseSpace()
This commit is contained in:
Tomasz Sowa 2021-03-17 18:24:50 +01:00
parent 31f7bdb857
commit 6e169f7650
2 changed files with 360 additions and 109 deletions

View File

@ -81,7 +81,6 @@ void JSONToSpaceParser::SetDefault()
skip_empty = false; skip_empty = false;
use_escape_char = true; use_escape_char = true;
input_as_utf8 = true; input_as_utf8 = true;
max_nested_level = 1000;
} }
@ -111,9 +110,10 @@ int JSONToSpaceParser::get_last_parsed_line()
JSONToSpaceParser::Status JSONToSpaceParser::ParseFile(const char * file_name) JSONToSpaceParser::Status JSONToSpaceParser::ParseJSONFile(const char * file_name)
{ {
reading_from_file = true; reading_from_file = true;
parsing_space = false;
file.clear(); file.clear();
file.open(file_name, std::ios_base::binary | std::ios_base::in); file.open(file_name, std::ios_base::binary | std::ios_base::in);
@ -133,35 +133,88 @@ return status;
JSONToSpaceParser::Status JSONToSpaceParser::ParseFile(const std::string & file_name) JSONToSpaceParser::Status JSONToSpaceParser::ParseJSONFile(const std::string & file_name)
{ {
return ParseFile(file_name.c_str()); return ParseJSONFile(file_name.c_str());
} }
JSONToSpaceParser::Status JSONToSpaceParser::ParseFile(const wchar_t * file_name) JSONToSpaceParser::Status JSONToSpaceParser::ParseJSONFile(const wchar_t * file_name)
{ {
PT::WideToUTF8(file_name, afile_name); std::string file_name_utf8;
return ParseFile(afile_name.c_str());
PT::WideToUTF8(file_name, file_name_utf8);
return ParseJSONFile(file_name_utf8.c_str());
} }
JSONToSpaceParser::Status JSONToSpaceParser::ParseFile(const std::wstring & file_name) JSONToSpaceParser::Status JSONToSpaceParser::ParseJSONFile(const std::wstring & file_name)
{ {
return ParseFile(file_name.c_str()); return ParseJSONFile(file_name.c_str());
} }
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const char * str)
/*
 * parses a file written in the Space format
 *
 * file_name - path to the file, it is opened in binary mode for reading
 *
 * returns the value of the status member: cant_open_file when the file
 * cannot be opened, otherwise whatever ParseRootSpace() left there
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseSpaceFile(const char * file_name)
{
	parsing_space = true;
	reading_from_file = true;

	file.clear();
	file.open(file_name, std::ios_base::binary | std::ios_base::in);

	if( !file )
	{
		// nothing was opened so there is nothing to parse or to close
		status = cant_open_file;
		return status;
	}

	ParseRootSpace();
	file.close();

	return status;
}
/*
 * std::string convenience overload,
 * forwards the path to ParseSpaceFile(const char *)
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseSpaceFile(const std::string & file_name)
{
	const char * path = file_name.c_str();

	return ParseSpaceFile(path);
}
/*
 * wide character overload: the path is converted with PT::WideToUTF8()
 * into a local buffer and then passed to ParseSpaceFile(const char *)
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseSpaceFile(const wchar_t * file_name)
{
	std::string path_utf8;
	PT::WideToUTF8(file_name, path_utf8);

	const char * path = path_utf8.c_str();

	return ParseSpaceFile(path);
}
/*
 * std::wstring convenience overload,
 * forwards the path to ParseSpaceFile(const wchar_t *)
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseSpaceFile(const std::wstring & file_name)
{
	const wchar_t * wide_path = file_name.c_str();

	return ParseSpaceFile(wide_path);
}
JSONToSpaceParser::Status JSONToSpaceParser::ParseJSON(const char * str)
{ {
reading_from_file = false; reading_from_file = false;
reading_from_wchar_string = false; reading_from_wchar_string = false;
pchar_ascii = str; pchar_ascii = str;
pchar_unicode = 0; pchar_unicode = 0;
parsing_space = false;
ParseRootSpace(); ParseRootSpace();
@ -169,18 +222,19 @@ return status;
} }
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const std::string & str) JSONToSpaceParser::Status JSONToSpaceParser::ParseJSON(const std::string & str)
{ {
return Parse(str.c_str()); return ParseJSON(str.c_str());
} }
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const wchar_t * str) JSONToSpaceParser::Status JSONToSpaceParser::ParseJSON(const wchar_t * str)
{ {
reading_from_file = false; reading_from_file = false;
reading_from_wchar_string = true; reading_from_wchar_string = true;
pchar_unicode = str; pchar_unicode = str;
pchar_ascii = 0; pchar_ascii = 0;
parsing_space = false;
ParseRootSpace(); ParseRootSpace();
@ -188,19 +242,63 @@ return status;
} }
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const std::wstring & str) JSONToSpaceParser::Status JSONToSpaceParser::ParseJSON(const std::wstring & str)
{ {
return Parse(str.c_str()); return ParseJSON(str.c_str());
} }
/*
 * parses a text in the Space format given as a narrow (char) string
 *
 * the input pointer is stored in pchar_ascii, pchar_unicode is cleared
 * and the parser is switched to the Space mode before ParseRootSpace() is called
 *
 * returns the value of the status member
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseSpace(const char * str)
{
	parsing_space = true;

	// the input comes from a narrow string, neither from a file nor from a wide string
	reading_from_file = false;
	reading_from_wchar_string = false;

	pchar_unicode = nullptr;
	pchar_ascii = str;

	ParseRootSpace();

	return status;
}
/*
 * std::string convenience overload,
 * forwards the text to ParseSpace(const char *)
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseSpace(const std::string & str)
{
	const char * text = str.c_str();

	return ParseSpace(text);
}
/*
 * parses a text in the Space format given as a wide character string
 *
 * the input pointer is stored in pchar_unicode, pchar_ascii is cleared
 * and the parser is switched to the Space mode before ParseRootSpace() is called
 *
 * returns the value of the status member
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseSpace(const wchar_t * str)
{
	parsing_space = true;

	// the input comes from a wide string and not from a file
	reading_from_file = false;
	reading_from_wchar_string = true;

	pchar_ascii = nullptr;
	pchar_unicode = str;

	ParseRootSpace();

	return status;
}
/*
 * std::wstring convenience overload,
 * forwards the text to ParseSpace(const wchar_t *)
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseSpace(const std::wstring & str)
{
	const wchar_t * wide_text = str.c_str();

	return ParseSpace(wide_text);
}
void JSONToSpaceParser::ParseRootSpace() void JSONToSpaceParser::ParseRootSpace()
{ {
line = 1; line = 1;
status = ok; status = ok;
skipped = 0; skipped = 0;
current_nested_level = 0;
parsing_space = false;
if( !root_space ) if( !root_space )
{ {
@ -209,12 +307,27 @@ void JSONToSpaceParser::ParseRootSpace()
} }
ReadChar(); // put first character to lastc ReadChar(); // put first character to lastc
Parse(root_space);
if( parsing_space )
{
separator = '=';
table_start = '(';
table_end = ')';
ParseSpace(root_space);
}
else
{
separator = ':';
table_start = '[';
table_end = ']';
Parse(root_space, false, false);
}
token.clear(); token.clear();
} }
void JSONToSpaceParser::Parse(Space * space) void JSONToSpaceParser::Parse(Space * space, bool is_object_value, bool is_table_value)
{ {
SkipWhite(); SkipWhite();
@ -234,7 +347,7 @@ void JSONToSpaceParser::Parse(Space * space)
} }
else else
{ {
ReadAlfaNumericToken(); ReadStringValue(token, is_object_value, is_table_value);
if( token == L"null" ) if( token == L"null" )
{ {
@ -262,7 +375,14 @@ void JSONToSpaceParser::Parse(Space * space)
} }
else else
{ {
status = syntax_error; if( parsing_space )
{
space->set(token);
}
else
{
status = syntax_error;
}
} }
} }
} }
@ -273,28 +393,30 @@ void JSONToSpaceParser::Parse(Space * space)
void JSONToSpaceParser::ParseSpace(Space * space) void JSONToSpaceParser::ParseSpace(Space * space)
{ {
current_nested_level += 1; /*
* in Space format in global namespace the space start character is not required
*/
bool need_space_start_character = !parsing_space || space != root_space;
// if( current_nested_level > max_nested_level ) if( need_space_start_character )
// { {
// status = max_nested_spaces_exceeded; ReadChar(); // inserting a next character after the space_start char to lastc
// return; }
// }
ReadChar(); // inserting a next character after the space_start char to lastc
space->set_empty_object(); space->set_empty_object();
ParseKeyValuePairs(space); ParseKeyValuePairs(space);
if( lastc == space_end ) if( need_space_start_character )
{ {
ReadChar(); if( lastc == space_end )
{
ReadChar();
}
else
{
status = syntax_error;
}
} }
else
{
status = syntax_error;
}
current_nested_level -= 1;
} }
@ -303,25 +425,13 @@ void JSONToSpaceParser::ParseSpace(Space * space)
void JSONToSpaceParser::ParseTextValue(Space * space) void JSONToSpaceParser::ParseTextValue(Space * space)
{ {
ReadChar(); // inserting a next character after the ... char to lastc
space->set_empty_wstring(); space->set_empty_wstring();
std::wstring * str = space->get_wstring(); std::wstring * str = space->get_wstring();
// IMPROVEME add support for escaped characters if( parsing_space )
while( status == ok && lastc != '"' && lastc != -1 ) ReadMultilineTokenQuoted(*str);
{
str->push_back(static_cast<wchar_t>(lastc));
ReadChar();
}
if( lastc == '"' )
{
ReadChar();
}
else else
{ ReadTokenQuoted(*str);
status = syntax_error;
}
} }
@ -375,8 +485,9 @@ void JSONToSpaceParser::ParseTable(Space * space)
{ {
space->set_empty_table(); space->set_empty_table();
ReadChar(); // inserting a next character after the table_start char to lastc ReadChar(); // inserting a next character after the table_start char to lastc
space->set_empty_object(); space->set_empty_table();
ParseValuesList(space); ParseValuesList(space);
if( lastc == table_end ) if( lastc == table_end )
@ -406,18 +517,29 @@ void JSONToSpaceParser::ParseKeyValuePairs(Space * space)
if( lastc == option_delimiter ) if( lastc == option_delimiter )
{ {
ReadChar(); // inserting a next character after the option_delimiter to lastc ReadChar(); // inserting a next character after the option_delimiter to lastc
if( parsing_space )
{
// in space format a space_end character is allowed to be after the last table item
SkipWhite();
if( lastc == space_end )
break;
}
} }
else else
if( !parsing_space )
{ {
// in json format the option_delimiter is required
status = syntax_error; status = syntax_error;
} }
} }
if( status == ok ) if( status == ok )
{ {
Space * new_key_space = ReadKey(space); ReadKey();
if( status == ok && new_key_space ) if( status == ok )
{ {
SkipWhite(); SkipWhite();
@ -425,9 +547,14 @@ void JSONToSpaceParser::ParseKeyValuePairs(Space * space)
{ {
ReadChar(); // inserting a next character after the separator to lastc ReadChar(); // inserting a next character after the separator to lastc
Parse(new_key_space); Space & new_space = space->add(token.c_str(), new Space());
//value.clear(); Parse(&new_space, true, false);
//ReadValue(false, false, true, true); }
else
if( parsing_space && lastc == space_start )
{
Space & new_space = space->add_child_space(token.c_str());
ParseSpace(&new_space);
} }
else else
{ {
@ -458,9 +585,20 @@ void JSONToSpaceParser::ParseValuesList(Space * space)
if( lastc == option_delimiter ) // may add a new delimiter for tables? default the same as for objects... if( lastc == option_delimiter ) // may add a new delimiter for tables? default the same as for objects...
{ {
ReadChar(); // inserting a next character after the delimiter ReadChar(); // inserting a next character after the delimiter
if( parsing_space )
{
// in space format a table_end character is allowed to be after the last table item
SkipWhite();
if( lastc == table_end )
break;
}
} }
else else
if( !parsing_space )
{ {
// in json format the option_delimiter is required
status = syntax_error; status = syntax_error;
} }
} }
@ -468,7 +606,7 @@ void JSONToSpaceParser::ParseValuesList(Space * space)
if( status == ok ) if( status == ok )
{ {
Space * new_space = &space->add(new Space()); Space * new_space = &space->add(new Space());
Parse(new_space); Parse(new_space, false, true);
} }
is_first = false; is_first = false;
@ -589,12 +727,47 @@ bool JSONToSpaceParser::is_alfa_numeric_char(int c)
} }
/*
 * skips input characters until lastc holds a non-escaped new line character
 * or the end of the input (-1) has been reached
 *
 * the terminating character itself is left in lastc
 */
void JSONToSpaceParser::SkipLine()
{
	for(;;)
	{
		const bool at_input_end = (lastc == -1);
		const bool at_line_end = (!char_was_escaped && lastc == '\n');

		if( at_input_end || at_line_end )
			break;

		ReadChar();
	}
}
void JSONToSpaceParser::SkipWhite() void JSONToSpaceParser::SkipWhite()
{ {
while( IsWhite(lastc) ) if( parsing_space )
{ {
ReadChar(); while( IsWhite(lastc) || lastc == '#' )
{
if( lastc == '#' )
SkipLine();
else
ReadChar();
}
}
else
{
while( IsWhite(lastc) )
{
ReadChar();
}
}
}
/*
 * removes trailing characters for which IsWhite() returns true
 *
 * s - the string to trim in place, it is left untouched
 *     when its last character is not a white one (or when it is empty)
 */
void JSONToSpaceParser::TrimLastWhite(std::wstring & s)
{
	// number of leading characters which survive the trimming
	std::wstring::size_type keep = s.size();

	while( keep > 0 && IsWhite(s[keep - 1]) )
	{
		keep -= 1;
	}

	if( keep < s.size() )
	{
		s.erase(keep);
	}
}
@ -631,11 +804,21 @@ std::wstring::size_type i;
*/ */
void JSONToSpaceParser::ReadTokenUntilDelimiter(std::wstring & token, int delimiter1, int delimiter2)
{
token.clear();
while( lastc != -1 && lastc != '\n' && lastc != '#' && lastc != delimiter1 && lastc != delimiter2 )
{
token += static_cast<wchar_t>(lastc);
ReadChar();
}
TrimLastWhite(token);
}
void JSONToSpaceParser::ReadAlfaNumericToken(std::wstring & token)
void JSONToSpaceParser::ReadAlfaNumericToken()
{ {
token.clear(); token.clear();
@ -646,8 +829,71 @@ void JSONToSpaceParser::ReadAlfaNumericToken()
} }
} }
void JSONToSpaceParser::ReadStringValue(std::wstring & token, bool is_object_value, bool is_table_value)
{
if( parsing_space )
{
if( is_object_value )
{
ReadTokenUntilDelimiter(token, space_end, -1);
}
else
if( is_table_value )
{
ReadTokenUntilDelimiter(token, table_end, option_delimiter);
}
else
{
ReadTokenUntilDelimiter(token, -1, -1);
}
}
else
{
ReadAlfaNumericToken(token);
}
}
/*
 * reads a non-quoted field name (a key) of the Space format into 'token'
 *
 * reading stops (the stopping character is left in lastc) at the end of the input (-1)
 * or at a non-escaped: separator, line feed (10) or space_start character
 *
 * white characters at the end of the token are removed with TrimLastWhite()
 */
void JSONToSpaceParser::ReadSpaceFieldToken(std::wstring & token)
{
	token.clear();

	for(;;)
	{
		if( lastc == -1 )
			break;

		const bool ends_field = (lastc == separator || lastc == 10 || lastc == space_start);

		if( !char_was_escaped && ends_field )
			break;

		token += static_cast<wchar_t>(lastc);
		ReadChar();
	}

	TrimLastWhite(token);
}
// IMPROVEME in JSON we should not allow non-escaped a new line character // IMPROVEME in JSON we should not allow non-escaped a new line character
void JSONToSpaceParser::ReadTokenQuoted() void JSONToSpaceParser::ReadTokenQuoted(std::wstring & token)
{
token.clear();
ReadChar(); // skipping the first quotation mark
while( lastc != -1 && (char_was_escaped || (lastc != '"' && lastc != 10)) )
{
token += static_cast<wchar_t>(lastc);
ReadChar();
}
if( !char_was_escaped && lastc == '"' )
{
ReadChar(); // skipping the last quotation mark
}
else
{
status = syntax_error;
}
}
void JSONToSpaceParser::ReadMultilineTokenQuoted(std::wstring & token)
{ {
token.clear(); token.clear();
ReadChar(); // skipping the first quotation mark ReadChar(); // skipping the first quotation mark
@ -670,35 +916,32 @@ void JSONToSpaceParser::ReadTokenQuoted()
void JSONToSpaceParser::ReadKey()
Space * JSONToSpaceParser::ReadKey(Space * space)
{ {
SkipWhite(); SkipWhite();
if( parsing_space ) if( parsing_space )
{ {
// IMPROVEME implement me if( lastc == '"' )
{
ReadMultilineTokenQuoted(token);
}
else
{
ReadSpaceFieldToken(token);
}
} }
else else
{ {
if( lastc == '"' ) if( lastc == '"' )
{ {
ReadTokenQuoted(); ReadTokenQuoted(token);
} }
else else
{ {
status = syntax_error; status = syntax_error;
} }
} }
if( status == ok )
{
Space * new_space = new Space();
return &space->add(token.c_str(), new_space);
}
return nullptr;
} }

View File

@ -77,7 +77,7 @@ public:
/* /*
status of parsing status of parsing
*/ */
enum Status { ok, cant_open_file, syntax_error, max_nested_spaces_exceeded, no_space }; enum Status { ok, cant_open_file, syntax_error, no_space };
/* /*
@ -96,29 +96,47 @@ public:
/* /*
main methods used to parse main methods used to parse a JSON file
file_name is the path to a file file_name is the path to a file
*/ */
Status ParseFile(const char * file_name); Status ParseJSONFile(const char * file_name);
Status ParseFile(const std::string & file_name); Status ParseJSONFile(const std::string & file_name);
Status ParseFile(const wchar_t * file_name); Status ParseJSONFile(const wchar_t * file_name);
Status ParseFile(const std::wstring & file_name); Status ParseJSONFile(const std::wstring & file_name);
/*
main methods used to parse a Space file
file_name is the path to a file
*/
Status ParseSpaceFile(const char * file_name);
Status ParseSpaceFile(const std::string & file_name);
Status ParseSpaceFile(const wchar_t * file_name);
Status ParseSpaceFile(const std::wstring & file_name);
/* /*
main methods used to parse main methods used to parse
str - input string (either 8bit ascii or UTF-8 -- see UTF8() method) str - input string (either 8bit ascii or UTF-8 -- see UTF8() method)
*/ */
Status Parse(const char * str); Status ParseJSON(const char * str);
Status Parse(const std::string & str); Status ParseJSON(const std::string & str);
/* /*
main methods used to parse main methods used to parse
here input string is always in unicode (wide characters) here input string is always in unicode (wide characters)
*/ */
Status Parse(const wchar_t * str); Status ParseJSON(const wchar_t * str);
Status Parse(const std::wstring & str); Status ParseJSON(const std::wstring & str);
Status ParseSpace(const char * str);
Status ParseSpace(const std::string & str);
Status ParseSpace(const wchar_t * str);
Status ParseSpace(const std::wstring & str);
/* /*
* add two args Parse method * add two args Parse method
@ -288,18 +306,6 @@ private:
bool use_escape_char; bool use_escape_char;
/*
*/
size_t current_nested_level;
/*
default: 1000;
*/
size_t max_nested_level;
/* /*
* *
@ -311,21 +317,16 @@ private:
// move to ParseFile() method
std::string afile_name;
// new // new
void ParseRootSpace(); void ParseRootSpace();
void Parse(Space * space); void Parse(Space * space, bool is_object_value, bool is_table_value);
void ParseSpace(Space * space); void ParseSpace(Space * space);
void ParseTable(Space * space); void ParseTable(Space * space);
void ParseKeyValuePairs(Space * space); void ParseKeyValuePairs(Space * space);
void ParseValuesList(Space * space); void ParseValuesList(Space * space);
Space * ReadKey(Space * space); void ReadKey();
void ParseTextValue(Space * space); void ParseTextValue(Space * space);
void ParseIntegerValue(Space * space); void ParseIntegerValue(Space * space);
@ -333,13 +334,18 @@ private:
bool is_alfa_numeric_char(int c); bool is_alfa_numeric_char(int c);
void ReadAlfaNumericToken();
void ReadTokenUntilDelimiter(std::wstring & token, int delimiter1, int delimiter2);
void ReadAlfaNumericToken(std::wstring & token);
void ReadStringValue(std::wstring & token, bool is_object_value, bool is_table_value);
bool is_integer_token(); bool is_integer_token();
bool is_floating_point_token(); bool is_floating_point_token();
void ReadTokenQuoted(); void ReadSpaceFieldToken(std::wstring & token);
void ReadTokenQuoted(std::wstring & token);
void ReadMultilineTokenQuoted(std::wstring & token);
@ -351,7 +357,9 @@ private:
int ReadCharNoEscape(); int ReadCharNoEscape();
int ReadChar(); int ReadChar();
bool IsWhite(int c); bool IsWhite(int c);
void SkipLine();
void SkipWhite(); void SkipWhite();
void TrimLastWhite(std::wstring & s);
//void Trim(std::wstring & s); //void Trim(std::wstring & s);
bool IsHexDigit(wchar_t c); bool IsHexDigit(wchar_t c);
int HexToInt(wchar_t c); int HexToInt(wchar_t c);