JSONToSpaceParser: added the ability to parse the Space format

renamed: ParseFile() -> ParseJSONFile()
added: ParseSpaceFile(), ParseJSON(), ParseSpace()
This commit is contained in:
Tomasz Sowa 2021-03-17 18:24:50 +01:00
parent 31f7bdb857
commit 6e169f7650
2 changed files with 360 additions and 109 deletions

View File

@ -81,7 +81,6 @@ void JSONToSpaceParser::SetDefault()
skip_empty = false;
use_escape_char = true;
input_as_utf8 = true;
max_nested_level = 1000;
}
@ -111,9 +110,10 @@ int JSONToSpaceParser::get_last_parsed_line()
JSONToSpaceParser::Status JSONToSpaceParser::ParseFile(const char * file_name)
JSONToSpaceParser::Status JSONToSpaceParser::ParseJSONFile(const char * file_name)
{
reading_from_file = true;
parsing_space = false;
file.clear();
file.open(file_name, std::ios_base::binary | std::ios_base::in);
@ -133,35 +133,88 @@ return status;
/*
 * NOTE(review): the two signature lines and the two return lines below look like
 * the old (ParseFile) and the renamed (ParseJSONFile) variant of the same overload
 * shown together -- confirm against the actual source file.
 *
 * Either variant takes the path as std::string and forwards file_name.c_str()
 * to the const char * overload, returning its status.
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseFile(const std::string & file_name)
JSONToSpaceParser::Status JSONToSpaceParser::ParseJSONFile(const std::string & file_name)
{
return ParseFile(file_name.c_str());
return ParseJSONFile(file_name.c_str());
}
/*
 * NOTE(review): this span looks like the old (ParseFile) and the renamed
 * (ParseJSONFile) variant of the wide-character path overload shown together
 * -- confirm against the actual source file.
 *
 * Both visible variants convert the wide path with PT::WideToUTF8() and pass the
 * resulting 8-bit string to the const char * overload; one variant converts into
 * afile_name, the other into the local std::string file_name_utf8.
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseFile(const wchar_t * file_name)
JSONToSpaceParser::Status JSONToSpaceParser::ParseJSONFile(const wchar_t * file_name)
{
PT::WideToUTF8(file_name, afile_name);
return ParseFile(afile_name.c_str());
std::string file_name_utf8;
PT::WideToUTF8(file_name, file_name_utf8);
return ParseJSONFile(file_name_utf8.c_str());
}
/*
 * NOTE(review): the two signature lines and the two return lines below look like
 * the old (ParseFile) and the renamed (ParseJSONFile) variant of the same overload
 * shown together -- confirm against the actual source file.
 *
 * Either variant takes the path as std::wstring and forwards file_name.c_str()
 * to the const wchar_t * overload, returning its status.
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseFile(const std::wstring & file_name)
JSONToSpaceParser::Status JSONToSpaceParser::ParseJSONFile(const std::wstring & file_name)
{
return ParseFile(file_name.c_str());
return ParseJSONFile(file_name.c_str());
}
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const char * str)
/*
 * Parse a file written in the Space format.
 *
 * file_name - path to the file
 *
 * The parser is switched to file input and to Space mode, then the file is
 * opened for reading in binary mode. When the stream is usable ParseRootSpace()
 * is run and the file is closed afterwards; otherwise status is set to
 * cant_open_file.
 *
 * Returns the resulting status.
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseSpaceFile(const char * file_name)
{
    parsing_space = true;
    reading_from_file = true;

    file.clear();
    file.open(file_name, std::ios_base::in | std::ios_base::binary);

    if( !file )
    {
        status = cant_open_file;
        return status;
    }

    ParseRootSpace();
    file.close();

    return status;
}
/*
 * Parse a Space file, the path given as std::string.
 * Forwards the path to the const char * overload and returns its status.
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseSpaceFile(const std::string & file_name)
{
    const char * path = file_name.c_str();

    return ParseSpaceFile(path);
}
/*
 * Parse a Space file, the path given as a wide-character string.
 * The path is converted with PT::WideToUTF8() into a local 8-bit string
 * which is then passed to the const char * overload.
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseSpaceFile(const wchar_t * file_name)
{
    std::string utf8_path;

    PT::WideToUTF8(file_name, utf8_path);

    return ParseSpaceFile(utf8_path.c_str());
}
/*
 * Parse a Space file, the path given as std::wstring.
 * Forwards the path to the const wchar_t * overload and returns its status.
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseSpaceFile(const std::wstring & file_name)
{
    const wchar_t * wide_path = file_name.c_str();

    return ParseSpaceFile(wide_path);
}
JSONToSpaceParser::Status JSONToSpaceParser::ParseJSON(const char * str)
{
reading_from_file = false;
reading_from_wchar_string = false;
pchar_ascii = str;
pchar_unicode = 0;
parsing_space = false;
ParseRootSpace();
@ -169,18 +222,19 @@ return status;
}
/*
 * NOTE(review): the two signature lines and the two return lines below look like
 * the old (Parse) and the renamed (ParseJSON) variant of the same overload shown
 * together -- confirm against the actual source file.
 *
 * Either variant takes the input text as std::string and forwards str.c_str()
 * to the const char * overload, returning its status.
 */
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const std::string & str)
JSONToSpaceParser::Status JSONToSpaceParser::ParseJSON(const std::string & str)
{
return Parse(str.c_str());
return ParseJSON(str.c_str());
}
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const wchar_t * str)
JSONToSpaceParser::Status JSONToSpaceParser::ParseJSON(const wchar_t * str)
{
reading_from_file = false;
reading_from_wchar_string = true;
pchar_unicode = str;
pchar_ascii = 0;
parsing_space = false;
ParseRootSpace();
@ -188,19 +242,63 @@ return status;
}
/*
 * NOTE(review): the two signature lines and the two return lines below look like
 * the old (Parse) and the renamed (ParseJSON) variant of the same overload shown
 * together -- confirm against the actual source file.
 *
 * Either variant takes the input text as std::wstring and forwards str.c_str()
 * to the const wchar_t * overload, returning its status.
 */
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const std::wstring & str)
JSONToSpaceParser::Status JSONToSpaceParser::ParseJSON(const std::wstring & str)
{
return Parse(str.c_str());
return ParseJSON(str.c_str());
}
/*
 * Parse a text in the Space format from an 8-bit string.
 *
 * str - input string
 *
 * Selects string input (not a file, not a wide string), points pchar_ascii at
 * str, turns Space mode on and runs ParseRootSpace().
 *
 * Returns the resulting status.
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseSpace(const char * str)
{
    parsing_space = true;

    reading_from_file = false;
    reading_from_wchar_string = false;

    pchar_unicode = 0;
    pchar_ascii = str;

    ParseRootSpace();

    return status;
}
/*
 * Parse a text in the Space format, the input given as std::string.
 * Forwards the text to the const char * overload and returns its status.
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseSpace(const std::string & str)
{
    const char * text = str.c_str();

    return ParseSpace(text);
}
/*
 * Parse a text in the Space format from a wide-character string.
 *
 * str - input string (wide characters)
 *
 * Selects wide string input (not a file), points pchar_unicode at str,
 * turns Space mode on and runs ParseRootSpace().
 *
 * Returns the resulting status.
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseSpace(const wchar_t * str)
{
    parsing_space = true;

    reading_from_file = false;
    reading_from_wchar_string = true;

    pchar_ascii = 0;
    pchar_unicode = str;

    ParseRootSpace();

    return status;
}
/*
 * Parse a text in the Space format, the input given as std::wstring.
 * Forwards the text to the const wchar_t * overload and returns its status.
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseSpace(const std::wstring & str)
{
    const wchar_t * wide_text = str.c_str();

    return ParseSpace(wide_text);
}
void JSONToSpaceParser::ParseRootSpace()
{
line = 1;
status = ok;
skipped = 0;
current_nested_level = 0;
parsing_space = false;
if( !root_space )
{
@ -209,12 +307,27 @@ void JSONToSpaceParser::ParseRootSpace()
}
ReadChar(); // put first character to lastc
Parse(root_space);
if( parsing_space )
{
separator = '=';
table_start = '(';
table_end = ')';
ParseSpace(root_space);
}
else
{
separator = ':';
table_start = '[';
table_end = ']';
Parse(root_space, false, false);
}
token.clear();
}
void JSONToSpaceParser::Parse(Space * space)
void JSONToSpaceParser::Parse(Space * space, bool is_object_value, bool is_table_value)
{
SkipWhite();
@ -234,7 +347,7 @@ void JSONToSpaceParser::Parse(Space * space)
}
else
{
ReadAlfaNumericToken();
ReadStringValue(token, is_object_value, is_table_value);
if( token == L"null" )
{
@ -262,7 +375,14 @@ void JSONToSpaceParser::Parse(Space * space)
}
else
{
status = syntax_error;
if( parsing_space )
{
space->set(token);
}
else
{
status = syntax_error;
}
}
}
}
@ -273,28 +393,30 @@ void JSONToSpaceParser::Parse(Space * space)
/*
 * Parse one space (object): mark the given space as an empty object and fill it
 * with ParseKeyValuePairs().
 *
 * NOTE(review): this span appears to show lines of two versions of the function
 * together (e.g. an unconditional ReadChar() next to the one guarded by
 * need_space_start_character, and two checks of lastc == space_end) -- confirm
 * against the actual source file before relying on the exact statement order.
 *
 * Visible intent of the guarded variant: the opening character is stepped over and
 * the closing space_end character is required (syntax_error otherwise) only when
 * need_space_start_character is true.
 */
void JSONToSpaceParser::ParseSpace(Space * space)
{
current_nested_level += 1;
/*
* in Space format in global namespace the space start character is not required
*/
bool need_space_start_character = !parsing_space || space != root_space;
// if( current_nested_level > max_nested_level )
// {
// status = max_nested_spaces_exceeded;
// return;
// }
if( need_space_start_character )
{
ReadChar(); // inserting a next character after the space_start char to lastc
}
ReadChar(); // inserting a next character after the space_start char to lastc
space->set_empty_object();
ParseKeyValuePairs(space);
if( lastc == space_end )
if( need_space_start_character )
{
ReadChar();
if( lastc == space_end )
{
ReadChar();
}
else
{
status = syntax_error;
}
}
else
{
status = syntax_error;
}
current_nested_level -= 1;
}
@ -303,25 +425,13 @@ void JSONToSpaceParser::ParseSpace(Space * space)
/*
 * Parse a quoted text value: the given space is turned into an empty wide string
 * (set_empty_wstring()) and the text is read into that string.
 *
 * NOTE(review): this span appears to show lines of two versions of the function
 * together -- a hand-written loop copying characters up to the closing '"' and
 * calls to ReadMultilineTokenQuoted() / ReadTokenQuoted() -- confirm against the
 * actual source file.
 *
 * Visible intent of the call-based variant: ReadMultilineTokenQuoted() is used
 * when parsing_space is set, ReadTokenQuoted() otherwise.
 */
void JSONToSpaceParser::ParseTextValue(Space * space)
{
ReadChar(); // inserting a next character after the ... char to lastc
space->set_empty_wstring();
std::wstring * str = space->get_wstring();
// IMPROVEME add support for escaped characters
while( status == ok && lastc != '"' && lastc != -1 )
{
str->push_back(static_cast<wchar_t>(lastc));
ReadChar();
}
if( lastc == '"' )
{
ReadChar();
}
if( parsing_space )
ReadMultilineTokenQuoted(*str);
else
{
status = syntax_error;
}
ReadTokenQuoted(*str);
}
@ -375,8 +485,9 @@ void JSONToSpaceParser::ParseTable(Space * space)
{
space->set_empty_table();
ReadChar(); // inserting a next character after the table_start char to lastc
space->set_empty_object();
space->set_empty_table();
ParseValuesList(space);
if( lastc == table_end )
@ -406,18 +517,29 @@ void JSONToSpaceParser::ParseKeyValuePairs(Space * space)
if( lastc == option_delimiter )
{
ReadChar(); // inserting a next character after the option_delimiter to lastc
if( parsing_space )
{
// in space format a space_end character is allowed to be after the last table item
SkipWhite();
if( lastc == space_end )
break;
}
}
else
if( !parsing_space )
{
// in json format the option_delimiter is required
status = syntax_error;
}
}
if( status == ok )
{
Space * new_key_space = ReadKey(space);
ReadKey();
if( status == ok && new_key_space )
if( status == ok )
{
SkipWhite();
@ -425,9 +547,14 @@ void JSONToSpaceParser::ParseKeyValuePairs(Space * space)
{
ReadChar(); // inserting a next character after the separator to lastc
Parse(new_key_space);
//value.clear();
//ReadValue(false, false, true, true);
Space & new_space = space->add(token.c_str(), new Space());
Parse(&new_space, true, false);
}
else
if( parsing_space && lastc == space_start )
{
Space & new_space = space->add_child_space(token.c_str());
ParseSpace(&new_space);
}
else
{
@ -458,9 +585,20 @@ void JSONToSpaceParser::ParseValuesList(Space * space)
if( lastc == option_delimiter ) // may add a new delimiter for tables? default the same as for objects...
{
ReadChar(); // inserting a next character after the delimiter
if( parsing_space )
{
// in space format a table_end character is allowed to be after the last table item
SkipWhite();
if( lastc == table_end )
break;
}
}
else
if( !parsing_space )
{
// in json format the option_delimiter is required
status = syntax_error;
}
}
@ -468,7 +606,7 @@ void JSONToSpaceParser::ParseValuesList(Space * space)
if( status == ok )
{
Space * new_space = &space->add(new Space());
Parse(new_space);
Parse(new_space, false, true);
}
is_first = false;
@ -589,12 +727,47 @@ bool JSONToSpaceParser::is_alfa_numeric_char(int c)
}
/*
 * Skip the rest of the current line.
 *
 * Characters are read with ReadChar() until lastc is either -1 or a new line
 * character which was not escaped. The character which stopped the loop is
 * left in lastc.
 */
void JSONToSpaceParser::SkipLine()
{
    for(;;)
    {
        if( lastc == -1 )
            break;

        if( !char_was_escaped && lastc == '\n' )
            break;

        ReadChar();
    }
}
/*
 * Skip white characters in the input.
 *
 * NOTE(review): this span appears to show lines of two versions of the function
 * together (a plain while( IsWhite(lastc) ) loop in front of the
 * if( parsing_space ) / else statement) -- confirm against the actual source file.
 *
 * Visible intent of the if/else variant: when parsing_space is set, white
 * characters and '#' comments (the latter skipped with SkipLine()) are consumed;
 * otherwise only white characters are consumed.
 */
void JSONToSpaceParser::SkipWhite()
{
while( IsWhite(lastc) )
if( parsing_space )
{
ReadChar();
while( IsWhite(lastc) || lastc == '#' )
{
if( lastc == '#' )
SkipLine();
else
ReadChar();
}
}
else
{
while( IsWhite(lastc) )
{
ReadChar();
}
}
}
/*
 * Remove trailing white characters from s (in place).
 *
 * A character is treated as white when IsWhite() says so.
 * The string is left untouched if its last character is not white
 * or if the string is empty.
 */
void JSONToSpaceParser::TrimLastWhite(std::wstring & s)
{
    std::wstring::size_type keep = s.size();

    while( keep > 0 && IsWhite(s[keep - 1]) )
    {
        keep -= 1;
    }

    if( keep != s.size() )
    {
        s.erase(keep);
    }
}
@ -631,11 +804,21 @@ std::wstring::size_type i;
*/
void JSONToSpaceParser::ReadTokenUntilDelimiter(std::wstring & token, int delimiter1, int delimiter2)
{
token.clear();
while( lastc != -1 && lastc != '\n' && lastc != '#' && lastc != delimiter1 && lastc != delimiter2 )
{
token += static_cast<wchar_t>(lastc);
ReadChar();
}
TrimLastWhite(token);
}
void JSONToSpaceParser::ReadAlfaNumericToken()
void JSONToSpaceParser::ReadAlfaNumericToken(std::wstring & token)
{
token.clear();
@ -646,8 +829,71 @@ void JSONToSpaceParser::ReadAlfaNumericToken()
}
}
void JSONToSpaceParser::ReadStringValue(std::wstring & token, bool is_object_value, bool is_table_value)
{
if( parsing_space )
{
if( is_object_value )
{
ReadTokenUntilDelimiter(token, space_end, -1);
}
else
if( is_table_value )
{
ReadTokenUntilDelimiter(token, table_end, option_delimiter);
}
else
{
ReadTokenUntilDelimiter(token, -1, -1);
}
}
else
{
ReadAlfaNumericToken(token);
}
}
/*
 * Read an unquoted field name into 'token' (ReadKey() calls it in Space mode).
 *
 * token - output, cleared at the beginning
 *
 * Reading stops when lastc is -1 or when an unescaped separator, new line (10)
 * or space_start character is met; that character is left in lastc.
 * Trailing white characters are removed from the token with TrimLastWhite().
 */
void JSONToSpaceParser::ReadSpaceFieldToken(std::wstring & token)
{
    token.clear();

    for( ; lastc != -1 ; ReadChar() )
    {
        const bool is_terminator = (lastc == separator || lastc == 10 || lastc == space_start);

        if( !char_was_escaped && is_terminator )
            break;

        token += static_cast<wchar_t>(lastc);
    }

    TrimLastWhite(token);
}
// IMPROVEME in JSON we should not allow non-escaped a new line character
void JSONToSpaceParser::ReadTokenQuoted()
/*
 * Read a quoted, single line token into 'token'.
 *
 * token - output, cleared at the beginning
 *
 * The opening quotation mark is expected in lastc and is stepped over.
 * Characters are collected until lastc is -1 or an unescaped '"' or
 * new line (10) is met. An unescaped '"' closes the token and is stepped over;
 * in any other case status is set to syntax_error.
 */
void JSONToSpaceParser::ReadTokenQuoted(std::wstring & token)
{
    token.clear();
    ReadChar(); // step over the opening quotation mark

    for(;;)
    {
        if( lastc == -1 )
            break;

        if( !char_was_escaped && (lastc == '"' || lastc == 10) )
            break;

        token += static_cast<wchar_t>(lastc);
        ReadChar();
    }

    const bool properly_closed = (!char_was_escaped && lastc == '"');

    if( !properly_closed )
    {
        status = syntax_error;
        return;
    }

    ReadChar(); // step over the closing quotation mark
}
void JSONToSpaceParser::ReadMultilineTokenQuoted(std::wstring & token)
{
token.clear();
ReadChar(); // skipping the first quotation mark
@ -670,35 +916,32 @@ void JSONToSpaceParser::ReadTokenQuoted()
/*
 * Read a key (field name) after skipping white characters.
 *
 * NOTE(review): this span appears to show lines of two versions of the function
 * together (a Space * ReadKey(Space *) variant which adds a new Space under the
 * key and returns it, and a void ReadKey() variant which only fills 'token')
 * -- confirm against the actual source file.
 *
 * Visible rules for reading the key:
 *   Space mode - a key starting with '"' is read with ReadMultilineTokenQuoted(),
 *                any other key with ReadSpaceFieldToken()
 *   JSON mode  - the key must start with '"' (read with ReadTokenQuoted()),
 *                otherwise status is set to syntax_error
 */
Space * JSONToSpaceParser::ReadKey(Space * space)
void JSONToSpaceParser::ReadKey()
{
SkipWhite();
if( parsing_space )
{
// IMPROVEME implement me
if( lastc == '"' )
{
ReadMultilineTokenQuoted(token);
}
else
{
ReadSpaceFieldToken(token);
}
}
else
{
if( lastc == '"' )
{
ReadTokenQuoted();
ReadTokenQuoted(token);
}
else
{
status = syntax_error;
}
}
if( status == ok )
{
Space * new_space = new Space();
return &space->add(token.c_str(), new_space);
}
return nullptr;
}

View File

@ -77,7 +77,7 @@ public:
/*
status of parsing
*/
enum Status { ok, cant_open_file, syntax_error, max_nested_spaces_exceeded, no_space };
enum Status { ok, cant_open_file, syntax_error, no_space };
/*
@ -96,29 +96,47 @@ public:
/*
main methods used to parse
main methods used to parse a JSON file
file_name is the path to a file
*/
Status ParseFile(const char * file_name);
Status ParseFile(const std::string & file_name);
Status ParseFile(const wchar_t * file_name);
Status ParseFile(const std::wstring & file_name);
Status ParseJSONFile(const char * file_name);
Status ParseJSONFile(const std::string & file_name);
Status ParseJSONFile(const wchar_t * file_name);
Status ParseJSONFile(const std::wstring & file_name);
/*
main methods used to parse a Space file
file_name is the path to a file
*/
Status ParseSpaceFile(const char * file_name);
Status ParseSpaceFile(const std::string & file_name);
Status ParseSpaceFile(const wchar_t * file_name);
Status ParseSpaceFile(const std::wstring & file_name);
/*
main methods used to parse
str - input string (either 8bit ascii or UTF-8 -- see UTF8() method)
*/
Status Parse(const char * str);
Status Parse(const std::string & str);
Status ParseJSON(const char * str);
Status ParseJSON(const std::string & str);
/*
main methods used to parse
here input string is always in unicode (wide characters)
*/
Status Parse(const wchar_t * str);
Status Parse(const std::wstring & str);
Status ParseJSON(const wchar_t * str);
Status ParseJSON(const std::wstring & str);
Status ParseSpace(const char * str);
Status ParseSpace(const std::string & str);
Status ParseSpace(const wchar_t * str);
Status ParseSpace(const std::wstring & str);
/*
* add two args Parse method
@ -288,18 +306,6 @@ private:
bool use_escape_char;
/*
*/
size_t current_nested_level;
/*
default: 1000;
*/
size_t max_nested_level;
/*
*
@ -311,21 +317,16 @@ private:
// move to ParseFile() method
std::string afile_name;
// new
void ParseRootSpace();
void Parse(Space * space);
void Parse(Space * space, bool is_object_value, bool is_table_value);
void ParseSpace(Space * space);
void ParseTable(Space * space);
void ParseKeyValuePairs(Space * space);
void ParseValuesList(Space * space);
Space * ReadKey(Space * space);
void ReadKey();
void ParseTextValue(Space * space);
void ParseIntegerValue(Space * space);
@ -333,13 +334,18 @@ private:
bool is_alfa_numeric_char(int c);
void ReadAlfaNumericToken();
void ReadTokenUntilDelimiter(std::wstring & token, int delimiter1, int delimiter2);
void ReadAlfaNumericToken(std::wstring & token);
void ReadStringValue(std::wstring & token, bool is_object_value, bool is_table_value);
bool is_integer_token();
bool is_floating_point_token();
void ReadTokenQuoted();
void ReadSpaceFieldToken(std::wstring & token);
void ReadTokenQuoted(std::wstring & token);
void ReadMultilineTokenQuoted(std::wstring & token);
@ -351,7 +357,9 @@ private:
int ReadCharNoEscape();
int ReadChar();
bool IsWhite(int c);
void SkipLine();
void SkipWhite();
void TrimLastWhite(std::wstring & s);
//void Trim(std::wstring & s);
bool IsHexDigit(wchar_t c);
int HexToInt(wchar_t c);