changed: JSONToSpaceParser can now parse a JSON string/file into the new Space format

(escaping of some characters is not ready yet)
This commit is contained in:
Tomasz Sowa 2021-03-16 18:40:14 +01:00
parent ba7fa1c195
commit 0c0f15ab8a
2 changed files with 420 additions and 385 deletions

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2012-2017, Tomasz Sowa
* Copyright (c) 2012-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -39,12 +39,13 @@
#include <wchar.h>
#include "jsontospaceparser.h"
#include "utf8/utf8.h"
#include "convert/strtoint.h"
namespace PT
{
#ifdef nonexisting_value
JSONToSpaceParser::JSONToSpaceParser()
@ -81,7 +82,6 @@ void JSONToSpaceParser::SetDefault()
use_escape_char = true;
input_as_utf8 = true;
max_nested_level = 1000;
create_table_as_space = true;
}
@ -104,13 +104,14 @@ void JSONToSpaceParser::UTF8(bool utf)
}
void JSONToSpaceParser::CreateTableAsSpace(bool create_table_as_space_)
int JSONToSpaceParser::get_last_parsed_line()
{
create_table_as_space = create_table_as_space_;
return line;
}
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const char * file_name)
JSONToSpaceParser::Status JSONToSpaceParser::ParseFile(const char * file_name)
{
reading_from_file = true;
@ -119,7 +120,7 @@ JSONToSpaceParser::Status JSONToSpaceParser::Parse(const char * file_name)
if( file )
{
Parse();
ParseRootSpace();
file.close();
}
else
@ -132,305 +133,438 @@ return status;
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const std::string & file_name)
JSONToSpaceParser::Status JSONToSpaceParser::ParseFile(const std::string & file_name)
{
return Parse(file_name.c_str());
return ParseFile(file_name.c_str());
}
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const wchar_t * file_name)
JSONToSpaceParser::Status JSONToSpaceParser::ParseFile(const wchar_t * file_name)
{
PT::WideToUTF8(file_name, afile_name);
return Parse(afile_name.c_str());
return ParseFile(afile_name.c_str());
}
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const std::wstring & file_name)
JSONToSpaceParser::Status JSONToSpaceParser::ParseFile(const std::wstring & file_name)
{
return Parse(file_name.c_str());
return ParseFile(file_name.c_str());
}
JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const char * str)
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const char * str)
{
reading_from_file = false;
reading_from_wchar_string = false;
pchar_ascii = str;
pchar_unicode = 0;
Parse();
ParseRootSpace();
return status;
}
JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const std::string & str)
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const std::string & str)
{
return ParseString(str.c_str());
return Parse(str.c_str());
}
JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const wchar_t * str)
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const wchar_t * str)
{
reading_from_file = false;
reading_from_wchar_string = true;
pchar_unicode = str;
pchar_ascii = 0;
Parse();
ParseRootSpace();
return status;
}
JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const std::wstring & str)
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const std::wstring & str)
{
return ParseString(str.c_str());
return Parse(str.c_str());
}
void JSONToSpaceParser::Parse()
void JSONToSpaceParser::ParseRootSpace()
{
line = 1;
status = ok;
skipped = 0;
current_nested_level = 0;
parsing_space = false;
if( !root_space )
{
status = no_space;
return;
}
line = 1;
status = ok;
space = root_space;
skipped = 0;
current_nested_level = 0;
ReadChar();
ReadChar(); // put first character to lastc
Parse(root_space);
token.clear();
}
void JSONToSpaceParser::Parse(Space * space)
{
SkipWhite();
if( lastc == space_start )
{
ParseSpace(false, false);
ParseSpace(space);
}
else
if( lastc == table_start )
{
ParseTable(false);
ParseTable(space);
}
else
if( lastc == '"' ) // IMPROVEME define a variable
{
ParseTextValue(space);
}
else
{
ReadAlfaNumericToken();
if( token == L"null" )
{
space->set_null();
}
else
if( token == L"true" )
{
space->set(true);
}
else
if( token == L"false" )
{
space->set(false);
}
else
if( is_integer_token() )
{
ParseIntegerValue(space);
}
else
if( is_floating_point_token() )
{
ParseFloatingPointValue(space);
}
else
{
status = syntax_error;
}
}
}
void JSONToSpaceParser::ParseSpace(Space * space)
{
current_nested_level += 1;
// if( current_nested_level > max_nested_level )
// {
// status = max_nested_spaces_exceeded;
// return;
// }
ReadChar(); // inserting a next character after the space_start char to lastc
space->set_empty_object();
ParseKeyValuePairs(space);
if( lastc == space_end )
{
ReadChar();
}
else
{
// '{' or '[' expected
status = syntax_error;
}
if( status == ok && space != root_space )
current_nested_level -= 1;
}
void JSONToSpaceParser::ParseTextValue(Space * space)
{
ReadChar(); // inserting a next character after the ... char to lastc
space->set_empty_wstring();
std::wstring * str = space->get_wstring();
// IMPROVEME add support for escaped characters
while( status == ok && lastc != '"' && lastc != -1 )
{
str->push_back(static_cast<wchar_t>(lastc));
ReadChar();
}
if( lastc == '"' )
{
ReadChar();
}
else
{
// last closing '}' characters are missing (closing a space)
status = syntax_error;
}
token.clear();
key.clear();
value.clear();
}
void JSONToSpaceParser::ParseSpace(bool has_space_name, bool insert_new_space)
void JSONToSpaceParser::ParseIntegerValue(Space * space)
{
//current_nested_level += 1;
const wchar_t * after_str = nullptr;
bool was_overflow = false;
if( current_nested_level > max_nested_level )
long long val = Toll(token.c_str(), 10, &after_str, &was_overflow, false);
if( was_overflow )
{
status = max_nested_spaces_exceeded;
return;
status = syntax_error;
}
if( insert_new_space )
else
if( size_t(after_str - token.c_str()) != token.size() )
{
SpaceStarts(has_space_name);
status = syntax_error;
}
else
{
// insert_new_space as a false is used only when parsing
// the first space (root_space)
ReadChar(); // skipping the first space character '{'
space->set(val);
}
}
ParseKeyValuePairs();
if( insert_new_space )
void JSONToSpaceParser::ParseFloatingPointValue(Space * space)
{
SpaceEnds();
wchar_t * after_str = nullptr;
double val = wcstod(token.c_str(), &after_str);
if( errno == ERANGE )
{
status = syntax_error;
}
else
if( size_t(after_str - token.c_str()) != token.size() )
{
status = syntax_error;
}
else
{
ReadChar(); // skipping the last space character '}'
space->set(val);
}
//current_nested_level -= 1;
}
void JSONToSpaceParser::ParseKeyValuePairs()
/*
 * parses a table: table_start value, value, ... table_end
 *
 * this method is called when lastc holds the table_start character,
 * the values are read by ParseValuesList() and stored in the given space
 *
 * if the list is followed by the table_end character it is skipped,
 * otherwise status is set to syntax_error
 */
void JSONToSpaceParser::ParseTable(Space * space)
{
	// the space has to stay a table when ParseValuesList() adds items to it,
	// set_empty_object() must not be called here (it is meant for ParseSpace(),
	// presumably it would change the just prepared table into an object)
	space->set_empty_table();

	ReadChar(); // inserting a next character after the table_start char to lastc
	ParseValuesList(space);

	if( lastc == table_end )
	{
		ReadChar(); // skipping the table_end character
	}
	else
	{
		// the closing table_end character is missing
		status = syntax_error;
	}
}
void JSONToSpaceParser::ParseKeyValuePairs(Space * space)
{
bool is_first = true;
SkipWhite();
while( status == ok && lastc != space_end && lastc != -1 )
{
ReadKey();
if( !is_first )
{
SkipWhite();
if( lastc == option_delimiter )
{
ReadChar(); // inserting a next character after the option_delimiter to lastc
}
else
{
status = syntax_error;
}
}
if( status == ok )
{
Space * new_key_space = ReadKey(space);
if( status == ok && new_key_space )
{
SkipWhite();
if( lastc == separator )
{
value.clear();
ReadChar(); // skipping separator ':'
ReadValue(false, false, true, true);
SkipWhite();
ReadChar(); // inserting a next character after the separator to lastc
if( lastc == option_delimiter )
{
ReadChar(); // skipping delimiter ','
Parse(new_key_space);
//value.clear();
//ReadValue(false, false, true, true);
}
else
if( lastc != space_end && status == ok )
{
status = syntax_error;
}
}
}
is_first = false;
SkipWhite();
}
}
void JSONToSpaceParser::ParseValuesList(Space * space)
{
bool is_first = true;
SkipWhite();
while( status == ok && lastc != table_end && lastc != -1 )
{
if( !is_first )
{
SkipWhite();
if( lastc == option_delimiter ) // may add a new delimiter for tables? default the same as for objects...
{
ReadChar(); // inserting a next character after the delimiter
}
else
{
status = syntax_error;
}
}
if( status == ok )
{
status = syntax_error;
}
}
Space * new_space = &space->add(new Space());
Parse(new_space);
}
void JSONToSpaceParser::ParseTextTable()
{
ReadChar(); // skipping table start character '['
is_first = false;
SkipWhite();
value.clear();
}
}
while( status == ok && lastc != table_end && lastc != -1 )
{
// all space objects inside or tables will be skipped
ReadValue(true);
SkipWhite();
if( lastc == option_delimiter )
bool JSONToSpaceParser::is_integer_token()
{
ReadChar(); // skipping delimiter ','
if( token.empty() )
return false;
size_t i = 0;
if( token[i] == '-' )
{
i += 1;
if( token.size() == 1 )
return false;
}
for( ; i < token.size() ; ++i)
{
if( token[i] < '0' || token[i] > '9' )
{
return false;
}
}
return true;
}
bool JSONToSpaceParser::is_floating_point_token()
{
bool was_dot = false;
bool was_exponential = false;
bool was_plus_minus_sign = false;
if( token.empty() )
return false;
size_t i = 0;
if( token[i] == '-' )
{
i += 1;
if( token.size() == 1 )
return false;
}
for( ; i < token.size() ; ++i)
{
if( token[i] == '.' )
{
if( was_dot || was_exponential )
return false;
was_dot = true;
}
else
if( lastc != table_end && status == ok )
if( token[i] == 'e' || token[i]=='E' )
{
status = syntax_error;
}
}
if( was_exponential )
return false;
if( lastc == table_end )
ReadChar(); // skipping end table character ']'
was_exponential = true;
AddKeyValuePair();
}
void JSONToSpaceParser::ParseObjectsTable(bool has_key)
{
ReadChar(); // skipping table start character '['
SpaceStarts(has_key, false);
SkipWhite();
while( status == ok && lastc != table_end && lastc != -1 )
{
// 'value' table will not be used here
// (we are creating spaces)
ReadValue(false, true);
SkipWhite();
if( lastc == option_delimiter )
{
ReadChar(); // skipping delimiter ','
// the exponential character cannot be the last character
if( i + 1 == token.size() )
return false;
}
else
if( lastc != table_end && status == ok )
if( token[i] == '+' || token[i] == '-' )
{
status = syntax_error;
}
}
if( was_plus_minus_sign || !was_exponential )
return false;
if( lastc == table_end )
ReadChar(); // skipping end table character ']'
// the plus or minus should be after the exponential character
if( i > 0 && (token[i-1] != 'e' && token[i-1] != 'E') )
return false;
SpaceEnds(false);
}
void JSONToSpaceParser::ParseTable(bool has_key)
{
if( create_table_as_space )
{
//current_nested_level += 1;
if( current_nested_level > max_nested_level )
{
status = max_nested_spaces_exceeded;
was_plus_minus_sign = true;
}
else
if( token[i] < '0' || token[i] > '9' )
{
ParseObjectsTable(has_key);
}
//current_nested_level -= 1;
}
else
{
// ParseTextTable will not create a next level
if( !has_key )
key.clear();
ParseTextTable(); // ParseTextTable will use key
return false;
}
}
void JSONToSpaceParser::SpaceStarts(bool has_space_name, bool skip_space_char)
{
Space * new_space = new Space();
space->spaces.push_back(new_space);
new_space->parent = space;
if( has_space_name )
new_space->name = key;
space = new_space;
if( skip_space_char )
ReadChar(); // skipping space starts character '{'
}
void JSONToSpaceParser::SpaceEnds(bool skip_space_char)
{
if( space == root_space )
{
// there cannot be a loose list end character in the global space
status = syntax_error;
}
else
{
space = space->parent;
if( skip_space_char )
ReadChar(); // skipping closing space character '}'
}
return true;
}
@ -446,6 +580,15 @@ return false;
}
/*
 * returns true if c can be a part of an unquoted token
 * read by ReadAlfaNumericToken(): ascii letters, digits, '.', '-' and '+'
 *
 * '+' has to be accepted as well: is_floating_point_token() allows
 * a plus or minus sign directly after the exponent character (e.g. 1e+5),
 * without it the token would be cut before the sign and such a number
 * would be reported as a syntax error
 */
bool JSONToSpaceParser::is_alfa_numeric_char(int c)
{
	return	(c >= 'a' && c <= 'z') ||
			(c >= 'A' && c <= 'Z') ||
			(c >= '0' && c <= '9') ||
			c == '.' || c == '-' || c == '+';
}
void JSONToSpaceParser::SkipWhite()
{
@ -456,7 +599,7 @@ void JSONToSpaceParser::SkipWhite()
}
/*
void JSONToSpaceParser::Trim(std::wstring & s)
{
std::wstring::size_type i;
@ -469,7 +612,7 @@ std::wstring::size_type i;
if( i==0 && IsWhite(s[i]) )
{
// the whole string has white characters
// the whole string consists of white characters
s.clear();
return;
}
@ -485,27 +628,28 @@ std::wstring::size_type i;
if( i != 0 )
s.erase(0, i);
}
*/
void JSONToSpaceParser::DeleteFromTable(const std::wstring & var)
void JSONToSpaceParser::ReadAlfaNumericToken()
{
Space::Table::iterator i = space->table.find(var);
token.clear();
if( i != space->table.end() )
space->table.erase(i);
while( is_alfa_numeric_char(lastc) )
{
token += static_cast<wchar_t>(lastc);
ReadChar();
}
}
// IMPROVEME in JSON we should not allow a non-escaped new line character
void JSONToSpaceParser::ReadTokenQuoted()
{
token.clear();
ReadChar(); // skipping the first quotation mark
while( lastc != -1 && (char_was_escaped || lastc != '"') )
@ -515,162 +659,48 @@ void JSONToSpaceParser::ReadTokenQuoted()
}
if( !char_was_escaped && lastc == '"' )
{
ReadChar(); // skipping the last quotation mark
}
else
{
status = syntax_error;
}
void JSONToSpaceParser::ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2)
{
while( true )
{
if( lastc == -1 ||
(!char_was_escaped &&
(
lastc == space_end ||
lastc == table_end ||
(white_delimit && IsWhite(lastc)) ||
(new_line_delimit && lastc == '\n') ||
(delimit1 != -1 && lastc == delimit1) ||
(delimit2 != -1 && lastc == delimit2)
) ) )
{
break;
}
token += static_cast<wchar_t>(lastc);
ReadChar();
}
Trim(token);
}
void JSONToSpaceParser::ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2)
Space * JSONToSpaceParser::ReadKey(Space * space)
{
token.clear();
SkipWhite();
if( !char_was_escaped && lastc == '"' )
ReadTokenQuoted();
if( parsing_space )
{
// IMPROVEME implement me
}
else
ReadTokenSingle(white_delimit, new_line_delimit, delimit1, delimit2);
}
void JSONToSpaceParser::ReadKey()
{
SkipWhite();
ReadToken(false, true, separator, table_start);
key = token;
}
void JSONToSpaceParser::SkipText()
{
ReadChar(); // skipping the first quote character '"'
while( lastc != '"' && lastc != -1 )
ReadChar();
}
void JSONToSpaceParser::SkipObjectOrTable(int start_char, int end_char)
{
int mark = 1;
skipped += 1;
ReadChar(); // skipping the first object character '{' or '['
do
{
if( lastc == '"' )
SkipText();
else
if( lastc == end_char )
mark -= 1;
else
if( lastc == start_char )
mark += 1;
ReadChar();
}
while( mark > 0 && lastc != -1 );
}
void JSONToSpaceParser::SkipObject()
{
SkipObjectOrTable(space_start, space_end);
}
void JSONToSpaceParser::SkipTable()
{
SkipObjectOrTable(table_start, table_end);
}
//void JSONToSpaceParser::ReadValue(bool add_space_for_single_value, bool auto_add_single_value, bool has_space_name)
void JSONToSpaceParser::ReadValue(bool skip_object_or_table,
bool add_space_for_text_value,
bool has_key,
bool auto_add_text_value)
{
SkipWhite();
if( lastc == space_start )
{
if( skip_object_or_table )
SkipObject();
else
ParseSpace(has_key);
}
else
if( lastc == table_start )
{
if( skip_object_or_table )
SkipTable();
else
ParseTable(has_key);
ReadTokenQuoted();
}
else
{
if( add_space_for_text_value )
status = syntax_error;
}
}
if( status == ok )
{
SpaceStarts(false, false);
ReadToken(false, true, option_delimiter, -1);
space->name = token;
SpaceEnds(false);
}
else
{
ReadToken(false, true, option_delimiter, -1);
value.push_back(token);
if( auto_add_text_value )
AddKeyValuePair();
}
}
Space * new_space = new Space();
return &space->add(token.c_str(), new_space);
}
void JSONToSpaceParser::AddKeyValuePair()
{
if( value.empty() && skip_empty )
{
DeleteFromTable(key);
return;
return nullptr;
}
space->table[key] = value;
}
@ -862,7 +892,7 @@ int JSONToSpaceParser::ReadChar()
return lastc;
}
#endif
} // namespace

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2012-2017, Tomasz Sowa
* Copyright (c) 2012-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -46,7 +46,7 @@
namespace PT
{
#ifdef nonexisting_value
class JSONToSpaceParser
@ -86,10 +86,6 @@ public:
Status status;
/*
a number of a line in which there is a syntax_error
*/
int line;
/*
@ -103,26 +99,32 @@ public:
main methods used to parse
file_name is the path to a file
*/
Status Parse(const char * file_name);
Status Parse(const std::string & file_name);
Status Parse(const wchar_t * file_name);
Status Parse(const std::wstring & file_name);
Status ParseFile(const char * file_name);
Status ParseFile(const std::string & file_name);
Status ParseFile(const wchar_t * file_name);
Status ParseFile(const std::wstring & file_name);
/*
main methods used to parse
str - input string (either 8bit ascii or UTF-8 -- see UTF8() method)
*/
Status ParseString(const char * str);
Status ParseString(const std::string & str);
Status Parse(const char * str);
Status Parse(const std::string & str);
/*
main methods used to parse
here input string is always in unicode (wide characters)
*/
Status ParseString(const wchar_t * str);
Status ParseString(const std::wstring & str);
Status Parse(const wchar_t * str);
Status Parse(const std::wstring & str);
/*
* TODO: add a two-argument Parse method:
* Status Parse(const char * str, Space & output_space);
*
*/
/*
@ -145,15 +147,23 @@ public:
/*
if true then the input file or string (char* or std::string) is treated as UTF-8
default true
the internal storage for strings is std::wstring so if you call UTF8(false) then
the characters of the input string will simply be static_cast<> from char to wchar_t
*/
// rename to use_utf8(bool)
void UTF8(bool utf);
/*
default: true
*
* returns the number of the last parsed line
* can be used to obtain the line in which a syntax error occurred
*
*/
void CreateTableAsSpace(bool create_table_as_space_);
int get_last_parsed_line();
private:
@ -165,10 +175,9 @@ private:
/*
a space in which we are now
the number of the line in which a syntax_error occurred
*/
Space * space;
int line;
/*
true if Parse() method was called
@ -197,18 +206,6 @@ private:
std::wstring token;
/*
last read key
*/
std::wstring key;
/*
last read list
*/
Space::Value value;
/*
separator between a variable and a value, default: '='
*/
@ -261,6 +258,9 @@ private:
/*
current file
maybe it would be better to make it a pointer?
if we parse only a string then there is no point in having such an object
*/
std::ifstream file;
@ -288,14 +288,6 @@ private:
bool use_escape_char;
/*
if false we only allow the tables to consists of text items (numeric, boolean too)
objects are not allowed then
default: true
*/
bool create_table_as_space;
/*
*/
@ -309,35 +301,48 @@ private:
size_t max_nested_level;
/*
*
* if parsing_space is false then it means we are parsing JSON format
*
*
*/
bool parsing_space;
// move to ParseFile() method
std::string afile_name;
void Parse();
void ParseSpace(bool has_space_name, bool insert_new_space = true);
void ParseTextTable();
void ParseObjectsTable(bool has_key);
void ParseTable(bool has_key);
void ParseKeyValuePairs();
void SkipText();
void SkipObjectOrTable(int start_char, int end_char);
void SkipTable();
void SkipObject();
void SpaceEnds(bool skip_space_char = true);
void SpaceStarts(bool has_space_name, bool skip_space_char = true);
// new
void ParseRootSpace();
void Parse(Space * space);
void ParseSpace(Space * space);
void ParseTable(Space * space);
void DeleteFromTable(const std::wstring & var);
void ParseKeyValuePairs(Space * space);
void ParseValuesList(Space * space);
Space * ReadKey(Space * space);
void ParseTextValue(Space * space);
void ParseIntegerValue(Space * space);
void ParseFloatingPointValue(Space * space);
bool is_alfa_numeric_char(int c);
void ReadAlfaNumericToken();
bool is_integer_token();
bool is_floating_point_token();
void ReadTokenQuoted();
void ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2);
void ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2);
void ReadKey();
void ReadValue(bool skip_object_or_table = false,
bool add_space_for_text_value = false,
bool has_key = false,
bool auto_add_text_value = false);
void AddKeyValuePair();
int ReadUTF8Char();
int ReadASCIIChar();
int ReadCharFromWcharString();
@ -347,14 +352,14 @@ private:
int ReadChar();
bool IsWhite(int c);
void SkipWhite();
void Trim(std::wstring & s);
//void Trim(std::wstring & s);
bool IsHexDigit(wchar_t c);
int HexToInt(wchar_t c);
void ReadUnicodeCodePoint();
};
#endif
} // namespace