From 0c0f15ab8a09a83c0a496d8f715d9eb625954990 Mon Sep 17 00:00:00 2001 From: Tomasz Sowa Date: Tue, 16 Mar 2021 18:40:14 +0100 Subject: [PATCH] changed: JSONToSpaceParser can parse json string/file to the new Space format now (some minor chars escaping not ready yet) --- space/jsontospaceparser.cpp | 674 +++++++++++++++++++----------------- space/jsontospaceparser.h | 131 +++---- 2 files changed, 420 insertions(+), 385 deletions(-) diff --git a/space/jsontospaceparser.cpp b/space/jsontospaceparser.cpp index a69f820..a88fe8e 100644 --- a/space/jsontospaceparser.cpp +++ b/space/jsontospaceparser.cpp @@ -5,7 +5,7 @@ */ /* - * Copyright (c) 2012-2017, Tomasz Sowa + * Copyright (c) 2012-2021, Tomasz Sowa * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -39,12 +39,13 @@ #include #include "jsontospaceparser.h" #include "utf8/utf8.h" +#include "convert/strtoint.h" namespace PT { -#ifdef nonexisting_value + JSONToSpaceParser::JSONToSpaceParser() @@ -81,7 +82,6 @@ void JSONToSpaceParser::SetDefault() use_escape_char = true; input_as_utf8 = true; max_nested_level = 1000; - create_table_as_space = true; } @@ -104,13 +104,14 @@ void JSONToSpaceParser::UTF8(bool utf) } -void JSONToSpaceParser::CreateTableAsSpace(bool create_table_as_space_) +int JSONToSpaceParser::get_last_parsed_line() { - create_table_as_space = create_table_as_space_; + return line; } -JSONToSpaceParser::Status JSONToSpaceParser::Parse(const char * file_name) + +JSONToSpaceParser::Status JSONToSpaceParser::ParseFile(const char * file_name) { reading_from_file = true; @@ -119,7 +120,7 @@ JSONToSpaceParser::Status JSONToSpaceParser::Parse(const char * file_name) if( file ) { - Parse(); + ParseRootSpace(); file.close(); } else @@ -132,305 +133,438 @@ return status; -JSONToSpaceParser::Status JSONToSpaceParser::Parse(const std::string & file_name) +JSONToSpaceParser::Status JSONToSpaceParser::ParseFile(const std::string & file_name) { - return 
Parse(file_name.c_str()); + return ParseFile(file_name.c_str()); } -JSONToSpaceParser::Status JSONToSpaceParser::Parse(const wchar_t * file_name) +JSONToSpaceParser::Status JSONToSpaceParser::ParseFile(const wchar_t * file_name) { PT::WideToUTF8(file_name, afile_name); - return Parse(afile_name.c_str()); + return ParseFile(afile_name.c_str()); } -JSONToSpaceParser::Status JSONToSpaceParser::Parse(const std::wstring & file_name) +JSONToSpaceParser::Status JSONToSpaceParser::ParseFile(const std::wstring & file_name) { - return Parse(file_name.c_str()); + return ParseFile(file_name.c_str()); } -JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const char * str) +JSONToSpaceParser::Status JSONToSpaceParser::Parse(const char * str) { reading_from_file = false; reading_from_wchar_string = false; pchar_ascii = str; pchar_unicode = 0; - Parse(); + ParseRootSpace(); return status; } -JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const std::string & str) +JSONToSpaceParser::Status JSONToSpaceParser::Parse(const std::string & str) { - return ParseString(str.c_str()); + return Parse(str.c_str()); } -JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const wchar_t * str) +JSONToSpaceParser::Status JSONToSpaceParser::Parse(const wchar_t * str) { reading_from_file = false; reading_from_wchar_string = true; pchar_unicode = str; pchar_ascii = 0; - Parse(); + ParseRootSpace(); return status; } -JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const std::wstring & str) +JSONToSpaceParser::Status JSONToSpaceParser::Parse(const std::wstring & str) { - return ParseString(str.c_str()); + return Parse(str.c_str()); } -void JSONToSpaceParser::Parse() +void JSONToSpaceParser::ParseRootSpace() { + line = 1; + status = ok; + skipped = 0; + current_nested_level = 0; + parsing_space = false; + if( !root_space ) { status = no_space; return; } - line = 1; - status = ok; - space = root_space; - skipped = 0; - current_nested_level = 0; - ReadChar(); + ReadChar(); // 
put first character to lastc + Parse(root_space); + token.clear(); +} + + +void JSONToSpaceParser::Parse(Space * space) +{ SkipWhite(); if( lastc == space_start ) { - ParseSpace(false, false); + ParseSpace(space); } else if( lastc == table_start ) { - ParseTable(false); + ParseTable(space); + } + else + if( lastc == '"' ) // IMPROVEME define a variable + { + ParseTextValue(space); } else { - // '{' or '[' expected - status = syntax_error; - } + ReadAlfaNumericToken(); - if( status == ok && space != root_space ) - { - // last closing '}' characters are missing (closing a space) - status = syntax_error; + if( token == L"null" ) + { + space->set_null(); + } + else + if( token == L"true" ) + { + space->set(true); + } + else + if( token == L"false" ) + { + space->set(false); + } + else + if( is_integer_token() ) + { + ParseIntegerValue(space); + } + else + if( is_floating_point_token() ) + { + ParseFloatingPointValue(space); + } + else + { + status = syntax_error; + } } - - token.clear(); - key.clear(); - value.clear(); } -void JSONToSpaceParser::ParseSpace(bool has_space_name, bool insert_new_space) +void JSONToSpaceParser::ParseSpace(Space * space) { - //current_nested_level += 1; + current_nested_level += 1; - if( current_nested_level > max_nested_level ) - { - status = max_nested_spaces_exceeded; - return; - } +// if( current_nested_level > max_nested_level ) +// { +// status = max_nested_spaces_exceeded; +// return; +// } - if( insert_new_space ) + ReadChar(); // inserting a next character after the space_start char to lastc + space->set_empty_object(); + ParseKeyValuePairs(space); + + if( lastc == space_end ) { - SpaceStarts(has_space_name); + ReadChar(); } else { - // insert_new_space as a false is used only when parsing - // the first space (root_space) - ReadChar(); // skipping the first space character '{' + status = syntax_error; } - ParseKeyValuePairs(); - - if( insert_new_space ) - { - SpaceEnds(); - } - else - { - ReadChar(); // skipping the last space 
character '}' - } - - //current_nested_level -= 1; + current_nested_level -= 1; } -void JSONToSpaceParser::ParseKeyValuePairs() + + + +void JSONToSpaceParser::ParseTextValue(Space * space) { + ReadChar(); // inserting a next character after the ... char to lastc + space->set_empty_wstring(); + std::wstring * str = space->get_wstring(); + + // IMPROVEME add support for escaped characters + while( status == ok && lastc != '"' && lastc != -1 ) + { + str->push_back(static_cast(lastc)); + ReadChar(); + } + + if( lastc == '"' ) + { + ReadChar(); + } + else + { + status = syntax_error; + } +} + + +void JSONToSpaceParser::ParseIntegerValue(Space * space) +{ + const wchar_t * after_str = nullptr; + bool was_overflow = false; + + long long val = Toll(token.c_str(), 10, &after_str, &was_overflow, false); + + if( was_overflow ) + { + status = syntax_error; + } + else + if( size_t(after_str - token.c_str()) != token.size() ) + { + status = syntax_error; + } + else + { + space->set(val); + } +} + + +void JSONToSpaceParser::ParseFloatingPointValue(Space * space) +{ + wchar_t * after_str = nullptr; + double val = wcstod(token.c_str(), &after_str); + + if( errno == ERANGE ) + { + status = syntax_error; + } + else + if( size_t(after_str - token.c_str()) != token.size() ) + { + status = syntax_error; + } + else + { + space->set(val); + } +} + + + + +void JSONToSpaceParser::ParseTable(Space * space) +{ + space->set_empty_table(); + + ReadChar(); // inserting a next character after the table_start char to lastc + space->set_empty_object(); + ParseValuesList(space); + + if( lastc == table_end ) + { + ReadChar(); + } + else + { + status = syntax_error; + } +} + + + + +void JSONToSpaceParser::ParseKeyValuePairs(Space * space) +{ + bool is_first = true; SkipWhite(); while( status == ok && lastc != space_end && lastc != -1 ) { - ReadKey(); - SkipWhite(); - - if( lastc == separator ) + if( !is_first ) { - value.clear(); - ReadChar(); // skipping separator ':' - ReadValue(false, false, true, 
true); SkipWhite(); if( lastc == option_delimiter ) { - ReadChar(); // skipping delimiter ',' + ReadChar(); // inserting a next character after the option_delimiter to lastc } else - if( lastc != space_end && status == ok ) { status = syntax_error; } } - else + if( status == ok ) { - status = syntax_error; + Space * new_key_space = ReadKey(space); + + if( status == ok && new_key_space ) + { + SkipWhite(); + + if( lastc == separator ) + { + ReadChar(); // inserting a next character after the separator to lastc + + Parse(new_key_space); + //value.clear(); + //ReadValue(false, false, true, true); + } + else + { + status = syntax_error; + } + } } - } -} - -void JSONToSpaceParser::ParseTextTable() -{ - ReadChar(); // skipping table start character '[' - SkipWhite(); - value.clear(); - - while( status == ok && lastc != table_end && lastc != -1 ) - { - // all space objects inside or tables will be skipped - ReadValue(true); + is_first = false; SkipWhite(); - - if( lastc == option_delimiter ) - { - ReadChar(); // skipping delimiter ',' - } - else - if( lastc != table_end && status == ok ) - { - status = syntax_error; - } } - - if( lastc == table_end ) - ReadChar(); // skipping end table character ']' - - AddKeyValuePair(); } -void JSONToSpaceParser::ParseObjectsTable(bool has_key) + + +void JSONToSpaceParser::ParseValuesList(Space * space) { - ReadChar(); // skipping table start character '[' - SpaceStarts(has_key, false); + bool is_first = true; SkipWhite(); while( status == ok && lastc != table_end && lastc != -1 ) { - // 'value' table will not be used here - // (we are creating spaces) - ReadValue(false, true); + if( !is_first ) + { + SkipWhite(); + + if( lastc == option_delimiter ) // may add a new delimiter for tables? default the same as for objects... 
+ { + ReadChar(); // inserting a next character after the delimiter + } + else + { + status = syntax_error; + } + } + + if( status == ok ) + { + Space * new_space = &space->add(new Space()); + Parse(new_space); + } + + is_first = false; SkipWhite(); + } +} - if( lastc == option_delimiter ) + + +bool JSONToSpaceParser::is_integer_token() +{ + if( token.empty() ) + return false; + + size_t i = 0; + + if( token[i] == '-' ) + { + i += 1; + + if( token.size() == 1 ) + return false; + } + + for( ; i < token.size() ; ++i) + { + if( token[i] < '0' || token[i] > '9' ) { - ReadChar(); // skipping delimiter ',' + return false; + } + } + + return true; +} + + +bool JSONToSpaceParser::is_floating_point_token() +{ + bool was_dot = false; + bool was_exponential = false; + bool was_plus_minus_sign = false; + + if( token.empty() ) + return false; + + size_t i = 0; + + if( token[i] == '-' ) + { + i += 1; + + if( token.size() == 1 ) + return false; + } + + for( ; i < token.size() ; ++i) + { + if( token[i] == '.' 
) + { + if( was_dot || was_exponential ) + return false; + + was_dot = true; } else - if( lastc != table_end && status == ok ) + if( token[i] == 'e' || token[i]=='E' ) { - status = syntax_error; - } - } + if( was_exponential ) + return false; - if( lastc == table_end ) - ReadChar(); // skipping end table character ']' + was_exponential = true; - SpaceEnds(false); -} - - -void JSONToSpaceParser::ParseTable(bool has_key) -{ - if( create_table_as_space ) - { - //current_nested_level += 1; - - if( current_nested_level > max_nested_level ) - { - status = max_nested_spaces_exceeded; + // the exponential character cannot be the last character + if( i + 1 == token.size() ) + return false; } else + if( token[i] == '+' || token[i] == '-' ) { - ParseObjectsTable(has_key); + if( was_plus_minus_sign || !was_exponential ) + return false; + + // the plus or minus should be after the exponential character + if( i > 0 && (token[i-1] != 'e' && token[i-1] != 'E') ) + return false; + + was_plus_minus_sign = true; + } + else + if( token[i] < '0' || token[i] > '9' ) + { + return false; } - - //current_nested_level -= 1; } - else - { - // ParseTextTable will not create a next level - if( !has_key ) - key.clear(); - - ParseTextTable(); // ParseTextTable will use key - } -} - - - -void JSONToSpaceParser::SpaceStarts(bool has_space_name, bool skip_space_char) -{ - Space * new_space = new Space(); - space->spaces.push_back(new_space); - new_space->parent = space; - - if( has_space_name ) - new_space->name = key; - - space = new_space; - - if( skip_space_char ) - ReadChar(); // skipping space starts character '{' -} - - -void JSONToSpaceParser::SpaceEnds(bool skip_space_char) -{ - if( space == root_space ) - { - // there cannot be a loose list end character in the global space - status = syntax_error; - } - else - { - space = space->parent; - - if( skip_space_char ) - ReadChar(); // skipping closing space character '}' - } + return true; } @@ -446,6 +580,15 @@ return false; } +bool 
JSONToSpaceParser::is_alfa_numeric_char(int c) +{ + return (c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || + c == '.' || c=='-'; +} + + void JSONToSpaceParser::SkipWhite() { @@ -456,7 +599,7 @@ void JSONToSpaceParser::SkipWhite() } - +/* void JSONToSpaceParser::Trim(std::wstring & s) { std::wstring::size_type i; @@ -469,7 +612,7 @@ std::wstring::size_type i; if( i==0 && IsWhite(s[i]) ) { - // the whole string has white characters + // the whole string consists of white characters s.clear(); return; } @@ -485,27 +628,28 @@ std::wstring::size_type i; if( i != 0 ) s.erase(0, i); } +*/ - -void JSONToSpaceParser::DeleteFromTable(const std::wstring & var) +void JSONToSpaceParser::ReadAlfaNumericToken() { - Space::Table::iterator i = space->table.find(var); + token.clear(); - if( i != space->table.end() ) - space->table.erase(i); + while( is_alfa_numeric_char(lastc) ) + { + token += static_cast(lastc); + ReadChar(); + } } - - - - +// IMPROVEME in JSON we should not allow a non-escaped new line character void JSONToSpaceParser::ReadTokenQuoted() { + token.clear(); ReadChar(); // skipping the first quotation mark while( lastc != -1 && (char_was_escaped || lastc != '"') ) @@ -515,165 +659,51 @@ void JSONToSpaceParser::ReadTokenQuoted() } if( !char_was_escaped && lastc == '"' ) - ReadChar(); // skipping the last quotation mark - else - status = syntax_error; -} - - -void JSONToSpaceParser::ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2) -{ - while( true ) { - if( lastc == -1 || - (!char_was_escaped && - ( - lastc == space_end || - lastc == table_end || - (white_delimit && IsWhite(lastc)) || - (new_line_delimit && lastc == '\n') || - (delimit1 != -1 && lastc == delimit1) || - (delimit2 != -1 && lastc == delimit2) - ) ) ) - - { - break; - } - - token += static_cast(lastc); - ReadChar(); + ReadChar(); // skipping the last quotation mark } - - Trim(token); -} - - -void JSONToSpaceParser::ReadToken(bool 
white_delimit, bool new_line_delimit, int delimit1, int delimit2) -{ - token.clear(); - SkipWhite(); - - if( !char_was_escaped && lastc == '"' ) - ReadTokenQuoted(); else - ReadTokenSingle(white_delimit, new_line_delimit, delimit1, delimit2); + { + status = syntax_error; + } } -void JSONToSpaceParser::ReadKey() + + + +Space * JSONToSpaceParser::ReadKey(Space * space) { SkipWhite(); - ReadToken(false, true, separator, table_start); - key = token; -} - -void JSONToSpaceParser::SkipText() -{ - ReadChar(); // skipping the first quote character '"' - - while( lastc != '"' && lastc != -1 ) - ReadChar(); -} - - -void JSONToSpaceParser::SkipObjectOrTable(int start_char, int end_char) -{ -int mark = 1; - - skipped += 1; - ReadChar(); // skipping the first object character '{' or '[' - - do + if( parsing_space ) + { + // IMPROVEME implement me + } + else { if( lastc == '"' ) - SkipText(); - else - if( lastc == end_char ) - mark -= 1; - else - if( lastc == start_char ) - mark += 1; - - ReadChar(); - } - while( mark > 0 && lastc != -1 ); -} - - -void JSONToSpaceParser::SkipObject() -{ - SkipObjectOrTable(space_start, space_end); -} - - -void JSONToSpaceParser::SkipTable() -{ - SkipObjectOrTable(table_start, table_end); -} - - - -//void JSONToSpaceParser::ReadValue(bool add_space_for_single_value, bool auto_add_single_value, bool has_space_name) -void JSONToSpaceParser::ReadValue(bool skip_object_or_table, - bool add_space_for_text_value, - bool has_key, - bool auto_add_text_value) -{ - SkipWhite(); - - if( lastc == space_start ) - { - if( skip_object_or_table ) - SkipObject(); - else - ParseSpace(has_key); - } - else - if( lastc == table_start ) - { - if( skip_object_or_table ) - SkipTable(); - else - ParseTable(has_key); - } - else - { - if( add_space_for_text_value ) { - SpaceStarts(false, false); - ReadToken(false, true, option_delimiter, -1); - space->name = token; - SpaceEnds(false); + ReadTokenQuoted(); } else { - ReadToken(false, true, option_delimiter, -1); - 
value.push_back(token); - - if( auto_add_text_value ) - AddKeyValuePair(); + status = syntax_error; } } -} - -void JSONToSpaceParser::AddKeyValuePair() -{ - if( value.empty() && skip_empty ) + if( status == ok ) { - DeleteFromTable(key); - return; + Space * new_space = new Space(); + return &space->add(token.c_str(), new_space); } - space->table[key] = value; + return nullptr; } - - int JSONToSpaceParser::ReadUTF8Char() { int c; @@ -862,7 +892,7 @@ int JSONToSpaceParser::ReadChar() return lastc; } -#endif + } // namespace diff --git a/space/jsontospaceparser.h b/space/jsontospaceparser.h index 6691fbc..16a4390 100644 --- a/space/jsontospaceparser.h +++ b/space/jsontospaceparser.h @@ -5,7 +5,7 @@ */ /* - * Copyright (c) 2012-2017, Tomasz Sowa + * Copyright (c) 2012-2021, Tomasz Sowa * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -46,7 +46,7 @@ namespace PT { -#ifdef nonexisting_value + class JSONToSpaceParser @@ -86,10 +86,6 @@ public: Status status; - /* - a number of a line in which there is a syntax_error - */ - int line; /* @@ -103,26 +99,32 @@ public: main methods used to parse file_name is the path to a file */ - Status Parse(const char * file_name); - Status Parse(const std::string & file_name); - Status Parse(const wchar_t * file_name); - Status Parse(const std::wstring & file_name); + Status ParseFile(const char * file_name); + Status ParseFile(const std::string & file_name); + Status ParseFile(const wchar_t * file_name); + Status ParseFile(const std::wstring & file_name); /* main methods used to parse str - input string (either 8bit ascii or UTF-8 -- see UTF8() method) */ - Status ParseString(const char * str); - Status ParseString(const std::string & str); + Status Parse(const char * str); + Status Parse(const std::string & str); /* main methods used to parse here input string is always in unicode (wide characters) */ - Status ParseString(const wchar_t * str); - Status ParseString(const std::wstring & 
str); + Status Parse(const wchar_t * str); + Status Parse(const std::wstring & str); + + /* + * add two args Parse method + * Status Parse(const char * str, Space & output_space); + * + */ /* @@ -145,15 +147,23 @@ public: /* if true then the input file or string (char* or std::string) is treated as UTF-8 + default true + + the internal storage for strings is std::wstring so if you call UTF8(false) then + the characters of input string are simply static_cast<> from char to wchar_t */ + // rename to use_utf8(bool) void UTF8(bool utf); /* + * + * returns the number of the last parsed line + * can be used to obtain the line in which there was a syntax error + * + */ + int get_last_parsed_line(); - default: true - */ - void CreateTableAsSpace(bool create_table_as_space_); private: @@ -165,10 +175,9 @@ private: /* - a space in which we are now + a number of a line in which there is a syntax_error */ - Space * space; - + int line; /* true if Parse() method was called @@ -197,18 +206,6 @@ private: std::wstring token; - /* - last read key - */ - std::wstring key; - - - /* - last read list - */ - Space::Value value; - - /* separator between a variable and a value, default: '=' */ @@ -261,6 +258,9 @@ private: /* current file + + maybe it would be better to make it a pointer? 
+ if we parse only a string then there is no sense to have such an object */ std::ifstream file; @@ -288,14 +288,6 @@ private: bool use_escape_char; - /* - if false we only allow the tables to consists of text items (numeric, boolean too) - objects are not allowed then - default: true - */ - bool create_table_as_space; - - /* */ @@ -309,35 +301,48 @@ private: size_t max_nested_level; + /* + * + * if parsing_space is false then it means we are parsing JSON format + * + * + */ + bool parsing_space; + + + + // move to ParseFile() method std::string afile_name; - void Parse(); - void ParseSpace(bool has_space_name, bool insert_new_space = true); - void ParseTextTable(); - void ParseObjectsTable(bool has_key); - void ParseTable(bool has_key); - void ParseKeyValuePairs(); - void SkipText(); - void SkipObjectOrTable(int start_char, int end_char); - void SkipTable(); - void SkipObject(); - void SpaceEnds(bool skip_space_char = true); - void SpaceStarts(bool has_space_name, bool skip_space_char = true); + // new + void ParseRootSpace(); + void Parse(Space * space); + void ParseSpace(Space * space); + void ParseTable(Space * space); - void DeleteFromTable(const std::wstring & var); + void ParseKeyValuePairs(Space * space); + void ParseValuesList(Space * space); + + Space * ReadKey(Space * space); + + void ParseTextValue(Space * space); + void ParseIntegerValue(Space * space); + void ParseFloatingPointValue(Space * space); + + + bool is_alfa_numeric_char(int c); + void ReadAlfaNumericToken(); + + + bool is_integer_token(); + bool is_floating_point_token(); void ReadTokenQuoted(); - void ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2); - void ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2); - void ReadKey(); - void ReadValue(bool skip_object_or_table = false, - bool add_space_for_text_value = false, - bool has_key = false, - bool auto_add_text_value = false); - void AddKeyValuePair(); + + int 
ReadUTF8Char(); int ReadASCIIChar(); int ReadCharFromWcharString(); @@ -347,14 +352,14 @@ private: int ReadChar(); bool IsWhite(int c); void SkipWhite(); - void Trim(std::wstring & s); + //void Trim(std::wstring & s); bool IsHexDigit(wchar_t c); int HexToInt(wchar_t c); void ReadUnicodeCodePoint(); }; -#endif + } // namespace