/* * This file is a part of PikoTools * and is distributed under the (new) BSD licence. * Author: Tomasz Sowa */ /* * Copyright (c) 2012-2021, Tomasz Sowa * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * * Neither the name Tomasz Sowa nor the names of contributors to this * project may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include "jsontospaceparser.h" #include "utf8/utf8.h" #include "convert/strtoint.h" namespace PT { JSONToSpaceParser::JSONToSpaceParser() { root_space = 0; SetDefault(); } void JSONToSpaceParser::SetSpace(Space * pspace) { root_space = pspace; } void JSONToSpaceParser::SetSpace(Space & pspace) { root_space = &pspace; } void JSONToSpaceParser::SetDefault() { // you can change this separators to what you want // you shoud not use only white characters here (as expected by IsWhite() method) // and new line characters ('\n') separator = ':'; space_start = '{'; space_end = '}'; table_start = '['; table_end = ']'; option_delimiter = ','; skip_empty = false; use_escape_char = true; input_as_utf8 = true; max_nested_level = 1000; } void JSONToSpaceParser::SkipEmpty(bool skip) { skip_empty = skip; } void JSONToSpaceParser::UseEscapeChar(bool escape) { use_escape_char = escape; } void JSONToSpaceParser::UTF8(bool utf) { input_as_utf8 = utf; } int JSONToSpaceParser::get_last_parsed_line() { return line; } JSONToSpaceParser::Status JSONToSpaceParser::ParseFile(const char * file_name) { reading_from_file = true; file.clear(); file.open(file_name, std::ios_base::binary | std::ios_base::in); if( file ) { ParseRootSpace(); file.close(); } else { status = cant_open_file; } return status; } JSONToSpaceParser::Status JSONToSpaceParser::ParseFile(const std::string & file_name) { return ParseFile(file_name.c_str()); } JSONToSpaceParser::Status JSONToSpaceParser::ParseFile(const wchar_t * file_name) { PT::WideToUTF8(file_name, afile_name); return ParseFile(afile_name.c_str()); } JSONToSpaceParser::Status JSONToSpaceParser::ParseFile(const std::wstring & file_name) { return ParseFile(file_name.c_str()); } JSONToSpaceParser::Status JSONToSpaceParser::Parse(const char * str) { reading_from_file = false; reading_from_wchar_string = false; pchar_ascii = str; pchar_unicode = 0; ParseRootSpace(); return status; } JSONToSpaceParser::Status JSONToSpaceParser::Parse(const std::string & str) { return Parse(str.c_str()); } JSONToSpaceParser::Status JSONToSpaceParser::Parse(const wchar_t * str) { reading_from_file = false; reading_from_wchar_string = true; pchar_unicode = str; pchar_ascii = 0; ParseRootSpace(); return status; } JSONToSpaceParser::Status JSONToSpaceParser::Parse(const std::wstring & str) { return Parse(str.c_str()); } void JSONToSpaceParser::ParseRootSpace() { line = 1; status = ok; skipped = 0; current_nested_level = 0; parsing_space = false; if( !root_space ) { status = no_space; return; } ReadChar(); // put first character to lastc Parse(root_space); token.clear(); } void JSONToSpaceParser::Parse(Space * space) { SkipWhite(); if( lastc == space_start ) { ParseSpace(space); } else if( lastc == table_start ) { ParseTable(space); } else if( lastc == '"' ) // IMPROVEME define a variable { ParseTextValue(space); } else { ReadAlfaNumericToken(); if( token == L"null" ) { space->set_null(); } else if( token == L"true" ) { space->set(true); } else if( token == L"false" ) { space->set(false); } else if( is_integer_token() ) { ParseIntegerValue(space); } else if( is_floating_point_token() ) { ParseFloatingPointValue(space); } else { status = syntax_error; } } } void JSONToSpaceParser::ParseSpace(Space * space) { current_nested_level += 1; // if( current_nested_level > max_nested_level ) // { // status = max_nested_spaces_exceeded; // return; // } ReadChar(); // inserting a next character after the space_start char to lastc space->set_empty_object(); ParseKeyValuePairs(space); if( lastc == space_end ) { ReadChar(); } else { status = syntax_error; } current_nested_level -= 1; } void JSONToSpaceParser::ParseTextValue(Space * space) { ReadChar(); // inserting a next character after the ... char to lastc space->set_empty_wstring(); std::wstring * str = space->get_wstring(); // IMPROVEME add support for escaped characters while( status == ok && lastc != '"' && lastc != -1 ) { str->push_back(static_cast(lastc)); ReadChar(); } if( lastc == '"' ) { ReadChar(); } else { status = syntax_error; } } void JSONToSpaceParser::ParseIntegerValue(Space * space) { const wchar_t * after_str = nullptr; bool was_overflow = false; long long val = Toll(token.c_str(), 10, &after_str, &was_overflow, false); if( was_overflow ) { status = syntax_error; } else if( size_t(after_str - token.c_str()) != token.size() ) { status = syntax_error; } else { space->set(val); } } void JSONToSpaceParser::ParseFloatingPointValue(Space * space) { wchar_t * after_str = nullptr; double val = wcstod(token.c_str(), &after_str); if( errno == ERANGE ) { status = syntax_error; } else if( size_t(after_str - token.c_str()) != token.size() ) { status = syntax_error; } else { space->set(val); } } void JSONToSpaceParser::ParseTable(Space * space) { space->set_empty_table(); ReadChar(); // inserting a next character after the table_start char to lastc space->set_empty_object(); ParseValuesList(space); if( lastc == table_end ) { ReadChar(); } else { status = syntax_error; } } void JSONToSpaceParser::ParseKeyValuePairs(Space * space) { bool is_first = true; SkipWhite(); while( status == ok && lastc != space_end && lastc != -1 ) { if( !is_first ) { SkipWhite(); if( lastc == option_delimiter ) { ReadChar(); // inserting a next character after the option_delimiter to lastc } else { status = syntax_error; } } if( status == ok ) { Space * new_key_space = ReadKey(space); if( status == ok && new_key_space ) { SkipWhite(); if( lastc == separator ) { ReadChar(); // inserting a next character after the separator to lastc Parse(new_key_space); //value.clear(); //ReadValue(false, false, true, true); } else { status = syntax_error; } } } is_first = false; SkipWhite(); } } void JSONToSpaceParser::ParseValuesList(Space * space) { bool is_first = true; SkipWhite(); while( status == ok && lastc != table_end && lastc != -1 ) { if( !is_first ) { SkipWhite(); if( lastc == option_delimiter ) // may add a new delimiter for tables? default the same as for objects... { ReadChar(); // inserting a next character after the delimiter } else { status = syntax_error; } } if( status == ok ) { Space * new_space = &space->add(new Space()); Parse(new_space); } is_first = false; SkipWhite(); } } bool JSONToSpaceParser::is_integer_token() { if( token.empty() ) return false; size_t i = 0; if( token[i] == '-' ) { i += 1; if( token.size() == 1 ) return false; } for( ; i < token.size() ; ++i) { if( token[i] < '0' || token[i] > '9' ) { return false; } } return true; } bool JSONToSpaceParser::is_floating_point_token() { bool was_dot = false; bool was_exponential = false; bool was_plus_minus_sign = false; if( token.empty() ) return false; size_t i = 0; if( token[i] == '-' ) { i += 1; if( token.size() == 1 ) return false; } for( ; i < token.size() ; ++i) { if( token[i] == '.' ) { if( was_dot || was_exponential ) return false; was_dot = true; } else if( token[i] == 'e' || token[i]=='E' ) { if( was_exponential ) return false; was_exponential = true; // the exponential character cannot be the last character if( i + 1 == token.size() ) return false; } else if( token[i] == '+' || token[i] == '-' ) { if( was_plus_minus_sign || !was_exponential ) return false; // the plus or minus should be after the exponential character if( i > 0 && (token[i-1] != 'e' && token[i-1] != 'E') ) return false; was_plus_minus_sign = true; } else if( token[i] < '0' || token[i] > '9' ) { return false; } } return true; } bool JSONToSpaceParser::IsWhite(int c) { // 13 (\r) is at the end of a line in a dos file \r\n // 160 is an unbreakable space if( c==' ' || c=='\t' || c==13 || c==160 || c==10 ) return true; return false; } bool JSONToSpaceParser::is_alfa_numeric_char(int c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '.' || c=='-'; } void JSONToSpaceParser::SkipWhite() { while( IsWhite(lastc) ) { ReadChar(); } } /* void JSONToSpaceParser::Trim(std::wstring & s) { std::wstring::size_type i; if( s.empty() ) return; // looking for white characters at the end for(i=s.size()-1 ; i>0 && IsWhite(s[i]) ; --i); if( i==0 && IsWhite(s[i]) ) { // the whole string consists of white characters s.clear(); return; } // deleting white characters at the end if( i != s.size() - 1 ) s.erase(i+1, std::wstring::npos); // looking for white characters at the beginning for(i=0 ; i(lastc); ReadChar(); } } // IMPROVEME in JSON we should not allow non-escaped a new line character void JSONToSpaceParser::ReadTokenQuoted() { token.clear(); ReadChar(); // skipping the first quotation mark while( lastc != -1 && (char_was_escaped || lastc != '"') ) { token += static_cast(lastc); ReadChar(); } if( !char_was_escaped && lastc == '"' ) { ReadChar(); // skipping the last quotation mark } else { status = syntax_error; } } Space * JSONToSpaceParser::ReadKey(Space * space) { SkipWhite(); if( parsing_space ) { // IMPROVEME implement me } else { if( lastc == '"' ) { ReadTokenQuoted(); } else { status = syntax_error; } } if( status == ok ) { Space * new_space = new Space(); return &space->add(token.c_str(), new_space); } return nullptr; } int JSONToSpaceParser::ReadUTF8Char() { int c; bool correct; lastc = -1; do { PT::UTF8ToInt(file, c, correct); if( !file ) return lastc; } while( !correct ); lastc = c; if( lastc == '\n' ) ++line; return lastc; } int JSONToSpaceParser::ReadASCIIChar() { lastc = file.get(); if( lastc == '\n' ) ++line; return lastc; } int JSONToSpaceParser::ReadCharFromWcharString() { if( *pchar_unicode == 0 ) lastc = -1; else lastc = *(pchar_unicode++); if( lastc == '\n' ) ++line; return lastc; } int JSONToSpaceParser::ReadCharFromUTF8String() { int c; bool correct; lastc = -1; do { size_t len = PT::UTF8ToInt(pchar_ascii, c, correct); pchar_ascii += len; } while( *pchar_ascii && !correct ); if( correct ) lastc = c; if( lastc == '\n' ) ++line; return lastc; } int JSONToSpaceParser::ReadCharFromAsciiString() { if( *pchar_ascii == 0 ) lastc = -1; else lastc = *(pchar_ascii++); if( lastc == '\n' ) ++line; return lastc; } int JSONToSpaceParser::ReadCharNoEscape() { if( reading_from_file ) { if( input_as_utf8 ) return ReadUTF8Char(); else return ReadASCIIChar(); } else { if( reading_from_wchar_string ) { return ReadCharFromWcharString(); } else { if( input_as_utf8 ) return ReadCharFromUTF8String(); else return ReadCharFromAsciiString(); } } } bool JSONToSpaceParser::IsHexDigit(wchar_t c) { return ((c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') ); } int JSONToSpaceParser::HexToInt(wchar_t c) { if( c>='0' && c<='9' ) return c - '0'; if( c>='a' && c<='f' ) return c - 'a' + 10; if( c>='A' && c<='F' ) return c - 'A' + 10; return 0; } void JSONToSpaceParser::ReadUnicodeCodePoint() { wchar_t c; int value = 0; for(int i=0 ; i<4 ; ++i) { c = ReadCharNoEscape(); if( !IsHexDigit(c) ) { status = syntax_error; return; } value = (value << 4) | HexToInt(c); } lastc = (wchar_t)value; } int JSONToSpaceParser::ReadChar() { char_was_escaped = false; ReadCharNoEscape(); if( use_escape_char && lastc == '\\' ) { char_was_escaped = true; ReadCharNoEscape(); switch(lastc) { case '0': lastc = 0; break; case 't': lastc = '\t'; break; case 'r': lastc = '\r'; break; case 'n': lastc = '\n'; break; case 'b': lastc = 0x08; break; case 'f': lastc = 0x0c; break; case 'u': ReadUnicodeCodePoint(); break; // in other cases we return the last character } } return lastc; } } // namespace