pikotools/space/jsontospaceparser.cpp

871 lines
15 KiB
C++

/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2012-2017, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cstdlib>
#include <wchar.h>
#include "jsontospaceparser.h"
#include "utf8/utf8.h"
namespace PT
{
// Builds a parser that has no destination space yet (root_space is null)
// and whose options are the ones chosen by SetDefault().
JSONToSpaceParser::JSONToSpaceParser()
{
    this->root_space = 0;
    this->SetDefault();
}
// Remembers the space that parsed data will be stored in.
// While this pointer is null Parse() finishes with the no_space status.
void JSONToSpaceParser::SetSpace(Space * pspace)
{
    this->root_space = pspace;
}
// Reference flavour of SetSpace(): stores the address of the given space.
void JSONToSpaceParser::SetSpace(Space & pspace)
{
    SetSpace(&pspace);
}
// Restores every parser option to its default value.
void JSONToSpaceParser::SetDefault()
{
    // Delimiters recognised by the parser. They can be changed, but a
    // delimiter should not be a character that IsWhite() treats as white
    // space, nor the new line character ('\n').
    space_start      = '{';
    space_end        = '}';
    table_start      = '[';
    table_end        = ']';
    separator        = ':';   // between a key and its value
    option_delimiter = ',';   // between consecutive items

    // behaviour switches
    create_table_as_space = true;
    input_as_utf8         = true;
    use_escape_char       = true;
    skip_empty            = false;

    // limit compared against current_nested_level
    max_nested_level = 1000;
}
void JSONToSpaceParser::SkipEmpty(bool skip)
{
skip_empty = skip;
}
void JSONToSpaceParser::UseEscapeChar(bool escape)
{
use_escape_char = escape;
}
void JSONToSpaceParser::UTF8(bool utf)
{
input_as_utf8 = utf;
}
void JSONToSpaceParser::CreateTableAsSpace(bool create_table_as_space_)
{
create_table_as_space = create_table_as_space_;
}
// Parses the file called file_name (opened in binary mode).
// Returns cant_open_file when the file cannot be opened, otherwise
// the status left by Parse().
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const char * file_name)
{
    reading_from_file = true;

    file.clear();
    file.open(file_name, std::ios_base::in | std::ios_base::binary);

    if( !file )
    {
        status = cant_open_file;
        return status;
    }

    Parse();
    file.close();

    return status;
}
// Convenience overload: forwards the file name to Parse(const char *).
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const std::string & file_name)
{
    const char * name = file_name.c_str();

    return Parse(name);
}
// Wide-character file name: it is converted with PT::WideToUTF8() into
// afile_name and then handed over to Parse(const char *).
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const wchar_t * file_name)
{
    PT::WideToUTF8(file_name, afile_name);
    const char * converted_name = afile_name.c_str();

    return Parse(converted_name);
}
// Convenience overload: forwards the file name to Parse(const wchar_t *).
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const std::wstring & file_name)
{
    const wchar_t * name = file_name.c_str();

    return Parse(name);
}
// Parses JSON from a zero-terminated char string.
// Whether the bytes are decoded as UTF-8 depends on the input_as_utf8 option
// (see ReadCharNoEscape()). Returns the status left by Parse().
JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const char * str)
{
    // select the char-string input source
    pchar_unicode             = 0;
    pchar_ascii               = str;
    reading_from_wchar_string = false;
    reading_from_file         = false;

    Parse();

    return status;
}
// Convenience overload: forwards the text to ParseString(const char *).
JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const std::string & str)
{
    const char * text = str.c_str();

    return ParseString(text);
}
// Parses JSON from a zero-terminated wide-character string.
// Returns the status left by Parse().
JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const wchar_t * str)
{
    // select the wide-string input source
    pchar_ascii               = 0;
    pchar_unicode             = str;
    reading_from_wchar_string = true;
    reading_from_file         = false;

    Parse();

    return status;
}
// Convenience overload: forwards the text to ParseString(const wchar_t *).
JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const std::wstring & str)
{
    const wchar_t * text = str.c_str();

    return ParseString(text);
}
// Common parsing entry used by all public Parse()/ParseString() overloads.
// The input source has to be selected by the caller beforehand.
// The result is left in 'status'; the temporary buffers (token, key, value)
// are emptied before returning.
void JSONToSpaceParser::Parse()
{
    if( root_space == 0 )
    {
        status = no_space;
        return;
    }

    // start every run from a clean state
    status               = ok;
    line                 = 1;
    skipped              = 0;
    current_nested_level = 0;
    space                = root_space;

    ReadChar();
    SkipWhite();

    if( lastc == space_start )
    {
        // the root object is stored directly in root_space
        ParseSpace(false, false);
    }
    else if( lastc == table_start )
    {
        ParseTable(false);
    }
    else
    {
        // the input has to begin with '{' or '['
        status = syntax_error;
    }

    if( space != root_space && status == ok )
    {
        // some closing '}' characters are missing: we did not come back to the root space
        status = syntax_error;
    }

    token.clear();
    key.clear();
    value.clear();
}
// Parses one space (an object); lastc is expected to hold the opening '{'.
//
// has_space_name   - passed to SpaceStarts(): when true the new space gets
//                    its name from 'key'
// insert_new_space - when false no child space is created and the pairs go
//                    directly into the current space; this is used only for
//                    the first (root) space
//
// The nesting counter is incremented for the duration of the call and
// decremented on every exit path, so that max_nested_level really limits
// the recursion depth (ParseSpace -> ParseKeyValuePairs -> ReadValue ->
// ParseSpace/ParseTable). Previously the counter was never changed and the
// limit check could not trigger.
void JSONToSpaceParser::ParseSpace(bool has_space_name, bool insert_new_space)
{
    current_nested_level += 1;

    if( current_nested_level > max_nested_level )
    {
        status = max_nested_spaces_exceeded;
    }
    else
    {
        if( insert_new_space )
        {
            SpaceStarts(has_space_name);
        }
        else
        {
            ReadChar(); // skipping the first space character '{'
        }

        ParseKeyValuePairs();

        if( insert_new_space )
        {
            SpaceEnds();
        }
        else
        {
            ReadChar(); // skipping the last space character '}'
        }
    }

    current_nested_level -= 1;
}
// Reads "key : value" pairs until the space end character, the end of the
// input or an error. After each value either an option delimiter (',') or
// the space end character is required, otherwise status becomes syntax_error.
void JSONToSpaceParser::ParseKeyValuePairs()
{
    SkipWhite();

    while( status == ok && lastc != space_end && lastc != -1 )
    {
        ReadKey();
        SkipWhite();

        if( lastc != separator )
        {
            // a key without the ':' separator
            if( status == ok )
                status = syntax_error;

            continue;
        }

        value.clear();
        ReadChar(); // skipping separator ':'
        ReadValue(false, false, true, true);
        SkipWhite();

        if( lastc == option_delimiter )
        {
            ReadChar(); // skipping delimiter ','
        }
        else if( status == ok && lastc != space_end )
        {
            status = syntax_error;
        }
    }
}
// Parses a table ('[' ... ']') into the 'value' list and stores it under
// the current 'key' with AddKeyValuePair(). Objects and tables found inside
// are skipped (ReadValue is called with skip_object_or_table set).
void JSONToSpaceParser::ParseTextTable()
{
    value.clear();
    ReadChar(); // skipping table start character '['
    SkipWhite();

    while( status == ok && lastc != table_end && lastc != -1 )
    {
        ReadValue(true);
        SkipWhite();

        if( lastc == option_delimiter )
        {
            ReadChar(); // skipping delimiter ','
        }
        else if( status == ok && lastc != table_end )
        {
            // neither ',' nor ']' after an item
            status = syntax_error;
        }
    }

    if( lastc == table_end )
    {
        ReadChar(); // skipping end table character ']'
    }

    AddKeyValuePair();
}
// Parses a table ('[' ... ']') as a space: a new space is opened for the
// table itself and each item is read with ReadValue(false, true), so text
// items are turned into spaces as well and the 'value' list is not used.
//
// has_key - passed to SpaceStarts(): when true the table space is named after 'key'
void JSONToSpaceParser::ParseObjectsTable(bool has_key)
{
    ReadChar(); // skipping table start character '['
    SpaceStarts(has_key, false);
    SkipWhite();

    while( status == ok && lastc != table_end && lastc != -1 )
    {
        ReadValue(false, true);
        SkipWhite();

        if( lastc == option_delimiter )
        {
            ReadChar(); // skipping delimiter ','
        }
        else if( status == ok && lastc != table_end )
        {
            // neither ',' nor ']' after an item
            status = syntax_error;
        }
    }

    if( lastc == table_end )
    {
        ReadChar(); // skipping end table character ']'
    }

    SpaceEnds(false);
}
// Parses a table; lastc is expected to hold the opening '['.
//
// has_key - true when the table is a value of a key stored in 'key'
//
// With create_table_as_space set the table becomes a nested space
// (ParseObjectsTable). That is one more nesting level, so the counter is
// incremented for the duration of the call and decremented afterwards;
// this makes the max_nested_level check effective (previously the counter
// was never changed and the limit could not trigger).
// Otherwise the table is read as a list of text values (ParseTextTable),
// which does not create a next level.
void JSONToSpaceParser::ParseTable(bool has_key)
{
    if( !create_table_as_space )
    {
        if( !has_key )
            key.clear();

        ParseTextTable(); // ParseTextTable will use key
        return;
    }

    current_nested_level += 1;

    if( current_nested_level > max_nested_level )
    {
        status = max_nested_spaces_exceeded;
    }
    else
    {
        ParseObjectsTable(has_key);
    }

    current_nested_level -= 1;
}
void JSONToSpaceParser::SpaceStarts(bool has_space_name, bool skip_space_char)
{
Space * new_space = new Space();
space->spaces.push_back(new_space);
new_space->parent = space;
if( has_space_name )
new_space->name = key;
space = new_space;
if( skip_space_char )
ReadChar(); // skipping space starts character '{'
}
// Leaves the current space and goes back to its parent.
// Trying to leave the root space is reported as syntax_error.
//
// skip_space_char - when true one character (the closing '}') is consumed
void JSONToSpaceParser::SpaceEnds(bool skip_space_char)
{
    if( space == root_space )
    {
        // there is nothing to close in the global space
        status = syntax_error;
        return;
    }

    space = space->parent;

    if( skip_space_char )
    {
        ReadChar(); // skipping closing space character '}'
    }
}
// Tells whether c is one of the characters the parser treats as white space.
bool JSONToSpaceParser::IsWhite(int c)
{
    switch( c )
    {
    case ' ':
    case '\t':
    case 10:   // line feed (\n)
    case 13:   // carriage return (\r), ends a line in dos files: \r\n
    case 160:  // unbreakable space
        return true;

    default:
        return false;
    }
}
// Consumes input as long as the current character (lastc) is a white one.
void JSONToSpaceParser::SkipWhite()
{
    for( ; IsWhite(lastc) ; ReadChar() )
    {
    }
}
// Removes white characters (as defined by IsWhite()) from both ends of s.
// A string made only of white characters becomes empty.
void JSONToSpaceParser::Trim(std::wstring & s)
{
    // length of the string without the trailing white characters
    std::wstring::size_type kept = s.size();

    while( kept > 0 && IsWhite(s[kept - 1]) )
        --kept;

    if( kept == 0 )
    {
        // nothing but white characters (or nothing at all)
        s.clear();
        return;
    }

    if( kept != s.size() )
        s.erase(kept);

    // number of white characters at the beginning
    std::wstring::size_type leading = 0;

    while( leading < s.size() && IsWhite(s[leading]) )
        ++leading;

    if( leading != 0 )
        s.erase(0, leading);
}
void JSONToSpaceParser::DeleteFromTable(const std::wstring & var)
{
Space::Table::iterator i = space->table.find(var);
if( i != space->table.end() )
space->table.erase(i);
}
// Appends to 'token' the characters between quotation marks; lastc is
// expected to hold the opening mark. An escaped quotation mark belongs to
// the text. When the input ends before the closing mark, status is set to
// syntax_error.
void JSONToSpaceParser::ReadTokenQuoted()
{
    ReadChar(); // skipping the first quotation mark

    for( ; lastc != -1 ; ReadChar() )
    {
        if( !char_was_escaped && lastc == '"' )
            break; // the closing quotation mark

        token += static_cast<wchar_t>(lastc);
    }

    if( lastc == '"' && !char_was_escaped )
    {
        ReadChar(); // skipping the last quotation mark
    }
    else
    {
        status = syntax_error;
    }
}
// Appends to 'token' an unquoted text and trims it afterwards.
// Reading stops at the end of the input or at an unescaped delimiter:
// the space or table end character, and optionally:
//
// white_delimit    - any white character
// new_line_delimit - the new line character
// delimit1/2       - additional delimiters, -1 means "not used"
void JSONToSpaceParser::ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2)
{
    for( ;; )
    {
        if( lastc == -1 )
            break;

        if( !char_was_escaped )
        {
            const bool at_delimiter =
                lastc == space_end ||
                lastc == table_end ||
                (white_delimit && IsWhite(lastc)) ||
                (new_line_delimit && lastc == '\n') ||
                (delimit1 != -1 && lastc == delimit1) ||
                (delimit2 != -1 && lastc == delimit2);

            if( at_delimiter )
                break;
        }

        token += static_cast<wchar_t>(lastc);
        ReadChar();
    }

    Trim(token);
}
// Reads the next token into 'token' (its previous content is discarded).
// A token beginning with an unescaped quotation mark is read by
// ReadTokenQuoted(), any other by ReadTokenSingle() with the given delimiters.
void JSONToSpaceParser::ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2)
{
    token.clear();
    SkipWhite();

    const bool quoted = (lastc == '"' && !char_was_escaped);

    if( quoted )
    {
        ReadTokenQuoted();
    }
    else
    {
        ReadTokenSingle(white_delimit, new_line_delimit, delimit1, delimit2);
    }
}
// Reads a key into 'key'. An unquoted key ends at a new line,
// at the separator (':') or at the table start character ('[').
void JSONToSpaceParser::ReadKey()
{
    const bool white_delimit    = false;
    const bool new_line_delimit = true;

    SkipWhite();
    ReadToken(white_delimit, new_line_delimit, separator, table_start);

    key = token;
}
void JSONToSpaceParser::SkipText()
{
ReadChar(); // skipping the first quote character '"'
while( lastc != '"' && lastc != -1 )
ReadChar();
}
// Skips a whole object or table together with everything nested in it and
// increments the 'skipped' counter. lastc is expected to hold start_char.
// Quoted texts are skipped with SkipText(), so brackets inside them are not
// counted. Reading stops after the matching end_char or at the end of input.
void JSONToSpaceParser::SkipObjectOrTable(int start_char, int end_char)
{
    int depth = 1; // how many start_char are still waiting for their end_char

    skipped += 1;
    ReadChar(); // skipping the first object character '{' or '['

    do
    {
        if( lastc == '"' )
        {
            SkipText();
        }
        else if( lastc == end_char )
        {
            --depth;
        }
        else if( lastc == start_char )
        {
            ++depth;
        }

        ReadChar();
    }
    while( lastc != -1 && depth > 0 );
}
// Skips an object: everything between space_start and the matching space_end.
void JSONToSpaceParser::SkipObject()
{
    const int opening = space_start;
    const int closing = space_end;

    SkipObjectOrTable(opening, closing);
}
// Skips a table: everything between table_start and the matching table_end.
void JSONToSpaceParser::SkipTable()
{
    const int opening = table_start;
    const int closing = table_end;

    SkipObjectOrTable(opening, closing);
}
//void JSONToSpaceParser::ReadValue(bool add_space_for_single_value, bool auto_add_single_value, bool has_space_name)
// Reads one value: an object, a table or a text.
//
// skip_object_or_table     - objects and tables are skipped instead of parsed
// add_space_for_text_value - a text is stored as the name of a new child space
//                            instead of being appended to 'value'
// has_key                  - passed on to ParseSpace() / ParseTable()
// auto_add_text_value      - after appending a text to 'value' the pair
//                            is stored at once with AddKeyValuePair()
void JSONToSpaceParser::ReadValue(bool skip_object_or_table,
bool add_space_for_text_value,
bool has_key,
bool auto_add_text_value)
{
    SkipWhite();

    if( lastc == space_start )
    {
        if( skip_object_or_table )
            SkipObject();
        else
            ParseSpace(has_key);

        return;
    }

    if( lastc == table_start )
    {
        if( skip_object_or_table )
            SkipTable();
        else
            ParseTable(has_key);

        return;
    }

    // a text value
    if( add_space_for_text_value )
    {
        SpaceStarts(false, false);
        ReadToken(false, true, option_delimiter, -1);
        space->name = token;
        SpaceEnds(false);
        return;
    }

    ReadToken(false, true, option_delimiter, -1);
    value.push_back(token);

    if( auto_add_text_value )
        AddKeyValuePair();
}
// Stores 'value' under 'key' in the table of the current space.
// With the skip_empty option an empty value is not stored and an existing
// entry for the key is removed instead.
void JSONToSpaceParser::AddKeyValuePair()
{
    if( skip_empty && value.empty() )
    {
        DeleteFromTable(key);
    }
    else
    {
        space->table[key] = value;
    }
}
// Reads one character from the file with PT::UTF8ToInt(). Reading is
// repeated until a character reported as correct is obtained.
// Sets lastc and returns it; -1 when the stream has failed (e.g. at the end
// of the file). The line counter is incremented on '\n'.
int JSONToSpaceParser::ReadUTF8Char()
{
    int decoded;
    bool valid;

    lastc = -1;

    for( ;; )
    {
        PT::UTF8ToInt(file, decoded, valid);

        if( !file )
            return lastc;

        if( valid )
            break;
    }

    lastc = decoded;

    if( lastc == '\n' )
        ++line;

    return lastc;
}
// Reads one byte from the file with get(), stores the result in lastc and
// returns it. The line counter is incremented on '\n'.
int JSONToSpaceParser::ReadASCIIChar()
{
    const int fetched = file.get();

    lastc = fetched;

    if( fetched == '\n' )
    {
        ++line;
    }

    return lastc;
}
// Takes the next character from the wide-character input string.
// At the terminating zero lastc becomes -1 and the pointer is not moved.
// The line counter is incremented on '\n'. Returns lastc.
int JSONToSpaceParser::ReadCharFromWcharString()
{
    if( *pchar_unicode != 0 )
    {
        lastc = *pchar_unicode;
        ++pchar_unicode;
    }
    else
    {
        lastc = -1;
    }

    if( lastc == '\n' )
    {
        ++line;
    }

    return lastc;
}
// Decodes the next character of the char input string with PT::UTF8ToInt()
// and moves the pointer by the length it returns. Decoding is repeated
// while the result is not correct and the string has not ended.
// lastc becomes the decoded character, or -1 when no correct one was found.
// The line counter is incremented on '\n'. Returns lastc.
int JSONToSpaceParser::ReadCharFromUTF8String()
{
    int decoded;
    bool valid;

    lastc = -1;

    do
    {
        const size_t consumed = PT::UTF8ToInt(pchar_ascii, decoded, valid);
        pchar_ascii += consumed;
    }
    while( !valid && *pchar_ascii );

    if( valid )
    {
        lastc = decoded;
    }

    if( lastc == '\n' )
    {
        ++line;
    }

    return lastc;
}
// Takes the next byte from the char input string (one byte = one character).
// At the terminating zero lastc becomes -1 and the pointer is not moved.
// The line counter is incremented on '\n'. Returns lastc.
//
// The byte is converted through unsigned char: where plain char is signed,
// a direct conversion would turn bytes >= 0x80 into negative numbers and
// 0xFF into -1, which the parser uses as the "end of input" marker.
// With the conversion the result is always in the range 0..255.
int JSONToSpaceParser::ReadCharFromAsciiString()
{
    if( *pchar_ascii == 0 )
    {
        lastc = -1;
    }
    else
    {
        lastc = static_cast<unsigned char>(*pchar_ascii);
        ++pchar_ascii;
    }

    if( lastc == '\n' )
        ++line;

    return lastc;
}
// Reads one character from the currently selected input source, without
// interpreting escape sequences. The source is chosen by reading_from_file,
// reading_from_wchar_string and input_as_utf8.
int JSONToSpaceParser::ReadCharNoEscape()
{
    if( reading_from_file )
    {
        return input_as_utf8 ? ReadUTF8Char() : ReadASCIIChar();
    }

    if( reading_from_wchar_string )
    {
        return ReadCharFromWcharString();
    }

    return input_as_utf8 ? ReadCharFromUTF8String() : ReadCharFromAsciiString();
}
// Tells whether c is a hexadecimal digit: 0-9, a-f or A-F.
bool JSONToSpaceParser::IsHexDigit(wchar_t c)
{
    if( c >= '0' && c <= '9' )
        return true;

    if( c >= 'a' && c <= 'f' )
        return true;

    if( c >= 'A' && c <= 'F' )
        return true;

    return false;
}
// Returns the numeric value (0-15) of a hexadecimal digit,
// or 0 when c is not a hexadecimal digit.
int JSONToSpaceParser::HexToInt(wchar_t c)
{
    int digit = 0;

    if( c >= '0' && c <= '9' )
    {
        digit = c - '0';
    }
    else if( c >= 'a' && c <= 'f' )
    {
        digit = c - 'a' + 10;
    }
    else if( c >= 'A' && c <= 'F' )
    {
        digit = c - 'A' + 10;
    }

    return digit;
}
// Reads the four hexadecimal digits of a \uXXXX escape sequence and puts
// the resulting number into lastc. When one of the four characters is not
// a hexadecimal digit, status is set to syntax_error and reading stops there.
void JSONToSpaceParser::ReadUnicodeCodePoint()
{
    int code_point  = 0;
    int digits_left = 4;

    while( digits_left-- > 0 )
    {
        const wchar_t digit = ReadCharNoEscape();

        if( !IsHexDigit(digit) )
        {
            status = syntax_error;
            return;
        }

        code_point = (code_point << 4) | HexToInt(digit);
    }

    lastc = static_cast<wchar_t>(code_point);
}
// Reads the next character into lastc and returns it.
// With the use_escape_char option a backslash starts an escape sequence:
// char_was_escaped is set and the following character is translated
// (\0 \t \r \n \b \f \uXXXX); any other character after the backslash
// is returned unchanged.
int JSONToSpaceParser::ReadChar()
{
    char_was_escaped = false;
    ReadCharNoEscape();

    if( !use_escape_char || lastc != '\\' )
    {
        return lastc;
    }

    char_was_escaped = true;
    ReadCharNoEscape();

    switch( lastc )
    {
    case 'u': ReadUnicodeCodePoint(); break;
    case 'n': lastc = '\n'; break;
    case 'r': lastc = '\r'; break;
    case 't': lastc = '\t'; break;
    case 'f': lastc = 0x0c; break;
    case 'b': lastc = 0x08; break;
    case '0': lastc = 0; break;
    default:
        // the character following the backslash is taken as it is
        break;
    }

    return lastc;
}
} // namespace