pikotools/space/jsontospaceparser.cpp

/*
 * This file is a part of PikoTools
 * and is distributed under the (new) BSD licence.
 * Author: Tomasz Sowa <t.sowa@ttmath.org>
 */

/*
 * Copyright (c) 2012-2017, Tomasz Sowa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 *  * Neither the name Tomasz Sowa nor the names of contributors to this
 *    project may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <cstdlib>
#include <wchar.h>
#include "jsontospaceparser.h"
#include "utf8/utf8.h"


namespace PT
{


/*
 * Builds a parser with the default options (see SetDefault())
 * and with no destination space attached yet.
 */
JSONToSpaceParser::JSONToSpaceParser()
{
	SetDefault();
	root_space = 0;
}


/*
 * Sets the space which will be filled by the next parsing call.
 * A null pointer makes Parse() finish with the no_space status.
 */
void JSONToSpaceParser::SetSpace(Space * pspace)
{
	this->root_space = pspace;
}


/*
 * Reference flavour of SetSpace(): the address of pspace
 * becomes the root space used by the parser.
 */
void JSONToSpaceParser::SetSpace(Space & pspace)
{
	Space * const target = &pspace;
	root_space = target;
}


/*
 * Restores all parser options to their default values.
 */
void JSONToSpaceParser::SetDefault()
{
	// structural characters - they can be changed to something else,
	// but white characters (those recognized by IsWhite())
	// and the new line character ('\n') should not be used here
	space_start      = '{';
	space_end        = '}';
	table_start      = '[';
	table_end        = ']';
	separator        = ':';
	option_delimiter = ',';

	// behaviour switches
	create_table_as_space = true;
	input_as_utf8         = true;
	use_escape_char       = true;
	skip_empty            = false;

	// limit of nested spaces/tables
	max_nested_level = 1000;
}


void JSONToSpaceParser::SkipEmpty(bool skip)
{
	skip_empty = skip;
}


void JSONToSpaceParser::UseEscapeChar(bool escape)
{
	use_escape_char = escape;
}


void JSONToSpaceParser::UTF8(bool utf)
{
	input_as_utf8 = utf;
}


void JSONToSpaceParser::CreateTableAsSpace(bool create_table_as_space_)
{
	create_table_as_space = create_table_as_space_;
}


/*
 * Parses the file called file_name (opened in binary mode).
 *
 * Returns the parsing status, or cant_open_file when the file
 * cannot be opened.
 */
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const char * file_name)
{
	reading_from_file = true;

	file.clear();
	file.open(file_name, std::ios_base::in | std::ios_base::binary);

	if( !file )
	{
		status = cant_open_file;
		return status;
	}

	Parse();
	file.close();

	return status;
}


/*
 * Parses the file called file_name
 * (forwards to the const char * version).
 */
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const std::string & file_name)
{
	const char * raw_name = file_name.c_str();

	return Parse(raw_name);
}


/*
 * Parses the file whose name is given as a wide string.
 * The name is converted with PT::WideToUTF8() into the afile_name buffer
 * and then the const char * version is used.
 */
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const wchar_t * file_name)
{
	PT::WideToUTF8(file_name, afile_name);
	const char * converted_name = afile_name.c_str();

	return Parse(converted_name);
}


/*
 * Parses the file whose name is given as a std::wstring
 * (forwards to the const wchar_t * version).
 */
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const std::wstring & file_name)
{
	const wchar_t * raw_name = file_name.c_str();

	return Parse(raw_name);
}


/*
 * Parses a null terminated char string
 * (decoded as UTF-8 or byte by byte, depending on input_as_utf8).
 *
 * Returns the parsing status.
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const char * str)
{
	// selecting the char string as the source of characters
	pchar_unicode             = 0;
	pchar_ascii               = str;
	reading_from_wchar_string = false;
	reading_from_file         = false;

	Parse();

	return status;
}


/*
 * Parses the content of a std::string
 * (forwards to the const char * version).
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const std::string & str)
{
	const char * content = str.c_str();

	return ParseString(content);
}


/*
 * Parses a null terminated wide string.
 *
 * Returns the parsing status.
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const wchar_t * str)
{
	// selecting the wide string as the source of characters
	pchar_ascii               = 0;
	pchar_unicode             = str;
	reading_from_wchar_string = true;
	reading_from_file         = false;

	Parse();

	return status;
}


/*
 * Parses the content of a std::wstring
 * (forwards to the const wchar_t * version).
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const std::wstring & str)
{
	const wchar_t * content = str.c_str();

	return ParseString(content);
}


/*
 * The main parsing routine, the input source has to be selected beforehand.
 *
 * The input has to begin (after optional white characters) with a space
 * ('{') or a table ('['). The result is reported through 'status':
 * no_space when no root space has been set, syntax_error for a malformed
 * input, or a status set by one of the called parsing methods.
 */
void JSONToSpaceParser::Parse()
{
	if( root_space == 0 )
	{
		status = no_space;
		return;
	}

	// resetting the state kept between parsing runs
	status  = ok;
	line    = 1;
	skipped = 0;
	current_nested_level = 0;
	space   = root_space;

	ReadChar();
	SkipWhite();

	const bool starts_with_space = (lastc == space_start);
	const bool starts_with_table = (lastc == table_start);

	if( starts_with_space )
	{
		ParseSpace(false, false);
	}
	else
	if( starts_with_table )
	{
		ParseTable(false);
	}
	else
	{
		// neither '{' nor '[' at the beginning
		status = syntax_error;
	}

	if( space != root_space && status == ok )
	{
		// some spaces have not been closed
		status = syntax_error;
	}

	// temporary buffers are not needed any more
	key.clear();
	value.clear();
	token.clear();
}


void JSONToSpaceParser::ParseSpace(bool has_space_name, bool insert_new_space)
{
	//current_nested_level += 1;

	if( current_nested_level > max_nested_level )
	{
		status = max_nested_spaces_exceeded;
		return;
	}

	if( insert_new_space )
	{
		SpaceStarts(has_space_name);
	}
	else
	{
		// insert_new_space as a false is used only when parsing
		// the first space (root_space)
		ReadChar(); // skipping the first space character '{'
	}

	ParseKeyValuePairs();

	if( insert_new_space )
	{
		SpaceEnds();
	}
	else
	{
		ReadChar(); // skipping the last space character '}'
	}

	//current_nested_level -= 1;
}


/*
 * Reads the sequence of "key : value" pairs of the current space.
 * The loop finishes at the closing character of the space, at the end
 * of the input or when an error has been reported.
 *
 * A key which is not followed by the separator, or a value which is not
 * followed by the delimiter or by the end of the space, gives syntax_error.
 */
void JSONToSpaceParser::ParseKeyValuePairs()
{
	SkipWhite();

	while( !(status != ok || lastc == space_end || lastc == -1) )
	{
		ReadKey();
		SkipWhite();

		if( lastc != separator )
		{
			// ':' expected after a key
			if( status == ok )
				status = syntax_error;

			continue;
		}

		value.clear();
		ReadChar(); // skipping separator ':'
		ReadValue(false, false, true, true);
		SkipWhite();

		if( lastc == option_delimiter )
		{
			ReadChar(); // skipping delimiter ','
			continue;
		}

		if( status == ok && lastc != space_end )
			status = syntax_error;
	}
}


/*
 * Parses a table ('[' ... ']') as a list of text values.
 * The values are collected in 'value' and stored under the current 'key'
 * with AddKeyValuePair(); objects and tables nested inside are skipped.
 */
void JSONToSpaceParser::ParseTextTable()
{
	ReadChar(); // skipping table start character '['
	SkipWhite();
	value.clear();

	for(;;)
	{
		if( status != ok || lastc == table_end || lastc == -1 )
			break;

		// true: nested spaces and tables are only skipped
		ReadValue(true);
		SkipWhite();

		if( lastc == option_delimiter )
		{
			ReadChar(); // skipping delimiter ','
			continue;
		}

		if( status == ok && lastc != table_end )
			status = syntax_error;
	}

	if( lastc == table_end )
	{
		ReadChar(); // skipping end table character ']'
	}

	AddKeyValuePair();
}


/*
 * Parses a table ('[' ... ']') as a new child space.
 * Each item of the table becomes a space inside it: text items through
 * the add_space_for_text_value mode of ReadValue().
 *
 * has_key - passed to SpaceStarts(): when true the new space
 *           gets its name from the current 'key'
 */
void JSONToSpaceParser::ParseObjectsTable(bool has_key)
{
	ReadChar(); // skipping table start character '['
	SpaceStarts(has_key, false);
	SkipWhite();

	for(;;)
	{
		if( status != ok || lastc == table_end || lastc == -1 )
			break;

		// the 'value' list is not used in this mode, spaces are created instead
		ReadValue(false, true);
		SkipWhite();

		if( lastc == option_delimiter )
		{
			ReadChar(); // skipping delimiter ','
			continue;
		}

		if( status == ok && lastc != table_end )
			status = syntax_error;
	}

	if( lastc == table_end )
	{
		ReadChar(); // skipping end table character ']'
	}

	SpaceEnds(false);
}


/*
 * Parses a table ('[' ... ']'), lastc is expected to be the opening
 * character of the table.
 *
 * Depending on create_table_as_space the table becomes a child space
 * (ParseObjectsTable()) or a list of text values (ParseTextTable()).
 *
 * has_key - true when the table is the value of the current 'key';
 *           when false and a text table is created the key is cleared first
 *
 * Sets status to max_nested_spaces_exceeded when a table parsed as a space
 * is nested deeper than max_nested_level.
 */
void JSONToSpaceParser::ParseTable(bool has_key)
{
	if( !create_table_as_space )
	{
		// ParseTextTable will not create a next level

		if( !has_key )
			key.clear();

		ParseTextTable(); // ParseTextTable will use key
		return;
	}

	// the counter has to be really maintained, otherwise the check below
	// never fires and a deeply nested input can exhaust the stack
	current_nested_level += 1;

	if( current_nested_level > max_nested_level )
	{
		status = max_nested_spaces_exceeded;
	}
	else
	{
		ParseObjectsTable(has_key);
	}

	current_nested_level -= 1;
}


void JSONToSpaceParser::SpaceStarts(bool has_space_name, bool skip_space_char)
{
	Space * new_space = new Space();
	space->spaces.push_back(new_space);
	new_space->parent = space;

	if( has_space_name )
		new_space->name = key;

	space = new_space;

	if( skip_space_char )
		ReadChar();	// skipping space starts character '{'
}


/*
 * Closes the current space: its parent becomes the current space.
 * Closing the root space is a syntax error.
 *
 * skip_space_char - when true the next character is read
 *                   (skipping the closing space character '}')
 */
void JSONToSpaceParser::SpaceEnds(bool skip_space_char)
{
	if( space == root_space )
	{
		// the global space cannot be closed here
		status = syntax_error;
		return;
	}

	space = space->parent;

	if( skip_space_char )
	{
		ReadChar();
	}
}


/*
 * Returns true if c is one of the characters treated as white:
 * a space, a tab, a line feed (10), a carriage return (13)
 * or a non-breaking space (160).
 */
bool JSONToSpaceParser::IsWhite(int c)
{
	switch( c )
	{
	case ' ':
	case '\t':
	case 10:	// \n
	case 13:	// \r (at the end of a line in a dos file: \r\n)
	case 160:	// non-breaking space
		return true;

	default:
		return false;
	}
}


/*
 * Reads characters until lastc is not a white character
 * (as defined by IsWhite()).
 */
void JSONToSpaceParser::SkipWhite()
{
	for( ; IsWhite(lastc) ; ReadChar() )
	{
	}
}


/*
 * Removes white characters (as defined by IsWhite()) from the beginning
 * and from the end of s. A string consisting only of white characters
 * becomes empty.
 */
void JSONToSpaceParser::Trim(std::wstring & s)
{
	// 'keep_end' is the index one past the last non-white character
	std::wstring::size_type keep_end = s.size();

	while( keep_end > 0 && IsWhite(s[keep_end - 1]) )
		--keep_end;

	if( keep_end == 0 )
	{
		// nothing but white characters (or nothing at all)
		s.clear();
		return;
	}

	// cutting off the white characters at the end
	s.erase(keep_end, std::wstring::npos);

	// 'keep_begin' is the index of the first non-white character
	std::wstring::size_type keep_begin = 0;

	while( keep_begin < s.size() && IsWhite(s[keep_begin]) )
		++keep_begin;

	// cutting off the white characters at the beginning
	s.erase(0, keep_begin);
}


void JSONToSpaceParser::DeleteFromTable(const std::wstring & var)
{
	Space::Table::iterator i = space->table.find(var);

	if( i != space->table.end() )
		space->table.erase(i);
}


/*
 * Reads a token enclosed in quotation marks and appends it to 'token'.
 * lastc is expected to be the opening quotation mark. An escaped quotation
 * mark is a part of the token and does not finish it.
 *
 * Sets status to syntax_error when the closing quotation mark is missing.
 */
void JSONToSpaceParser::ReadTokenQuoted()
{
	ReadChar(); // skipping the first quotation mark

	for(;;)
	{
		if( lastc == -1 )
			break;

		if( lastc == '"' && !char_was_escaped )
			break;

		token += static_cast<wchar_t>(lastc);
		ReadChar();
	}

	const bool properly_closed = (lastc == '"') && !char_was_escaped;

	if( !properly_closed )
	{
		status = syntax_error;
		return;
	}

	ReadChar(); // skipping the last quotation mark
}


/*
 * Reads a token which is not enclosed in quotation marks, appends it to
 * 'token' and trims the result.
 *
 * The token ends at the end of the input or at one of the following
 * characters (only if the character was not escaped):
 *  - the closing character of a space or of a table,
 *  - a white character when white_delimit is true,
 *  - a new line when new_line_delimit is true,
 *  - delimit1 or delimit2 (-1 turns a delimiter off).
 */
void JSONToSpaceParser::ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2)
{
	for(;;)
	{
		if( lastc == -1 )
			break;

		if( !char_was_escaped )
		{
			const bool closes_something = (lastc == space_end) || (lastc == table_end);
			const bool white_stop       = white_delimit    && IsWhite(lastc);
			const bool new_line_stop    = new_line_delimit && (lastc == '\n');
			const bool first_stop       = (delimit1 != -1) && (lastc == delimit1);
			const bool second_stop      = (delimit2 != -1) && (lastc == delimit2);

			if( closes_something || white_stop || new_line_stop || first_stop || second_stop )
				break;
		}

		token += static_cast<wchar_t>(lastc);
		ReadChar();
	}

	Trim(token);
}


/*
 * Reads the next token into 'token' (the previous content is discarded).
 * Leading white characters are skipped. A token beginning with a not
 * escaped quotation mark is read by ReadTokenQuoted(), any other one by
 * ReadTokenSingle() with the given delimiters.
 */
void JSONToSpaceParser::ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2)
{
	token.clear();
	SkipWhite();

	const bool quoted = (lastc == '"') && !char_was_escaped;

	if( quoted )
	{
		ReadTokenQuoted();
		return;
	}

	ReadTokenSingle(white_delimit, new_line_delimit, delimit1, delimit2);
}


/*
 * Reads a key of a key-value pair and stores it in 'key'.
 * A key without quotation marks ends at the separator, at the table
 * start character or at a new line.
 */
void JSONToSpaceParser::ReadKey()
{
	const bool stop_at_white    = false;
	const bool stop_at_new_line = true;

	SkipWhite();
	ReadToken(stop_at_white, stop_at_new_line, separator, table_start);

	key = token;
}


void JSONToSpaceParser::SkipText()
{
	ReadChar(); // skipping the first quote character '"'

	while( lastc != '"' && lastc != -1 )
		ReadChar();
}


void JSONToSpaceParser::SkipObjectOrTable(int start_char, int end_char)
{
int mark = 1;

	skipped += 1;
	ReadChar(); // skipping the first object character '{' or '['

	do
	{
		if( lastc == '"' )
			SkipText();
		else
		if( lastc == end_char )
			mark -= 1;
		else
		if( lastc == start_char )
			mark += 1;

		ReadChar();
	}
	while( mark > 0 && lastc != -1 );
}


/*
 * Skips a whole space (object), lastc is expected to be
 * the opening character of the space.
 */
void JSONToSpaceParser::SkipObject()
{
	const int opening = space_start;
	const int closing = space_end;

	SkipObjectOrTable(opening, closing);
}


/*
 * Skips a whole table, lastc is expected to be
 * the opening character of the table.
 */
void JSONToSpaceParser::SkipTable()
{
	const int opening = table_start;
	const int closing = table_end;

	SkipObjectOrTable(opening, closing);
}


//void JSONToSpaceParser::ReadValue(bool add_space_for_single_value, bool auto_add_single_value, bool has_space_name)
/*
 * Reads one value: a space, a table or a text.
 *
 * skip_object_or_table     - when true a space or a table is only skipped,
 *                            otherwise it is parsed
 * add_space_for_text_value - when true a text value creates a new child space
 *                            and the text becomes the name of that space
 * has_key                  - passed to ParseSpace() / ParseTable()
 * auto_add_text_value      - when true a text value is immediately stored
 *                            with AddKeyValuePair()
 *
 * A text value which does not create a space is appended to 'value'.
 */
void JSONToSpaceParser::ReadValue(bool skip_object_or_table,
								bool add_space_for_text_value,
								bool has_key,
								bool auto_add_text_value)
{
	SkipWhite();

	if( lastc == space_start )
	{
		if( skip_object_or_table )
			SkipObject();
		else
			ParseSpace(has_key);

		return;
	}

	if( lastc == table_start )
	{
		if( skip_object_or_table )
			SkipTable();
		else
			ParseTable(has_key);

		return;
	}

	// a text value

	if( add_space_for_text_value )
	{
		SpaceStarts(false, false);
		ReadToken(false, true, option_delimiter, -1);
		space->name = token;
		SpaceEnds(false);

		return;
	}

	ReadToken(false, true, option_delimiter, -1);
	value.push_back(token);

	if( auto_add_text_value )
	{
		AddKeyValuePair();
	}
}


/*
 * Stores 'value' under 'key' in the table of the current space.
 * When the skip_empty option is set and there are no values
 * the key is removed from the table instead.
 */
void JSONToSpaceParser::AddKeyValuePair()
{
	const bool drop_entry = value.empty() && skip_empty;

	if( !drop_entry )
	{
		space->table[key] = value;
		return;
	}

	DeleteFromTable(key);
}


/*
 * Reads one UTF-8 encoded character from the file into lastc.
 * Sequences reported by PT::UTF8ToInt() as incorrect are skipped.
 *
 * Returns lastc, which is -1 when the stream is no longer readable.
 * The line counter is increased after a new line character.
 */
int JSONToSpaceParser::ReadUTF8Char()
{
int decoded;
bool is_valid;

	lastc = -1;

	for(;;)
	{
		PT::UTF8ToInt(file, decoded, is_valid);

		if( !file )
			return lastc;

		if( is_valid )
			break;
	}

	lastc = decoded;

	if( lastc == '\n' )
		line += 1;

	return lastc;
}


/*
 * Reads one byte from the file into lastc (no UTF-8 decoding).
 *
 * Returns lastc. The line counter is increased after a new line character.
 */
int JSONToSpaceParser::ReadASCIIChar()
{
	lastc = file.get();

	const bool is_new_line = (lastc == '\n');

	if( is_new_line )
		line += 1;

	return lastc;
}


/*
 * Reads one character from the wide string into lastc.
 * At the terminating null the pointer is not moved and lastc is -1.
 *
 * Returns lastc. The line counter is increased after a new line character.
 */
int JSONToSpaceParser::ReadCharFromWcharString()
{
	const bool at_end = (*pchar_unicode == 0);

	if( !at_end )
	{
		lastc = *pchar_unicode;
		++pchar_unicode;
	}
	else
	{
		lastc = -1;
	}

	if( lastc == '\n' )
		line += 1;

	return lastc;
}


/*
 * Reads one UTF-8 encoded character from the char string into lastc.
 * Sequences reported by PT::UTF8ToInt() as incorrect are skipped.
 * At the terminating null the pointer is not moved and lastc is -1
 * (the same contract as in ReadCharFromAsciiString() and
 * ReadCharFromWcharString()).
 *
 * Returns lastc. The line counter is increased after a new line character.
 */
int JSONToSpaceParser::ReadCharFromUTF8String()
{
int c;
bool correct;

	lastc = -1;

	// the end of the string is checked here explicitly so that
	// the pointer can never be moved past the terminating null,
	// whatever PT::UTF8ToInt() does with an empty input
	if( *pchar_ascii == 0 )
		return lastc;

	do
	{
		size_t len = PT::UTF8ToInt(pchar_ascii, c, correct);

		// NOTE(review): guard against a decoder which consumes nothing,
		// the loop would never finish in such a case - confirm whether
		// PT::UTF8ToInt() can return zero for a non-empty input
		if( len == 0 )
			break;

		pchar_ascii += len;
	}
	while( *pchar_ascii && !correct );

	if( correct )
		lastc = c;

	if( lastc == '\n' )
		++line;

	return lastc;
}


/*
 * Reads one byte from the char string into lastc (no UTF-8 decoding).
 * At the terminating null the pointer is not moved and lastc is -1.
 *
 * Returns lastc. The line counter is increased after a new line character.
 */
int JSONToSpaceParser::ReadCharFromAsciiString()
{
	if( *pchar_ascii == 0 )
	{
		lastc = -1;
	}
	else
	{
		// the byte is converted through unsigned char: where plain char
		// is signed a byte above 127 would become a negative number
		// (0xFF would be -1, the end of input marker) and would never
		// match values such as 160 in IsWhite();
		// file.get() used in ReadASCIIChar() returns 0..255 as well
		lastc = static_cast<unsigned char>(*(pchar_ascii++));
	}

	if( lastc == '\n' )
		++line;

	return lastc;
}


/*
 * Reads one character from the current input source, without interpreting
 * escape sequences.
 *
 * The source is a file, a wide string or a char string; for a file and
 * for a char string input_as_utf8 selects between UTF-8 decoding
 * and reading byte by byte.
 *
 * Returns the character read (lastc).
 */
int JSONToSpaceParser::ReadCharNoEscape()
{
	if( reading_from_file )
	{
		return input_as_utf8 ? ReadUTF8Char() : ReadASCIIChar();
	}

	if( reading_from_wchar_string )
	{
		return ReadCharFromWcharString();
	}

	return input_as_utf8 ? ReadCharFromUTF8String() : ReadCharFromAsciiString();
}

/*
 * Returns true if c is a hexadecimal digit: 0-9, a-f or A-F.
 */
bool JSONToSpaceParser::IsHexDigit(wchar_t c)
{
	if( c >= '0' && c <= '9' )
		return true;

	if( c >= 'a' && c <= 'f' )
		return true;

	return ( c >= 'A' && c <= 'F' );
}


/*
 * Returns the numeric value (0-15) of the hexadecimal digit c,
 * or zero if c is not a hexadecimal digit.
 */
int JSONToSpaceParser::HexToInt(wchar_t c)
{
int digit_value = 0;

	if( '0' <= c && c <= '9' )
		digit_value = c - '0';
	else
	if( 'a' <= c && c <= 'f' )
		digit_value = c - 'a' + 10;
	else
	if( 'A' <= c && c <= 'F' )
		digit_value = c - 'A' + 10;

	return digit_value;
}


/*
 * Reads the four hexadecimal digits of a \uXXXX escape sequence
 * and puts the resulting number in lastc.
 *
 * Sets status to syntax_error (leaving lastc as the offending character)
 * when one of the four characters is not a hexadecimal digit.
 */
void JSONToSpaceParser::ReadUnicodeCodePoint()
{
int code_point  = 0;
int digits_left = 4;

	while( digits_left > 0 )
	{
		const wchar_t digit = static_cast<wchar_t>(ReadCharNoEscape());

		if( !IsHexDigit(digit) )
		{
			status = syntax_error;
			return;
		}

		code_point = (code_point << 4) | HexToInt(digit);
		digits_left -= 1;
	}

	lastc = static_cast<wchar_t>(code_point);
}


/*
 * Reads the next character into lastc, interpreting escape sequences
 * when the use_escape_char option is set.
 *
 * After a backslash the following character is read and char_was_escaped
 * is set to true. The sequences \0 \t \r \n \b \f are changed to the
 * control characters, \u is followed by four hexadecimal digits
 * (ReadUnicodeCodePoint()), any other escaped character is returned as is.
 *
 * Returns lastc.
 */
int JSONToSpaceParser::ReadChar()
{
	char_was_escaped = false;
	ReadCharNoEscape();

	if( !use_escape_char || lastc != '\\' )
		return lastc;

	char_was_escaped = true;
	ReadCharNoEscape();

	if( lastc == '0' )
		lastc = 0;
	else
	if( lastc == 't' )
		lastc = '\t';
	else
	if( lastc == 'r' )
		lastc = '\r';
	else
	if( lastc == 'n' )
		lastc = '\n';
	else
	if( lastc == 'b' )
		lastc = 0x08;
	else
	if( lastc == 'f' )
		lastc = 0x0c;
	else
	if( lastc == 'u' )
		ReadUnicodeCodePoint();

	return lastc;
}


} // namespace