pikotools/src/space/spaceparser.h

/*
 * This file is a part of PikoTools
 * and is distributed under the (new) BSD licence.
 * Author: Tomasz Sowa <t.sowa@ttmath.org>
 */

/*
 * Copyright (c) 2012-2021, Tomasz Sowa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 *  * Neither the name Tomasz Sowa nor the names of contributors to this
 *    project may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef headerfile_picotools_space_jsonspaceparser
#define headerfile_picotools_space_jsonspaceparser

#include <fstream>
#include "space.h"


namespace pt
{


/*
	SpaceParser -- a parser which reads text in either the JSON format
	or the Space format and stores the result in a Space object.

	The destination (root) Space object is set with SetSpace().
	The input can be read from a file (ParseJSONFile(), ParseSpaceFile())
	or from a string (ParseJSON(), ParseSpace()).

	Each parsing method returns a Status value; the last status is also
	available in the public 'status' member.
*/
class SpaceParser
{
public:


	/*
		ctor -- sets the default values (see the SetDefault() method)
	*/
	SpaceParser();


	/*
		setting the root space (the Space object used by the parsing methods)

		NOTE(review): the pointer/reference is only stored in root_space here,
		so the Space object presumably has to outlive the parsing calls
		-- confirm in the implementation
	*/
	void SetSpace(Space * pspace);
	void SetSpace(Space & pspace);


	/*
		setting options of the parser to their default values
		(utf8 etc.)
	*/
	void SetDefault();


	/*
		status of parsing

		NOTE(review): the meaning of each value is inferred from its name only:
			ok             - parsing finished without an error
			cant_open_file - the input file could not be opened
			syntax_error   - the input has a syntax error (see get_last_parsed_line())
			no_space       - presumably no root space was set with SetSpace()
		confirm in the implementation
	*/
	enum Status { ok, cant_open_file, syntax_error, no_space };


	/*
		the last status of parsing, set by the Parse...() methods
	*/
	Status status;


	/*
		main methods used to parse a file in the JSON format
		file_name is the path to the file
	*/
	Status ParseJSONFile(const char * file_name);
	Status ParseJSONFile(const std::string & file_name);
	Status ParseJSONFile(const wchar_t * file_name);
	Status ParseJSONFile(const std::wstring & file_name);


	/*
		main methods used to parse a file in the Space format
		file_name is the path to the file
	*/
	Status ParseSpaceFile(const char * file_name);
	Status ParseSpaceFile(const std::string & file_name);
	Status ParseSpaceFile(const wchar_t * file_name);
	Status ParseSpaceFile(const std::wstring & file_name);


	/*
		main methods used to parse a string in the JSON format
		str - input string (either 8bit ascii or UTF-8 -- see the UTF8() method)
	*/
	Status ParseJSON(const char * str);
	Status ParseJSON(const std::string & str);

	/*
		main methods used to parse a string in the JSON format
		here the input string is always in unicode (wide characters)
	*/
	Status ParseJSON(const wchar_t * str);
	Status ParseJSON(const std::wstring & str);


	/*
		main methods used to parse a string in the Space format

		NOTE(review): the narrow (char) and wide (wchar_t) overloads presumably
		treat the input encoding in the same way as the ParseJSON() overloads
		-- confirm in the implementation
	*/
	Status ParseSpace(const char * str);
	Status ParseSpace(const std::string & str);
	Status ParseSpace(const wchar_t * str);
	Status ParseSpace(const std::wstring & str);


	/*
	 * TODO: add a two-argument Parse method, e.g.:
	 * Status Parse(const char * str, Space & output_space);
	 *
	 */


	/*
		if true then empty values and empty lists, e.g:
			option =
			option2 = ()
		will be omitted (not inserted to 'table')
		default: false
	*/
	void SkipEmpty(bool skip);


	/*
		the '\' character is used to escape other characters in a quoted string
		so "some \t t\"ext" will produce "some t t"ext"
		default: true
	*/
	void UseEscapeChar(bool escape);


	/*
		if true then the input file or string (char* or std::string) is treated as UTF-8
		default: true

		the internal storage for strings is std::wstring so if you call UTF8(false) then
		each character of the input string is simply converted with static_cast<>
		from char to wchar_t
	*/
	// TODO: rename to use_utf8(bool)
	void UTF8(bool utf);


	/*
	 *
	 * returns the number of the last parsed line
	 * can be used to obtain the line in which there was a syntax error
	 *
	 */
	int get_last_parsed_line();


private:


	/*
		the current (root) space, set by SetSpace()
	*/
	Space * root_space;


	/*
		the number of the line in which there is a syntax error

		NOTE(review): presumably this is the value returned by
		get_last_parsed_line() -- confirm in the implementation
	*/
	int line;

	/*
		true if the input is read from a file
		false if the input is read from a string

		NOTE(review): the original comment named Parse() and ParseString()
		as the methods setting this flag; ParseString() is not declared in this
		class -- presumably the Parse...File() methods and the string overloads
		of ParseJSON()/ParseSpace() are meant; confirm in the implementation
	*/
	bool reading_from_file;


	/*
		pointers to the current character,
		used when the input is read from a string
		(pchar_ascii for char input, pchar_unicode for wchar_t input)
	*/
	const char    * pchar_ascii;
	const wchar_t * pchar_unicode;


	/*
		true if the input is a wide string (wchar_t * or std::wstring)

		NOTE(review): the original comment referred to ParseString(wchar_t *)
		and ParseString(std::wstring&), which are not declared in this class
		-- presumably the wide overloads of ParseJSON()/ParseSpace() are meant
	*/
	bool reading_from_wchar_string;


	/*
		the last read token
	*/
	std::wstring token;


	/*
		separator between a variable and a value, default: '='
	*/
	int separator;


	/*
		space starting character, default: '{'
	*/
	int space_start;


	/*
		space ending character, default: '}'
	*/
	int space_end;


	/*
		table starting character, default: '['
	*/
	int table_start;


	/*
		table ending character, default: ']'
	*/
	int table_end;


	/*
		option delimiter, default: ','
	*/
	int option_delimiter;


	/*
		the last read char
		or -1 if the end of the input was reached
	*/
	int lastc;


	/*
		true if lastc was escaped (with a backslash)
		we have to know whether the last sequence was \" or just "
	*/
	bool char_was_escaped;


	/*
		the current input file

		TODO: maybe it would be better to make this a pointer?
		if we parse only a string then there is no sense in having such an object
	*/
	std::ifstream file;


	/*
		if true then empty lists, e.g:
			option =
			option2 = ()
		will be omitted (not inserted to 'table')
		default: false
		set by SkipEmpty()
	*/
	bool skip_empty;


	/*
		the input file or string is in UTF-8
		default: true
		set by UTF8()
	*/
	bool input_as_utf8;


	/*
		if true you can use the escape character '\' in quoted values
		set by UseEscapeChar()
	*/
	bool use_escape_char;


	/*
	 *
	 * true if we are parsing the Space format
	 * if parsing_space is false then it means we are parsing the JSON format
	 *
	 */
	bool parsing_space;


	/*
		internal parsing routines

		NOTE(review): only the declarations are visible in this header,
		the descriptions below are inferred from the names and signatures
		-- confirm in the implementation

		'space' is the Space object which the parsed content is put into
	*/
	void ParseRootSpace();
	void Parse(Space * space, bool is_object_value, bool is_table_value);
	void ParseSpace(Space * space);
	void ParseTable(Space * space);

	void ParseKeyValuePairs(Space * space);
	void ParseValuesList(Space * space);

	void ReadKey();

	/*
		parsing a single value (text, integer, floating point)
	*/
	void ParseTextValue(Space * space);
	void ParseIntegerValue(Space * space);
	void ParseFloatingPointValue(Space * space);


	bool is_alfa_numeric_char(int c);

	/*
		reading a token from the input into the 'token' argument
	*/
	void ReadTokenUntilDelimiter(std::wstring & token, int delimiter1, int delimiter2);
	void ReadAlfaNumericToken(std::wstring & token);
	void ReadStringValue(std::wstring & token, bool is_object_value, bool is_table_value);


	/*
		NOTE(review): these take no arguments so presumably they test
		the 'token' member (the last read token) -- confirm
	*/
	bool is_integer_token();
	bool is_floating_point_token();

	void ReadSpaceFieldToken(std::wstring & token);
	void ReadTokenQuoted(std::wstring & token);
	void ReadMultilineTokenQuoted(std::wstring & token);


	/*
		low level methods for reading characters from the input
		(a file, a char string or a wchar_t string)

		NOTE(review): the methods returning int presumably return the read
		character or -1 at the end of the input (the same convention
		as the lastc member) -- confirm
	*/
	int  ReadUTF8Char();
	int  ReadASCIIChar();
	int  ReadCharFromWcharString();
	int  ReadCharFromUTF8String();
	int  ReadCharFromAsciiString();
	int  ReadCharNoEscape();
	int  ReadChar();
	bool IsWhite(int c);
	void SkipLine();
	void SkipWhite();
	void TrimLastWhite(std::wstring & s);
	//void Trim(std::wstring & s);
	bool IsHexDigit(wchar_t c);
	int  HexToInt(wchar_t c);
	void ReadUnicodeCodePoint();

};


} // namespace


#endif