pikotools/space/spaceparser.h

/*
 * This file is a part of PikoTools
 * and is distributed under the (new) BSD licence.
 * Author: Tomasz Sowa <t.sowa@ttmath.org>
 */

/*
 * Copyright (c) 2010-2017, Tomasz Sowa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 *  * Neither the name Tomasz Sowa nor the names of contributors to this
 *    project may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef headerfile_picotools_confparser_spaceparser
#define headerfile_picotools_confparser_spaceparser

#include <fstream>
#include "space.h"


namespace PT
{


/*
	SpaceParser -- a parser which reads a text either from a file
	(Parse() methods) or from a string (ParseString() methods).
	The space to work on is given by SetSpace().

	Only declarations are visible in this header, the parsing code
	itself lives in the implementation file.

	After parsing, the result is returned by the Parse()/ParseString() methods
	and is also available in the public 'status' member; the 'line' member
	holds the number of the line with a syntax error.
*/
class SpaceParser
{
public:


	/*
		ctor -- sets the default values (see the SetDefault() method)
	*/
	SpaceParser();


	/*
		setting the root space
		(the space can be given either by a pointer or by a reference)
	*/
	void SetSpace(Space * pspace);
	void SetSpace(Space & pspace);


	/*
		setting the options of the parser to their default values
		(utf8 etc.)
	*/
	void SetDefault();


	/*
		status of parsing

		NOTE(review): the meaning of each value is inferred from its name:
		presumably 'no_space' means that no space was set by SetSpace()
		-- confirm in the implementation
	*/
	enum Status { ok, cant_open_file, syntax_error, no_space };


	/*
		the last status of parsing, set by the Parse() methods
	*/
	Status status;


	/*
		the number of the line in which a syntax error (syntax_error) occurred
	*/
	int line;


	/*
		main methods used to parse a file
		file_name is the path to the file
	*/
	Status Parse(const char * file_name);
	Status Parse(const std::string & file_name);
	Status Parse(const wchar_t * file_name);
	Status Parse(const std::wstring & file_name);


	/*
		main methods used to parse a string
		str - input string (either 8bit ascii or UTF-8 -- see the UTF8() method)
	*/
	Status ParseString(const char * str);
	Status ParseString(const std::string & str);


	/*
		main methods used to parse a string
		here the input string is always in unicode (wide characters)
	*/
	Status ParseString(const wchar_t * str);
	Status ParseString(const std::wstring & str);


	/*
		if true then empty values and empty lists, e.g.:
			option =
			option2 = ()
		will be omitted (not inserted to 'table')
		default: false
	*/
	void SkipEmpty(bool skip);


	/*
		the '\' character is used to escape other characters
		so "some \t t\"ext" will produce "some t t"ext"
		default: true
		special characters:
		\0  - 0 (zero code point)
		\t  - tabulator (9 code point)
		\r  - carriage return (13 code point)
		\n  - a new line character (10 code point)
		in other cases we return the last character so \Z gives Z and \\ gives one \
		escape characters are not interpreted in commentaries
		so you can write:
		# this is my comment \n but this was not a new line
	*/
	void UseEscapeChar(bool escape);


	/*
		if true then the input file or string (char* or std::string) is treated as UTF-8
		default: true
	*/
	void UTF8(bool utf);


private:


	/*
		the root space set by SetSpace()
	*/
	Space * root_space;


	/*
		the space in which we currently are
	*/
	Space * space;


	/*
		true if a Parse() method was called
		false if a ParseString() method was called
	*/
	bool reading_from_file;


	/*
		pointers to the current character
		used when ParseString() is in use
	*/
	const char    * pchar_ascii;
	const wchar_t * pchar_unicode;


	/*
		true if ParseString(wchar_t *) or ParseString(std::wstring&) was called
	*/
	bool reading_from_wchar_string;


	/*
		last read token
	*/
	std::wstring token;


	/*
		last read key
	*/
	std::wstring key;


	/*
		last read list (the value of the last read key)
	*/
	Space::Value value;


	/*
		separator between a variable and a value, default: '='
	*/
	int separator;


	/*
		commentary character, default: '#'
	*/
	int commentary;


	/*
		list starting character, default: '('
	*/
	int list_start;


	/*
		list ending character, default: ')'
	*/
	int list_end;


	/*
		list delimiter, default: ','
	*/
	int list_delimiter;


	/*
		last read character
		or -1 if the end of the input was reached
	*/
	int lastc;


	/*
		true if lastc was escaped (with a backslash)
		we have to know whether the last sequence was \" or just "
	*/
	bool char_was_escaped;


	/*
		current file
	*/
	std::ifstream file;


	/*
		if true then empty values and empty lists, e.g.:
			option =
			option2 = ()
		will be omitted (not inserted to 'table')
		default: false
		(see the SkipEmpty() method)
	*/
	bool skip_empty;


	/*
		the input file (or char* / std::string input) is in UTF-8
		default: true
		(see the UTF8() method)
	*/
	bool input_as_utf8;


	/*
		if true you can use the escape character '\' in quoted values
		(see the UseEscapeChar() method)
	*/
	bool use_escape_char;


	/*
		true if we are reading a commentary (#)
		this is to avoid parsing escape characters in the commentary
	*/
	bool reading_commentary;

	/*
		NOTE(review): presumably a narrow-character (char) version of the file name
		given to Parse(), needed to open 'file' -- confirm in the implementation
	*/
	std::string afile_name;

	/*
		internal parsing routines
		(defined in the implementation file, their bodies are not visible here)
	*/
	void Parse();
	void ParseLoop();
	void SpaceEnds();
	void SpaceStarts();

	void DeleteFromTable(const std::wstring & var);

	/*
		reading tokens, keys and values
		NOTE(review): presumably the results are stored in the 'token', 'key'
		and 'value' members -- confirm in the implementation
	*/
	void ReadTokenQuoted();
	void ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2);
	void ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2);
	void ReadKey();
	void ReadValueList();
	void ReadValueSingle();
	void ReadValue();
	void AddKeyValuePair();

	/*
		reading a single character from the current input (file or string)
		NOTE(review): presumably each of them returns the character read
		or -1 at the end of the input (as described for 'lastc') -- confirm
	*/
	int  ReadUTF8Char();
	int  ReadASCIIChar();
	int  ReadCharFromWcharString();
	int  ReadCharFromUTF8String();
	int  ReadCharFromAsciiString();
	int  ReadCharNoEscape();
	int  ReadChar();

	/*
		helpers for white characters, lines and commentaries
	*/
	bool IsWhite(int c);
	void SkipWhite(bool skip_lines = false);
	void SkipWhiteLines();
	void SkipLine();
	void SkipComment();
	void Trim(std::wstring & s);

};


} // namespace PT


#endif