2012-01-12 01:24:08 +01:00
|
|
|
/*
 * This file is a part of PikoTools
 * and is distributed under the (new) BSD licence.
 * Author: Tomasz Sowa <t.sowa@ttmath.org>
 */
|
|
|
|
|
|
|
|
/*
 * Copyright (c) 2010-2012, Tomasz Sowa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 *  * Neither the name Tomasz Sowa nor the names of contributors to this
 *    project may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */
|
|
|
|
|
2012-07-12 17:01:15 +02:00
|
|
|
#ifndef headerfile_picotools_confparser_spaceparser
|
|
|
|
#define headerfile_picotools_confparser_spaceparser
|
2012-01-12 01:24:08 +01:00
|
|
|
|
|
|
|
#include <fstream>
|
|
|
|
#include "space.h"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
namespace PT
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
|
|
|
2012-04-30 15:10:55 +02:00
|
|
|
class SpaceParser
|
2012-01-12 01:24:08 +01:00
|
|
|
{
|
|
|
|
public:
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
ctor -- setting default values (SetDefault() method)
|
|
|
|
*/
|
2012-04-30 15:10:55 +02:00
|
|
|
SpaceParser();
|
2012-01-12 01:24:08 +01:00
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
setting the root space
|
|
|
|
*/
|
|
|
|
void SetSpace(Space * pspace);
|
|
|
|
void SetSpace(Space & pspace);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
setting options of the parser to the default values
|
|
|
|
utf8, split single etc.
|
|
|
|
*/
|
|
|
|
void SetDefault();
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
status of parsing
|
|
|
|
*/
|
|
|
|
enum Status { ok, cant_open_file, syntax_error, no_space };
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
the last status of parsing, set by Parse() methods
|
|
|
|
*/
|
|
|
|
Status status;
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
a number of a line in which there is a syntax_error
|
|
|
|
*/
|
|
|
|
int line;
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
main methods used to parse
|
|
|
|
file_name is the path to a file
|
|
|
|
*/
|
|
|
|
Status Parse(const char * file_name);
|
|
|
|
Status Parse(const std::string & file_name);
|
|
|
|
Status Parse(const wchar_t * file_name);
|
|
|
|
Status Parse(const std::wstring & file_name);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
main methods used to parse
|
|
|
|
str - input string (either 8bit ascii or UTF-8 -- see UTF8() method)
|
|
|
|
*/
|
|
|
|
Status ParseString(const char * str);
|
|
|
|
Status ParseString(const std::string & str);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
main methods used to parse
|
|
|
|
here input string is always in unicode (wide characters)
|
|
|
|
*/
|
|
|
|
Status ParseString(const wchar_t * str);
|
|
|
|
Status ParseString(const std::wstring & str);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
if your list consists of only one item, e.g:
|
|
|
|
option1 = value 1
|
|
|
|
option2 = "value 2"
|
|
|
|
option3 = ( "value 3" )
|
|
|
|
then if you call SplitSingle(true) then such values will be stored in
|
|
|
|
'table_single' instead of 'table' map
|
|
|
|
default: false
|
|
|
|
*/
|
|
|
|
void SplitSingle(bool split);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
if true then empty values and lists, e.g:
|
|
|
|
option =
|
|
|
|
option2 = ()
|
|
|
|
will be omitted (not inserted to 'table' or 'table_single')
|
|
|
|
default: false
|
|
|
|
*/
|
|
|
|
void SkipEmpty(bool skip);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
2012-09-25 18:50:01 +02:00
|
|
|
'\' character is used to escape other characters
|
2012-01-12 01:24:08 +01:00
|
|
|
so "some \t t\"ext" will produce "some t t"ext"
|
|
|
|
default: true
|
2012-09-25 18:50:01 +02:00
|
|
|
special characters:
|
|
|
|
\0 - 0 (zero code point)
|
|
|
|
\t - tabulator (9 code point)
|
|
|
|
\r - carriage return (13 code point)
|
|
|
|
\n - a new line character (10 code point)
|
|
|
|
in other cases we return the last character so \Z gives Z and \\ gives one \
|
|
|
|
escape character are not used in commentaries
|
|
|
|
so you can write:
|
|
|
|
# this is my comment \n but this was not a new line
|
2012-01-12 01:24:08 +01:00
|
|
|
*/
|
|
|
|
void UseEscapeChar(bool escape);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
if true then the input file or string (char* or std::string) is treated as UTF-8
|
2012-09-25 18:50:01 +02:00
|
|
|
default: true
|
2012-01-12 01:24:08 +01:00
|
|
|
*/
|
|
|
|
void UTF8(bool utf);
|
|
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
current space set by SetSpace();
|
|
|
|
*/
|
|
|
|
Space * root_space;
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
a space in which we are now
|
|
|
|
*/
|
|
|
|
Space * space;
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
true if Parse() method was called
|
|
|
|
false if ParseString() was called
|
|
|
|
*/
|
|
|
|
bool reading_from_file;
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
pointers to the current character
|
|
|
|
if ParseString() is in used
|
|
|
|
*/
|
|
|
|
const char * pchar_ascii;
|
|
|
|
const wchar_t * pchar_unicode;
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
true if ParseString(wchar_t *) or ParseString(std::wstring&) was called
|
|
|
|
*/
|
|
|
|
bool reading_from_wchar_string;
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
2012-05-10 23:16:19 +02:00
|
|
|
last read token
|
2012-01-12 01:24:08 +01:00
|
|
|
*/
|
2012-05-10 23:16:19 +02:00
|
|
|
std::wstring token;
|
|
|
|
|
2012-01-12 01:24:08 +01:00
|
|
|
|
|
|
|
/*
|
2012-05-10 23:16:19 +02:00
|
|
|
last read key
|
2012-01-12 01:24:08 +01:00
|
|
|
*/
|
2012-05-10 23:16:19 +02:00
|
|
|
std::wstring key;
|
2012-01-12 01:24:08 +01:00
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
last read list
|
|
|
|
*/
|
|
|
|
Space::Value value;
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
separator between a variable and a value, default: '='
|
|
|
|
*/
|
|
|
|
int separator;
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
commentary char, default: '#'
|
|
|
|
*/
|
|
|
|
int commentary;
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
list starting character, default: '('
|
|
|
|
*/
|
|
|
|
int list_start;
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
list ending character, default: ')'
|
|
|
|
*/
|
|
|
|
int list_end;
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
list delimiter, default: ','
|
|
|
|
*/
|
|
|
|
int list_delimiter;
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
last read char
|
|
|
|
or -1 if the end
|
|
|
|
*/
|
|
|
|
int lastc;
|
|
|
|
|
|
|
|
|
2012-05-10 23:16:19 +02:00
|
|
|
/*
|
|
|
|
true if the lastc was escaped (with a backslash)
|
|
|
|
we have to know if the last sequence was \" or just "
|
|
|
|
*/
|
|
|
|
bool char_was_escaped;
|
|
|
|
|
|
|
|
|
2012-01-12 01:24:08 +01:00
|
|
|
/*
|
|
|
|
current file
|
|
|
|
*/
|
|
|
|
std::ifstream file;
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
if true then lists with one item will be put into 'table_single' table
|
|
|
|
default: false
|
|
|
|
*/
|
|
|
|
bool split_single;
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
if true then empty lists, e.g:
|
|
|
|
option =
|
|
|
|
option2 = ()
|
|
|
|
will be omitted (not inserted to 'table' or 'table_single')
|
|
|
|
default: false
|
|
|
|
*/
|
|
|
|
bool skip_empty;
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
input file is in UTF-8
|
2012-07-12 17:01:15 +02:00
|
|
|
default: true
|
2012-01-12 01:24:08 +01:00
|
|
|
*/
|
|
|
|
bool input_as_utf8;
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
if true you can use an escape character '\' in quoted values
|
|
|
|
*/
|
|
|
|
bool use_escape_char;
|
|
|
|
|
|
|
|
|
2012-09-25 18:50:01 +02:00
|
|
|
/*
|
|
|
|
true if we are reading the commentary (#)
|
|
|
|
this is to avoid parsing escape characters in the commentary
|
|
|
|
*/
|
|
|
|
bool reading_commentary;
|
|
|
|
|
2012-01-12 01:24:08 +01:00
|
|
|
std::string afile_name;
|
|
|
|
|
|
|
|
void Parse();
|
|
|
|
void ParseLoop();
|
|
|
|
void SpaceEnds();
|
|
|
|
void SpaceStarts();
|
|
|
|
|
|
|
|
void DeleteFromTable(const std::wstring & var);
|
|
|
|
void DeleteFromTableSingle(const std::wstring & var);
|
|
|
|
|
2012-05-10 23:16:19 +02:00
|
|
|
void ReadTokenQuoted();
|
|
|
|
void ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2);
|
|
|
|
void ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2);
|
|
|
|
void ReadKey();
|
|
|
|
void ReadValueList();
|
|
|
|
void ReadValueSingle();
|
|
|
|
void ReadValue();
|
|
|
|
void AddKeyValuePair();
|
|
|
|
|
2012-01-12 01:24:08 +01:00
|
|
|
int ReadUTF8Char();
|
|
|
|
int ReadASCIIChar();
|
|
|
|
int ReadCharFromWcharString();
|
|
|
|
int ReadCharFromUTF8String();
|
|
|
|
int ReadCharFromAsciiString();
|
2012-05-10 23:16:19 +02:00
|
|
|
int ReadCharNoEscape();
|
2012-01-12 01:24:08 +01:00
|
|
|
int ReadChar();
|
|
|
|
bool IsWhite(int c);
|
2012-09-25 18:50:01 +02:00
|
|
|
void SkipWhite(bool skip_lines = false);
|
2012-01-12 01:24:08 +01:00
|
|
|
void SkipWhiteLines();
|
|
|
|
void SkipLine();
|
2012-09-25 18:50:01 +02:00
|
|
|
void SkipComment();
|
2012-01-12 01:24:08 +01:00
|
|
|
void Trim(std::wstring & s);
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
|
|
|
|
#endif
|