winix/core/confparser.h

681 lines
14 KiB
C
Raw Normal View History

/*
* This file is a part of Winix
* and is not publicly distributed
*
* Copyright (c) 2008-2011, Tomasz Sowa
* All rights reserved.
*
*/
#ifndef headerfile_winix_core_confparser
#define headerfile_winix_core_confparser
#include <fstream>
#include <string>
#include <vector>
#include <list>
#include <map>
/*
A parser for parsing config files.
A config file can look like this:
variable1 = value 1
variable2 = " value 2 "
variable3 = (value 1, value 2)
variable4 = (" value 1 " , "value2", value 3)
sample of use:
Space space;
ConfParser parser;
parser.SetSpace(space);
parser.Parse("/path/to/config");
if( parser.status == ConfParser::ok )
{
// the whole config is now in space.table (and in space.table_single)
}
config syntax:
option = list
a list can consist of any number of items, if you're using more than one item you should
use brackets ()
for one item the brackets can be omitted:
option = value
white characters at the beginning of the value (and at the end) will be trimmed,
or you can use quotes:
option = "value"
option2 = "value with spaces at the end "
the form without quotes:
option = value
should be written in one line, so this is not allowed:
option =
value
you can use a new line characters only between brackets and quotes:
option = "this is
a multiline string"
option = ( value1,
value2 )
but there is one requirement: the first character " or ( should be in the same line,
so this is not allowed
option =
"this is wrong"
but this is ok:
option = "
that is ok"
empty lists:
option = ()
this creates an empty list: parser.table['option'].empty() == true
option =
this creates an empty list too (the same as previously)
option = ""
but this doesn't create an empty list, it creates a list with one (empty) item
comments:
# this is a comment (until the end of the line)
option = value # this is a comment too
comments are treated as white characters, another example:
option = ( # this is my list
"value 1" # this is a value one
value 2 # and this is a value two
) # end of my list
overwriting:
option1 = some value
option1 = other value
# always the last option is used so option1 is "other value"
list delimiter:
option1 = (value1, value2, value3)
option2 = ("value1", "value2", "value3")
above we're using a comma ',' as a list delimiter but when using quotes (second line)
the commas can be omitted:
option2 = ("value1" "value2" "value3")
white characters:
the name of an option cannot contain white characters
some option = value # this is wrong
some_option = value # this is ok
which characters are allowed in an option name is defined by IsVariableChar() method
you can use white characters in values
option = value with spaces or tabs
white characters at the beginning and at the end will be trimmed,
so if you want them use quotes:
option = " other value with spaces "
special characters in quoted strings:
option = "this is a string with \" a quote inside"
the option will be: this is a string with " a quote inside
\\ - means one \
basically: \char produces char
so:
"\a" gives "a"
"\\" gives "\"
"\Z" gives "Z" and so on
you can call UseEscapeChar(false) to turn this off
*/
/*
Space: one configuration space
it holds the options read from a config file ('table_single' and 'table'),
the child spaces ('spaces') and a pointer to the parent space ('parent')
*/
class Space
{
public:
Space();
~Space();
Space(const Space & s);
Space & operator=(const Space & s);
// NOTE(review): presumably removes all options and child spaces -- confirm in the .cpp file
void Clear();
// looking for the option 'name':
// first we are searching in 'table_single' and if there is not
// such a 'name' there then we are looking in 'table' (for the first item in the vector)
// these methods return a pointer to the value
// or a null pointer if there is no such 'name'
std::wstring * GetValue(const wchar_t * name);
std::wstring * GetValue(const std::wstring & name);
/*
these methods are used to extract information from space.table or space.table_single
as parameters they take the name of an option
and a default value (used if there is no such option),
they return an appropriate value (either text, int, size_t or boolean)
(for lists they return the first item if it exists)
when calling Text(...) and AText(...) you should copy the object to which a reference is returned
because it will be cleared in the next call to one of these methods (as well as to Int(), Size() and Bool())
AText(...) always returns a reference to a UTF-8 string
*/
std::wstring & Text(const wchar_t * name);
std::wstring & Text(const wchar_t * name, const wchar_t * def);
std::wstring & Text(const std::wstring & name, const wchar_t * def);
std::string & AText(const wchar_t * name);
std::string & AText(const wchar_t * name, const char * def);
std::string & AText(const std::wstring & name, const char * def);
int Int(const wchar_t *);
int Int(const wchar_t * name, int def);
int Int(const std::wstring & name, int def);
size_t Size(const wchar_t *);
size_t Size(const wchar_t * name, size_t def);
size_t Size(const std::wstring & name, size_t def);
bool Bool(const wchar_t *);
bool Bool(const wchar_t * name, bool def);
bool Bool(const std::wstring & name, bool def);
/*
*
*
raw access to the parsed values
*
*
*/
/*
this is the table which represents your config file
in the Table map: the first (key) is your 'option' and the second is 'list'
*/
typedef std::vector<std::wstring> Value;
typedef std::map<std::wstring, Value> Table;
/*
if your config file consists mainly of single forms such as:
option = value
option2 = value2
then you can call ConfParser::SplitSingle(true) so that single values are not inserted to
the previous 'table' but to 'table_single' instead
table_single as the second parameter takes only std::wstring (instead of the whole std::vector)
so you can save a little memory by not using std::vector
*/
typedef std::map<std::wstring, std::wstring> TableSingle;
std::wstring name; // space name
TableSingle table_single; // std::map<std::wstring, std::wstring>
Table table; // std::map<std::wstring, std::vector<std::wstring> >
// child spaces
// NOTE(review): these are raw pointers, presumably owned by this object
// (a destructor and copy operations are declared above) -- confirm in the .cpp file
typedef std::vector<Space*> Spaces;
std::vector<Space*> spaces;
// a parent space
// null means a root space
Space * parent;
/*
these methods are used to extract lists
note: if there is one option in table_single they will return it
they return true if such an option exists (but the value can be an empty list)
*/
bool ListText(const wchar_t * name, std::vector<std::wstring> & list);
bool ListText(const std::wstring & name, std::vector<std::wstring> & list);
/*
printing the content
(for debug purposes)
out - an output stream, it has to provide operator<< for the printed types
use_indents - if true each line is preceded by spaces according to its level
use_comments - if true additional '#' comments are printed at the beginning and at the end of each non-root space
level - a nesting level of this space, zero means the root space (child spaces are printed with level+1)
*/
template<class Stream>
void Serialize(Stream & out, bool use_indents = false, bool use_comments = false, int level = 0) const;
private:
// temporary buffers
// NOTE(review): presumably tmp_value_text and tmp_value_text_ascii are the objects
// returned by reference from Text() and AText() -- confirm in the .cpp file
std::wstring tmp_name;
std::wstring tmp_value_text;
std::string tmp_value_text_ascii;
// converting a text value to a number or a boolean
// NOTE(review): presumably used by Int(), Size() and Bool(), the accepted formats are defined in the .cpp file
int ToInt(const std::wstring & value);
size_t ToSize(const std::wstring & value);
bool ToBool(const std::wstring & value);
// NOTE(review): judging by the names these are helpers for a case insensitive comparison -- confirm in the .cpp file
wchar_t ToSmall(wchar_t c);
bool EqualNoCase(const wchar_t * str1, const wchar_t * str2);
// helpers for Serialize(), they are defined below in this file
// printing the indentation ('level' spaces) if use_indents is true
template<class Stream>
void PrintLevel(Stream & out, bool use_indents, int level) const;
// printing all options from 'table_single', one option per line
template<class Stream>
void SerializeTableSingle(Stream & out, bool use_indents, int level) const;
// printing all options from 'table', lists with zero or more than one item are put in brackets
template<class Stream>
void SerializeTableMulti(Stream & out, bool use_indents, int level) const;
// printing a value in quotes, the characters: \ " and the zero character are escaped
template<class Stream>
void PrintValue(Stream & out, const std::wstring & str) const;
};
/*
	printing the indentation of a line:
	one space for each nesting level

	nothing is printed when use_indents is false
	(or when level is not greater than zero)
*/
template<class Stream>
void Space::PrintLevel(Stream & out, bool use_indents, int level) const
{
	if( !use_indents )
		return;

	for(int left = level ; left > 0 ; --left)
		out << ' ';
}
/*
	printing a value as a quoted string

	the text is put between two quote characters and three characters
	are printed in an escaped form:
	  a backslash      is printed as two backslashes
	  a quote          is printed as a backslash followed by the quote
	  a zero character is printed as a backslash followed by the digit 0
	all other characters are sent to the stream unchanged
*/
template<class Stream>
void Space::PrintValue(Stream & out, const std::wstring & str) const
{
	out << '\"';

	for(std::wstring::const_iterator c = str.begin() ; c != str.end() ; ++c)
	{
		switch( *c )
		{
		case '\\':
			out << L"\\\\";
			break;

		case '"':
			out << L"\\\"";
			break;

		case 0:
			out << L"\\0";
			break;

		default:
			out << *c;
			break;
		}
	}

	out << '\"';
}
template<class Stream>
void Space::SerializeTableSingle(Stream & out, bool use_indents, int level) const
{
if( !table_single.empty() )
{
TableSingle::const_iterator i;
for(i = table_single.begin() ; i != table_single.end() ; ++i)
{
PrintLevel(out, use_indents, level);
out << i->first << L" = ";
PrintValue(out, i->second);
out << '\n';
}
}
}
template<class Stream>
void Space::SerializeTableMulti(Stream & out, bool use_indents, int level) const
{
Table::const_iterator i2;
size_t v;
if( !table.empty() )
{
for(i2 = table.begin() ; i2 != table.end() ; ++i2)
{
PrintLevel(out, use_indents, level);
out << i2->first << L" = ";
if( i2->second.size() != 1 )
out << '(';
for(v = 0 ; v < i2->second.size() ; ++v)
{
if( v > 0 )
PrintLevel(out, use_indents, level + i2->first.size() + 3);
PrintValue(out, i2->second[v]);
if( v + 1 < i2->second.size() )
out << '\n';
}
if( i2->second.size() != 1 )
out << ')';
out << '\n';
}
}
}
template<class Stream>
void Space::Serialize(Stream & out, bool use_indents, bool use_comments, int level) const
{
if( level > 0 )
{
out << '\n';
PrintLevel(out, use_indents, level);
if( !name.empty() )
out << name << ' ';
out << L"(\n";
if( use_comments )
{
PrintLevel(out, use_indents, level);
out << L"# space level " << level << '\n';
}
}
SerializeTableSingle(out, use_indents, level);
SerializeTableMulti(out, use_indents, level);
for(size_t i=0 ; i<spaces.size() ; ++i)
spaces[i]->Serialize(out, use_indents, use_comments, level+1);
if( level > 0 )
{
PrintLevel(out, use_indents, level);
out << ')';
if( use_comments )
{
if( name.empty() )
out << L" # end of unnamed space";
else
out << L" # end of space: " << name;
out << L" (level " << level << L")";
}
out << '\n';
}
}
/*
ConfParser: a parser for config files (the syntax is described at the beginning of this file)
the input is read either from a file (Parse() methods) or from a string (ParseString() methods)
the parsed options are stored in a Space object set by SetSpace()
*/
class ConfParser
{
public:
/*
ctor -- setting default values (SetDefault() method)
*/
ConfParser();
/*
setting the root space
*/
void SetSpace(Space * pspace);
void SetSpace(Space & pspace);
/*
setting options of the parser to the default values
utf8, split single etc.
*/
void SetDefault();
/*
status of parsing
NOTE(review): 'no_space' presumably means that no space was set by SetSpace() -- confirm in the .cpp file
*/
enum Status { ok, cant_open_file, syntax_error, no_space };
/*
the last status of parsing, set by Parse() methods
*/
Status status;
/*
a number of a line in which there is a syntax_error
*/
int line;
/*
main methods used to parse
file_name is the path to a file
*/
Status Parse(const char * file_name);
Status Parse(const std::string & file_name);
Status Parse(const wchar_t * file_name);
Status Parse(const std::wstring & file_name);
/*
main methods used to parse
str - input string (either 8bit ascii or UTF-8 -- see UTF8() method)
*/
Status ParseString(const char * str);
Status ParseString(const std::string & str);
/*
main methods used to parse
here input string is always in unicode (wide characters)
*/
Status ParseString(const wchar_t * str);
Status ParseString(const std::wstring & str);
/*
if your list consists of only one item, e.g:
option1 = value 1
option2 = "value 2"
option3 = ( "value 3" )
then if you call SplitSingle(true) then such values will be stored in
'table_single' instead of 'table' map
default: false
*/
void SplitSingle(bool split);
/*
if true then empty values and lists, e.g:
option =
option2 = ()
will be omitted (not inserted to 'table' or 'table_single')
default: false
*/
void SkipEmpty(bool skip);
/*
'\' character is used to escape other characters in a quoted string
so "some \t t\"ext" will produce "some t t"ext"
(this is only used in quoted strings)
default: true
*/
void UseEscapeChar(bool escape);
/*
if true then the input file or string (char* or std::string) is treated as UTF-8
*/
void UTF8(bool utf);
private:
/*
the root space set by SetSpace();
*/
Space * root_space;
/*
a space in which we are now
*/
Space * space;
/*
true if Parse() method was called
false if ParseString() was called
*/
bool reading_from_file;
/*
pointers to the current character
if ParseString() is in use
*/
const char * pchar_ascii;
const wchar_t * pchar_unicode;
/*
true if ParseString(wchar_t *) or ParseString(std::wstring&) was called
*/
bool reading_from_wchar_string;
/*
last read variable (option)
*/
std::wstring variable;
/*
last read list item
*/
std::wstring value_item;
/*
last read list
*/
Space::Value value;
/*
separator between a variable and a value, default: '='
*/
int separator;
/*
comment char, default: '#'
*/
int commentary;
/*
list starting character, default: '('
*/
int list_start;
/*
list ending character, default: ')'
*/
int list_end;
/*
list delimiter, default: ','
*/
int list_delimiter;
/*
last read char
or -1 if the end
*/
int lastc;
/*
current file
*/
std::ifstream file;
/*
if true then lists with one item will be put into 'table_single' table
default: false
*/
bool split_single;
/*
if true then empty lists, e.g:
option =
option2 = ()
will be omitted (not inserted to 'table' or 'table_single')
default: false
*/
bool skip_empty;
/*
input file is in UTF-8
default: false
*/
bool input_as_utf8;
/*
if true you can use an escape character '\' in quoted values
*/
bool use_escape_char;
// NOTE(review): presumably the file name as a narrow string
// (used when Parse() is called with a wide file name) -- confirm in the .cpp file
std::string afile_name;
/*
private parsing methods, they are defined in the .cpp file
NOTE(review): the descriptions below are based only on the names and signatures -- confirm in the .cpp file
*/
// the main parsing routines
void Parse();
void ParseLoop();
// handling the beginning and the end of a nested space
void SpaceEnds();
void SpaceStarts();
// reading a value and inserting the option (variable, value) to the current space
void ReadAddValue();
void AddOption();
void DeleteFromTable(const std::wstring & var);
void DeleteFromTableSingle(const std::wstring & var);
// reading the name of an option and its value (a list, a quoted value or a simple value)
void ReadVariable();
bool ReadValue();
bool ReadValueList();
bool ReadValueNoList(bool use_list_delimiter = false);
bool ReadValueQuoted();
bool ReadValueSimple(bool use_list_delimiter = false);
// reading one character from the current input (a file or a string)
int ReadUTF8Char();
int ReadASCIIChar();
int ReadCharFromWcharString();
int ReadCharFromUTF8String();
int ReadCharFromAsciiString();
int ReadChar();
// character classes
bool IsWhite(int c);
bool IsVariableChar(int c);
// skipping white characters, lines and trimming a string
void SkipWhite();
void SkipWhiteLines();
void SkipLine();
void Trim(std::wstring & s);
};
#endif