/*
 * This file is a part of Winix
 * and is not publicly distributed
 *
 * Copyright (c) 2008-2011, Tomasz Sowa
 * All rights reserved.
 *
 */

#ifndef headerfile_winix_core_confparser
#define headerfile_winix_core_confparser

// NOTE(review): the header names were lost from all five #include lines.
// The first four are required by the declarations below; <cstddef> (size_t)
// is an assumption for the fifth one -- confirm against the original file.
#include <cstddef>
#include <fstream>
#include <map>
#include <string>
#include <vector>


/*
A parser for parsing config files.

A config file can look like this:
    variable1 = value 1
    variable2 = " value 2 "
    variable3 = (value 1, value 2)
    variable4 = (" value 1 " , "value2", value 3)

sample of use:
    ConfParser parser;
    parser.Parse("/path/to/config");

    if( parser.status == ConfParser::ok )
    {
        // the whole config we have in parser.table (parser.table_single)
    }

config syntax:
    option = list

    a list can consist of any number of items, if you're using more than
    one item you should use brackets ()

    for one item the brackets can be omitted:
        option = value
    white characters at the beginning of the value (and at the end) will be
    trimmed, or you can use quotes:
        option  = "value"
        option2 = "value with spaces at the end   "

    the form without quotes:
        option = value
    should be written in one line, so this is not allowed:
        option =
            value
    you can use new line characters only between brackets and quotes:
        option = "this is
                  a multiline string"
        option = ( value1,
                   value2 )

    but there is one requirement: the first character " or ( should be in
    the same line, so this is not allowed:
        option =
            "this is wrong"
    but this is ok:
        option = "
            that is ok"

    empty lists:
        option = ()
        this creates an empty list: parser.table['option'].empty() == true

        option =
        this creates an empty list too (the same as previously)

        option = ""
        but this doesn't create an empty list, it creates a list with
        one (empty) item

commentaries:
    # this is a commentary (until the end of the line)
    option = value  # this is a commentary too

    commentaries are treated as white characters, other example:
    option = (         # this is my list
              "value 1"  # this is a value one
              value 2    # and this is a value two
             )         # end of my list

overwriting:
    option1 = some value
    option1 = other value
    # always the last option is used so option1 is "other value"

list delimiter:
    option1 = (value1, value2, value3)
    option2 = ("value1", "value2", "value3")
    above we're using a comma ',' as a list delimiter but when using quotes
    (second line) the commas can be omitted:
    option2 = ("value1" "value2" "value3")

white characters:
    the name of an option cannot consist of white characters
    some option = value   # this is wrong
    some_option = value   # this is ok

    which characters are allowed in an option name is defined by
    IsVariableChar() method

    you can use white characters in values
    option = value with spaces or tabs
    white characters at the beginning and at the end will be trimmed,
    so if you want them use quotes:
    option = "  other value with spaces   "

special characters in quoted strings:
    option = "this is a string with \" a quote inside"
    the option will be: this is a string with " a quote inside
    \\ - means a single \ (backslash)
    basically: \char produces char
    so:
        "\a" gives "a"
        "\\" gives "\"
        "\Z" gives "Z" and so on
    you can call UseEscapeChar(false) to turn this off
*/


/*
    one "space" of a parsed config: the options stored in it (table and
    table_single), its child spaces and a pointer to its parent space
*/
class Space
{
public:

    Space();
    ~Space();

    Space(const Space & s);
    Space & operator=(const Space & s);

    void Clear();


    // first we are searching in 'table_single' and if there is no
    // such 'name' there then we are looking in 'table'
    // (for the first item in the vector)
    // these methods return a null pointer if there is no such 'name'
    std::wstring * GetValue(const wchar_t * name);
    std::wstring * GetValue(const std::wstring & name);


    /*
        those methods are used to extract information from space.table
        or space.table_single

        as parameters they take the name of an option and a default value
        (used if there is no such option), they return the appropriate
        value (either text, int, size or boolean)
        (in lists they return the first item if it exists)

        the overloads without 'def' use a default which is not visible
        in this header

        when calling Text(...) and AText(...) you should copy the object
        to which a reference is returned, it will be cleared in the next
        call to one of these methods (as well as to Int(), Size() and Bool())

        AText(...) always returns a reference to a UTF-8 string
    */
    std::wstring & Text(const wchar_t * name);
    std::wstring & Text(const wchar_t * name, const wchar_t * def);
    std::wstring & Text(const std::wstring & name, const wchar_t * def);

    std::string & AText(const wchar_t * name);
    std::string & AText(const wchar_t * name, const char * def);
    std::string & AText(const std::wstring & name, const char * def);

    int Int(const wchar_t *);
    int Int(const wchar_t * name, int def);
    int Int(const std::wstring & name, int def);

    size_t Size(const wchar_t *);
    size_t Size(const wchar_t * name, size_t def);
    size_t Size(const std::wstring & name, size_t def);

    bool Bool(const wchar_t *);
    bool Bool(const wchar_t * name, bool def);
    bool Bool(const std::wstring & name, bool def);


    /*
     *
     * raw access to the parsed values
     *
     */

    /*
        this is the table which represents your config file
        in the Table map: the first (key) is your 'option'
        and the second is 'list'
    */
    typedef std::vector<std::wstring> Value;
    typedef std::map<std::wstring, Value> Table;

    /*
        if your config file consists mainly of single forms such as:
            option  = value
            option2 = value2
        then you can call SplitSingle(true) for not inserting single values
        to the previous 'table' but instead to 'table_single'

        table_single as the second parameter takes only std::wstring
        (instead of the whole std::vector) so you can save a little memory
        from not using std::vector
    */
    typedef std::map<std::wstring, std::wstring> TableSingle;

    std::wstring name;        // space name
    TableSingle table_single; // std::map<std::wstring, std::wstring>
    Table table;              // std::map<std::wstring, std::vector<std::wstring> >

    // childs
    typedef std::vector<Space*> Spaces;
    std::vector<Space*> spaces;

    // a parent space
    // null means a root space
    Space * parent;


    /*
        those methods are used to extract lists
        note: if there is one option in table_single they will return it
        return true if such an option exists (but value can be an empty list)
    */
    bool ListText(const wchar_t * name, std::vector<std::wstring> & list);
    bool ListText(const std::wstring & name, std::vector<std::wstring> & list);


    /*
        printing the content (for debug purposes)

        out          - any stream-like object accepting operator<< for
                       char, wide string literals, std::wstring and int
        use_indents  - if true each line is preceded by an indentation
        use_comments - if true "# space level ..." / "# end of space ..."
                       commentaries are added for nested spaces
        level        - nesting level, zero means the root space
    */
    template<class Stream>
    void Serialize(Stream & out, bool use_indents = false, bool use_comments = false, int level = 0) const;


private:

    // temporary buffers; tmp_value_text and tmp_value_text_ascii are
    // presumably the objects returned by Text() and AText() -- the
    // definitions are not visible in this header
    std::wstring tmp_name;
    std::wstring tmp_value_text;
    std::string tmp_value_text_ascii;

    int ToInt(const std::wstring & value);
    size_t ToSize(const std::wstring & value);
    bool ToBool(const std::wstring & value);
    wchar_t ToSmall(wchar_t c);
    bool EqualNoCase(const wchar_t * str1, const wchar_t * str2);

    template<class Stream>
    void PrintLevel(Stream & out, bool use_indents, int level) const;

    template<class Stream>
    void SerializeTableSingle(Stream & out, bool use_indents, int level) const;

    template<class Stream>
    void SerializeTableMulti(Stream & out, bool use_indents, int level) const;

    template<class Stream>
    void PrintValue(Stream & out, const std::wstring & str) const;
};


/*
    printing the indentation for the given level
    (nothing is printed if use_indents is false)

    NOTE(review): the loop condition and body were lost from the source
    and have been reconstructed. One space per level is assumed because
    SerializeTableMulti() passes a column count (level + name size + 3)
    as 'level' to line up list items -- confirm against the original file.
*/
template<class Stream>
void Space::PrintLevel(Stream & out, bool use_indents, int level) const
{
    if( use_indents )
    {
        for(int i=0 ; i<level ; ++i)
            out << ' ';
    }
}


/*
    printing a value as a quoted string

    NOTE(review): only the opening quote and the start of the loop survived
    in the source; the loop body and the closing quote were reconstructed.
    A backslash is put before '\' and '"' so that the parser's documented
    escape rule (\char produces char) reads the value back unchanged
    -- confirm against the original file.
*/
template<class Stream>
void Space::PrintValue(Stream & out, const std::wstring & str) const
{
    out << '\"';

    for(size_t i=0 ; i<str.size() ; ++i)
    {
        if( str[i] == '\\' )
            out << L"\\\\";
        else
        if( str[i] == '"' )
            out << L"\\\"";
        else
            out << str[i];
    }

    out << '\"';
}


/*
    printing all options from table_single, one per line, in the form:
        name = "value"
*/
template<class Stream>
void Space::SerializeTableSingle(Stream & out, bool use_indents, int level) const
{
    if( !table_single.empty() )
    {
        TableSingle::const_iterator i;

        for(i = table_single.begin() ; i != table_single.end() ; ++i)
        {
            PrintLevel(out, use_indents, level);
            out << i->first << L" = ";
            PrintValue(out, i->second);
            out << '\n';
        }
    }
}


/*
    printing all options from table

    a list with exactly one item is printed without brackets,
    in other cases (also for an empty list) the items are put in brackets,
    one item per line
*/
template<class Stream>
void Space::SerializeTableMulti(Stream & out, bool use_indents, int level) const
{
    Table::const_iterator i2;
    size_t v;

    if( !table.empty() )
    {
        for(i2 = table.begin() ; i2 != table.end() ; ++i2)
        {
            PrintLevel(out, use_indents, level);
            out << i2->first << L" = ";

            if( i2->second.size() != 1 )
                out << '(';

            for(v = 0 ; v < i2->second.size() ; ++v)
            {
                // next items are indented by the length of the option name
                // plus 3 characters (the length of " = ")
                if( v > 0 )
                    PrintLevel(out, use_indents, level + i2->first.size() + 3);

                PrintValue(out, i2->second[v]);

                if( v + 1 < i2->second.size() )
                    out << '\n';
            }

            if( i2->second.size() != 1 )
                out << ')';

            out << '\n';
        }
    }
}


/*
    printing the whole space: its options and then (recursively) its childs

    a nested space (level > 0) is surrounded by "name (" and ")",
    the root space (level == 0) is printed without such a frame
*/
template<class Stream>
void Space::Serialize(Stream & out, bool use_indents, bool use_comments, int level) const
{
    if( level > 0 )
    {
        out << '\n';
        PrintLevel(out, use_indents, level);

        if( !name.empty() )
            out << name << ' ';

        out << L"(\n";

        if( use_comments )
        {
            PrintLevel(out, use_indents, level);
            out << L"# space level " << level << '\n';
        }
    }

    SerializeTableSingle(out, use_indents, level);
    SerializeTableMulti(out, use_indents, level);

    for(size_t i=0 ; i<spaces.size() ; ++i)
        spaces[i]->Serialize(out, use_indents, use_comments, level+1);

    if( level > 0 )
    {
        PrintLevel(out, use_indents, level);
        out << ')';

        if( use_comments )
        {
            if( name.empty() )
                out << L" # end of unnamed space";
            else
                out << L" # end of space: " << name;

            out << L" (level " << level << L")";
        }

        out << '\n';
    }
}


/*
    the parser itself, see the description of the config syntax
    at the beginning of this file
*/
class ConfParser
{
public:

    /*
        ctor -- setting default values (SetDefault() method)
    */
    ConfParser();

    /*
        setting the root space
    */
    void SetSpace(Space * pspace);
    void SetSpace(Space & pspace);

    /*
        setting options of the parser to the default values
        utf8, split single etc.
    */
    void SetDefault();

    /*
        status of parsing
    */
    enum Status { ok, cant_open_file, syntax_error, no_space };

    /*
        the last status of parsing, set by Parse() methods
    */
    Status status;

    /*
        a number of a line in which there is a syntax_error
    */
    int line;

    /*
        main methods used to parse
        file_name is the path to a file
    */
    Status Parse(const char * file_name);
    Status Parse(const std::string & file_name);
    Status Parse(const wchar_t * file_name);
    Status Parse(const std::wstring & file_name);

    /*
        main methods used to parse
        str - input string (either 8bit ascii or UTF-8 -- see UTF8() method)
    */
    Status ParseString(const char * str);
    Status ParseString(const std::string & str);

    /*
        main methods used to parse
        here input string is always in unicode (wide characters)
    */
    Status ParseString(const wchar_t * str);
    Status ParseString(const std::wstring & str);

    /*
        if your list consists of only one item, e.g:
            option1 = value 1
            option2 = "value 2"
            option3 = ( "value 3" )
        then if you call SplitSingle(true) then such values will be stored
        in 'table_single' instead of 'table' map
        default: false
    */
    void SplitSingle(bool split);

    /*
        if true then empty values and lists, e.g:
            option  =
            option2 = ()
        will be omitted (not inserted to 'table' or 'table_single')
        default: false
    */
    void SkipEmpty(bool skip);

    /*
        '\' character is used to escape other characters in a quoted string
        so "some \t t\"ext" will produce "some t t"ext"
        (this is only used in quoted strings)
        default: true
    */
    void UseEscapeChar(bool escape);

    /*
        if true then the input file or string (char* or std::string)
        is treated as UTF-8
    */
    void UTF8(bool utf);


private:

    /*
        current space set by SetSpace();
    */
    Space * root_space;

    /*
        a space in which we are now
    */
    Space * space;

    /*
        true if Parse() method was called
        false if ParseString() was called
    */
    bool reading_from_file;

    /*
        pointers to the current character
        if ParseString() is in use
    */
    const char * pchar_ascii;
    const wchar_t * pchar_unicode;

    /*
        true if ParseString(wchar_t *) or ParseString(std::wstring&) was called
    */
    bool reading_from_wchar_string;

    /*
        last read variable (option)
    */
    std::wstring variable;

    /*
        last read list item
    */
    std::wstring value_item;

    /*
        last read list
    */
    Space::Value value;

    /*
        separator between a variable and a value, default: '='
    */
    int separator;

    /*
        commentary char, default: '#'
    */
    int commentary;

    /*
        list starting character, default: '('
    */
    int list_start;

    /*
        list ending character, default: ')'
    */
    int list_end;

    /*
        list delimiter, default: ','
    */
    int list_delimiter;

    /*
        last read char
        or -1 if the end
    */
    int lastc;

    /*
        current file
    */
    std::ifstream file;

    /*
        if true then lists with one item will be put into 'table_single' table
        default: false
    */
    bool split_single;

    /*
        if true then empty lists, e.g:
            option  =
            option2 = ()
        will be omitted (not inserted to 'table' or 'table_single')
        default: false
    */
    bool skip_empty;

    /*
        input file is in UTF-8
        default: false
    */
    bool input_as_utf8;

    /*
        if true you can use an escape character '\' in quoted values
    */
    bool use_escape_char;

    // a narrow (char) file name buffer -- presumably the wide file names
    // given to Parse() are converted into it; the definition is not
    // visible in this header
    std::string afile_name;

    void Parse();
    void ParseLoop();
    void SpaceEnds();
    void SpaceStarts();
    void ReadAddValue();
    void AddOption();

    void DeleteFromTable(const std::wstring & var);
    void DeleteFromTableSingle(const std::wstring & var);

    void ReadVariable();
    bool ReadValue();
    bool ReadValueList();
    bool ReadValueNoList(bool use_list_delimiter = false);
    bool ReadValueQuoted();
    bool ReadValueSimple(bool use_list_delimiter = false);

    int ReadUTF8Char();
    int ReadASCIIChar();
    int ReadCharFromWcharString();
    int ReadCharFromUTF8String();
    int ReadCharFromAsciiString();
    int ReadChar();

    bool IsWhite(int c);
    bool IsVariableChar(int c);
    void SkipWhite();
    void SkipWhiteLines();
    void SkipLine();
    void Trim(std::wstring & s);
};


#endif