add a KeyValueParser for parsing simple key/value strings
This commit is contained in:
parent
df24be199d
commit
90915a7209
|
@ -0,0 +1,277 @@
|
||||||
|
/*
|
||||||
|
* This file is a part of PikoTools
|
||||||
|
* and is distributed under the 2-Clause BSD licence.
|
||||||
|
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2023, Tomasz Sowa
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <wchar.h>
|
||||||
|
#include "keyvalueparser.h"
|
||||||
|
#include "utf8/utf8.h"
|
||||||
|
#include "convert/strtoint.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace pt
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * ctor -- sets default values:
 *   status           - ok
 *   output space     - none (nullptr)
 *   separator        - '='
 *   option delimiter - ','
 *   input_as_utf8    - true
 */
KeyValueParser::KeyValueParser()
{
    // 'status' is a public member, give it a defined value so that it can be
    // read safely before the first parse()/parse_file() call
    status = ok;

    root_space = nullptr;
    separator = '=';
    option_delimiter = ',';
    input_as_utf8 = true;
}
|
||||||
|
|
||||||
|
|
||||||
|
void KeyValueParser::use_utf8(bool utf)
|
||||||
|
{
|
||||||
|
input_as_utf8 = utf;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void KeyValueParser::set_separator(wchar_t separator)
|
||||||
|
{
|
||||||
|
this->separator = separator;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void KeyValueParser::set_option_delimiter(wchar_t option_delimiter)
|
||||||
|
{
|
||||||
|
this->option_delimiter = option_delimiter;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void KeyValueParser::prepare_to_parsing()
|
||||||
|
{
|
||||||
|
clear_input_flags();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * parse a file and put the key/value pairs into out_space
 *
 * file_name   - path of the file, it is opened in binary mode for reading
 * out_space   - the space which receives the parsed pairs
 * clear_space - if true then out_space is emptied before parsing
 *
 * returns ok when the file has been opened and parsed
 * or cant_open_file when the file cannot be opened (out_space is not touched then)
 */
KeyValueParser::Status KeyValueParser::parse_file(const char * file_name, Space & out_space, bool clear_space)
{
    prepare_to_parsing();

    reading_from_file = true;
    root_space = &out_space;

    file.open(file_name, std::ios_base::binary | std::ios_base::in);

    if( file )
    {
        parse(clear_space);
        file.close();
    }
    else
    {
        // parse() was not called so nobody has set the status yet,
        // without this the caller would get a status from a previous parsing
        status = cant_open_file;
    }

    return status;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * parse a file, the path is given as a std::string
 * this is only a wrapper for the 'const char *' version
 */
KeyValueParser::Status KeyValueParser::parse_file(const std::string & file_name, Space & out_space, bool clear_space)
{
    const char * path = file_name.c_str();

    return parse_file(path, out_space, clear_space);
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * parse a file, the path is given as a wide string
 * the path is converted with wide_to_utf8() and passed
 * to the 'const char *' version
 */
KeyValueParser::Status KeyValueParser::parse_file(const wchar_t * file_name, Space & out_space, bool clear_space)
{
    std::string utf8_path;
    wide_to_utf8(file_name, utf8_path);

    return parse_file(utf8_path.c_str(), out_space, clear_space);
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * parse a file, the path is given as a std::wstring
 * this is only a wrapper for the 'const wchar_t *' version
 */
KeyValueParser::Status KeyValueParser::parse_file(const std::wstring & file_name, Space & out_space, bool clear_space)
{
    const wchar_t * wide_path = file_name.c_str();

    return parse_file(wide_path, out_space, clear_space);
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * parse a narrow, null terminated string and put the key/value pairs into out_space
 *
 * str         - the input string, the pointer is remembered in pchar_ascii
 * out_space   - the space which receives the parsed pairs
 * clear_space - if true then out_space is emptied before parsing
 *
 * returns the status set by the parsing
 */
KeyValueParser::Status KeyValueParser::parse(const char * str, Space & out_space, bool clear_space)
{
    prepare_to_parsing();

    // both assignments have to be made after prepare_to_parsing()
    root_space = &out_space;
    pchar_ascii = str;

    parse(clear_space);
    return status;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * parse a std::string
 * this is only a wrapper for the 'const char *' version
 */
KeyValueParser::Status KeyValueParser::parse(const std::string & str, Space & out_space, bool clear_space)
{
    const char * input = str.c_str();

    return parse(input, out_space, clear_space);
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * parse a wide, null terminated string and put the key/value pairs into out_space
 *
 * str         - the input string, the pointer is remembered in pchar_unicode
 * out_space   - the space which receives the parsed pairs
 * clear_space - if true then out_space is emptied before parsing
 *
 * returns the status set by the parsing
 */
KeyValueParser::Status KeyValueParser::parse(const wchar_t * str, Space & out_space, bool clear_space)
{
    prepare_to_parsing();

    // both assignments have to be made after prepare_to_parsing()
    root_space = &out_space;
    pchar_unicode = str;

    parse(clear_space);
    return status;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * parse a std::wstring
 * this is only a wrapper for the 'const wchar_t *' version
 */
KeyValueParser::Status KeyValueParser::parse(const std::wstring & str, Space & out_space, bool clear_space)
{
    const wchar_t * input = str.c_str();

    return parse(input, out_space, clear_space);
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * parse the content of a pt::TextStream and put the key/value pairs into out_space
 *
 * str         - the input stream, it is read through a pair of const iterators
 * out_space   - the space which receives the parsed pairs
 * clear_space - if true then out_space is emptied before parsing
 *
 * returns the status set by the parsing
 */
KeyValueParser::Status KeyValueParser::parse(const pt::TextStream & str, Space & out_space, bool clear_space)
{
    prepare_to_parsing();

    pt::TextStream::const_iterator start = str.begin();
    pt::TextStream::const_iterator end = str.end();

    // the parser reads the stream through pointers to these local iterators
    text_stream_iterator = &start;
    text_stream_iterator_end = &end;
    root_space = &out_space;

    parse(clear_space);

    // 'start' and 'end' are destroyed when this method returns,
    // do not leave the members pointing to them
    text_stream_iterator = nullptr;
    text_stream_iterator_end = nullptr;

    return status;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * parse the content of a pt::WTextStream and put the key/value pairs into out_space
 *
 * str         - the input stream, it is read through a pair of const iterators
 * out_space   - the space which receives the parsed pairs
 * clear_space - if true then out_space is emptied before parsing
 *
 * returns the status set by the parsing
 */
KeyValueParser::Status KeyValueParser::parse(const pt::WTextStream & str, Space & out_space, bool clear_space)
{
    prepare_to_parsing();

    pt::WTextStream::const_iterator start = str.begin();
    pt::WTextStream::const_iterator end = str.end();

    // the parser reads the stream through pointers to these local iterators
    wtext_stream_iterator = &start;
    wtext_stream_iterator_end = &end;
    root_space = &out_space;

    parse(clear_space);

    // 'start' and 'end' are destroyed when this method returns,
    // do not leave the members pointing to them
    wtext_stream_iterator = nullptr;
    wtext_stream_iterator_end = nullptr;

    return status;
}
|
||||||
|
|
||||||
|
|
||||||
|
void KeyValueParser::parse(bool clear_root_space)
|
||||||
|
{
|
||||||
|
line = 1;
|
||||||
|
status = ok;
|
||||||
|
|
||||||
|
if( clear_root_space )
|
||||||
|
{
|
||||||
|
root_space->set_empty_object();
|
||||||
|
}
|
||||||
|
|
||||||
|
read_char(); // put first character to lastc
|
||||||
|
|
||||||
|
while( lastc != -1 )
|
||||||
|
{
|
||||||
|
key.clear();
|
||||||
|
value.clear();
|
||||||
|
wchar_t delimit = read_token(key, separator, option_delimiter);
|
||||||
|
|
||||||
|
if( delimit == separator )
|
||||||
|
{
|
||||||
|
read_token(value, option_delimiter);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( !key.empty() || delimit == separator )
|
||||||
|
{
|
||||||
|
root_space->add(key, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
key.clear();
|
||||||
|
value.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
wchar_t pt::KeyValueParser::read_token(std::wstring & token, wchar_t delimit1, wchar_t delimit2)
|
||||||
|
{
|
||||||
|
token.clear();
|
||||||
|
skip_white();
|
||||||
|
wchar_t delimit = -1;
|
||||||
|
|
||||||
|
while( lastc != -1 )
|
||||||
|
{
|
||||||
|
if( delimit1 != -1 && lastc == delimit1 )
|
||||||
|
{
|
||||||
|
delimit = delimit1;
|
||||||
|
read_char();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( delimit2 != -1 && lastc == delimit2 )
|
||||||
|
{
|
||||||
|
delimit = delimit2;
|
||||||
|
read_char();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
token += lastc;
|
||||||
|
read_char();
|
||||||
|
}
|
||||||
|
|
||||||
|
pt::trim_white(token, false, false);
|
||||||
|
return delimit;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void pt::KeyValueParser::skip_white()
|
||||||
|
{
|
||||||
|
while( pt::is_white(lastc, true, true) )
|
||||||
|
{
|
||||||
|
read_char();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int pt::KeyValueParser::read_char()
|
||||||
|
{
|
||||||
|
return read_char_no_escape();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,173 @@
|
||||||
|
/*
|
||||||
|
* This file is a part of PikoTools
|
||||||
|
* and is distributed under the 2-Clause BSD licence.
|
||||||
|
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2023, Tomasz Sowa
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef headerfile_pikotools_src_space_keyvalueparser
|
||||||
|
#define headerfile_pikotools_src_space_keyvalueparser
|
||||||
|
|
||||||
|
#include "space.h"
|
||||||
|
#include "convert/baseparser.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
namespace pt
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class KeyValueParser : public BaseParser
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ctor -- sets default values
|
||||||
|
*/
|
||||||
|
KeyValueParser();
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* status of parsing
|
||||||
|
* ok - input stream has been parsed correctly
|
||||||
|
* cant_open_file - I cannot open the file (returns only in a case when parsing a file)
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
enum Status {
|
||||||
|
ok,
|
||||||
|
cant_open_file,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* the last status of parsing, set by parse() methods
|
||||||
|
*/
|
||||||
|
Status status;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* if true then the input file or string (char* or std::string) is treated as UTF-8
|
||||||
|
* default true
|
||||||
|
*
|
||||||
|
* the internal storage for strings is std::wstring so if you call set_utf8(false) then
|
||||||
|
* the characters of input string will be simple static_cast<> from char to wchar_t
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
void use_utf8(bool utf);
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* set a separator between a variable and a value
|
||||||
|
* default: '='
|
||||||
|
*/
|
||||||
|
void set_separator(wchar_t separator);
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* set an option delimiter
|
||||||
|
* default: ','
|
||||||
|
*/
|
||||||
|
void set_option_delimiter(wchar_t option_delimiter);
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
Status parse_file(const char * file_name, Space & out_space, bool clear_space = true);
|
||||||
|
Status parse_file(const std::string & file_name, Space & out_space, bool clear_space = true);
|
||||||
|
Status parse_file(const wchar_t * file_name, Space & out_space, bool clear_space = true);
|
||||||
|
Status parse_file(const std::wstring & file_name, Space & out_space, bool clear_space = true);
|
||||||
|
|
||||||
|
/*
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
Status parse(const char * str, Space & out_space, bool clear_space = true);
|
||||||
|
Status parse(const std::string & str, Space & out_space, bool clear_space = true);
|
||||||
|
Status parse(const wchar_t * str, Space & out_space, bool clear_space = true);
|
||||||
|
Status parse(const std::wstring & str, Space & out_space, bool clear_space = true);
|
||||||
|
Status parse(const pt::TextStream & str, Space & out_space, bool clear_space = true);
|
||||||
|
Status parse(const pt::WTextStream & str, Space & out_space, bool clear_space = true);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* current output space
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
Space * root_space;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* separator between a variable and a value, default: '='
|
||||||
|
*/
|
||||||
|
wchar_t separator;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* option delimiter, default: ','
|
||||||
|
*/
|
||||||
|
wchar_t option_delimiter;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
std::wstring key;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
std::wstring value;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void prepare_to_parsing();
|
||||||
|
void parse(bool clear_root_space);
|
||||||
|
wchar_t read_token(std::wstring & token, wchar_t delimit1, wchar_t delimit2 = -1);
|
||||||
|
void skip_white();
|
||||||
|
int read_char();
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
Loading…
Reference in New Issue