add a KeyValueParser for parsing simple key/value strings

This commit is contained in:
Tomasz Sowa 2023-11-07 03:54:34 +01:00
parent df24be199d
commit 90915a7209
Signed by: tomasz.sowa
GPG Key ID: 662CC1438638588B
2 changed files with 450 additions and 0 deletions

View File

@ -0,0 +1,277 @@
/*
* This file is a part of PikoTools
* and is distributed under the 2-Clause BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2023, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <cstdlib>
#include <wchar.h>
#include "keyvalueparser.h"
#include "utf8/utf8.h"
#include "convert/strtoint.h"
namespace pt
{
/*
 * ctor -- sets default values:
 *  - status is ok (so reading it before the first parse is well defined)
 *  - there is no output space yet
 *  - the key/value separator is '='
 *  - the option delimiter is ','
 *  - char based input is treated as UTF-8
 */
KeyValueParser::KeyValueParser()
    : status(ok),
      root_space(nullptr),
      separator(L'='),
      option_delimiter(L',')
{
    // input_as_utf8 is not declared in this class (it comes from outside of it)
    // so it cannot be put on the member initializer list
    input_as_utf8 = true;
}
void KeyValueParser::use_utf8(bool utf)
{
input_as_utf8 = utf;
}
/*
 * set the character which separates a key from its value
 * (the constructor sets it to '=')
 */
void KeyValueParser::set_separator(wchar_t separator)
{
    // the parameter hides the member of the same name so the member is named explicitly
    KeyValueParser::separator = separator;
}
/*
 * set the character which ends one key/value pair and starts the next one
 * (the constructor sets it to ',')
 */
void KeyValueParser::set_option_delimiter(wchar_t option_delimiter)
{
    // the parameter hides the member of the same name so the member is named explicitly
    KeyValueParser::option_delimiter = option_delimiter;
}
void KeyValueParser::prepare_to_parsing()
{
clear_input_flags();
}
/*
 * parse a file and put the key/value pairs into out_space
 *
 * file_name   - path of the file to read (the file is opened in binary mode)
 * out_space   - the space which receives the parsed pairs
 * clear_space - if true then out_space is reset to an empty object before parsing
 *
 * returns (and stores in 'status'):
 *   ok             - the file has been opened and parsed
 *   cant_open_file - the file could not be opened, out_space is left untouched
 */
KeyValueParser::Status KeyValueParser::parse_file(const char * file_name, Space & out_space, bool clear_space)
{
    prepare_to_parsing();
    reading_from_file = true;
    root_space = &out_space;

    file.open(file_name, std::ios_base::binary | std::ios_base::in);

    if( file )
    {
        parse(clear_space);
        file.close();
    }
    else
    {
        // without this the caller would get a stale status from a previous
        // parsing (or an indeterminate value on the first call)
        status = cant_open_file;
    }

    return status;
}
/*
 * std::string overload -- forwards to parse_file(const char *, ...)
 */
KeyValueParser::Status KeyValueParser::parse_file(const std::string & file_name, Space & out_space, bool clear_space)
{
    const char * raw_name = file_name.c_str();

    return parse_file(raw_name, out_space, clear_space);
}
/*
 * wide string overload -- the file name is converted with wide_to_utf8()
 * and then passed to parse_file(const char *, ...)
 */
KeyValueParser::Status KeyValueParser::parse_file(const wchar_t * file_name, Space & out_space, bool clear_space)
{
    std::string narrow_name;
    wide_to_utf8(file_name, narrow_name);

    const char * raw_name = narrow_name.c_str();

    return parse_file(raw_name, out_space, clear_space);
}
/*
 * std::wstring overload -- forwards to parse_file(const wchar_t *, ...)
 */
KeyValueParser::Status KeyValueParser::parse_file(const std::wstring & file_name, Space & out_space, bool clear_space)
{
    const wchar_t * raw_name = file_name.c_str();

    return parse_file(raw_name, out_space, clear_space);
}
/*
 * parse a char based string and put the key/value pairs into out_space
 *
 * str         - the input, remembered in pchar_ascii for the time of parsing
 * out_space   - the space which receives the parsed pairs
 * clear_space - if true then out_space is reset to an empty object before parsing
 *
 * returns the value of 'status' after parsing
 */
KeyValueParser::Status KeyValueParser::parse(const char * str, Space & out_space, bool clear_space)
{
    prepare_to_parsing();

    // select the output and the input source, then run the common parsing loop
    root_space = &out_space;
    pchar_ascii = str;
    parse(clear_space);

    return status;
}
/*
 * std::string overload -- forwards to parse(const char *, ...)
 */
KeyValueParser::Status KeyValueParser::parse(const std::string & str, Space & out_space, bool clear_space)
{
    const char * raw_str = str.c_str();

    return parse(raw_str, out_space, clear_space);
}
/*
 * parse a wide string and put the key/value pairs into out_space
 *
 * str         - the input, remembered in pchar_unicode for the time of parsing
 * out_space   - the space which receives the parsed pairs
 * clear_space - if true then out_space is reset to an empty object before parsing
 *
 * returns the value of 'status' after parsing
 */
KeyValueParser::Status KeyValueParser::parse(const wchar_t * str, Space & out_space, bool clear_space)
{
    prepare_to_parsing();

    // select the output and the input source, then run the common parsing loop
    root_space = &out_space;
    pchar_unicode = str;
    parse(clear_space);

    return status;
}
/*
 * std::wstring overload -- forwards to parse(const wchar_t *, ...)
 */
KeyValueParser::Status KeyValueParser::parse(const std::wstring & str, Space & out_space, bool clear_space)
{
    const wchar_t * raw_str = str.c_str();

    return parse(raw_str, out_space, clear_space);
}
/*
 * parse the content of a TextStream and put the key/value pairs into out_space
 *
 * the parser is given pointers to two local iterators (begin and end of the stream),
 * these pointers are meaningful only until this method returns
 *
 * clear_space - if true then out_space is reset to an empty object before parsing
 *
 * returns the value of 'status' after parsing
 */
KeyValueParser::Status KeyValueParser::parse(const pt::TextStream & str, Space & out_space, bool clear_space)
{
    prepare_to_parsing();
    root_space = &out_space;

    pt::TextStream::const_iterator current = str.begin();
    pt::TextStream::const_iterator finish = str.end();

    text_stream_iterator = &current;
    text_stream_iterator_end = &finish;

    parse(clear_space);

    return status;
}
/*
 * parse the content of a WTextStream and put the key/value pairs into out_space
 *
 * the parser is given pointers to two local iterators (begin and end of the stream),
 * these pointers are meaningful only until this method returns
 *
 * clear_space - if true then out_space is reset to an empty object before parsing
 *
 * returns the value of 'status' after parsing
 */
KeyValueParser::Status KeyValueParser::parse(const pt::WTextStream & str, Space & out_space, bool clear_space)
{
    prepare_to_parsing();
    root_space = &out_space;

    pt::WTextStream::const_iterator current = str.begin();
    pt::WTextStream::const_iterator finish = str.end();

    wtext_stream_iterator = &current;
    wtext_stream_iterator_end = &finish;

    parse(clear_space);

    return status;
}
/*
 * the parsing loop shared by all public parse()/parse_file() methods,
 * the input source and root_space must already be set by the caller
 *
 * clear_root_space - if true then root_space is reset to an empty object first
 *
 * the input is read as a sequence of "key <separator> value" pairs
 * divided by option_delimiter; a pair is added to root_space when
 * the key is not empty or when a separator was found (so a key without
 * a separator is stored with an empty value, and an empty piece between
 * two option delimiters is ignored)
 *
 * sets status to ok and line to 1
 */
void KeyValueParser::parse(bool clear_root_space)
{
    status = ok;
    line = 1;

    if( clear_root_space )
        root_space->set_empty_object();

    // the init part puts the first character to lastc,
    // lastc equal to -1 means the end of the input
    for( read_char() ; lastc != -1 ; )
    {
        key.clear();
        value.clear();

        const wchar_t found = read_token(key, separator, option_delimiter);
        const bool has_value = (found == separator);

        if( has_value )
            read_token(value, option_delimiter);

        if( has_value || !key.empty() )
            root_space->add(key, value);
    }

    // do not keep the last pair in the temporary buffers
    key.clear();
    value.clear();
}
wchar_t pt::KeyValueParser::read_token(std::wstring & token, wchar_t delimit1, wchar_t delimit2)
{
token.clear();
skip_white();
wchar_t delimit = -1;
while( lastc != -1 )
{
if( delimit1 != -1 && lastc == delimit1 )
{
delimit = delimit1;
read_char();
break;
}
if( delimit2 != -1 && lastc == delimit2 )
{
delimit = delimit2;
read_char();
break;
}
token += lastc;
read_char();
}
pt::trim_white(token, false, false);
return delimit;
}
void pt::KeyValueParser::skip_white()
{
while( pt::is_white(lastc, true, true) )
{
read_char();
}
}
int pt::KeyValueParser::read_char()
{
return read_char_no_escape();
}
} // namespace

173
src/space/keyvalueparser.h Normal file
View File

@ -0,0 +1,173 @@
/*
* This file is a part of PikoTools
* and is distributed under the 2-Clause BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2023, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef headerfile_pikotools_src_space_keyvalueparser
#define headerfile_pikotools_src_space_keyvalueparser
#include "space.h"
#include "convert/baseparser.h"
namespace pt
{
/*
 * a parser for simple key/value strings, e.g. "key1=value1, key2=value2"
 *
 * the parsed pairs are added to a Space object,
 * the separator between a key and a value and the delimiter
 * between pairs are configurable
 */
class KeyValueParser : public BaseParser
{
public:

    /*
     * ctor -- sets default values
     */
    KeyValueParser();

    /*
     * status of parsing
     * ok - input stream has been parsed correctly
     * cant_open_file - I cannot open the file (returns only in a case when parsing a file)
     *
     */
    enum Status {
        ok,
        cant_open_file,
    };

    /*
     * the last status of parsing, set by parse() methods
     * it is 'ok' until a parsing method sets it to something else
     */
    Status status = ok;

    /*
     * if true then the input file or string (char* or std::string) is treated as UTF-8
     * default true
     *
     * the internal storage for strings is std::wstring so if you call use_utf8(false) then
     * the characters of input string will be simple static_cast<> from char to wchar_t
     *
     */
    void use_utf8(bool utf);

    /*
     * set a separator between a variable and a value
     * default: '='
     */
    void set_separator(wchar_t separator);

    /*
     * set an option delimiter
     * default: ','
     */
    void set_option_delimiter(wchar_t option_delimiter);

    /*
     * parse a file
     *
     * file_name   - path of the file (wide names are converted to UTF-8 before opening)
     * out_space   - the space to which the parsed key/value pairs are added
     * clear_space - if true then out_space is made an empty object before parsing
     *
     * returns the status of parsing (the same value is stored in 'status')
     */
    Status parse_file(const char * file_name, Space & out_space, bool clear_space = true);
    Status parse_file(const std::string & file_name, Space & out_space, bool clear_space = true);
    Status parse_file(const wchar_t * file_name, Space & out_space, bool clear_space = true);
    Status parse_file(const std::wstring & file_name, Space & out_space, bool clear_space = true);

    /*
     * parse a string or a text stream
     *
     * str         - the input to parse
     * out_space   - the space to which the parsed key/value pairs are added
     * clear_space - if true then out_space is made an empty object before parsing
     *
     * returns the status of parsing (the same value is stored in 'status')
     */
    Status parse(const char * str, Space & out_space, bool clear_space = true);
    Status parse(const std::string & str, Space & out_space, bool clear_space = true);
    Status parse(const wchar_t * str, Space & out_space, bool clear_space = true);
    Status parse(const std::wstring & str, Space & out_space, bool clear_space = true);
    Status parse(const pt::TextStream & str, Space & out_space, bool clear_space = true);
    Status parse(const pt::WTextStream & str, Space & out_space, bool clear_space = true);

private:

    /*
     * current output space
     * set by the public parse()/parse_file() methods to the address of their out_space argument
     */
    Space * root_space;

    /*
     * separator between a variable and a value, default: '='
     */
    wchar_t separator;

    /*
     * option delimiter, default: ','
     */
    wchar_t option_delimiter;

    /*
     * a temporary buffer for the key of the pair which is currently being read
     */
    std::wstring key;

    /*
     * a temporary buffer for the value of the pair which is currently being read
     */
    std::wstring value;

    /*
     * called by every public parsing method before a new input source is set
     */
    void prepare_to_parsing();

    /*
     * the main parsing loop, the input source and root_space must be already set
     */
    void parse(bool clear_root_space);

    /*
     * read one token up to delimit1 or delimit2 (or to the end of the input),
     * returns the delimiter which ended the token;
     * the default value of delimit2 means that the second delimiter is not used
     */
    wchar_t read_token(std::wstring & token, wchar_t delimit1, wchar_t delimit2 = -1);

    /*
     * skip white characters starting from the current character
     */
    void skip_white();

    /*
     * read the next character from the input
     */
    int read_char();

};
} // namespace
#endif