From 90915a720985caf16b2a5e835676f2ede4d6a6c9 Mon Sep 17 00:00:00 2001
From: Tomasz Sowa
Date: Tue, 7 Nov 2023 03:54:34 +0100
Subject: [PATCH] add a KeyValueParser for parsing simple key/value strings

---
 src/space/keyvalueparser.cpp | 365 +++++++++++++++++++++++++++++++++++
 src/space/keyvalueparser.h   | 173 +++++++++++++++++
 2 files changed, 538 insertions(+)
 create mode 100644 src/space/keyvalueparser.cpp
 create mode 100644 src/space/keyvalueparser.h

diff --git a/src/space/keyvalueparser.cpp b/src/space/keyvalueparser.cpp
new file mode 100644
index 0000000..a4a7261
--- /dev/null
+++ b/src/space/keyvalueparser.cpp
@@ -0,0 +1,365 @@
+/*
+ * This file is a part of PikoTools
+ * and is distributed under the 2-Clause BSD licence.
+ * Author: Tomasz Sowa
+ */
+
+/*
+ * Copyright (c) 2023, Tomasz Sowa
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <ios>
+#include <string>
+#include "keyvalueparser.h"
+#include "utf8/utf8.h"
+#include "convert/strtoint.h"
+
+
+namespace pt
+{
+
+
+
+
+/*
+ * ctor -- sets default values: no output space yet, '=' as the key/value
+ * separator, ',' as the option delimiter, input treated as UTF-8
+ * and status set to ok (so reading status before the first parse is well defined)
+ */
+KeyValueParser::KeyValueParser()
+{
+    root_space = nullptr;
+    separator = '=';
+    option_delimiter = ',';
+    input_as_utf8 = true;
+    status = ok;
+}
+
+
+/*
+ * store the flag telling whether narrow input (a file, char* or std::string) is UTF-8
+ */
+void KeyValueParser::use_utf8(bool utf)
+{
+    input_as_utf8 = utf;
+}
+
+
+/*
+ * set the character which separates a key from its value
+ */
+void KeyValueParser::set_separator(wchar_t separator)
+{
+    this->separator = separator;
+}
+
+
+/*
+ * set the character which ends one key/value pair and starts the next one
+ */
+void KeyValueParser::set_option_delimiter(wchar_t option_delimiter)
+{
+    this->option_delimiter = option_delimiter;
+}
+
+
+/*
+ * called at the beginning of every public parse method, before the new input is selected
+ * (clear_input_flags() is not defined here -- presumably it forgets the previous input source)
+ */
+void KeyValueParser::prepare_to_parsing()
+{
+    clear_input_flags();
+}
+
+
+/*
+ * parse the file file_name and add the key/value pairs found there to out_space
+ * (out_space is reset with set_empty_object() first if clear_space is true)
+ *
+ * returns cant_open_file if the file cannot be opened -- out_space is left untouched then,
+ * otherwise returns the status set by parse(bool)
+ */
+KeyValueParser::Status KeyValueParser::parse_file(const char * file_name, Space & out_space, bool clear_space)
+{
+    prepare_to_parsing();
+
+    reading_from_file = true;
+    root_space = &out_space;
+
+    file.open(file_name, std::ios_base::binary | std::ios_base::in);
+
+    if( file )
+    {
+        parse(clear_space);
+        file.close();
+    }
+    else
+    {
+        // without this the caller would get a stale status from a previous call
+        status = cant_open_file;
+    }
+
+    return status;
+}
+
+
+/*
+ * the same as parse_file(const char *, ...) but the file name is an std::string
+ */
+KeyValueParser::Status KeyValueParser::parse_file(const std::string & file_name, Space & out_space, bool clear_space)
+{
+    return parse_file(file_name.c_str(), out_space, clear_space);
+}
+
+
+/*
+ * the same as parse_file(const char *, ...) but the file name is a wide string,
+ * it is converted with wide_to_utf8() before the file is opened
+ */
+KeyValueParser::Status KeyValueParser::parse_file(const wchar_t * file_name, Space & out_space, bool clear_space)
+{
+    std::string file_name_utf8;
+
+    wide_to_utf8(file_name, file_name_utf8);
+    return parse_file(file_name_utf8.c_str(), out_space, clear_space);
+}
+
+
+/*
+ * the same as parse_file(const wchar_t *, ...) but the file name is an std::wstring
+ */
+KeyValueParser::Status KeyValueParser::parse_file(const std::wstring & file_name, Space & out_space, bool clear_space)
+{
+    return parse_file(file_name.c_str(), out_space, clear_space);
+}
+
+
+/*
+ * parse the narrow string str (treated as UTF-8 unless use_utf8(false) was called)
+ * and add the key/value pairs to out_space
+ */
+KeyValueParser::Status KeyValueParser::parse(const char * str, Space & out_space, bool clear_space)
+{
+    prepare_to_parsing();
+
+    pchar_ascii = str;
+    root_space = &out_space;
+
+    parse(clear_space);
+
+    return status;
+}
+
+
+/*
+ * the same as parse(const char *, ...) but the input is an std::string
+ */
+KeyValueParser::Status KeyValueParser::parse(const std::string & str, Space & out_space, bool clear_space)
+{
+    return parse(str.c_str(), out_space, clear_space);
+}
+
+
+/*
+ * parse the wide string str and add the key/value pairs to out_space
+ */
+KeyValueParser::Status KeyValueParser::parse(const wchar_t * str, Space & out_space, bool clear_space)
+{
+    prepare_to_parsing();
+
+    pchar_unicode = str;
+    root_space = &out_space;
+
+    parse(clear_space);
+
+    return status;
+}
+
+
+/*
+ * the same as parse(const wchar_t *, ...) but the input is an std::wstring
+ */
+KeyValueParser::Status KeyValueParser::parse(const std::wstring & str, Space & out_space, bool clear_space)
+{
+    return parse(str.c_str(), out_space, clear_space);
+}
+
+
+/*
+ * parse the content of a TextStream and add the key/value pairs to out_space
+ */
+KeyValueParser::Status KeyValueParser::parse(const pt::TextStream & str, Space & out_space, bool clear_space)
+{
+    prepare_to_parsing();
+
+    pt::TextStream::const_iterator start = str.begin();
+    pt::TextStream::const_iterator end = str.end();
+
+    text_stream_iterator = &start;
+    text_stream_iterator_end = &end;
+    root_space = &out_space;
+
+    parse(clear_space);
+
+    // start and end are locals -- do not leave pointers to them behind
+    text_stream_iterator = nullptr;
+    text_stream_iterator_end = nullptr;
+
+    return status;
+}
+
+
+/*
+ * parse the content of a WTextStream and add the key/value pairs to out_space
+ */
+KeyValueParser::Status KeyValueParser::parse(const pt::WTextStream & str, Space & out_space, bool clear_space)
+{
+    prepare_to_parsing();
+
+    pt::WTextStream::const_iterator start = str.begin();
+    pt::WTextStream::const_iterator end = str.end();
+
+    wtext_stream_iterator = &start;
+    wtext_stream_iterator_end = &end;
+    root_space = &out_space;
+
+    parse(clear_space);
+
+    // start and end are locals -- do not leave pointers to them behind
+    wtext_stream_iterator = nullptr;
+    wtext_stream_iterator_end = nullptr;
+
+    return status;
+}
+
+
+/*
+ * the main parsing loop, the input source must already be selected
+ *
+ * the input is a list of pairs: key SEPARATOR value DELIMITER key SEPARATOR value ...
+ * a key without a separator is added with an empty value,
+ * an empty key is added only if it was followed by a separator
+ * (so empty items such as in "a=b,,c=d" are skipped)
+ */
+void KeyValueParser::parse(bool clear_root_space)
+{
+    line = 1;
+    status = ok;
+
+    if( clear_root_space )
+    {
+        root_space->set_empty_object();
+    }
+
+    read_char(); // put first character to lastc
+
+    while( lastc != -1 )
+    {
+        key.clear();
+        value.clear();
+        wchar_t delimit = read_token(key, separator, option_delimiter);
+
+        if( delimit == separator )
+        {
+            // the value can contain the separator character, it ends only at the option delimiter
+            read_token(value, option_delimiter);
+        }
+
+        if( !key.empty() || delimit == separator )
+        {
+            root_space->add(key, value);
+        }
+    }
+
+    key.clear();
+    value.clear();
+}
+
+
+/*
+ * read characters to token until delimit1 or delimit2 is found or the input ends
+ *
+ * white characters before the token are skipped with skip_white() and the token
+ * is passed to pt::trim_white() at the end; the delimiter itself is consumed but not stored
+ *
+ * returns the delimiter which ended the token or -1 if the end of the input was reached,
+ * a delimiter equal to -1 is not used
+ */
+wchar_t KeyValueParser::read_token(std::wstring & token, wchar_t delimit1, wchar_t delimit2)
+{
+    token.clear();
+    skip_white();
+    wchar_t delimit = -1;
+
+    while( lastc != -1 )
+    {
+        if( delimit1 != -1 && lastc == delimit1 )
+        {
+            delimit = delimit1;
+            read_char();
+            break;
+        }
+
+        if( delimit2 != -1 && lastc == delimit2 )
+        {
+            delimit = delimit2;
+            read_char();
+            break;
+        }
+
+        token += static_cast<wchar_t>(lastc);
+        read_char();
+    }
+
+    pt::trim_white(token, false, false);
+    return delimit;
+}
+
+
+/*
+ * skip characters for which pt::is_white(lastc, true, true) is true
+ */
+void KeyValueParser::skip_white()
+{
+    while( pt::is_white(lastc, true, true) )
+    {
+        read_char();
+    }
+}
+
+
+/*
+ * read the next character -- simply forwards to read_char_no_escape()
+ */
+int KeyValueParser::read_char()
+{
+    return read_char_no_escape();
+}
+
+
+} // namespace
diff --git a/src/space/keyvalueparser.h b/src/space/keyvalueparser.h
new file mode 100644
index 0000000..40b18c5
--- /dev/null
+++ b/src/space/keyvalueparser.h
@@ -0,0 +1,173 @@
+/*
+ * This file is a part of PikoTools
+ * and is distributed under the 2-Clause BSD licence.
+ * Author: Tomasz Sowa
+ */
+
+/*
+ * Copyright (c) 2023, Tomasz Sowa
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef headerfile_pikotools_src_space_keyvalueparser
+#define headerfile_pikotools_src_space_keyvalueparser
+
+#include "space.h"
+#include "convert/baseparser.h"
+
+
+
+namespace pt
+{
+
+
+
+class KeyValueParser : public BaseParser
+{
+public:
+
+
+    /*
+     * ctor -- sets default values
+     */
+    KeyValueParser();
+
+
+    /*
+     * status of parsing
+     * ok             - input stream has been parsed correctly
+     * cant_open_file - the file cannot be opened (returned only when parsing a file)
+     *
+     */
+    enum Status {
+        ok,
+        cant_open_file,
+    };
+
+
+    /*
+     * the last status of parsing, set by parse() methods
+     */
+    Status status;
+
+
+
+    /*
+     * if true then the input file or string (char* or std::string) is treated as UTF-8
+     * default true
+     *
+     * the internal storage for strings is std::wstring so if you call use_utf8(false) then
+     * the characters of the input string will simply be converted with static_cast<> from char to wchar_t
+     *
+     */
+    void use_utf8(bool utf);
+
+
+    /*
+     * set a separator between a variable and a value
+     * default: '='
+     */
+    void set_separator(wchar_t separator);
+
+
+    /*
+     * set an option delimiter
+     * default: ','
+     */
+    void set_option_delimiter(wchar_t option_delimiter);
+
+    /*
+     * parse a file and add each key/value pair found there to out_space,
+     * if clear_space is true then out_space is reset with set_empty_object() before parsing
+     */
+    Status parse_file(const char * file_name, Space & out_space, bool clear_space = true);
+    Status parse_file(const std::string & file_name, Space & out_space, bool clear_space = true);
+    Status parse_file(const wchar_t * file_name, Space & out_space, bool clear_space = true);
+    Status parse_file(const std::wstring & file_name, Space & out_space, bool clear_space = true);
+
+    /*
+     * the same as parse_file() but the input is a string or a text stream instead of a file
+     */
+    Status parse(const char * str, Space & out_space, bool clear_space = true);
+    Status parse(const std::string & str, Space & out_space, bool clear_space = true);
+    Status parse(const wchar_t * str, Space & out_space, bool clear_space = true);
+    Status parse(const std::wstring & str, Space & out_space, bool clear_space = true);
+    Status parse(const pt::TextStream & str, Space & out_space, bool clear_space = true);
+    Status parse(const pt::WTextStream & str, Space & out_space, bool clear_space = true);
+
+
+
+private:
+
+
+    /*
+     * current output space
+     * (set by the parse methods to their out_space argument)
+     */
+    Space * root_space;
+
+
+    /*
+     * separator between a variable and a value, default: '='
+     */
+    wchar_t separator;
+
+
+    /*
+     * option delimiter, default: ','
+     */
+    wchar_t option_delimiter;
+
+
+    /*
+     * buffer for the key of the pair which is currently being parsed
+     */
+    std::wstring key;
+
+
+    /*
+     * buffer for the value of the pair which is currently being parsed
+     */
+    std::wstring value;
+
+
+
+    void prepare_to_parsing(); // calls clear_input_flags() before a new input is selected
+    void parse(bool clear_root_space); // the main loop: reads key/value pairs and adds them to root_space
+    wchar_t read_token(std::wstring & token, wchar_t delimit1, wchar_t delimit2 = -1); // reads one token, returns the delimiter found or -1
+    void skip_white(); // skips white characters in the input
+    int read_char(); // forwards to read_char_no_escape()
+
+
+};
+
+
+
+
+} // namespace
+
+
+#endif