diff --git a/src/mainspaceparser/mainspaceparser.cpp b/src/mainspaceparser/mainspaceparser.cpp index e86d491..cfd2f79 100644 --- a/src/mainspaceparser/mainspaceparser.cpp +++ b/src/mainspaceparser/mainspaceparser.cpp @@ -5,7 +5,7 @@ */ /* - * Copyright (c) 2016-2017, Tomasz Sowa + * Copyright (c) 2016-2021, Tomasz Sowa * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -40,100 +40,131 @@ #include "utf8/utf8.h" #include -// REMOVE ME -#include namespace PT { -#ifdef nonexisting_value MainSpaceParser::MainSpaceParser() { space = 0; - options_space = 0; - use_utf8 = true; + arguments_required_space = 0; + should_use_utf8 = true; last_status = status_ok; + non_option_arguments_name = L"args"; } MainSpaceParser::~MainSpaceParser() { - - } -void MainSpaceParser::UTF8(bool utf8) +void MainSpaceParser::use_utf8(bool utf8) { - use_utf8 = utf8; + should_use_utf8 = utf8; } -void MainSpaceParser::SetSpace(Space & space_ref) +void MainSpaceParser::set_non_options_arguments_name(const wchar_t * name) { - space = &space_ref; - options_space = 0; + non_option_arguments_name = name; } -std::wstring & MainSpaceParser::GetErrorToken() +void MainSpaceParser::set_non_options_arguments_name(const std::wstring & name) { - return last_error_token; + non_option_arguments_name = name; } -MainSpaceParser::Status MainSpaceParser::Parse(int argc, const char ** argv) -{ - if( !space ) - { - return status_space_not_assigned; - } - options_space = space->FindSpace(L"options"); - last_status = status_ok; - last_error_token.clear(); +std::wstring & MainSpaceParser::get_wrong_option() +{ + return last_error_option; +} + +MainSpaceParser::Status MainSpaceParser::parse(int argc, const char ** argv, Space & out_space) +{ + space = &out_space; + arguments_required_space = nullptr; + + return parse(argc, argv); +} + + +MainSpaceParser::Status MainSpaceParser::parse(int argc, const char ** argv, Space & out_space, const Space & options) +{ + space = 
&out_space; + arguments_required_space = &options;; + + return parse(argc, argv); +} + + +MainSpaceParser::Status MainSpaceParser::parse(int argc, const char ** argv) +{ + last_status = status_ok; + last_error_option.clear(); for(size_t i=1 ; i < (size_t)argc && last_status == status_ok ; ) { - Parse((size_t)argc, argv, i); + parse((size_t)argc, argv, i); } + options.clear(); + option.clear(); + argument.clear(); + arguments.clear(); + return last_status; } -void MainSpaceParser::Parse(size_t argc, const char ** argv, size_t & argv_index) +void MainSpaceParser::parse(size_t argc, const char ** argv, size_t & argv_index) { const char * pchar = argv[argv_index]; if( *pchar == '-' ) { + if( *(pchar+1) == '-' && *(pchar+2) == 0 ) + { + // two hyphens only "--" + argv_index += 1; + parse_non_option_arguments(argc, argv, argv_index); + } + else if( *(pchar+1) == '-' ) { - ParseMultiArgument(argc, argv, argv_index); + // two hyphens and a string, such as "--abc" + parse_long_option(argc, argv, argv_index); + } + else + if( *(pchar+1) != 0 ) + { + // one hyphen and a string, such as "-abc" + parse_short_option(argc, argv, argv_index); } else { - ParseSingleArgument(argc, argv, argv_index); + parse_non_option_arguments(argc, argv, argv_index); } } else { - last_status = status_syntax_error; - ConvertStr(pchar, last_error_token); + parse_non_option_arguments(argc, argv, argv_index); } } -void MainSpaceParser::ConvertStr(const char * src, std::wstring & dst) +void MainSpaceParser::convert_str(const char * src, std::wstring & dst) { - if( use_utf8 ) + if( should_use_utf8 ) { - PT::UTF8ToWide(src,dst); + PT::UTF8ToWide(src, dst); } else { @@ -145,154 +176,219 @@ void MainSpaceParser::ConvertStr(const char * src, std::wstring & dst) } -void MainSpaceParser::ParseSingleArgument(size_t argc, const char ** argv, size_t & argv_index) +void MainSpaceParser::convert_str(const char * src, size_t len, std::wstring & dst) { - ConvertStr(argv[argv_index] + 1, wide_arg); - const wchar_t * 
wide_pchar = wide_arg.c_str(); - - temp_list_val.clear(); - bool was_option = false; - argv_index += 1; - - for( ; *wide_pchar && !was_option ; ++wide_pchar ) + if( should_use_utf8 ) { - temp_arg = *wide_pchar; - size_t opt_size = RequireOption(temp_arg); - - if( opt_size > 0 ) - { - was_option = true; - - if( *(wide_pchar+1) ) - { - temp_val = wide_pchar + 1; - temp_list_val.push_back(temp_val); - opt_size -= 1; - } - - for( ; opt_size > 0 && argv_index < argc ; --opt_size, ++argv_index) - { - ConvertStr(argv[argv_index], temp_val); - temp_list_val.push_back(temp_val); - } - - if( opt_size > 0 ) - { - last_status = status_reading_eof; - last_error_token.clear(); - } - } - - temp_val.clear(); - AddValueToItem(temp_arg, temp_val, temp_list_val); - } -} - - -void MainSpaceParser::ParseMultiArgument(size_t argc, const char ** argv, size_t & argv_index) -{ - ConvertStr(argv[argv_index] + 2, temp_arg); - argv_index += 1; - - size_t opt_size = RequireOption(temp_arg); - temp_list_val.clear(); - - if( opt_size > 0 ) - { - for( ; opt_size > 0 && argv_index < argc ; --opt_size, ++argv_index) - { - ConvertStr(argv[argv_index], temp_val); - temp_list_val.push_back(temp_val); - } - - if( opt_size > 0 ) - { - last_status = status_reading_eof; - last_error_token.clear(); - } - } - - temp_val.clear(); - AddValueToItem(temp_arg, temp_val, temp_list_val); -} - - - -void MainSpaceParser::AddValueToItem(const std::wstring & name, const std::wstring & empty_value, const std::vector & list) -{ - std::wstring * val = space->GetFirstValue(name); - - if( !val ) - { - if( list.empty() ) - space->Add(name, empty_value); - else - if( list.size() == 1 ) - space->Add(name, list[0]); - else - space->table[name] = list; // !! 
IMPROVE ME there'll be a new api in space + PT::UTF8ToWide(src, len, dst); } else { - PT::Space::Table::iterator i = space->table.find(name); - PT::Space::Value * table_value; + dst.clear(); - if( i == space->table.end() ) + for(size_t i=0 ; i < len ; ++i) + dst += (wchar_t)(unsigned char)src[i]; + } +} + + +void MainSpaceParser::convert_str(const std::wstring & src, Space & space) +{ + if( should_use_utf8 ) + { + space.set_empty_wstring(); + space.value.value_wstring = src; + } + else + { + space.set_empty_string(); + std::string & dst = space.value.value_string; + + dst.clear(); + + for(size_t i=0 ; i < src.size() ; ++i) + dst += (char)src[i]; + } +} + + +void MainSpaceParser::parse_short_option(size_t argc, const char ** argv, size_t & argv_index) +{ + convert_str(argv[argv_index] + 1, options); + const wchar_t * options_pchar = options.c_str(); + + arguments.clear(); + bool was_argument = false; + argv_index += 1; + + for( ; *options_pchar && !was_argument && last_status == status_ok ; ++options_pchar ) + { + option = *options_pchar; + size_t args_len = how_many_arguments_required(option); + + if( args_len > 0 ) { - table_value = &space->table[name]; - table_value->push_back(*val); - //space->table_single.erase(name); + was_argument = true; + + if( *(options_pchar+1) ) + { + // first argument is directly behind the option + argument = options_pchar + 1; + arguments.push_back(argument); + args_len -= 1; + } + + parse_arguments(argc, argv, argv_index, args_len); + } + + add_option_to_space(option, arguments); + } +} + + +void MainSpaceParser::parse_long_option(size_t argc, const char ** argv, size_t & argv_index) +{ + const char * option_begin = argv[argv_index] + 2; // skip first two hyphens -- + const char * option_end = option_begin; + bool is_equal_form = false; // is the option in the form with equal sign, such as: option=argument + + while( *option_end != 0 && *option_end != '=' ) + { + option_end += 1; + } + + if( *option_end == '=' ) + { + is_equal_form = 
true; + convert_str(option_begin, option_end - option_begin, option); + convert_str(option_end + 1, argument); + } + else + { + convert_str(option_begin, option); + } + + argv_index += 1; + size_t args_len = how_many_arguments_required(option); + arguments.clear(); + + if( is_equal_form ) + { + if( args_len == 0 ) + { + if( !argument.empty() ) + { + // report an error + last_status = status_argument_provided; + last_error_option = option; + } + } + else + if( args_len == 1 ) + { + // argument can be empty in such a case: option= + // we treat it as if the argument would not be provided + if( !argument.empty() ) + { + arguments.push_back(argument); + args_len -= 1; + } } else { - table_value = &i->second; + // args_len is > 1 but when using option=argument form + // we can provide only one argument + last_status = status_argument_not_provided; + last_error_option = option; } + } - if( list.empty() ) - { - table_value->push_back(empty_value); - } - else - { - for(const auto & list_item : list) - table_value->push_back(list_item); - } + if( last_status == status_ok ) + { + parse_arguments(argc, argv, argv_index, args_len); + add_option_to_space(option, arguments); + } +} + + +void MainSpaceParser::parse_arguments(size_t argc, const char ** argv, size_t & argv_index, size_t args_len) +{ + for( ; args_len > 0 && argv_index < argc ; --args_len, ++argv_index) + { + convert_str(argv[argv_index], argument); + arguments.push_back(argument); + } + + if( args_len > 0 ) + { + last_status = status_argument_not_provided; + last_error_option = option; } } -size_t MainSpaceParser::RequireOption(const std::wstring & arg) +void MainSpaceParser::parse_non_option_arguments(size_t argc, const char ** argv, size_t & argv_index) +{ + Space * table_with_args = new Space(); + table_with_args->set_empty_table(); + + for( ; argv_index < argc ; ++argv_index) + { + convert_str(argv[argv_index], argument); + table_with_args->add(argument); + } + + space->add(non_option_arguments_name, 
table_with_args); +} + + +void MainSpaceParser::add_option_to_space(const std::wstring & option, const std::vector & arguments) +{ + Space * option_table = space->get_object_field(option); + + if( !option_table ) + { + option_table = &space->add_empty_space(option); + } + + if( !option_table->is_table()) + { + option_table->set_empty_table(); + } + + Space * arguments_table = new Space(); + arguments_table->set_empty_table(); + + for(const std::wstring & arg : arguments) + { + Space & space_arg = arguments_table->add_empty_space(); + convert_str(arg, space_arg); + } + + option_table->add(arguments_table); +} + + + +size_t MainSpaceParser::how_many_arguments_required(const std::wstring & arg) { size_t res = 0; - if( options_space ) + if( arguments_required_space && arguments_required_space->is_object() ) { - std::wstring * val = options_space->GetFirstValue(arg); + long res_long = arguments_required_space->to_llong(arg, 0); - if( val ) - { - /* - * IMPLEMENT ME - * add a converter to convert/inttostr.h - * - */ + if( res_long < 0 ) + res_long = 0; - long res_long = wcstol(val->c_str(), 0, 10); - - if( res_long < 0 ) - res_long = 0; - - res = (size_t)res_long; - - //std::wcout << L"argument " << arg << L" needs " << res << L" options" << std::endl; - } + res = (size_t)res_long; + // argument 'arg' needs 'res' options } return res; } -#endif } // namespace diff --git a/src/mainspaceparser/mainspaceparser.h b/src/mainspaceparser/mainspaceparser.h index bddc4bf..a7d9215 100644 --- a/src/mainspaceparser/mainspaceparser.h +++ b/src/mainspaceparser/mainspaceparser.h @@ -5,7 +5,7 @@ */ /* - * Copyright (c) 2016, Tomasz Sowa + * Copyright (c) 2016-2021, Tomasz Sowa * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -46,13 +46,12 @@ namespace PT { -#ifdef nonexisting_value - /* - a very little parser for main(int argc, char ** argv) parameters - look in sample/sample.cpp how to use the parser -*/ + * a very small parser for main(int argc, char ** argv) parameters + * + * + */ class MainSpaceParser { public: @@ -60,42 +59,102 @@ public: MainSpaceParser(); ~MainSpaceParser(); + + /* + * status_ok - all arguments have been parsed correctly + * + * status_argument_provided - an argument has been provided but was not requested + * this can happen when using the long form with an equal sign, such as: --option=argument + * and in 'options' space the option either was not set or has zero requested arguments + * + * status_argument_not_provided - an argument or arguments are required but were not provided + * this can be returned in two situations: + * 1. when using the long form with an equal sign, such as: --option=argument and in 'options' space + * you have requested more than one argument + * 2. 
when reading arguments and the input strings ended + * + */ enum Status { status_ok = 0, - status_space_not_assigned = 1, - status_syntax_error = 2, - status_reading_eof = 3 /* CHANGE ME give a better name */ + status_argument_provided = 1, + status_argument_not_provided = 2, }; - void SetSpace(Space & space); - Status Parse(int argc, const char ** argv); - void UTF8(bool utf8); + /* + * the name of a field in the output Struct space for non-option arguments (those after two hyphens --) + * default: L"args" + * they will be set as a table of strings/wstrings + * + */ + void set_non_options_arguments_name(const wchar_t * name); + void set_non_options_arguments_name(const std::wstring & name); + + + /* + * parse parameters + * argc argv have the same meaning as in the main(int argc, const char ** argv) function + * the first argument from argv is usually the name of the program and is skipped by this parser + * + * return value: look at the description of the Status enum + * + */ + Status parse(int argc, const char ** argv, Space & out_space); + Status parse(int argc, const char ** argv, Space & out_space, const Space & options); + + + /* + * whether or not option arguments should be converted from utf8 char* strings to wide strings (std::wstring) + * default true + * + * if true all arguments in Space struct will be saved as std::wstring + * if false all arguments will be std::string (they are read as they are without checking + * whether correct utf8 characters are encountered) + * + * arguments are always held as std::wstring (in such a case is defined ObjectType in Space struct) + * when using use_utf8(false) characters will not be treated as a utf8 string but just all 8bit char bytes + * will be copied to std::wstring + * + */ + void use_utf8(bool utf8); + + + /* + * return the last option name which was incorrectly parsed + * or an empty string if status was equal to status_ok + */ + std::wstring & get_wrong_option(); - std::wstring & GetErrorToken(); private: Space 
* space; - Space * options_space; - std::wstring wide_arg, temp_arg, temp_val; - std::vector temp_list_val; - bool use_utf8; + const Space * arguments_required_space; + std::wstring non_option_arguments_name; + std::wstring options, option, argument; + std::vector arguments; + bool should_use_utf8; Status last_status; - std::wstring last_error_token; + std::wstring last_error_option; - void ConvertStr(const char * src, std::wstring & dst); - void Parse(size_t argc, const char ** argv, size_t & argv_index); - void ParseSingleArgument(size_t argc, const char ** argv, size_t & argv_index); - void ParseMultiArgument(size_t argc, const char ** argv, size_t & argv_index); - size_t RequireOption(const std::wstring & arg); - void AddValueToItem(const std::wstring & name, const std::wstring & empty_value, const std::vector & list); + void convert_str(const char * src, std::wstring & dst); + void convert_str(const char * src, size_t len, std::wstring & dst); + void convert_str(const std::wstring & src, Space & space); + + Status parse(int argc, const char ** argv); + void parse(size_t argc, const char ** argv, size_t & argv_index); + void parse_short_option(size_t argc, const char ** argv, size_t & argv_index); + void parse_long_option(size_t argc, const char ** argv, size_t & argv_index); + void parse_arguments(size_t argc, const char ** argv, size_t & argv_index, size_t args_len); + void parse_non_option_arguments(size_t argc, const char ** argv, size_t & argv_index); + size_t how_many_arguments_required(const std::wstring & arg); + void add_option_to_space(const std::wstring & option, const std::vector & arguments); }; -#endif + } // namespace