updated MainSpaceParser to the new Space format, changed api to snake case

now we can:
- parse short options, those beginning with a hyphen '-'
- parse long options, those beginning with two hyphens '--'
- long options can have arguments in two forms:
  - either with an equal sign, e.g.: --option-name=argument
  - or with a space, e.g.: --option argument
    in the latter case we can have more than one argument, e.g.: --option argument1 argument2
- parse non-option arguments, those after two hyphens to the end of a string, e.g.: -- arg1 arg2
This commit is contained in:
Tomasz Sowa 2021-05-17 03:09:21 +02:00
parent 77d7bb5e64
commit ac691bccb7
2 changed files with 333 additions and 178 deletions

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2016-2017, Tomasz Sowa
* Copyright (c) 2016-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -40,100 +40,131 @@
#include "utf8/utf8.h"
#include <string.h>
// REMOVE ME
#include <iostream>
namespace PT
{
#ifdef nonexisting_value
MainSpaceParser::MainSpaceParser()
{
space = 0;
options_space = 0;
use_utf8 = true;
arguments_required_space = 0;
should_use_utf8 = true;
last_status = status_ok;
non_option_arguments_name = L"args";
}
MainSpaceParser::~MainSpaceParser()
{
}
void MainSpaceParser::UTF8(bool utf8)
void MainSpaceParser::use_utf8(bool utf8)
{
use_utf8 = utf8;
should_use_utf8 = utf8;
}
void MainSpaceParser::SetSpace(Space & space_ref)
void MainSpaceParser::set_non_options_arguments_name(const wchar_t * name)
{
space = &space_ref;
options_space = 0;
non_option_arguments_name = name;
}
std::wstring & MainSpaceParser::GetErrorToken()
void MainSpaceParser::set_non_options_arguments_name(const std::wstring & name)
{
return last_error_token;
non_option_arguments_name = name;
}
MainSpaceParser::Status MainSpaceParser::Parse(int argc, const char ** argv)
{
if( !space )
{
return status_space_not_assigned;
}
options_space = space->FindSpace(L"options");
std::wstring & MainSpaceParser::get_wrong_option()
{
return last_error_option;
}
/*
 * parse the argv strings and put the result into out_space
 *
 * this overload does not take a table with the number of required arguments:
 * arguments_required_space is set to null, so how_many_arguments_required()
 * returns zero for every option
 *
 * return value: the status returned by parse(argc, argv)
 */
MainSpaceParser::Status MainSpaceParser::parse(int argc, const char ** argv, Space & out_space)
{
	// no information about required arguments in this run
	arguments_required_space = nullptr;
	space = &out_space;

	return parse(argc, argv);
}
/*
 * parse the argv strings and put the result into out_space
 *
 * options is consulted by how_many_arguments_required() to find out
 * how many arguments each option takes; only its address is stored,
 * so it has to stay alive until this method returns
 *
 * return value: the status returned by parse(argc, argv)
 */
MainSpaceParser::Status MainSpaceParser::parse(int argc, const char ** argv, Space & out_space, const Space & options)
{
	space = &out_space;
	arguments_required_space = &options;

	return parse(argc, argv);
}
MainSpaceParser::Status MainSpaceParser::parse(int argc, const char ** argv)
{
last_status = status_ok;
last_error_token.clear();
last_error_option.clear();
for(size_t i=1 ; i < (size_t)argc && last_status == status_ok ; )
{
Parse((size_t)argc, argv, i);
parse((size_t)argc, argv, i);
}
options.clear();
option.clear();
argument.clear();
arguments.clear();
return last_status;
}
void MainSpaceParser::Parse(size_t argc, const char ** argv, size_t & argv_index)
void MainSpaceParser::parse(size_t argc, const char ** argv, size_t & argv_index)
{
const char * pchar = argv[argv_index];
if( *pchar == '-' )
{
if( *(pchar+1) == '-' && *(pchar+2) == 0 )
{
// two hyphens only "--"
argv_index += 1;
parse_non_option_arguments(argc, argv, argv_index);
}
else
if( *(pchar+1) == '-' )
{
ParseMultiArgument(argc, argv, argv_index);
// two hyphens and a string, such as "--abc"
parse_long_option(argc, argv, argv_index);
}
else
if( *(pchar+1) != 0 )
{
// one hyphen and a string, such as "-abc"
parse_short_option(argc, argv, argv_index);
}
else
{
ParseSingleArgument(argc, argv, argv_index);
parse_non_option_arguments(argc, argv, argv_index);
}
}
else
{
last_status = status_syntax_error;
ConvertStr(pchar, last_error_token);
parse_non_option_arguments(argc, argv, argv_index);
}
}
void MainSpaceParser::ConvertStr(const char * src, std::wstring & dst)
void MainSpaceParser::convert_str(const char * src, std::wstring & dst)
{
if( use_utf8 )
if( should_use_utf8 )
{
PT::UTF8ToWide(src,dst);
PT::UTF8ToWide(src, dst);
}
else
{
@ -145,154 +176,219 @@ void MainSpaceParser::ConvertStr(const char * src, std::wstring & dst)
}
void MainSpaceParser::ParseSingleArgument(size_t argc, const char ** argv, size_t & argv_index)
void MainSpaceParser::convert_str(const char * src, size_t len, std::wstring & dst)
{
ConvertStr(argv[argv_index] + 1, wide_arg);
const wchar_t * wide_pchar = wide_arg.c_str();
if( should_use_utf8 )
{
PT::UTF8ToWide(src, len, dst);
}
else
{
dst.clear();
temp_list_val.clear();
bool was_option = false;
for(size_t i=0 ; i < len ; ++i)
dst += (wchar_t)(unsigned char)src[i];
}
}
/*
 * store the wide string src in the given Space node
 *
 * when should_use_utf8 is set the node is reset with set_empty_wstring()
 * and src is copied unchanged to value.value_wstring
 *
 * otherwise the node is reset with set_empty_string() and every wchar_t from src
 * is cast to a single char (a plain narrowing cast, no encoding is performed)
 * and appended to value.value_string
 *
 * note: the 'space' parameter hides the 'space' member of the class in this method
 */
void MainSpaceParser::convert_str(const std::wstring & src, Space & space)
{
	if( !should_use_utf8 )
	{
		space.set_empty_string();
		std::string & narrow = space.value.value_string;
		narrow.clear();

		for(wchar_t wide_char : src)
			narrow += (char)wide_char;

		return;
	}

	space.set_empty_wstring();
	space.value.value_wstring = src;
}
void MainSpaceParser::parse_short_option(size_t argc, const char ** argv, size_t & argv_index)
{
convert_str(argv[argv_index] + 1, options);
const wchar_t * options_pchar = options.c_str();
arguments.clear();
bool was_argument = false;
argv_index += 1;
for( ; *wide_pchar && !was_option ; ++wide_pchar )
for( ; *options_pchar && !was_argument && last_status == status_ok ; ++options_pchar )
{
temp_arg = *wide_pchar;
size_t opt_size = RequireOption(temp_arg);
option = *options_pchar;
size_t args_len = how_many_arguments_required(option);
if( opt_size > 0 )
if( args_len > 0 )
{
was_option = true;
was_argument = true;
if( *(wide_pchar+1) )
if( *(options_pchar+1) )
{
temp_val = wide_pchar + 1;
temp_list_val.push_back(temp_val);
opt_size -= 1;
// first argument is directly behind the option
argument = options_pchar + 1;
arguments.push_back(argument);
args_len -= 1;
}
for( ; opt_size > 0 && argv_index < argc ; --opt_size, ++argv_index)
{
ConvertStr(argv[argv_index], temp_val);
temp_list_val.push_back(temp_val);
parse_arguments(argc, argv, argv_index, args_len);
}
if( opt_size > 0 )
{
last_status = status_reading_eof;
last_error_token.clear();
}
}
temp_val.clear();
AddValueToItem(temp_arg, temp_val, temp_list_val);
add_option_to_space(option, arguments);
}
}
void MainSpaceParser::ParseMultiArgument(size_t argc, const char ** argv, size_t & argv_index)
void MainSpaceParser::parse_long_option(size_t argc, const char ** argv, size_t & argv_index)
{
ConvertStr(argv[argv_index] + 2, temp_arg);
const char * option_begin = argv[argv_index] + 2; // skip first two hyphens --
const char * option_end = option_begin;
bool is_equal_form = false; // is the option in the form with equal sign, such as: option=argument
while( *option_end != 0 && *option_end != '=' )
{
option_end += 1;
}
if( *option_end == '=' )
{
is_equal_form = true;
convert_str(option_begin, option_end - option_begin, option);
convert_str(option_end + 1, argument);
}
else
{
convert_str(option_begin, option);
}
argv_index += 1;
size_t args_len = how_many_arguments_required(option);
arguments.clear();
size_t opt_size = RequireOption(temp_arg);
temp_list_val.clear();
if( opt_size > 0 )
if( is_equal_form )
{
for( ; opt_size > 0 && argv_index < argc ; --opt_size, ++argv_index)
if( args_len == 0 )
{
ConvertStr(argv[argv_index], temp_val);
temp_list_val.push_back(temp_val);
if( !argument.empty() )
{
// report an error
last_status = status_argument_provided;
last_error_option = option;
}
if( opt_size > 0 )
}
else
if( args_len == 1 )
{
last_status = status_reading_eof;
last_error_token.clear();
// argument can be empty in such a case: option=
// we treat it as if the argument had not been provided
if( !argument.empty() )
{
arguments.push_back(argument);
args_len -= 1;
}
}
else
{
// args_len is > 1 but when using option=argument form
// we can provide only one argument
last_status = status_argument_not_provided;
last_error_option = option;
}
}
temp_val.clear();
AddValueToItem(temp_arg, temp_val, temp_list_val);
if( last_status == status_ok )
{
parse_arguments(argc, argv, argv_index, args_len);
add_option_to_space(option, arguments);
}
}
void MainSpaceParser::AddValueToItem(const std::wstring & name, const std::wstring & empty_value, const std::vector<std::wstring> & list)
void MainSpaceParser::parse_arguments(size_t argc, const char ** argv, size_t & argv_index, size_t args_len)
{
std::wstring * val = space->GetFirstValue(name);
if( !val )
for( ; args_len > 0 && argv_index < argc ; --args_len, ++argv_index)
{
if( list.empty() )
space->Add(name, empty_value);
else
if( list.size() == 1 )
space->Add(name, list[0]);
else
space->table[name] = list; // !! IMPROVE ME there'll be a new api in space
}
else
{
PT::Space::Table::iterator i = space->table.find(name);
PT::Space::Value * table_value;
if( i == space->table.end() )
{
table_value = &space->table[name];
table_value->push_back(*val);
//space->table_single.erase(name);
}
else
{
table_value = &i->second;
convert_str(argv[argv_index], argument);
arguments.push_back(argument);
}
if( list.empty() )
if( args_len > 0 )
{
table_value->push_back(empty_value);
}
else
{
for(const auto & list_item : list)
table_value->push_back(list_item);
}
last_status = status_argument_not_provided;
last_error_option = option;
}
}
size_t MainSpaceParser::RequireOption(const std::wstring & arg)
void MainSpaceParser::parse_non_option_arguments(size_t argc, const char ** argv, size_t & argv_index)
{
Space * table_with_args = new Space();
table_with_args->set_empty_table();
for( ; argv_index < argc ; ++argv_index)
{
convert_str(argv[argv_index], argument);
table_with_args->add(argument);
}
space->add(non_option_arguments_name, table_with_args);
}
void MainSpaceParser::add_option_to_space(const std::wstring & option, const std::vector<std::wstring> & arguments)
{
Space * option_table = space->get_object_field(option);
if( !option_table )
{
option_table = &space->add_empty_space(option);
}
if( !option_table->is_table())
{
option_table->set_empty_table();
}
Space * arguments_table = new Space();
arguments_table->set_empty_table();
for(const std::wstring & arg : arguments)
{
Space & space_arg = arguments_table->add_empty_space();
convert_str(arg, space_arg);
}
option_table->add(arguments_table);
}
size_t MainSpaceParser::how_many_arguments_required(const std::wstring & arg)
{
size_t res = 0;
if( options_space )
if( arguments_required_space && arguments_required_space->is_object() )
{
std::wstring * val = options_space->GetFirstValue(arg);
if( val )
{
/*
* IMPLEMENT ME
* add a converter to convert/inttostr.h
*
*/
long res_long = wcstol(val->c_str(), 0, 10);
long res_long = arguments_required_space->to_llong(arg, 0);
if( res_long < 0 )
res_long = 0;
res = (size_t)res_long;
//std::wcout << L"argument " << arg << L" needs " << res << L" options" << std::endl;
}
// argument 'arg' needs 'res' options
}
return res;
}
#endif
} // namespace

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2016, Tomasz Sowa
* Copyright (c) 2016-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -46,13 +46,12 @@
namespace PT
{
#ifdef nonexisting_value
/*
a very little parser for main(int argc, char ** argv) parameters
look in sample/sample.cpp how to use the parser
*/
* a very little parser for parsing main(int argc, char ** argv) parameters
*
*
*/
class MainSpaceParser
{
public:
@ -60,42 +59,102 @@ public:
MainSpaceParser();
~MainSpaceParser();
/*
* status_ok - all arguments have been parsed correctly
*
* status_argument_provided - an argument has been provided but was not requested
* this can happen when using the long form with an equal sign, such as: --option=argument
* and in the 'options' space the option either was not set or has zero requested arguments
*
* status_argument_not_provided - an argument or arguments are required but were not provided
* this can be returned in two situations:
* 1. when using long form with equal sign, such as: --option=argument and in 'options' space
* you have requested more than one argument
* 2. when reading arguments and the input strings ended
*
*/
enum Status
{
status_ok = 0,
status_space_not_assigned = 1,
status_syntax_error = 2,
status_reading_eof = 3 /* CHANGE ME give a better name */
status_argument_provided = 1,
status_argument_not_provided = 2,
};
void SetSpace(Space & space);
Status Parse(int argc, const char ** argv);
void UTF8(bool utf8);
/*
* the name of a field in the output Space struct for non-option arguments (those after two hyphens --)
* default: L"args"
* they will be set as a table of strings/wstrings
*
*/
void set_non_options_arguments_name(const wchar_t * name);
void set_non_options_arguments_name(const std::wstring & name);
/*
* parse parameters
* argc argv have the same meaning as in the main(int argc, const char ** argv) function
* the first argument from argv is usually the name of the program and is skipped by this parser
*
* return value: look at the description of the Status enum
*
*/
Status parse(int argc, const char ** argv, Space & out_space);
Status parse(int argc, const char ** argv, Space & out_space, const Space & options);
/*
* whether or not options arguments should be converted from utf8 char* strings to wide strings (std::wstring)
* default true
*
* if true all arguments in Space struct will be saved as std::wstring
* if false all arguments will be std::string (they are read as they are without checking
* whether correct utf8 characters are encountered)
*
* arguments are always held as std::wstring (in such a case is defined ObjectType in Space struct)
* when using use_utf8(false) characters will not be treated as an utf8 string but just all 8bit char bytes
* will be copied to std::wstring
*
*/
void use_utf8(bool utf8);
/*
* return the last option name which was incorrectly parsed
* or an empty string if status was equal to status_ok
*/
std::wstring & get_wrong_option();
std::wstring & GetErrorToken();
private:
Space * space;
Space * options_space;
std::wstring wide_arg, temp_arg, temp_val;
std::vector<std::wstring> temp_list_val;
bool use_utf8;
const Space * arguments_required_space;
std::wstring non_option_arguments_name;
std::wstring options, option, argument;
std::vector<std::wstring> arguments;
bool should_use_utf8;
Status last_status;
std::wstring last_error_token;
std::wstring last_error_option;
void ConvertStr(const char * src, std::wstring & dst);
void Parse(size_t argc, const char ** argv, size_t & argv_index);
void ParseSingleArgument(size_t argc, const char ** argv, size_t & argv_index);
void ParseMultiArgument(size_t argc, const char ** argv, size_t & argv_index);
size_t RequireOption(const std::wstring & arg);
void AddValueToItem(const std::wstring & name, const std::wstring & empty_value, const std::vector<std::wstring> & list);
void convert_str(const char * src, std::wstring & dst);
void convert_str(const char * src, size_t len, std::wstring & dst);
void convert_str(const std::wstring & src, Space & space);
Status parse(int argc, const char ** argv);
void parse(size_t argc, const char ** argv, size_t & argv_index);
void parse_short_option(size_t argc, const char ** argv, size_t & argv_index);
void parse_long_option(size_t argc, const char ** argv, size_t & argv_index);
void parse_arguments(size_t argc, const char ** argv, size_t & argv_index, size_t args_len);
void parse_non_option_arguments(size_t argc, const char ** argv, size_t & argv_index);
size_t how_many_arguments_required(const std::wstring & arg);
void add_option_to_space(const std::wstring & option, const std::vector<std::wstring> & arguments);
};
#endif
} // namespace