From c65857297be67c211292b49cb319991fe4c00b63 Mon Sep 17 00:00:00 2001 From: Tomasz Sowa Date: Sat, 28 Mar 2009 17:40:36 +0000 Subject: [PATCH] fixed: the parser didn't correctly treat operators for changing the base (radix) -- operators '#' and '&', e.g.: '#sin(1)' was equal '0' -- there was a zero from '#' and then it was multiplied by 'sin(1)' the parser didn't check whether Big::FromString() has actually read a proper value -- the method Big::FromString() didn't have something to report such a situation fixed: Big::FromString() when the base is 10, the method reads the scientific part only if such a part is correctly supplied, e.g: '1234e10', '1234e+10', '1234e-5' previous '1234e' was treated as: '1234e0' (now parsing stops on 'e' and the 'e' can be parsed by other parsers, e.g. the mathematical parser -- now in the parser would be: '1234e' = '1234 * e' = '3354,3597...' ) added: to Int::FromString(): parameter 'const char ** after_source = 0' if exists it's pointing at the end of the parsed string added: to UInt::FromString(), Int::FromString(), Big::FromString(): parameter 'bool * value_read = 0' - (if exists) tells whether something has actually been read (at least one digit) added: the parser checks itself for the operators for changing the base (operators '#' and '&') changed: in the parser: the form with operators '#' and '&' is as follows: [-|+][#|&]numeric_value previous was: [-|+][#|&][-|+]numeric_value removed: Big::FromString() this method no longer recognizes operators for changing the base ('#' and '&') changed: in the parser: the short form of multiplication has the same priority as the normal multiplication, e.g.: '2x^3' = 2 * (x^3) previous the priority was greater than powering priority previous: '2x^3' = (2*x) ^ 3 git-svn-id: svn://ttmath.org/publicrep/ttmath/trunk@113 e52654a7-88a9-db11-a3e9-0013d4bc506e --- CHANGELOG | 28 ++++++++++ ttmath/ttmathbig.h | 106 ++++++++++++++++++++--------------- ttmath/ttmathint.h | 13 ++++- 
ttmath/ttmathparser.h | 125 ++++++++++++++++++++++++++++-------------- ttmath/ttmathuint.h | 12 +++- 5 files changed, 196 insertions(+), 88 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 2393111..88a6b33 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -25,6 +25,19 @@ Version 0.8.3 prerelease: * fixed: in function DegToRad(const ValueType & x, ErrorCode * err = 0) it is better to make division first and then mutliplication -- the result is more accurate especially when x is: 90,180,270 or 360 + * fixed: the parser didn't correctly treat operators for changing the base + (radix) -- operators '#' and '&', e.g.: + '#sin(1)' was equal '0' -- there was a zero from '#' and then + it was multipied by 'sin(1)' + the parser didn't check whether Big::FromString() has actually + read a proper value -- the method Big::FromString() didn't have + something to report such a situation + * fixed: Big::FromString() when the base is 10, the method reads the scientific + part only if such a part it correctly supplied, e.g: + '1234e10', '1234e+10', '1234e-5' + previous '1234e' was treated as: '1234e0' (now parsing stops on 'e' and + the 'e' can be parsed by other parsers, e.g. the mathematical + parser -- now in the parser would be: '1234e' = '1234 * e' = '3354,3597...' 
) * changed: UInt::FromString, added a parametr 'after_source' which is pointing at the end of the parsing string * changed: renamed: Big::PowUInt(UInt pow) -> Big::Pow(UInt pow) @@ -40,6 +53,14 @@ Version 0.8.3 prerelease: now they consist of 256 32bit words macro TTMATH_BUILTIN_VARIABLES_SIZE is equal: 256u on a 32bit platform and 128ul on a 64bit platform * changed: the asm code in ttmathuint.h and ttmathuint64.h has been completely rewritten (much faster now) + * changed: in the parser: the form with operators '#' and '&' is as follows: + [-|+][#|&]numeric_value + previous was: [-|+][#|&][-|+]numeric_value + * changed: in the parser: the short form of multiplication has the same + priority as the normal multiplication, e.g.: + '2x^3' = 2 * (x^3) + previous the priority was greater than powering priority + previous: '2x^3' = (2*x) ^ 3 * added: UInt::GetBit(uint bit_index) - returning the state of the specific bit * added: Big::operator=(double) and Big::Big(double) * added: UInt::Pow(UInt pow) and Int::Pow(Int pow) @@ -55,6 +76,13 @@ Version 0.8.3 prerelease: asin, acos, atan, acot * added: functions to the parser: gradtorad(grad), radtograd(rad), degtograd(deg), degtograd(d,m,s), gradtodeg(grad) + * added: to Int::FromString(): parameter 'const char ** after_source = 0' + if exists it's pointing at the end of the parsed string + * added: to UInt::FromString(), Int::FromString(), Big::FromString(): + parameter 'bool * value_read = 0' - (if exists) tells + whether something has actually been read (at least one digit) + * removed: Big::FromString() this method doesn't longer recognize operators + for changing the base ('#' and '&') Version 0.8.2 (2008.06.18): diff --git a/ttmath/ttmathbig.h b/ttmath/ttmathbig.h index 7affbdb..debcf36 100644 --- a/ttmath/ttmathbig.h +++ b/ttmath/ttmathbig.h @@ -3225,39 +3225,48 @@ public: a method for converting a string into its value it returns 1 if the value will be too big -- we cannot pass it into the range - of our class Big 
+ of our class Big (or if the base is incorrect) that means only digits before the comma operator can make this value too big, all digits after the comma we can ignore 'source' - pointer to the string for parsing - if 'after_source' is set that when this method have finished its job - it set the pointer to the new first character after parsed value + if 'after_source' is set that when this method finishes + it sets the pointer to the new first character after parsed value + + 'value_read' - if the pointer is provided that means the value_read will be true + only when a value has been actually read, there can be situation where only such + a string '-' or '+' will be parsed -- 'after_source' will be different from 'source' but + no value has been read (there are no digits) + on other words if 'value_read' is true -- there is at least one digit in the string */ - uint FromString(const char * source, uint base = 10, const char ** after_source = 0) + uint FromString(const char * source, uint base = 10, const char ** after_source = 0, bool * value_read = 0) { bool is_sign; + bool value_read_temp = false; if( base<2 || base>16 ) { if( after_source ) *after_source = source; + if( value_read ) + *value_read = value_read_temp; + return 1; } SetZero(); - FromString_TestNewBase( source, base ); FromString_TestSign( source, is_sign ); - uint c = FromString_ReadPartBeforeComma( source, base ); + uint c = FromString_ReadPartBeforeComma( source, base, value_read_temp ); if( FromString_TestCommaOperator(source) ) - c += FromString_ReadPartAfterComma( source, base ); + c += FromString_ReadPartAfterComma( source, base, value_read_temp ); - if( base==10 && FromString_TestScientific(source) ) - c += FromString_ReadPartScientific( source ); + if( value_read_temp && base == 10 ) + c += FromString_ReadScientificIfExists( source ); if( is_sign && !IsZero() ) ChangeSign(); @@ -3265,6 +3274,9 @@ public: if( after_source ) *after_source = source; + if( value_read ) + *value_read = 
value_read_temp; + return (c==0)? 0 : 1; } @@ -3273,30 +3285,6 @@ public: private: - /*! - we're testing whether a user wants to change the base - - if there's a '#' character it means that the user wants the base to be 16, - if '&' the base will be 2 - */ - void FromString_TestNewBase( const char * & source, uint & base ) - { - UInt::SkipWhiteCharacters(source); - - if( *source == '#' ) - { - base = 16; - ++source; - } - else - if( *source == '&' ) - { - base = 2; - ++source; - } - } - - /*! we're testing whether the value is with the sign @@ -3342,7 +3330,7 @@ private: this method reads the first part of a string (before the comma operator) */ - uint FromString_ReadPartBeforeComma( const char * & source, uint base ) + uint FromString_ReadPartBeforeComma( const char * & source, uint base, bool & value_read ) { sint character; Big temp; @@ -3352,6 +3340,8 @@ private: for( ; (character=UInt::CharToDigit(*source, base)) != -1 ; ++source ) { + value_read = true; + temp = character; if( Mul(base_) ) @@ -3369,7 +3359,7 @@ private: this method reads the second part of a string (after the comma operator) */ - uint FromString_ReadPartAfterComma( const char * & source, uint base ) + uint FromString_ReadPartAfterComma( const char * & source, uint base, bool & value_read ) { sint character; uint c = 0, index = 1; @@ -3386,6 +3376,8 @@ private: for( ; (character=UInt::CharToDigit(*source, base)) != -1 ; ++source, ++index ) { + value_read = true; + part = character; if( power.Mul( base_ ) ) @@ -3421,6 +3413,29 @@ private: } + /*! 
+ this method checks whether there is a scientific part: [e|E][-|+]value + + it is called when the base is 10 and some digits were read before + */ + int FromString_ReadScientificIfExists(const char * & source) + { + int c = 0; + + bool scientific_read = false; + const char * before_scientific = source; + + if( FromString_TestScientific(source) ) + c += FromString_ReadPartScientific( source, scientific_read ); + + if( !scientific_read ) + source = before_scientific; + + return (c==0)? 0 : 1; + } + + + /*! we're testing whether is there the character 'e' @@ -3445,21 +3460,24 @@ private: this method reads the exponent (after 'e' character) when there's a scientific format of value and only when we're using the base equals 10 */ - uint FromString_ReadPartScientific( const char * & source ) + uint FromString_ReadPartScientific( const char * & source, bool & scientific_read ) { uint c = 0; Big new_exponent, temp; bool was_sign = false; FromString_TestSign( source, was_sign ); - FromString_ReadPartScientific_ReadExponent( source, new_exponent ); + c += FromString_ReadPartScientific_ReadExponent( source, new_exponent, scientific_read ); - if( was_sign ) - new_exponent.ChangeSign(); + if( scientific_read ) + { + if( was_sign ) + new_exponent.ChangeSign(); - temp = 10; - c += temp.PowInt( new_exponent ); - c += Mul(temp); + temp = 10; + c += temp.Pow( new_exponent ); + c += Mul(temp); + } return (c==0)? 
0 : 1; } @@ -3469,7 +3487,7 @@ private: this method reads the value of the extra exponent when scientific format is used (only when base == 10) */ - uint FromString_ReadPartScientific_ReadExponent( const char * & source, Big & new_exponent ) + uint FromString_ReadPartScientific_ReadExponent( const char * & source, Big & new_exponent, bool & scientific_read ) { sint character; Big base, temp; @@ -3481,6 +3499,8 @@ private: for( ; (character=UInt::CharToDigit(*source, 10)) != -1 ; ++source ) { + scientific_read = true; + temp = character; if( new_exponent.Mul(base) ) diff --git a/ttmath/ttmathint.h b/ttmath/ttmathint.h index a5f263e..70cc1db 100644 --- a/ttmath/ttmathint.h +++ b/ttmath/ttmathint.h @@ -900,8 +900,12 @@ public: existing first white characters will be ommited (between '-' and a first digit can be white characters too) + + after_source (if exists) is pointing at the end of the parsing string + + value_read (if exists) tells whether something has actually been read (at least one digit) */ - uint FromString(const char * s, uint b = 10) + uint FromString(const char * s, uint b = 10, const char ** after_source = 0, bool * value_read = 0) { bool is_sign = false; @@ -918,7 +922,7 @@ public: UInt::SkipWhiteCharacters(++s); } - if( UInt::FromString(s,b) ) + if( UInt::FromString(s,b,after_source,value_read) ) return 1; if( is_sign ) @@ -929,12 +933,15 @@ public: /* the reference to mmin will be automatically converted to the reference - to a UInt type + to UInt type (this value can be equal mmin -- look at a description in ChangeSign()) */ if( UInt::operator>( mmin ) ) return 1; + /* + if the value is equal mmin the method ChangeSign() does nothing (only returns 1 but we ignore it) + */ ChangeSign(); } else diff --git a/ttmath/ttmathparser.h b/ttmath/ttmathparser.h index faffc07..3f36fa7 100644 --- a/ttmath/ttmathparser.h +++ b/ttmath/ttmathparser.h @@ -5,7 +5,7 @@ */ /* - * Copyright (c) 2006-2008, Tomasz Sowa + * Copyright (c) 2006-2009, Tomasz Sowa * All 
rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -61,14 +61,14 @@ namespace ttmath /*! \brief Mathematical parser - let x will be an input string means an expression for converting: + let x will be an input string meaning an expression for converting: x = [+|-]Value[operator[+|-]Value][operator[+|-]Value]... where: an operator can be: - ^ (pow) (almost the heighest priority, look below at 'short mul') + ^ (pow) (the heighest priority) - * (mul) + * (mul) (or multiplication without an operator -- short mul) / (div) (* and / have the same priority) + (add) @@ -86,14 +86,22 @@ namespace ttmath || (logical or) (the lowest priority) short mul: - or if the second Value (Var below) is either a variable or function there cannot be - an operator between them, e.g. - [+|-]ValueVar is treated as [+|-]Value * Var and the multiplication - has the greatest priority: 2^3m equals 2^(3*m) - + if the second Value (Var below) is either a variable or function there might not be + an operator between them, e.g. + "[+|-]Value Var" is treated as "[+|-]Value * Var" and the multiplication + has the same priority as a normal multiplication: + 4x = 4 * x + 2^3m = (2^3)* m + 6h^3 = 6 * (h^3) + 2sin(pi) = 2 * sin(pi) + etc. - and Value can be: - constant e.g. 100 + Value can be: + constant e.g. 100, can be preceded by operators to change the base (radix): [#|&] + # - hex + & - bin + sample: #10 = 16 + &10 = 2 variable e.g. pi another expression between brackets e.g (x) function e.g. sin(x) @@ -112,7 +120,11 @@ namespace ttmath for separating parameters "1 < 2" (the result will be: 1) "4 < 3" (the result will be: 0) - "2+x" (of course if the variable 'x' is defined) + "2+x" (of course if the variable 'x' is defined) + "4x+10" + "#20+10" = 32 + 10 = 42 + "10 ^ -&101" = 10 ^ -5 = 0.00001 + "8 * -&10" = 8 * -2 = -16 etc. 
we can also use a semicolon for separating any 'x' input strings @@ -137,7 +149,7 @@ private: pow (^) and 'shortmul' used when there is no any operators between a first parameter and a variable or function - (the 'shortmul' has the greatest priority e.g. '5^3m' equals '5^(3*m)' ) + (the 'shortmul' has the same priority as the normal multiplication ) */ class MatOperator { @@ -182,6 +194,7 @@ private: break; case mul: + case shortmul: case div: priority = 12; break; @@ -190,10 +203,6 @@ private: priority = 14; break; - case shortmul: - priority = 20; - break; - default: Error( err_internal_error ); break; @@ -416,7 +425,6 @@ typedef std::map VariablesTable; VariablesTable variables_table; - /*! you can't calculate the factorial if the argument is greater than 'factorial_max' default value is zero which means there are not any limitations @@ -433,8 +441,6 @@ static void Error(ErrorCode code) } - - /*! this method skips the white character from the string @@ -447,7 +453,6 @@ void SkipWhiteCharacters() } - /*! an auxiliary method for RecurrenceParsingVariablesOrFunction(...) */ @@ -477,6 +482,7 @@ void RecurrenceParsingVariablesOrFunction_AddName(bool variable, const std::stri visited_functions.insert( name ); } + /*! an auxiliary method for RecurrenceParsingVariablesOrFunction(...) */ @@ -488,6 +494,7 @@ void RecurrenceParsingVariablesOrFunction_DeleteName(bool variable, const std::s visited_functions.erase( name ); } + /*! this method returns the value of a variable or function by creating a new instance of the mathematical parser @@ -1628,17 +1635,21 @@ return is_it_name_of_function; /*! 
- we're reading a numerical value directly from the string + we're reading a numerical value directly from the string */ -void ReadValue(Item & result) +void ReadValue(Item & result, int reading_base) { const char * new_stack_pointer; +bool value_read; - int carry = result.value.FromString(pstring, base, &new_stack_pointer); + int carry = result.value.FromString(pstring, reading_base, &new_stack_pointer, &value_read); pstring = new_stack_pointer; if( carry ) Error( err_overflow ); + + if( !value_read ) + Error( err_unknown_character ); } @@ -1667,6 +1678,24 @@ return c; } +/*! + this method returns true if 'character' is a proper first digit for the value (or a comma -- can be first too) +*/ +bool ValueStarts(int character, int base) +{ + if( character == TTMATH_COMMA_CHARACTER_1 ) + return true; + + if( TTMATH_COMMA_CHARACTER_2 != 0 && character == TTMATH_COMMA_CHARACTER_2 ) + return true; + + if( CharToDigit(character, base) != -1 ) + return true; + +return false; +} + + /*! we're reading the item @@ -1726,19 +1755,33 @@ int character; return 2; } else - if( character=='#' || character=='&' || - character==TTMATH_COMMA_CHARACTER_1 || - (character==TTMATH_COMMA_CHARACTER_2 && TTMATH_COMMA_CHARACTER_2 != 0) || - CharToDigit(character, base)!=-1 ) + if( character == '#' ) { - /* - warning: - if we're using for example the base equal 16 - we can find a first character like 'e' that is not e=2.71.. 
- but the value 14, for this case we must use something like var::e for variables - (not implemented yet) - */ - ReadValue( result ); + ++pstring; + SkipWhiteCharacters(); + + // after '#' character we do not allow '-' or '+' (can be white characters) + if( ValueStarts(*pstring, 16) ) + ReadValue( result, 16 ); + else + Error( err_unknown_character ); + } + else + if( character == '&' ) + { + ++pstring; + SkipWhiteCharacters(); + + // after '&' character we do not allow '-' or '+' (can be white characters) + if( ValueStarts(*pstring, 2) ) + ReadValue( result, 2 ); + else + Error( err_unknown_character ); + } + else + if( ValueStarts(character, base) ) + { + ReadValue( result, base ); } else if( character>='a' && character<='z' ) @@ -2041,9 +2084,9 @@ void TryRollingUpStack() */ int ReadValueVariableOrFunctionAndPushItIntoStack(Item & temp) { -int kod = ReadValueVariableOrFunction( temp ); +int code = ReadValueVariableOrFunction( temp ); - if( kod == 0 ) + if( code == 0 ) { if( stack_index < stack.size() ) stack[stack_index] = temp; @@ -2053,13 +2096,13 @@ int kod = ReadValueVariableOrFunction( temp ); ++stack_index; } - if( kod == 2 ) + if( code == 2 ) // there was a final bracket, we didn't push it into the stack // (it'll be read by the 'ReadOperatorAndCheckFinalBracket' method next) - kod = 0; + code = 0; -return kod; +return code; } diff --git a/ttmath/ttmathuint.h b/ttmath/ttmathuint.h index 764acbd..8a17ecd 100644 --- a/ttmath/ttmathuint.h +++ b/ttmath/ttmathuint.h @@ -3200,8 +3200,12 @@ public: existing first white characters will be ommited if the value from s is too large the rest digits will be skipped + + after_source (if exists) is pointing at the end of the parsing string + + value_read (if exists) tells whether something has actually been read (at least one digit) */ - uint FromString(const char * s, uint b = 10, const char ** after_source = 0) + uint FromString(const char * s, uint b = 10, const char ** after_source = 0, bool * value_read = 0) { UInt 
base( b ); UInt temp; @@ -3215,12 +3219,18 @@ public: if( after_source ) *after_source = s; + if( value_read ) + *value_read = false; + if( b<2 || b>16 ) return 1; for( ; (z=CharToDigit(*s, b)) != -1 ; ++s) { + if( value_read ) + *value_read = true; + if( c == 0 ) { temp.table[0] = z;