fixed: the parser didn't correctly treat operators for changing the base

(radix) -- operators '#' and '&', e.g.:
'#sin(1)' was equal to '0' -- there was a zero from '#' and then
it was multiplied by 'sin(1)'
the parser didn't check whether Big::FromString() has actually
read a proper value -- the method Big::FromString() didn't have
anything to report such a situation

fixed: Big::FromString() when the base is 10, the method reads the scientific
part only if such a part is correctly supplied, e.g.:
'1234e10', '1234e+10', '1234e-5'
previously '1234e' was treated as: '1234e0' (now parsing stops on 'e' and
the 'e' can be parsed by other parsers, e.g. the mathematical
parser -- now in the parser would be: '1234e' = '1234 * e' = '3354,3597...' )

added: to Int::FromString(): parameter 'const char ** after_source = 0'
if exists it's pointing at the end of the parsed string

added: to UInt::FromString(), Int::FromString(), Big::FromString():
parameter 'bool * value_read = 0' - (if exists) tells
whether something has actually been read (at least one digit)

added: the parser checks itself for the operators for changing the base
(operators '#' and '&')

changed: in the parser: the form with operators '#' and '&' is as follows:
[-|+][#|&]numeric_value
previously it was: [-|+][#|&][-|+]numeric_value

removed: Big::FromString() this method no longer recognizes operators
for changing the base ('#' and '&')

changed: in the parser: the short form of multiplication has the same
priority as the normal multiplication, e.g.:
'2x^3' = 2 * (x^3)
previously the priority was greater than powering priority
previously: '2x^3' = (2*x) ^ 3

git-svn-id: svn://ttmath.org/publicrep/ttmath/trunk@113 e52654a7-88a9-db11-a3e9-0013d4bc506e
2009-03-28 17:40:36 +00:00 · 2009-03-28 17:40:36 +00:00 · c65857297b
parent f530635262
commit c65857297b
5 changed files with 196 additions and 88 deletions
--- a/28
+++ b/28
@ -25,6 +25,19 @@ Version 0.8.3 prerelease:
    * fixed:   in function DegToRad(const ValueType & x, ErrorCode * err = 0) it is better
               to make division first and then mutliplication -- the result is more
               accurate especially when x is: 90,180,270 or 360
+    * fixed:   the parser didn't correctly treat operators for changing the base
+               (radix) -- operators '#' and '&', e.g.:
+               '#sin(1)' was equal '0' -- there was a zero from '#' and then
+               it was multiplied by 'sin(1)'
+               the parser didn't check whether Big::FromString() has actually
+               read a proper value -- the method Big::FromString() didn't have
+               something to report such a situation
+    * fixed:   Big::FromString() when the base is 10, the method reads the scientific
+               part only if such a part is correctly supplied, e.g.:
+               '1234e10', '1234e+10', '1234e-5'
+               previous '1234e' was treated as: '1234e0' (now parsing stops on 'e' and
+               the 'e' can be parsed by other parsers, e.g. the mathematical
+               parser -- now in the parser would be: '1234e' = '1234 * e' = '3354,3597...' )
    * changed: UInt::FromString, added a parametr 'after_source'
               which is pointing at the end of the parsing string
    * changed: renamed: Big::PowUInt(UInt<pow_size> pow) -> Big::Pow(UInt<pow_size> pow)
@ -40,6 +53,14 @@ Version 0.8.3 prerelease:
               now they consist of 256 32bit words
               macro TTMATH_BUILTIN_VARIABLES_SIZE is equal: 256u on a 32bit platform and 128ul on a 64bit platform
    * changed: the asm code in ttmathuint.h and ttmathuint64.h has been completely rewritten (much faster now)               
+    * changed: in the parser: the form with operators '#' and '&' is as follows:
+               [-|+][#|&]numeric_value
+               previous was: [-|+][#|&][-|+]numeric_value
+    * changed: in the parser: the short form of multiplication has the same
+               priority as the normal multiplication, e.g.:
+               '2x^3' = 2 * (x^3)
+               previous the priority was greater than powering priority
+               previous: '2x^3' = (2*x) ^ 3
    * added:   UInt::GetBit(uint bit_index) - returning the state of the specific bit
    * added:   Big::operator=(double) and Big::Big(double)
    * added:   UInt::Pow(UInt<value_size> pow) and Int::Pow(Int<value_size> pow)
@ -55,6 +76,13 @@ Version 0.8.3 prerelease:
               asin, acos, atan, acot
    * added:   functions to the parser: gradtorad(grad), radtograd(rad), degtograd(deg),
               degtograd(d,m,s), gradtodeg(grad)
+    * added:   to Int::FromString(): parameter 'const char ** after_source = 0'
+               if exists it's pointing at the end of the parsed string
+    * added:   to UInt::FromString(), Int::FromString(), Big::FromString():
+               parameter 'bool * value_read = 0' - (if exists) tells
+               whether something has actually been read (at least one digit)
+    * removed: Big::FromString() this method no longer recognizes operators
+               for changing the base ('#' and '&')


 Version 0.8.2 (2008.06.18):
--- a/ttmath/ttmathbig.h
+++ b/ttmath/ttmathbig.h
@ -3225,39 +3225,48 @@ public:
 		a method for converting a string into its value

 		it returns 1 if the value will be too big -- we cannot pass it into the range
-		of our class Big<exp,man>
+		of our class Big<exp,man> (or if the base is incorrect)

 		that means only digits before the comma operator can make this value too big, 
 		all digits after the comma we can ignore

 		'source' - pointer to the string for parsing

-		if 'after_source' is set that when this method have finished its job
-		it set the pointer to the new first character after parsed value
+		if 'after_source' is set then, when this method finishes,
+		it sets the pointer to the first character after the parsed value
+
+		'value_read' - if the pointer is provided, *value_read will be set to true
+		only when a value has actually been read; there can be a situation where only
+		a string such as '-' or '+' is parsed -- 'after_source' will be different from 'source' but
+		no value has been read (there are no digits)
+		in other words if 'value_read' is true -- there is at least one digit in the string
 	*/
-	uint FromString(const char * source, uint base = 10, const char ** after_source = 0)
+	uint FromString(const char * source, uint base = 10, const char ** after_source = 0, bool * value_read = 0)
 	{
 	bool is_sign;
+	bool value_read_temp = false;

 		if( base<2 || base>16 )
 		{
 			if( after_source )
 				*after_source = source;

+			if( value_read )
+				*value_read = value_read_temp;
+
 			return 1;
 		}

 		SetZero();
-		FromString_TestNewBase( source, base );
 		FromString_TestSign( source, is_sign );

-		uint c = FromString_ReadPartBeforeComma( source, base );
+		uint c = FromString_ReadPartBeforeComma( source, base, value_read_temp );

 		if( FromString_TestCommaOperator(source) )
-			c += FromString_ReadPartAfterComma( source, base );
+			c += FromString_ReadPartAfterComma( source, base, value_read_temp );

-		if( base==10 && FromString_TestScientific(source) )
-			c += FromString_ReadPartScientific( source );
+		if( value_read_temp && base == 10 )
+			c += FromString_ReadScientificIfExists( source );

 		if( is_sign && !IsZero() )
 			ChangeSign();
@ -3265,6 +3274,9 @@ public:
 		if( after_source )
 			*after_source = source;

+		if( value_read )
+			*value_read = value_read_temp;
+
 	return (c==0)? 0 : 1;
 	}

@ -3273,30 +3285,6 @@ public:
 private:


-	/*!
-		we're testing whether a user wants to change the base
-
-		if there's a '#' character it means that the user wants the base to be 16,
-		if '&' the base will be 2
-	*/
-	void FromString_TestNewBase( const char * & source, uint & base )
-	{
-		UInt<man>::SkipWhiteCharacters(source);
-
-		if( *source == '#' )
-		{
-			base = 16;
-			++source;
-		}
-		else
-		if( *source == '&' )
-		{
-			base = 2;
-			++source;
-		}
-	}
-
-
 	/*!
 		we're testing whether the value is with the sign

@ -3342,7 +3330,7 @@ private:
 		this method reads the first part of a string
 		(before the comma operator)
 	*/
-	uint FromString_ReadPartBeforeComma( const char * & source, uint base )
+	uint FromString_ReadPartBeforeComma( const char * & source, uint base, bool & value_read )
 	{
 		sint character;
 		Big<exp, man> temp;
@ -3352,6 +3340,8 @@ private:

 		for( ; (character=UInt<man>::CharToDigit(*source, base)) != -1 ; ++source )
 		{
+			value_read = true;
+
 			temp = character;

 			if( Mul(base_) )
@ -3369,7 +3359,7 @@ private:
 		this method reads the second part of a string
 		(after the comma operator)
 	*/
-	uint FromString_ReadPartAfterComma( const char * & source, uint base )
+	uint FromString_ReadPartAfterComma( const char * & source, uint base, bool & value_read )
 	{
 	sint character;
 	uint c = 0, index = 1;
@ -3386,6 +3376,8 @@ private:

 		for( ; (character=UInt<man>::CharToDigit(*source, base)) != -1 ; ++source, ++index )
 		{
+			value_read = true;
+
 			part = character;

 			if( power.Mul( base_ ) )
@ -3421,6 +3413,29 @@ private:
 	}


+	/*!
+		this method checks whether there is a scientific part: [e|E][-|+]value
+
+		it is called when the base is 10 and some digits were read before
+	*/
+	int FromString_ReadScientificIfExists(const char * & source)
+	{
+	int c = 0;
+
+		bool scientific_read = false;
+		const char * before_scientific = source;
+
+		if( FromString_TestScientific(source) )
+			c += FromString_ReadPartScientific( source, scientific_read );
+
+		if( !scientific_read )
+			source = before_scientific;
+
+	return (c==0)? 0 : 1;
+	}
+
+
+
 	/*!
 		we're testing whether is there the character 'e'

@ -3445,21 +3460,24 @@ private:
 		this method reads the exponent (after 'e' character) when there's a scientific
 		format of value and only when we're using the base equals 10
 	*/
-	uint FromString_ReadPartScientific( const char * & source )
+	uint FromString_ReadPartScientific( const char * & source, bool & scientific_read )
 	{
 	uint c = 0;
 	Big<exp, man> new_exponent, temp;
 	bool was_sign = false;

 		FromString_TestSign( source, was_sign );
-		FromString_ReadPartScientific_ReadExponent( source, new_exponent );
+		c += FromString_ReadPartScientific_ReadExponent( source, new_exponent, scientific_read );

-		if( was_sign )
-			new_exponent.ChangeSign();
+		if( scientific_read )
+		{
+			if( was_sign )
+				new_exponent.ChangeSign();

-		temp = 10;
-		c += temp.PowInt( new_exponent );
-		c += Mul(temp);
+			temp = 10;
+			c += temp.Pow( new_exponent );
+			c += Mul(temp);
+		}

 	return (c==0)? 0 : 1;
 	}
@ -3469,7 +3487,7 @@ private:
 		this method reads the value of the extra exponent when scientific format is used
 		(only when base == 10)
 	*/
-	uint FromString_ReadPartScientific_ReadExponent( const char * & source, Big<exp, man> & new_exponent )
+	uint FromString_ReadPartScientific_ReadExponent( const char * & source, Big<exp, man> & new_exponent, bool & scientific_read )
 	{
 	sint character;
 	Big<exp, man> base, temp;
@ -3481,6 +3499,8 @@ private:

 		for( ; (character=UInt<man>::CharToDigit(*source, 10)) != -1 ; ++source )
 		{
+			scientific_read = true;
+
 			temp = character;

 			if( new_exponent.Mul(base) )
--- a/ttmath/ttmathint.h
+++ b/ttmath/ttmathint.h
@ -900,8 +900,12 @@ public:

 		existing first white characters will be ommited
 		(between '-' and a first digit can be white characters too)
+
+		after_source (if exists) is pointing at the end of the parsing string
+
+		value_read (if exists) tells whether something has actually been read (at least one digit)
 	*/
-	uint FromString(const char * s, uint b = 10)
+	uint FromString(const char * s, uint b = 10, const char ** after_source = 0, bool * value_read = 0)
 	{
 	bool is_sign = false;
 	
@ -918,7 +922,7 @@ public:
 			UInt<value_size>::SkipWhiteCharacters(++s);
 		}

-		if( UInt<value_size>::FromString(s,b) )
+		if( UInt<value_size>::FromString(s,b,after_source,value_read) )
 			return 1;

 		if( is_sign )
@ -929,12 +933,15 @@ public:

 			/*
 				the reference to mmin will be automatically converted to the reference
-				to a UInt type
+				to UInt type
 				(this value can be equal mmin -- look at a description in ChangeSign())
 			*/
 			if( UInt<value_size>::operator>( mmin ) )
 				return 1;

+			/*
+				if the value is equal mmin the method ChangeSign() does nothing (only returns 1 but we ignore it)
+			*/
 			ChangeSign();
 		}
 		else
--- a/ttmath/ttmathparser.h
+++ b/ttmath/ttmathparser.h
@ -5,7 +5,7 @@
 */

 /* 
- * Copyright (c) 2006-2008, Tomasz Sowa
+ * Copyright (c) 2006-2009, Tomasz Sowa
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
@ -61,14 +61,14 @@ namespace ttmath
 /*! 
 	\brief Mathematical parser

-	let x will be an input string means an expression for converting:
+	let x be an input string meaning an expression for converting:
 	
 	x = [+|-]Value[operator[+|-]Value][operator[+|-]Value]...
 	where:
 		an operator can be:
-			^ (pow)  (almost the heighest priority, look below at 'short mul')
+			^ (pow)   (the highest priority)

-			* (mul) 
+			* (mul)   (or multiplication without an operator -- short mul)
 			/ (div)   (* and / have the same priority)

 			+ (add)
@ -86,14 +86,22 @@ namespace ttmath
 			|| (logical or) (the lowest priority)

 		short mul:
-		or if the second Value (Var below) is either a variable or function there cannot be 
-		an operator between them, e.g.
-	        [+|-]ValueVar is treated as [+|-]Value * Var and the multiplication
-	        has the greatest priority:  2^3m equals 2^(3*m)
-	
+ 		 if the second Value (Var below) is either a variable or function there might not be 
+		 an operator between them, e.g.
+	        "[+|-]Value Var" is treated as "[+|-]Value * Var" and the multiplication
+	        has the same priority as a normal multiplication:
+			4x       = 4 * x
+			2^3m     = (2^3)* m
+			6h^3     = 6 * (h^3)
+	        2sin(pi) = 2 * sin(pi)
+			etc.

-		and Value can be:
-			constant e.g. 100
+		Value can be:
+			constant e.g. 100, can be preceded by operators to change the base (radix): [#|&]
+			                   # - hex
+							   & - bin
+							   sample: #10  = 16
+							           &10  = 2
 			variable e.g. pi
 			another expression between brackets e.g (x)
 			function e.g. sin(x)
@ -112,7 +120,11 @@ namespace ttmath
 		                 for separating parameters
 	    "1 < 2"  (the result will be: 1)
 	    "4 < 3"  (the result will be: 0)
-		"2+x"  (of course if the variable 'x' is defined)
+		"2+x"    (of course if the variable 'x' is defined)
+		"4x+10"
+		"#20+10"     = 32 + 10 = 42
+		"10 ^ -&101" = 10 ^ -5 = 0.00001
+		"8 * -&10"   = 8 * -2  = -16
 		etc.

 	we can also use a semicolon for separating any 'x' input strings
@ -137,7 +149,7 @@ private:
 		pow (^)
 		and 'shortmul' used when there is no any operators between
 		a first parameter and a variable or function
-		(the 'shortmul' has the greatest priority e.g. '5^3m' equals '5^(3*m)' )
+		(the 'shortmul' has the same priority as the normal multiplication )
 */
 	class MatOperator
 	{
@ -182,6 +194,7 @@ private:
 				break;

 			case mul:
+			case shortmul:
 			case div:
 				priority = 12;
 				break;
@ -190,10 +203,6 @@ private:
 				priority = 14;
 				break;

-			case shortmul:
-				priority = 20;
-				break;
-
 			default:
 				Error( err_internal_error );
 				break;
@ -416,7 +425,6 @@ typedef std::map<std::string, pfunction_var> VariablesTable;
 VariablesTable variables_table;


-
 /*!
 	you can't calculate the factorial if the argument is greater than 'factorial_max'
 	default value is zero which means there are not any limitations
@ -433,8 +441,6 @@ static void Error(ErrorCode code)
 }


-
-
 /*!
 	this method skips the white character from the string

@ -447,7 +453,6 @@ void SkipWhiteCharacters()
 }


-
 /*!
 	an auxiliary method for RecurrenceParsingVariablesOrFunction(...)
 */
@ -477,6 +482,7 @@ void RecurrenceParsingVariablesOrFunction_AddName(bool variable, const std::stri
 		visited_functions.insert( name );
 }

+
 /*!
 	an auxiliary method for RecurrenceParsingVariablesOrFunction(...)
 */
@ -488,6 +494,7 @@ void RecurrenceParsingVariablesOrFunction_DeleteName(bool variable, const std::s
 		visited_functions.erase( name );
 }

+
 /*!
 	this method returns the value of a variable or function
 	by creating a new instance of the mathematical parser 
@ -1628,17 +1635,21 @@ return is_it_name_of_function;


 /*!
-	we're reading a numerical value directly from the string	
+	we're reading a numerical value directly from the string
 */
-void ReadValue(Item & result)
+void ReadValue(Item & result, int reading_base)
 {
 const char * new_stack_pointer;
+bool value_read;

-	int carry = result.value.FromString(pstring, base, &new_stack_pointer);
+	int carry = result.value.FromString(pstring, reading_base, &new_stack_pointer, &value_read);
 	pstring   = new_stack_pointer;

 	if( carry )
 		Error( err_overflow );
+
+	if( !value_read )
+		Error( err_unknown_character );
 }


@ -1667,6 +1678,24 @@ return c;
 }


+/*!
+	this method returns true if 'character' is a proper first digit for the value (or a comma -- can be first too)
+*/
+bool ValueStarts(int character, int base)
+{
+	if( character == TTMATH_COMMA_CHARACTER_1 )
+		return true;
+
+	if( TTMATH_COMMA_CHARACTER_2 != 0 && character == TTMATH_COMMA_CHARACTER_2 )
+		return true;
+
+	if( CharToDigit(character, base) != -1 )
+		return true;
+
+return false;
+}
+
+
 /*!
 	we're reading the item
  
@ -1726,19 +1755,33 @@ int  character;
 	return 2;
 	}
 	else
-	if( character=='#' || character=='&' ||
-		character==TTMATH_COMMA_CHARACTER_1 ||
-		(character==TTMATH_COMMA_CHARACTER_2 && TTMATH_COMMA_CHARACTER_2 != 0) ||
-		CharToDigit(character, base)!=-1 )
+	if( character == '#' )
 	{
-		/*
-			warning:
-			if we're using for example the base equal 16
-			we can find a first character like 'e' that is not e=2.71..
-			but the value 14, for this case we must use something like var::e for variables
-			(not implemented yet)
-		*/
-		ReadValue( result );
+		++pstring;
+		SkipWhiteCharacters();
+
+		// after '#' character we do not allow '-' or '+' (can be white characters)
+		if(	ValueStarts(*pstring, 16) )
+			ReadValue( result, 16 );
+		else
+			Error( err_unknown_character );
+	}
+	else
+	if( character == '&' )
+	{
+		++pstring;
+		SkipWhiteCharacters();
+
+		// after '&' character we do not allow '-' or '+' (can be white characters)
+		if(	ValueStarts(*pstring, 2) )
+			ReadValue( result, 2 );
+		else
+			Error( err_unknown_character );
+	}
+	else
+	if(	ValueStarts(character, base) )
+	{
+		ReadValue( result, base );
 	}
 	else
 	if( character>='a' && character<='z' )
@ -2041,9 +2084,9 @@ void TryRollingUpStack()
 */
 int ReadValueVariableOrFunctionAndPushItIntoStack(Item & temp)
 {
-int kod = ReadValueVariableOrFunction( temp );
+int code = ReadValueVariableOrFunction( temp );
 	
-	if( kod == 0 )
+	if( code == 0 )
 	{
 		if( stack_index < stack.size() )
 			stack[stack_index] = temp;
@ -2053,13 +2096,13 @@ int kod = ReadValueVariableOrFunction( temp );
 		++stack_index;
 	}

-	if( kod == 2 )
+	if( code == 2 )
 		// there was a final bracket, we didn't push it into the stack 
 		// (it'll be read by the 'ReadOperatorAndCheckFinalBracket' method next)
-		kod = 0;
+		code = 0;


-return kod;
+return code;
 }


--- a/ttmath/ttmathuint.h
+++ b/ttmath/ttmathuint.h
@ -3200,8 +3200,12 @@ public:
 		existing first white characters will be ommited

 		if the value from s is too large the rest digits will be skipped
+
+		after_source (if exists) is pointing at the end of the parsing string
+
+		value_read (if exists) tells whether something has actually been read (at least one digit)
 	*/
-	uint FromString(const char * s, uint b = 10, const char ** after_source = 0)
+	uint FromString(const char * s, uint b = 10, const char ** after_source = 0, bool * value_read = 0)
 	{
 	UInt<value_size> base( b );
 	UInt<value_size> temp;
@ -3215,12 +3219,18 @@ public:
 		if( after_source )
 			*after_source = s;

+		if( value_read )
+			*value_read = false;
+
 		if( b<2 || b>16 )
 			return 1;


 		for( ; (z=CharToDigit(*s, b)) != -1 ; ++s)
 		{
+			if( value_read )
+				*value_read = true;
+
 			if( c == 0 )
 			{
 				temp.table[0] = z;