From c65857297be67c211292b49cb319991fe4c00b63 Mon Sep 17 00:00:00 2001
From: Tomasz Sowa <t.sowa@ttmath.org>
Date: Sat, 28 Mar 2009 17:40:36 +0000
Subject: [PATCH] fixed: the parser didn't correctly treat operators for
 changing the base        (radix) -- operators '#' and '&', e.g.:       
 '#sin(1)' was equal '0' -- there was a zero from '#' and then        it was
 multiplied by 'sin(1)'        the parser didn't check whether
 Big::FromString() has actually        read a proper value -- the method
 Big::FromString() didn't have        something to report such a situation
 fixed: Big::FromString() when the base is 10, the method reads the scientific
        part only if such a part is correctly supplied, e.g.:        '1234e10',
 '1234e+10', '1234e-5'        previous '1234e' was treated as: '1234e0' (now
 parsing stops on 'e' and        the 'e' can be parsed by other parsers, e.g.
 the mathematical        parser -- now in the parser would be: '1234e' = '1234
 * e' = '3354,3597...' ) added: to Int::FromString(): parameter 'const char **
 after_source = 0'        if exists it's pointing at the end of the parsed
 string added: to UInt::FromString(), Int::FromString(), Big::FromString():   
     parameter 'bool * value_read = 0' - (if exists) tells        whether
 something has actually been read (at least one digit) added: the parser
 checks itself for the operators for changing the base        (operators '#'
 and '&') changed: in the parser: the form with operators '#' and '&' is as
 follows:        [-|+][#|&]numeric_value        previous was:
 [-|+][#|&][-|+]numeric_value removed: Big::FromString() this method no
 longer recognizes operators        for changing the base ('#' and '&')
 changed: in the parser: the short form of multiplication has the same       
 priority as the normal multiplication, e.g.:        '2x^3' = 2 * (x^3)       
 previous the priority was greater than powering priority        previous:
 '2x^3' = (2*x) ^ 3

git-svn-id: svn://ttmath.org/publicrep/ttmath/trunk@113 e52654a7-88a9-db11-a3e9-0013d4bc506e
---
 CHANGELOG             |  28 ++++++++++
 ttmath/ttmathbig.h    | 106 ++++++++++++++++++++---------------
 ttmath/ttmathint.h    |  13 ++++-
 ttmath/ttmathparser.h | 125 ++++++++++++++++++++++++++++--------------
 ttmath/ttmathuint.h   |  12 +++-
 5 files changed, 196 insertions(+), 88 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 2393111..88a6b33 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -25,6 +25,19 @@ Version 0.8.3 prerelease:
     * fixed:   in function DegToRad(const ValueType & x, ErrorCode * err = 0) it is better
                to make division first and then mutliplication -- the result is more
                accurate especially when x is: 90,180,270 or 360
+    * fixed:   the parser didn't correctly treat operators for changing the base
+               (radix) -- operators '#' and '&', e.g.:
+               '#sin(1)' was equal '0' -- there was a zero from '#' and then
+               it was multiplied by 'sin(1)'
+               the parser didn't check whether Big::FromString() has actually
+               read a proper value -- the method Big::FromString() had no way
+               to report such a situation
+    * fixed:   Big::FromString() when the base is 10, the method reads the scientific
+               part only if such a part is correctly supplied, e.g.:
+               '1234e10', '1234e+10', '1234e-5'
+               previously '1234e' was treated as: '1234e0' (now parsing stops on 'e' and
+               the 'e' can be parsed by other parsers, e.g. the mathematical
+               parser -- now in the parser would be: '1234e' = '1234 * e' = '3354,3597...' )
     * changed: UInt::FromString, added a parametr 'after_source'
                which is pointing at the end of the parsing string
     * changed: renamed: Big::PowUInt(UInt<pow_size> pow) -> Big::Pow(UInt<pow_size> pow)
@@ -40,6 +53,14 @@ Version 0.8.3 prerelease:
                now they consist of 256 32bit words
                macro TTMATH_BUILTIN_VARIABLES_SIZE is equal: 256u on a 32bit platform and 128ul on a 64bit platform
     * changed: the asm code in ttmathuint.h and ttmathuint64.h has been completely rewritten (much faster now)               
+    * changed: in the parser: the form with operators '#' and '&' is as follows:
+               [-|+][#|&]numeric_value
+               previously it was: [-|+][#|&][-|+]numeric_value
+    * changed: in the parser: the short form of multiplication has the same
+               priority as the normal multiplication, e.g.:
+               '2x^3' = 2 * (x^3)
+               previously the priority was higher than the priority of powering
+               previously: '2x^3' = (2*x) ^ 3
     * added:   UInt::GetBit(uint bit_index) - returning the state of the specific bit
     * added:   Big::operator=(double) and Big::Big(double)
     * added:   UInt::Pow(UInt<value_size> pow) and Int::Pow(Int<value_size> pow)
@@ -55,6 +76,13 @@ Version 0.8.3 prerelease:
                asin, acos, atan, acot
     * added:   functions to the parser: gradtorad(grad), radtograd(rad), degtograd(deg),
                degtograd(d,m,s), gradtodeg(grad)
+    * added:   to Int::FromString(): parameter 'const char ** after_source = 0'
+               if provided, it is pointing at the end of the parsed string
+    * added:   to UInt::FromString(), Int::FromString(), Big::FromString():
+               parameter 'bool * value_read = 0' - (if exists) tells
+               whether something has actually been read (at least one digit)
+    * removed: Big::FromString() this method no longer recognizes operators
+               for changing the base ('#' and '&')
 
 
 Version 0.8.2 (2008.06.18):
diff --git a/ttmath/ttmathbig.h b/ttmath/ttmathbig.h
index 7affbdb..debcf36 100644
--- a/ttmath/ttmathbig.h
+++ b/ttmath/ttmathbig.h
@@ -3225,39 +3225,48 @@ public:
 		a method for converting a string into its value
 
 		it returns 1 if the value will be too big -- we cannot pass it into the range
-		of our class Big<exp,man>
+		of our class Big<exp,man> (or if the base is incorrect)
 
 		that means only digits before the comma operator can make this value too big, 
 		all digits after the comma we can ignore
 
 		'source' - pointer to the string for parsing
 
-		if 'after_source' is set that when this method have finished its job
-		it set the pointer to the new first character after parsed value
+		if 'after_source' is set then when this method finishes
+		it sets the pointer to the first character after the parsed value
+
+		'value_read' - if the pointer is provided, the flag it points to will be true
+		only when a value has actually been read; there can be a situation where only
+		a string such as '-' or '+' is parsed -- 'after_source' will be different from 'source' but
+		no value has been read (there are no digits)
+		in other words if 'value_read' is true -- there is at least one digit in the string
 	*/
-	uint FromString(const char * source, uint base = 10, const char ** after_source = 0)
+	uint FromString(const char * source, uint base = 10, const char ** after_source = 0, bool * value_read = 0)
 	{
 	bool is_sign;
+	bool value_read_temp = false;
 
 		if( base<2 || base>16 )
 		{
 			if( after_source )
 				*after_source = source;
 
+			if( value_read )
+				*value_read = value_read_temp;
+
 			return 1;
 		}
 
 		SetZero();
-		FromString_TestNewBase( source, base );
 		FromString_TestSign( source, is_sign );
 
-		uint c = FromString_ReadPartBeforeComma( source, base );
+		uint c = FromString_ReadPartBeforeComma( source, base, value_read_temp );
 
 		if( FromString_TestCommaOperator(source) )
-			c += FromString_ReadPartAfterComma( source, base );
+			c += FromString_ReadPartAfterComma( source, base, value_read_temp );
 
-		if( base==10 && FromString_TestScientific(source) )
-			c += FromString_ReadPartScientific( source );
+		if( value_read_temp && base == 10 )
+			c += FromString_ReadScientificIfExists( source );
 
 		if( is_sign && !IsZero() )
 			ChangeSign();
@@ -3265,6 +3274,9 @@ public:
 		if( after_source )
 			*after_source = source;
 
+		if( value_read )
+			*value_read = value_read_temp;
+
 	return (c==0)? 0 : 1;
 	}
 
@@ -3273,30 +3285,6 @@ public:
 private:
 
 
-	/*!
-		we're testing whether a user wants to change the base
-
-		if there's a '#' character it means that the user wants the base to be 16,
-		if '&' the base will be 2
-	*/
-	void FromString_TestNewBase( const char * & source, uint & base )
-	{
-		UInt<man>::SkipWhiteCharacters(source);
-
-		if( *source == '#' )
-		{
-			base = 16;
-			++source;
-		}
-		else
-		if( *source == '&' )
-		{
-			base = 2;
-			++source;
-		}
-	}
-
-
 	/*!
 		we're testing whether the value is with the sign
 
@@ -3342,7 +3330,7 @@ private:
 		this method reads the first part of a string
 		(before the comma operator)
 	*/
-	uint FromString_ReadPartBeforeComma( const char * & source, uint base )
+	uint FromString_ReadPartBeforeComma( const char * & source, uint base, bool & value_read )
 	{
 		sint character;
 		Big<exp, man> temp;
@@ -3352,6 +3340,8 @@ private:
 
 		for( ; (character=UInt<man>::CharToDigit(*source, base)) != -1 ; ++source )
 		{
+			value_read = true;
+
 			temp = character;
 
 			if( Mul(base_) )
@@ -3369,7 +3359,7 @@ private:
 		this method reads the second part of a string
 		(after the comma operator)
 	*/
-	uint FromString_ReadPartAfterComma( const char * & source, uint base )
+	uint FromString_ReadPartAfterComma( const char * & source, uint base, bool & value_read )
 	{
 	sint character;
 	uint c = 0, index = 1;
@@ -3386,6 +3376,8 @@ private:
 
 		for( ; (character=UInt<man>::CharToDigit(*source, base)) != -1 ; ++source, ++index )
 		{
+			value_read = true;
+
 			part = character;
 
 			if( power.Mul( base_ ) )
@@ -3421,6 +3413,29 @@ private:
 	}
 
 
+	/*!
+		this method reads an optional scientific part: [e|E][-|+]value
+		(it is called when the base is 10 and some digits were read before)
+		if no exponent digit is read 'source' is restored -- nothing is consumed
+		it returns 0 when FromString_ReadPartScientific() reported no carry, otherwise 1
+	*/
+	uint FromString_ReadScientificIfExists(const char * & source)
+	{
+	uint c = 0; // uint like in the other FromString_* helpers (the caller adds it to a uint)
+	bool scientific_read = false;
+	const char * before_scientific = source;
+		if( FromString_TestScientific(source) )
+			c += FromString_ReadPartScientific( source, scientific_read );
+
+		// an 'e' without digits is not ours -- give it back (e.g. '1234e' = '1234 * e' in the parser)
+		if( !scientific_read )
+			source = before_scientific;
+
+	return (c==0)? 0 : 1;
+	}
+
+
+
 	/*!
 		we're testing whether is there the character 'e'
 
@@ -3445,21 +3460,24 @@ private:
 		this method reads the exponent (after 'e' character) when there's a scientific
 		format of value and only when we're using the base equals 10
 	*/
-	uint FromString_ReadPartScientific( const char * & source )
+	uint FromString_ReadPartScientific( const char * & source, bool & scientific_read )
 	{
 	uint c = 0;
 	Big<exp, man> new_exponent, temp;
 	bool was_sign = false;
 
 		FromString_TestSign( source, was_sign );
-		FromString_ReadPartScientific_ReadExponent( source, new_exponent );
+		c += FromString_ReadPartScientific_ReadExponent( source, new_exponent, scientific_read );
 
-		if( was_sign )
-			new_exponent.ChangeSign();
+		if( scientific_read )
+		{
+			if( was_sign )
+				new_exponent.ChangeSign();
 
-		temp = 10;
-		c += temp.PowInt( new_exponent );
-		c += Mul(temp);
+			temp = 10;
+			c += temp.Pow( new_exponent );
+			c += Mul(temp);
+		}
 
 	return (c==0)? 0 : 1;
 	}
@@ -3469,7 +3487,7 @@ private:
 		this method reads the value of the extra exponent when scientific format is used
 		(only when base == 10)
 	*/
-	uint FromString_ReadPartScientific_ReadExponent( const char * & source, Big<exp, man> & new_exponent )
+	uint FromString_ReadPartScientific_ReadExponent( const char * & source, Big<exp, man> & new_exponent, bool & scientific_read )
 	{
 	sint character;
 	Big<exp, man> base, temp;
@@ -3481,6 +3499,8 @@ private:
 
 		for( ; (character=UInt<man>::CharToDigit(*source, 10)) != -1 ; ++source )
 		{
+			scientific_read = true;
+
 			temp = character;
 
 			if( new_exponent.Mul(base) )
diff --git a/ttmath/ttmathint.h b/ttmath/ttmathint.h
index a5f263e..70cc1db 100644
--- a/ttmath/ttmathint.h
+++ b/ttmath/ttmathint.h
@@ -900,8 +900,12 @@ public:
 
 		existing first white characters will be ommited
 		(between '-' and a first digit can be white characters too)
+
+		after_source (if exists) is pointing at the end of the parsed string
+
+		value_read (if exists) tells whether something has actually been read (at least one digit)
 	*/
-	uint FromString(const char * s, uint b = 10)
+	uint FromString(const char * s, uint b = 10, const char ** after_source = 0, bool * value_read = 0)
 	{
 	bool is_sign = false;
 	
@@ -918,7 +922,7 @@ public:
 			UInt<value_size>::SkipWhiteCharacters(++s);
 		}
 
-		if( UInt<value_size>::FromString(s,b) )
+		if( UInt<value_size>::FromString(s,b,after_source,value_read) )
 			return 1;
 
 		if( is_sign )
@@ -929,12 +933,15 @@ public:
 
 			/*
 				the reference to mmin will be automatically converted to the reference
-				to a UInt type
+				to UInt type
 				(this value can be equal mmin -- look at a description in ChangeSign())
 			*/
 			if( UInt<value_size>::operator>( mmin ) )
 				return 1;
 
+			/*
+				if the value is equal to mmin the method ChangeSign() does nothing (only returns 1 but we ignore it)
+			*/
 			ChangeSign();
 		}
 		else
diff --git a/ttmath/ttmathparser.h b/ttmath/ttmathparser.h
index faffc07..3f36fa7 100644
--- a/ttmath/ttmathparser.h
+++ b/ttmath/ttmathparser.h
@@ -5,7 +5,7 @@
  */
 
 /* 
- * Copyright (c) 2006-2008, Tomasz Sowa
+ * Copyright (c) 2006-2009, Tomasz Sowa
  * All rights reserved.
  * 
  * Redistribution and use in source and binary forms, with or without
@@ -61,14 +61,14 @@ namespace ttmath
 /*! 
 	\brief Mathematical parser
 
-	let x will be an input string means an expression for converting:
+	let x be an input string, meaning an expression for converting:
 	
 	x = [+|-]Value[operator[+|-]Value][operator[+|-]Value]...
 	where:
 		an operator can be:
-			^ (pow)  (almost the heighest priority, look below at 'short mul')
+			^ (pow)   (the highest priority)
 
-			* (mul) 
+			* (mul)   (or multiplication without an operator -- short mul)
 			/ (div)   (* and / have the same priority)
 
 			+ (add)
@@ -86,14 +86,22 @@ namespace ttmath
 			|| (logical or) (the lowest priority)
 
 		short mul:
-		or if the second Value (Var below) is either a variable or function there cannot be 
-		an operator between them, e.g.
-	        [+|-]ValueVar is treated as [+|-]Value * Var and the multiplication
-	        has the greatest priority:  2^3m equals 2^(3*m)
-	
+ 		 if the second Value (Var below) is either a variable or function there might not be 
+		 an operator between them, e.g.
+	        "[+|-]Value Var" is treated as "[+|-]Value * Var" and the multiplication
+	        has the same priority as a normal multiplication:
+			4x       = 4 * x
+			2^3m     = (2^3)* m
+			6h^3     = 6 * (h^3)
+	        2sin(pi) = 2 * sin(pi)
+			etc.
 
-		and Value can be:
-			constant e.g. 100
+		Value can be:
+			constant e.g. 100, can be preceded by operators to change the base (radix): [#|&]
+			                   # - hex
+							   & - bin
+							   sample: #10  = 16
+							           &10  = 2
 			variable e.g. pi
 			another expression between brackets e.g (x)
 			function e.g. sin(x)
@@ -112,7 +120,11 @@ namespace ttmath
 		                 for separating parameters
 	    "1 < 2"  (the result will be: 1)
 	    "4 < 3"  (the result will be: 0)
-		"2+x"  (of course if the variable 'x' is defined)
+		"2+x"    (of course if the variable 'x' is defined)
+		"4x+10"
+		"#20+10"     = 32 + 10 = 42
+		"10 ^ -&101" = 10 ^ -5 = 0.00001
+		"8 * -&10"   = 8 * -2  = -16
 		etc.
 
 	we can also use a semicolon for separating any 'x' input strings
@@ -137,7 +149,7 @@ private:
 		pow (^)
 		and 'shortmul' used when there is no any operators between
 		a first parameter and a variable or function
-		(the 'shortmul' has the greatest priority e.g. '5^3m' equals '5^(3*m)' )
+		(the 'shortmul' has the same priority as the normal multiplication )
 */
 	class MatOperator
 	{
@@ -182,6 +194,7 @@ private:
 				break;
 
 			case mul:
+			case shortmul:
 			case div:
 				priority = 12;
 				break;
@@ -190,10 +203,6 @@ private:
 				priority = 14;
 				break;
 
-			case shortmul:
-				priority = 20;
-				break;
-
 			default:
 				Error( err_internal_error );
 				break;
@@ -416,7 +425,6 @@ typedef std::map<std::string, pfunction_var> VariablesTable;
 VariablesTable variables_table;
 
 
-
 /*!
 	you can't calculate the factorial if the argument is greater than 'factorial_max'
 	default value is zero which means there are not any limitations
@@ -433,8 +441,6 @@ static void Error(ErrorCode code)
 }
 
 
-
-
 /*!
 	this method skips the white character from the string
 
@@ -447,7 +453,6 @@ void SkipWhiteCharacters()
 }
 
 
-
 /*!
 	an auxiliary method for RecurrenceParsingVariablesOrFunction(...)
 */
@@ -477,6 +482,7 @@ void RecurrenceParsingVariablesOrFunction_AddName(bool variable, const std::stri
 		visited_functions.insert( name );
 }
 
+
 /*!
 	an auxiliary method for RecurrenceParsingVariablesOrFunction(...)
 */
@@ -488,6 +494,7 @@ void RecurrenceParsingVariablesOrFunction_DeleteName(bool variable, const std::s
 		visited_functions.erase( name );
 }
 
+
 /*!
 	this method returns the value of a variable or function
 	by creating a new instance of the mathematical parser 
@@ -1628,17 +1635,21 @@ return is_it_name_of_function;
 
 
 /*!
-	we're reading a numerical value directly from the string	
+	we're reading a numerical value directly from the string
 */
-void ReadValue(Item & result)
+void ReadValue(Item & result, int reading_base)
 {
 const char * new_stack_pointer;
+bool value_read;
 
-	int carry = result.value.FromString(pstring, base, &new_stack_pointer);
+	int carry = result.value.FromString(pstring, reading_base, &new_stack_pointer, &value_read);
 	pstring   = new_stack_pointer;
 
 	if( carry )
 		Error( err_overflow );
+
+	if( !value_read )
+		Error( err_unknown_character );
 }
 
 
@@ -1667,6 +1678,24 @@ return c;
 }
 
 
+/*!
+	this method tells whether 'character' can begin a numeric value in the given base:
+	a comma character can be first too, otherwise CharToDigit() has to accept it for that base
+*/
+bool ValueStarts(int character, int base)
+{
+	const bool first_comma  = ( character == TTMATH_COMMA_CHARACTER_1 );
+	const bool second_comma = ( TTMATH_COMMA_CHARACTER_2 != 0 && character == TTMATH_COMMA_CHARACTER_2 );
+
+	if( first_comma || second_comma )
+		return true;
+
+	// not a comma, so the answer depends only on CharToDigit()
+	const bool is_digit = ( CharToDigit(character, base) != -1 );
+return is_digit;
+}
+
+
 /*!
 	we're reading the item
   
@@ -1726,19 +1755,33 @@ int  character;
 	return 2;
 	}
 	else
-	if( character=='#' || character=='&' ||
-		character==TTMATH_COMMA_CHARACTER_1 ||
-		(character==TTMATH_COMMA_CHARACTER_2 && TTMATH_COMMA_CHARACTER_2 != 0) ||
-		CharToDigit(character, base)!=-1 )
+	if( character == '#' )
 	{
-		/*
-			warning:
-			if we're using for example the base equal 16
-			we can find a first character like 'e' that is not e=2.71..
-			but the value 14, for this case we must use something like var::e for variables
-			(not implemented yet)
-		*/
-		ReadValue( result );
+		++pstring;
+		SkipWhiteCharacters();
+
+		// after '#' character we do not allow '-' or '+' (can be white characters)
+		if(	ValueStarts(*pstring, 16) )
+			ReadValue( result, 16 );
+		else
+			Error( err_unknown_character );
+	}
+	else
+	if( character == '&' )
+	{
+		++pstring;
+		SkipWhiteCharacters();
+
+		// after '&' character we do not allow '-' or '+' (can be white characters)
+		if(	ValueStarts(*pstring, 2) )
+			ReadValue( result, 2 );
+		else
+			Error( err_unknown_character );
+	}
+	else
+	if(	ValueStarts(character, base) )
+	{
+		ReadValue( result, base );
 	}
 	else
 	if( character>='a' && character<='z' )
@@ -2041,9 +2084,9 @@ void TryRollingUpStack()
 */
 int ReadValueVariableOrFunctionAndPushItIntoStack(Item & temp)
 {
-int kod = ReadValueVariableOrFunction( temp );
+int code = ReadValueVariableOrFunction( temp );
 	
-	if( kod == 0 )
+	if( code == 0 )
 	{
 		if( stack_index < stack.size() )
 			stack[stack_index] = temp;
@@ -2053,13 +2096,13 @@ int kod = ReadValueVariableOrFunction( temp );
 		++stack_index;
 	}
 
-	if( kod == 2 )
+	if( code == 2 )
 		// there was a final bracket, we didn't push it into the stack 
 		// (it'll be read by the 'ReadOperatorAndCheckFinalBracket' method next)
-		kod = 0;
+		code = 0;
 
 
-return kod;
+return code;
 }
 
 
diff --git a/ttmath/ttmathuint.h b/ttmath/ttmathuint.h
index 764acbd..8a17ecd 100644
--- a/ttmath/ttmathuint.h
+++ b/ttmath/ttmathuint.h
@@ -3200,8 +3200,12 @@ public:
 		existing first white characters will be ommited
 
 		if the value from s is too large the rest digits will be skipped
+
+		after_source (if exists) is pointing at the end of the parsed string
+
+		value_read (if exists) tells whether something has actually been read (at least one digit)
 	*/
-	uint FromString(const char * s, uint b = 10, const char ** after_source = 0)
+	uint FromString(const char * s, uint b = 10, const char ** after_source = 0, bool * value_read = 0)
 	{
 	UInt<value_size> base( b );
 	UInt<value_size> temp;
@@ -3215,12 +3219,18 @@ public:
 		if( after_source )
 			*after_source = s;
 
+		if( value_read )
+			*value_read = false;
+
 		if( b<2 || b>16 )
 			return 1;
 
 
 		for( ; (z=CharToDigit(*s, b)) != -1 ; ++s)
 		{
+			if( value_read )
+				*value_read = true;
+
 			if( c == 0 )
 			{
 				temp.table[0] = z;