Merged against the current original ttmath trunk

git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@170 e52654a7-88a9-db11-a3e9-0013d4bc506e
2009-06-25 11:07:55 +00:00 · 2009-06-25 11:07:55 +00:00 · de64608eba
parent be8913866a c70a947c07
commit de64608eba
8 changed files with 1396 additions and 346 deletions
--- a/29
+++ b/29
@ -1,4 +1,4 @@
-Version 0.8.5 prerelease (2009.05.15):
+Version 0.8.5 (2009.06.16):
    * fixed:   Big::Mod(x) didn't correctly return a carry
               and the result was sometimes very big (even greater than x)
    * fixed:   global function Mod(x) didn't set an ErrorCode object
@ -7,7 +7,7 @@ Version 0.8.5 prerelease (2009.05.15):
    * changed: function Sin(x) to Sin(x, ErrorCode * err=0)
               when x was very big the function returns zero
               now it sets ErrorCode object to err_overflow
-               and the result is undefined
+               and the result has a NaN flag set
               the same is to Cos() function
    * changed: PrepareSin(x) is using Big::Mod() now when reducing 2PI period
               should be a little accurate especially on a very big 'x'
@ -29,6 +29,31 @@ Version 0.8.5 prerelease (2009.05.15):
               uint SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
               three forms: asm x86, asm x86_64, no-asm
               those methods are used by the Karatsuba multiplication algorithm
    * added:   to Big<> class: support for NaN flag (Not a Number)
               bool Big::IsNan() - returns true if the NaN flag is set
               void Big::SetNan() - sets the NaN flag
               The NaN flag is set by default after creating an object:
                 Big<1, 2> a;    // NaN is set (it means the object has not a valid number)
                 std::cout << a; // cout gives "NaN"
                 a = 123;        // now NaN is not set
                 std::cout << a; // cout gives "123"
               The NaN is set if there was a carry during calculations
                 a.Mul(very_big_value); // a will have a NaN set
               The NaN is set if an argument is NaN too
                 b.SetNan();
                 a.Add(b);  // a will have NaN because b has NaN too
               If you try to do something on a NaN object, the result is a NaN too
                 a.SetNan();
                 a.Add(2);  // a is still a NaN 
               The NaN is set if you use incorrect arguments
                 a.Ln(-10); // a will have the NaN flag
               The only way to clear the NaN flag is to assign a correct value or other correct object,
               supposing 'a' has NaN flag, to remove the flag you can either:
                 a = 10;
                 a.FromInt(30);
                 a.SetOne(); 
                 a.FromBig(other_object_without_nan);
                 etc.
 Version 0.8.4 (2009.05.08):
--- a/ttmath/ttmath.h
+++ b/ttmath/ttmath.h
@ -1,4 +1,3 @@
 /*
 * This file is a part of TTMath Bignum Library
 * and is distributed under the (new) BSD licence.
@ -99,6 +98,14 @@ namespace ttmath
 	template<class ValueType>
 	ValueType Round(const ValueType & x, ErrorCode * err = 0)
 	{
 		if( x.IsNan() )
 		{
 			if( err )
 				*err = err_improper_argument;
 		return x; // NaN
 		}
 		ValueType result( x );
 		uint c = result.Round();
@ -124,6 +131,14 @@ namespace ttmath
 	template<class ValueType>
 	ValueType Ceil(const ValueType & x, ErrorCode * err = 0)
 	{
 		if( x.IsNan() )
 		{
 			if( err )
 				*err = err_improper_argument;
 		return x; // NaN
 		}
 		ValueType result(x);
 		uint c = 0;
@ -163,6 +178,14 @@ namespace ttmath
 	template<class ValueType>
 	ValueType Floor(const ValueType & x, ErrorCode * err = 0)
 	{
 		if( x.IsNan() )
 		{
 			if( err )
 				*err = err_improper_argument;
 		return x; // NaN
 		}
 		ValueType result(x);
 		uint c = 0;
@ -203,8 +226,15 @@ namespace ttmath
 	template<class ValueType>
 	ValueType Ln(const ValueType & x, ErrorCode * err = 0)
 	{
-	ValueType result;
+		if( x.IsNan() )
 		{
 			if( err )
 				*err = err_improper_argument;
 		return x; // NaN
 		}
 	ValueType result;
 		uint state = result.Ln(x);
 		if( err )
@ -237,8 +267,15 @@ namespace ttmath
 	template<class ValueType>
 	ValueType Log(const ValueType & x, const ValueType & base, ErrorCode * err = 0)
 	{
-	ValueType result;
+		if( x.IsNan() || base.IsNan() )
 		{
 			if( err )
 				*err = err_improper_argument;
 		return ValueType(); // default NaN
 		}
 	ValueType result;
 		uint state = result.Log(x, base);
 		if( err )
@ -271,8 +308,15 @@ namespace ttmath
 	template<class ValueType>
 	ValueType Exp(const ValueType & x, ErrorCode * err = 0)
 	{
-	ValueType result;
+		if( x.IsNan() )
 		{
 			if( err )
 				*err = err_improper_argument;
 		return x; // NaN
 		}
 	ValueType result;
 		uint c = result.Exp(x);
 		if( err )
@ -468,6 +512,14 @@ namespace ttmath
 	ValueType one, result;
 	bool change_sign;	
 		if( x.IsNan() )
 		{
 		if( err )
 				*err = err_improper_argument;
 		return result; // NaN is set by default
 		}
 		if( err )
 			*err = err_ok;
@ -479,7 +531,7 @@ namespace ttmath
 			// result has NaN flag set by default
 			if( err )
-				*err = err_overflow; // maybe another error code?
+				*err = err_overflow; // maybe another error code? err_improper_argument?
 		return result; // NaN is set by default
 		}
@ -511,6 +563,14 @@ namespace ttmath
 	template<class ValueType>
 	ValueType Cos(ValueType x, ErrorCode * err = 0)
 	{
 		if( x.IsNan() )
 		{
 			if( err )
 				*err = err_improper_argument;
 		return x; // NaN
 		}
 		ValueType pi05;
 		pi05.Set05Pi();
@ -783,6 +843,14 @@ namespace ttmath
 		one.SetOne();
 		bool change_sign = false;
 		if( x.IsNan() )
 		{
 			if( err )
 				*err = err_improper_argument;
 		return result; // NaN is set by default
 		}
 		if( x.GreaterWithoutSignThan(one) )
 		{
 			if( err )
@ -1005,6 +1073,9 @@ namespace ttmath
 		one.SetOne();
 		bool change_sign = false;
 		if( x.IsNan() )
 			return result; // NaN is set by default
 		// if x is negative we're using the formula:
 		// atan(-x) = -atan(x)
 		if( x.IsSign() )
@ -1085,6 +1156,14 @@ namespace ttmath
 	template<class ValueType>
 	ValueType Sinh(const ValueType & x, ErrorCode * err = 0)
 	{
 		if( x.IsNan() )
 		{
 			if( err )
 				*err = err_improper_argument;
 		return x; // NaN
 		}
 		ValueType ex, emx;
 		uint c = 0;
@ -1109,6 +1188,14 @@ namespace ttmath
 	template<class ValueType>
 	ValueType Cosh(const ValueType & x, ErrorCode * err = 0)
 	{
 		if( x.IsNan() )
 		{
 			if( err )
 				*err = err_improper_argument;
 		return x; // NaN
 		}
 		ValueType ex, emx;
 		uint c = 0;
@ -1133,6 +1220,14 @@ namespace ttmath
 	template<class ValueType>
 	ValueType Tanh(const ValueType & x, ErrorCode * err = 0)
 	{
 		if( x.IsNan() )
 		{
 			if( err )
 				*err = err_improper_argument;
 		return x; // NaN
 		}
 		ValueType ex, emx, nominator, denominator;
 		uint c = 0;
@ -1173,6 +1268,14 @@ namespace ttmath
 	template<class ValueType>
 	ValueType Coth(const ValueType & x, ErrorCode * err = 0)
 	{
 		if( x.IsNan() )
 		{
 			if( err )
 				*err = err_improper_argument;
 		return x; // NaN
 		}
 		if( x.IsZero() )
 		{
 			if( err )
@ -1230,6 +1333,14 @@ namespace ttmath
 	template<class ValueType>
 	ValueType ASinh(const ValueType & x, ErrorCode * err = 0)
 	{
 		if( x.IsNan() )
 		{
 			if( err )
 				*err = err_improper_argument;
 		return x; // NaN
 		}
 		ValueType xx(x), one, result;
 		uint c = 0;
 		one.SetOne();
@ -1258,6 +1369,14 @@ namespace ttmath
 	template<class ValueType>
 	ValueType ACosh(const ValueType & x, ErrorCode * err = 0)
 	{
 		if( x.IsNan() )
 		{
 			if( err )
 				*err = err_improper_argument;
 		return x; // NaN
 		}
 		ValueType xx(x), one, result;
 		uint c = 0;
 		one.SetOne();
@ -1299,6 +1418,14 @@ namespace ttmath
 	template<class ValueType>
 	ValueType ATanh(const ValueType & x, ErrorCode * err = 0)
 	{
 		if( x.IsNan() )
 		{
 			if( err )
 				*err = err_improper_argument;
 		return x; // NaN
 		}
 		ValueType nominator(x), denominator, one, result;
 		uint c = 0;
 		one.SetOne();
@ -1344,6 +1471,14 @@ namespace ttmath
 	template<class ValueType>
 	ValueType ACoth(const ValueType & x, ErrorCode * err = 0)
 	{
 		if( x.IsNan() )
 		{
 			if( err )
 				*err = err_improper_argument;
 		return x; // NaN
 		}
 		ValueType nominator(x), denominator(x), one, result;
 		uint c = 0;
 		one.SetOne();
@ -1402,6 +1537,14 @@ namespace ttmath
 	ValueType result, temp;
 	uint c = 0;
 		if( x.IsNan() )
 		{
 			if( err )
 				*err = err_improper_argument;
 		return result; // NaN is set by default
 		}
 		result = x;
 		// it is better to make division first and then multiplication
@ -1430,6 +1573,14 @@ namespace ttmath
 	ValueType result, delimiter;
 	uint c = 0;
 		if( x.IsNan() )
 		{
 			if( err )
 				*err = err_improper_argument;
 		return result; // NaN is set by default
 		}
 		result = 180;
 		c += result.Mul(x);
@ -1467,7 +1618,7 @@ namespace ttmath
 	ValueType delimiter, multipler;
 	uint c = 0;
-		if( m.IsSign() || s.IsSign() )
+		if( d.IsNan() || m.IsNan() || s.IsNan() || m.IsSign() || s.IsSign() )
 		{
 			if( err )
 				*err = err_improper_argument;
@ -1521,6 +1672,14 @@ namespace ttmath
 	ValueType result, temp;
 	uint c = 0;
 		if( x.IsNan() )
 		{
 			if( err )
 				*err = err_improper_argument;
 		return result; // NaN is set by default
 		}
 		result = x;
 		// it is better to make division first and then multiplication
@ -1549,6 +1708,14 @@ namespace ttmath
 	ValueType result, delimiter;
 	uint c = 0;
 		if( x.IsNan() )
 		{
 			if( err )
 				*err = err_improper_argument;
 		return result; // NaN is set by default
 		}
 		result = 200;
 		c += result.Mul(x);
@ -1573,6 +1740,14 @@ namespace ttmath
 	ValueType result, temp;
 	uint c = 0;
 		if( x.IsNan() )
 		{
 			if( err )
 				*err = err_improper_argument;
 		return result; // NaN is set by default
 		}
 		result = x;
 		temp = 200;
@ -1615,6 +1790,14 @@ namespace ttmath
 	ValueType result, temp;
 	uint c = 0;
 		if( x.IsNan() )
 		{
 			if( err )
 				*err = err_improper_argument;
 		return result; // NaN is set by default
 		}
 		result = x;
 		temp = 180;
@ -1648,7 +1831,7 @@ namespace ttmath
 	template<class ValueType>
 	ValueType Sqrt(ValueType x, ErrorCode * err = 0)
 	{
-		if( x.IsSign() )
+		if( x.IsNan() || x.IsSign() )
 		{
 			if( err )
 				*err = err_improper_argument;
@ -1730,7 +1913,7 @@ namespace ttmath
 		template<class ValueType>
-		bool RootCheckIndexOne(ValueType & x, const ValueType & index, ErrorCode * err)
+		bool RootCheckIndexOne(const ValueType & index, ErrorCode * err)
 		{
 			ValueType one;
 			one.SetOne();
@ -1772,11 +1955,12 @@ namespace ttmath
 		template<class ValueType>
-		bool RootCheckXZero(ValueType & x, const ValueType & index, ErrorCode * err)
+		bool RootCheckXZero(ValueType & x, ErrorCode * err)
 		{
 			if( x.IsZero() )
 			{
 				// root(0;index) is zero (if index!=0)
 				// RootCheckIndexZero() must be called beforehand
 				x.SetZero();
 				if( err )
@ -1843,11 +2027,19 @@ namespace ttmath
 	{
 		using namespace auxiliaryfunctions;
 		if( x.IsNan() || index.IsNan() )
 		{
 			if( err )
 				*err = err_improper_argument;
 		return ValueType(); // NaN is set by default
 		}
 		if( RootCheckIndexSign(x, index, err) ) return x;
 		if( RootCheckIndexZero(x, index, err) ) return x;
-		if( RootCheckIndexOne (x, index, err) ) return x;
+		if( RootCheckIndexOne (   index, err) ) return x;
 		if( RootCheckIndexFrac(x, index, err) ) return x;
-		if( RootCheckXZero(x, index, err) ) return x;
+		if( RootCheckXZero    (x,        err) ) return x;
 		// index integer and index!=0
 		// x!=0
@ -1898,14 +2090,17 @@ namespace ttmath
 		while( !carry && multipler<maxvalue  )
 		{
-			// !! the test here we don't have to make in all iterations (performance)
+			if( stop && (multipler & 127)==0 ) // it means 'stop && (multipler % 128)==0'
-			if( stop && stop->WasStopSignal() )
+			{
 				// after each 128 iterations we make a test
 				if( stop->WasStopSignal() )
 				{
 				if( err )
 					*err = err_interrupt;
 			return 2;
 			}
 			}
 			++multipler;
 			carry += result.MulUInt(multipler);
@ -1928,20 +2123,25 @@ namespace ttmath
 		one.SetOne();
 		uint carry = 0;
 		uint iter = 1; // only for testing the stop object
 		while( !carry && multipler < x )
 		{
-			// !! the test here we don't have to make in all iterations (performance)
+			if( stop && (iter & 31)==0 ) // it means 'stop && (iter % 32)==0'
-			if( stop && stop->WasStopSignal() )
+			{
 				// after each 32 iterations we make a test
 				if( stop->WasStopSignal() )
 				{
 				if( err )
 					*err = err_interrupt;
 			return 2;
 			}
 			}
 			carry += multipler.Add(one);
 			carry += result.Mul(multipler);
 			++iter;
 		}
 		if( err )
@ -1968,18 +2168,16 @@ namespace ttmath
 	static History<ValueType> history;
 	ValueType result;
-		result.SetOne();
+		if( x.IsNan() || x.IsSign() )
 		if( x.IsSign() )
 		{
 			if( err )
 				*err = err_improper_argument;
-			result.SetNan();
+		return result; // NaN set by default
 		return result;
 		}
 		result.SetOne();
 		if( !x.exponent.IsSign() && !x.exponent.IsZero() )
 		{
 			// when x.exponent>0 there's no sense to calculate the formula
@ -2065,6 +2263,14 @@ namespace ttmath
 	template<class ValueType>
 	ValueType Mod(ValueType a, const ValueType & b, ErrorCode * err = 0)
 	{
 		if( a.IsNan() || b.IsNan() )
 		{
 			if( err )
 				*err = err_improper_argument;
 		return ValueType(); // NaN is set by default
 		}
 		uint c = a.Mod(b);
 		if( err )
@ -2084,4 +2290,11 @@ namespace ttmath
 */
 #include "ttmathparser.h"
 #ifdef _MSC_VER
 #pragma warning( default: 4127 )
 //warning C4127: conditional expression is constant
 #endif
 #endif
--- a/ttmath/ttmathbig.h
+++ b/ttmath/ttmathbig.h
@ -143,9 +143,8 @@ public:
 			return 0;
 		uint comp = mantissa.CompensationToLeft();
 		uint c    = exponent.Sub( comp );
-	return CheckCarry(c);
+	return exponent.Sub( comp );
 	}
@ -540,6 +539,7 @@ public:
 		/*
 			we only have to test the mantissa
 			also we don't check the NaN flag
 			(maybe this method should return false if there is NaN flag set?)
 		*/
 		return mantissa.IsZero();
 	}
@ -587,6 +587,7 @@ public:
 	*/
 	void Sgn()
 	{
 		// we have to check the NaN flag, because the next SetOne() method would clear it
 		if( IsNan() )
 			return;
@ -622,6 +623,7 @@ public:
 	/*!
 		this method changes the sign
 		when there is a value of zero then the sign is not changed
 			e.g.
 			-1 -> 1
@ -629,6 +631,8 @@ public:
 	*/
 	void ChangeSign()
 	{
 		// we don't have to check the NaN flag here
 		if( info & TTMATH_BIG_SIGN )
 		{
 			info &= ~TTMATH_BIG_SIGN;
@ -708,7 +712,14 @@ public:
 			// there shouldn't be a carry here because
 			// (1) (2) guarantee that the mantissa of this
 			// is greater than or equal to the mantissa of the ss2
-			TTMATH_VERIFY( mantissa.Sub(ss2.mantissa) >= 0 )
+			
 			#ifdef TTMATH_DEBUG
 			// this is to get rid of a warning saying that c_temp is not used
 			uint c_temp = /* mantissa.Sub(ss2.mantissa); */
 			#endif
 			mantissa.Sub(ss2.mantissa);
 			TTMATH_ASSERT( c_temp == 0 )
 		}
 		c += Standardizing();
@ -1393,19 +1404,17 @@ public:
 		denominator.SetOne();
 		denominator_i.SetOne();
 		// every 'step_test' times we make a test
 		const uint step_test = 5;
 		uint i;
 		old_value = *this;
-		// we begin from 1 in order to not testing at the beginning
+		// we begin from 1 in order to not test at the beginning
 	#ifdef TTMATH_CONSTANTSGENERATOR
 		for(i=1 ; true ; ++i)
 	#else
 		for(i=1 ; i<=TTMATH_ARITHMETIC_MAX_LOOP ; ++i)
 	#endif
 		{
-			bool testing = ((i % step_test) == 0);
+			bool testing = ((i & 3) == 0); // it means '(i % 4) == 0'
 			next_part = numerator;
@ -1418,13 +1427,15 @@ public:
 			// there shouldn't be a carry here
 			Add( next_part );
-			if( testing && old_value==*this )
+			if( testing )
 			{
 				if( old_value == *this )
 				// we've added next few parts of the formula but the result
 				// is still the same then we break the loop
 				break;
 			else
 				old_value = *this;
-
+			}
 			// we set the denominator and the numerator for a next part of the formula
 			if( denominator_i.Add(one) )
@ -1559,20 +1570,17 @@ public:
 		SetZero();
 		old_value = *this;
 		// every 'step_test' times we make a test
 		const uint step_test = 5;
 		uint i;
 	#ifdef TTMATH_CONSTANTSGENERATOR
 		for(i=1 ; true ; ++i)
 	#else
-		// we begin from 1 in order to not testing at the beginning
+		// we begin from 1 in order to not test at the beginning
 		for(i=1 ; i<=TTMATH_ARITHMETIC_MAX_LOOP ; ++i)
 	#endif
 		{
-			bool testing = ((i % step_test) == 0);
+			bool testing = ((i & 3) == 0); // it means '(i % 4) == 0'
 			next_part = x1;
@ -1585,12 +1593,15 @@ public:
 			// there shouldn't be a carry here
 			Add(next_part);
-			if( testing && old_value == *this )
+			if( testing )
 			{
 				if( old_value == *this )
 				// we've added next (step_test) parts of the formula but the result
 				// is still the same then we break the loop
 				break;
 			else
 				old_value = *this;
 			}
 			if( x1.Mul(x2) )
 				// if there is a carry here the result we return as good
@ -2021,9 +2032,10 @@ public:
 			// If E=2047 and F is zero and S is 1, then V=-Infinity
 			// If E=2047 and F is zero and S is 0, then V=Infinity
-			// at the moment we do not support NaN, -Infinity and +Infinity
+			// we do not support -Infinity and +Infinity
 			// we assume that there is always NaN 
-			SetZero();
+			SetNan();
 		}
 		else
 		if( e > 0 )
@ -2134,9 +2146,10 @@ public:
 			// If E=2047 and F is zero and S is 1, then V=-Infinity
 			// If E=2047 and F is zero and S is 0, then V=Infinity
-			// at the moment we do not support NaN, -Infinity and +Infinity
+			// we do not support -Infinity and +Infinity
 			// we assume that there is always NaN 
-			SetZero();
+			SetNan();
 		}
 		else
 		if( e > 0 )
@ -2226,12 +2239,19 @@ public:
 			return 0;
 		}
 		if( IsNan() )
 		{
 			result = ToDouble_SetDouble( false, 2047, 0, false, true);
 		return 0;
 		}
 		sint e_correction = sint(man*TTMATH_BITS_PER_UINT) - 1;
 		if( exponent >= 1024 - e_correction )
 		{
 			// +/- infinity
-			result = ToDouble_SetDouble( IsSign(), 2047, 0, true);
+			result = ToDouble_SetDouble( 0, 2047, 0, true);
 		return 1;
 		}
@ -2266,7 +2286,7 @@ private:
 #ifdef TTMATH_PLATFORM32
 	// 32bit platforms
-	double ToDouble_SetDouble(bool is_sign, uint e, sint move, bool infinity = false) const
+	double ToDouble_SetDouble(bool is_sign, uint e, sint move, bool infinity = false, bool nan = false) const
 	{
 		union 
 		{
@ -2281,6 +2301,12 @@ private:
 		temp.u[1] |= (e << 20) & 0x7FF00000u;
 		if( nan )
 		{
 			temp.u[0] |= 1;
 			return temp.d;
 		}
 		if( infinity )
 			return temp.d;
@ -2303,7 +2329,7 @@ private:
 #else
 	// 64bit platforms
-	double ToDouble_SetDouble(bool is_sign, uint e, sint move, bool infinity = false) const
+	double ToDouble_SetDouble(bool is_sign, uint e, sint move, bool infinity = false, bool nan = false) const
 	{
 		union 
 		{
@ -2318,6 +2344,12 @@ private:
 		temp.u |= (e << 52) & 0x7FF0000000000000ul;
 		if( nan )
 		{
 			temp.u |= 1;
 			return temp.d;
 		}
 		if( infinity )
 			return temp.d;
@ -2600,7 +2632,12 @@ public:
 	*/
 	Big()
 	{
-		SetNan();
+		info = TTMATH_BIG_NAN;
 		/*
 			we're directly setting 'info' (instead of calling SetNan())
 			in order to get rid of a warning saying that 'info' is uninitialized
 		*/
 	}
@ -2682,7 +2719,8 @@ public:
 		output:
 			return value:
 			0 - ok and 'result' will be an object of type tstr_t which holds the value
-			1 - if there was a carry 
+			1 - if there was a carry (shoudn't be in a normal situation - if is that means there
 			    is somewhere an error in the library)
 	*/
 	uint ToString(	tstr_t & result,
 					uint base                  = 10,
@ -3100,7 +3138,7 @@ private:
 			else
 				was_carry = false;
-			new_man[i] = UInt<man>::DigitToChar( digit );
+			new_man[i] = static_cast<char>( UInt<man>::DigitToChar(digit) );
 		}
 		if( i<0 && was_carry )
@ -3826,6 +3864,25 @@ public:
 	return false;
 	}
 	bool IsNearZero() const
 	{
 		// we should check the mantissas beforehand because sometimes we can have
 		// a mantissa set to zero but in the exponent something another value
 		// (maybe we've forgotten about calling CorrectZero() ?)
 		if( mantissa.IsZero() )
 			{
 			return true;
 			}
 		UInt<man>	m(mantissa);
 		m.Rcr(man*3); // pi * thumb rule...
 		return(m.IsZero());
 	return false;
 	}
 	bool operator<(const Big<exp,man> & ss2) const
 	{
@ -3857,6 +3914,14 @@ public:
 	}
 	bool operator^=(const Big<exp,man> & ss2) const
 	{
 		if( IsSign() != ss2.IsSign() )
 			return false;
 	return AboutEqualWithoutSign( ss2 );
 	}
 	bool operator>(const Big<exp,man> & ss2) const
--- a/ttmath/ttmathparser.h
+++ b/ttmath/ttmathparser.h
@ -1,5 +1,5 @@
 /*
- * This file is a part of TTMath Mathematical Library
+ * This file is a part of TTMath Bignum Library
 * and is distributed under the (new) BSD licence.
 * Author: Tomasz Sowa <t.sowa@slimaczek.pl>
 */
@ -973,7 +973,7 @@ void Not(int sindex, int amount_of_args, ValueType & result)
 void DegToRad(int sindex, int amount_of_args, ValueType & result)
 {
-	ErrorCode err;
+	ErrorCode err = err_ok;
 	if( amount_of_args == 1 )
 	{
@ -1052,7 +1052,7 @@ void RadToGrad(int sindex, int amount_of_args, ValueType & result)
 void DegToGrad(int sindex, int amount_of_args, ValueType & result)
 {
-	ErrorCode err;
+	ErrorCode err = err_ok;
 	if( amount_of_args == 1 )
 	{
@ -1555,7 +1555,7 @@ int character;
 	do
 	{
-		result   += character;
+		result   += static_cast<char>( character );
 		character = * ++pstring;
 	}
 	while(	(character>='a' && character<='z') ||
--- a/ttmath/ttmathtypes.h
+++ b/ttmath/ttmathtypes.h
@ -64,8 +64,8 @@
 */
 #define TTMATH_MAJOR_VER		0
 #define TTMATH_MINOR_VER		8
-#define TTMATH_REVISION_VER		4
+#define TTMATH_REVISION_VER		5
-#define TTMATH_PRERELEASE_VER	1
+#define TTMATH_PRERELEASE_VER	0
 /*!
@ -232,6 +232,16 @@ namespace ttmath
 /*!
 	this is a limit when calculating Karatsuba multiplication
 	if the size of a vector is smaller than TTMATH_USE_KARATSUBA_MULTIPLICATION_FROM_SIZE
 	the Karatsuba algorithm will use standard schoolbook multiplication
 */
 #ifdef __GNUC__
 #define TTMATH_USE_KARATSUBA_MULTIPLICATION_FROM_SIZE 3
 #else
 #define TTMATH_USE_KARATSUBA_MULTIPLICATION_FROM_SIZE 5
 #endif
 namespace ttmath
 {
--- a/ttmath/ttmathuint.h
+++ b/ttmath/ttmathuint.h
@ -40,6 +40,7 @@
 #ifndef headerfilettmathuint
 #define headerfilettmathuint
 /*!
 	\file ttmathuint.h
    \brief template class UInt<uint>
@ -48,6 +49,7 @@
 #include <iostream>
 #include <iomanip>
 #include "ttmathtypes.h"
 #if defined(_MSC_VER)
@ -842,8 +844,10 @@ public:
 	/*!
 		the multiplication 'this' = 'this' * ss2
 		algorithm: 100 - means automatically choose the fastest algorithm
 	*/
-	uint Mul(const UInt<value_size> & ss2, uint algorithm = 2)
+	uint Mul(const UInt<value_size> & ss2, uint algorithm = 100)
 	{
 		switch( algorithm )
 		{
@ -851,8 +855,14 @@ public:
 			return Mul1(ss2);
 		case 2:
 		default:
 			return Mul2(ss2);
 		case 3:
 			return Mul3(ss2);
 		case 100:
 		default:
 			return MulFastest(ss2);
 		}
 	}
@ -862,10 +872,12 @@ public:
 		since the 'result' is twice bigger than 'this' and 'ss2'
 		this method never returns a carry
 		algorithm: 100 - means automatically choose the fastest algorithm
 	*/
 	void MulBig(const UInt<value_size> & ss2,
 				UInt<value_size*2> & result,
-				uint algorithm = 2)
+				uint algorithm = 100)
 	{
 		switch( algorithm )
 		{
@ -873,8 +885,14 @@ public:
 			return Mul1Big(ss2, result);
 		case 2:
 		default:
 			return Mul2Big(ss2, result);
 		case 3:
 			return Mul3Big(ss2, result);
 		case 100:
 		default:
 			return MulFastestBig(ss2, result);
 		}
 	}
@ -966,23 +984,25 @@ public:
 	uint Mul2(const UInt<value_size> & ss2)
 	{
 	UInt<value_size*2> result;
-	uint i;
+	uint i, c = 0;
 		Mul2Big(ss2, result);
 		// copying result
-		memcpy(table,result.table,sizeof(table));
+		for(i=0 ; i<value_size ; ++i)
-		//for(i=0 ; i<value_size ; ++i)
+			table[i] = result.table[i];
 		//	table[i] = result.table[i];
 		// testing carry
-		for(i = value_size ; i<value_size*2 ; ++i)
+		for( ; i<value_size*2 ; ++i)
 			if( result.table[i] != 0 )
-				return 1;
+			{
 				c = 1;
 				break;
 			}
 		TTMATH_LOG("UInt::Mul2")
-	return 0;
+	return c;
 	}
@ -994,44 +1014,385 @@ public:
 	*/
 	void Mul2Big(const UInt<value_size> & ss2, UInt<value_size*2> & result)
 	{
-	uint r2,r1;
+		Mul2Big2<value_size>(table, ss2.table, result);
-	uint x1size=value_size, x2size=value_size;
+
 		TTMATH_LOG("UInt::Mul2Big")
 	}
 private:
 	/*!
 		an auxiliary method for calculating the multiplication
 		arguments we're taking as pointers (this is to improve the Mul3Big2()- avoiding
 		unnecessary copying objects), the result should be taken as a pointer too,
 		but at the moment there is no method AddTwoInts() which can operate on pointers
 	*/
 	template<uint ss_size>
 	void Mul2Big2(const uint * ss1, const uint * ss2, UInt<ss_size*2> & result)
 	{
 	uint x1size  = ss_size, x2size  = ss_size;
 	uint x1start = 0,       x2start = 0;
 		if( ss_size > 2 )
 		{
 			// if the ss_size is smaller than or equal to 2
 			// there is no sense to set x1size (and others) to another values
 			for(x1size=ss_size ; x1size>0 && ss1[x1size-1]==0 ; --x1size);
 			for(x2size=ss_size ; x2size>0 && ss2[x2size-1]==0 ; --x2size);
 			for(x1start=0 ; x1start<x1size && ss1[x1start]==0 ; ++x1start);
 			for(x2start=0 ; x2start<x2size && ss2[x2start]==0 ; ++x2start);
 		}
 		Mul2Big3<ss_size>(ss1, ss2, result, x1start, x1size, x2start, x2size);
 	}
 	/*!
 		an auxiliary method for calculating the multiplication
 	*/
 	template<uint ss_size>
 	void Mul2Big3(const uint * ss1, const uint * ss2, UInt<ss_size*2> & result, uint x1start, uint x1size, uint x2start, uint x2size)
 	{
 	uint r2, r1;
 		result.SetZero();
-		if( value_size > 2 )
+		if( x1size==0 || x2size==0 )
 			return;
 		for(uint x1=x1start ; x1<x1size ; ++x1)
 		{
-			// if the value_size is smaller than or equal to 2
+			for(uint x2=x2start ; x2<x2size ; ++x2)
-			// there is no sense to set x1size (and others) to another values
+			{
 				MulTwoWords(ss1[x1], ss2[x2], &r2, &r1);
 				result.AddTwoInts(r2, r1, x2+x1);
 				// here will never be a carry
 			}
 		}
 	}
 public:
 	/*!
 		multiplication: this = this * ss2
 		This is Karatsuba Multiplication algorithm, we're using it when value_size is greater than
 		or equal to TTMATH_USE_KARATSUBA_MULTIPLICATION_FROM_SIZE macro (defined in ttmathuint.h).
 		If value_size is smaller then we're using Mul2Big() instead.
 		Karatsuba multiplication:
 		Assume we have:
 			this = x = x1*B^m + x0
 			ss2  = y = y1*B^m + y0
 		where x0 and y0 are less than B^m
 		the product from multiplication we can show as:
 	    x*y = (x1*B^m + x0)(y1*B^m + y0) = z2*B^(2m) + z1*B^m + z0
 		where
 		    z2 = x1*y1
 			z1 = x1*y0 + x0*y1
 			z0 = x0*y0
 		this is standard schoolbook algorithm with O(n^2), Karatsuba observed that z1 can be given in other form:
 			z1 = (x1 + x0)*(y1 + y0) - z2 - z0    / z1 = (x1*y1 + x1*y0 + x0*y1 + x0*y0) - x1*y1 - x0*y0 = x1*y0 + x0*y1 /
 		and to calculate the multiplication we need only three multiplications (with some additions and subtractions)
 		Our objects 'this' and 'ss2' we divide into two parts and by using recurrence we calculate the multiplication.
 		Karatsuba multiplication has O( n^(ln(3)/ln(2)) )
 	*/
 	uint Mul3(const UInt<value_size> & ss2)
 	{
 	UInt<value_size*2> result;
 	uint i, c = 0;
 		Mul3Big(ss2, result);
 		// copying result
 		for(i=0 ; i<value_size ; ++i)
 			table[i] = result.table[i];
 		// testing carry
 		for( ; i<value_size*2 ; ++i)
 			if( result.table[i] != 0 )
 			{
 				c = 1;
 				break;
 			}
 		TTMATH_LOG("UInt::Mul3")
 	return c;
 	}
 	/*!
 		multiplication: result = this * ss2
 		result is twice bigger than this and ss2,
 		this method never returns carry,
 		(Karatsuba multiplication)
 	*/
 	void Mul3Big(const UInt<value_size> & ss2, UInt<value_size*2> & result)
 	{
 		Mul3Big2<value_size>(table, ss2.table, result.table);
 		TTMATH_LOG("UInt::Mul3Big")
 	}
 private:
 	/*!
 		an auxiliary method for calculating the Karatsuba multiplication
 		result_size is equal ss_size*2
 	*/
 	template<uint ss_size>
 	void Mul3Big2(const uint * ss1, const uint * ss2, uint * result)
 	{
 	const uint * x1, * x0, * y1, * y0;
 		if( ss_size>1 && ss_size<TTMATH_USE_KARATSUBA_MULTIPLICATION_FROM_SIZE )
 		{
 			UInt<ss_size*2> res;
 			Mul2Big2<ss_size>(ss1, ss2, res);
 			for(uint i=0 ; i<ss_size*2 ; ++i)
 				result[i] = res.table[i];
 		return;
 		}
 		else
 		if( ss_size == 1 )
 		{
 			return MulTwoWords(*ss1, *ss2, &result[1], &result[0]);
 		}
 		if( (ss_size & 1) == 1 )
 		{
 			// ss_size is odd
 			x0 = ss1;
 			y0 = ss2;
 			x1 = ss1 + ss_size / 2 + 1;
 			y1 = ss2 + ss_size / 2 + 1;
 			// the second vectors (x1 and y1) are smaller about one from the first ones (x0 and y0)
 			Mul3Big3<ss_size/2 + 1, ss_size/2, ss_size*2>(x1, x0, y1, y0, result);
 		}
 		else
 		{
 			// ss_size is even
 			x0 = ss1;
 			y0 = ss2;
 			x1 = ss1 + ss_size / 2;
 			y1 = ss2 + ss_size / 2;
 			// all four vectors (x0 x1 y0 y1) are equal in size
 			Mul3Big3<ss_size/2, ss_size/2, ss_size*2>(x1, x0, y1, y0, result);
 		}
 	}
 #ifdef _MSC_VER
 #pragma warning (disable : 4717)
 //warning C4717: recursive on all control paths, function will cause runtime stack overflow
 //we have the stop point in Mul3Big2() method
 #endif
 	/*!
 		an auxiliary method for calculating the Karatsuba multiplication
 			x = x1*B^m + x0
 			y = y1*B^m + y0
 			first_size  - is the size of vectors: x0 and y0
 			second_size - is the size of vectors: x1 and y1 (can be either equal first_size or smaller about one from first_size)
 			x*y = (x1*B^m + x0)(y1*B^m + y0) = z2*B^(2m) + z1*B^m + z0
 		      where
 			   z0 = x0*y0
 			   z2 = x1*y1
 			   z1 = (x1 + x0)*(y1 + y0) - z2 - z0
 	*/
 	template<uint first_size, uint second_size, uint result_size>
 	void Mul3Big3(const uint * x1, const uint * x0, const uint * y1, const uint * y0, uint * result)
 	{
 	uint i, c, xc, yc;
 		UInt<first_size>   temp, temp2;
 		UInt<first_size*3> z1;
 		// z0 and z2 we store directly in the result (we don't use any temporary variables)
 		Mul3Big2<first_size>(x0, y0, result);                  // z0
 		Mul3Big2<second_size>(x1, y1, result+first_size*2);    // z2
 		// now we calculate z1
 		// temp  = (x0 + x1)
 		// temp2 = (y0 + y1)
 		// we're using temp and temp2 with UInt<first_size>, although there can be a carry but
 		// we simple remember it in xc and yc (xc and yc can be either 0 or 1),
 		// and (x0 + x1)*(y0 + y1) we calculate in this way (schoolbook algorithm):
 		//
 		//                 xc     |     temp
 		//                 yc     |     temp2
 		//               --------------------
 		//               (temp    *   temp2)
 		//               xc*temp2 |
 		//               yc*temp  |
 		//       xc*yc |
 		//       ----------     z1     --------
 		//
 		// and the result is never larger in size than 3*first_size
 		xc = AddVector(x0, x1, first_size, second_size, temp.table);
 		yc = AddVector(y0, y1, first_size, second_size, temp2.table);
 		Mul3Big2<first_size>(temp.table, temp2.table, z1.table);
 		// clearing the rest of z1
 		for(i=first_size*2 ; i<first_size*3 ; ++i)
 			z1.table[i] = 0;
 		if( xc )
 		{
 			c = AddVector(z1.table+first_size, temp2.table, first_size*3-first_size, first_size, z1.table+first_size);
 			TTMATH_ASSERT( c==0 )
 		}
 		if( yc )
 		{
 			c = AddVector(z1.table+first_size, temp.table, first_size*3-first_size, first_size, z1.table+first_size);
 			TTMATH_ASSERT( c==0 )
 		}
 		if( xc && yc )
 		{
 			for( i=first_size*2 ; i<first_size*3 ; ++i )
 				if( ++z1.table[i] != 0 )
 					break;  // break if there was no carry
 		}
 		// z1 = z1 - z2
 		c = SubVector(z1.table, result+first_size*2, first_size*3, second_size*2, z1.table);
 		TTMATH_ASSERT(c==0)
 		// z1 = z1 - z0
 		c = SubVector(z1.table, result, first_size*3, first_size*2, z1.table);
 		TTMATH_ASSERT(c==0)
 		// here we've calculated the z1
 		// now we're adding it to the result
 		if( first_size > second_size )
 		{
 			uint z1_size = result_size - first_size;
 			TTMATH_ASSERT( z1_size <= first_size*3 )
 			for(i=z1_size ; i<first_size*3 ; ++i)
 				TTMATH_ASSERT( z1.table[i] == 0 )
 				;
 			c = AddVector(result+first_size, z1.table, result_size-first_size, z1_size, result+first_size);
 			TTMATH_ASSERT(c==0)
 		}
 		else
 		{
 			c = AddVector(result+first_size, z1.table, result_size-first_size, first_size*3, result+first_size);
 			TTMATH_ASSERT(c==0)
 		}
 	}
 #ifdef _MSC_VER
 #pragma warning (default : 4717)
 #endif
 public:
 	/*!
 		multiplication this = this * ss2
 	*/
 	uint MulFastest(const UInt<value_size> & ss2)
 	{
 	UInt<value_size*2> result;
 	uint i, c = 0;
 		MulFastestBig(ss2, result);
 		// copying result
 		for(i=0 ; i<value_size ; ++i)
 			table[i] = result.table[i];
 		// testing carry
 		for( ; i<value_size*2 ; ++i)
 			if( result.table[i] != 0 )
 			{
 				c = 1;
 				break;
 			}
 		TTMATH_LOG("UInt::MulFastest")
 	return c;
 	}
 	/*!
 		multiplication result = this * ss2
 		this method is trying to select the fastest algorithm
 		(in the future this method can be improved)
 	*/
 	void MulFastestBig(const UInt<value_size> & ss2, UInt<value_size*2> & result)
 	{
 		if( value_size < TTMATH_USE_KARATSUBA_MULTIPLICATION_FROM_SIZE )
 			return Mul2Big(ss2, result);
 		uint x1size  = value_size, x2size  = value_size;
 		uint x1start = 0,          x2start = 0;
 		for(x1size=value_size ; x1size>0 && table[x1size-1]==0 ; --x1size);
 		for(x2size=value_size ; x2size>0 && ss2.table[x2size-1]==0 ; --x2size);
 		if( x1size==0 || x2size==0 )
 		{
-				TTMATH_LOG("UInt::Mul2Big")
+			// either 'this' or 'ss2' is equal zero - the result is zero too
 			result.SetZero();
 			return;
 		}
 		for(x1start=0 ; x1start<x1size && table[x1start]==0 ; ++x1start);
 		for(x2start=0 ; x2start<x2size && ss2.table[x2start]==0 ; ++x2start);
 		}
-		for(uint x1=x1start ; x1<x1size ; ++x1)
+		uint distancex1 = x1size - x1start;
-		{
+		uint distancex2 = x2size - x2start;
 			for(uint x2=x2start ; x2<x2size ; ++x2)
 			{
 				MulTwoWords(table[x1], ss2.table[x2], &r2, &r1);
 				result.AddTwoInts(r2,r1,x2+x1);
 				// here will never be a carry
 			}
 		}
-		TTMATH_LOG("UInt::Mul2Big")
+		if( distancex1 < 3 || distancex2 < 3 )
-	}
+			// either 'this' or 'ss2' have only 2 (or 1) item different from zero (side by side)
 			// (this condition in the future can be improved)
 			return Mul2Big3<value_size>(table, ss2.table, result, x1start, x1size, x2start, x2size);
 		// Karatsuba multiplication
 		Mul3Big(ss2, result);
 		TTMATH_LOG("UInt::MulFastestBig")
 	}
 	/*!
@ -1456,7 +1817,7 @@ private:
 		if( Div2_DivisorGreaterOrEqual(	divisor, remainder,
 										table_id, index,
-										divisor_table_id, divisor_index) )
+										divisor_index) )
 		{
 			TTMATH_LOG("UInt::Div2_FindLeadingBitsAndCheck")
 			return 0;
@ -1476,7 +1837,7 @@ private:
 	bool Div2_DivisorGreaterOrEqual(	const UInt<value_size> & divisor,
 										UInt<value_size> * remainder,
 										uint table_id, uint index,
-										uint /*divisor_table_id*/, uint divisor_index  )
+										uint divisor_index  )
 	{
 		if( divisor_index > index )
 		{
@ -2349,7 +2710,7 @@ public:
 		do
 		{
 			temp.DivInt(b, &rem);
-			character = DigitToChar( rem );
+			character = static_cast<char>( DigitToChar(rem) );
 			result.insert(result.begin(), character);
 		}
 		while( !temp.IsZero() );
@ -2877,10 +3238,13 @@ public:
 			ttmathuint_noasm.h
 	*/
-#if defined(TTMATH_NOASM)
+#if defined(TTMATH_NOASM) || !defined(__GNUC__)
 	// not yet implemented in 64 bit ASM
 	static uint AddTwoWords(uint a, uint b, uint carry, uint * result);
 	static uint SubTwoWords(uint a, uint b, uint carry, uint * result);
 #endif
 #if defined(TTMATH_NOASM)
 #ifdef TTMATH_PLATFORM64
 	union uint_
@ -2906,19 +3270,19 @@ public:
 private:
 public: // !!! chwilowo public
 	uint Rcl2_one(uint c);
 	uint Rcr2_one(uint c);
 	uint Rcl2(uint bits, uint c);
 	uint Rcr2(uint bits, uint c);
 public:
 	uint Add(const UInt<value_size> & ss2, uint c=0);
 	uint AddInt(uint value, uint index = 0);
 	uint AddTwoInts(uint x2, uint x1, uint index);
 	static uint AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result);
 	uint Sub(const UInt<value_size> & ss2, uint c=0);
 	uint SubInt(uint value, uint index = 0);
 	static uint SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result);
 	static sint FindLeadingBitInWord(uint x);
 	static uint SetBitInWord(uint & value, uint bit);
 	static void MulTwoWords(uint a, uint b, uint * result_high, uint * result_low);
@ -2926,6 +3290,23 @@ public:
 };
 /*!
 	this specialization is needed in order to not confused the compiler "error: ISO C++ forbids zero-size array"
 	when compiling Mul3Big2() method
 */
 template<>
 class UInt<0>
 {
 public:
 	uint table[1];
 	void Mul2Big(const UInt<0> &, UInt<0> &) { TTMATH_ASSERT(false) };
 	void SetZero() { TTMATH_ASSERT(false) };
 	uint AddTwoInts(uint, uint, uint) { TTMATH_ASSERT(false) return 0; };
 };
 } //namespace
 #if defined(_MSC_VER)
--- a/ttmath/ttmathuint_x86.h
+++ b/ttmath/ttmathuint_x86.h
@ -75,9 +75,9 @@ namespace ttmath
 	template<uint value_size>
 	uint UInt<value_size>::Add(const UInt<value_size> & ss2, uint c)
 	{
-	register uint b = value_size;
+	uint b = value_size;
-	register uint * p1 = table;
+	uint * p1 = table;
-	register uint * p2 = const_cast<uint*>(ss2.table);
+	uint * p2 = const_cast<uint*>(ss2.table);
 		// we don't have to use TTMATH_REFERENCE_ASSERT here
 		// this algorithm doesn't require it
@ -96,13 +96,13 @@ namespace ttmath
 				sub eax,[c]  // CF=c
 				ALIGN 16
-			p:				
+			ttmath_loop:				
 				mov eax,[esi+edx*4+0]
 				adc [ebx+edx*4+0],eax
 				lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
 				dec ecx
-				jnz p
+			jnz ttmath_loop
 				setc	al
 				movzx	eax, al
@ -112,17 +112,13 @@ namespace ttmath
 		#ifdef __GNUC__
-			
+		uint dummy, dummy2;
 			//	this part should be compiled with gcc
 			__asm__ __volatile__(
-				"push %%ecx						\n"
+				"xorl %%edx, %%edx				\n"
-			
+				"negl %%eax						\n"  // CF=1 if rax!=0 , CF=0 if rax==0
 				"xorl %%eax, %%eax				\n"
 				"movl %%eax, %%edx				\n"
 				"subl %%edi, %%eax				\n"
 			"1:									\n"
 				"movl (%%esi,%%edx,4), %%eax	\n"
@ -132,14 +128,11 @@ namespace ttmath
 				"decl %%ecx						\n"
 			"jnz 1b								\n"
-				"setc %%al						\n"
+				"adc %%ecx, %%ecx				\n"
 				"movzx %%al,%%edx				\n"
-				"pop %%ecx						\n"
+				: "=c" (c), "=a" (dummy), "=d" (dummy2)
-
+				: "0" (b),  "1" (c), "b" (p1), "S" (p2)
-				: "=d" (c)
+				: "cc", "memory" );
 				: "D" (c), "c" (b), "b" (p1), "S" (p2)
 				: "%eax", "cc", "memory" );
 		#endif
 		TTMATH_LOG("UInt::Add")
@ -189,16 +182,16 @@ namespace ttmath
 				mov eax, [value]
 				ALIGN 16
-			p:
+			ttmath_loop:
 				add [ebx+edx*4], eax
-				jnc end
+			jnc ttmath_end
 				mov eax, 1
 				lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
 				dec ecx
-				jnz p
+			jnz ttmath_loop
-			end:
+			ttmath_end:
 				setc	al
 				movzx	eax, al
 				mov		[c], eax
@ -207,10 +200,9 @@ namespace ttmath
 		#ifdef __GNUC__
-			__asm__ __volatile__(
+		uint dummy, dummy2;
-				"push %%eax						\n"
+			__asm__ __volatile__(
 				"push %%ecx						\n"
 				"subl %%edx, %%ecx 				\n"
@ -227,12 +219,10 @@ namespace ttmath
 				"setc %%al						\n"
 				"movzx %%al, %%edx				\n"
-				"pop %%ecx						\n"
+				: "=d" (c),    "=a" (dummy), "=c" (dummy2)
-				"pop %%eax						\n"
+				: "0" (index), "1" (value),  "2" (b), "b" (p1)
 				: "=d" (c)
 				: "a" (value), "c" (b), "0" (index), "b" (p1)
 				: "cc", "memory" );
 		#endif
 		TTMATH_LOG("UInt::AddInt")
@ -297,16 +287,16 @@ namespace ttmath
 				mov eax, [x2]
 				ALIGN 16
-			p:
+			ttmath_loop:
 				adc [ebx+edx*4+4], eax
-				jnc end
+			jnc ttmath_end
 				mov eax, 0
 				lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
 				dec ecx
-				jnz p
+			jnz ttmath_loop
-			end:
+			ttmath_end:
 				setc	al
 				movzx	eax, al
 				mov		[c], eax
@ -338,12 +328,10 @@ namespace ttmath
 				"setc %%al						\n"
 				"movzx %%al, %%eax				\n"
-				"pop %%edx						\n"
+				: "=a" (c), "=c" (dummy), "=d" (dummy2)
-				"pop %%ecx						\n"
+				: "0" (x2), "1" (b),      "2" (index), "b" (p1), "S" (x1)
 				: "=a" (c)
 				: "c" (b), "d" (index), "b" (p1), "S" (x1), "0" (x2)
 				: "cc", "memory" );
 		#endif
 		TTMATH_LOG("UInt::AddTwoInts")
@ -353,6 +341,131 @@ namespace ttmath
 	/*!
 		this static method addes one vector to the other
 		'ss1' is larger in size or equal to 'ss2'
 		ss1 points to the first (larger) vector
 		ss2 points to the second vector
 		ss1_size - size of the ss1 (and size of the result too)
 		ss2_size - size of the ss2
 		result - is the result vector (which has size the same as ss1: ss1_size)
 		Example:  ss1_size is 5, ss2_size is 3
 		ss1:      ss2:   result (output):
 		  5        1         5+1
 		  4        3         4+3
 		  2        7         2+7
 		  6                  6
 		  9                  9
 	  of course the carry is propagated and will be returned from the last item
 	  (this method is used by the Karatsuba multiplication algorithm)
 	*/
 	template<uint value_size>
 	uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
 	{
 		TTMATH_ASSERT( ss1_size >= ss2_size )
 		uint rest = ss1_size - ss2_size;
 		uint c;
 		#ifndef __GNUC__
 			//	this part might be compiled with for example visual c
 			__asm
 			{
 				mov ecx, [ss2_size]
 				xor edx, edx               // edx = 0, cf = 0
 				mov esi, [ss1]
 				mov ebx, [ss2]
 				mov edi, [result]
 				ALIGN 16
 			ttmath_loop:
 				mov eax, [esi+edx*4]
 				adc eax, [ebx+edx*4]
 				mov [edi+edx*4], eax
 				lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
 				dec ecx
 			jnz ttmath_loop
 				adc ecx, ecx             // ecx has the cf state
 				mov ebx, [rest]
 				or ebx, ebx
 				jz ttmath_end
 				xor ebx, ebx             // ebx = 0
 				neg ecx                  // setting cf from ecx
 				mov ecx, [rest]          // ecx is != 0
 			ttmath_loop2:
 				mov eax, [esi+edx*4]
 				adc eax, ebx 
 				mov [edi+edx*4], eax
 				lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
 				dec ecx
 			jnz ttmath_loop2
 				adc ecx, ecx
 			ttmath_end:
 				mov [c], ecx
 			}
 		#endif		
 		#ifdef __GNUC__
 		//	this part should be compiled with gcc
 		uint dummy1, dummy2, dummy3;
 			__asm__ __volatile__(
 				"push %%edx							\n"
 				"xor %%edx, %%edx					\n"   // edx = 0, cf = 0
 			"1:										\n"
 				"mov (%%esi,%%edx,4), %%eax			\n"
 				"adc (%%ebx,%%edx,4), %%eax			\n"
 				"mov %%eax, (%%edi,%%edx,4)			\n"
 				"inc %%edx							\n"
 				"dec %%ecx							\n"
 			"jnz 1b									\n"
 				"adc %%ecx, %%ecx					\n"   // ecx has the cf state
 				"pop %%eax							\n"   // eax = rest
 				"or %%eax, %%eax					\n"
 				"jz 3f								\n"
 				"xor %%ebx, %%ebx					\n"   // ebx = 0
 				"neg %%ecx							\n"   // setting cf from ecx
 				"mov %%eax, %%ecx					\n"   // ecx=rest and is != 0
 			"2:										\n"
 				"mov (%%esi, %%edx, 4), %%eax		\n"
 				"adc %%ebx, %%eax 					\n"
 				"mov %%eax, (%%edi, %%edx, 4)		\n"
 				"inc %%edx							\n"
 				"dec %%ecx							\n"
 			"jnz 2b									\n"
 				"adc %%ecx, %%ecx					\n"
 			"3:										\n"
 				: "=a" (dummy1), "=b" (dummy2), "=c" (c),       "=d" (dummy3)
 				:                "1" (ss2),     "2" (ss2_size), "3" (rest),   "S" (ss1),  "D" (result)
 				: "cc", "memory" );
 		#endif
 		TTMATH_LOG("UInt::AddVector")
 	return c;
 	}
 	/*!
@ -366,9 +479,9 @@ namespace ttmath
 	template<uint value_size>
 	uint UInt<value_size>::Sub(const UInt<value_size> & ss2, uint c)
 	{
-	register uint b = value_size;
+	uint b = value_size;
-	register uint * p1 = table;
+	uint * p1 = table;
-	register uint * p2 = const_cast<uint*>(ss2.table);
+	uint * p2 = const_cast<uint*>(ss2.table);
 		// we don't have to use TTMATH_REFERENCE_ASSERT here
 		// this algorithm doesn't require it
@ -387,13 +500,13 @@ namespace ttmath
 				sub eax, [c]
 				ALIGN 16
-			p:
+			ttmath_loop:
 				mov eax, [esi+edx*4]
 				sbb [ebx+edx*4], eax
 				lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
 				dec ecx
-				jnz p
+			jnz ttmath_loop
 				setc	al
 				movzx	eax, al
@ -404,13 +517,12 @@ namespace ttmath
 		#ifdef __GNUC__
 		uint dummy, dummy2;
 			__asm__  __volatile__(
 				"push %%ecx						\n"
 				"xorl %%eax, %%eax				\n"
 				"movl %%eax, %%edx				\n"
 				"subl %%edi, %%eax				\n"
 				"xorl %%edx, %%edx				\n"
 				"negl %%eax						\n"  // CF=1 if rax!=0 , CF=0 if rax==0
 			"1:									\n"
 				"movl (%%esi,%%edx,4), %%eax	\n"
@ -420,14 +532,11 @@ namespace ttmath
 				"decl %%ecx						\n"
 			"jnz 1b								\n"
-				"setc %%al						\n"
+				"adc %%ecx, %%ecx				\n"
 				"movzx %%al,%%edx				\n"
-				"pop %%ecx						\n"
+				: "=c" (c), "=a" (dummy), "=d" (dummy2)
-
+				: "0" (b),  "1" (c), "b" (p1), "S" (p2)
-				: "=d" (c)
+				: "cc", "memory" );
 				: "D" (c), "c" (b), "b" (p1), "S" (p2)
 				: "%eax", "cc", "memory" );
 		#endif
@ -479,16 +588,16 @@ namespace ttmath
 				mov eax, [value]
 				ALIGN 16
-			p:
+			ttmath_loop:
 				sub [ebx+edx*4], eax
-				jnc end
+			jnc ttmath_end
 				mov eax, 1
 				lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
 				dec ecx
-				jnz p
+			jnz ttmath_loop
-			end:
+			ttmath_end:
 				setc	al
 				movzx	eax, al
 				mov		[c], eax
@ -497,10 +606,9 @@ namespace ttmath
 		#ifdef __GNUC__
-			__asm__ __volatile__(
+		uint dummy, dummy2;
-				"push %%eax						\n"
+			__asm__ __volatile__(
 				"push %%ecx						\n"
 				"subl %%edx, %%ecx 				\n"
@ -517,12 +625,10 @@ namespace ttmath
 				"setc %%al						\n"
 				"movzx %%al, %%edx				\n"
-				"pop %%ecx						\n"
+				: "=d" (c),    "=a" (dummy), "=c" (dummy2)
-				"pop %%eax						\n"
+				: "0" (index), "1" (value),  "2" (b), "b" (p1)
 				: "=d" (c)
 				: "a" (value), "c" (b), "0" (index), "b" (p1)
 				: "cc", "memory" );
 		#endif
 		TTMATH_LOG("UInt::SubInt")
@ -532,6 +638,141 @@ namespace ttmath
 	/*!
 		this static method subtractes one vector from the other
 		'ss1' is larger in size or equal to 'ss2'
 		ss1 points to the first (larger) vector
 		ss2 points to the second vector
 		ss1_size - size of the ss1 (and size of the result too)
 		ss2_size - size of the ss2
 		result - is the result vector (which has size the same as ss1: ss1_size)
 		Example:  ss1_size is 5, ss2_size is 3
 		ss1:      ss2:   result (output):
 		  5        1         5-1
 		  4        3         4-3
 		  2        7         2-7
 		  6                  6-1  (the borrow from previous item)
 		  9                  9
 		              return (carry): 0
 	  of course the carry (borrow) is propagated and will be returned from the last item
 	  (this method is used by the Karatsuba multiplication algorithm)
 	*/
 	template<uint value_size>
 	uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
 	{
 		TTMATH_ASSERT( ss1_size >= ss2_size )
 		uint rest = ss1_size - ss2_size;
 		uint c;
 		#ifndef __GNUC__
 			//	this part might be compiled with for example visual c
 			/*
 				the asm code is nearly the same as in AddVector
 				only two instructions 'adc' are changed to 'sbb'
 			*/
 			__asm
 			{
 				mov ecx, [ss2_size]
 				xor edx, edx               // edx = 0, cf = 0
 				mov esi, [ss1]
 				mov ebx, [ss2]
 				mov edi, [result]
 			ttmath_loop:
 				mov eax, [esi+edx*4]
 				sbb eax, [ebx+edx*4]
 				mov [edi+edx*4], eax
 				lea edx, [edx+1]
 				dec ecx
 			jnz ttmath_loop
 				adc ecx, ecx             // ecx has the cf state
 				mov ebx, [rest]
 				or ebx, ebx
 				jz ttmath_end
 				xor ebx, ebx             // ebx = 0
 				neg ecx                  // setting cf from ecx
 				mov ecx, [rest]          // ecx is != 0
 			ttmath_loop2:
 				mov eax, [esi+edx*4]
 				sbb eax, ebx 
 				mov [edi+edx*4], eax
 				lea edx, [edx+1]
 				dec ecx
 			jnz ttmath_loop2
 				adc ecx, ecx
 			ttmath_end:
 				mov [c], ecx
 			}
 		#endif		
 		#ifdef __GNUC__
 		//	this part should be compiled with gcc
 		uint dummy1, dummy2, dummy3;
 			__asm__ __volatile__(
 				"push %%edx							\n"
 				"xor %%edx, %%edx					\n"   // edx = 0, cf = 0
 			"1:										\n"
 				"mov (%%esi,%%edx,4), %%eax			\n"
 				"sbb (%%ebx,%%edx,4), %%eax			\n"
 				"mov %%eax, (%%edi,%%edx,4)			\n"
 				"inc %%edx							\n"
 				"dec %%ecx							\n"
 			"jnz 1b									\n"
 				"adc %%ecx, %%ecx					\n"   // ecx has the cf state
 				"pop %%eax							\n"   // eax = rest
 				"or %%eax, %%eax					\n"
 				"jz 3f								\n"
 				"xor %%ebx, %%ebx					\n"   // ebx = 0
 				"neg %%ecx							\n"   // setting cf from ecx
 				"mov %%eax, %%ecx					\n"   // ecx=rest and is != 0
 			"2:										\n"
 				"mov (%%esi, %%edx, 4), %%eax		\n"
 				"sbb %%ebx, %%eax 					\n"
 				"mov %%eax, (%%edi, %%edx, 4)		\n"
 				"inc %%edx							\n"
 				"dec %%ecx							\n"
 			"jnz 2b									\n"
 				"adc %%ecx, %%ecx					\n"
 			"3:										\n"
 				: "=a" (dummy1), "=b" (dummy2), "=c" (c),       "=d" (dummy3)
 				:                "1" (ss2),     "2" (ss2_size), "3" (rest),   "S" (ss1),  "D" (result)
 				: "cc", "memory" );
 		#endif
 		TTMATH_LOG("UInt::SubVector")
 	return c;
 	}
 	/*!
 		this method moves all bits into the left hand side
 		return value <- this <- c
@ -547,8 +788,8 @@ namespace ttmath
 	template<uint value_size>
 	uint UInt<value_size>::Rcl2_one(uint c)
 	{
-	register sint b = value_size;
+	uint b = value_size;
-	register uint * p1 = table;
+	uint * p1 = table;
 		#ifndef __GNUC__
 			__asm
@ -562,12 +803,12 @@ namespace ttmath
 				mov ecx, [b]
 				ALIGN 16
-			p:
+			ttmath_loop:
 				rcl dword ptr [ebx+edx*4], 1
 				lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
 				dec ecx
-				jnz p
+			jnz ttmath_loop
 				setc	al
 				movzx	eax, al
@ -577,13 +818,12 @@ namespace ttmath
 		#ifdef __GNUC__
 		uint dummy, dummy2;
 		__asm__  __volatile__(
 			"push %%edx					\n"
 			"push %%ecx					\n"
 			"xorl %%edx, %%edx			\n"   // edx=0
-			"neg %%eax					\n"   // CF=1 if eax!=0 , CF=0 if eax==0
+			"negl %%eax					\n"   // CF=1 if eax!=0 , CF=0 if eax==0
 		"1:								\n"
 			"rcll $1, (%%ebx, %%edx, 4)	\n"
@ -592,15 +832,12 @@ namespace ttmath
 			"decl %%ecx					\n"
 		"jnz 1b							\n"
-			"setc %%al					\n"
+			"adcl %%ecx, %%ecx			\n"
 			"movzx %%al, %%eax			\n"
-			"pop %%ecx					\n"
+			: "=c" (c), "=a" (dummy), "=d" (dummy2)
-			"pop %%edx					\n"
+			: "0" (b),  "1" (c), "b" (p1)
 			: "=a" (c)
 			: "0" (c), "c" (b), "b" (p1)
 			: "cc", "memory" );
 		#endif
 		TTMATH_LOG("UInt::Rcl2_one")
@ -625,8 +862,8 @@ namespace ttmath
 	template<uint value_size>
 	uint UInt<value_size>::Rcr2_one(uint c)
 	{
-	register sint b = value_size;
+	uint b = value_size;
-	register uint * p1 = table;
+	uint * p1 = table;
 		#ifndef __GNUC__
 			__asm
@ -638,11 +875,11 @@ namespace ttmath
 				mov ecx, [b]
 				ALIGN 16
-			p:
+			ttmath_loop:
 				rcr dword ptr [ebx+ecx*4-4], 1
 				dec ecx
-				jnz p
+			jnz ttmath_loop
 				setc	al
 				movzx	eax, al
@ -652,11 +889,11 @@ namespace ttmath
 		#ifdef __GNUC__
 		uint dummy;
 		__asm__  __volatile__(
-			"push %%ecx						\n"
+			"negl %%eax						\n"   // CF=1 if eax!=0 , CF=0 if eax==0
 			"neg %%eax						\n"   // CF=1 if eax!=0 , CF=0 if eax==0
 		"1:									\n"
 			"rcrl $1, -4(%%ebx, %%ecx, 4)	\n"
@ -664,14 +901,12 @@ namespace ttmath
 			"decl %%ecx						\n"
 		"jnz 1b								\n"
-			"setc %%al						\n"
+			"adcl %%ecx, %%ecx				\n"
 			"movzx %%al, %%eax				\n"
-			"pop %%ecx						\n"
+			: "=c" (c), "=a" (dummy)
-
+			: "0" (b),  "1" (c), "b" (p1)
 			: "=a" (c)
 			: "0" (c), "c" (b), "b" (p1)
 			: "cc", "memory" );
 		#endif
 		TTMATH_LOG("UInt::Rcr2_one")
@ -724,7 +959,7 @@ namespace ttmath
 				cmovnz esi, [mask] // if c then old value = mask
 				ALIGN 16
-			p:
+			ttmath_loop:
 				rol dword ptr [ebx+edx*4], cl
 				mov eax, [ebx+edx*4]
@ -735,7 +970,7 @@ namespace ttmath
 				lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
 				dec edi
-				jnz p
+			jnz ttmath_loop
 				and eax, 1
 				mov	dword ptr [c], eax
@ -744,31 +979,30 @@ namespace ttmath
 		#ifdef __GNUC__
 		uint dummy, dummy2, dummy3;
 		__asm__  __volatile__(
-			"push %%edx						\n"
+			"push %%ebp						\n"
 			"push %%esi						\n"
 			"push %%edi						\n"
 			"movl %%ecx, %%esi				\n"
 			"movl $32, %%ecx				\n"
-			"subl %%esi, %%ecx				\n"
+			"subl %%esi, %%ecx				\n"    // ecx = 32 - bits
-			"movl $-1, %%edx				\n"
+			"movl $-1, %%edx				\n"    // edx = -1 (all bits set to one)
-			"shrl %%cl, %%edx				\n"
+			"shrl %%cl, %%edx				\n"    // shifting (0 -> edx -> cf)  (cl times)
-			"movl %%edx, %[amask]			\n"
+			"movl %%edx, %%ebp				\n"    // ebp = edx = mask
 			"movl %%esi, %%ecx				\n"
 			"xorl %%edx, %%edx				\n"
 			"movl %%edx, %%esi				\n"
 			"orl %%eax, %%eax				\n"
-			"cmovnz %[amask], %%esi			\n"
+			"cmovnz %%ebp, %%esi			\n"    // if(c) esi=mask else esi=0
 		"1:									\n"
 			"roll %%cl, (%%ebx,%%edx,4)		\n"
 			"movl (%%ebx,%%edx,4), %%eax	\n"
-			"andl %[amask], %%eax			\n"
+			"andl %%ebp, %%eax				\n"
 			"xorl %%eax, (%%ebx,%%edx,4)	\n"
 			"orl  %%esi, (%%ebx,%%edx,4)	\n"
 			"movl %%eax, %%esi				\n"
@ -779,13 +1013,12 @@ namespace ttmath
 			"and $1, %%eax					\n"
-			"pop %%edi						\n"
+			"pop %%ebp						\n"
 			"pop %%esi						\n"
 			"pop %%edx						\n"
-			: "=a" (c)
+			: "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
-			: "0" (c), "D" (b), "b" (p1), "c" (bits), [amask] "m" (mask)
+			: "0" (c),  "1" (b), "b" (p1), "c" (bits)
 			: "cc", "memory" );
 		#endif
 		TTMATH_LOG("UInt::Rcl2")
@ -813,9 +1046,9 @@ namespace ttmath
 	{
 	TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
-	register sint b = value_size;
+	uint b = value_size;
-	register uint * p1 = table;
+	uint * p1 = table;
-	register uint mask;
+	uint mask;
 		#ifndef __GNUC__
 			__asm
@ -841,7 +1074,7 @@ namespace ttmath
 				cmovnz esi, [mask] // if c then old value = mask
 				ALIGN 16
-			p:
+			ttmath_loop:
 				ror dword ptr [ebx+edx*4], cl
 				mov eax, [ebx+edx*4]
@ -852,7 +1085,7 @@ namespace ttmath
 				dec edx
 				dec edi
-				jnz p
+			jnz ttmath_loop
 				rol eax, 1    // bit 31 will be bit 0
 				and eax, 1  
@ -863,33 +1096,32 @@ namespace ttmath
 		#ifdef __GNUC__
 		uint dummy, dummy2, dummy3;
 			__asm__  __volatile__(
-			"push %%edx						\n"
+			"push %%ebp						\n"
 			"push %%esi						\n"
 			"push %%edi						\n"
 			"movl %%ecx, %%esi				\n"
 			"movl $32, %%ecx				\n"
-			"subl %%esi, %%ecx				\n"
+			"subl %%esi, %%ecx				\n"    // ecx = 32 - bits
-			"movl $-1, %%edx				\n"
+			"movl $-1, %%edx				\n"    // edx = -1 (all bits set to one)
-			"shll %%cl, %%edx				\n"
+			"shll %%cl, %%edx				\n"    // shifting (cf <- edx <- 0)  (cl times)
-			"movl %%edx, %[amask]			\n"
+			"movl %%edx, %%ebp				\n"    // ebp = edx = mask
 			"movl %%esi, %%ecx				\n"
 			"xorl %%edx, %%edx				\n"
 			"movl %%edx, %%esi				\n"
 			"addl %%edi, %%edx				\n"
-			"decl %%edx						\n"
+			"decl %%edx						\n"    // edx is pointing at the end of the table (on last word)
 			"orl %%eax, %%eax				\n"
-			"cmovnz %[amask], %%esi			\n"
+			"cmovnz %%ebp, %%esi			\n"    // if(c) esi=mask else esi=0
 		"1:									\n"
 			"rorl %%cl, (%%ebx,%%edx,4)		\n"
 			"movl (%%ebx,%%edx,4), %%eax	\n"
-			"andl %[amask], %%eax			\n"
+			"andl %%ebp, %%eax				\n"
 			"xorl %%eax, (%%ebx,%%edx,4)	\n"
 			"orl  %%esi, (%%ebx,%%edx,4)	\n"
 			"movl %%eax, %%esi				\n"
@ -901,13 +1133,12 @@ namespace ttmath
 			"roll $1, %%eax					\n"
 			"andl $1, %%eax					\n"
-			"pop %%edi						\n"
+			"pop %%ebp						\n"
 			"pop %%esi						\n"
 			"pop %%edx						\n"
-			: "=a" (c)
+			: "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
-			: "0" (c), "D" (b), "b" (p1), "c" (bits), [amask] "m" (mask)
+			: "0" (c),  "1" (b), "b" (p1), "c" (bits)
 			: "cc", "memory" );
 		#endif
 		TTMATH_LOG("UInt::Rcr2")
@ -939,15 +1170,16 @@ namespace ttmath
 		#ifdef __GNUC__
-			__asm__  __volatile__(
+		uint dummy;
-			"bsrl %1, %0		\n"
+				__asm__ (
 			"jnz 1f				\n"
 			"movl $-1, %0		\n"
 			"1:					\n"
-			: "=R" (result)
+				"movl $-1, %1          \n"
-			: "R" (x)
+				"bsrl %2, %0           \n"
 				"cmovz %1, %0          \n"
 				: "=r" (result), "=&r" (dummy)
 				: "r" (x)
 				: "cc" );
 		#endif
@ -974,8 +1206,8 @@ namespace ttmath
 	{
 		TTMATH_ASSERT( bit < TTMATH_BITS_PER_UINT )
 		uint v = value;
 		uint old_bit;
 		uint v = value;
 		#ifndef __GNUC__
 			__asm
@ -993,17 +1225,16 @@ namespace ttmath
 		#ifdef __GNUC__
-
+			__asm__ (
 			__asm__  __volatile__(
 			"btsl %%ebx, %%eax		\n"
 			"setc %%bl				\n"
 			"movzx %%bl, %%ebx		\n"
 			: "=a" (v), "=b" (old_bit)
 			: "0" (v),  "1" (bit)
 			: "cc" );
 		#endif
 		value = v;
@ -1031,8 +1262,8 @@ namespace ttmath
 		this has no effect in visual studio but it's useful when
 		using gcc and options like -Ox
 	*/
-	register uint result1_;
+	uint result1_;
-	register uint result2_;
+	uint result2_;
 		#ifndef __GNUC__
@ -1050,7 +1281,7 @@ namespace ttmath
 		#ifdef __GNUC__
-		__asm__ __volatile__(
+		__asm__ (
 			"mull %%edx			\n"
@ -1093,8 +1324,8 @@ namespace ttmath
 	template<uint value_size>
 	void UInt<value_size>::DivTwoWords(uint a, uint b, uint c, uint * r, uint * rest)
 	{
-		register uint r_;
+		uint r_;
-		register uint rest_;
+		uint rest_;
 		/*
 			these variables have similar meaning like those in
 			the multiplication algorithm MulTwoWords
@ -1117,12 +1348,12 @@ namespace ttmath
 		#ifdef __GNUC__
-			__asm__ __volatile__(
+			__asm__ (
 			"divl %%ecx				\n"
 			: "=a" (r_), "=d" (rest_)
-			: "d" (a), "a" (b), "c" (c)
+			: "0" (b),   "1" (a), "c" (c)
 			: "cc" );
 		#endif
--- a/ttmath/ttmathuint_x86_64.h
+++ b/ttmath/ttmathuint_x86_64.h
@ -911,6 +911,131 @@ namespace ttmath
 		*rest = rest_;
 	}
 	template<uint value_size>
 	uint UInt<value_size>::AddTwoWords(uint a, uint b, uint carry, uint * result)
 	{
 	uint temp;
 		if( carry == 0 )
 		{
 			temp = a + b;
 			if( temp < a )
 				carry = 1;
 		}
 		else
 		{
 			carry = 1;
 			temp  = a + b + carry;
 			if( temp > a ) // !(temp<=a)
 				carry = 0;
 		}
 		*result = temp;
 	return carry;
 	}
 	template<uint value_size>
 	uint UInt<value_size>::SubTwoWords(uint a, uint b, uint carry, uint * result)
 	{
 		if( carry == 0 )
 		{
 			*result = a - b;
 			if( a < b )
 				carry = 1;
 		}
 		else
 		{
 			carry   = 1;
 			*result = a - b - carry;
 			if( a > b ) // !(a <= b )
 				carry = 0;
 		}
 	return carry;
 	}
 	/*!
 		this static method addes one vector to the other
 		'ss1' is larger in size or equal to 'ss2'
 		ss1 points to the first (larger) vector
 		ss2 points to the second vector
 		ss1_size - size of the ss1 (and size of the result too)
 		ss2_size - size of the ss2
 		result - is the result vector (which has size the same as ss1: ss1_size)
 		Example:  ss1_size is 5, ss2_size is 3
 		ss1:      ss2:   result (output):
 		  5        1         5+1
 		  4        3         4+3
 		  2        7         2+7
 		  6                  6
 		  9                  9
 	  of course the carry is propagated and will be returned from the last item
 	  (this method is used by the Karatsuba multiplication algorithm)
 	*/
 	template<uint value_size>
 	uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
 	{
 	uint i, c = 0;
 		TTMATH_ASSERT( ss1_size >= ss2_size )
 		for(i=0 ; i<ss2_size ; ++i)
 			c = AddTwoWords(ss1[i], ss2[i], c, &result[i]);
 		for( ; i<ss1_size ; ++i)
 			c = AddTwoWords(ss1[i], 0, c, &result[i]);
 		TTMATH_LOG("UInt::AddVector")
 	return c;
 	}
 	/*!
 		this static method subtractes one vector from the other
 		'ss1' is larger in size or equal to 'ss2'
 		ss1 points to the first (larger) vector
 		ss2 points to the second vector
 		ss1_size - size of the ss1 (and size of the result too)
 		ss2_size - size of the ss2
 		result - is the result vector (which has size the same as ss1: ss1_size)
 		Example:  ss1_size is 5, ss2_size is 3
 		ss1:      ss2:   result (output):
 		  5        1         5-1
 		  4        3         4-3
 		  2        7         2-7
 		  6                  6-1  (the borrow from previous item)
 		  9                  9
 		                 return (carry): 0
 	  of course the carry (borrow) is propagated and will be returned from the last item
 	  (this method is used by the Karatsuba multiplication algorithm)
 	*/
 	template<uint value_size>
 	uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
 	{
 	uint i, c = 0;
 		TTMATH_ASSERT( ss1_size >= ss2_size )
 		for(i=0 ; i<ss2_size ; ++i)
 			c = SubTwoWords(ss1[i], ss2[i], c, &result[i]);
 		for( ; i<ss1_size ; ++i)
 			c = SubTwoWords(ss1[i], 0, c, &result[i]);
 		TTMATH_LOG("UInt::SubVector")
 	return c;
 	}
 } //namespace