- update to current root trunk's version

- update to root trunk's UNICODE support git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@182 e52654a7-88a9-db11-a3e9-0013d4bc506e
- fixed a bug in 64 bit ASM for MSVC
2009-07-29 10:46:48 +00:00 · 2009-07-28 16:34:04 +00:00 · 2009-06-26 15:24:27 +00:00 · 2009-06-26 11:14:51 +00:00 · 2009-06-25 14:11:17 +00:00 · 2009-06-25 11:07:55 +00:00
11 changed files with 16524 additions and 15986 deletions
--- a/ttmath/ttmath.h
+++ b/ttmath/ttmath.h
@ -64,6 +64,7 @@ namespace ttmath
 	 */
 	/*
 	 *
 	 *  functions for rounding
@ -2609,6 +2610,7 @@ namespace ttmath
 	ValueType result;
 	ErrorCode err_tmp;
 		if( n.IsNan() )
 		{
 			if( err )
@ -2617,6 +2619,8 @@ namespace ttmath
 		return result; // NaN is set by default
 		}
 		TTMATH_USE_THREADSAFE_OBJ(cgamma.history);
 		if( cgamma.history.Get(n, result, err_tmp) )
 		{
 			if( err )
--- a/ttmath/ttmathbig.h
+++ b/ttmath/ttmathbig.h
@ -38,6 +38,8 @@
 #ifndef headerfilettmathbig
 #define headerfilettmathbig
 #include "ttmathconfig.h"
 /*!
 	\file ttmathbig.h
    \brief A Class for representing floating point numbers
@ -47,6 +49,10 @@
 #include <iostream>
 #if defined(_MSC_VER)
 	#pragma warning(disable:4127) // conditional expression is constant
 #endif
 namespace ttmath
 {
@ -81,7 +87,7 @@ public:
 Int<exp>	exponent;
 UInt<man>	mantissa;
-unsigned char info;
+tt_char		info;
 /*!
@ -420,7 +426,7 @@ public:
 		// (TTMATH_BUILTIN_VARIABLES_SIZE on 32bit platform should have the value 256,
 		// and on 64bit platform value 128 (256/2=128))
-		mantissa.SetFromTable(temp_table, sizeof(temp_table) / sizeof(int));
+		mantissa.SetFromTable(temp_table, sizeof(temp_table) / sizeof(unsigned int));
 		exponent = -sint(man)*sint(TTMATH_BITS_PER_UINT);
 		info = 0;
 	}
@ -1028,7 +1034,7 @@ public:
 	UInt<man*2> man1;
 	UInt<man*2> man2;
-	uint i,c = 0;
+	uint i,c;
 		if( IsNan() || ss2.IsNan() || ss2.IsZero() )
 			return CheckCarry(1);
@ -1049,9 +1055,7 @@ public:
 		i = man1.CompensationToLeft();
-		if( i )
+		c  = exponent.Sub(i);
 			c += exponent.Sub(i);
 		c += exponent.Sub(ss2.exponent);
 		for(i=0 ; i<man ; ++i)
@ -1073,7 +1077,7 @@ public:
 		e.g.
 		 12.6 mod  3 =  0.6   because 12.6 = 3*4 + 0.6
-		-12.6 mod  3 = -0.6   bacause -12.6 = 3*(-4) + (-0.6)
+		-12.6 mod  3 = -0.6
 		 12.6 mod -3 =  0.6
 		-12.6 mod -3 = -0.6
@ -1107,7 +1111,6 @@ public:
 	/*!
 		power this = this ^ pow
 		(pow without a sign)
@ -1766,7 +1769,7 @@ public:
 		// MS Visual Express 2005 reports a warning (in the lines with 'uint man_diff = ...'):
 		// warning C4307: '*' : integral constant overflow
 		// but we're using 'if( man > another_man )' and 'if( man < another_man )' and there'll be no such situation here
-		#ifdef _MSC_VER
+		#ifndef __GNUC__
 		#pragma warning( disable: 4307 )
 		#endif
@ -1782,7 +1785,7 @@ public:
 			c += exponent.AddInt(man_diff, 0);
 		}
-		#ifdef _MSC_VER
+		#ifndef __GNUC__
 		#pragma warning( default: 4307 )
 		#endif
@ -2007,7 +2010,7 @@ public:
 		// error but I leave it at the moment as is
 		TTMATH_ASSERT( sizeof(double) == 8 )
-		// I am not sure what will be on a platform which has 
+		// I am not sure what will be on a platform which has 
 		// a different endianness... but we use this library only
 		// on x86 and amd (intel) 64 bits (as there's a lot of assembler code)
 		union 
@ -2172,7 +2175,7 @@ public:
 				// then V=(-1)**S * 2 ** (-1022) * (0.F)
 				// These are "unnormalized" values.
-				FromDouble_SetExpAndMan(bool(temp.u & 0x8000000000000000ul),
+				FromDouble_SetExpAndMan((temp.u & 0x8000000000000000ul) != 0,
 										e - 1022 - man*TTMATH_BITS_PER_UINT + 1, 0, m);
 				Standardizing();
 			}
@ -2667,6 +2670,18 @@ public:
 		operator=(value);
 	}
 	class LogHistory
 		{
 		public:
 			Big<exp,man>	val[15];
 							LogHistory()
 								{
 								for (int i = 0; i < 15; ++i)
 									val[i].SetZero();
 								}
 		TTMATH_IMPLEMENT_THREADSAFE_OBJ
 		};
 	/*!
 		a method for converting the value into a string with a base equal 'base'
@ -2961,11 +2976,12 @@ private:
 		// (LnSurrounding1() will return one immediately)
 		uint c = Ln(x);
 		// warning! this 'static' is not thread safe
 		static Big<exp,man> log_history[15] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
 		uint index = base - 2;
-		if( log_history[index].IsZero() )
+		static LogHistory	log_history;
 		TTMATH_USE_THREADSAFE_OBJ(log_history);
 		if( log_history.val[index].IsZero() )
 		{	
 			// we don't have 'base' in 'log_history' then we calculate it now
@ -2983,14 +2999,14 @@ private:
 			// the next time we'll get the 'Ln(base)' from the history,
 			// this 'log_history' can have (16-2+1) items max
-			log_history[index] = temp;
+			log_history.val[index] = temp;
 			c += Div(temp);
 		}
 		else
 		{
 			// we've calculated the 'Ln(base)' beforehand and we're getting it now
-			c += Div( log_history[index] );
+			c += Div( log_history.val[index] );
 		}
 	return (c==0)? 0 : 1;
@ -3848,6 +3864,72 @@ public:
 	return false;
 	}
 	bool AboutEqual(const Big<exp,man> & ss2, int nBitsToIgnore = 4) const
 	{
 		// we should check the mantissas beforehand because sometimes we can have
 		// a mantissa set to zero but in the exponent something another value
 		// (maybe we've forgotten about calling CorrectZero() ?)
 		if( mantissa.IsZero())
 			{
 			if (ss2.mantissa.IsZero())
 				return true;
 			return(ss2.AboutEqual(*this,nBitsToIgnore));
 			}
 		if (ss2.mantissa.IsZero())
 			{
 			return(this->exponent <= uint(2*(-sint(man*TTMATH_BITS_PER_UINT))+nBitsToIgnore));
 			}
 		// exponents may not differ much!
 		ttmath::Int<exp>	expdiff(this->exponent - ss2.exponent);
 		// they may differ one if for example mantissa1=0x80000000, mantissa2=0xffffffff
 		if (ttmath::Abs(expdiff) > 1)
 			return(false);		
 		// calculate the 'difference' mantissa		
 		ttmath::UInt<man>	man1(this->mantissa);
 		ttmath::UInt<man>	man2(ss2.mantissa);
 		ttmath::UInt<man>	mandiff;
 		switch (expdiff.ToInt())
 			{
 			case +1:
 				man2.Rcr(1,0);
 				mandiff = man1;
 				mandiff.Sub(man2);
 				break;
 			case -1:
 				man1.Rcr(1,0);
 				mandiff = man2;
 				mandiff.Sub(man1);
 				break;
 			default:
 				if (man2 > man1)
 					{
 					mandiff = man2;
 					mandiff.Sub(man1);
 					}
 				  else
 					{
 					mandiff = man1;
 					mandiff.Sub(man2);
 					}
 				break;
 			}
 		// faster to mask the bits!
 		ASSERT(nBitsToIgnore < TTMATH_BITS_PER_UINT);
 		for (int n = man-1; n > 0; --n)
 			{
 			if (mandiff.table[n] != 0)
 				return(false);
 			}
 		uint	nMask = ~((1 << nBitsToIgnore) - 1);
 		return((mandiff.table[0] & nMask) == 0);
 	}
 	bool operator<(const Big<exp,man> & ss2) const
 	{
@ -3879,8 +3961,6 @@ public:
 	}
 	bool operator>(const Big<exp,man> & ss2) const
 	{
 		if( IsSign() && !ss2.IsSign() )
@ -4253,6 +4333,9 @@ public:
 };
 #if defined(_MSC_VER)
 	#pragma warning(default:4127) // conditional expression is constant
 #endif
 } // namespace
--- a/ttmath/ttmathconfig.h
+++ b/ttmath/ttmathconfig.h
@ -0,0 +1,110 @@
 /*
 * This file is a part of TTMath Bignum Library
 * and is distributed under the PNG licence.
 * Author: Christian Kaiser <chk@online.de>
 */
 /* 
 	Copyright (c) 2009 Christian Kaiser
 	This software is provided 'as-is', without any express or implied
 	warranty. In no event will the authors be held liable for any damages
 	arising from the use of this software.
 	Permission is granted to anyone to use this software for any purpose,
 	including commercial applications, and to alter it and redistribute it
 	freely, subject to the following restrictions:
 		1. The origin of this software must not be misrepresented; you must not
 		claim that you wrote the original software. If you use this software
 		in a product, an acknowledgment in the product documentation would be
 		appreciated but is not required.
 		2. Altered source versions must be plainly marked as such, and must not be
 		misrepresented as being the original software.
 		3. This notice may not be removed or altered from any source
 		distribution.
 */
 #ifndef headerfilettmathconfig
 #define headerfilettmathconfig
 #pragma once
 #include <sstream>
 #if defined(WIN32)
 	// system headers have to be included at the global scope:
 	// an #include placed inside 'namespace ttmath' would declare
 	// the whole Win32 API as members of that namespace
 	#include <windows.h>
 #endif
 namespace ttmath
 {
 #if defined(WIN32)
 	#if defined(_MT)
 		/*
 			a thin wrapper around a Win32 CRITICAL_SECTION

 			the critical section is initialized in the constructor and deleted
 			in the destructor; Enter() and Leave() are const (the member is
 			mutable) so that a const object can be locked as well
 		*/
 		class 	clsCrit
 			{
 			private:
 				mutable CRITICAL_SECTION 		_Crit;
 				// copying is inhibited (easy mistake to do; use clsCritObj instead!!!)
 				// the copy constructor is only declared, never defined: an empty body
 				// would leave _Crit uninitialized and the destructor would then call
 				// DeleteCriticalSection() on it
 												clsCrit(const clsCrit&);
 				clsCrit&						operator=(const clsCrit& rhs); // inhibit assignment
 			public:
 												clsCrit(void)
 													{
 													InitializeCriticalSection(&_Crit);
 													}
 				virtual							~clsCrit(void)
 													{
 													DeleteCriticalSection(&_Crit);
 													}
 				// enters the critical section
 				void							Enter(void) const
 													{
 													EnterCriticalSection(&_Crit);
 													}
 				// leaves the critical section
 				void							Leave(void) const
 													{
 													LeaveCriticalSection(&_Crit);
 													}
 			};
 		/*
 			a scope guard for clsCrit: the constructor calls Enter() on the given
 			object and the destructor calls Leave() on the same object
 		*/
 		class 	clsCritObj
 			{
 			private:
 				const clsCrit&					_Crit;
 				// copying is inhibited (declared only, never defined):
 				// a copy would call Leave() a second time for a single Enter()
 												clsCritObj(const clsCritObj& rhs);
 				clsCritObj&						operator=(const clsCritObj& rhs); // not applicable
 			public:
 				// enters 'Sync' and keeps a reference to it for the destructor
 												clsCritObj(const clsCrit& Sync)
 													: _Crit(Sync)
 													{
 													_Crit.Enter();
 													}
 				// leaves the object entered in the constructor
 												~clsCritObj(void)
 													{
 													_Crit.Leave();
 													}
 			};
 		// to be placed inside a class definition: adds a private clsCrit member
 		// called CritSect and a public conversion operator returning it,
 		// the access level is left at 'public' after the macro
 		#define TTMATH_IMPLEMENT_THREADSAFE_OBJ					\
 			private:											\
 				clsCrit CritSect;								\
 			public:												\
 				operator clsCrit&()								\
 				{												\
 					return(CritSect);							\
 				}
 		// declares a local clsCritObj named 'lock' constructed from 'c'
 		// ('c' has to be convertible to clsCrit&), the lock is held until the end
 		// of the enclosing scope; the variable name is fixed so the macro
 		// can be used only once per scope
 		#define TTMATH_USE_THREADSAFE_OBJ(c)	clsCritObj	lock(c)
 	#endif
 #else // defined(WIN32) 
 	// not Windows world: no threading synchronization for now
 #endif
 #if !defined(TTMATH_IMPLEMENT_THREADSAFE_OBJ)
 	// if we don't know about serialization, make it a no-op
 	#define TTMATH_IMPLEMENT_THREADSAFE_OBJ		/* */
 	#define TTMATH_USE_THREADSAFE_OBJ(c)		/* */
 #endif
 } // namespace
 #endif // headerfilettmathconfig
--- a/ttmath/ttmathint.h
+++ b/ttmath/ttmathint.h
@ -47,6 +47,10 @@
 #include "ttmathuint.h"
 #if defined(_MSC_VER)
 	#pragma warning(disable:4127) // conditional expression is constant
 #endif
 namespace ttmath
 {
@ -641,8 +645,14 @@ public:
 		// there can be a carry here when the size of this value is equal one word
 		// and the 'value' has the highest bit set
 		#if defined(_MSC_VER)
 			#pragma warning(disable:4127) // conditional expression is constant
 		#endif
 		if( value_size==1 && (value & TTMATH_UINT_HIGHEST_BIT)!=0 )
 			return 1;
 		#if defined(_MSC_VER)
 			#pragma warning(default:4127) // conditional expression is constant
 		#endif
 	return 0;
 	}
@ -1327,4 +1337,9 @@ public:
 } // namespace
 #if defined(_MSC_VER)
 	#pragma warning(default:4127) // conditional expression is constant
 #endif
 #endif
--- a/ttmath/ttmathtypes.h
+++ b/ttmath/ttmathtypes.h
@ -120,6 +120,7 @@ namespace ttmath
 	typedef unsigned int uint;
 	typedef signed   int sint;
 	/*!
 		this type is twice bigger than uint
 		(64bit on a 32bit platforms)
@ -128,43 +129,39 @@ namespace ttmath
 		but it is defined in C99 and in upcoming C++0x /3.9.1 (2)/ and many compilers support it
 		this type is used in UInt::MulTwoWords and UInt::DivTwoWords when macro TTMATH_NOASM is defined
 		but only on a 32bit platform
 	*/
 	#ifdef TTMATH_NOASM
 	typedef unsigned long long int ulint;
 	#endif
 	/*!
 		how many bits there are in the uint type
 	*/
 	#define TTMATH_BITS_PER_UINT 32u
 	/*!
 		the mask for the highest bit in the unsigned 32bit word (2^31)
 	*/
-	#define TTMATH_UINT_HIGHEST_BIT 2147483648u
+	const uint TTMATH_UINT_HIGHEST_BIT = 0x80000000ul;
 	/*!
 		the max value of the unsigned 32bit word (2^32 - 1)
 		(all bits equal one)
 	*/
-	#define TTMATH_UINT_MAX_VALUE 4294967295u
+	const uint TTMATH_UINT_MAX_VALUE = 0xfffffffful;
 	/*!
 		the number of words (32bit words on 32bit platform)
 		which are kept in built-in variables for a Big<> type
 		(these variables are defined in ttmathbig.h)
 	*/
-	#define TTMATH_BUILTIN_VARIABLES_SIZE 256u
+	const uint TTMATH_BUILTIN_VARIABLES_SIZE  = 256u;
 #else
 	/*!
 		on 64bit platforms one word (uint, sint) will be equal 64bits
 	*/
-	typedef unsigned long uint;
+	#if defined(_MSC_VER)
-	typedef signed   long sint;
+		typedef unsigned __int64 uint;
-
+		typedef signed __int64 sint;
 	#else
 		typedef unsigned long long uint;
 		typedef signed long long sint;
 	#endif
 	/*!
 		on 64bit platform we do not define ulint
 		sizeof(long long) is 8 (64bit) but we need 128bit
@ -174,30 +171,28 @@ namespace ttmath
 	*/
 	//typedef unsigned long long int ulint;
 	/*!
 		how many bits there are in the uint type
 	*/
 	#define TTMATH_BITS_PER_UINT 64ul
 	/*!
 		the mask for the highest bit in the unsigned 64bit word (2^63)
 	*/
-	#define TTMATH_UINT_HIGHEST_BIT 9223372036854775808ul
+	const uint TTMATH_UINT_HIGHEST_BIT = 0x8000000000000000ul;
 	/*!
 		the max value of the unsigned 64bit word (2^64 - 1)
 		(all bits equal one)
 	*/
-	#define TTMATH_UINT_MAX_VALUE 18446744073709551615ul
+	const uint TTMATH_UINT_MAX_VALUE = 0xfffffffffffffffful;
 	/*!
 		the number of words (64bit words on 64bit platforms)
 		which are kept in built-in variables for a Big<> type
 		(these variables are defined in ttmathbig.h)
 	*/
-	#define TTMATH_BUILTIN_VARIABLES_SIZE 128ul
+	const uint TTMATH_BUILTIN_VARIABLES_SIZE = 128ul;
 #endif
 	const uint TTMATH_BITS_PER_UINT = (sizeof(uint)*8);
 }
@ -279,7 +274,6 @@ namespace ttmath
 #endif
 /*!
 	this is a special value used when calculating the Gamma(x) function
 	if x is greater than this value then the Gamma(x) will be calculated using
@ -476,6 +470,9 @@ namespace ttmath
 			#define TTMATH_ASSERT(expression) \
 				if( !(expression) ) throw ttmath::RuntimeError(TTMATH_FILE, __LINE__);
 			#define TTMATH_VERIFY(expression) \
 				if( !(expression) ) throw ttmath::RuntimeError(TTMATH_TEXT(__FILE__), __LINE__);
 		#else
 			#define TTMATH_REFERENCE_ASSERT(expression) \
@ -483,14 +480,20 @@ namespace ttmath
 			#define TTMATH_ASSERT(expression) \
 				if( !(expression) ) throw RuntimeError();
 			#define TTMATH_VERIFY(expression) \
 				if( !(expression) ) throw RuntimeError();
 		#endif
 	#else
 		#define TTMATH_REFERENCE_ASSERT(expression)
 		#define TTMATH_ASSERT(expression)
 		#define TTMATH_VERIFY(expression)	(void)(expression);
 	#endif
-
+	#if !defined(LOG_PRINTF)
 		#define LOG_PRINTF printf
 	#endif
 	#ifdef TTMATH_DEBUG_LOG
--- a/ttmath/ttmathuint.h
+++ b/ttmath/ttmathuint.h
@ -52,7 +52,9 @@
 #include "ttmathtypes.h"
-
+#if defined(_MSC_VER)
 	#pragma warning(disable:4127) // conditional expression is constant
 #endif
 /*!
    \brief a namespace for the TTMath library
@ -1969,7 +1971,7 @@ private:
 		for(uint i = j+1 ; i<value_size ; ++i)
 			q.table[i] = 0;
-		while( true )
+		for (;;)
 		{
 			u1 = table[j+n-1];
 			u0 = table[j+n-2];
@ -2286,7 +2288,7 @@ public:
 	*/
 	bool IsTheLowestBitSet() const
 	{
-		return (*table & 1) != 0;
+		return (table[0] & 1) != 0;
 	}
@ -2370,7 +2372,7 @@ public:
 	/*!
-		this method converts a digit into a char
+		this method converts a digit into a tt_char
 		digit should be from <0,F>
 		(we don't have to get a base)
@ -2380,12 +2382,12 @@ public:
 			10 -> A
 			15 -> F
 	*/
-	static uint DigitToChar(uint digit)
+	static tt_char DigitToChar(uint digit)
 	{
 		if( digit < 10 )
-			return digit + '0';
+			return (tt_char)(digit + '0');
-	return digit - 10 + 'A';
+	return((tt_char)(digit - 10 + 'A'));
 	}
@ -3251,7 +3253,6 @@ public:
 			ttmathuint_noasm.h
 	*/
 #ifdef TTMATH_NOASM
 	static uint AddTwoWords(uint a, uint b, uint carry, uint * result);
 	static uint SubTwoWords(uint a, uint b, uint carry, uint * result);
@ -3276,8 +3277,6 @@ public:
 	static void MultiplySubtract(uint_ & u_, unsigned int & u3, unsigned int & q, uint_ v_);
 #endif // TTMATH_PLATFORM64
 #endif // TTMATH_NOASM
 private:
 	uint Rcl2_one(uint c);
@ -3319,6 +3318,10 @@ public:
 } //namespace
 #if defined(_MSC_VER)
 	#pragma warning(default:4127) // conditional expression is constant
 #endif
 #include "ttmathuint_x86.h"
 #include "ttmathuint_x86_64.h"
--- a/ttmath/ttmathuint_noasm.h
+++ b/ttmath/ttmathuint_noasm.h
@ -41,6 +41,8 @@
 #ifdef TTMATH_NOASM
 #pragma message("TTMATH_NOASM")
 /*!
 	\file ttmathuint_noasm.h
    \brief template class UInt<uint> with methods without any assembler code
--- a/ttmath/ttmathuint_x86.h
+++ b/ttmath/ttmathuint_x86.h
@ -36,14 +36,13 @@
 */
 #ifndef headerfilettmathuint_x86
 #define headerfilettmathuint_x86
 #ifndef TTMATH_NOASM
 #ifdef TTMATH_PLATFORM32
 #pragma message("TTMATH_ASM32")
 /*!
 	\file ttmathuint_x86.h
@ -66,7 +65,6 @@ namespace ttmath
 	*
 	*/
 	/*!
 		adding ss2 to the this and adding carry if it's defined
 		(this = this + ss2 + c)
@ -85,46 +83,31 @@ namespace ttmath
 		// this algorithm doesn't require it
 		#ifndef __GNUC__
 			//	this part might be compiled with for example visual c
 			__asm
 			{
-				push eax
+				xor eax,eax  // eax=0
-				push ebx
+				xor edx,edx  // edx=0
 				push ecx
 				push edx
 				push esi
 				mov ecx,[b]
 				mov ebx,[p1]
 				mov esi,[p2]
-				xor edx,edx          // edx=0
+				sub eax,[c]  // CF=c
 				mov eax,[c]
 				neg eax              // CF=1 if rax!=0 , CF=0 if rax==0
 				ALIGN 16
 			ttmath_loop:				
-				mov eax,[esi+edx*4]
+				mov eax,[esi+edx*4+0]
-				adc [ebx+edx*4],eax
+				adc [ebx+edx*4+0],eax
-				inc edx
+				lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
 				dec ecx
 			jnz ttmath_loop
-				adc ecx, ecx
+				setc	al
-				mov [c], ecx
+				movzx	eax, al
-
+				mov		[c], eax
 				pop esi
 				pop edx
 				pop ecx
 				pop ebx
 				pop eax
 			}
 		#endif		
@ -188,14 +171,8 @@ namespace ttmath
 		TTMATH_ASSERT( index < value_size )
 		#ifndef __GNUC__
 			__asm
 			{
 				push eax
 				push ebx
 				push ecx
 				push edx
 				mov ecx, [b]
 				sub ecx, [index]				
@ -204,26 +181,21 @@ namespace ttmath
 				mov eax, [value]
 				ALIGN 16
 			ttmath_loop:
 				add [ebx+edx*4], eax
 			jnc ttmath_end
 				mov eax, 1
-				inc edx
+				lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
 				dec ecx
 			jnz ttmath_loop
 			ttmath_end:
 				setc	al
-				movzx edx, al
+				movzx	eax, al
-				mov [c], edx
+				mov		[c], eax
 				pop edx
 				pop ecx
 				pop ebx
 				pop eax
 			}
 		#endif		
@ -303,52 +275,40 @@ namespace ttmath
 		#ifndef __GNUC__
 			__asm
 			{
 				push eax
 				push ebx
 				push ecx
 				push edx
 				mov ecx, [b]
 				sub ecx, [index]				
 				mov ebx, [p1]
 				mov edx, [index]
-
+				mov ebx, [p1]
 				mov eax, [x1]
 				sub ecx, edx	// max uints to add (value_size - index)
 				add [ebx+edx*4], eax
-				inc edx
+				lea ecx, [ecx-1]
 				dec ecx
 				mov eax, [x2]
 				ALIGN 16
 			ttmath_loop:
-				adc [ebx+edx*4], eax
+				adc [ebx+edx*4+4], eax
 			jnc ttmath_end
 				mov eax, 0
-				inc edx
+				lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
 				dec ecx
 			jnz ttmath_loop
 			ttmath_end:
 				setc	al
-				movzx edx, al
+				movzx	eax, al
-				mov [c], edx
+				mov		[c], eax
 				pop edx
 				pop ecx
 				pop ebx
 				pop eax
 			}
 		#endif		
 		#ifdef __GNUC__
 		uint dummy, dummy2;
 			__asm__ __volatile__(
 				"push %%ecx						\n"
 				"push %%edx						\n"
 				"subl %%edx, %%ecx 				\n"
 				"addl %%esi, (%%ebx,%%edx,4) 	\n"
@ -414,21 +374,19 @@ namespace ttmath
 			//	this part might be compiled with for example visual c
 			__asm
 			{
 				pushad
 				mov ecx, [ss2_size]
 				xor edx, edx               // edx = 0, cf = 0
 				mov esi, [ss1]
 				mov ebx, [ss2]
 				mov edi, [result]
-
+				ALIGN 16
 			ttmath_loop:
 				mov eax, [esi+edx*4]
 				adc eax, [ebx+edx*4]
 				mov [edi+edx*4], eax
-				inc edx
+				lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
 				dec ecx
 			jnz ttmath_loop
@ -447,7 +405,7 @@ namespace ttmath
 				adc eax, ebx 
 				mov [edi+edx*4], eax
-				inc edx
+				lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
 				dec ecx
 			jnz ttmath_loop2
@ -455,8 +413,6 @@ namespace ttmath
 			ttmath_end:
 				mov [c], ecx
 				popad
 			}
 		#endif		
@ -529,40 +485,30 @@ namespace ttmath
 		// this algorithm doesn't require it
 		#ifndef __GNUC__
 			__asm
 			{
 				push eax
 				push ebx
 				push ecx
 				push edx
 				push esi
 				mov ecx,[b]
 				mov ebx,[p1]
 				mov esi,[p2]
-				xor edx,edx          // edx=0
+				xor eax, eax
-				mov eax,[c]
+				mov edx, eax
 				neg eax              // CF=1 if rax!=0 , CF=0 if rax==0
 				sub eax, [c]
 				ALIGN 16
 			ttmath_loop:
 				mov eax, [esi+edx*4]
 				sbb [ebx+edx*4], eax
-				inc edx
+				lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
 				dec ecx
 			jnz ttmath_loop
-				adc ecx, ecx
+				setc	al
-				mov [c], ecx
+				movzx	eax, al
-
+				mov		[c], eax
 				pop esi
 				pop edx
 				pop ecx
 				pop ebx
 				pop eax
 			}
 		#endif
@ -629,14 +575,8 @@ namespace ttmath
 		TTMATH_ASSERT( index < value_size )
 		#ifndef __GNUC__
 			__asm
 			{
 				push eax
 				push ebx
 				push ecx
 				push edx
 				mov ecx, [b]
 				sub ecx, [index]				
@ -645,26 +585,21 @@ namespace ttmath
 				mov eax, [value]
 				ALIGN 16
 			ttmath_loop:
 				sub [ebx+edx*4], eax
 			jnc ttmath_end
 				mov eax, 1
-				inc edx
+				lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
 				dec ecx
 			jnz ttmath_loop
 			ttmath_end:
 				setc	al
-				movzx edx, al
+				movzx	eax, al
-				mov [c], edx
+				mov		[c], eax
 				pop edx
 				pop ecx
 				pop ebx
 				pop eax
 			}
 		#endif		
@ -740,7 +675,6 @@ namespace ttmath
 			*/
 			__asm
 			{
 				pushad
 				mov ecx, [ss2_size]
 				xor edx, edx               // edx = 0, cf = 0
@ -754,7 +688,7 @@ namespace ttmath
 				sbb eax, [ebx+edx*4]
 				mov [edi+edx*4], eax
-				inc edx
+				lea edx, [edx+1]
 				dec ecx
 			jnz ttmath_loop
@ -773,7 +707,7 @@ namespace ttmath
 				sbb eax, ebx 
 				mov [edi+edx*4], eax
-				inc edx
+				lea edx, [edx+1]
 				dec ecx
 			jnz ttmath_loop2
@ -781,8 +715,6 @@ namespace ttmath
 			ttmath_end:
 				mov [c], ecx
 				popad
 			}
 		#endif		
@ -858,29 +790,25 @@ namespace ttmath
 		#ifndef __GNUC__
 			__asm
 			{
 				push ebx
 				push ecx
 				push edx
 				mov ebx, [p1]
 				xor edx, edx
-				mov ecx, [c]
+				mov ecx, edx
-				neg ecx
+				sub ecx, [c]
 				mov ecx, [b]
 				ALIGN 16
 			ttmath_loop:
 				rcl dword ptr [ebx+edx*4], 1
-				inc edx
+				lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
 				dec ecx
 			jnz ttmath_loop
-				adc ecx, ecx
+				setc	al
-				mov [c], ecx
+				movzx	eax, al
-				
+				mov		[c], eax
 				pop edx
 				pop ecx
 				pop ebx
 			}
 		#endif
@ -936,25 +864,22 @@ namespace ttmath
 		#ifndef __GNUC__
 			__asm
 			{
-				push ebx
+				xor ecx, ecx
-				push ecx
+				sub ecx, [c]
 				mov ebx, [p1]
 				mov ecx, [c]
 				neg ecx
 				mov ecx, [b]
 				ALIGN 16
 			ttmath_loop:
 				rcr dword ptr [ebx+ecx*4-4], 1
 				dec ecx
 			jnz ttmath_loop
-				adc ecx, ecx
+				setc	al
-				mov [c], ecx
+				movzx	eax, al
-
+				mov		[c], eax
 				pop ecx
 				pop ebx
 			}
 		#endif
@ -987,13 +912,6 @@ namespace ttmath
 #ifdef _MSC_VER
 #pragma warning (disable : 4731)
 //warning C4731: frame pointer register 'ebp' modified by inline assembly code
 #endif
 	/*!
 		this method moves all bits into the left hand side
 		return value <- this <- c
@ -1011,62 +929,47 @@ namespace ttmath
 	{
 	TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
-	uint b = value_size;
+	register sint b = value_size;
-	uint * p1 = table;
+	register uint * p1 = table;
 	register uint mask;
 		#ifndef __GNUC__
 			__asm
 			{
 				push eax
 				push ebx
 				push ecx
 				push edx
 				push esi
 				push edi
 				push ebp
 				mov edi, [b]
 				mov ecx, 32
 				sub ecx, [bits]
 				mov edx, -1
 				shr edx, cl
 				mov [mask], edx
 				mov ecx, [bits]
 				mov ebx, [p1]
 				mov eax, [c]
 				mov ebp, edx         // ebp = mask (modified ebp - don't read/write to variables)
 				xor edx, edx   // edx = 0
-				mov esi, edx
+				mov esi, edx   // old value = 0 
 				or eax, eax
 				cmovnz esi, ebp      // if(c) esi=mask else esi=0
 				mov eax, [c]
 				or eax, eax
 				cmovnz esi, [mask] // if c then old value = mask
 				ALIGN 16
 			ttmath_loop:
 				rol dword ptr [ebx+edx*4], cl
 				mov eax, [ebx+edx*4]
-				and eax, ebp
+				and eax, [mask] 
 				xor [ebx+edx*4], eax // clearing bits
 				or [ebx+edx*4], esi  // saving old value
 				mov esi, eax
-				inc edx
+				lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
 				dec edi
 			jnz ttmath_loop
 				pop ebp              // restoring ebp
 				and eax, 1
-				mov [c], eax
+				mov	dword ptr [c], eax
 				pop edi
 				pop esi
 				pop edx
 				pop ecx
 				pop ebx
 				pop eax
 			}
 		#endif
@ -1141,43 +1044,37 @@ namespace ttmath
 	uint b = value_size;
 	uint * p1 = table;
 	uint mask;
 		#ifndef __GNUC__
 			__asm
 			{
 				push eax
 				push ebx
 				push ecx
 				push edx
 				push esi
 				push edi
 				push ebp
 				mov edi, [b]
 				mov ecx, 32
 				sub ecx, [bits]
 				mov edx, -1
 				shl edx, cl
 				mov [mask], edx
 				mov ecx, [bits]
 				mov ebx, [p1]
 				mov eax, [c]
 				mov ebp, edx         // ebp = mask (modified ebp - don't read/write to variables)
 				xor edx, edx   // edx = 0
-				mov esi, edx
+				mov esi, edx   // old value = 0 
 				add edx, edi   
-				dec edx              // edx is pointing at the end of the table (on last word)
+				dec edx        // edx - is pointing at the last word
 				or eax, eax
 				cmovnz esi, ebp      // if(c) esi=mask else esi=0
 				mov eax, [c]
 				or eax, eax
 				cmovnz esi, [mask] // if c then old value = mask
 				ALIGN 16
 			ttmath_loop:
 				ror dword ptr [ebx+edx*4], cl
 				mov eax, [ebx+edx*4]
-				and eax, ebp 
+				and eax, [mask] 
 				xor [ebx+edx*4], eax // clearing bits
 				or [ebx+edx*4], esi  // saving old value
 				mov esi, eax
@ -1186,18 +1083,10 @@ namespace ttmath
 				dec edi
 			jnz ttmath_loop
-				pop ebp              // restoring ebp
+				rol eax, 1    // bit 31 will be bit 0
 				rol eax, 1           // 31bit will be first
 				and eax, 1  
 				mov [c], eax
-				pop edi
+				mov	dword ptr [c], eax
 				pop esi
 				pop edx
 				pop ecx
 				pop ebx
 				pop eax
 			}
 		#endif
@ -1254,10 +1143,6 @@ namespace ttmath
 	}
 #ifdef _MSC_VER
 #pragma warning (default : 4731)
 #endif
 	/*
 		this method returns the number of the highest set bit in one 32-bit word
@ -1271,16 +1156,11 @@ namespace ttmath
 		#ifndef __GNUC__
 			__asm
 			{
 				push eax
 				push edx
 				mov edx,-1
 				bsr eax,[x]
 				cmovz eax,edx
 				mov [result], eax
-				pop edx
+				mov [result], eax
 				pop eax
 			}
 		#endif
@ -1328,9 +1208,6 @@ namespace ttmath
 		#ifndef __GNUC__
 			__asm
 			{
 			push ebx
 			push eax
 				mov eax, [v]
 				mov ebx, [bit]
 				bts eax, ebx
@ -1339,9 +1216,6 @@ namespace ttmath
 				setc bl
 				movzx ebx, bl
 				mov [old_bit], ebx
 			pop eax
 			pop ebx
 			}
 		#endif
@ -1360,7 +1234,6 @@ namespace ttmath
 		#endif
 		value = v;
 		return old_bit;
 	}
@ -1392,17 +1265,11 @@ namespace ttmath
 			__asm
 			{
 			push eax
 			push edx
 				mov eax, [a]
 				mul dword ptr [b]
 				mov [result2_], edx
 				mov [result1_], eax
 			pop edx
 			pop eax
 			}
 		#endif
@ -1465,18 +1332,12 @@ namespace ttmath
 		#ifndef __GNUC__
 			__asm
 			{
 				push eax
 				push edx
 				mov edx, [a]
 				mov eax, [b]
 				div dword ptr [c]
 				mov [r_], eax
 				mov [rest_], edx
 				pop edx
 				pop eax
 			}
 		#endif
--- a/ttmath/ttmathuint_x86_64.h
+++ b/ttmath/ttmathuint_x86_64.h
@ -39,11 +39,10 @@
 #ifndef headerfilettmathuint_x86_64
 #define headerfilettmathuint_x86_64
 #ifndef TTMATH_NOASM
 #ifdef TTMATH_PLATFORM64
-
+#pragma message("TTMATH_ASM64")
 /*!
 	\file ttmathuint_x86_64.h
    \brief template class UInt<uint> with assembler code for 64bit x86_64 processors
@ -51,10 +50,31 @@
 	this file is included at the end of ttmathuint.h
 */
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 namespace ttmath
 {
 	#if defined(_M_X64)
 		#include <intrin.h>
 		extern "C"
 			{
 			uint	__fastcall	adc_x64(uint* p1, const uint* p2, uint nSize, uint c);
 			uint	__fastcall	addindexed_x64(uint* p1, uint nSize, uint nPos, uint nValue);
 			uint	__fastcall	addindexed2_x64(uint* p1, uint nSize, uint nPos, uint nValue1, uint nValue2);
 			uint	__fastcall	sbb_x64(uint* p1, const uint* p2, uint nSize, uint c);
 			uint	__fastcall	subindexed_x64(uint* p1, uint nSize, uint nPos, uint nValue);
 			uint	__fastcall	rcl_x64(uint* p1, uint nSize, uint nLowestBit);
 			uint	__fastcall	rcr_x64(uint* p1, uint nSize, uint nLowestBit);
 			uint	__fastcall	div_x64(uint* pnValHi, uint* pnValLo, uint nDiv);
 			uint	__fastcall	rcl2_x64(uint* p1, uint nSize, uint nBits, uint c);
 			uint	__fastcall	rcr2_x64(uint* p1, uint nSize, uint nBits, uint c);
 			};
 	#endif
 	/*!
 	*
 	*	basic mathematic functions
@ -83,12 +103,15 @@ namespace ttmath
 		// this algorithm doesn't require it
 		#ifndef __GNUC__
 			#if defined(_M_X64)
 				c = adc_x64(p1,p2,b,c);
 			#else
 				#error "another compiler than GCC is currently not supported in 64bit mode"
 			#endif
 		#endif
 		#ifdef __GNUC__
 			uint dummy, dummy2;
 			/*
 				this part should be compiled with gcc
 			*/
@ -150,8 +173,12 @@ namespace ttmath
 		TTMATH_ASSERT( index < value_size )
 		#ifndef __GNUC__
 			#if defined(_M_X64)
 				c = addindexed_x64(p1,b,index,value);
 			#else
 				#error "another compiler than GCC is currently not supported in 64bit mode"
 			#endif
 		#endif
 		#ifdef __GNUC__
 			uint dummy, dummy2;
@ -220,6 +247,30 @@ namespace ttmath
 	*/
 	template<uint value_size>
 	uint UInt<value_size>::AddTwoInts(uint x2, uint x1, uint index)
 	#if 0
 	{
 	uint i, c;
 		TTMATH_ASSERT( index < value_size )
 		printf("add %Id + %Id\n",x1,x2);
 		for(int i=index ; i<value_size ; ++i)
 			printf("%d: %Id\n",i,table[i]);
 		c = AddTwoWords(table[index],   x1, 0, &table[index]);
 		c = AddTwoWords(table[index+1], x2, c, &table[index+1]);
 		for(i=index+2 ; i<value_size && c ; ++i)
 			c = AddTwoWords(table[i], 0, c, &table[i]);
 		for(i=index ; i<value_size ; ++i)
 			printf("%d: %Id\n",i,table[i]);
 		printf(" -> %d\n",c);
 		TTMATH_LOG("UInt::AddTwoInts")
 	return c;
 	}
 	#else
 	{
 	uint b = value_size;
 	uint * p1 = table;
@ -228,8 +279,19 @@ namespace ttmath
 		TTMATH_ASSERT( index < value_size - 1 )
 		#ifndef __GNUC__
 			#if defined(_M_X64)
 				//printf("add %Id + %Id\n",x1,x2);
 				//for(int i=index ; i<value_size ; ++i)
 				//	printf("%d: %Id\n",i,table[i]);
 				//if (table[0] == 1265784741359897913) DebugBreak();
 				c = addindexed2_x64(p1,b,index,x1,x2);
 				//for(int i=index ; i<value_size ; ++i)
 				//	printf("%d: %Id\n",i,table[i]);
 				//printf(" -> %d\n",c);
 			#else
 				#error "another compiler than GCC is currently not supported in 64bit mode"
 			#endif
 		#endif
 		#ifdef __GNUC__
 			uint dummy, dummy2;
@ -265,89 +327,8 @@ namespace ttmath
 	return c;
 	}
 	/*!
 		this static method adds one vector to the other
 		'ss1' is larger in size or equal to 'ss2'
 		ss1 points to the first (larger) vector
 		ss2 points to the second vector
 		ss1_size - size of the ss1 (and size of the result too)
 		ss2_size - size of the ss2
 		result - is the result vector (which has size the same as ss1: ss1_size)
 		Example:  ss1_size is 5, ss2_size is 3
 		ss1:      ss2:   result (output):
 		  5        1         5+1
 		  4        3         4+3
 		  2        7         2+7
 		  6                  6
 		  9                  9
 	  of course the carry is propagated and will be returned from the last item
 	  (this method is used by the Karatsuba multiplication algorithm)
 	  register bindings of the asm block below (taken from the constraint lists):
 	    rbx = ss2, rcx = ss2_size (and the returned carry on exit), rdx = rest,
 	    rsi = ss1, rdi = result, r8 = scratch copy of 'rest'
 	  NOTE(review): both loops test their counter only after the body, so
 	  ss2_size == 0 is not handled here - presumably callers never pass it; confirm
 	*/
 	template<uint value_size>
 	uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
 	{
 		TTMATH_ASSERT( ss1_size >= ss2_size )
 		// number of words which exist only in ss1
 		uint rest = ss1_size - ss2_size;
 		uint c;
 		#ifndef __GNUC__
 			#error "another compiler than GCC is currently not supported in 64bit mode"
 	#endif
 		#ifdef __GNUC__
 		uint dummy1, dummy2, dummy3;	
 			//	this part should be compiled with gcc
 			__asm__ __volatile__(
 				"mov %%rdx, %%r8					\n"   // r8 = rest (rdx is reused as the word index)
 				"xor %%rdx, %%rdx					\n"   // rdx = 0, cf = 0
 			// first loop: result[i] = ss1[i] + ss2[i] + cf for the ss2_size common words
 			"1:										\n"
 				"mov (%%rsi,%%rdx,8), %%rax			\n"
 				"adc (%%rbx,%%rdx,8), %%rax			\n"
 				"mov %%rax, (%%rdi,%%rdx,8)			\n"
 				"inc %%rdx							\n"   // inc/dec leave cf untouched
 				"dec %%rcx							\n"
 			"jnz 1b									\n"
 				"adc %%rcx, %%rcx					\n"   // rcx has the cf state
 				"or %%r8, %%r8						\n"   // rest == 0 ?
 				"jz 3f								\n"   // yes: nothing more to do, rcx is the result
 				"xor %%rbx, %%rbx					\n"   // rbx = 0
 				"neg %%rcx							\n"   // setting cf from rcx
 				"mov %%r8, %%rcx					\n"   // rcx=rest and is != 0
 			// second loop: result[i] = ss1[i] + 0 + cf for the remaining 'rest' words
 			"2:										\n"
 				"mov (%%rsi, %%rdx, 8), %%rax		\n"
 				"adc %%rbx, %%rax 					\n"
 				"mov %%rax, (%%rdi, %%rdx, 8)		\n"
 				"inc %%rdx							\n"
 				"dec %%rcx							\n"
 			"jnz 2b									\n"
 				"adc %%rcx, %%rcx					\n"   // rcx = final carry
 			"3:										\n"
 				: "=a" (dummy1), "=b" (dummy2), "=c" (c),       "=d" (dummy3)
 				:                "1" (ss2),     "2" (ss2_size), "3" (rest),   "S" (ss1),  "D" (result)
 				: "%r8", "cc", "memory" );
 		#endif
 		TTMATH_LOG("UInt::AddVector")
 	return c;
 	}
@ -369,13 +350,16 @@ namespace ttmath
 	uint * p1 = table;
 	const uint * p2 = ss2.table;
 		// we don't have to use TTMATH_REFERENCE_ASSERT here
 		// this algorithm doesn't require it
 		#ifndef __GNUC__
 			#if defined(_M_X64)
 				c = sbb_x64(p1,p2,b,c);
 			#else
 				#error "another compiler than GCC is currently not supported in 64bit mode"
 			#endif
 		#endif
 		#ifdef __GNUC__
 			uint dummy, dummy2;
@ -399,6 +383,7 @@ namespace ttmath
 				: "0" (b), "1" (c), "b" (p1), "S" (p2)
 				: "cc", "memory" );
 		#endif
 		TTMATH_LOG("UInt::Sub")
@ -432,15 +417,20 @@ namespace ttmath
 	uint b = value_size;
 	uint * p1 = table;
 	uint c;
 	uint dummy, dummy2;
 		TTMATH_ASSERT( index < value_size )
 		#ifndef __GNUC__
 			#if defined(_M_X64)
 				c = subindexed_x64(p1,b,index,value);
 			#else
 				#error "another compiler than GCC is currently not supported in 64bit mode"
 			#endif
 		#endif
 		#ifdef __GNUC__
 			uint dummy, dummy2;
 			__asm__ __volatile__(
 				"subq %%rdx, %%rcx 				\n"
@ -464,100 +454,12 @@ namespace ttmath
 		#endif
-		TTMATH_LOG("UInt::SubInt")
+		TTMATH_LOG("UInt64::SubInt")
 	return c;
 	}
 	/*!
 		this static method subtracts one vector from the other
 		'ss1' is larger in size or equal to 'ss2'
 		ss1 points to the first (larger) vector
 		ss2 points to the second vector
 		ss1_size - size of the ss1 (and size of the result too)
 		ss2_size - size of the ss2
 		result - is the result vector (which has size the same as ss1: ss1_size)
 		Example:  ss1_size is 5, ss2_size is 3
 		ss1:      ss2:   result (output):
 		  5        1         5-1
 		  4        3         4-3
 		  2        7         2-7
 		  6                  6-1  (the borrow from previous item)
 		  9                  9
 		               return (carry): 0
 	  of course the carry (borrow) is propagated and will be returned from the last item
 	  (this method is used by the Karatsuba multiplication algorithm)
 	  register bindings of the asm block below (taken from the constraint lists):
 	    rbx = ss2, rcx = ss2_size (and the returned borrow on exit), rdx = rest,
 	    rsi = ss1, rdi = result, r8 = scratch copy of 'rest'
 	  NOTE(review): both loops test their counter only after the body, so
 	  ss2_size == 0 is not handled here - presumably callers never pass it; confirm
 	*/
 	template<uint value_size>
 	uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
 	{
 		TTMATH_ASSERT( ss1_size >= ss2_size )
 		// number of words which exist only in ss1
 		uint rest = ss1_size - ss2_size;
 		uint c;
 		#ifndef __GNUC__
 			#error "another compiler than GCC is currently not supported in 64bit mode"
 		#endif
 		#ifdef __GNUC__
 		/*
 			the asm code is nearly the same as in AddVector
 			only two instructions 'adc' are changed to 'sbb'
 		*/
 		uint dummy1, dummy2, dummy3;
 			__asm__ __volatile__(
 				"mov %%rdx, %%r8					\n"   // r8 = rest (rdx is reused as the word index)
 				"xor %%rdx, %%rdx					\n"   // rdx = 0, cf = 0
 			// first loop: result[i] = ss1[i] - ss2[i] - cf for the ss2_size common words
 			"1:										\n"
 				"mov (%%rsi,%%rdx,8), %%rax			\n"
 				"sbb (%%rbx,%%rdx,8), %%rax			\n"
 				"mov %%rax, (%%rdi,%%rdx,8)			\n"
 				"inc %%rdx							\n"   // inc/dec leave cf untouched
 				"dec %%rcx							\n"
 			"jnz 1b									\n"
 				"adc %%rcx, %%rcx					\n"   // rcx has the cf state
 				"or %%r8, %%r8						\n"   // rest == 0 ?
 				"jz 3f								\n"   // yes: nothing more to do, rcx is the result
 				"xor %%rbx, %%rbx					\n"   // rbx = 0
 				"neg %%rcx							\n"   // setting cf from rcx
 				"mov %%r8, %%rcx					\n"   // rcx=rest and is != 0
 			// second loop: result[i] = ss1[i] - 0 - cf for the remaining 'rest' words
 			"2:										\n"
 				"mov (%%rsi, %%rdx, 8), %%rax		\n"
 				"sbb %%rbx, %%rax 					\n"
 				"mov %%rax, (%%rdi, %%rdx, 8)		\n"
 				"inc %%rdx							\n"
 				"dec %%rcx							\n"
 			"jnz 2b									\n"
 				"adc %%rcx, %%rcx					\n"   // rcx = final borrow
 			"3:										\n"
 				: "=a" (dummy1), "=b" (dummy2), "=c" (c),       "=d" (dummy3)
 				:                "1" (ss2),     "2" (ss2_size), "3" (rest),   "S" (ss1),  "D" (result)
 				: "%r8", "cc", "memory" );
 		#endif
 		TTMATH_LOG("UInt::SubVector")
 	return c;
 	}
 	/*!
 		this method moves all bits into the left hand side
 		return value <- this <- c
@ -578,10 +480,13 @@ namespace ttmath
 	sint b = value_size;
 	uint * p1 = table;
 		#ifndef __GNUC__
 			#if defined(_M_X64)
 				c = rcl_x64(p1,b,c);
 			#else
 				#error "another compiler than GCC is currently not supported in 64bit mode"
 			#endif
 		#endif
 		#ifdef __GNUC__
 		uint dummy, dummy2;
@ -632,10 +537,13 @@ namespace ttmath
 	sint b = value_size;
 	uint * p1 = table;
 		#ifndef __GNUC__
 			#if defined(_M_X64)
 				c = rcr_x64(p1,b,c);
 			#else
 				#error "another compiler than GCC is currently not supported in 64bit mode"
 			#endif
 		#endif
 		#ifdef __GNUC__
 		uint dummy;
@ -687,10 +595,13 @@ namespace ttmath
 	uint b = value_size;
 	uint * p1 = table;
 		#ifndef __GNUC__
 			#if defined(_M_X64)
 				c = rcl2_x64(p1,b,bits,c);
 			#else
 				#error "another compiler than GCC is currently not supported in 64bit mode"
 			#endif
 		#endif
 		#ifdef __GNUC__
 		uint dummy, dummy2, dummy3;
@ -707,6 +618,7 @@ namespace ttmath
 			"xorq %%rdx, %%rdx				\n"
 			"movq %%rdx, %%rsi				\n"
 			"orq %%rax, %%rax				\n"
 			"cmovnz %%r8, %%rsi				\n"
@ -758,14 +670,19 @@ namespace ttmath
 	sint b = value_size;
 	uint * p1 = table;
 	uint dummy, dummy2, dummy3;
 		#ifndef __GNUC__
 			#if defined(_M_X64)
 				c = rcr2_x64(p1,b,bits,c);
 			#else
 				#error "another compiler than GCC is currently not supported in 64bit mode"
 			#endif
 		#endif
 		#ifdef __GNUC__
 			uint dummy, dummy2, dummy3;
 			__asm__  __volatile__(
 			"movq %%rcx, %%rsi				\n"
@ -780,6 +697,7 @@ namespace ttmath
 			"movq %%rdx, %%rsi				\n"
 			"addq %%rdi, %%rdx				\n"
 			"decq %%rdx						\n"
 			"orq %%rax, %%rax				\n"
 			"cmovnz %%R8, %%rsi				\n"
@ -820,12 +738,20 @@ namespace ttmath
 	template<uint value_size>
 	sint UInt<value_size>::FindLeadingBitInWord(uint x)
 	{
-	sint result;
+	register sint result;
 		#ifndef __GNUC__
 			#if defined(_MSC_VER)
 				unsigned long	nIndex(0);
 				if (_BitScanReverse64(&nIndex,x) == 0)
 					result = -1;
 				  else
 					result = nIndex;
 			#else
 				#error "another compiler than GCC is currently not supported in 64bit mode"
 			#endif
 		#endif
 		#ifdef __GNUC__
 		uint dummy;
@ -870,8 +796,16 @@ namespace ttmath
 		#ifndef __GNUC__
 			#if defined(_MSC_VER)
 				#if defined(TTMATH_PLATFORM64)
 					old_bit = _bittestandset64((__int64*)&value,bit) != 0;
 				  #else
 					old_bit = _bittestandset((long*)&value,bit) != 0;
 				#endif
 			#else
 				#error "another compiler than GCC is currently not supported in 64bit mode"
 			#endif
 		#endif
 		#ifdef __GNUC__
@ -925,8 +859,12 @@ namespace ttmath
 	uint result2_;
 		#ifndef __GNUC__
 			#if defined(_MSC_VER)
 				result1_ = _umul128(a,b,&result2_);
 			#else
 				#error "another compiler than GCC is currently not supported in 64bit mode"
 			#endif
 		#endif
 		#ifdef __GNUC__
@ -955,6 +893,7 @@ namespace ttmath
 	 *
 	*/
 	#ifndef __GNUC__
 	/*!
 		this method calculates 64bits word a:b / 32bits c (a higher, b lower word)
@ -982,8 +921,14 @@ namespace ttmath
 		TTMATH_ASSERT( c != 0 )
 		#ifndef __GNUC__
 			#if defined(_MSC_VER)
 				div_x64(&a,&b,c);
 				r_ = a;
 				rest_ = b;
 			#else
 				#error "another compiler than GCC is currently not supported in 64bit mode"
 			#endif
 		#endif
 		#ifdef __GNUC__
@ -1002,6 +947,132 @@ namespace ttmath
 		*rest = rest_;
 	}
 	/*!
 		this method adds two words together with an incoming carry
 		*result = a + b + (carry != 0 ? 1 : 0)   (truncated to one word)
 		returns the outgoing carry: 1 if the sum did not fit in one word, 0 otherwise
 	*/
 	template<uint value_size>
 	uint UInt<value_size>::AddTwoWords(uint a, uint b, uint carry, uint * result)
 	{
 	uint sum;
 	uint carry_out;
 		if( carry != 0 )
 		{
 			// with an incoming carry the sum wrapped around exactly when it is not greater than 'a'
 			sum       = a + b + 1;
 			carry_out = ( sum > a ) ? 0 : 1;
 		}
 		else
 		{
 			// without an incoming carry the sum wrapped around exactly when it is smaller than 'a'
 			sum       = a + b;
 			carry_out = ( sum < a ) ? 1 : 0;
 		}
 		*result = sum;
 	return carry_out;
 	}
 	/*!
 		this method subtracts 'b' and an incoming borrow from 'a'
 		*result = a - b - (carry != 0 ? 1 : 0)   (truncated to one word)
 		returns the outgoing borrow: 1 if the subtraction wrapped around, 0 otherwise
 	*/
 	template<uint value_size>
 	uint UInt<value_size>::SubTwoWords(uint a, uint b, uint carry, uint * result)
 	{
 	uint borrow_out;
 		if( carry != 0 )
 		{
 			// with an incoming borrow we wrap around unless 'a' is strictly greater than 'b'
 			*result    = a - b - 1;
 			borrow_out = ( a > b ) ? 0 : 1;
 		}
 		else
 		{
 			// without an incoming borrow we wrap around only when 'a' is smaller than 'b'
 			*result    = a - b;
 			borrow_out = ( a < b ) ? 1 : 0;
 		}
 	return borrow_out;
 	}
 	/*!
 		this static method adds the vector 'ss2' to the vector 'ss1'
 		and stores the sum in 'result'
 		ss1      - the first vector (it must not be shorter than ss2)
 		ss2      - the second vector
 		ss1_size - number of words in ss1 (and in result)
 		ss2_size - number of words in ss2
 		result   - the output vector, ss1_size words long
 		Example:  ss1_size is 5, ss2_size is 3
 		ss1:      ss2:   result (output):
 		  5        1         5+1
 		  4        3         4+3
 		  2        7         2+7
 		  6                  6
 		  9                  9
 	  the carry is propagated through all ss1_size words
 	  and the carry from the last word is returned
 	  (this method is used by the Karatsuba multiplication algorithm)
 	*/
 	template<uint value_size>
 	uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
 	{
 	uint carry = 0;
 	uint pos   = 0;
 		TTMATH_ASSERT( ss1_size >= ss2_size )
 		// the part where both vectors have words
 		while( pos < ss2_size )
 		{
 			carry = AddTwoWords(ss1[pos], ss2[pos], carry, &result[pos]);
 			++pos;
 		}
 		// the tail of ss1: only the carry is still being added
 		while( pos < ss1_size )
 		{
 			carry = AddTwoWords(ss1[pos], 0, carry, &result[pos]);
 			++pos;
 		}
 		TTMATH_LOG("UInt::AddVector")
 	return carry;
 	}
 	/*!
 		this static method subtracts the vector 'ss2' from the vector 'ss1'
 		and stores the difference in 'result'
 		ss1      - the first vector (it must not be shorter than ss2)
 		ss2      - the second vector
 		ss1_size - number of words in ss1 (and in result)
 		ss2_size - number of words in ss2
 		result   - the output vector, ss1_size words long
 		Example:  ss1_size is 5, ss2_size is 3
 		ss1:      ss2:   result (output):
 		  5        1         5-1
 		  4        3         4-3
 		  2        7         2-7
 		  6                  6-1  (the borrow from previous item)
 		  9                  9
 		                 return (carry): 0
 	  the borrow is propagated through all ss1_size words
 	  and the borrow from the last word is returned
 	  (this method is used by the Karatsuba multiplication algorithm)
 	*/
 	template<uint value_size>
 	uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
 	{
 	uint borrow = 0;
 	uint pos    = 0;
 		TTMATH_ASSERT( ss1_size >= ss2_size )
 		// the part where both vectors have words
 		while( pos < ss2_size )
 		{
 			borrow = SubTwoWords(ss1[pos], ss2[pos], borrow, &result[pos]);
 			++pos;
 		}
 		// the tail of ss1: only the borrow is still being subtracted
 		while( pos < ss1_size )
 		{
 			borrow = SubTwoWords(ss1[pos], 0, borrow, &result[pos]);
 			++pos;
 		}
 		TTMATH_LOG("UInt::SubVector")
 	return borrow;
 	}
 	#endif // #ifndef __GNUC__
 } //namespace
--- a/ttmath/ttmathuint_x86_amd64_msvc.asm
+++ b/ttmath/ttmathuint_x86_amd64_msvc.asm
@ -0,0 +1,386 @@
 PUBLIC	adc_x64
 PUBLIC	addindexed_x64
 PUBLIC	addindexed2_x64
 PUBLIC	sbb_x64
 PUBLIC	subindexed_x64
 PUBLIC	rcl_x64
 PUBLIC	rcr_x64
 PUBLIC	rcl2_x64
 PUBLIC	rcr2_x64
 PUBLIC	div_x64
 ;
 ;	"rax, rcx, rdx, r8-r11 are volatile."
 ;	"rbx, rbp, rdi, rsi, r12-r15 are nonvolatile."
 ;
 .CODE
        ALIGN       8
 ;----------------------------------------
 ; adc_x64: p1[i] += p2[i] + carry for i = 0 .. nSize-1 (multi-word add with carry)
 ; returns rax = carry out of the last word (0 or 1)
 adc_x64				PROC
        ; rcx = p1     (destination table, modified in place)
        ; rdx = p2     (source table)
        ; r8 = nSize   (number of 64-bit words)
        ; r9 = nCarry  (incoming carry, any non-zero value counts as 1)
        xor		rax, rax
        xor		r11, r11	; r11 = word index
        sub		rax, r9		; sets CARRY if r9 != 0
 		ALIGN 16
 loop1:
 		mov		rax,qword ptr [rdx + r11 * 8]
 		adc		qword ptr [rcx + r11 * 8], rax
 		lea		r11, [r11+1]	; lea (not add) so that CARRY survives to the next adc
 		dec		r8				; dec does not touch CARRY either
 		jnz		loop1
 		setc	al
 		movzx	rax, al		; rax = final carry
 		ret
 adc_x64				ENDP
 ;----------------------------------------
        ALIGN       8
 ;----------------------------------------
 ; addindexed_x64: adds nValue to p1[nPos] and propagates the carry upwards
 ; returns rax = 1 if the carry left the last word of the table, 0 otherwise
 addindexed_x64	PROC
        ; rcx = p1
        ; rdx = nSize
        ; r8 = nPos
        ; r9 = nValue
 		xor		rax, rax			; rax = result
 		sub		rdx, r8				; rdx = remaining count of uints
 		add		qword ptr [rcx + r8 * 8], r9
 		jc		next1
 		ret							; no carry: done, rax = 0
 next1:
 		mov		r9, 1				; from now on only the carry (1) is added
 		ALIGN 16
 loop1:
 		dec		rdx
 		jz		done_with_cy		; no words left: the carry leaves the table
 		lea		r8, [r8+1]
 		add		qword ptr [rcx + r8 * 8], r9
 		jc		loop1
 		ret							; carry absorbed, rax = 0
 done_with_cy:
 		lea		rax, [rax+1]		; rax = 1
 		ret
 addindexed_x64	ENDP
 ;----------------------------------------
        ALIGN       8
 ;----------------------------------------
 ; addindexed2_x64: adds the two-word value nValue2:nValue1 to the table at nPos
 ;   p1[nPos]   += nValue1
 ;   p1[nPos+1] += nValue2 + carry
 ; and propagates the carry through the following words
 ; returns rax = 1 if the carry left the last word of the table, 0 otherwise
 addindexed2_x64	PROC
        ; rcx = p1 (pointer)
        ; rdx = b  (value size)
        ; r8 = nPos
        ; r9 = nValue1
        ; [rsp+0x28] = nValue2 (fifth argument: return address + 32 bytes of home space)
 		xor		rax, rax			; return value
 		mov		r11, rcx			; table
 		sub		rdx, r8				; rdx = remaining count of uints
 		; the stack must be addressed through the full 64-bit rsp: using esp
 		; truncates the pointer and breaks as soon as the stack lives above 4 GiB
 		mov		r10, qword ptr [rsp+028h]		; r10 = nValue2
 		add		qword ptr [r11 + r8 * 8], r9
 		lea		r8, [r8+1]			; lea keeps CY for the adc below
 		lea		rdx, [rdx-1]
 		adc		qword ptr [r11 + r8 * 8], r10
 		jc		next
 		ret
 		ALIGN 16
 loop1:
 		lea		r8, [r8+1]
 		add		qword ptr [r11 + r8 * 8], 1
 		jc		next
 		ret
 next:
 		dec		rdx					; does not modify CY too...
 		jnz		loop1
 		lea		rax, [rax+1]		; no words left: the carry leaves the table, rax = 1
 		ret
 addindexed2_x64	ENDP
 ;----------------------------------------
        ALIGN       8
 ;----------------------------------------
 ; sbb_x64: p1[i] -= p2[i] + borrow for i = 0 .. nCount-1 (multi-word subtract with borrow)
 ; returns rax = borrow out of the last word (0 or 1)
 sbb_x64				PROC
        ; rcx = p1     (destination table, modified in place)
        ; rdx = p2     (source table)
        ; r8 = nCount  (number of 64-bit words)
        ; r9 = nCarry  (incoming borrow, any non-zero value counts as 1)
        xor		rax, rax
        xor		r11, r11			; r11 = word index
        sub		rax, r9				; sets CARRY if r9 != 0
 		ALIGN 16
 loop1:
 		mov		rax,qword ptr [rdx + r11 * 8]
 		sbb		qword ptr [rcx + r11 * 8], rax
 		lea		r11, [r11+1]		; lea (not add) so that CARRY survives to the next sbb
 		dec		r8					; dec does not touch CARRY either
 		jnz		loop1
 		setc	al
 		movzx	rax, al				; rax = final borrow
 		ret
 sbb_x64				ENDP
 ;----------------------------------------
        ALIGN       8
 ;----------------------------------------
 ; subindexed_x64: subtracts nValue from p1[nPos] and propagates the borrow upwards
 ; returns rax = 1 if the borrow left the last word of the table, 0 otherwise
 subindexed_x64	PROC
        ; rcx = p1
        ; rdx = nSize
        ; r8 = nPos
        ; r9 = nValue
 		sub		rdx, r8				; rdx = remaining count of uints
 		ALIGN 16
 loop1:
 		sub		qword ptr [rcx + r8 * 8], r9
 		jnc		done				; no borrow: finished
 		lea		r8, [r8+1]
 		mov		r9, 1				; from now on only the borrow (1) is subtracted
 		dec		rdx					; lea/mov/dec leave CARRY as set by the sub above
 		jnz		loop1
 		jc		return_1	; most of the times, there will be NO carry (I hope)
 done:
 		xor		rax, rax
 		ret
  return_1:
 		mov		rax, 1
 		ret
 subindexed_x64	ENDP
 ;----------------------------------------
        ALIGN       8
 ;----------------------------------------
 ; rcl_x64: shifts the whole table left by one bit, from the lowest word upwards
 ; nLowestBit is shifted into bit 0 of the first word
 ; returns rax = the bit shifted out of the last word (0 or 1)
 rcl_x64	PROC
        ; rcx = p1
        ; rdx = b           (number of 64-bit words)
        ; r8 = nLowestBit   (any non-zero value counts as 1)
 		mov		r11, rcx			; table
 		xor		r10, r10			; r10 = word index
 		neg		r8					; CY set if r8 <> 0
 		ALIGN 16
 loop1:
 		rcl		qword ptr [r11 + r10 * 8], 1
 		lea		r10, [r10+1]		; lea/dec keep CY for the next rcl
 		dec		rdx
 		jnz		loop1
 		setc	al
 		movzx	rax, al				; rax = last bit shifted out
        ret
 rcl_x64	ENDP
 ;----------------------------------------
        ALIGN       8
 ;----------------------------------------
 ; rcr_x64: shifts the whole table right by one bit, from the highest word downwards
 ; the incoming bit is shifted into the top bit of the last word
 ; returns rax = the bit shifted out of the first word (0 or 1)
 rcr_x64	PROC
        ; rcx = p1
        ; rdx = nSize
        ; r8 = incoming bit (any non-zero value counts as 1)
 		xor		r10, r10			; NOTE(review): r10 is not used below - looks like a leftover
 		neg		r8					; CY set if r8 <> 0
 		ALIGN 16
 loop1:
 		rcr		qword ptr -8[rcx + rdx * 8], 1	; word at index rdx-1
 		dec		rdx					; dec keeps CY for the next rcr
 		jnz		loop1
 		setc	al
 		movzx	rax, al				; rax = last bit shifted out
        ret
 rcr_x64	ENDP
 ;----------------------------------------
        ALIGN       8
 ;----------------------------------------
 ; div_x64: divides the 128-bit value *Hi:*Lo by nDiv
 ; on return *Hi holds the quotient and *Lo holds the remainder
 ; NOTE(review): 'div' faults when the quotient does not fit in 64 bits,
 ; so callers presumably guarantee *Hi < nDiv - confirm
 div_x64	PROC
        ; rcx = &Hi
        ; rdx = &Lo
        ; r8 = nDiv
        mov		r11, rcx
        mov		r10, rdx			; rdx is needed by 'div', keep the pointer in r10
        mov		rdx, qword ptr [r11]
        mov		rax, qword ptr [r10]
        div		r8
        mov		qword ptr [r10], rdx ; remainder
        mov		qword ptr [r11], rax ; value
        ret
 div_x64	ENDP
 ;----------------------------------------
        ALIGN       8
 ;----------------------------------------
 ; rcl2_x64: shifts the whole table left by 'bits' bits, from the lowest word upwards
 ; the vacated low bits of the first word are filled with ones if c != 0, else with zeros
 ; returns rax = the last bit shifted out of the highest word (0 or 1)
 ; NOTE(review): the mask computation presumably requires 0 < bits < 64 - confirm with the caller
 rcl2_x64	PROC
        ; rcx = p1
        ; rdx = nSize
        ; r8 = bits
        ; r9 = c
        push	rbx
        mov		r10, rcx	; r10 = p1
        xor		rax, rax
        mov		rcx, 64
        sub		rcx, r8
        mov		r11, -1
        shr		r11, cl		; r11 = mask
 		mov		rcx, r8		; rcx = count of bits
 		mov		rbx, rax	; rbx = old value = 0
 		or		r9, r9
 		cmovnz	rbx, r11	; if (c) then old value = mask
        mov		r9, rax		; r9 = index (0..nSize-1)
 		ALIGN 16
 loop1:
 		rol		qword ptr [r10+r9*8], cl	; the bits leaving the top are rotated to the bottom
 		mov		rax, qword ptr [r10+r9*8]
 		and		rax, r11					; rax = bits that belong to the next word
 		xor		qword ptr [r10+r9*8], rax	; clear them in this word
 		or		qword ptr [r10+r9*8], rbx	; and insert the bits carried from the previous word
 		mov		rbx, rax
 		lea		r9, [r9+1]
 		dec		rdx
 		jnz		loop1
 		and		rax, 1		; lowest carried-out bit = last bit shifted out
 		pop		rbx
        ret
 rcl2_x64	ENDP
 ;----------------------------------------
        ALIGN       8
 ;----------------------------------------
 ; rcr2_x64: shifts the whole table right by 'bits' bits, from the highest word downwards
 ; the vacated high bits of the last word are filled with ones if c != 0, else with zeros
 ; returns rax = the last bit shifted out of the lowest word (0 or 1)
 ; NOTE(review): the mask computation presumably requires 0 < bits < 64 - confirm with the caller
 rcr2_x64	PROC
        ; rcx = p1
        ; rdx = nSize
        ; r8 = bits
        ; r9 = c
        push	rbx
        mov		r10, rcx	; r10 = p1
        xor		rax, rax
        mov		rcx, 64
        sub		rcx, r8
        mov		r11, -1
        shl		r11, cl		; r11 = mask
 		mov		rcx, r8		; rcx = count of bits
 		mov		rbx, rax	; rbx = old value = 0
 		or		r9, r9
 		cmovnz	rbx, r11	; if (c) then old value = mask
        mov		r9, rdx		; r9 = index (0..nSize-1)
 		lea		r9, [r9-1]	; start at the highest word
 		ALIGN 16
 loop1:
 		ror		qword ptr [r10+r9*8], cl	; the bits leaving the bottom are rotated to the top
 		mov		rax, qword ptr [r10+r9*8]
 		and		rax, r11					; rax = bits that belong to the next lower word
 		xor		qword ptr [r10+r9*8], rax	; clear them in this word
 		or		qword ptr [r10+r9*8], rbx	; and insert the bits carried from the previous word
 		mov		rbx, rax
 		lea		r9, [r9-1]
 		dec		rdx
 		jnz		loop1
 		rol		rax, 1		; move the top carried-out bit down to bit 0
 		and		rax, 1		; = last bit shifted out
 		pop		rbx
        ret
 rcr2_x64	ENDP
 END
Author	SHA1	Message	Date
Christian Kaiser	51e938eaa7	- update to current root trunc's version - update to root trunc's UNICODE support git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@182 e52654a7-88a9-db11-a3e9-0013d4bc506e	2009-07-29 10:46:48 +00:00
Christian Kaiser	e102086f80	- fixed a bug in 64 bit ASM for MSVC git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@181 e52654a7-88a9-db11-a3e9-0013d4bc506e	2009-07-28 16:34:04 +00:00
Christian Kaiser	51b2c974a1	- changed "AboutEqualWithoutSign()" to "AboutEqual()" because we need to take the sign into account! git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@173 e52654a7-88a9-db11-a3e9-0013d4bc506e	2009-06-26 15:24:27 +00:00
Christian Kaiser	5597373093	- "streamlined" ttmathconfig.h a bit: a) Unicode support if TTMATH_USE_WCHAR is set (compiler must know wchar_t etc, of course) b) threading synchonisation uses WIN32 instead of __MSVC__ define, as this is OS dependent, not compiler dependent git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@172 e52654a7-88a9-db11-a3e9-0013d4bc506e	2009-06-26 11:14:51 +00:00
Christian Kaiser	de58378488	- added AboutEqualWithoutSign() to big<> to allow 'suppression' of some unexpected results (that are perfectly logical though, given the possibly unrepresentable nature of binary representation of decimals) like big<>("10.456466") * 2 == big<>("20.912932") resulting in FALSE result. git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@171 e52654a7-88a9-db11-a3e9-0013d4bc506e	2009-06-25 14:11:17 +00:00
Christian Kaiser	de64608eba	Merged against the current original ttmath trunk git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@170 e52654a7-88a9-db11-a3e9-0013d4bc506e	2009-06-25 11:07:55 +00:00
Christian Kaiser	be8913866a	- 32 bit ASM code and ASSERTS did not work as the ASM code put its result in EAX, but the ASSERT afterwards did destroy the EAX's contents, of course. git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@155 e52654a7-88a9-db11-a3e9-0013d4bc506e	2009-05-28 14:42:19 +00:00
Christian Kaiser	b31d34ebdd	- fixed a bug in ttmath.g (missing closing brace in Cos()) git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@154 e52654a7-88a9-db11-a3e9-0013d4bc506e	2009-05-28 11:52:31 +00:00
Christian Kaiser	be821b59dd	- optimizations git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@153 e52654a7-88a9-db11-a3e9-0013d4bc506e	2009-05-28 11:31:29 +00:00
Christian Kaiser	de1e7ac957	more optimizations for MSVC assembler (parallelism, prefetch optimization, loop alignment, ...) git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@151 e52654a7-88a9-db11-a3e9-0013d4bc506e	2009-05-20 08:48:51 +00:00
Christian Kaiser	fdc292e91a	current chk version - too many changes on both sides for now ;-( git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@150 e52654a7-88a9-db11-a3e9-0013d4bc506e	2009-05-19 10:50:41 +00:00
Christian Kaiser	9b576ddbe2	- corrected 64 bit assembler code (ebx was not preserved) - minor optimization git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@147 e52654a7-88a9-db11-a3e9-0013d4bc506e	2009-05-15 14:42:43 +00:00
Christian Kaiser	a8c3a506ea	MSVC ASM improvements (no register saves necessary, as this is done automatically by the C compiler) git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@146 e52654a7-88a9-db11-a3e9-0013d4bc506e	2009-05-14 12:59:12 +00:00
Christian Kaiser	3ba94dca90	git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@145 e52654a7-88a9-db11-a3e9-0013d4bc506e	2009-05-11 12:30:05 +00:00
Christian Kaiser	cae50cd425	- merged Tomasz' version 0.8.5 git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@144 e52654a7-88a9-db11-a3e9-0013d4bc506e	2009-05-11 12:25:25 +00:00
Christian Kaiser	00e39d3608	added thread-safety to static history buffers (factorial and logarithm) for MSVC git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@135 e52654a7-88a9-db11-a3e9-0013d4bc506e	2009-05-07 11:37:10 +00:00
Christian Kaiser	37379d2f1f	- fulfills test file log diff (32 and 64 bit) - macro for issuing the debug output to something else than std::out if specified git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@134 e52654a7-88a9-db11-a3e9-0013d4bc506e	2009-05-07 09:33:57 +00:00
Christian Kaiser	d7b67e4d47	- minor changes for ASSERT macros - some more "unification" of 32 and 64 bits in typedefs - use of 'char' instead of 'unsigned char', as I may hope that 'char' usually is set to 'unsigned' in most development environments git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@133 e52654a7-88a9-db11-a3e9-0013d4bc506e	2009-05-06 15:47:15 +00:00
Christian Kaiser	c91bd24e98	- support for MS specific code (__int64 etc) and warnings - support for AMD64 assembler (not thoroughly tested) - support for UNICODE I/O (strings and streams) git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@132 e52654a7-88a9-db11-a3e9-0013d4bc506e	2009-05-06 15:11:29 +00:00
Christian Kaiser	cbc12db22f	dummy commit (user/password checking) git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@131 e52654a7-88a9-db11-a3e9-0013d4bc506e	2009-05-06 13:24:00 +00:00
Tomasz Sowa	3e9bd5b093	creating a chk branch for ChristianK git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@130 e52654a7-88a9-db11-a3e9-0013d4bc506e	2009-05-06 13:16:56 +00:00