From 85945b2bb06963a96bedf14012b0077d91220b90 Mon Sep 17 00:00:00 2001
From: Tomasz Sowa <t.sowa@ttmath.org>
Date: Mon, 4 May 2009 20:51:12 +0000
Subject: [PATCH] added:   ttmathuint_x86.h, ttmathuint_x86_64.h,
 ttmathuint_noasm.h,          all the methods which are using assembler code
 have been          rewritten to no-asm forms, now we have:          1. asm
 for x86      file: ttmathuint_x86.h          2. asm for x86_64   file:
 ttmathuint_x86_64.h          3. no asm           file: ttmathuint_noasm.h    
         (it's used when macro TTMATH_NOASM is defined)             The third
 form can be used on x86 and x86_64 as well and             on other platforms
 with a little effort.             (Temporarily I left there some '#ifdef's
 for debugging.)

git-svn-id: svn://ttmath.org/publicrep/ttmath/trunk@126 e52654a7-88a9-db11-a3e9-0013d4bc506e
---
 CHANGELOG                                     |   18 +-
 ttmath/ttmathtypes.h                          |   23 +-
 ttmath/ttmathuint.h                           | 1408 +++--------------
 ttmath/ttmathuint_noasm.h                     |  885 +++++++++++
 ttmath/ttmathuint_x86.h                       | 1281 +++++++++++++++
 .../{ttmathuint64.h => ttmathuint_x86_64.h}   |  173 +-
 6 files changed, 2436 insertions(+), 1352 deletions(-)
 create mode 100644 ttmath/ttmathuint_noasm.h
 create mode 100644 ttmath/ttmathuint_x86.h
 rename ttmath/{ttmathuint64.h => ttmathuint_x86_64.h} (83%)

diff --git a/CHANGELOG b/CHANGELOG
index 35755b0..7a634a2 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,9 +1,23 @@
-Version 0.8.4 prerelease (2009.05.01):
+Version 0.8.4 prerelease (2009.05.04):
+    * fixed:   UInt::DivInt() didn't check whether the divisor is zero
+               there was a hardware interruption when the divisor was zero
+               (now the method returns one)
 	* added:   UInt::PrintLog(const char * msg, std::ostream & output)
-               used for debugging purposes by macro TTMATH_LOG(msg)
+               used (for debugging purposes) by macro TTMATH_LOG(msg)
                (it is used in nearly all methods in UInt class)
     * added:   macro TTMATH_DEBUG_LOG: when defined then TTMATH_LOG() 
                put some debug information (to std::cout)
+    * added:   ttmathuint_x86.h, ttmathuint_x86_64.h, ttmathuint_noasm.h, 
+               all the methods which are using assembler code have been 
+               rewritten to no-asm forms, now we have:
+               1. asm for x86      file: ttmathuint_x86.h
+               2. asm for x86_64   file: ttmathuint_x86_64.h
+               3. no asm           file: ttmathuint_noasm.h
+                  (it's used when macro TTMATH_NOASM is defined)
+               The third form can be used on x86 and x86_64 as well and
+               on other platforms with a little effort.
+               (Temporarily I left there some '#ifdef's for debugging.)
+                              
 
 Version 0.8.3 (2009.04.06):
     * fixed:   RclMoveAllWords() and RcrMoveAllWords() sometimes didn't return
diff --git a/ttmath/ttmathtypes.h b/ttmath/ttmathtypes.h
index 42ac7e4..be98f12 100644
--- a/ttmath/ttmathtypes.h
+++ b/ttmath/ttmathtypes.h
@@ -83,7 +83,7 @@
 	 gcc -DTTMATH_RELEASE -o myprogram myprogram.cpp 
 	or by defining this macro in your code before using any header files of this library
 
-	if TTMATH_RELEASE is not set then TTMATH_DEBUG is set	
+	if TTMATH_RELEASE is not set then TTMATH_DEBUG is set automatically
 */
 #ifndef TTMATH_RELEASE
 	#define TTMATH_DEBUG
@@ -120,6 +120,18 @@ namespace ttmath
 	typedef unsigned int uint;
 	typedef signed   int sint;
 
+
+	/*!
+		this type is twice as big as uint
+		(64bit on 32bit platforms)
+
+		the C++ Standard - ANSI ISO IEC 14882:2003 doesn't define such a type (long long), 
+		but it is defined in C99 and in the upcoming C++0x /3.9.1 (2)/, and many compilers support it
+
+		this type is used in UInt::MulTwoWords and UInt::DivTwoWords when macro TTMATH_NOASM is defined
+	*/
+	typedef unsigned long long int ulint;
+
 	/*!
 		how many bits there are in the uint type
 	*/
@@ -151,6 +163,15 @@ namespace ttmath
 	typedef unsigned long uint;
 	typedef signed   long sint;
 
+	/*!
+		on 64bit platform we do not define ulint
+		sizeof(long long) is 8 (64bit) but we need 128bit
+
+		on a 64bit platform (when the TTMATH_NOASM macro is defined)
+		the methods UInt::MulTwoWords and UInt::DivTwoWords use different algorithms than those on 32bit
+	*/
+	//typedef unsigned long long int ulint;
+
 	/*!
 		how many bits there are in the uint type
 	*/
diff --git a/ttmath/ttmathuint.h b/ttmath/ttmathuint.h
index b39a623..d7cc7b8 100644
--- a/ttmath/ttmathuint.h
+++ b/ttmath/ttmathuint.h
@@ -241,6 +241,74 @@ public:
 		TTMATH_LOG("UInt32::SetFromTable")
 	}
 
+#endif
+
+
+#ifdef TTMATH_PLATFORM64
+	/*!
+		this method copies the value stored in another table
+		(warning: first values in temp_table are the highest words -- it's different
+		from our table)
+
+		***this method is created only on a 64bit platform***
+
+		we copy as many words as it is possible
+		
+		if temp_table_len is bigger than value_size we'll try to round 
+		the lowest word from table depending on the last not used bit in temp_table
+		(this rounding isn't a perfect rounding -- look at the description below)
+
+		and if temp_table_len is smaller than value_size we'll clear the rest words
+		in the table
+
+		warning: we're using 'temp_table' as a pointer to 32bit words
+	*/
+	void SetFromTable(const unsigned int * temp_table, uint temp_table_len)
+	{
+		uint temp_table_index = 0;
+		sint i; // 'i' with a sign
+
+		for(i=value_size-1 ; i>=0 && temp_table_index<temp_table_len; --i, ++temp_table_index)
+		{
+			table[i] = uint(temp_table[ temp_table_index ]) << 32;
+
+			++temp_table_index;
+
+			if( temp_table_index<temp_table_len )
+				table[i] |= temp_table[ temp_table_index ];
+		}
+
+		// rounding mantissa: temp_table holds 32bit words, so we test bit 31 of the first
+		// unused word (a mask for the highest bit of the wider uint type would never match)
+		if( temp_table_index < temp_table_len )
+		{
+			if( (temp_table[temp_table_index] & 0x80000000u) != 0 )
+			{
+				/*
+					very simple rounding
+					if the highest bit of the first unused word from temp_table is set to one
+					we're rounding the lowest word in the table
+
+					in fact there should be a normal addition but
+					we don't use Add() or AddTwoInts() because these methods 
+					can set a carry and then there'll be a small problem
+					for optimization
+				*/
+				if( table[0] != TTMATH_UINT_MAX_VALUE )
+					++table[0];
+			}
+		}
+
+		// cleaning the rest of the mantissa
+		for( ; i >= 0 ; --i)
+			table[i] = 0;
+
+		TTMATH_LOG("UInt64::SetFromTable")
+	}
+
+#endif
+
+
 
 
 
@@ -251,530 +319,6 @@ public:
 	*/
 
 
-	/*!
-		adding ss2 to the this and adding carry if it's defined
-		(this = this + ss2 + c)
-
-		c must be zero or one (might be a bigger value than 1)
-		function returns carry (1) (if it has been)
-	*/
-	uint Add(const UInt<value_size> & ss2, uint c=0)
-	{
-	register uint b = value_size;
-	register uint * p1 = table;
-	register uint * p2 = const_cast<uint*>(ss2.table);
-
-		// we don't have to use TTMATH_REFERENCE_ASSERT here
-		// this algorithm doesn't require it
-
-		#ifndef __GNUC__
-			
-			//	this part might be compiled with for example visual c
-
-			__asm
-			{
-				push eax
-				push ebx
-				push ecx
-				push edx
-				push esi
-
-				mov ecx,[b]
-				
-				mov ebx,[p1]
-				mov esi,[p2]
-
-				xor eax,eax  // eax=0
-				mov edx,eax  // edx=0
-
-				sub eax,[c]  // CF=c
-
-			p:
-				mov eax,[esi+edx*4]
-				adc [ebx+edx*4],eax
-
-				inc edx
-				dec ecx
-			jnz p
-
-				setc al
-				movzx edx, al
-				mov [c], edx
-
-				pop esi
-				pop edx
-				pop ecx
-				pop ebx
-				pop eax
-			}
-
-
-
-		#endif		
-			
-
-		#ifdef __GNUC__
-			
-			//	this part should be compiled with gcc
-			
-			__asm__ __volatile__(
-			
-				"push %%ecx						\n"
-			
-				"xorl %%eax, %%eax				\n"
-				"movl %%eax, %%edx				\n"
-				"subl %%edi, %%eax				\n"
-
-
-			"1:									\n"
-				"movl (%%esi,%%edx,4),%%eax		\n"
-				"adcl %%eax, (%%ebx,%%edx,4)	\n"
-			
-				"incl %%edx						\n"
-				"decl %%ecx						\n"
-			"jnz 1b								\n"
-
-				"setc %%al						\n"
-				"movzx %%al,%%edx				\n"
-
-				"pop %%ecx						\n"
-
-				: "=d" (c)
-				: "D" (c), "c" (b), "b" (p1), "S" (p2)
-				: "%eax", "cc", "memory" );
-
-		#endif
-
-		TTMATH_LOG("UInt32::Add")
-
-	return c;
-	}
-
-
-	/*!
-		adding one word (at a specific position)
-		and returning a carry (if it has been)
-
-		e.g.
-
-		if we've got (value_size=3):
-			table[0] = 10;
-			table[1] = 30;
-			table[2] = 5;	
-		and we call:
-			AddInt(2,1)
-		then it'll be:
-			table[0] = 10;
-			table[1] = 30 + 2;
-			table[2] = 5;
-
-		of course if there was a carry from table[2] it would be returned
-	*/
-	uint AddInt(uint value, uint index = 0)
-	{
-	register uint b = value_size;
-	register uint * p1 = table;
-	register uint c;
-
-		TTMATH_ASSERT( index < value_size )
-
-		#ifndef __GNUC__
-
-			__asm
-			{
-				push eax
-				push ebx
-				push ecx
-				push edx
-
-				mov ecx, [b]
-				sub ecx, [index]				
-
-				mov edx, [index]
-				mov ebx, [p1]
-
-				mov eax, [value]
-
-			p:
-				add [ebx+edx*4], eax
-			jnc end
-
-				mov eax, 1
-				inc edx
-				dec ecx
-			jnz p
-
-			end:
-				setc al
-				movzx edx, al
-				mov [c], edx
-
-				pop edx
-				pop ecx
-				pop ebx
-				pop eax
-			}
-
-		#endif		
-			
-
-		#ifdef __GNUC__
-			__asm__ __volatile__(
-			
-				"push %%eax						\n"
-				"push %%ecx						\n"
-
-				"subl %%edx, %%ecx 				\n"
-
-			"1:									\n"
-				"addl %%eax, (%%ebx,%%edx,4)	\n"
-			"jnc 2f								\n"
-				
-				"movl $1, %%eax					\n"
-				"incl %%edx						\n"
-				"decl %%ecx						\n"
-			"jnz 1b								\n"
-
-			"2:									\n"
-				"setc %%al						\n"
-				"movzx %%al, %%edx				\n"
-
-				"pop %%ecx						\n"
-				"pop %%eax						\n"
-
-				: "=d" (c)
-				: "a" (value), "c" (b), "0" (index), "b" (p1)
-				: "cc", "memory" );
-
-		#endif
-	
-		TTMATH_LOG("UInt32::AddInt")
-
-	return c;
-	}
-
-
-
-	/*!
-		adding only two unsigned words to the existing value
-		and these words begin on the 'index' position
-		(it's used in the multiplication algorithm 2)
-
-		index should be equal or smaller than value_size-2 (index <= value_size-2)
-		x1 - lower word, x2 - higher word
-
-		for example if we've got value_size equal 4 and:
-			table[0] = 3
-			table[1] = 4
-			table[2] = 5
-			table[3] = 6
-		then let
-			x1 = 10
-			x2 = 20
-		and
-			index = 1
-
-		the result of this method will be:
-			table[0] = 3
-			table[1] = 4 + x1 = 14
-			table[2] = 5 + x2 = 25
-			table[3] = 6
-		
-		and no carry at the end of table[3]
-
-		(of course if there was a carry in table[2](5+20) then 
-		this carry would be passed to the table[3] etc.)
-	*/
-	uint AddTwoInts(uint x2, uint x1, uint index)
-	{
-	register uint b = value_size;
-	register uint * p1 = table;
-	register uint c;
-
-		TTMATH_ASSERT( index < value_size - 1 )
-
-		#ifndef __GNUC__
-			__asm
-			{
-				push eax
-				push ebx
-				push ecx
-				push edx
-
-				mov ecx, [b]
-				sub ecx, [index]				
-
-				mov ebx, [p1]
-				mov edx, [index]
-
-				mov eax, [x1]
-				add [ebx+edx*4], eax
-				inc edx
-				dec ecx
-
-				mov eax, [x2]
-			
-			p:
-				adc [ebx+edx*4], eax
-			jnc end
-
-				mov eax, 0
-				inc edx
-				dec ecx
-			jnz p
-
-			end:
-				setc al
-				movzx edx, al
-				mov [c], edx
-				
-				pop edx
-				pop ecx
-				pop ebx
-				pop eax
-
-			}
-		#endif		
-			
-
-		#ifdef __GNUC__
-			__asm__ __volatile__(
-			
-				"push %%ecx						\n"
-				"push %%edx						\n"
-
-				"subl %%edx, %%ecx 				\n"
-				
-				"addl %%esi, (%%ebx,%%edx,4) 	\n"
-				"incl %%edx						\n"
-				"decl %%ecx						\n"
-
-			"1:									\n"
-				"adcl %%eax, (%%ebx,%%edx,4)	\n"
-			"jnc 2f								\n"
-
-				"mov $0, %%eax					\n"
-				"incl %%edx						\n"
-				"decl %%ecx						\n"
-			"jnz 1b								\n"
-
-			"2:									\n"
-				"setc %%al						\n"
-				"movzx %%al, %%eax				\n"
-
-				"pop %%edx						\n"
-				"pop %%ecx						\n"
-
-				: "=a" (c)
-				: "c" (b), "d" (index), "b" (p1), "S" (x1), "0" (x2)
-				: "cc", "memory" );
-
-		#endif
-
-		TTMATH_LOG("UInt32::AddTwoInts")
-	
-	return c;
-	}
-
-
-
-
-
-	/*!
-		subtracting ss2 from the 'this' and subtracting
-		carry if it has been defined
-		(this = this - ss2 - c)
-
-		c must be zero or one (might be a bigger value than 1)
-		function returns carry (1) (if it has been)
-	*/
-	uint Sub(const UInt<value_size> & ss2, uint c=0)
-	{
-	register uint b = value_size;
-	register uint * p1 = table;
-	register uint * p2 = const_cast<uint*>(ss2.table);
-
-		// we don't have to use TTMATH_REFERENCE_ASSERT here
-		// this algorithm doesn't require it
-
-		#ifndef __GNUC__
-
-			__asm
-			{
-				push eax
-				push ebx
-				push ecx
-				push edx
-				push esi
-
-				mov ecx,[b]
-				
-				mov ebx,[p1]
-				mov esi,[p2]
-
-				xor eax, eax
-				mov edx, eax
-
-				sub eax, [c]
-
-			p:
-				mov eax, [esi+edx*4]
-				sbb [ebx+edx*4], eax
-
-				inc edx
-				dec ecx
-			jnz p
-
-				setc al
-				movzx edx, al
-				mov [c], edx
-
-				pop esi
-				pop edx
-				pop ecx
-				pop ebx
-				pop eax
-			}
-
-		#endif
-
-
-		#ifdef __GNUC__
-			__asm__  __volatile__(
-			
-				"push %%ecx						\n"
-			
-				"xorl %%eax, %%eax				\n"
-				"movl %%eax, %%edx				\n"
-				"subl %%edi, %%eax				\n"
-
-
-			"1:									\n"
-				"movl (%%esi,%%edx,4),%%eax		\n"
-				"sbbl %%eax, (%%ebx,%%edx,4)	\n"
-			
-				"incl %%edx						\n"
-				"decl %%ecx						\n"
-			"jnz 1b								\n"
-
-				"setc %%al						\n"
-				"movzx %%al,%%edx				\n"
-
-				"pop %%ecx						\n"
-
-				: "=d" (c)
-				: "D" (c), "c" (b), "b" (p1), "S" (p2)
-				: "%eax", "cc", "memory" );
-
-		#endif
-
-		TTMATH_LOG("UInt32::Sub")
-
-	return c;
-	}
-
-
-	/*!
-		this method subtracts one word (at a specific position)
-		and returns a carry (if it was)
-
-		e.g.
-
-		if we've got (value_size=3):
-			table[0] = 10;
-			table[1] = 30;
-			table[2] = 5;	
-		and we call:
-			SubInt(2,1)
-		then it'll be:
-			table[0] = 10;
-			table[1] = 30 - 2;
-			table[2] = 5;
-
-		of course if there was a carry from table[3] it would be returned
-	*/
-	uint SubInt(uint value, uint index = 0)
-	{
-	register uint b = value_size;
-	register uint * p1 = table;
-	register uint c;
-
-		TTMATH_ASSERT( index < value_size )
-
-		#ifndef __GNUC__
-			__asm
-			{
-				push eax
-				push ebx
-				push ecx
-				push edx
-
-				mov ecx, [b]
-				sub ecx, [index]				
-
-				mov edx, [index]
-				mov ebx, [p1]
-
-				mov eax, [value]
-
-			p:
-				sub [ebx+edx*4], eax
-			jnc end
-
-				mov eax, 1
-				inc edx
-				dec ecx
-			jnz p
-
-			end:
-				setc al
-				movzx edx, al
-				mov [c], edx
-
-				pop edx
-				pop ecx
-				pop ebx
-				pop eax
-			}
-		#endif		
-			
-
-		#ifdef __GNUC__
-			__asm__ __volatile__(
-			
-				"push %%eax						\n"
-				"push %%ecx						\n"
-
-				"subl %%edx, %%ecx 				\n"
-
-			"1:									\n"
-				"subl %%eax, (%%ebx,%%edx,4)	\n"
-			"jnc 2f								\n"
-				
-				"movl $1, %%eax					\n"
-				"incl %%edx						\n"
-				"decl %%ecx						\n"
-			"jnz 1b								\n"
-
-			"2:									\n"
-				"setc %%al						\n"
-				"movzx %%al, %%edx				\n"
-
-				"pop %%ecx						\n"
-				"pop %%eax						\n"
-
-				: "=d" (c)
-				: "a" (value), "c" (b), "0" (index), "b" (p1)
-				: "cc", "memory" );
-
-		#endif
-		
-		TTMATH_LOG("UInt32::SubInt")
-	
-	return c;
-	}
-
-#endif
 
 
 	/*!
@@ -798,430 +342,6 @@ public:
 private:
 
 
-#ifdef TTMATH_PLATFORM32
-
-
-	/*!
-		this method moves all bits into the left hand side
-		return value <- this <- c
-
-		the lowest *bit* will be held the 'c' and
-		the state of one additional bit (on the left hand side)
-		will be returned
-
-		for example:
-		let this is 001010000
-		after Rcl2_one(1) there'll be 010100001 and Rcl2_one returns 0
-	*/
-	uint Rcl2_one(uint c)
-	{
-	register sint b = value_size;
-	register uint * p1 = table;
-
-		#ifndef __GNUC__
-			__asm
-			{
-				push ebx
-				push ecx
-				push edx
-
-				mov ebx, [p1]
-
-				xor edx, edx
-				mov ecx, edx
-				sub ecx, [c]
-
-				mov ecx, [b]
-
-			p:
-				rcl dword ptr [ebx+edx*4], 1
-				
-				inc edx
-				dec ecx
-			jnz p
-
-				setc dl
-				movzx edx, dl
-				mov [c], edx
-
-				
-				pop edx
-				pop ecx
-				pop ebx
-			}
-		#endif
-
-
-		#ifdef __GNUC__
-		__asm__  __volatile__(
-
-			"push %%edx					\n"
-			"push %%ecx					\n"
-
-			"xorl %%edx, %%edx			\n"   // edx=0
-			"neg %%eax					\n"   // CF=1 if eax!=0 , CF=0 if eax==0
-
-		"1:								\n"
-			"rcll $1, (%%ebx, %%edx, 4)	\n"
-
-			"incl %%edx					\n"
-			"decl %%ecx					\n"
-		"jnz 1b							\n"
-
-			"setc %%al					\n"
-			"movzx %%al, %%eax			\n"
-
-			"pop %%ecx					\n"
-			"pop %%edx					\n"
-
-			: "=a" (c)
-			: "0" (c), "c" (b), "b" (p1)
-			: "cc", "memory" );
-
-		#endif
-
-		TTMATH_LOG("UInt32::Rcl2_one")
-
-	return c;
-	}
-
-
-	/*!
-		this method moves all bits into the right hand side
-		c -> this -> return value
-
-		the highest *bit* will be held the 'c' and
-		the state of one additional bit (on the right hand side)
-		will be returned
-
-		for example:
-		let this is 000000010
-		after Rcr2_one(1) there'll be 100000001 and Rcr2_one returns 0
-	*/
-	uint Rcr2_one(uint c)
-	{
-	register sint b = value_size;
-	register uint * p1 = table;
-
-		#ifndef __GNUC__
-			__asm
-			{
-				push ebx
-				push ecx
-
-				mov ebx, [p1]
-
-				xor ecx, ecx
-				sub ecx, [c]
-
-				mov ecx, [b]
-
-			p:
-				rcr dword ptr [ebx+ecx*4-4], 1
-				
-				dec ecx
-			jnz p
-
-				setc cl
-				movzx ecx, cl
-				mov [c], ecx
-
-				pop ecx
-				pop ebx
-			}
-		#endif
-
-
-		#ifdef __GNUC__
-		__asm__  __volatile__(
-
-			"push %%ecx						\n"
-
-			"neg %%eax						\n"   // CF=1 if eax!=0 , CF=0 if eax==0
-
-		"1:									\n"
-			"rcrl $1, -4(%%ebx, %%ecx, 4)	\n"
-
-			"decl %%ecx						\n"
-		"jnz 1b								\n"
-
-			"setc %%al						\n"
-			"movzx %%al, %%eax				\n"
-
-			"pop %%ecx						\n"
-
-			: "=a" (c)
-			: "0" (c), "c" (b), "b" (p1)
-			: "cc", "memory" );
-
-		#endif
-
-		TTMATH_LOG("UInt32::Rcr2_one")
-
-	return c;
-	}
-
-
-	/*!
-		this method moves all bits into the left hand side
-		return value <- this <- c
-
-		the lowest *bits* will be held the 'c' and
-		the state of one additional bit (on the left hand side)
-		will be returned
-
-		for example:
-		let this is 001010000
-		after Rcl2(3, 1) there'll be 010000111 and Rcl2 returns 1
-	*/
-	uint Rcl2(uint bits, uint c)
-	{
-	TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
-		
-	register sint b = value_size;
-	register uint * p1 = table;
-	register uint mask;
-
-		#ifndef __GNUC__
-			__asm
-			{
-				push eax
-				push ebx
-				push ecx
-				push edx
-				push esi
-				push edi
-
-				mov edi, [b]
-
-				mov ecx, 32
-				sub ecx, [bits]
-				mov edx, -1
-				shr edx, cl
-				mov [mask], edx
-
-				mov ecx, [bits]
-				mov ebx, [p1]
-
-				xor edx, edx   // edx = 0
-				mov esi, edx   // old value = 0 
-
-				mov eax, [c]
-				or eax, eax
-				cmovnz esi, [mask] // if c then old value = mask
-
-		p:
-				rol dword ptr [ebx+edx*4], cl
-				
-				mov eax, [ebx+edx*4]
-				and eax, [mask] 
-				xor [ebx+edx*4], eax // clearing bits
-				or [ebx+edx*4], esi  // saving old value
-				mov esi, eax
-
-				inc edx
-				dec edi
-			jnz p
-
-				and eax, 1
-				mov [c], eax
-
-				pop edi
-				pop esi
-				pop edx
-				pop ecx
-				pop ebx
-				pop eax
-			}
-		#endif
-
-
-		#ifdef __GNUC__
-		__asm__  __volatile__(
-
-			"push %%edx						\n"
-			"push %%esi						\n"
-			"push %%edi						\n"
-			
-			"movl %%ecx, %%esi				\n"
-			"movl $32, %%ecx				\n"
-			"subl %%esi, %%ecx				\n"
-			"movl $-1, %%edx				\n"
-			"shrl %%cl, %%edx				\n"
-			"movl %%edx, %[amask]			\n"
-			"movl %%esi, %%ecx				\n"
-
-			"xorl %%edx, %%edx				\n"
-			"movl %%edx, %%esi				\n"
-
-			"orl %%eax, %%eax				\n"
-			"cmovnz %[amask], %%esi			\n"
-
-		"1:									\n"
-			"roll %%cl, (%%ebx,%%edx,4)		\n"
-
-			"movl (%%ebx,%%edx,4), %%eax	\n"
-			"andl %[amask], %%eax			\n"
-			"xorl %%eax, (%%ebx,%%edx,4)	\n"
-			"orl  %%esi, (%%ebx,%%edx,4)	\n"
-			"movl %%eax, %%esi				\n"
-			
-			"incl %%edx						\n"
-			"decl %%edi						\n"
-		"jnz 1b								\n"
-			
-			"and $1, %%eax					\n"
-
-			"pop %%edi						\n"
-			"pop %%esi						\n"
-			"pop %%edx						\n"
-
-			: "=a" (c)
-			: "0" (c), "D" (b), "b" (p1), "c" (bits), [amask] "m" (mask)
-			: "cc", "memory" );
-
-		#endif
-
-		TTMATH_LOG("UInt32::Rcl2")
-
-	return c;
-	}
-
-
-	/*!
-		this method moves all bits into the right hand side
-		C -> this -> return value
-
-		the highest *bits* will be held the 'c' and
-		the state of one additional bit (on the right hand side)
-		will be returned
-
-		for example:
-		let this is 000000010
-		after Rcr2(2, 1) there'll be 110000000 and Rcr2 returns 1
-	*/
-	uint Rcr2(uint bits, uint c)
-	{
-	TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
-
-	register sint b = value_size;
-	register uint * p1 = table;
-	register uint mask;
-
-		#ifndef __GNUC__
-			__asm
-			{
-				push eax
-				push ebx
-				push ecx
-				push edx
-				push esi
-				push edi
-
-				mov edi, [b]
-
-				mov ecx, 32
-				sub ecx, [bits]
-				mov edx, -1
-				shl edx, cl
-				mov [mask], edx
-
-				mov ecx, [bits]
-				mov ebx, [p1]
-
-				xor edx, edx   // edx = 0
-				mov esi, edx   // old value = 0 
-				add edx, edi   
-				dec edx        // edx - is pointing at the last word
-
-				mov eax, [c]
-				or eax, eax
-				cmovnz esi, [mask] // if c then old value = mask
-
-			p:
-				ror dword ptr [ebx+edx*4], cl
-				
-				mov eax, [ebx+edx*4]
-				and eax, [mask] 
-				xor [ebx+edx*4], eax // clearing bits
-				or [ebx+edx*4], esi  // saving old value
-				mov esi, eax
-
-				dec edx
-				dec edi
-			jnz p
-
-				rol eax, 1    // 31bit will be first
-				and eax, 1  
-				mov [c], eax
-
-				pop edi
-				pop esi
-				pop edx
-				pop ecx
-				pop ebx
-				pop eax
-			}
-		#endif
-
-
-		#ifdef __GNUC__
-			__asm__  __volatile__(
-
-			"push %%edx						\n"
-			"push %%esi						\n"
-			"push %%edi						\n"
-			
-			"movl %%ecx, %%esi				\n"
-			"movl $32, %%ecx				\n"
-			"subl %%esi, %%ecx				\n"
-			"movl $-1, %%edx				\n"
-			"shll %%cl, %%edx				\n"
-			"movl %%edx, %[amask]			\n"
-			"movl %%esi, %%ecx				\n"
-
-			"xorl %%edx, %%edx				\n"
-			"movl %%edx, %%esi				\n"
-			"addl %%edi, %%edx				\n"
-			"decl %%edx						\n"
-
-			"orl %%eax, %%eax				\n"
-			"cmovnz %[amask], %%esi			\n"
-
-		"1:									\n"
-			"rorl %%cl, (%%ebx,%%edx,4)		\n"
-
-			"movl (%%ebx,%%edx,4), %%eax	\n"
-			"andl %[amask], %%eax			\n"
-			"xorl %%eax, (%%ebx,%%edx,4)	\n"
-			"orl  %%esi, (%%ebx,%%edx,4)	\n"
-			"movl %%eax, %%esi				\n"
-			
-			"decl %%edx						\n"
-			"decl %%edi						\n"
-		"jnz 1b								\n"
-			
-			"roll $1, %%eax					\n"
-			"andl $1, %%eax					\n"
-
-			"pop %%edi						\n"
-			"pop %%esi						\n"
-			"pop %%edx						\n"
-
-			: "=a" (c)
-			: "0" (c), "D" (b), "b" (p1), "c" (bits), [amask] "m" (mask)
-			: "cc", "memory" );
-
-		#endif
-
-		TTMATH_LOG("UInt32::Rcr2")
-
-	return c;
-	}
-
-#endif
-
-
 	/*!    
 		an auxiliary method for moving bits into the left hand side
 
@@ -1459,51 +579,6 @@ public:
 
 
 
-#ifdef TTMATH_PLATFORM32
-
-	/*
-		this method returns the number of the highest set bit in one 32-bit word
-		if the 'x' is zero this method returns '-1'
-	*/
-	static sint FindLeadingBitInWord(uint x)
-	{
-	register sint result;
-
-		#ifndef __GNUC__
-			__asm
-			{
-				push eax
-				push edx
-
-				mov edx,-1
-				bsr eax,[x]
-				cmovz eax,edx
-				mov [result], eax
-
-				pop edx
-				pop eax
-			}
-		#endif
-
-
-		#ifdef __GNUC__
-			__asm__  __volatile__(
-
-			"bsrl %1, %0		\n"
-			"jnz 1f				\n"
-			"movl $-1, %0		\n"
-			"1:					\n"
-
-			: "=R" (result)
-			: "R" (x)
-			: "cc" );
-
-		#endif
-
-	return result;
-	}
-
-#endif
 
 	/*!
 		this method looks for the highest set bit
@@ -1542,68 +617,6 @@ public:
 	
 
 
-#ifdef TTMATH_PLATFORM32
-
-
-
-	/*!
-		this method sets a special bit in the 'value'
-		and returns the last state of the bit (zero or one)
-
-		bit is from <0,31>
-		e.g.
-		 uint x = 100;
-		 uint bit = SetBitInWord(x, 3);
-		 now: x = 108 and bit = 0
-	*/
-	static uint SetBitInWord(uint & value, uint bit)
-	{
-		TTMATH_ASSERT( bit < TTMATH_BITS_PER_UINT )
-
-		uint old_bit;
-		uint v = value;
-
-		#ifndef __GNUC__
-			__asm
-			{
-			push ebx
-			push eax
-
-			mov eax, [v]
-			mov ebx, [bit]
-			bts eax, ebx
-			mov [v], eax
-
-			setc bl
-			movzx ebx, bl
-			mov [old_bit], ebx
-
-			pop eax
-			pop ebx
-			}
-		#endif
-
-
-		#ifdef __GNUC__
-			__asm__  __volatile__(
-
-			"btsl %%ebx, %%eax		\n"
-
-			"setc %%bl				\n"
-			"movzx %%bl, %%ebx		\n"
-			
-			: "=a" (v), "=b" (old_bit)
-			: "0" (v), "1" (bit)
-			: "cc" );
-
-		#endif
-
-		value = v;
-
-	return old_bit;
-	}
-
-#endif
 
 
 	/*!
@@ -1737,71 +750,6 @@ public:
 
 public:
 
-
-	
-#ifdef TTMATH_PLATFORM32
-
-
-	/*!
-		multiplication: result2:result1 = a * b
-		result2 - higher word
-		result1 - lower word of the result
-	
-		this method never returns a carry
-
-		it is an auxiliary method for second version of the multiplication algorithm
-	*/
-	static void MulTwoWords(uint a, uint b, uint * result2, uint * result1)
-	{
-	/*
-		we must use these temporary variables in order to inform the compilator
-		that value pointed with result1 and result2 has changed
-
-		this has no effect in visual studio but it's useful when
-		using gcc and options like -Ox
-	*/
-	register uint result1_;
-	register uint result2_;
-
-		#ifndef __GNUC__
-
-			__asm
-			{
-			push eax
-			push edx
-
-			mov eax, [a]
-			mul dword ptr [b]
-
-			mov [result2_], edx
-			mov [result1_], eax
-
-			pop edx
-			pop eax
-			}
-
-		#endif
-
-
-		#ifdef __GNUC__
-
-		__asm__ __volatile__(
-		
-			"mull %%edx			\n"
-
-			: "=a" (result1_), "=d" (result2_)
-			: "0" (a), "1" (b)
-			: "cc" );
-
-		#endif
-
-
-		*result1 = result1_;
-		*result2 = result2_;
-	}
-
-#endif
-
 	/*!
 		multiplication: this = this * ss2
 
@@ -2092,75 +1040,21 @@ public:
 	
 public:
 
-	#ifdef TTMATH_PLATFORM32
-
-
-	/*!
-		this method calculates 64bits word a:b / 32bits c (a higher, b lower word)
-		r = a:b / c and rest - remainder
-
-		*
-		* WARNING:
-		* if r (one word) is too small for the result or c is equal zero
-		* there'll be a hardware interruption (0)
-		* and probably the end of your program
-		*
-	*/
-	static void DivTwoWords(uint a, uint b, uint c, uint * r, uint * rest)
-	{
-		register uint r_;
-		register uint rest_;
-		/*
-			these variables have similar meaning like those in
-			the multiplication algorithm MulTwoWords
-		*/
-
-		#ifndef __GNUC__
-			__asm
-			{
-				push eax
-				push edx
-
-				mov edx, [a]
-				mov eax, [b]
-				div dword ptr [c]
-
-				mov [r_], eax
-				mov [rest_], edx
-
-				pop edx
-				pop eax
-			}
-		#endif
-
-
-		#ifdef __GNUC__
-		
-			__asm__ __volatile__(
-
-			"divl %%ecx				\n"
-
-			: "=a" (r_), "=d" (rest_)
-			: "d" (a), "a" (b), "c" (c)
-			: "cc" );
-
-		#endif
-
-
-		*r = r_;
-		*rest = rest_;
-
-	}
-
-#endif
-
-
 
 	/*!
 		division by one unsigned word
+
+		returns 1 when divisor is zero
 	*/
 	uint DivInt(uint divisor, uint * remainder = 0)
 	{
+		if( divisor == 0 )
+		{
+			TTMATH_LOG("UInt::DivInt")
+
+		return 1;
+		}
+
 		if( divisor == 1 )
 		{
 			if( remainder )
@@ -2887,10 +1781,12 @@ private:
 	void Div3_MultiplySubtract(	UInt<value_size+1> & uu,
 								const UInt<value_size+1> & vv, uint & qp)
 	{
+		// D4 (in the book)
+
 		UInt<value_size+1> vv_temp(vv);
 		vv_temp.MulInt(qp);
 
-		if( uu.Sub(vv_temp) )
+		if( uu.Sub(vv_temp) )  
 		{
 			// there was a carry
 			
@@ -2900,6 +1796,9 @@ private:
 
 			--qp;
 			uu.Add(vv);
+
+			// there can be a carry from this addition but it should be ignored 
+			// because it cancels with the borrow from uu.Sub(vv_temp)
 		}
 
 		TTMATH_LOG("UInt::Div3_MultiplySubtract")
@@ -3227,7 +2126,6 @@ public:
 	}
 
 
-
 	/*!
 		this method converts the sint type to this class
 
@@ -3262,6 +2160,94 @@ public:
 		TTMATH_LOG("UInt::UInt(sint)")
 	}
 
+
+
+#ifdef TTMATH_PLATFORM64
+
+	/*!
+		on 64bit platforms we must define additional operators and constructors
+		in order to allow a user to initialize the objects in this way:
+			UInt<...> type = 20;
+		or
+			UInt<...> type; 
+			type = 30;
+
+		decimal constants such as 20, 30 etc. are integer literals of type int,
+		if the value is greater it can even be long int,
+		0 is an octal integer of type int
+		(ISO 14882 p2.13.1 Integer literals)
+	*/
+
+	/*!
+		this operator converts the unsigned int type to this class
+
+		***this operator is created only on a 64bit platform***
+		it takes one argument of 32bit
+	*/
+	UInt<value_size> & operator=(unsigned int i)
+	{
+		FromUInt(uint(i));
+
+		TTMATH_LOG("UInt64::operator=(unsigned int)")
+
+	return *this;
+	}
+
+
+	/*!
+		a constructor for converting the unsigned int to this class
+
+		***this constructor is created only on a 64bit platform***
+		it takes one argument of 32bit
+	*/
+	UInt(unsigned int i)
+	{
+		FromUInt(uint(i));
+
+		TTMATH_LOG("UInt64::UInt(unsigned int)")
+	}
+
+
+	/*!
+		an operator for converting the signed int to this class
+
+		***this operator is created only on a 64bit platform***
+		it takes one argument of 32bit
+
+		look at the description of UInt::operator=(sint)
+	*/
+	UInt<value_size> & operator=(signed int i)
+	{
+		FromUInt(uint(i));
+
+		TTMATH_LOG("UInt64::operator=(signed int)")
+
+	return *this;
+	}
+
+
+	/*!
+		a constructor for converting the signed int to this class
+
+		***this constructor is created only on a 64bit platform***
+		it takes one argument of 32bit
+
+		look at the description of UInt::operator=(sint)
+	*/
+	UInt(signed int i)
+	{
+		FromUInt(uint(i));
+
+		TTMATH_LOG("UInt64::UInt(signed int)")
+	}
+
+
+#endif
+
+
+
+
+
 	/*!
 		a constructor for converting a string to this class (with the base=10)
 	*/
@@ -3875,21 +2861,51 @@ public:
 	}
 
 
+
+	/*
+		following methods are defined in:
+			ttmathuint_x86.h
+			ttmathuint_x86_64.h
+			ttmathuint_noasm.h
+	*/
+
+#ifdef TTMATH_NOASM
+	static uint AddTwoWords(uint a, uint b, uint carry, uint * result);
+	static uint SubTwoWords(uint a, uint b, uint carry, uint * result);
+
 #ifdef TTMATH_PLATFORM64
 
+	union uint_
+	{
+		struct 
+		{
+			unsigned int low;  // 32 bit 
+			unsigned int high; // 32 bit
+		} u_;
+
+		uint u;                // 64 bit
+	};
+
+
+	static void DivTwoWords2(uint a,uint b, uint c, uint * r, uint * rest);
+	static uint DivTwoWordsNormalize(uint_ & a_, uint_ & b_, uint_ & c_);
+	static uint DivTwoWordsUnnormalize(uint u, uint d);
+	static unsigned int DivTwoWordsCalculate(uint_ u_, unsigned int u3, uint_ v_);
+	static void MultiplySubtract(uint_ & u_, unsigned int & u3, unsigned int & q, uint_ v_);
+
+#endif // TTMATH_PLATFORM64
+#endif // TTMATH_NOASM
+
+
 private:
+public: // !!! temporarily public
 	uint Rcl2_one(uint c);
 	uint Rcr2_one(uint c);
 	uint Rcl2(uint bits, uint c);
 	uint Rcr2(uint bits, uint c);
 
 public:
-	// these methods are for 64bit processors and are defined in 'ttmathuint64.h'
-	UInt<value_size> & operator=(unsigned int i);
-	UInt(unsigned int i);
-	UInt<value_size> & operator=(signed int i);
-	UInt(signed int i);
-	void SetFromTable(const unsigned int * temp_table, uint temp_table_len);	
+	
 	uint Add(const UInt<value_size> & ss2, uint c=0);
 	uint AddInt(uint value, uint index = 0);
 	uint AddTwoInts(uint x2, uint x1, uint index);
@@ -3897,18 +2913,16 @@ public:
 	uint SubInt(uint value, uint index = 0);
 	static sint FindLeadingBitInWord(uint x);
 	static uint SetBitInWord(uint & value, uint bit);
-	static void MulTwoWords(uint a, uint b, uint * result2, uint * result1);
+	static void MulTwoWords(uint a, uint b, uint * result_high, uint * result_low);
 	static void DivTwoWords(uint a,uint b, uint c, uint * r, uint * rest);
-
-#endif
-
 };
 
 
 } //namespace
 
 
-#include "ttmathuint64.h"
-
+#include "ttmathuint_x86.h"
+#include "ttmathuint_x86_64.h"
+#include "ttmathuint_noasm.h"
 
 #endif
diff --git a/ttmath/ttmathuint_noasm.h b/ttmath/ttmathuint_noasm.h
new file mode 100644
index 0000000..e84f837
--- /dev/null
+++ b/ttmath/ttmathuint_noasm.h
@@ -0,0 +1,885 @@
+/*
+ * This file is a part of TTMath Bignum Library
+ * and is distributed under the (new) BSD licence.
+ * Author: Tomasz Sowa <t.sowa@slimaczek.pl>
+ */
+
+/* 
+ * Copyright (c) 2006-2009, Tomasz Sowa
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *    
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *    
+ *  * Neither the name Tomasz Sowa nor the names of contributors to this
+ *    project may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef headerfilettmathuint_noasm
+#define headerfilettmathuint_noasm
+
+
+#ifdef TTMATH_NOASM
+
+/*!
+	\file ttmathuint_noasm.h
+    \brief template class UInt<uint> with methods without any assembler code
+
+	this file is included at the end of ttmathuint.h
+*/
+
+
+namespace ttmath
+{
+	/*!
+		this method adds two words together with an incoming carry
+		(*result = a + b + carry) and returns the outgoing carry (zero or one)
+
+		any non-zero 'carry' is treated as one
+	*/
+	template<uint value_size>
+	uint UInt<value_size>::AddTwoWords(uint a, uint b, uint carry, uint * result)
+	{
+	uint sum;
+
+		if( carry != 0 )
+		{
+			// with a carry-in the sum has wrapped around unless it is greater than 'a'
+			sum   = a + b + 1;
+			carry = (sum > a) ? 0 : 1;
+		}
+		else
+		{
+			// without a carry-in the sum has wrapped around when it is smaller than 'a'
+			sum   = a + b;
+			carry = (sum < a) ? 1 : 0;
+		}
+
+		*result = sum;
+
+	return carry;
+	}
+
+
+
+	/*!
+		this method adds ss2 to 'this' together with an incoming carry
+		(this = this + ss2 + c)
+
+		c should be zero or one (any other non-zero value is treated as one)
+		the method returns the carry (zero or one) from the last (highest) word
+	*/
+	template<uint value_size>
+	uint UInt<value_size>::Add(const UInt<value_size> & ss2, uint c)
+	{
+		// the words are added from the lowest to the highest one,
+		// the carry from each word is passed to the next one
+		for(uint word = 0 ; word < value_size ; ++word)
+		{
+			uint * dest = &table[word];
+
+			c = AddTwoWords(*dest, ss2.table[word], c, dest);
+		}
+
+		TTMATH_LOG("UInt_noasm::Add")
+
+	return c;
+	}
+
+
+	/*!
+		this method adds one word (at a specific position)
+		and returns the carry (zero or one)
+
+		if we've got (value_size=3):
+			table[0] = 10;
+			table[1] = 30;
+			table[2] = 5;
+		and we call:
+			AddInt(2,1)
+		then there'll be:
+			table[0] = 10;
+			table[1] = 30 + 2;
+			table[2] = 5;
+
+		of course if there was a carry from table[2] it would be returned
+	*/
+	template<uint value_size>
+	uint UInt<value_size>::AddInt(uint value, uint index)
+	{
+		TTMATH_ASSERT( index < value_size )
+
+		uint pos = index;
+		uint c   = AddTwoWords(table[pos], value, 0, &table[pos]);
+
+		// the carry is passed to the higher words only as long as it exists
+		while( c != 0 && ++pos < value_size )
+			c = AddTwoWords(table[pos], 0, c, &table[pos]);
+
+		TTMATH_LOG("UInt_noasm::AddInt")
+
+	return c;
+	}
+
+
+
+
+
+	/*!
+		this method adds only two unsigned words to the existing value
+		and these words begin on the 'index' position
+		(it's used in the multiplication algorithm 2)
+
+		index should be equal or smaller than value_size-2 (index <= value_size-2)
+		x1 - lower word, x2 - higher word
+
+		for example if we've got value_size equal 4 and:
+			table[0] = 3
+			table[1] = 4
+			table[2] = 5
+			table[3] = 6
+		then let
+			x1 = 10
+			x2 = 20
+		and
+			index = 1
+
+		the result of this method will be:
+			table[0] = 3
+			table[1] = 4 + x1 = 14
+			table[2] = 5 + x2 = 25
+			table[3] = 6
+		
+		and no carry at the end of table[3]
+
+		(of course if there was a carry in table[2](5+20) then 
+		this carry would be passed to the table[3] etc.)
+
+		the method returns the carry (zero or one) from the highest word
+	*/
+	template<uint value_size>
+	uint UInt<value_size>::AddTwoInts(uint x2, uint x1, uint index)
+	{
+	uint i, c;
+
+		// two words are written here: table[index] and table[index+1],
+		// so 'index' has to be smaller than value_size-1
+		// (the assembler version of this method uses the same condition)
+		TTMATH_ASSERT( index < value_size - 1 )
+
+
+		c = AddTwoWords(table[index],   x1, 0, &table[index]);
+		c = AddTwoWords(table[index+1], x2, c, &table[index+1]);
+
+		// the carry is passed to the higher words only as long as it exists
+		for(i=index+2 ; i<value_size && c ; ++i)
+			c = AddTwoWords(table[i], 0, c, &table[i]);
+
+		TTMATH_LOG("UInt64::AddTwoInts")
+	
+	return c;
+	}
+
+
+
+	/*!
+		this method subtracts 'b' and an incoming borrow from 'a'
+		(*result = a - b - carry) and returns the outgoing borrow (zero or one)
+
+		any non-zero 'carry' is treated as one
+	*/
+	template<uint value_size>
+	uint UInt<value_size>::SubTwoWords(uint a, uint b, uint carry, uint * result)
+	{
+	uint difference;
+
+		if( carry != 0 )
+		{
+			// with a borrow-in there is no borrow-out only when 'a' is greater than 'b'
+			difference = a - b - 1;
+			carry      = (a > b) ? 0 : 1;
+		}
+		else
+		{
+			// without a borrow-in there is a borrow-out when 'a' is smaller than 'b'
+			difference = a - b;
+			carry      = (a < b) ? 1 : 0;
+		}
+
+		*result = difference;
+
+	return carry;
+	}
+
+
+
+
+	/*!
+		this method subtracts ss2 from 'this' together with an incoming carry (borrow)
+		(this = this - ss2 - c)
+
+		c should be zero or one (any other non-zero value is treated as one)
+		the method returns the carry (zero or one) from the last (highest) word
+	*/
+	template<uint value_size>
+	uint UInt<value_size>::Sub(const UInt<value_size> & ss2, uint c)
+	{
+		// the words are subtracted from the lowest to the highest one,
+		// the borrow from each word is passed to the next one
+		for(uint word = 0 ; word < value_size ; ++word)
+		{
+			uint * dest = &table[word];
+
+			c = SubTwoWords(*dest, ss2.table[word], c, dest);
+		}
+
+		TTMATH_LOG("UInt_noasm::Sub")
+
+	return c;
+	}
+
+
+
+
+	/*!
+		this method subtracts one word (at a specific position)
+		and returns the carry (zero or one)
+
+		if we've got (value_size=3):
+			table[0] = 10;
+			table[1] = 30;
+			table[2] = 5;
+		and we call:
+			SubInt(2,1)
+		then there'll be:
+			table[0] = 10;
+			table[1] = 30 - 2;
+			table[2] = 5;
+
+		of course if there was a carry from table[2] it would be returned
+	*/
+	template<uint value_size>
+	uint UInt<value_size>::SubInt(uint value, uint index)
+	{
+		TTMATH_ASSERT( index < value_size )
+
+		uint pos = index;
+		uint c   = SubTwoWords(table[pos], value, 0, &table[pos]);
+
+		// the borrow is passed to the higher words only as long as it exists
+		while( c != 0 && ++pos < value_size )
+			c = SubTwoWords(table[pos], 0, c, &table[pos]);
+
+		TTMATH_LOG("UInt_noasm::SubInt")
+
+	return c;
+	}
+
+
+
+
+	/*!
+		this method moves all bits one position into the left hand side
+		return value <- this <- c
+
+		the lowest *bit* is set from 'c' (any non-zero 'c' means one) and
+		the bit which was moved out (on the left hand side) is returned
+
+		for example:
+		let this is 001010000
+		after Rcl2_one(1) there'll be 010100001 and Rcl2_one returns 0
+	*/
+	template<uint value_size>
+	uint UInt<value_size>::Rcl2_one(uint c)
+	{
+	uint carry_bit = (c != 0) ? 1 : 0;
+
+		for(uint word = 0 ; word < value_size ; ++word)
+		{
+			const uint old = table[word];
+
+			// the highest bit of this word goes to the lowest bit of the next word
+			table[word] = (old << 1) | carry_bit;
+			carry_bit   = (old & TTMATH_UINT_HIGHEST_BIT) ? 1 : 0;
+		}
+
+		TTMATH_LOG("UInt64::Rcl2_one")
+
+	return carry_bit;
+	}
+
+
+
+
+
+
+
+	/*!
+		this method moves all bits one position into the right hand side
+		c -> this -> return value
+
+		the highest *bit* is set from 'c' (any non-zero 'c' means one) and
+		the bit which was moved out (on the right hand side) is returned
+		as zero or one
+
+		for example:
+		let this is 000000010
+		after Rcr2_one(1) there'll be 100000001 and Rcr2_one returns 0
+	*/
+	template<uint value_size>
+	uint UInt<value_size>::Rcr2_one(uint c)
+	{
+	sint i; // signed i
+	uint new_c;
+
+		// inside the loop the carry is kept at the position of the highest bit
+		// so it can be simply or'ed into the shifted word
+		if( c != 0 )
+			c = TTMATH_UINT_HIGHEST_BIT;
+
+		for(i=sint(value_size)-1 ; i>=0 ; --i)
+		{
+			new_c    = (table[i] & 1) ? TTMATH_UINT_HIGHEST_BIT : 0;
+			table[i] = (table[i] >> 1) | c;
+			c        = new_c;
+		}
+
+		// the carry is held as TTMATH_UINT_HIGHEST_BIT here, the returned value
+		// has to be zero or one (as in Rcr2 and in the assembler version)
+		c = (c != 0) ? 1 : 0;
+
+		TTMATH_LOG("UInt64::Rcr2_one")
+
+	return c;
+	}
+
+
+
+
+	/*!
+		this method moves all bits into the left hand side by 'bits' positions
+		(bits has to be greater than zero and smaller than TTMATH_BITS_PER_UINT)
+		return value <- this <- c
+
+		the lowest *bits* are set from 'c' (any non-zero 'c' means ones) and
+		the last bit which was moved out (on the left hand side) is returned
+
+		for example:
+		let this is 001010000
+		after Rcl2(3, 1) there'll be 010000111 and Rcl2 returns 1
+	*/
+	template<uint value_size>
+	uint UInt<value_size>::Rcl2(uint bits, uint c)
+	{
+		TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
+
+		const uint move = TTMATH_BITS_PER_UINT - bits;
+		uint incoming   = 0;
+
+		// a non-zero carry fills all the lowest 'bits' bits with ones
+		if( c != 0 )
+			incoming = TTMATH_UINT_MAX_VALUE >> move;
+
+		for(uint word = 0 ; word < value_size ; ++word)
+		{
+			const uint old = table[word];
+
+			// the highest 'bits' bits of this word go to the bottom of the next word
+			table[word] = (old << bits) | incoming;
+			incoming    = old >> move;
+		}
+
+		TTMATH_LOG("UInt::Rcl2")
+
+	return (incoming & 1);
+	}
+
+
+
+
+	/*!
+		this method moves all bits into the right hand side by 'bits' positions
+		(bits has to be greater than zero and smaller than TTMATH_BITS_PER_UINT)
+		c -> this -> return value
+
+		the highest *bits* are set from 'c' (any non-zero 'c' means ones) and
+		the last bit which was moved out (on the right hand side) is returned
+
+		for example:
+		let this is 000000010
+		after Rcr2(2, 1) there'll be 110000000 and Rcr2 returns 1
+	*/
+	template<uint value_size>
+	uint UInt<value_size>::Rcr2(uint bits, uint c)
+	{
+		TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
+
+		const uint move = TTMATH_BITS_PER_UINT - bits;
+		uint incoming   = 0;
+
+		// a non-zero carry fills all the highest 'bits' bits with ones
+		if( c != 0 )
+			incoming = TTMATH_UINT_MAX_VALUE << move;
+
+		// going from the highest word down to the lowest one
+		for(uint word = value_size ; word-- > 0 ; )
+		{
+			const uint old = table[word];
+
+			// the lowest 'bits' bits of this word go to the top of the lower word
+			table[word] = (old >> bits) | incoming;
+			incoming    = old << move;
+		}
+
+		TTMATH_LOG("UInt64::Rcr2")
+
+	return (incoming & TTMATH_UINT_HIGHEST_BIT) ? 1 : 0;
+	}
+
+
+
+
+	/*!
+		this method returns the index of the highest set bit in 'x'
+		(the lowest bit has index zero)
+
+		if 'x' is zero the method returns -1
+	*/
+	template<uint value_size>
+	sint UInt<value_size>::FindLeadingBitInWord(uint x)
+	{
+	sint bit = -1;
+
+		// each shift to the right removes one bit, the number of shifts
+		// needed to clear 'x' is the index of the highest set bit plus one
+		while( x != 0 )
+		{
+			x = x >> 1;
+			++bit;
+		}
+
+	return bit;
+	}
+
+
+
+
+
+	/*!
+		this method sets one bit in the 'value'
+		and returns the previous state of this bit (zero or one)
+
+		bit is from <0, TTMATH_BITS_PER_UINT-1>
+
+		e.g.
+		 uint x = 100;
+		 uint bit = SetBitInWord(x, 3);
+		 now: x = 108 and bit = 0
+	*/
+	template<uint value_size>
+	uint UInt<value_size>::SetBitInWord(uint & value, uint bit)
+	{
+		TTMATH_ASSERT( bit < TTMATH_BITS_PER_UINT )
+
+		uint mask = 1;
+
+		// moving the single set bit of the mask to the requested position
+		for(uint shifts_left = bit ; shifts_left != 0 ; --shifts_left)
+			mask = mask << 1;
+
+		const bool was_set = (value & mask) != 0;
+
+		value = value | mask;
+
+	return was_set ? 1 : 0;
+	}
+
+
+
+
+
+
+	/*!
+	 *
+	 * Multiplication
+	 *
+	 *
+	*/
+
+
+	/*!
+		multiplication: result_high:result_low = a * b
+		result_high - higher word of the result
+		result_low  - lower word of the result
+	
+		this method never returns a carry (the product of two words always fits in two words)
+		this method is used in the second version of the multiplication algorithms
+
+		NOTE(review): both branches read the halves of a word through a union
+		(members 'low' and then 'high'), this matches the value of the whole word
+		only with a little-endian byte order - confirm before porting
+	*/
+	template<uint value_size>
+	void UInt<value_size>::MulTwoWords(uint a, uint b, uint * result_high, uint * result_low)
+	{
+	#ifdef TTMATH_PLATFORM32
+
+		/*
+			on 32bit platforms we have defined 'unsigned long long int' type known as 'ulint' in ttmath namespace
+			this type has 64 bits, then we're using only one multiplication: 32bit * 32bit = 64bit
+		*/
+
+		// a local union (the name 'uint_' is local to this branch):
+		// one 64 bit value seen as two 32 bit words
+		union uint_
+		{
+			struct
+			{
+				uint low;  // 32 bits
+				uint high; // 32 bits
+			} u_;
+
+			ulint u;       // 64 bits
+		} res;
+
+		res.u = ulint(a) * ulint(b);     // multiply two 32bit words, the result has 64 bits
+
+		*result_high = res.u_.high;
+		*result_low  = res.u_.low;
+
+	#else
+
+		/*
+			64 bits platforms
+
+			we don't have a native type which has 128 bits
+			then we're splitting 'a' and 'b' to 4 parts (high and low halves)
+			and using 4 multiplications (with additions and carry correctness)
+		*/
+
+		uint_ a_;
+		uint_ b_;
+		uint_ res_high1, res_high2;
+		uint_ res_low1,  res_low2;
+		
+		a_.u = a;
+		b_.u = b;
+
+		/*
+			the multiplication is as follows (schoolbook algorithm with O(n^2) ):
+
+                                                   32 bits         32 bits
+
+                                             +--------------------------------+
+                                             |   a_.u_.high   |   a_.u_.low   |
+                                             +--------------------------------+
+                                             |   b_.u_.high   |   b_.u_.low   |
+            +--------------------------------+--------------------------------+
+            |           res_high1.u          |           res_low1.u           |
+            +--------------------------------+--------------------------------+
+            |           res_high2.u          |           res_low2.u           |
+            +--------------------------------+--------------------------------+
+
+                          64 bits                          64 bits
+		*/
+
+
+		uint_ temp;
+
+		// the first row: res_high1:res_low1 = a * b_.u_.low
+		res_low1.u        = uint(b_.u_.low) * uint(a_.u_.low);
+
+		// the higher half of the first partial product is added to the second one
+		temp.u            = uint(res_low1.u_.high) + uint(b_.u_.low) * uint(a_.u_.high);
+		res_low1.u_.high  = temp.u_.low;
+		res_high1.u_.low  = temp.u_.high;
+		res_high1.u_.high = 0;
+
+		// the second row: res_high2:res_low2 = (a * b_.u_.high) moved 32 bits to the left
+		res_low2.u_.low   = 0;
+		temp.u            = uint(b_.u_.high) * uint(a_.u_.low);
+		res_low2.u_.high  = temp.u_.low;
+
+		res_high2.u       = uint(b_.u_.high) * uint(a_.u_.high) + uint(temp.u_.high);
+
+		// adding both rows, the sum is left in res_high2:res_low2
+		uint c = AddTwoWords(res_low1.u, res_low2.u, 0, &res_low2.u);
+		AddTwoWords(res_high1.u, res_high2.u, c, &res_high2.u);                 // there is no carry from here
+
+		*result_high = res_high2.u;
+		*result_low  = res_low2.u;
+
+	#endif
+	}
+
+
+
+
+	/*!
+	 *
+	 * Division
+	 *
+	 *
+	*/
+	
+
+	// !! maybe returns something? a carry? or when c is zero?
+	/*!
+		this method divides the two-word value a:b by the one word c
+		(a - higher word, b - lower word)
+		*r = a:b / c and *rest = a:b % c (the remainder)
+
+		on 32bit platforms the division is made with the 64 bit type 'ulint',
+		on 64bit platforms there are three cases:
+		  - 'a' is zero: a simple one word division
+		  - the higher half of 'c' is zero: two divisions by a 32 bit divisor
+		  - otherwise: DivTwoWords2() is called
+		
+		*
+		* WARNING:
+		* the c has to be suitably large for the result to be kept in one word
+		* (it means that 'a' has to be smaller than 'c'),
+		* if c is equal zero there'll be a hardware interruption (0)
+		* and probably the end of your program
+		*
+	*/
+	template<uint value_size>
+	void UInt<value_size>::DivTwoWords(uint a, uint b, uint c, uint * r, uint * rest)
+	{
+	// (a < c ) for the result to be one word
+	TTMATH_ASSERT( c != 0 && a < c )
+
+	#ifdef TTMATH_PLATFORM32
+
+		// one 64 bit value built from two 32 bit words
+		// NOTE(review): the order of 'low' and 'high' assumes a little-endian byte order - confirm
+		union
+		{
+			struct
+			{
+				uint low;  // 32 bits
+				uint high; // 32 bits
+			} u_;
+
+			ulint u;       // 64 bits
+		} ab;
+
+		ab.u_.high = a;
+		ab.u_.low  = b;
+
+		*r    = uint(ab.u / c);
+		*rest = uint(ab.u % c);
+
+	#else
+
+		uint_ c_;
+		c_.u = c;
+
+
+		if( a == 0 )
+		{
+			// the higher word is zero, this is a simple one word division
+			*r    = b / c;
+			*rest = b % c;
+
+#ifdef TTMATH_WARTOWNIK
+			// a debugging counter, active only when TTMATH_WARTOWNIK is defined
+			++tester_wartownik1; // !!!!! to be removed
+#endif
+		}
+		else
+		if( c_.u_.high == 0 )
+		{
+			// higher half of 'c' is zero
+			// then higher half of 'a' is zero too (look at the asserts at the beginning - 'a' is smaller than 'c')
+			uint_ a_, b_, res_, temp1, temp2;
+
+			a_.u = a;
+			b_.u = b;
+
+			// the first step: the two highest non-zero 32 bit parts are divided by 'c',
+			// this gives the higher half of the result
+			temp1.u_.high = a_.u_.low;
+			temp1.u_.low  = b_.u_.high;
+
+			res_.u_.high  = temp1.u / c;
+
+			// the second step: the remainder from the first step and the lowest part
+			// of 'b' are divided by 'c', this gives the lower half of the result
+			temp2.u_.high = temp1.u % c;
+			temp2.u_.low  = b_.u_.low;
+			
+			res_.u_.low  = temp2.u / c;
+			*rest        = temp2.u % c;
+
+			*r = res_.u;
+#ifdef TTMATH_WARTOWNIK
+			// a debugging counter, active only when TTMATH_WARTOWNIK is defined
+			++tester_wartownik2; // !!!!! to be removed
+#endif
+
+		}
+		else
+		{
+			// the general case: 'a' is not zero and 'c' is greater than 32 bits
+			return DivTwoWords2(a, b, c,  r,  rest);
+		}
+
+	#endif
+	}
+
+
+#ifdef TTMATH_PLATFORM64
+
+	/*!
+		the general case of DivTwoWords() on 64bit platforms:
+		*r = a:b / c and *rest = a:b % c
+
+		the method is called by DivTwoWords() when 'a' is not zero and
+		the higher half of 'c' is not zero
+
+		the values are treated as numbers made of 32 bit digits:
+		the divisor is normalized, then two quotient digits are calculated
+		(each of them with DivTwoWordsCalculate() and MultiplySubtract())
+		and at the end the remainder is unnormalized
+
+		NOTE(review): the steps and the 'j' numbering in the comments look like
+		Knuth's long division (Algorithm D) - confirm against the reference
+	*/
+	template<uint value_size>
+	void UInt<value_size>::DivTwoWords2(uint a, uint b, uint c, uint * r, uint * rest)
+	{
+		// a is not zero
+		// c_.u_.high is not zero
+
+		uint_ a_, b_, c_, u_, q_;
+		unsigned int u3; // 32 bit
+
+		a_.u  = a;
+		b_.u  = b;
+		c_.u  = c;
+
+		// normalizing
+		// a0 will actually not be used
+		// (d is the number of bits by which a:b and c were moved to the left)
+		uint d = DivTwoWordsNormalize(a_, b_, c_);
+
+		// loop from j=1 to j=0
+		//   the first step (for j=2) is skipped because our result is only in one word,
+		//   (first 'q' were 0 and nothing would be changed)
+
+		// the higher quotient digit: three digits (u_.high, u_.low, u3) are divided by 'c'
+		u_.u_.high = a_.u_.high;
+		u_.u_.low  = a_.u_.low;
+		u3         = b_.u_.high;
+		q_.u_.high = DivTwoWordsCalculate(u_, u3, c_);
+		MultiplySubtract(u_, u3, q_.u_.high, c_);
+		
+		// the lower quotient digit: the two lowest digits of the partial remainder
+		// are moved one digit up and the last digit of 'b' is taken
+		u_.u_.high = u_.u_.low;
+		u_.u_.low  = u3;
+		u3         = b_.u_.low;
+		q_.u_.low  = DivTwoWordsCalculate(u_, u3, c_);
+		MultiplySubtract(u_, u3, q_.u_.low, c_);
+
+		*r = q_.u;
+
+		// unnormalizing for the remainder
+		u_.u_.high = u_.u_.low;
+		u_.u_.low  = u3;
+		*rest = DivTwoWordsUnnormalize(u_.u, d);
+	}
+
+
+
+	
+	/*!
+		this method moves the divisor 'c_' to the left until its highest bit is set
+		and moves the two-word value a_:b_ to the left by the same number of bits
+
+		the method returns the number of bits the values were moved by
+		(c_ has to be different from zero, otherwise the loop would never end)
+	*/
+	template<uint value_size>
+	uint UInt<value_size>::DivTwoWordsNormalize(uint_ & a_, uint_ & b_, uint_ & c_)
+	{
+	uint d = 0;
+
+		while( (c_.u & TTMATH_UINT_HIGHEST_BIT) == 0 )
+		{
+			c_.u <<= 1;
+
+			// the highest bit of 'b' has to be moved to the lowest bit of 'a'
+			const bool carry_from_b = (b_.u & TTMATH_UINT_HIGHEST_BIT) != 0;
+
+			b_.u <<= 1;
+			a_.u <<= 1; // the bits moved out of 'a' are simply lost
+
+			if( carry_from_b )
+			{
+				a_.u |= 1;
+	#ifdef TTMATH_WARTOWNIK
+				++tester_wartownik3; // !!!!! to be removed
+	#endif
+			}
+
+			++d;
+		}
+
+	return d;
+	}
+
+
+	/*!
+		this method moves the remainder 'u' back to the right by 'd' bits
+		('d' is the value returned by DivTwoWordsNormalize())
+	*/
+	template<uint value_size>
+	uint UInt<value_size>::DivTwoWordsUnnormalize(uint u, uint d)
+	{
+		// when there was no normalization the remainder is returned unchanged
+		return (d == 0) ? u : (u >> d);
+	}
+
+
+	/*!
+		this method calculates one 32 bit digit of the quotient
+
+		u_ - two highest digits of the current dividend, u3 - its third digit,
+		v_ - the divisor (two digits)
+
+		at the beginning the digit is estimated by dividing u_ by the higher digit
+		of the divisor, then the estimate is decreased while it is known to be too big
+
+		the returned digit can still be too big by one,
+		MultiplySubtract() corrects it in such a case
+	*/
+	template<uint value_size>
+	unsigned int UInt<value_size>::DivTwoWordsCalculate(uint_ u_, unsigned int u3, uint_ v_)
+	{
+	bool next_test;
+	uint_ qp_, rp_, temp_;
+
+		// qp_ - the estimated quotient digit, rp_ - the remainder of this estimation
+		qp_.u = u_.u / uint(v_.u_.high);
+		rp_.u = u_.u % uint(v_.u_.high);
+
+		TTMATH_ASSERT( qp_.u_.high==0 || qp_.u_.high==1 )
+
+		do
+		{
+			bool decrease = false;
+
+			// the estimate is too big when it doesn't fit in one digit...
+			if( qp_.u_.high == 1 )
+				decrease = true;
+			else
+			{
+				// ...or when qp * (lower digit of the divisor) is greater than rp:u3
+				temp_.u_.high = rp_.u_.low;
+				temp_.u_.low  = u3;
+
+				if( qp_.u * uint(v_.u_.low) > temp_.u )
+					decrease = true;
+			}
+			
+			next_test = false;
+
+			if( decrease )
+			{
+				#ifdef TTMATH_WARTOWNIK
+				++tester_wartownik4; // !!!!! to be removed
+				#endif
+
+				--qp_.u;
+				rp_.u += v_.u_.high;
+
+				// the test is repeated only when the remainder still fits in one digit
+				if( rp_.u_.high == 0 ) 
+				{
+					next_test = true;
+
+					#ifdef TTMATH_WARTOWNIK
+					++tester_wartownik5; // !!!!! to be removed
+					#endif
+				}
+
+				
+			}
+		}
+		while( next_test );
+
+	return qp_.u_.low;
+	}
+
+
+	/*!
+		this method subtracts v_ * q from the three digit value (u_.high, u_.low, u3)
+		and stores the result back in u_ and u3
+
+		if the subtraction gives a borrow (q was too big) then q is decreased by one
+		and v_ is added back once
+	*/
+	template<uint value_size>
+	void UInt<value_size>::MultiplySubtract(uint_ & u_, unsigned int & u3, unsigned int & q, uint_ v_)
+	{
+	uint_ temp_;
+		
+		uint res_high;
+		uint res_low;
+
+		// res_high:res_low = v_ * q
+		MulTwoWords(v_.u, q, &res_high, &res_low);
+
+		uint_ sub_res_high_;
+		uint_ sub_res_low_;
+
+		// the lower word of the subtraction: (u_.low : u3) - res_low
+		temp_.u_.high = u_.u_.low;
+		temp_.u_.low  = u3;
+
+		uint c = SubTwoWords(temp_.u, res_low, 0, &sub_res_low_.u);
+			
+		// the higher word of the subtraction: (0 : u_.high) - res_high - borrow
+		temp_.u_.high = 0;
+		temp_.u_.low  = u_.u_.high;
+		c = SubTwoWords(temp_.u, res_high, c, &sub_res_high_.u);
+
+#ifdef TTMATH_WARTOWNIK
+		++tester_wartownik6; // !!!!! to be removed
+#endif
+
+		if( c )
+		{
+			// the result is negative: the quotient digit was too big by one,
+			// so it is decreased and the divisor is added back
+			--q;
+
+			c = AddTwoWords(sub_res_low_.u, v_.u, 0, &sub_res_low_.u);
+			AddTwoWords(sub_res_high_.u, 0, c, &sub_res_high_.u);
+
+			#ifdef TTMATH_WARTOWNIK
+			++tester_wartownik7; // !!!!! to be removed
+			#endif
+		}
+
+		// only the three lowest 32 bit digits of the result are stored
+		u_.u_.high = sub_res_high_.u_.low;
+		u_.u_.low  = sub_res_low_.u_.high;
+		u3         = sub_res_low_.u_.low;
+	}
+
+#endif // #ifdef TTMATH_PLATFORM64
+
+
+
+} //namespace
+
+
+#endif //ifdef TTMATH_NOASM
+#endif
+
+
+
+
diff --git a/ttmath/ttmathuint_x86.h b/ttmath/ttmathuint_x86.h
new file mode 100644
index 0000000..69da94f
--- /dev/null
+++ b/ttmath/ttmathuint_x86.h
@@ -0,0 +1,1281 @@
+/*
+ * This file is a part of TTMath Bignum Library
+ * and is distributed under the (new) BSD licence.
+ * Author: Tomasz Sowa <t.sowa@slimaczek.pl>
+ */
+
+/* 
+ * Copyright (c) 2006-2009, Tomasz Sowa
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *    
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *    
+ *  * Neither the name Tomasz Sowa nor the names of contributors to this
+ *    project may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+
+#ifndef headerfilettmathuint_x86
+#define headerfilettmathuint_x86
+
+
+#ifndef TTMATH_NOASM
+#ifdef TTMATH_PLATFORM32
+
+
+/*!
+	\file ttmathuint_x86.h
+    \brief template class UInt<uint> with assembler code for 32bit x86 processors
+
+	this file is included at the end of ttmathuint.h
+*/
+
+
+
+/*!
+    \brief a namespace for the TTMath library
+*/
+namespace ttmath
+{
+
+	/*!
+	*
+	*	basic mathematic functions
+	*
+	*/
+
+
+	/*!
+		adding ss2 to the this and adding carry if it's defined
+		(this = this + ss2 + c)
+
+		c should be zero or one (any other non-zero value is treated as one)
+		function returns carry (1) (if it has been)
+
+		the default value of 'c' is given only in the declaration inside the class,
+		a default argument must not be repeated in the definition outside the class
+	*/
+	template<uint value_size>
+	uint UInt<value_size>::Add(const UInt<value_size> & ss2, uint c)
+	{
+	register uint b = value_size;
+	register uint * p1 = table;
+	register uint * p2 = const_cast<uint*>(ss2.table);
+
+		// we don't have to use TTMATH_REFERENCE_ASSERT here
+		// this algorithm doesn't require it
+
+		#ifndef __GNUC__
+			
+			//	this part might be compiled with for example visual c
+			//	ecx - the number of words, ebx - this->table, esi - ss2.table,
+			//	edx - the index of the current word
+
+			__asm
+			{
+				push eax
+				push ebx
+				push ecx
+				push edx
+				push esi
+
+				mov ecx,[b]
+				
+				mov ebx,[p1]
+				mov esi,[p2]
+
+				xor eax,eax  // eax=0
+				mov edx,eax  // edx=0
+
+				sub eax,[c]  // CF=1 if c!=0
+
+			p:
+				mov eax,[esi+edx*4]
+				adc [ebx+edx*4],eax
+
+				inc edx      // inc and dec don't change CF
+				dec ecx
+			jnz p
+
+				setc al
+				movzx edx, al
+				mov [c], edx
+
+				pop esi
+				pop edx
+				pop ecx
+				pop ebx
+				pop eax
+			}
+
+
+
+		#endif		
+			
+
+		#ifdef __GNUC__
+			
+			//	this part should be compiled with gcc
+			//	edi - c, ecx - the number of words, ebx - this->table, esi - ss2.table,
+			//	the carry is returned in edx
+			
+			__asm__ __volatile__(
+			
+				"push %%ecx						\n"
+			
+				"xorl %%eax, %%eax				\n"
+				"movl %%eax, %%edx				\n"
+				"subl %%edi, %%eax				\n"
+
+
+			"1:									\n"
+				"movl (%%esi,%%edx,4),%%eax		\n"
+				"adcl %%eax, (%%ebx,%%edx,4)	\n"
+			
+				"incl %%edx						\n"
+				"decl %%ecx						\n"
+			"jnz 1b								\n"
+
+				"setc %%al						\n"
+				"movzx %%al,%%edx				\n"
+
+				"pop %%ecx						\n"
+
+				: "=d" (c)
+				: "D" (c), "c" (b), "b" (p1), "S" (p2)
+				: "%eax", "cc", "memory" );
+
+		#endif
+
+		TTMATH_LOG("UInt32::Add")
+
+	return c;
+	}
+
+
+
+	/*!
+		adding one word (at a specific position)
+		and returning a carry (if it has been)
+
+		e.g.
+
+		if we've got (value_size=3):
+			table[0] = 10;
+			table[1] = 30;
+			table[2] = 5;	
+		and we call:
+			AddInt(2,1)
+		then it'll be:
+			table[0] = 10;
+			table[1] = 30 + 2;
+			table[2] = 5;
+
+		of course if there was a carry from table[2] it would be returned
+	*/
+	// the default value of 'index' is given only in the declaration inside the class,
+	// a default argument must not be repeated in the definition outside the class
+	template<uint value_size>
+	uint UInt<value_size>::AddInt(uint value, uint index)
+	{
+	register uint b = value_size;
+	register uint * p1 = table;
+	register uint c;
+
+		TTMATH_ASSERT( index < value_size )
+
+		#ifndef __GNUC__
+
+			// ecx - the number of words from 'index' to the end, ebx - this->table,
+			// edx - the index of the current word, eax - the value which is added
+
+			__asm
+			{
+				push eax
+				push ebx
+				push ecx
+				push edx
+
+				mov ecx, [b]
+				sub ecx, [index]				
+
+				mov edx, [index]
+				mov ebx, [p1]
+
+				mov eax, [value]
+
+			p:
+				add [ebx+edx*4], eax
+			jnc end
+
+				mov eax, 1    // only the carry is added to the next words
+				inc edx       // inc and dec don't change CF
+				dec ecx
+			jnz p
+
+			end:
+				setc al
+				movzx edx, al
+				mov [c], edx
+
+				pop edx
+				pop ecx
+				pop ebx
+				pop eax
+			}
+
+		#endif		
+			
+
+		#ifdef __GNUC__
+
+			// eax - value, ecx - the number of words, edx - index, ebx - this->table,
+			// the carry is returned in edx
+
+			__asm__ __volatile__(
+			
+				"push %%eax						\n"
+				"push %%ecx						\n"
+
+				"subl %%edx, %%ecx 				\n"
+
+			"1:									\n"
+				"addl %%eax, (%%ebx,%%edx,4)	\n"
+			"jnc 2f								\n"
+				
+				"movl $1, %%eax					\n"
+				"incl %%edx						\n"
+				"decl %%ecx						\n"
+			"jnz 1b								\n"
+
+			"2:									\n"
+				"setc %%al						\n"
+				"movzx %%al, %%edx				\n"
+
+				"pop %%ecx						\n"
+				"pop %%eax						\n"
+
+				: "=d" (c)
+				: "a" (value), "c" (b), "0" (index), "b" (p1)
+				: "cc", "memory" );
+
+		#endif
+	
+		TTMATH_LOG("UInt32::AddInt")
+
+	return c;
+	}
+
+
+
+
+	/*!
+		adding only two unsigned words to the existing value
+		and these words begin on the 'index' position
+		(it's used in the multiplication algorithm 2)
+
+		index should be equal or smaller than value_size-2 (index <= value_size-2)
+		x1 - lower word, x2 - higher word
+
+		for example if we've got value_size equal 4 and:
+			table[0] = 3
+			table[1] = 4
+			table[2] = 5
+			table[3] = 6
+		then let
+			x1 = 10
+			x2 = 20
+		and
+			index = 1
+
+		the result of this method will be:
+			table[0] = 3
+			table[1] = 4 + x1 = 14
+			table[2] = 5 + x2 = 25
+			table[3] = 6
+		
+		and no carry at the end of table[3]
+
+		(of course if there was a carry in table[2](5+20) then 
+		this carry would be passed to the table[3] etc.)
+	*/
+	// x1 is added to table[index], x2 (with the carry) to table[index+1]
+	// and then only the carry is passed to the higher words;
+	// the method returns the last carry (zero or one)
+	template<uint value_size>
+	uint UInt<value_size>::AddTwoInts(uint x2, uint x1, uint index)
+	{
+	register uint b = value_size;
+	register uint * p1 = table;
+	register uint c;
+
+		// two words are written, so 'index' has to be smaller than value_size-1
+		TTMATH_ASSERT( index < value_size - 1 )
+
+		#ifndef __GNUC__
+			// ecx - the number of words from 'index' to the end, ebx - this->table,
+			// edx - the index of the current word
+			__asm
+			{
+				push eax
+				push ebx
+				push ecx
+				push edx
+
+				mov ecx, [b]
+				sub ecx, [index]				
+
+				mov ebx, [p1]
+				mov edx, [index]
+
+				mov eax, [x1]
+				add [ebx+edx*4], eax
+				inc edx
+				dec ecx
+
+				mov eax, [x2]
+			
+			p:
+				adc [ebx+edx*4], eax
+			jnc end
+
+				mov eax, 0
+				inc edx
+				dec ecx
+			jnz p
+
+			end:
+				setc al
+				movzx edx, al
+				mov [c], edx
+				
+				pop edx
+				pop ecx
+				pop ebx
+				pop eax
+
+			}
+		#endif		
+			
+
+		#ifdef __GNUC__
+			// ecx - the number of words, edx - index, ebx - this->table,
+			// esi - x1, eax - x2 on input and the returned carry on output
+			__asm__ __volatile__(
+			
+				"push %%ecx						\n"
+				"push %%edx						\n"
+
+				"subl %%edx, %%ecx 				\n"
+				
+				"addl %%esi, (%%ebx,%%edx,4) 	\n"
+				"incl %%edx						\n"
+				"decl %%ecx						\n"
+
+			"1:									\n"
+				"adcl %%eax, (%%ebx,%%edx,4)	\n"
+			"jnc 2f								\n"
+
+				"mov $0, %%eax					\n"
+				"incl %%edx						\n"
+				"decl %%ecx						\n"
+			"jnz 1b								\n"
+
+			"2:									\n"
+				"setc %%al						\n"
+				"movzx %%al, %%eax				\n"
+
+				"pop %%edx						\n"
+				"pop %%ecx						\n"
+
+				: "=a" (c)
+				: "c" (b), "d" (index), "b" (p1), "S" (x1), "0" (x2)
+				: "cc", "memory" );
+
+		#endif
+
+		TTMATH_LOG("UInt32::AddTwoInts")
+	
+	return c;
+	}
+
+
+
+
+
+	/*!
+		subtracting ss2 from the 'this' and subtracting
+		carry if it has been defined
+		(this = this - ss2 - c)
+
+		c should be zero or one (any other non-zero value is treated as one)
+		function returns carry (1) (if it has been)
+
+		a default value of 'c' can be given only in the declaration inside the class,
+		a default argument must not appear in the definition outside the class
+	*/
+	template<uint value_size>
+	uint UInt<value_size>::Sub(const UInt<value_size> & ss2, uint c)
+	{
+	register uint b = value_size;
+	register uint * p1 = table;
+	register uint * p2 = const_cast<uint*>(ss2.table);
+
+		// we don't have to use TTMATH_REFERENCE_ASSERT here
+		// this algorithm doesn't require it
+
+		#ifndef __GNUC__
+
+			// ecx - the number of words, ebx - this->table, esi - ss2.table,
+			// edx - the index of the current word
+
+			__asm
+			{
+				push eax
+				push ebx
+				push ecx
+				push edx
+				push esi
+
+				mov ecx,[b]
+				
+				mov ebx,[p1]
+				mov esi,[p2]
+
+				xor eax, eax
+				mov edx, eax
+
+				sub eax, [c]   // CF=1 if c!=0
+
+			p:
+				mov eax, [esi+edx*4]
+				sbb [ebx+edx*4], eax
+
+				inc edx        // inc and dec don't change CF
+				dec ecx
+			jnz p
+
+				setc al
+				movzx edx, al
+				mov [c], edx
+
+				pop esi
+				pop edx
+				pop ecx
+				pop ebx
+				pop eax
+			}
+
+		#endif
+
+
+		#ifdef __GNUC__
+
+			// edi - c, ecx - the number of words, ebx - this->table, esi - ss2.table,
+			// the carry is returned in edx
+
+			__asm__  __volatile__(
+			
+				"push %%ecx						\n"
+			
+				"xorl %%eax, %%eax				\n"
+				"movl %%eax, %%edx				\n"
+				"subl %%edi, %%eax				\n"
+
+
+			"1:									\n"
+				"movl (%%esi,%%edx,4),%%eax		\n"
+				"sbbl %%eax, (%%ebx,%%edx,4)	\n"
+			
+				"incl %%edx						\n"
+				"decl %%ecx						\n"
+			"jnz 1b								\n"
+
+				"setc %%al						\n"
+				"movzx %%al,%%edx				\n"
+
+				"pop %%ecx						\n"
+
+				: "=d" (c)
+				: "D" (c), "c" (b), "b" (p1), "S" (p2)
+				: "%eax", "cc", "memory" );
+
+		#endif
+
+		TTMATH_LOG("UInt32::Sub")
+
+	return c;
+	}
+
+
+
+
+	/*!
+		this method subtracts one word (at a specific position)
+		and returns a carry (if it was)
+
+		e.g.
+
+		if we've got (value_size=3):
+			table[0] = 10;
+			table[1] = 30;
+			table[2] = 5;	
+		and we call:
+			SubInt(2,1)
+		then it'll be:
+			table[0] = 10;
+			table[1] = 30 - 2;
+			table[2] = 5;
+
+		of course if there was a carry from table[2] it would be returned
+
+		the default value of 'index' is given only in the declaration inside the class,
+		a default argument must not be repeated in the definition outside the class
+	*/
+	template<uint value_size>
+	uint UInt<value_size>::SubInt(uint value, uint index)
+	{
+	register uint b = value_size;
+	register uint * p1 = table;
+	register uint c;
+
+		TTMATH_ASSERT( index < value_size )
+
+		#ifndef __GNUC__
+			// ecx - the number of words from 'index' to the end, ebx - this->table,
+			// edx - the index of the current word, eax - the value which is subtracted
+			__asm
+			{
+				push eax
+				push ebx
+				push ecx
+				push edx
+
+				mov ecx, [b]
+				sub ecx, [index]				
+
+				mov edx, [index]
+				mov ebx, [p1]
+
+				mov eax, [value]
+
+			p:
+				sub [ebx+edx*4], eax
+			jnc end
+
+				mov eax, 1    // only the borrow is subtracted from the next words
+				inc edx       // inc and dec don't change CF
+				dec ecx
+			jnz p
+
+			end:
+				setc al
+				movzx edx, al
+				mov [c], edx
+
+				pop edx
+				pop ecx
+				pop ebx
+				pop eax
+			}
+		#endif		
+			
+
+		#ifdef __GNUC__
+			// eax - value, ecx - the number of words, edx - index, ebx - this->table,
+			// the carry is returned in edx
+			__asm__ __volatile__(
+			
+				"push %%eax						\n"
+				"push %%ecx						\n"
+
+				"subl %%edx, %%ecx 				\n"
+
+			"1:									\n"
+				"subl %%eax, (%%ebx,%%edx,4)	\n"
+			"jnc 2f								\n"
+				
+				"movl $1, %%eax					\n"
+				"incl %%edx						\n"
+				"decl %%ecx						\n"
+			"jnz 1b								\n"
+
+			"2:									\n"
+				"setc %%al						\n"
+				"movzx %%al, %%edx				\n"
+
+				"pop %%ecx						\n"
+				"pop %%eax						\n"
+
+				: "=d" (c)
+				: "a" (value), "c" (b), "0" (index), "b" (p1)
+				: "cc", "memory" );
+
+		#endif
+		
+		TTMATH_LOG("UInt32::SubInt")
+	
+	return c;
+	}
+
+
+
+	/*!
+		this method moves all bits into the left hand side
+		return value <- this <- c
+
+		the lowest *bit* will be held the 'c' and
+		the state of one additional bit (on the left hand side)
+		will be returned
+
+		for example:
+		let this is 001010000
+		after Rcl2_one(1) there'll be 010100001 and Rcl2_one returns 0
+	*/
+	// all words are rotated through the carry flag by one bit to the left,
+	// from table[0] up to the highest word; any non-zero 'c' sets the carry flag
+	// at the beginning and the last carry is returned as zero or one
+	template<uint value_size>
+	uint UInt<value_size>::Rcl2_one(uint c)
+	{
+	register sint b = value_size;
+	register uint * p1 = table;
+
+		#ifndef __GNUC__
+			// ebx - this->table, edx - the index of the current word,
+			// ecx - the number of words left
+			__asm
+			{
+				push ebx
+				push ecx
+				push edx
+
+				mov ebx, [p1]
+
+				xor edx, edx
+				mov ecx, edx
+				sub ecx, [c]     // CF=1 if c!=0 (the following mov doesn't change CF)
+
+				mov ecx, [b]
+
+			p:
+				rcl dword ptr [ebx+edx*4], 1
+				
+				inc edx          // inc and dec don't change CF
+				dec ecx
+			jnz p
+
+				setc dl
+				movzx edx, dl
+				mov [c], edx
+
+				
+				pop edx
+				pop ecx
+				pop ebx
+			}
+		#endif
+
+
+		#ifdef __GNUC__
+		// eax - c on input and the returned carry on output,
+		// ecx - the number of words, ebx - this->table
+		__asm__  __volatile__(
+
+			"push %%edx					\n"
+			"push %%ecx					\n"
+
+			"xorl %%edx, %%edx			\n"   // edx=0
+			"neg %%eax					\n"   // CF=1 if eax!=0 , CF=0 if eax==0
+
+		"1:								\n"
+			"rcll $1, (%%ebx, %%edx, 4)	\n"
+
+			"incl %%edx					\n"
+			"decl %%ecx					\n"
+		"jnz 1b							\n"
+
+			"setc %%al					\n"
+			"movzx %%al, %%eax			\n"
+
+			"pop %%ecx					\n"
+			"pop %%edx					\n"
+
+			: "=a" (c)
+			: "0" (c), "c" (b), "b" (p1)
+			: "cc", "memory" );
+
+		#endif
+
+		TTMATH_LOG("UInt32::Rcl2_one")
+
+	return c;
+	}
+
+
+
+	/*!
+		shifts the whole value one bit to the right, through the carry:
+		c -> this -> return value
+
+		the highest bit of the value is set if 'c' is not zero (cleared
+		otherwise) and the bit which is shifted out of the first word
+		(on the right hand side) is returned as zero or one
+
+		for example:
+		let this be 000000010
+		after Rcr2_one(1) there'll be 100000001 and Rcr2_one returns 0
+	*/
+	template<uint value_size>
+	uint UInt<value_size>::Rcr2_one(uint c)
+	{
+	register sint b = value_size;   // loop counter and (minus one) index of the current word
+	register uint * p1 = table;     // the words are rotated from table[b-1] down to table[0]
+
+		#ifndef __GNUC__
+			__asm
+			{
+				push ebx
+				push ecx
+
+				mov ebx, [p1]
+
+				xor ecx, ecx
+				sub ecx, [c]   // 0 - c: CF=1 if c!=0 , CF=0 if c==0
+
+				mov ecx, [b]   // mov doesn't change the flags, CF stays valid
+
+			p:
+				rcr dword ptr [ebx+ecx*4-4], 1   // rotating the word number ecx-1 through CF
+
+				dec ecx        // dec doesn't change CF
+			jnz p
+
+				setc cl        // CF = the bit shifted out of table[0]
+				movzx ecx, cl
+				mov [c], ecx
+
+				pop ecx
+				pop ebx
+			}
+		#endif
+
+
+		#ifdef __GNUC__
+		__asm__  __volatile__(
+
+			"push %%ecx						\n"   // ecx is the loop counter, it is restored at the end
+
+			"neg %%eax						\n"   // CF=1 if eax!=0 , CF=0 if eax==0
+
+		"1:									\n"
+			"rcrl $1, -4(%%ebx, %%ecx, 4)	\n"   // rotating the word number ecx-1 through CF
+
+			"decl %%ecx						\n"   // dec doesn't change CF
+		"jnz 1b								\n"
+
+			"setc %%al						\n"   // CF = the bit shifted out of table[0]
+			"movzx %%al, %%eax				\n"
+
+			"pop %%ecx						\n"
+
+			: "=a" (c)
+			: "0" (c), "c" (b), "b" (p1)
+			: "cc", "memory" );
+
+		#endif
+
+		TTMATH_LOG("UInt32::Rcr2_one")
+
+	return c;
+	}
+
+
+
+	/*!
+		shifts the whole value 'bits' bits to the left:
+		return value <- this <- c
+
+		'bits' must be greater than zero and less than TTMATH_BITS_PER_UINT,
+		the lowest 'bits' bits of the value are set if 'c' is not zero
+		(cleared otherwise) and the last bit shifted out is returned
+
+		for example:
+		let this be 001010000
+		after Rcl2(3, 1) there'll be 010000111 and Rcl2 returns 1
+	*/
+	template<uint value_size>
+	uint UInt<value_size>::Rcl2(uint bits, uint c)
+	{
+	TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
+
+	register sint b = value_size;   // loop counter: one iteration per word
+	register uint * p1 = table;     // the words are rotated starting from table[0]
+	register uint mask;             // the lowest 'bits' bits set, calculated by the asm code
+
+		#ifndef __GNUC__
+			__asm
+			{
+				push eax
+				push ebx
+				push ecx
+				push edx
+				push esi
+				push edi
+
+				mov edi, [b]
+
+				mov ecx, 32
+				sub ecx, [bits]
+				mov edx, -1
+				shr edx, cl        // edx = 0xffffffff >> (32-bits)
+				mov [mask], edx
+
+				mov ecx, [bits]
+				mov ebx, [p1]
+
+				xor edx, edx   // edx = 0
+				mov esi, edx   // old value = 0 
+
+				mov eax, [c]
+				or eax, eax
+				cmovnz esi, [mask] // if c then old value = mask
+
+		p:
+				rol dword ptr [ebx+edx*4], cl   // the highest bits become the lowest ones
+
+				mov eax, [ebx+edx*4]
+				and eax, [mask]      // eax = the bits which left the word
+				xor [ebx+edx*4], eax // clearing bits
+				or [ebx+edx*4], esi  // saving old value
+				mov esi, eax
+
+				inc edx
+				dec edi
+			jnz p
+
+				and eax, 1     // bit 0 = the last bit shifted out of the last word
+				mov [c], eax
+
+				pop edi
+				pop esi
+				pop edx
+				pop ecx
+				pop ebx
+				pop eax
+			}
+		#endif
+
+
+		#ifdef __GNUC__
+		__asm__  __volatile__(
+
+			// the stack pointer is not touched here (no push/pop): 'mask' is
+			// a memory operand and the compiler may address it through %esp;
+			// the scratch registers edx and esi are listed as clobbered instead
+
+			"movl %%ecx, %%esi				\n"   // esi = bits
+			"movl $32, %%ecx				\n"
+			"subl %%esi, %%ecx				\n"   // ecx = 32 - bits
+			"movl $-1, %%edx				\n"
+			"shrl %%cl, %%edx				\n"   // edx = the lowest 'bits' bits set
+			"movl %%edx, %[amask]			\n"
+			"movl %%esi, %%ecx				\n"   // ecx = bits again
+
+			"xorl %%edx, %%edx				\n"   // edx = 0 (index of the current word)
+			"movl %%edx, %%esi				\n"   // esi = the bits carried into the current word
+
+			"orl %%eax, %%eax				\n"
+			"cmovnz %[amask], %%esi			\n"   // if c then the carried bits are all ones
+
+		"1:									\n"
+			"roll %%cl, (%%ebx,%%edx,4)		\n"   // the highest bits become the lowest ones
+
+			"movl (%%ebx,%%edx,4), %%eax	\n"
+			"andl %[amask], %%eax			\n"   // eax = the bits which left the word
+			"xorl %%eax, (%%ebx,%%edx,4)	\n"   // clearing them in the word
+			"orl  %%esi, (%%ebx,%%edx,4)	\n"   // putting the bits from the previous word
+			"movl %%eax, %%esi				\n"
+
+			"incl %%edx						\n"
+			"decl %%edi						\n"
+		"jnz 1b								\n"
+
+			"and $1, %%eax					\n"   // bit 0 = the last bit shifted out
+
+			// edi (tied to 'b' by "+D") is zero after the loop, ecx holds
+			// 'bits' again and ebx was not changed, so the input registers
+			// are left exactly as the constraints below describe them
+
+			: "=a" (c), "+D" (b), [amask] "=m" (mask)
+			: "0" (c), "b" (p1), "c" (bits)
+			: "edx", "esi", "cc", "memory" );
+
+		#endif
+
+		TTMATH_LOG("UInt32::Rcl2")
+
+	return c;
+	}
+
+
+
+
+	/*!
+		shifts the whole value 'bits' bits to the right:
+		c -> this -> return value
+
+		'bits' must be greater than zero and less than TTMATH_BITS_PER_UINT,
+		the highest 'bits' bits of the value are set if 'c' is not zero
+		(cleared otherwise) and the last bit shifted out is returned
+
+		for example:
+		let this be 000000010
+		after Rcr2(2, 1) there'll be 110000000 and Rcr2 returns 1
+	*/
+	template<uint value_size>
+	uint UInt<value_size>::Rcr2(uint bits, uint c)
+	{
+	TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
+
+	register sint b = value_size;   // loop counter: one iteration per word
+	register uint * p1 = table;     // the words are rotated from table[b-1] down to table[0]
+	register uint mask;             // the highest 'bits' bits set, calculated by the asm code
+
+		#ifndef __GNUC__
+			__asm
+			{
+				push eax
+				push ebx
+				push ecx
+				push edx
+				push esi
+				push edi
+
+				mov edi, [b]
+
+				mov ecx, 32
+				sub ecx, [bits]
+				mov edx, -1
+				shl edx, cl        // edx = 0xffffffff << (32-bits)
+				mov [mask], edx
+
+				mov ecx, [bits]
+				mov ebx, [p1]
+
+				xor edx, edx   // edx = 0
+				mov esi, edx   // old value = 0 
+				add edx, edi   
+				dec edx        // edx - is pointing at the last word
+
+				mov eax, [c]
+				or eax, eax
+				cmovnz esi, [mask] // if c then old value = mask
+
+			p:
+				ror dword ptr [ebx+edx*4], cl   // the lowest bits become the highest ones
+
+				mov eax, [ebx+edx*4]
+				and eax, [mask]      // eax = the bits which left the word
+				xor [ebx+edx*4], eax // clearing bits
+				or [ebx+edx*4], esi  // saving old value
+				mov esi, eax
+
+				dec edx
+				dec edi
+			jnz p
+
+				rol eax, 1    // 31bit will be first
+				and eax, 1    // it is the last bit shifted out of table[0]
+				mov [c], eax
+
+				pop edi
+				pop esi
+				pop edx
+				pop ecx
+				pop ebx
+				pop eax
+			}
+		#endif
+
+
+		#ifdef __GNUC__
+			__asm__  __volatile__(
+
+			// the stack pointer is not touched here (no push/pop): 'mask' is
+			// a memory operand and the compiler may address it through %esp;
+			// the scratch registers edx and esi are listed as clobbered instead
+
+			"movl %%ecx, %%esi				\n"   // esi = bits
+			"movl $32, %%ecx				\n"
+			"subl %%esi, %%ecx				\n"   // ecx = 32 - bits
+			"movl $-1, %%edx				\n"
+			"shll %%cl, %%edx				\n"   // edx = the highest 'bits' bits set
+			"movl %%edx, %[amask]			\n"
+			"movl %%esi, %%ecx				\n"   // ecx = bits again
+
+			"xorl %%edx, %%edx				\n"
+			"movl %%edx, %%esi				\n"   // esi = the bits carried into the current word
+			"addl %%edi, %%edx				\n"
+			"decl %%edx						\n"   // edx = index of the last word
+
+			"orl %%eax, %%eax				\n"
+			"cmovnz %[amask], %%esi			\n"   // if c then the carried bits are all ones
+
+		"1:									\n"
+			"rorl %%cl, (%%ebx,%%edx,4)		\n"   // the lowest bits become the highest ones
+
+			"movl (%%ebx,%%edx,4), %%eax	\n"
+			"andl %[amask], %%eax			\n"   // eax = the bits which left the word
+			"xorl %%eax, (%%ebx,%%edx,4)	\n"   // clearing them in the word
+			"orl  %%esi, (%%ebx,%%edx,4)	\n"   // putting the bits from the previous word
+			"movl %%eax, %%esi				\n"
+
+			"decl %%edx						\n"
+			"decl %%edi						\n"
+		"jnz 1b								\n"
+
+			"roll $1, %%eax					\n"   // bit 31 goes to bit 0
+			"andl $1, %%eax					\n"   // it is the last bit shifted out
+
+			// edi (tied to 'b' by "+D") is zero after the loop, ecx holds
+			// 'bits' again and ebx was not changed, so the input registers
+			// are left exactly as the constraints below describe them
+
+			: "=a" (c), "+D" (b), [amask] "=m" (mask)
+			: "0" (c), "b" (p1), "c" (bits)
+			: "edx", "esi", "cc", "memory" );
+
+		#endif
+
+		TTMATH_LOG("UInt32::Rcr2")
+
+	return c;
+	}
+
+
+
+	/*!
+		this method returns the index of the highest set bit in one 32-bit word
+		(bit 0 is the lowest one), if 'x' is zero this method returns '-1'
+	*/
+	template<uint value_size>
+	sint UInt<value_size>::FindLeadingBitInWord(uint x)
+	{
+	register sint result;
+
+		#ifndef __GNUC__
+			__asm
+			{
+				push eax
+				push edx
+
+				mov edx,-1
+				bsr eax,[x]      // ZF=1 if x is zero (eax is not defined then)
+				cmovz eax,edx    // x was zero: the result is -1
+				mov [result], eax
+
+				pop edx
+				pop eax
+			}
+		#endif
+
+
+		#ifdef __GNUC__
+			__asm__  __volatile__(
+
+			"bsrl %1, %0		\n"   // ZF=1 if x is zero (the result is not defined then)
+			"jnz 1f				\n"
+			"movl $-1, %0		\n"   // x was zero: the result is -1
+			"1:					\n"
+
+			: "=R" (result)
+			: "R" (x)
+			: "cc" );
+
+		#endif
+
+	return result;
+	}
+
+
+
+
+
+	/*!
+		this method sets the bit number 'bit' in the 'value'
+		and returns the previous state of that bit (zero or one)
+
+		bit is from <0,31>
+		e.g.
+		 uint x = 100;
+		 uint bit = SetBitInWord(x, 3);
+		 now: x = 108 and bit = 0
+	*/
+	template<uint value_size>
+	uint UInt<value_size>::SetBitInWord(uint & value, uint bit)
+	{
+		TTMATH_ASSERT( bit < TTMATH_BITS_PER_UINT )
+
+		uint old_bit;
+		uint v = value;   // a local copy for the asm code, written back at the end
+
+		#ifndef __GNUC__
+			__asm
+			{
+			push ebx
+			push eax
+
+			mov eax, [v]
+			mov ebx, [bit]
+			bts eax, ebx     // CF = the old state of the bit, then the bit is set
+			mov [v], eax     // mov doesn't change the flags, CF stays valid
+
+			setc bl
+			movzx ebx, bl
+			mov [old_bit], ebx
+
+			pop eax
+			pop ebx
+			}
+		#endif
+
+
+		#ifdef __GNUC__
+			__asm__  __volatile__(
+
+			"btsl %%ebx, %%eax		\n"   // CF = the old state of the bit, then the bit is set
+
+			"setc %%bl				\n"   // ebx = the old state of the bit (0 or 1)
+			"movzx %%bl, %%ebx		\n"
+
+			: "=a" (v), "=b" (old_bit)
+			: "0" (v), "1" (bit)
+			: "cc" );
+
+		#endif
+
+		value = v;
+
+	return old_bit;
+	}
+
+
+
+
+	/*!
+		multiplication: result2:result1 = a * b
+		result2 - higher word
+		result1 - lower word of the result
+	
+		the product of two words always fits in two words (there is no carry)
+
+		it is an auxiliary method for second version of the multiplication algorithm
+	*/
+	template<uint value_size>
+	void UInt<value_size>::MulTwoWords(uint a, uint b, uint * result2, uint * result1)
+	{
+	/*
+		the asm code writes to these temporary variables and they are copied
+		through result1 and result2 in C++ - in this way the compiler knows
+		that the values pointed to by result1 and result2 have changed
+
+		this has no effect in visual studio but it's useful when
+		using gcc and options like -Ox
+	*/
+	register uint result1_;
+	register uint result2_;
+
+		#ifndef __GNUC__
+
+			__asm
+			{
+			push eax
+			push edx
+
+			mov eax, [a]
+			mul dword ptr [b]     // edx:eax = eax * b
+
+			mov [result2_], edx
+			mov [result1_], eax
+
+			pop edx
+			pop eax
+			}
+
+		#endif
+
+
+		#ifdef __GNUC__
+
+		__asm__ __volatile__(
+
+			"mull %%edx			\n"   // edx:eax = eax * edx  (a is in eax, b is in edx)
+
+			: "=a" (result1_), "=d" (result2_)
+			: "0" (a), "1" (b)
+			: "cc" );
+
+		#endif
+
+
+		*result1 = result1_;
+		*result2 = result2_;
+	}
+
+
+
+
+
+	/*!
+	 *
+	 * Division
+	 *
+	 *
+	*/
+	
+
+
+
+	/*!
+		this method calculates 64bits word a:b / 32bits c (a higher, b lower word)
+		r = a:b / c and rest - remainder
+
+		*
+		* WARNING:
+		* if the quotient doesn't fit in one word (r) or c is equal zero
+		* the 'div' instruction raises a hardware exception (divide error, 0)
+		* and this is probably the end of your program
+		*
+	*/
+	template<uint value_size>
+	void UInt<value_size>::DivTwoWords(uint a, uint b, uint c, uint * r, uint * rest)
+	{
+		register uint r_;
+		register uint rest_;
+		/*
+			the asm code writes to these temporary variables and they are
+			copied through 'r' and 'rest' at the end of the method
+		*/
+
+		TTMATH_ASSERT( c != 0 )
+
+		#ifndef __GNUC__
+			__asm
+			{
+				push eax
+				push edx
+
+				mov edx, [a]
+				mov eax, [b]
+				div dword ptr [c]    // eax = edx:eax / c , edx = the remainder
+
+				mov [r_], eax
+				mov [rest_], edx
+
+				pop edx
+				pop eax
+			}
+		#endif
+
+
+		#ifdef __GNUC__
+
+			__asm__ __volatile__(
+
+			"divl %%ecx				\n"   // eax = edx:eax / ecx , edx = the remainder
+
+			: "=a" (r_), "=d" (rest_)
+			: "d" (a), "a" (b), "c" (c)
+			: "cc" );
+
+		#endif
+
+
+		*r = r_;
+		*rest = rest_;
+
+	}
+
+
+
+} //namespace
+
+
+
+#endif //ifdef TTMATH_PLATFORM32
+#endif //ifndef TTMATH_NOASM
+#endif
diff --git a/ttmath/ttmathuint64.h b/ttmath/ttmathuint_x86_64.h
similarity index 83%
rename from ttmath/ttmathuint64.h
rename to ttmath/ttmathuint_x86_64.h
index 031dd8a..96bf662 100644
--- a/ttmath/ttmathuint64.h
+++ b/ttmath/ttmathuint_x86_64.h
@@ -36,10 +36,19 @@
  */
 
 
+#ifndef headerfilettmathuint_x86_64
+#define headerfilettmathuint_x86_64
+
+
+#ifndef TTMATH_NOASM
+#ifdef TTMATH_PLATFORM64
+
 
 /*!
-	\file ttmathuint.h
-    \brief template class UInt<uint> for 64bit processors
+	\file ttmathuint_x86_64.h
+    \brief template class UInt<uint> with assembler code for 64bit x86_64 processors
+
+	this file is included at the end of ttmathuint.h
 */
 
 
@@ -52,155 +61,6 @@ namespace ttmath
 	*
 	*/
 
-#ifdef TTMATH_PLATFORM64
-
-
-
-	/*!
-		in 64bit platforms we must define additional operators and contructors
-		in order to allow a user initializing the objects in this way:
-			UInt<...> type = 20;
-		or
-			UInt<...> type; 
-			type = 30;
-
-		decimal constants such as 20, 30 etc. are integer literal of type int,
-		if the value is greater it can even be long int,
-		0 is an octal integer of type int
-		(ISO 14882 p2.13.1 Integer literals)
-	*/
-
-	/*!
-		this operator converts the unsigned int type to this class
-
-		***this operator is created only on a 64bit platform***
-		it takes one argument of 32bit
-	*/
-	template<uint value_size>
-	UInt<value_size> & UInt<value_size>::operator=(unsigned int i)
-	{
-		FromUInt(uint(i));
-
-		TTMATH_LOG("UInt64::operator=(unsigned int)")
-
-	return *this;
-	}
-
-
-	/*!
-		a constructor for converting the unsigned int to this class
-
-		***this constructor is created only on a 64bit platform***
-		it takes one argument of 32bit
-	*/
-	template<uint value_size>
-	UInt<value_size>::UInt(unsigned int i)
-	{
-		FromUInt(uint(i));
-
-		TTMATH_LOG("UInt64::UInt(unsigned int)")
-	}
-
-
-	/*!
-		an operator for converting the signed int to this class
-
-		***this constructor is created only on a 64bit platform***
-		it takes one argument of 32bit
-
-		look at the description of UInt::operator=(sint)
-	*/
-	template<uint value_size>
-	UInt<value_size> & UInt<value_size>::operator=(signed int i)
-	{
-		FromUInt(uint(i));
-
-		TTMATH_LOG("UInt64::operator=(signed int)")
-
-	return *this;
-	}
-
-
-	/*!
-		a constructor for converting the signed int to this class
-
-		***this constructor is created only on a 64bit platform***
-		it takes one argument of 32bit
-
-		look at the description of UInt::operator=(sint)
-	*/
-	template<uint value_size>
-	UInt<value_size>::UInt(signed int i)
-	{
-		FromUInt(uint(i));
-
-		TTMATH_LOG("UInt64::UInt(signed int)")
-	}
-
-
-
-	/*!
-		this method copies the value stored in an another table
-		(warning: first values in temp_table are the highest words -- it's different
-		from our table)
-
-		***this method is created only on a 64bit platform***
-
-		we copy as many words as it is possible
-		
-		if temp_table_len is bigger than value_size we'll try to round 
-		the lowest word from table depending on the last not used bit in temp_table
-		(this rounding isn't a perfect rounding -- look at the description below)
-
-		and if temp_table_len is smaller than value_size we'll clear the rest words
-		in the table
-
-		warning: we're using 'temp_table' as a pointer at 32bit words
-	*/
-	template<uint value_size>
-	void UInt<value_size>::SetFromTable(const unsigned int * temp_table, uint temp_table_len)
-	{
-		uint temp_table_index = 0;
-		sint i; // 'i' with a sign
-
-		for(i=value_size-1 ; i>=0 && temp_table_index<temp_table_len; --i, ++temp_table_index)
-		{
-			table[i] = uint(temp_table[ temp_table_index ]) << 32;
-
-			++temp_table_index;
-
-			if( temp_table_index<temp_table_len )
-				table[i] |= temp_table[ temp_table_index ];
-		}
-
-
-		// rounding mantissa
-		if( temp_table_index < temp_table_len )
-		{
-			if( (temp_table[temp_table_index] & TTMATH_UINT_HIGHEST_BIT) != 0 )
-			{
-				/*
-					very simply rounding
-					if the bit from not used last word from temp_table is set to one
-					we're rouding the lowest word in the table
-
-					in fact there should be a normal addition but
-					we don't use Add() or AddTwoInts() because these methods 
-					can set a carry and then there'll be a small problem
-					for optimization
-				*/
-				if( table[0] != TTMATH_UINT_MAX_VALUE )
-					++table[0];
-			}
-		}
-
-		// cleaning the rest of the mantissa
-		for( ; i >= 0 ; --i)
-			table[i] = 0;
-
-		TTMATH_LOG("UInt64::SetFromTable")
-	}
-
 
 
 	/*!
@@ -687,6 +547,7 @@ namespace ttmath
 	{
 	TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
 
+	// !!! why is this variable signed?
 	register sint b = value_size;
 	register uint * p1 = table;
 	register uint mask;
@@ -997,6 +858,8 @@ namespace ttmath
 			the multiplication algorithm MulTwoWords
 		*/
 
+		TTMATH_ASSERT( c != 0 )
+
 		#ifndef __GNUC__
 			#error "another compiler than GCC is currently not supported in 64bit mode"
 		#endif
@@ -1018,6 +881,12 @@ namespace ttmath
 		*rest = rest_;
 	}
 
-#endif
 
 } //namespace
+
+
+#endif //ifdef TTMATH_PLATFORM64
+#endif //ifndef TTMATH_NOASM
+#endif
+
+