From 85945b2bb06963a96bedf14012b0077d91220b90 Mon Sep 17 00:00:00 2001 From: Tomasz Sowa Date: Mon, 4 May 2009 20:51:12 +0000 Subject: [PATCH] added: ttmathuint_x86.h, ttmathuint_x86_64.h, ttmathuint_noasm.h, all the methods which are using assembler code have been rewritten to no-asm forms, now we have: 1. asm for x86 file: ttmathuint_x86.h 2. asm for x86_64 file: ttmathuint_x86_64.h 3. no asm file: ttmathuint_noasm.h (it's used when macro TTMATH_NOASM is defined) The third form can be used on x86 and x86_64 as well and on other platforms with a little effort. (Temporarily I left there some '#ifdef's for debugging.) git-svn-id: svn://ttmath.org/publicrep/ttmath/trunk@126 e52654a7-88a9-db11-a3e9-0013d4bc506e --- CHANGELOG | 18 +- ttmath/ttmathtypes.h | 23 +- ttmath/ttmathuint.h | 1408 +++-------------- ttmath/ttmathuint_noasm.h | 885 +++++++++++ ttmath/ttmathuint_x86.h | 1281 +++++++++++++++ .../{ttmathuint64.h => ttmathuint_x86_64.h} | 173 +- 6 files changed, 2436 insertions(+), 1352 deletions(-) create mode 100644 ttmath/ttmathuint_noasm.h create mode 100644 ttmath/ttmathuint_x86.h rename ttmath/{ttmathuint64.h => ttmathuint_x86_64.h} (83%) diff --git a/CHANGELOG b/CHANGELOG index 35755b0..7a634a2 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,9 +1,23 @@ -Version 0.8.4 prerelease (2009.05.01): +Version 0.8.4 prerelease (2009.05.04): + * fixed: UInt::DivInt() didn't check whether the divisor is zero + there was a hardware interruption when the divisor was zero + (now the method returns one) * added: UInt::PrintLog(const char * msg, std::ostream & output) - used for debugging purposes by macro TTMATH_LOG(msg) + used (for debugging purposes) by macro TTMATH_LOG(msg) (it is used in nearly all methods in UInt class) * added: macro TTMATH_DEBUG_LOG: when defined then TTMATH_LOG() put some debug information (to std::cout) + * added: ttmathuint_x86.h, ttmathuint_x86_64.h, ttmathuint_noasm.h, + all the methods which are using assembler code have been + rewritten to no-asm forms, now we have: + 1. asm for x86 file: ttmathuint_x86.h + 2. asm for x86_64 file: ttmathuint_x86_64.h + 3. no asm file: ttmathuint_noasm.h + (it's used when macro TTMATH_NOASM is defined) + The third form can be used on x86 and x86_64 as well and + on other platforms with a little effort. + (Temporarily I left there some '#ifdef's for debugging.) + Version 0.8.3 (2009.04.06): * fixed: RclMoveAllWords() and RcrMoveAllWords() sometimes didn't return diff --git a/ttmath/ttmathtypes.h b/ttmath/ttmathtypes.h index 42ac7e4..be98f12 100644 --- a/ttmath/ttmathtypes.h +++ b/ttmath/ttmathtypes.h @@ -83,7 +83,7 @@ gcc -DTTMATH_RELEASE -o myprogram myprogram.cpp or by defining this macro in your code before using any header files of this library - if TTMATH_RELEASE is not set then TTMATH_DEBUG is set + if TTMATH_RELEASE is not set then TTMATH_DEBUG is set automatically */ #ifndef TTMATH_RELEASE #define TTMATH_DEBUG @@ -120,6 +120,18 @@ namespace ttmath typedef unsigned int uint; typedef signed int sint; + + /*! + this type is twice bigger than uint + (64bit on a 32bit platforms) + + although C++ Standard - ANSI ISO IEC 14882:2003 doesn't define such a type (long long) + but it is defined in C99 and in upcoming C++0x /3.9.1 (2)/ and many compilers support it + + this type is used in UInt::MulTwoWords and UInt::DivTwoWords when macro TTMATH_NOASM is defined + */ + typedef unsigned long long int ulint; + /*! how many bits there are in the uint type */ @@ -151,6 +163,15 @@ namespace ttmath typedef unsigned long uint; typedef signed long sint; + /*! + on 64bit platform we do not define ulint + sizeof(long long) is 8 (64bit) but we need 128bit + + on 64 bit platform (when there is defined TTMATH_NOASM macro) + methods UInt::MulTwoWords and UInt::DivTwoWords are using other algorithms than those on 32 bit + */ + //typedef unsigned long long int ulint; + /*! how many bits there are in the uint type */ diff --git a/ttmath/ttmathuint.h b/ttmath/ttmathuint.h index b39a623..d7cc7b8 100644 --- a/ttmath/ttmathuint.h +++ b/ttmath/ttmathuint.h @@ -241,6 +241,74 @@ public: TTMATH_LOG("UInt32::SetFromTable") } +#endif + + +#ifdef TTMATH_PLATFORM64 + /*! + this method copies the value stored in an another table + (warning: first values in temp_table are the highest words -- it's different + from our table) + + ***this method is created only on a 64bit platform*** + + we copy as many words as it is possible + + if temp_table_len is bigger than value_size we'll try to round + the lowest word from table depending on the last not used bit in temp_table + (this rounding isn't a perfect rounding -- look at the description below) + + and if temp_table_len is smaller than value_size we'll clear the rest words + in the table + + warning: we're using 'temp_table' as a pointer at 32bit words + */ + void SetFromTable(const unsigned int * temp_table, uint temp_table_len) + { + uint temp_table_index = 0; + sint i; // 'i' with a sign + + for(i=value_size-1 ; i>=0 && temp_table_index= 0 ; --i) + table[i] = 0; + + TTMATH_LOG("UInt64::SetFromTable") + } + +#endif + + @@ -251,530 +319,6 @@ public: */ - /*! - adding ss2 to the this and adding carry if it's defined - (this = this + ss2 + c) - - c must be zero or one (might be a bigger value than 1) - function returns carry (1) (if it has been) - */ - uint Add(const UInt & ss2, uint c=0) - { - register uint b = value_size; - register uint * p1 = table; - register uint * p2 = const_cast(ss2.table); - - // we don't have to use TTMATH_REFERENCE_ASSERT here - // this algorithm doesn't require it - - #ifndef __GNUC__ - - // this part might be compiled with for example visual c - - __asm - { - push eax - push ebx - push ecx - push edx - push esi - - mov ecx,[b] - - mov ebx,[p1] - mov esi,[p2] - - xor eax,eax // eax=0 - mov edx,eax // edx=0 - - sub eax,[c] // CF=c - - p: - mov eax,[esi+edx*4] - adc [ebx+edx*4],eax - - inc edx - dec ecx - jnz p - - setc al - movzx edx, al - mov [c], edx - - pop esi - pop edx - pop ecx - pop ebx - pop eax - } - - - - #endif - - - #ifdef __GNUC__ - - // this part should be compiled with gcc - - __asm__ __volatile__( - - "push %%ecx \n" - - "xorl %%eax, %%eax \n" - "movl %%eax, %%edx \n" - "subl %%edi, %%eax \n" - - - "1: \n" - "movl (%%esi,%%edx,4),%%eax \n" - "adcl %%eax, (%%ebx,%%edx,4) \n" - - "incl %%edx \n" - "decl %%ecx \n" - "jnz 1b \n" - - "setc %%al \n" - "movzx %%al,%%edx \n" - - "pop %%ecx \n" - - : "=d" (c) - : "D" (c), "c" (b), "b" (p1), "S" (p2) - : "%eax", "cc", "memory" ); - - #endif - - TTMATH_LOG("UInt32::Add") - - return c; - } - - - /*! - adding one word (at a specific position) - and returning a carry (if it has been) - - e.g. - - if we've got (value_size=3): - table[0] = 10; - table[1] = 30; - table[2] = 5; - and we call: - AddInt(2,1) - then it'll be: - table[0] = 10; - table[1] = 30 + 2; - table[2] = 5; - - of course if there was a carry from table[2] it would be returned - */ - uint AddInt(uint value, uint index = 0) - { - register uint b = value_size; - register uint * p1 = table; - register uint c; - - TTMATH_ASSERT( index < value_size ) - - #ifndef __GNUC__ - - __asm - { - push eax - push ebx - push ecx - push edx - - mov ecx, [b] - sub ecx, [index] - - mov edx, [index] - mov ebx, [p1] - - mov eax, [value] - - p: - add [ebx+edx*4], eax - jnc end - - mov eax, 1 - inc edx - dec ecx - jnz p - - end: - setc al - movzx edx, al - mov [c], edx - - pop edx - pop ecx - pop ebx - pop eax - } - - #endif - - - #ifdef __GNUC__ - __asm__ __volatile__( - - "push %%eax \n" - "push %%ecx \n" - - "subl %%edx, %%ecx \n" - - "1: \n" - "addl %%eax, (%%ebx,%%edx,4) \n" - "jnc 2f \n" - - "movl $1, %%eax \n" - "incl %%edx \n" - "decl %%ecx \n" - "jnz 1b \n" - - "2: \n" - "setc %%al \n" - "movzx %%al, %%edx \n" - - "pop %%ecx \n" - "pop %%eax \n" - - : "=d" (c) - : "a" (value), "c" (b), "0" (index), "b" (p1) - : "cc", "memory" ); - - #endif - - TTMATH_LOG("UInt32::AddInt") - - return c; - } - - - - /*! - adding only two unsigned words to the existing value - and these words begin on the 'index' position - (it's used in the multiplication algorithm 2) - - index should be equal or smaller than value_size-2 (index <= value_size-2) - x1 - lower word, x2 - higher word - - for example if we've got value_size equal 4 and: - table[0] = 3 - table[1] = 4 - table[2] = 5 - table[3] = 6 - then let - x1 = 10 - x2 = 20 - and - index = 1 - - the result of this method will be: - table[0] = 3 - table[1] = 4 + x1 = 14 - table[2] = 5 + x2 = 25 - table[3] = 6 - - and no carry at the end of table[3] - - (of course if there was a carry in table[2](5+20) then - this carry would be passed to the table[3] etc.) - */ - uint AddTwoInts(uint x2, uint x1, uint index) - { - register uint b = value_size; - register uint * p1 = table; - register uint c; - - TTMATH_ASSERT( index < value_size - 1 ) - - #ifndef __GNUC__ - __asm - { - push eax - push ebx - push ecx - push edx - - mov ecx, [b] - sub ecx, [index] - - mov ebx, [p1] - mov edx, [index] - - mov eax, [x1] - add [ebx+edx*4], eax - inc edx - dec ecx - - mov eax, [x2] - - p: - adc [ebx+edx*4], eax - jnc end - - mov eax, 0 - inc edx - dec ecx - jnz p - - end: - setc al - movzx edx, al - mov [c], edx - - pop edx - pop ecx - pop ebx - pop eax - - } - #endif - - - #ifdef __GNUC__ - __asm__ __volatile__( - - "push %%ecx \n" - "push %%edx \n" - - "subl %%edx, %%ecx \n" - - "addl %%esi, (%%ebx,%%edx,4) \n" - "incl %%edx \n" - "decl %%ecx \n" - - "1: \n" - "adcl %%eax, (%%ebx,%%edx,4) \n" - "jnc 2f \n" - - "mov $0, %%eax \n" - "incl %%edx \n" - "decl %%ecx \n" - "jnz 1b \n" - - "2: \n" - "setc %%al \n" - "movzx %%al, %%eax \n" - - "pop %%edx \n" - "pop %%ecx \n" - - : "=a" (c) - : "c" (b), "d" (index), "b" (p1), "S" (x1), "0" (x2) - : "cc", "memory" ); - - #endif - - TTMATH_LOG("UInt32::AddTwoInts") - - return c; - } - - - - - - /*! - subtracting ss2 from the 'this' and subtracting - carry if it has been defined - (this = this - ss2 - c) - - c must be zero or one (might be a bigger value than 1) - function returns carry (1) (if it has been) - */ - uint Sub(const UInt & ss2, uint c=0) - { - register uint b = value_size; - register uint * p1 = table; - register uint * p2 = const_cast(ss2.table); - - // we don't have to use TTMATH_REFERENCE_ASSERT here - // this algorithm doesn't require it - - #ifndef __GNUC__ - - __asm - { - push eax - push ebx - push ecx - push edx - push esi - - mov ecx,[b] - - mov ebx,[p1] - mov esi,[p2] - - xor eax, eax - mov edx, eax - - sub eax, [c] - - p: - mov eax, [esi+edx*4] - sbb [ebx+edx*4], eax - - inc edx - dec ecx - jnz p - - setc al - movzx edx, al - mov [c], edx - - pop esi - pop edx - pop ecx - pop ebx - pop eax - } - - #endif - - - #ifdef __GNUC__ - __asm__ __volatile__( - - "push %%ecx \n" - - "xorl %%eax, %%eax \n" - "movl %%eax, %%edx \n" - "subl %%edi, %%eax \n" - - - "1: \n" - "movl (%%esi,%%edx,4),%%eax \n" - "sbbl %%eax, (%%ebx,%%edx,4) \n" - - "incl %%edx \n" - "decl %%ecx \n" - "jnz 1b \n" - - "setc %%al \n" - "movzx %%al,%%edx \n" - - "pop %%ecx \n" - - : "=d" (c) - : "D" (c), "c" (b), "b" (p1), "S" (p2) - : "%eax", "cc", "memory" ); - - #endif - - TTMATH_LOG("UInt32::Sub") - - return c; - } - - - /*! - this method subtracts one word (at a specific position) - and returns a carry (if it was) - - e.g. - - if we've got (value_size=3): - table[0] = 10; - table[1] = 30; - table[2] = 5; - and we call: - SubInt(2,1) - then it'll be: - table[0] = 10; - table[1] = 30 - 2; - table[2] = 5; - - of course if there was a carry from table[3] it would be returned - */ - uint SubInt(uint value, uint index = 0) - { - register uint b = value_size; - register uint * p1 = table; - register uint c; - - TTMATH_ASSERT( index < value_size ) - - #ifndef __GNUC__ - __asm - { - push eax - push ebx - push ecx - push edx - - mov ecx, [b] - sub ecx, [index] - - mov edx, [index] - mov ebx, [p1] - - mov eax, [value] - - p: - sub [ebx+edx*4], eax - jnc end - - mov eax, 1 - inc edx - dec ecx - jnz p - - end: - setc al - movzx edx, al - mov [c], edx - - pop edx - pop ecx - pop ebx - pop eax - } - #endif - - - #ifdef __GNUC__ - __asm__ __volatile__( - - "push %%eax \n" - "push %%ecx \n" - - "subl %%edx, %%ecx \n" - - "1: \n" - "subl %%eax, (%%ebx,%%edx,4) \n" - "jnc 2f \n" - - "movl $1, %%eax \n" - "incl %%edx \n" - "decl %%ecx \n" - "jnz 1b \n" - - "2: \n" - "setc %%al \n" - "movzx %%al, %%edx \n" - - "pop %%ecx \n" - "pop %%eax \n" - - : "=d" (c) - : "a" (value), "c" (b), "0" (index), "b" (p1) - : "cc", "memory" ); - - #endif - - TTMATH_LOG("UInt32::SubInt") - - return c; - } - -#endif /*! @@ -798,430 +342,6 @@ public: private: -#ifdef TTMATH_PLATFORM32 - - - /*! - this method moves all bits into the left hand side - return value <- this <- c - - the lowest *bit* will be held the 'c' and - the state of one additional bit (on the left hand side) - will be returned - - for example: - let this is 001010000 - after Rcl2_one(1) there'll be 010100001 and Rcl2_one returns 0 - */ - uint Rcl2_one(uint c) - { - register sint b = value_size; - register uint * p1 = table; - - #ifndef __GNUC__ - __asm - { - push ebx - push ecx - push edx - - mov ebx, [p1] - - xor edx, edx - mov ecx, edx - sub ecx, [c] - - mov ecx, [b] - - p: - rcl dword ptr [ebx+edx*4], 1 - - inc edx - dec ecx - jnz p - - setc dl - movzx edx, dl - mov [c], edx - - - pop edx - pop ecx - pop ebx - } - #endif - - - #ifdef __GNUC__ - __asm__ __volatile__( - - "push %%edx \n" - "push %%ecx \n" - - "xorl %%edx, %%edx \n" // edx=0 - "neg %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0 - - "1: \n" - "rcll $1, (%%ebx, %%edx, 4) \n" - - "incl %%edx \n" - "decl %%ecx \n" - "jnz 1b \n" - - "setc %%al \n" - "movzx %%al, %%eax \n" - - "pop %%ecx \n" - "pop %%edx \n" - - : "=a" (c) - : "0" (c), "c" (b), "b" (p1) - : "cc", "memory" ); - - #endif - - TTMATH_LOG("UInt32::Rcl2_one") - - return c; - } - - - /*! - this method moves all bits into the right hand side - c -> this -> return value - - the highest *bit* will be held the 'c' and - the state of one additional bit (on the right hand side) - will be returned - - for example: - let this is 000000010 - after Rcr2_one(1) there'll be 100000001 and Rcr2_one returns 0 - */ - uint Rcr2_one(uint c) - { - register sint b = value_size; - register uint * p1 = table; - - #ifndef __GNUC__ - __asm - { - push ebx - push ecx - - mov ebx, [p1] - - xor ecx, ecx - sub ecx, [c] - - mov ecx, [b] - - p: - rcr dword ptr [ebx+ecx*4-4], 1 - - dec ecx - jnz p - - setc cl - movzx ecx, cl - mov [c], ecx - - pop ecx - pop ebx - } - #endif - - - #ifdef __GNUC__ - __asm__ __volatile__( - - "push %%ecx \n" - - "neg %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0 - - "1: \n" - "rcrl $1, -4(%%ebx, %%ecx, 4) \n" - - "decl %%ecx \n" - "jnz 1b \n" - - "setc %%al \n" - "movzx %%al, %%eax \n" - - "pop %%ecx \n" - - : "=a" (c) - : "0" (c), "c" (b), "b" (p1) - : "cc", "memory" ); - - #endif - - TTMATH_LOG("UInt32::Rcr2_one") - - return c; - } - - - /*! - this method moves all bits into the left hand side - return value <- this <- c - - the lowest *bits* will be held the 'c' and - the state of one additional bit (on the left hand side) - will be returned - - for example: - let this is 001010000 - after Rcl2(3, 1) there'll be 010000111 and Rcl2 returns 1 - */ - uint Rcl2(uint bits, uint c) - { - TTMATH_ASSERT( bits>0 && bits this -> return value - - the highest *bits* will be held the 'c' and - the state of one additional bit (on the right hand side) - will be returned - - for example: - let this is 000000010 - after Rcr2(2, 1) there'll be 110000000 and Rcr2 returns 1 - */ - uint Rcr2(uint bits, uint c) - { - TTMATH_ASSERT( bits>0 && bits - e.g. - uint x = 100; - uint bit = SetBitInWord(x, 3); - now: x = 108 and bit = 0 - */ - static uint SetBitInWord(uint & value, uint bit) - { - TTMATH_ASSERT( bit < TTMATH_BITS_PER_UINT ) - - uint old_bit; - uint v = value; - - #ifndef __GNUC__ - __asm - { - push ebx - push eax - - mov eax, [v] - mov ebx, [bit] - bts eax, ebx - mov [v], eax - - setc bl - movzx ebx, bl - mov [old_bit], ebx - - pop eax - pop ebx - } - #endif - - - #ifdef __GNUC__ - __asm__ __volatile__( - - "btsl %%ebx, %%eax \n" - - "setc %%bl \n" - "movzx %%bl, %%ebx \n" - - : "=a" (v), "=b" (old_bit) - : "0" (v), "1" (bit) - : "cc" ); - - #endif - - value = v; - - return old_bit; - } - -#endif /*! @@ -1737,71 +750,6 @@ public: public: - - -#ifdef TTMATH_PLATFORM32 - - - /*! - multiplication: result2:result1 = a * b - result2 - higher word - result1 - lower word of the result - - this method never returns a carry - - it is an auxiliary method for second version of the multiplication algorithm - */ - static void MulTwoWords(uint a, uint b, uint * result2, uint * result1) - { - /* - we must use these temporary variables in order to inform the compilator - that value pointed with result1 and result2 has changed - - this has no effect in visual studio but it's useful when - using gcc and options like -Ox - */ - register uint result1_; - register uint result2_; - - #ifndef __GNUC__ - - __asm - { - push eax - push edx - - mov eax, [a] - mul dword ptr [b] - - mov [result2_], edx - mov [result1_], eax - - pop edx - pop eax - } - - #endif - - - #ifdef __GNUC__ - - __asm__ __volatile__( - - "mull %%edx \n" - - : "=a" (result1_), "=d" (result2_) - : "0" (a), "1" (b) - : "cc" ); - - #endif - - - *result1 = result1_; - *result2 = result2_; - } - -#endif - /*! multiplication: this = this * ss2 @@ -2092,75 +1040,21 @@ public: public: - #ifdef TTMATH_PLATFORM32 - - - /*! - this method calculates 64bits word a:b / 32bits c (a higher, b lower word) - r = a:b / c and rest - remainder - - * - * WARNING: - * if r (one word) is too small for the result or c is equal zero - * there'll be a hardware interruption (0) - * and probably the end of your program - * - */ - static void DivTwoWords(uint a, uint b, uint c, uint * r, uint * rest) - { - register uint r_; - register uint rest_; - /* - these variables have similar meaning like those in - the multiplication algorithm MulTwoWords - */ - - #ifndef __GNUC__ - __asm - { - push eax - push edx - - mov edx, [a] - mov eax, [b] - div dword ptr [c] - - mov [r_], eax - mov [rest_], edx - - pop edx - pop eax - } - #endif - - - #ifdef __GNUC__ - - __asm__ __volatile__( - - "divl %%ecx \n" - - : "=a" (r_), "=d" (rest_) - : "d" (a), "a" (b), "c" (c) - : "cc" ); - - #endif - - - *r = r_; - *rest = rest_; - - } - -#endif - - /*! division by one unsigned word + + returns 1 when divisor is zero */ uint DivInt(uint divisor, uint * remainder = 0) { + if( divisor == 0 ) + { + TTMATH_LOG("UInt::DivInt") + + return 1; + } + if( divisor == 1 ) { if( remainder ) @@ -2887,10 +1781,12 @@ private: void Div3_MultiplySubtract( UInt & uu, const UInt & vv, uint & qp) { + // D4 (in the book) + UInt vv_temp(vv); vv_temp.MulInt(qp); - if( uu.Sub(vv_temp) ) + if( uu.Sub(vv_temp) ) { // there was a carry @@ -2900,6 +1796,9 @@ private: --qp; uu.Add(vv); + + // can be a carry from this additions but it should be ignored + // because it cancels with the borrow from uu.Sub(vv_temp) } TTMATH_LOG("UInt::Div3_MultiplySubtract") @@ -3227,7 +2126,6 @@ public: } - /*! this method converts the sint type to this class @@ -3262,6 +2160,94 @@ public: TTMATH_LOG("UInt::UInt(sint)") } + + +#ifdef TTMATH_PLATFORM64 + + /*! + in 64bit platforms we must define additional operators and contructors + in order to allow a user initializing the objects in this way: + UInt<...> type = 20; + or + UInt<...> type; + type = 30; + + decimal constants such as 20, 30 etc. are integer literal of type int, + if the value is greater it can even be long int, + 0 is an octal integer of type int + (ISO 14882 p2.13.1 Integer literals) + */ + + /*! + this operator converts the unsigned int type to this class + + ***this operator is created only on a 64bit platform*** + it takes one argument of 32bit + */ + UInt & operator=(unsigned int i) + { + FromUInt(uint(i)); + + TTMATH_LOG("UInt64::operator=(unsigned int)") + + return *this; + } + + + /*! + a constructor for converting the unsigned int to this class + + ***this constructor is created only on a 64bit platform*** + it takes one argument of 32bit + */ + UInt(unsigned int i) + { + FromUInt(uint(i)); + + TTMATH_LOG("UInt64::UInt(unsigned int)") + } + + + /*! + an operator for converting the signed int to this class + + ***this constructor is created only on a 64bit platform*** + it takes one argument of 32bit + + look at the description of UInt::operator=(sint) + */ + UInt & operator=(signed int i) + { + FromUInt(uint(i)); + + TTMATH_LOG("UInt64::operator=(signed int)") + + return *this; + } + + + /*! + a constructor for converting the signed int to this class + + ***this constructor is created only on a 64bit platform*** + it takes one argument of 32bit + + look at the description of UInt::operator=(sint) + */ + UInt(signed int i) + { + FromUInt(uint(i)); + + TTMATH_LOG("UInt64::UInt(signed int)") + } + + +#endif + + + + + /*! a constructor for converting a string to this class (with the base=10) */ @@ -3875,21 +2861,51 @@ public: } + + /* + following methods are defined in: + ttmathuint_x86.h + ttmathuint_x86_64.h + ttmathuint_noasm.h + */ + +#ifdef TTMATH_NOASM + static uint AddTwoWords(uint a, uint b, uint carry, uint * result); + static uint SubTwoWords(uint a, uint b, uint carry, uint * result); + #ifdef TTMATH_PLATFORM64 + union uint_ + { + struct + { + unsigned int low; // 32 bit + unsigned int high; // 32 bit + } u_; + + uint u; // 64 bit + }; + + + static void DivTwoWords2(uint a,uint b, uint c, uint * r, uint * rest); + static uint DivTwoWordsNormalize(uint_ & a_, uint_ & b_, uint_ & c_); + static uint DivTwoWordsUnnormalize(uint u, uint d); + static unsigned int DivTwoWordsCalculate(uint_ u_, unsigned int u3, uint_ v_); + static void MultiplySubtract(uint_ & u_, unsigned int & u3, unsigned int & q, uint_ v_); + +#endif // TTMATH_PLATFORM64 +#endif // TTMATH_NOASM + + private: +public: // !!! chwilowo public uint Rcl2_one(uint c); uint Rcr2_one(uint c); uint Rcl2(uint bits, uint c); uint Rcr2(uint bits, uint c); public: - // these methods are for 64bit processors and are defined in 'ttmathuint64.h' - UInt & operator=(unsigned int i); - UInt(unsigned int i); - UInt & operator=(signed int i); - UInt(signed int i); - void SetFromTable(const unsigned int * temp_table, uint temp_table_len); + uint Add(const UInt & ss2, uint c=0); uint AddInt(uint value, uint index = 0); uint AddTwoInts(uint x2, uint x1, uint index); @@ -3897,18 +2913,16 @@ public: uint SubInt(uint value, uint index = 0); static sint FindLeadingBitInWord(uint x); static uint SetBitInWord(uint & value, uint bit); - static void MulTwoWords(uint a, uint b, uint * result2, uint * result1); + static void MulTwoWords(uint a, uint b, uint * result_high, uint * result_low); static void DivTwoWords(uint a,uint b, uint c, uint * r, uint * rest); - -#endif - }; } //namespace -#include "ttmathuint64.h" - +#include "ttmathuint_x86.h" +#include "ttmathuint_x86_64.h" +#include "ttmathuint_noasm.h" #endif diff --git a/ttmath/ttmathuint_noasm.h b/ttmath/ttmathuint_noasm.h new file mode 100644 index 0000000..e84f837 --- /dev/null +++ b/ttmath/ttmathuint_noasm.h @@ -0,0 +1,885 @@ +/* + * This file is a part of TTMath Bignum Library + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2006-2009, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef headerfilettmathuint_noasm +#define headerfilettmathuint_noasm + + +#ifdef TTMATH_NOASM + +/*! + \file ttmathuint_noasm.h + \brief template class UInt with methods without any assembler code + + this file is included at the end of ttmathuint.h +*/ + + +namespace ttmath +{ + template + uint UInt::AddTwoWords(uint a, uint b, uint carry, uint * result) + { + uint temp; + + if( carry == 0 ) + { + temp = a + b; + + if( temp < a ) + carry = 1; + } + else + { + carry = 1; + temp = a + b + carry; + + if( temp > a ) // !(temp<=a) + carry = 0; + } + + *result = temp; + + return carry; + } + + + + /*! + this method adding ss2 to the this and adding carry if it's defined + (this = this + ss2 + c) + + c must be zero or one (might be a bigger value than 1) + function returns carry (1) (if it was) + */ + + template + uint UInt::Add(const UInt & ss2, uint c) + { + uint i; + + for(i=0 ; i + uint UInt::AddInt(uint value, uint index) + { + uint i, c; + + TTMATH_ASSERT( index < value_size ) + + + c = AddTwoWords(table[index], value, 0, &table[index]); + + for(i=index+1 ; i + uint UInt::AddTwoInts(uint x2, uint x1, uint index) + { + uint i, c; + + TTMATH_ASSERT( index < value_size ) + + + c = AddTwoWords(table[index], x1, 0, &table[index]); + c = AddTwoWords(table[index+1], x2, c, &table[index+1]); + + for(i=index+2 ; i + uint UInt::SubTwoWords(uint a, uint b, uint carry, uint * result) + { + if( carry == 0 ) + { + *result = a - b; + + if( a < b ) + carry = 1; + } + else + { + carry = 1; + *result = a - b - carry; + + if( a > b ) // !(a <= b ) + carry = 0; + } + + return carry; + } + + + + + /*! + this method's subtracting ss2 from the 'this' and subtracting + carry if it has been defined + (this = this - ss2 - c) + + c must be zero or one (might be a bigger value than 1) + function returns carry (1) (if it was) + */ + template + uint UInt::Sub(const UInt & ss2, uint c) + { + uint i; + + for(i=0 ; i + uint UInt::SubInt(uint value, uint index) + { + uint i, c; + + TTMATH_ASSERT( index < value_size ) + + + c = SubTwoWords(table[index], value, 0, &table[index]); + + for(i=index+1 ; i + uint UInt::Rcl2_one(uint c) + { + uint i, new_c; + + if( c != 0 ) + c = 1; + + for(i=0 ; i this -> return value + + the highest *bit* will be held the 'c' and + the state of one additional bit (on the right hand side) + will be returned + + for example: + let this is 000000010 + after Rcr2_one(1) there'll be 100000001 and Rcr2_one returns 0 + */ + template + uint UInt::Rcr2_one(uint c) + { + sint i; // signed i + uint new_c; + + if( c != 0 ) + c = TTMATH_UINT_HIGHEST_BIT; + + for(i=sint(value_size)-1 ; i>=0 ; --i) + { + new_c = (table[i] & 1) ? TTMATH_UINT_HIGHEST_BIT : 0; + table[i] = (table[i] >> 1) | c; + c = new_c; + } + + TTMATH_LOG("UInt64::Rcr2_one") + + return c; + } + + + + + /*! + this method moves all bits into the left hand side + return value <- this <- c + + the lowest *bits* will be held the 'c' and + the state of one additional bit (on the left hand side) + will be returned + + for example: + let this is 001010000 + after Rcl2(3, 1) there'll be 010000111 and Rcl2 returns 1 + */ + template + uint UInt::Rcl2(uint bits, uint c) + { + TTMATH_ASSERT( bits>0 && bits> move; + + for(i=0 ; i> move; + table[i] = (table[i] << bits) | c; + c = new_c; + } + + TTMATH_LOG("UInt::Rcl2") + + return (c & 1); + } + + + + + /*! + this method moves all bits into the right hand side + C -> this -> return value + + the highest *bits* will be held the 'c' and + the state of one additional bit (on the right hand side) + will be returned + + for example: + let this is 000000010 + after Rcr2(2, 1) there'll be 110000000 and Rcr2 returns 1 + */ + template + uint UInt::Rcr2(uint bits, uint c) + { + TTMATH_ASSERT( bits>0 && bits=0 ; --i) + { + new_c = table[i] << move; + table[i] = (table[i] >> bits) | c; + c = new_c; + } + + TTMATH_LOG("UInt64::Rcr2") + + return (c & TTMATH_UINT_HIGHEST_BIT) ? 1 : 0; + } + + + + + /* + this method returns the number of the highest set bit in x + if the 'x' is zero this method returns '-1' + + */ + template + sint UInt::FindLeadingBitInWord(uint x) + { + if( x == 0 ) + return -1; + + uint bit = TTMATH_BITS_PER_UINT - 1; + + while( (x & TTMATH_UINT_HIGHEST_BIT) == 0 ) + { + x = x << 1; + --bit; + } + + return bit; + } + + + + + + /*! + this method sets a special bit in the 'value' + and returns the last state of the bit (zero or one) + + bit is from <0,63> + + e.g. + uint x = 100; + uint bit = SetBitInWord(x, 3); + now: x = 108 and bit = 0 + */ + template + uint UInt::SetBitInWord(uint & value, uint bit) + { + TTMATH_ASSERT( bit < TTMATH_BITS_PER_UINT ) + + uint mask = 1; + + while( bit-- > 0 ) + mask = mask << 1; + + uint last = value & mask; + value = value | mask; + + return (last != 0) ? 1 : 0; + } + + + + + + + /*! + * + * Multiplication + * + * + */ + + + /*! + multiplication: result_high:result_low = a * b + result_high - higher word of the result + result_low - lower word of the result + + this methos never returns a carry + this method is used in the second version of the multiplication algorithms + */ + template + void UInt::MulTwoWords(uint a, uint b, uint * result_high, uint * result_low) + { + #ifdef TTMATH_PLATFORM32 + + /* + on 32bit platforms we have defined 'unsigned long long int' type known as 'ulint' in ttmath namespace + this type has 64 bits, then we're using only one multiplication: 32bit * 32bit = 64bit + */ + + union uint_ + { + struct + { + uint low; // 32 bits + uint high; // 32 bits + } u_; + + ulint u; // 64 bits + } res; + + res.u = ulint(a) * ulint(b); // multiply two 32bit words, the result has 64 bits + + *result_high = res.u_.high; + *result_low = res.u_.low; + + #else + + /* + 64 bits platforms + + we don't have a native type which has 128 bits + then we're splitting 'a' and 'b' to 4 parts (high and low halves) + and using 4 multiplications (with additions and carry correctness) + */ + + uint_ a_; + uint_ b_; + uint_ res_high1, res_high2; + uint_ res_low1, res_low2; + + a_.u = a; + b_.u = b; + + /* + the multiplication is as follows (schoolbook algorithm with O(n^2) ): + + 32 bits 32 bits + + +--------------------------------+ + | a_.u_.high | a_.u_.low | + +--------------------------------+ + | b_.u_.high | b_.u_.low | + +--------------------------------+--------------------------------+ + | res_high1.u | res_low1.u | + +--------------------------------+--------------------------------+ + | res_high2.u | res_low2.u | + +--------------------------------+--------------------------------+ + + 64 bits 64 bits + */ + + + uint_ temp; + + res_low1.u = uint(b_.u_.low) * uint(a_.u_.low); + + temp.u = uint(res_low1.u_.high) + uint(b_.u_.low) * uint(a_.u_.high); + res_low1.u_.high = temp.u_.low; + res_high1.u_.low = temp.u_.high; + res_high1.u_.high = 0; + + res_low2.u_.low = 0; + temp.u = uint(b_.u_.high) * uint(a_.u_.low); + res_low2.u_.high = temp.u_.low; + + res_high2.u = uint(b_.u_.high) * uint(a_.u_.high) + uint(temp.u_.high); + + uint c = AddTwoWords(res_low1.u, res_low2.u, 0, &res_low2.u); + AddTwoWords(res_high1.u, res_high2.u, c, &res_high2.u); // there is no carry from here + + *result_high = res_high2.u; + *result_low = res_low2.u; + + #endif + } + + + + + /*! + * + * Division + * + * + */ + + + // !! maybe returns something? a carry? or when c is zero? + /*! + this method calculates 64bits word a:b / 32bits c (a higher, b lower word) + r = a:b / c and rest - remainder + + * + * WARNING: + * the c has to be suitably large for the result being keeped in one word, + * if c is equal zero there'll be a hardware interruption (0) + * and probably the end of your program + * + */ + template + void UInt::DivTwoWords(uint a, uint b, uint c, uint * r, uint * rest) + { + // (a < c ) for the result to be one word + TTMATH_ASSERT( c != 0 && a < c ) + + #ifdef TTMATH_PLATFORM32 + + union + { + struct + { + uint low; // 32 bits + uint high; // 32 bits + } u_; + + ulint u; // 64 bits + } ab; + + ab.u_.high = a; + ab.u_.low = b; + + *r = uint(ab.u / c); + *rest = uint(ab.u % c); + + #else + + uint_ c_; + c_.u = c; + + + if( a == 0 ) + { + *r = b / c; + *rest = b % c; + +#ifdef TTMATH_WARTOWNIK + ++tester_wartownik1; // !!!!! skasowac +#endif + } + else + if( c_.u_.high == 0 ) + { + // higher half of 'c' is zero + // then higher half of 'a' is zero too (look at the asserts at the beginning - 'a' is smaller than 'c') + uint_ a_, b_, res_, temp1, temp2; + + a_.u = a; + b_.u = b; + + temp1.u_.high = a_.u_.low; + temp1.u_.low = b_.u_.high; + + res_.u_.high = temp1.u / c; + temp2.u_.high = temp1.u % c; + temp2.u_.low = b_.u_.low; + + res_.u_.low = temp2.u / c; + *rest = temp2.u % c; + + *r = res_.u; +#ifdef TTMATH_WARTOWNIK + ++tester_wartownik2; // !!!!! skasowac +#endif + + } + else + { + return DivTwoWords2(a, b, c, r, rest); + } + + #endif + } + + +#ifdef TTMATH_PLATFORM64 + + template + void UInt::DivTwoWords2(uint a, uint b, uint c, uint * r, uint * rest) + { + // a is not zero + // c_.u_.high is not zero + + uint_ a_, b_, c_, u_, q_; + unsigned int u3; // 32 bit + + a_.u = a; + b_.u = b; + c_.u = c; + + // normalizing + // a0 will actually not be used + uint d = DivTwoWordsNormalize(a_, b_, c_); + + // loop from j=1 to j=0 + // the first step (for j=2) is skipped because our result is only in one word, + // (first 'q' were 0 and nothing would be changed) + u_.u_.high = a_.u_.high; + u_.u_.low = a_.u_.low; + u3 = b_.u_.high; + q_.u_.high = DivTwoWordsCalculate(u_, u3, c_); + MultiplySubtract(u_, u3, q_.u_.high, c_); + + u_.u_.high = u_.u_.low; + u_.u_.low = u3; + u3 = b_.u_.low; + q_.u_.low = DivTwoWordsCalculate(u_, u3, c_); + MultiplySubtract(u_, u3, q_.u_.low, c_); + + *r = q_.u; + + // unnormalizing for the remainder + u_.u_.high = u_.u_.low; + u_.u_.low = u3; + *rest = DivTwoWordsUnnormalize(u_.u, d); + } + + + + + template + uint UInt::DivTwoWordsNormalize(uint_ & a_, uint_ & b_, uint_ & c_) + { + uint d = 0; + + for( ; (c_.u & TTMATH_UINT_HIGHEST_BIT) == 0 ; ++d ) + { + c_.u = c_.u << 1; + + uint bc = b_.u & TTMATH_UINT_HIGHEST_BIT; // carry from 'b' + + b_.u = b_.u << 1; + a_.u = a_.u << 1; // carry bits from 'a' are simply skipped + + if( bc ) + { + a_.u = a_.u | 1; + #ifdef TTMATH_WARTOWNIK + ++tester_wartownik3; // !!!!! skasowac + #endif + } + } + + return d; + } + + + template + uint UInt::DivTwoWordsUnnormalize(uint u, uint d) + { + if( d == 0 ) + return u; + + u = u >> d; + + return u; + } + + + template + unsigned int UInt::DivTwoWordsCalculate(uint_ u_, unsigned int u3, uint_ v_) + { + bool next_test; + uint_ qp_, rp_, temp_; + + qp_.u = u_.u / uint(v_.u_.high); + rp_.u = u_.u % uint(v_.u_.high); + + TTMATH_ASSERT( qp_.u_.high==0 || qp_.u_.high==1 ) + + do + { + bool decrease = false; + + if( qp_.u_.high == 1 ) + decrease = true; + else + { + temp_.u_.high = rp_.u_.low; + temp_.u_.low = u3; + + if( qp_.u * uint(v_.u_.low) > temp_.u ) + decrease = true; + } + + next_test = false; + + if( decrease ) + { + #ifdef TTMATH_WARTOWNIK + ++tester_wartownik4; // !!!!! skasowac + #endif + + --qp_.u; + rp_.u += v_.u_.high; + + if( rp_.u_.high == 0 ) + { + next_test = true; + + #ifdef TTMATH_WARTOWNIK + ++tester_wartownik5; // !!!!! skasowac + #endif + } + + + } + } + while( next_test ); + + return qp_.u_.low; + } + + + template + void UInt::MultiplySubtract(uint_ & u_, unsigned int & u3, unsigned int & q, uint_ v_) + { + uint_ temp_; + + uint res_high; + uint res_low; + + MulTwoWords(v_.u, q, &res_high, &res_low); + + uint_ sub_res_high_; + uint_ sub_res_low_; + + temp_.u_.high = u_.u_.low; + temp_.u_.low = u3; + + uint c = SubTwoWords(temp_.u, res_low, 0, &sub_res_low_.u); + + temp_.u_.high = 0; + temp_.u_.low = u_.u_.high; + c = SubTwoWords(temp_.u, res_high, c, &sub_res_high_.u); + +#ifdef TTMATH_WARTOWNIK + ++tester_wartownik6; // !!!!! skasowac +#endif + + if( c ) + { + --q; + + c = AddTwoWords(sub_res_low_.u, v_.u, 0, &sub_res_low_.u); + AddTwoWords(sub_res_high_.u, 0, c, &sub_res_high_.u); + + #ifdef TTMATH_WARTOWNIK + ++tester_wartownik7; // !!!!! skasowac + #endif + } + + u_.u_.high = sub_res_high_.u_.low; + u_.u_.low = sub_res_low_.u_.high; + u3 = sub_res_low_.u_.low; + } + +#endif // #ifdef TTMATH_PLATFORM64 + + + +} //namespace + + +#endif //ifdef TTMATH_NOASM +#endif + + + + diff --git a/ttmath/ttmathuint_x86.h b/ttmath/ttmathuint_x86.h new file mode 100644 index 0000000..69da94f --- /dev/null +++ b/ttmath/ttmathuint_x86.h @@ -0,0 +1,1281 @@ +/* + * This file is a part of TTMath Bignum Library + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2006-2009, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + + + +#ifndef headerfilettmathuint_x86 +#define headerfilettmathuint_x86 + + +#ifndef TTMATH_NOASM +#ifdef TTMATH_PLATFORM32 + + +/*! + \file ttmathuint_x86.h + \brief template class UInt with assembler code for 32bit x86 processors + + this file is included at the end of ttmathuint.h +*/ + + + +/*! + \brief a namespace for the TTMath library +*/ +namespace ttmath +{ + + /*! + * + * basic mathematic functions + * + */ + + + /*! + adding ss2 to the this and adding carry if it's defined + (this = this + ss2 + c) + + c must be zero or one (might be a bigger value than 1) + function returns carry (1) (if it has been) + */ + template + uint UInt::Add(const UInt & ss2, uint c=0) + { + register uint b = value_size; + register uint * p1 = table; + register uint * p2 = const_cast(ss2.table); + + // we don't have to use TTMATH_REFERENCE_ASSERT here + // this algorithm doesn't require it + + #ifndef __GNUC__ + + // this part might be compiled with for example visual c + + __asm + { + push eax + push ebx + push ecx + push edx + push esi + + mov ecx,[b] + + mov ebx,[p1] + mov esi,[p2] + + xor eax,eax // eax=0 + mov edx,eax // edx=0 + + sub eax,[c] // CF=c + + p: + mov eax,[esi+edx*4] + adc [ebx+edx*4],eax + + inc edx + dec ecx + jnz p + + setc al + movzx edx, al + mov [c], edx + + pop esi + pop edx + pop ecx + pop ebx + pop eax + } + + + + #endif + + + #ifdef __GNUC__ + + // this part should be compiled with gcc + + __asm__ __volatile__( + + "push %%ecx \n" + + "xorl %%eax, %%eax \n" + "movl %%eax, %%edx \n" + "subl %%edi, %%eax \n" + + + "1: \n" + "movl (%%esi,%%edx,4),%%eax \n" + "adcl %%eax, (%%ebx,%%edx,4) \n" + + "incl %%edx \n" + "decl %%ecx \n" + "jnz 1b \n" + + "setc %%al \n" + "movzx %%al,%%edx \n" + + "pop %%ecx \n" + + : "=d" (c) + : "D" (c), "c" (b), "b" (p1), "S" (p2) + : "%eax", "cc", "memory" ); + + #endif + + TTMATH_LOG("UInt32::Add") + + return c; + } + + + + /*! + adding one word (at a specific position) + and returning a carry (if it has been) + + e.g. + + if we've got (value_size=3): + table[0] = 10; + table[1] = 30; + table[2] = 5; + and we call: + AddInt(2,1) + then it'll be: + table[0] = 10; + table[1] = 30 + 2; + table[2] = 5; + + of course if there was a carry from table[2] it would be returned + */ + template + uint UInt::AddInt(uint value, uint index = 0) + { + register uint b = value_size; + register uint * p1 = table; + register uint c; + + TTMATH_ASSERT( index < value_size ) + + #ifndef __GNUC__ + + __asm + { + push eax + push ebx + push ecx + push edx + + mov ecx, [b] + sub ecx, [index] + + mov edx, [index] + mov ebx, [p1] + + mov eax, [value] + + p: + add [ebx+edx*4], eax + jnc end + + mov eax, 1 + inc edx + dec ecx + jnz p + + end: + setc al + movzx edx, al + mov [c], edx + + pop edx + pop ecx + pop ebx + pop eax + } + + #endif + + + #ifdef __GNUC__ + __asm__ __volatile__( + + "push %%eax \n" + "push %%ecx \n" + + "subl %%edx, %%ecx \n" + + "1: \n" + "addl %%eax, (%%ebx,%%edx,4) \n" + "jnc 2f \n" + + "movl $1, %%eax \n" + "incl %%edx \n" + "decl %%ecx \n" + "jnz 1b \n" + + "2: \n" + "setc %%al \n" + "movzx %%al, %%edx \n" + + "pop %%ecx \n" + "pop %%eax \n" + + : "=d" (c) + : "a" (value), "c" (b), "0" (index), "b" (p1) + : "cc", "memory" ); + + #endif + + TTMATH_LOG("UInt32::AddInt") + + return c; + } + + + + + /*! + adding only two unsigned words to the existing value + and these words begin on the 'index' position + (it's used in the multiplication algorithm 2) + + index should be equal or smaller than value_size-2 (index <= value_size-2) + x1 - lower word, x2 - higher word + + for example if we've got value_size equal 4 and: + table[0] = 3 + table[1] = 4 + table[2] = 5 + table[3] = 6 + then let + x1 = 10 + x2 = 20 + and + index = 1 + + the result of this method will be: + table[0] = 3 + table[1] = 4 + x1 = 14 + table[2] = 5 + x2 = 25 + table[3] = 6 + + and no carry at the end of table[3] + + (of course if there was a carry in table[2](5+20) then + this carry would be passed to the table[3] etc.) + */ + template + uint UInt::AddTwoInts(uint x2, uint x1, uint index) + { + register uint b = value_size; + register uint * p1 = table; + register uint c; + + TTMATH_ASSERT( index < value_size - 1 ) + + #ifndef __GNUC__ + __asm + { + push eax + push ebx + push ecx + push edx + + mov ecx, [b] + sub ecx, [index] + + mov ebx, [p1] + mov edx, [index] + + mov eax, [x1] + add [ebx+edx*4], eax + inc edx + dec ecx + + mov eax, [x2] + + p: + adc [ebx+edx*4], eax + jnc end + + mov eax, 0 + inc edx + dec ecx + jnz p + + end: + setc al + movzx edx, al + mov [c], edx + + pop edx + pop ecx + pop ebx + pop eax + + } + #endif + + + #ifdef __GNUC__ + __asm__ __volatile__( + + "push %%ecx \n" + "push %%edx \n" + + "subl %%edx, %%ecx \n" + + "addl %%esi, (%%ebx,%%edx,4) \n" + "incl %%edx \n" + "decl %%ecx \n" + + "1: \n" + "adcl %%eax, (%%ebx,%%edx,4) \n" + "jnc 2f \n" + + "mov $0, %%eax \n" + "incl %%edx \n" + "decl %%ecx \n" + "jnz 1b \n" + + "2: \n" + "setc %%al \n" + "movzx %%al, %%eax \n" + + "pop %%edx \n" + "pop %%ecx \n" + + : "=a" (c) + : "c" (b), "d" (index), "b" (p1), "S" (x1), "0" (x2) + : "cc", "memory" ); + + #endif + + TTMATH_LOG("UInt32::AddTwoInts") + + return c; + } + + + + + + /*! + subtracting ss2 from the 'this' and subtracting + carry if it has been defined + (this = this - ss2 - c) + + c must be zero or one (might be a bigger value than 1) + function returns carry (1) (if it has been) + */ + template + uint UInt::Sub(const UInt & ss2, uint c=0) + { + register uint b = value_size; + register uint * p1 = table; + register uint * p2 = const_cast(ss2.table); + + // we don't have to use TTMATH_REFERENCE_ASSERT here + // this algorithm doesn't require it + + #ifndef __GNUC__ + + __asm + { + push eax + push ebx + push ecx + push edx + push esi + + mov ecx,[b] + + mov ebx,[p1] + mov esi,[p2] + + xor eax, eax + mov edx, eax + + sub eax, [c] + + p: + mov eax, [esi+edx*4] + sbb [ebx+edx*4], eax + + inc edx + dec ecx + jnz p + + setc al + movzx edx, al + mov [c], edx + + pop esi + pop edx + pop ecx + pop ebx + pop eax + } + + #endif + + + #ifdef __GNUC__ + __asm__ __volatile__( + + "push %%ecx \n" + + "xorl %%eax, %%eax \n" + "movl %%eax, %%edx \n" + "subl %%edi, %%eax \n" + + + "1: \n" + "movl (%%esi,%%edx,4),%%eax \n" + "sbbl %%eax, (%%ebx,%%edx,4) \n" + + "incl %%edx \n" + "decl %%ecx \n" + "jnz 1b \n" + + "setc %%al \n" + "movzx %%al,%%edx \n" + + "pop %%ecx \n" + + : "=d" (c) + : "D" (c), "c" (b), "b" (p1), "S" (p2) + : "%eax", "cc", "memory" ); + + #endif + + TTMATH_LOG("UInt32::Sub") + + return c; + } + + + + + /*! + this method subtracts one word (at a specific position) + and returns a carry (if it was) + + e.g. + + if we've got (value_size=3): + table[0] = 10; + table[1] = 30; + table[2] = 5; + and we call: + SubInt(2,1) + then it'll be: + table[0] = 10; + table[1] = 30 - 2; + table[2] = 5; + + of course if there was a carry from table[3] it would be returned + */ + template + uint UInt::SubInt(uint value, uint index = 0) + { + register uint b = value_size; + register uint * p1 = table; + register uint c; + + TTMATH_ASSERT( index < value_size ) + + #ifndef __GNUC__ + __asm + { + push eax + push ebx + push ecx + push edx + + mov ecx, [b] + sub ecx, [index] + + mov edx, [index] + mov ebx, [p1] + + mov eax, [value] + + p: + sub [ebx+edx*4], eax + jnc end + + mov eax, 1 + inc edx + dec ecx + jnz p + + end: + setc al + movzx edx, al + mov [c], edx + + pop edx + pop ecx + pop ebx + pop eax + } + #endif + + + #ifdef __GNUC__ + __asm__ __volatile__( + + "push %%eax \n" + "push %%ecx \n" + + "subl %%edx, %%ecx \n" + + "1: \n" + "subl %%eax, (%%ebx,%%edx,4) \n" + "jnc 2f \n" + + "movl $1, %%eax \n" + "incl %%edx \n" + "decl %%ecx \n" + "jnz 1b \n" + + "2: \n" + "setc %%al \n" + "movzx %%al, %%edx \n" + + "pop %%ecx \n" + "pop %%eax \n" + + : "=d" (c) + : "a" (value), "c" (b), "0" (index), "b" (p1) + : "cc", "memory" ); + + #endif + + TTMATH_LOG("UInt32::SubInt") + + return c; + } + + + + /*! + this method moves all bits into the left hand side + return value <- this <- c + + the lowest *bit* will be held the 'c' and + the state of one additional bit (on the left hand side) + will be returned + + for example: + let this is 001010000 + after Rcl2_one(1) there'll be 010100001 and Rcl2_one returns 0 + */ + template + uint UInt::Rcl2_one(uint c) + { + register sint b = value_size; + register uint * p1 = table; + + #ifndef __GNUC__ + __asm + { + push ebx + push ecx + push edx + + mov ebx, [p1] + + xor edx, edx + mov ecx, edx + sub ecx, [c] + + mov ecx, [b] + + p: + rcl dword ptr [ebx+edx*4], 1 + + inc edx + dec ecx + jnz p + + setc dl + movzx edx, dl + mov [c], edx + + + pop edx + pop ecx + pop ebx + } + #endif + + + #ifdef __GNUC__ + __asm__ __volatile__( + + "push %%edx \n" + "push %%ecx \n" + + "xorl %%edx, %%edx \n" // edx=0 + "neg %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0 + + "1: \n" + "rcll $1, (%%ebx, %%edx, 4) \n" + + "incl %%edx \n" + "decl %%ecx \n" + "jnz 1b \n" + + "setc %%al \n" + "movzx %%al, %%eax \n" + + "pop %%ecx \n" + "pop %%edx \n" + + : "=a" (c) + : "0" (c), "c" (b), "b" (p1) + : "cc", "memory" ); + + #endif + + TTMATH_LOG("UInt32::Rcl2_one") + + return c; + } + + + + /*! + this method moves all bits into the right hand side + c -> this -> return value + + the highest *bit* will be held the 'c' and + the state of one additional bit (on the right hand side) + will be returned + + for example: + let this is 000000010 + after Rcr2_one(1) there'll be 100000001 and Rcr2_one returns 0 + */ + template + uint UInt::Rcr2_one(uint c) + { + register sint b = value_size; + register uint * p1 = table; + + #ifndef __GNUC__ + __asm + { + push ebx + push ecx + + mov ebx, [p1] + + xor ecx, ecx + sub ecx, [c] + + mov ecx, [b] + + p: + rcr dword ptr [ebx+ecx*4-4], 1 + + dec ecx + jnz p + + setc cl + movzx ecx, cl + mov [c], ecx + + pop ecx + pop ebx + } + #endif + + + #ifdef __GNUC__ + __asm__ __volatile__( + + "push %%ecx \n" + + "neg %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0 + + "1: \n" + "rcrl $1, -4(%%ebx, %%ecx, 4) \n" + + "decl %%ecx \n" + "jnz 1b \n" + + "setc %%al \n" + "movzx %%al, %%eax \n" + + "pop %%ecx \n" + + : "=a" (c) + : "0" (c), "c" (b), "b" (p1) + : "cc", "memory" ); + + #endif + + TTMATH_LOG("UInt32::Rcr2_one") + + return c; + } + + + + /*! + this method moves all bits into the left hand side + return value <- this <- c + + the lowest *bits* will be held the 'c' and + the state of one additional bit (on the left hand side) + will be returned + + for example: + let this is 001010000 + after Rcl2(3, 1) there'll be 010000111 and Rcl2 returns 1 + */ + template + uint UInt::Rcl2(uint bits, uint c) + { + TTMATH_ASSERT( bits>0 && bits this -> return value + + the highest *bits* will be held the 'c' and + the state of one additional bit (on the right hand side) + will be returned + + for example: + let this is 000000010 + after Rcr2(2, 1) there'll be 110000000 and Rcr2 returns 1 + */ + template + uint UInt::Rcr2(uint bits, uint c) + { + TTMATH_ASSERT( bits>0 && bits + sint UInt::FindLeadingBitInWord(uint x) + { + register sint result; + + #ifndef __GNUC__ + __asm + { + push eax + push edx + + mov edx,-1 + bsr eax,[x] + cmovz eax,edx + mov [result], eax + + pop edx + pop eax + } + #endif + + + #ifdef __GNUC__ + __asm__ __volatile__( + + "bsrl %1, %0 \n" + "jnz 1f \n" + "movl $-1, %0 \n" + "1: \n" + + : "=R" (result) + : "R" (x) + : "cc" ); + + #endif + + return result; + } + + + + + + /*! + this method sets a special bit in the 'value' + and returns the last state of the bit (zero or one) + + bit is from <0,31> + e.g. + uint x = 100; + uint bit = SetBitInWord(x, 3); + now: x = 108 and bit = 0 + */ + template + uint UInt::SetBitInWord(uint & value, uint bit) + { + TTMATH_ASSERT( bit < TTMATH_BITS_PER_UINT ) + + uint old_bit; + uint v = value; + + #ifndef __GNUC__ + __asm + { + push ebx + push eax + + mov eax, [v] + mov ebx, [bit] + bts eax, ebx + mov [v], eax + + setc bl + movzx ebx, bl + mov [old_bit], ebx + + pop eax + pop ebx + } + #endif + + + #ifdef __GNUC__ + __asm__ __volatile__( + + "btsl %%ebx, %%eax \n" + + "setc %%bl \n" + "movzx %%bl, %%ebx \n" + + : "=a" (v), "=b" (old_bit) + : "0" (v), "1" (bit) + : "cc" ); + + #endif + + value = v; + + return old_bit; + } + + + + + /*! + multiplication: result2:result1 = a * b + result2 - higher word + result1 - lower word of the result + + this method never returns a carry + + it is an auxiliary method for second version of the multiplication algorithm + */ + template + void UInt::MulTwoWords(uint a, uint b, uint * result2, uint * result1) + { + /* + we must use these temporary variables in order to inform the compilator + that value pointed with result1 and result2 has changed + + this has no effect in visual studio but it's useful when + using gcc and options like -Ox + */ + register uint result1_; + register uint result2_; + + #ifndef __GNUC__ + + __asm + { + push eax + push edx + + mov eax, [a] + mul dword ptr [b] + + mov [result2_], edx + mov [result1_], eax + + pop edx + pop eax + } + + #endif + + + #ifdef __GNUC__ + + __asm__ __volatile__( + + "mull %%edx \n" + + : "=a" (result1_), "=d" (result2_) + : "0" (a), "1" (b) + : "cc" ); + + #endif + + + *result1 = result1_; + *result2 = result2_; + } + + + + + + /*! + * + * Division + * + * + */ + + + + + /*! + this method calculates 64bits word a:b / 32bits c (a higher, b lower word) + r = a:b / c and rest - remainder + + * + * WARNING: + * if r (one word) is too small for the result or c is equal zero + * there'll be a hardware interruption (0) + * and probably the end of your program + * + */ + template + void UInt::DivTwoWords(uint a, uint b, uint c, uint * r, uint * rest) + { + register uint r_; + register uint rest_; + /* + these variables have similar meaning like those in + the multiplication algorithm MulTwoWords + */ + + TTMATH_ASSERT( c != 0 ) + + #ifndef __GNUC__ + __asm + { + push eax + push edx + + mov edx, [a] + mov eax, [b] + div dword ptr [c] + + mov [r_], eax + mov [rest_], edx + + pop edx + pop eax + } + #endif + + + #ifdef __GNUC__ + + __asm__ __volatile__( + + "divl %%ecx \n" + + : "=a" (r_), "=d" (rest_) + : "d" (a), "a" (b), "c" (c) + : "cc" ); + + #endif + + + *r = r_; + *rest = rest_; + + } + + + +} //namespace + + + +#endif //ifdef TTMATH_PLATFORM32 +#endif //ifndef TTMATH_NOASM +#endif diff --git a/ttmath/ttmathuint64.h b/ttmath/ttmathuint_x86_64.h similarity index 83% rename from ttmath/ttmathuint64.h rename to ttmath/ttmathuint_x86_64.h index 031dd8a..96bf662 100644 --- a/ttmath/ttmathuint64.h +++ b/ttmath/ttmathuint_x86_64.h @@ -36,10 +36,19 @@ */ +#ifndef headerfilettmathuint_x86_64 +#define headerfilettmathuint_x86_64 + + +#ifndef TTMATH_NOASM +#ifdef TTMATH_PLATFORM64 + /*! - \file ttmathuint.h - \brief template class UInt for 64bit processors + \file ttmathuint_x86_64.h + \brief template class UInt with assembler code for 64bit x86_64 processors + + this file is included at the end of ttmathuint.h */ @@ -52,155 +61,6 @@ namespace ttmath * */ -#ifdef TTMATH_PLATFORM64 - - - - /*! - in 64bit platforms we must define additional operators and contructors - in order to allow a user initializing the objects in this way: - UInt<...> type = 20; - or - UInt<...> type; - type = 30; - - decimal constants such as 20, 30 etc. are integer literal of type int, - if the value is greater it can even be long int, - 0 is an octal integer of type int - (ISO 14882 p2.13.1 Integer literals) - */ - - /*! - this operator converts the unsigned int type to this class - - ***this operator is created only on a 64bit platform*** - it takes one argument of 32bit - */ - template - UInt & UInt::operator=(unsigned int i) - { - FromUInt(uint(i)); - - TTMATH_LOG("UInt64::operator=(unsigned int)") - - return *this; - } - - - /*! - a constructor for converting the unsigned int to this class - - ***this constructor is created only on a 64bit platform*** - it takes one argument of 32bit - */ - template - UInt::UInt(unsigned int i) - { - FromUInt(uint(i)); - - TTMATH_LOG("UInt64::UInt(unsigned int)") - } - - - /*! - an operator for converting the signed int to this class - - ***this constructor is created only on a 64bit platform*** - it takes one argument of 32bit - - look at the description of UInt::operator=(sint) - */ - template - UInt & UInt::operator=(signed int i) - { - FromUInt(uint(i)); - - TTMATH_LOG("UInt64::operator=(signed int)") - - return *this; - } - - - /*! - a constructor for converting the signed int to this class - - ***this constructor is created only on a 64bit platform*** - it takes one argument of 32bit - - look at the description of UInt::operator=(sint) - */ - template - UInt::UInt(signed int i) - { - FromUInt(uint(i)); - - TTMATH_LOG("UInt64::UInt(signed int)") - } - - - - /*! - this method copies the value stored in an another table - (warning: first values in temp_table are the highest words -- it's different - from our table) - - ***this method is created only on a 64bit platform*** - - we copy as many words as it is possible - - if temp_table_len is bigger than value_size we'll try to round - the lowest word from table depending on the last not used bit in temp_table - (this rounding isn't a perfect rounding -- look at the description below) - - and if temp_table_len is smaller than value_size we'll clear the rest words - in the table - - warning: we're using 'temp_table' as a pointer at 32bit words - */ - template - void UInt::SetFromTable(const unsigned int * temp_table, uint temp_table_len) - { - uint temp_table_index = 0; - sint i; // 'i' with a sign - - for(i=value_size-1 ; i>=0 && temp_table_index= 0 ; --i) - table[i] = 0; - - TTMATH_LOG("UInt64::SetFromTable") - } - /*! @@ -687,6 +547,7 @@ namespace ttmath { TTMATH_ASSERT( bits>0 && bits