diff --git a/CHANGELOG b/CHANGELOG index 7a634a2..8e0c110 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,7 +1,16 @@ -Version 0.8.4 prerelease (2009.05.04): +Version 0.8.4 prerelease (2009.05.05): * fixed: UInt::DivInt() didn't check whether the divisor is zero there was a hardware interruption when the divisor was zero (now the method returns one) + * fixed: the problem with GCC optimization on x86_64 + sometimes when using -O2 or -O3 GCC doesn't correctly set + the stack pointer (actually the stack is used for other things) + and you can't use instructions like push/pop in assembler code. + All the asm code for x86_64 has been rewritten; now the instructions + push/pop are not used, and other things which access the stack + (like "m" (mask) constraints in Rcl2 and Rcr2) have also gone away, + now the library works well with -O2 and -O3 and the asm code + is a little faster * added: UInt::PrintLog(const char * msg, std::ostream & output) used (for debugging purposes) by macro TTMATH_LOG(msg) (it is used in nearly all methods in UInt class) diff --git a/ttmath/ttmathuint.h b/ttmath/ttmathuint.h index d7cc7b8..61e773e 100644 --- a/ttmath/ttmathuint.h +++ b/ttmath/ttmathuint.h @@ -1050,6 +1050,9 @@ public: { if( divisor == 0 ) { + if( remainder ) + *remainder = 0; // this is for convenience, without it the compiler can report that 'remainder' is uninitialized + TTMATH_LOG("UInt::DivInt") return 1; @@ -1733,6 +1736,8 @@ private: uint rp; bool next_test; + TTMATH_ASSERT( v1 != 0 ) + u_temp.table[1] = u2; u_temp.table[0] = u1; u_temp.DivInt(v1, &rp); diff --git a/ttmath/ttmathuint_x86_64.h b/ttmath/ttmathuint_x86_64.h index 96bf662..a211596 100644 --- a/ttmath/ttmathuint_x86_64.h +++ b/ttmath/ttmathuint_x86_64.h @@ -75,10 +75,10 @@ namespace ttmath template uint UInt::Add(const UInt & ss2, uint c) { - register uint b = value_size; - register uint * p1 = table; - register uint * p2 = const_cast(ss2.table); - + uint b = value_size; + uint * p1 = table; + const uint * p2 = 
const_cast(ss2.table); + uint dummy, dummy2; // we don't have to use TTMATH_REFERENCE_ASSERT here // this algorithm doesn't require it @@ -92,30 +92,23 @@ namespace ttmath this part should be compiled with gcc */ __asm__ __volatile__( - - "push %%rcx \n" - - "xorq %%rax, %%rax \n" - "movq %%rax, %%rdx \n" - "subq %%rdi, %%rax \n" - + + "xorq %%rdx, %%rdx \n" + "neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0 "1: \n" - "movq (%%rsi,%%rdx,8),%%rax \n" + "movq (%%rsi,%%rdx,8), %%rax \n" "adcq %%rax, (%%rbx,%%rdx,8) \n" "incq %%rdx \n" "decq %%rcx \n" "jnz 1b \n" - "setc %%al \n" - "movzx %%al,%%rdx \n" + "adcq %%rcx, %%rcx \n" - "pop %%rcx \n" - - : "=d" (c) - : "D" (c), "c" (b), "b" (p1), "S" (p2) - : "%rax", "cc", "memory" ); + : "=c" (c), "=a" (dummy), "=d" (dummy2) + : "0" (b), "1" (c), "b" (p1), "S" (p2) + : "cc", "memory" ); #endif @@ -149,9 +142,10 @@ namespace ttmath template uint UInt::AddInt(uint value, uint index) { - register uint b = value_size; - register uint * p1 = table; - register uint c; + uint b = value_size; + uint * p1 = table; + uint c; + uint dummy, dummy2; TTMATH_ASSERT( index < value_size ) @@ -163,9 +157,6 @@ namespace ttmath __asm__ __volatile__( - "push %%rax \n" - "push %%rcx \n" - "subq %%rdx, %%rcx \n" "1: \n" @@ -181,10 +172,7 @@ namespace ttmath "setc %%al \n" "movzx %%al, %%rdx \n" - "pop %%rcx \n" - "pop %%rax \n" - - : "=d" (c) + : "=d" (c), "=a" (dummy), "=c" (dummy2) : "a" (value), "c" (b), "0" (index), "b" (p1) : "cc", "memory" ); @@ -232,9 +220,10 @@ namespace ttmath template uint UInt::AddTwoInts(uint x2, uint x1, uint index) { - register uint b = value_size; - register uint * p1 = table; - register uint c; + uint b = value_size; + uint * p1 = table; + uint c; + uint dummy, dummy2; TTMATH_ASSERT( index < value_size - 1 ) @@ -245,9 +234,6 @@ namespace ttmath #ifdef __GNUC__ __asm__ __volatile__( - "push %%rcx \n" - "push %%rdx \n" - "subq %%rdx, %%rcx \n" "addq %%rsi, (%%rbx,%%rdx,8) \n" @@ -267,11 +253,8 @@ namespace ttmath 
"setc %%al \n" "movzx %%al, %%rax \n" - "pop %%rdx \n" - "pop %%rcx \n" - - : "=a" (c) - : "c" (b), "d" (index), "b" (p1), "S" (x1), "0" (x2) + : "=a" (c), "=c" (dummy), "=d" (dummy2) + : "1" (b), "2" (index), "b" (p1), "S" (x1), "0" (x2) : "cc", "memory" ); #endif @@ -298,9 +281,10 @@ namespace ttmath template uint UInt::Sub(const UInt & ss2, uint c) { - register uint b = value_size; - register uint * p1 = table; - register uint * p2 = const_cast(ss2.table); + uint b = value_size; + uint * p1 = table; + const uint * p2 = ss2.table; + uint dummy, dummy2; // we don't have to use TTMATH_REFERENCE_ASSERT here // this algorithm doesn't require it @@ -311,30 +295,23 @@ namespace ttmath #ifdef __GNUC__ __asm__ __volatile__( - - "push %%rcx \n" - - "xorq %%rax, %%rax \n" - "movq %%rax, %%rdx \n" - "subq %%rdi, %%rax \n" - + + "xorq %%rdx, %%rdx \n" + "neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0 "1: \n" - "movq (%%rsi,%%rdx,8),%%rax \n" + "movq (%%rsi,%%rdx,8), %%rax \n" "sbbq %%rax, (%%rbx,%%rdx,8) \n" "incq %%rdx \n" "decq %%rcx \n" "jnz 1b \n" - "setc %%al \n" - "movzx %%al,%%rdx \n" + "adcq %%rcx, %%rcx \n" - "pop %%rcx \n" - - : "=d" (c) - : "D" (c), "c" (b), "b" (p1), "S" (p2) - : "%rax", "cc", "memory" ); + : "=c" (c), "=a" (dummy), "=d" (dummy2) + : "0" (b), "1" (c), "b" (p1), "S" (p2) + : "cc", "memory" ); #endif @@ -367,9 +344,10 @@ namespace ttmath template uint UInt::SubInt(uint value, uint index) { - register uint b = value_size; - register uint * p1 = table; - register uint c; + uint b = value_size; + uint * p1 = table; + uint c; + uint dummy, dummy2; TTMATH_ASSERT( index < value_size ) @@ -380,9 +358,6 @@ namespace ttmath #ifdef __GNUC__ __asm__ __volatile__( - "push %%rax \n" - "push %%rcx \n" - "subq %%rdx, %%rcx \n" "1: \n" @@ -398,11 +373,8 @@ namespace ttmath "setc %%al \n" "movzx %%al, %%rdx \n" - "pop %%rcx \n" - "pop %%rax \n" - - : "=d" (c) - : "a" (value), "c" (b), "0" (index), "b" (p1) + : "=d" (c), "=a" (dummy), "=c" (dummy2) + : "1" 
(value), "2" (b), "0" (index), "b" (p1) : "cc", "memory" ); #endif @@ -430,9 +402,9 @@ namespace ttmath template uint UInt::Rcl2_one(uint c) { - register sint b = value_size; - register uint * p1 = table; - + sint b = value_size; + uint * p1 = table; + uint dummy, dummy2; #ifndef __GNUC__ #error "another compiler than GCC is currently not supported in 64bit mode" @@ -441,9 +413,6 @@ namespace ttmath #ifdef __GNUC__ __asm__ __volatile__( - "push %%rdx \n" - "push %%rcx \n" - "xorq %%rdx, %%rdx \n" // rdx=0 "neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0 @@ -454,14 +423,10 @@ namespace ttmath "decq %%rcx \n" "jnz 1b \n" - "setc %%al \n" - "movzx %%al, %%rax \n" + "adcq %%rcx, %%rcx \n" - "pop %%rcx \n" - "pop %%rdx \n" - - : "=a" (c) - : "0" (c), "c" (b), "b" (p1) + : "=c" (c), "=a" (dummy), "=d" (dummy2) + : "1" (c), "0" (b), "b" (p1) : "cc", "memory" ); #endif @@ -489,9 +454,9 @@ namespace ttmath template uint UInt::Rcr2_one(uint c) { - register sint b = value_size; - register uint * p1 = table; - + sint b = value_size; + uint * p1 = table; + uint dummy; #ifndef __GNUC__ #error "another compiler than GCC is currently not supported in 64bit mode" @@ -500,8 +465,6 @@ namespace ttmath #ifdef __GNUC__ __asm__ __volatile__( - "push %%rcx \n" - "neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0 "1: \n" @@ -510,13 +473,10 @@ namespace ttmath "decq %%rcx \n" "jnz 1b \n" - "setc %%al \n" - "movzx %%al, %%rax \n" + "adcq %%rcx, %%rcx \n" - "pop %%rcx \n" - - : "=a" (c) - : "0" (c), "c" (b), "b" (p1) + : "=c" (c), "=a" (dummy) + : "1" (c), "0" (b), "b" (p1) : "cc", "memory" ); #endif @@ -547,10 +507,9 @@ namespace ttmath { TTMATH_ASSERT( bits>0 && bits0 && bits