fixed: the problem with GCC optimization on x86_64
Sometimes, when using -O2 or -O3, GCC doesn't set the stack pointer correctly (actually the stack is used for other things), so you can't use instructions like push/pop in assembler code. All the asm code for x86_64 has been rewritten: push/pop instructions are no longer used, and other things that access the stack (like the "m" (mask) constraints in Rcl2 and Rcr2) have also gone away. Now the library works well with -O2 and -O3, and the asm code is a little faster. git-svn-id: svn://ttmath.org/publicrep/ttmath/trunk@127 e52654a7-88a9-db11-a3e9-0013d4bc506e
This commit is contained in:
parent
85945b2bb0
commit
e665f91682
11
CHANGELOG
11
CHANGELOG
|
@ -1,7 +1,16 @@
|
||||||
Version 0.8.4 prerelease (2009.05.04):
|
Version 0.8.4 prerelease (2009.05.05):
|
||||||
* fixed: UInt::DivInt() didn't check whether the divisor is zero
|
* fixed: UInt::DivInt() didn't check whether the divisor is zero
|
||||||
there was a hardware interruption when the divisor was zero
|
there was a hardware interruption when the divisor was zero
|
||||||
(now the method returns one)
|
(now the method returns one)
|
||||||
|
* fixed: the problem with GCC optimization on x86_64
|
||||||
|
sometimes when using -O2 or -O3 GCC doesn't correctly set
|
||||||
|
the stack pointer (actually the stack is used for other things)
|
||||||
|
and you can't use instructions like push/pop in assembler code.
|
||||||
|
All the asm code for x86_64 has been rewritten; now the instructions
|
||||||
|
push/pop are not used, and other things which access the stack
|
||||||
|
(like "m" (mask) constraints in Rcl2 and Rcr2) have also gone away,
|
||||||
|
now the library works well with -O2 and -O3 and the asm code
|
||||||
|
is a little faster
|
||||||
* added: UInt::PrintLog(const char * msg, std::ostream & output)
|
* added: UInt::PrintLog(const char * msg, std::ostream & output)
|
||||||
used (for debugging purposes) by macro TTMATH_LOG(msg)
|
used (for debugging purposes) by macro TTMATH_LOG(msg)
|
||||||
(it is used in nearly all methods in UInt class)
|
(it is used in nearly all methods in UInt class)
|
||||||
|
|
|
@ -1050,6 +1050,9 @@ public:
|
||||||
{
|
{
|
||||||
if( divisor == 0 )
|
if( divisor == 0 )
|
||||||
{
|
{
|
||||||
|
if( remainder )
|
||||||
|
*remainder = 0; // this is for convenience, without it the compiler can report that 'remainder' is uninitialized
|
||||||
|
|
||||||
TTMATH_LOG("UInt::DivInt")
|
TTMATH_LOG("UInt::DivInt")
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -1733,6 +1736,8 @@ private:
|
||||||
uint rp;
|
uint rp;
|
||||||
bool next_test;
|
bool next_test;
|
||||||
|
|
||||||
|
TTMATH_ASSERT( v1 != 0 )
|
||||||
|
|
||||||
u_temp.table[1] = u2;
|
u_temp.table[1] = u2;
|
||||||
u_temp.table[0] = u1;
|
u_temp.table[0] = u1;
|
||||||
u_temp.DivInt(v1, &rp);
|
u_temp.DivInt(v1, &rp);
|
||||||
|
|
|
@ -75,10 +75,10 @@ namespace ttmath
|
||||||
template<uint value_size>
|
template<uint value_size>
|
||||||
uint UInt<value_size>::Add(const UInt<value_size> & ss2, uint c)
|
uint UInt<value_size>::Add(const UInt<value_size> & ss2, uint c)
|
||||||
{
|
{
|
||||||
register uint b = value_size;
|
uint b = value_size;
|
||||||
register uint * p1 = table;
|
uint * p1 = table;
|
||||||
register uint * p2 = const_cast<uint*>(ss2.table);
|
const uint * p2 = const_cast<uint*>(ss2.table);
|
||||||
|
uint dummy, dummy2;
|
||||||
|
|
||||||
// we don't have to use TTMATH_REFERENCE_ASSERT here
|
// we don't have to use TTMATH_REFERENCE_ASSERT here
|
||||||
// this algorithm doesn't require it
|
// this algorithm doesn't require it
|
||||||
|
@ -92,30 +92,23 @@ namespace ttmath
|
||||||
this part should be compiled with gcc
|
this part should be compiled with gcc
|
||||||
*/
|
*/
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
|
|
||||||
"push %%rcx \n"
|
"xorq %%rdx, %%rdx \n"
|
||||||
|
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
|
||||||
"xorq %%rax, %%rax \n"
|
|
||||||
"movq %%rax, %%rdx \n"
|
|
||||||
"subq %%rdi, %%rax \n"
|
|
||||||
|
|
||||||
|
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movq (%%rsi,%%rdx,8),%%rax \n"
|
"movq (%%rsi,%%rdx,8), %%rax \n"
|
||||||
"adcq %%rax, (%%rbx,%%rdx,8) \n"
|
"adcq %%rax, (%%rbx,%%rdx,8) \n"
|
||||||
|
|
||||||
"incq %%rdx \n"
|
"incq %%rdx \n"
|
||||||
"decq %%rcx \n"
|
"decq %%rcx \n"
|
||||||
"jnz 1b \n"
|
"jnz 1b \n"
|
||||||
|
|
||||||
"setc %%al \n"
|
"adcq %%rcx, %%rcx \n"
|
||||||
"movzx %%al,%%rdx \n"
|
|
||||||
|
|
||||||
"pop %%rcx \n"
|
: "=c" (c), "=a" (dummy), "=d" (dummy2)
|
||||||
|
: "0" (b), "1" (c), "b" (p1), "S" (p2)
|
||||||
: "=d" (c)
|
: "cc", "memory" );
|
||||||
: "D" (c), "c" (b), "b" (p1), "S" (p2)
|
|
||||||
: "%rax", "cc", "memory" );
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -149,9 +142,10 @@ namespace ttmath
|
||||||
template<uint value_size>
|
template<uint value_size>
|
||||||
uint UInt<value_size>::AddInt(uint value, uint index)
|
uint UInt<value_size>::AddInt(uint value, uint index)
|
||||||
{
|
{
|
||||||
register uint b = value_size;
|
uint b = value_size;
|
||||||
register uint * p1 = table;
|
uint * p1 = table;
|
||||||
register uint c;
|
uint c;
|
||||||
|
uint dummy, dummy2;
|
||||||
|
|
||||||
TTMATH_ASSERT( index < value_size )
|
TTMATH_ASSERT( index < value_size )
|
||||||
|
|
||||||
|
@ -163,9 +157,6 @@ namespace ttmath
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
|
|
||||||
"push %%rax \n"
|
|
||||||
"push %%rcx \n"
|
|
||||||
|
|
||||||
"subq %%rdx, %%rcx \n"
|
"subq %%rdx, %%rcx \n"
|
||||||
|
|
||||||
"1: \n"
|
"1: \n"
|
||||||
|
@ -181,10 +172,7 @@ namespace ttmath
|
||||||
"setc %%al \n"
|
"setc %%al \n"
|
||||||
"movzx %%al, %%rdx \n"
|
"movzx %%al, %%rdx \n"
|
||||||
|
|
||||||
"pop %%rcx \n"
|
: "=d" (c), "=a" (dummy), "=c" (dummy2)
|
||||||
"pop %%rax \n"
|
|
||||||
|
|
||||||
: "=d" (c)
|
|
||||||
: "a" (value), "c" (b), "0" (index), "b" (p1)
|
: "a" (value), "c" (b), "0" (index), "b" (p1)
|
||||||
: "cc", "memory" );
|
: "cc", "memory" );
|
||||||
|
|
||||||
|
@ -232,9 +220,10 @@ namespace ttmath
|
||||||
template<uint value_size>
|
template<uint value_size>
|
||||||
uint UInt<value_size>::AddTwoInts(uint x2, uint x1, uint index)
|
uint UInt<value_size>::AddTwoInts(uint x2, uint x1, uint index)
|
||||||
{
|
{
|
||||||
register uint b = value_size;
|
uint b = value_size;
|
||||||
register uint * p1 = table;
|
uint * p1 = table;
|
||||||
register uint c;
|
uint c;
|
||||||
|
uint dummy, dummy2;
|
||||||
|
|
||||||
TTMATH_ASSERT( index < value_size - 1 )
|
TTMATH_ASSERT( index < value_size - 1 )
|
||||||
|
|
||||||
|
@ -245,9 +234,6 @@ namespace ttmath
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
|
|
||||||
"push %%rcx \n"
|
|
||||||
"push %%rdx \n"
|
|
||||||
|
|
||||||
"subq %%rdx, %%rcx \n"
|
"subq %%rdx, %%rcx \n"
|
||||||
|
|
||||||
"addq %%rsi, (%%rbx,%%rdx,8) \n"
|
"addq %%rsi, (%%rbx,%%rdx,8) \n"
|
||||||
|
@ -267,11 +253,8 @@ namespace ttmath
|
||||||
"setc %%al \n"
|
"setc %%al \n"
|
||||||
"movzx %%al, %%rax \n"
|
"movzx %%al, %%rax \n"
|
||||||
|
|
||||||
"pop %%rdx \n"
|
: "=a" (c), "=c" (dummy), "=d" (dummy2)
|
||||||
"pop %%rcx \n"
|
: "1" (b), "2" (index), "b" (p1), "S" (x1), "0" (x2)
|
||||||
|
|
||||||
: "=a" (c)
|
|
||||||
: "c" (b), "d" (index), "b" (p1), "S" (x1), "0" (x2)
|
|
||||||
: "cc", "memory" );
|
: "cc", "memory" );
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -298,9 +281,10 @@ namespace ttmath
|
||||||
template<uint value_size>
|
template<uint value_size>
|
||||||
uint UInt<value_size>::Sub(const UInt<value_size> & ss2, uint c)
|
uint UInt<value_size>::Sub(const UInt<value_size> & ss2, uint c)
|
||||||
{
|
{
|
||||||
register uint b = value_size;
|
uint b = value_size;
|
||||||
register uint * p1 = table;
|
uint * p1 = table;
|
||||||
register uint * p2 = const_cast<uint*>(ss2.table);
|
const uint * p2 = ss2.table;
|
||||||
|
uint dummy, dummy2;
|
||||||
|
|
||||||
// we don't have to use TTMATH_REFERENCE_ASSERT here
|
// we don't have to use TTMATH_REFERENCE_ASSERT here
|
||||||
// this algorithm doesn't require it
|
// this algorithm doesn't require it
|
||||||
|
@ -311,30 +295,23 @@ namespace ttmath
|
||||||
|
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
|
|
||||||
"push %%rcx \n"
|
"xorq %%rdx, %%rdx \n"
|
||||||
|
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
|
||||||
"xorq %%rax, %%rax \n"
|
|
||||||
"movq %%rax, %%rdx \n"
|
|
||||||
"subq %%rdi, %%rax \n"
|
|
||||||
|
|
||||||
|
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movq (%%rsi,%%rdx,8),%%rax \n"
|
"movq (%%rsi,%%rdx,8), %%rax \n"
|
||||||
"sbbq %%rax, (%%rbx,%%rdx,8) \n"
|
"sbbq %%rax, (%%rbx,%%rdx,8) \n"
|
||||||
|
|
||||||
"incq %%rdx \n"
|
"incq %%rdx \n"
|
||||||
"decq %%rcx \n"
|
"decq %%rcx \n"
|
||||||
"jnz 1b \n"
|
"jnz 1b \n"
|
||||||
|
|
||||||
"setc %%al \n"
|
"adcq %%rcx, %%rcx \n"
|
||||||
"movzx %%al,%%rdx \n"
|
|
||||||
|
|
||||||
"pop %%rcx \n"
|
: "=c" (c), "=a" (dummy), "=d" (dummy2)
|
||||||
|
: "0" (b), "1" (c), "b" (p1), "S" (p2)
|
||||||
: "=d" (c)
|
: "cc", "memory" );
|
||||||
: "D" (c), "c" (b), "b" (p1), "S" (p2)
|
|
||||||
: "%rax", "cc", "memory" );
|
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -367,9 +344,10 @@ namespace ttmath
|
||||||
template<uint value_size>
|
template<uint value_size>
|
||||||
uint UInt<value_size>::SubInt(uint value, uint index)
|
uint UInt<value_size>::SubInt(uint value, uint index)
|
||||||
{
|
{
|
||||||
register uint b = value_size;
|
uint b = value_size;
|
||||||
register uint * p1 = table;
|
uint * p1 = table;
|
||||||
register uint c;
|
uint c;
|
||||||
|
uint dummy, dummy2;
|
||||||
|
|
||||||
TTMATH_ASSERT( index < value_size )
|
TTMATH_ASSERT( index < value_size )
|
||||||
|
|
||||||
|
@ -380,9 +358,6 @@ namespace ttmath
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
|
|
||||||
"push %%rax \n"
|
|
||||||
"push %%rcx \n"
|
|
||||||
|
|
||||||
"subq %%rdx, %%rcx \n"
|
"subq %%rdx, %%rcx \n"
|
||||||
|
|
||||||
"1: \n"
|
"1: \n"
|
||||||
|
@ -398,11 +373,8 @@ namespace ttmath
|
||||||
"setc %%al \n"
|
"setc %%al \n"
|
||||||
"movzx %%al, %%rdx \n"
|
"movzx %%al, %%rdx \n"
|
||||||
|
|
||||||
"pop %%rcx \n"
|
: "=d" (c), "=a" (dummy), "=c" (dummy2)
|
||||||
"pop %%rax \n"
|
: "1" (value), "2" (b), "0" (index), "b" (p1)
|
||||||
|
|
||||||
: "=d" (c)
|
|
||||||
: "a" (value), "c" (b), "0" (index), "b" (p1)
|
|
||||||
: "cc", "memory" );
|
: "cc", "memory" );
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -430,9 +402,9 @@ namespace ttmath
|
||||||
template<uint value_size>
|
template<uint value_size>
|
||||||
uint UInt<value_size>::Rcl2_one(uint c)
|
uint UInt<value_size>::Rcl2_one(uint c)
|
||||||
{
|
{
|
||||||
register sint b = value_size;
|
sint b = value_size;
|
||||||
register uint * p1 = table;
|
uint * p1 = table;
|
||||||
|
uint dummy, dummy2;
|
||||||
|
|
||||||
#ifndef __GNUC__
|
#ifndef __GNUC__
|
||||||
#error "another compiler than GCC is currently not supported in 64bit mode"
|
#error "another compiler than GCC is currently not supported in 64bit mode"
|
||||||
|
@ -441,9 +413,6 @@ namespace ttmath
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
|
|
||||||
"push %%rdx \n"
|
|
||||||
"push %%rcx \n"
|
|
||||||
|
|
||||||
"xorq %%rdx, %%rdx \n" // rdx=0
|
"xorq %%rdx, %%rdx \n" // rdx=0
|
||||||
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
|
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
|
||||||
|
|
||||||
|
@ -454,14 +423,10 @@ namespace ttmath
|
||||||
"decq %%rcx \n"
|
"decq %%rcx \n"
|
||||||
"jnz 1b \n"
|
"jnz 1b \n"
|
||||||
|
|
||||||
"setc %%al \n"
|
"adcq %%rcx, %%rcx \n"
|
||||||
"movzx %%al, %%rax \n"
|
|
||||||
|
|
||||||
"pop %%rcx \n"
|
: "=c" (c), "=a" (dummy), "=d" (dummy2)
|
||||||
"pop %%rdx \n"
|
: "1" (c), "0" (b), "b" (p1)
|
||||||
|
|
||||||
: "=a" (c)
|
|
||||||
: "0" (c), "c" (b), "b" (p1)
|
|
||||||
: "cc", "memory" );
|
: "cc", "memory" );
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -489,9 +454,9 @@ namespace ttmath
|
||||||
template<uint value_size>
|
template<uint value_size>
|
||||||
uint UInt<value_size>::Rcr2_one(uint c)
|
uint UInt<value_size>::Rcr2_one(uint c)
|
||||||
{
|
{
|
||||||
register sint b = value_size;
|
sint b = value_size;
|
||||||
register uint * p1 = table;
|
uint * p1 = table;
|
||||||
|
uint dummy;
|
||||||
|
|
||||||
#ifndef __GNUC__
|
#ifndef __GNUC__
|
||||||
#error "another compiler than GCC is currently not supported in 64bit mode"
|
#error "another compiler than GCC is currently not supported in 64bit mode"
|
||||||
|
@ -500,8 +465,6 @@ namespace ttmath
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
|
|
||||||
"push %%rcx \n"
|
|
||||||
|
|
||||||
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
|
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
|
||||||
|
|
||||||
"1: \n"
|
"1: \n"
|
||||||
|
@ -510,13 +473,10 @@ namespace ttmath
|
||||||
"decq %%rcx \n"
|
"decq %%rcx \n"
|
||||||
"jnz 1b \n"
|
"jnz 1b \n"
|
||||||
|
|
||||||
"setc %%al \n"
|
"adcq %%rcx, %%rcx \n"
|
||||||
"movzx %%al, %%rax \n"
|
|
||||||
|
|
||||||
"pop %%rcx \n"
|
: "=c" (c), "=a" (dummy)
|
||||||
|
: "1" (c), "0" (b), "b" (p1)
|
||||||
: "=a" (c)
|
|
||||||
: "0" (c), "c" (b), "b" (p1)
|
|
||||||
: "cc", "memory" );
|
: "cc", "memory" );
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -547,10 +507,9 @@ namespace ttmath
|
||||||
{
|
{
|
||||||
TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
|
TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
|
||||||
|
|
||||||
// !!! why there is signed here?
|
uint b = value_size;
|
||||||
register sint b = value_size;
|
uint * p1 = table;
|
||||||
register uint * p1 = table;
|
uint dummy, dummy2, dummy3;
|
||||||
register uint mask;
|
|
||||||
|
|
||||||
#ifndef __GNUC__
|
#ifndef __GNUC__
|
||||||
#error "another compiler than GCC is currently not supported in 64bit mode"
|
#error "another compiler than GCC is currently not supported in 64bit mode"
|
||||||
|
@ -559,29 +518,25 @@ namespace ttmath
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
|
|
||||||
"push %%rdx \n"
|
|
||||||
"push %%rsi \n"
|
|
||||||
"push %%rdi \n"
|
|
||||||
|
|
||||||
"movq %%rcx, %%rsi \n"
|
"movq %%rcx, %%rsi \n"
|
||||||
"movq $64, %%rcx \n"
|
"movq $64, %%rcx \n"
|
||||||
"subq %%rsi, %%rcx \n"
|
"subq %%rsi, %%rcx \n"
|
||||||
"movq $-1, %%rdx \n"
|
"movq $-1, %%rdx \n"
|
||||||
"shrq %%cl, %%rdx \n"
|
"shrq %%cl, %%rdx \n"
|
||||||
"movq %%rdx, %[amask] \n"
|
"movq %%rdx, %%r8 \n"
|
||||||
"movq %%rsi, %%rcx \n"
|
"movq %%rsi, %%rcx \n"
|
||||||
|
|
||||||
"xorq %%rdx, %%rdx \n"
|
"xorq %%rdx, %%rdx \n"
|
||||||
"movq %%rdx, %%rsi \n"
|
"movq %%rdx, %%rsi \n"
|
||||||
|
|
||||||
"orq %%rax, %%rax \n"
|
"orq %%rax, %%rax \n"
|
||||||
"cmovnz %[amask], %%rsi \n"
|
"cmovnz %%r8, %%rsi \n"
|
||||||
|
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"rolq %%cl, (%%rbx,%%rdx,8) \n"
|
"rolq %%cl, (%%rbx,%%rdx,8) \n"
|
||||||
|
|
||||||
"movq (%%rbx,%%rdx,8), %%rax \n"
|
"movq (%%rbx,%%rdx,8), %%rax \n"
|
||||||
"andq %[amask], %%rax \n"
|
"andq %%r8, %%rax \n"
|
||||||
"xorq %%rax, (%%rbx,%%rdx,8) \n"
|
"xorq %%rax, (%%rbx,%%rdx,8) \n"
|
||||||
"orq %%rsi, (%%rbx,%%rdx,8) \n"
|
"orq %%rsi, (%%rbx,%%rdx,8) \n"
|
||||||
"movq %%rax, %%rsi \n"
|
"movq %%rax, %%rsi \n"
|
||||||
|
@ -592,13 +547,9 @@ namespace ttmath
|
||||||
|
|
||||||
"and $1, %%rax \n"
|
"and $1, %%rax \n"
|
||||||
|
|
||||||
"pop %%rdi \n"
|
: "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
|
||||||
"pop %%rsi \n"
|
: "0" (c), "1" (b), "b" (p1), "c" (bits)
|
||||||
"pop %%rdx \n"
|
: "%r8", "cc", "memory" );
|
||||||
|
|
||||||
: "=a" (c)
|
|
||||||
: "0" (c), "D" (b), "b" (p1), "c" (bits), [amask] "m" (mask)
|
|
||||||
: "cc", "memory" );
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -627,9 +578,9 @@ namespace ttmath
|
||||||
{
|
{
|
||||||
TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
|
TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
|
||||||
|
|
||||||
register sint b = value_size;
|
sint b = value_size;
|
||||||
register uint * p1 = table;
|
uint * p1 = table;
|
||||||
register uint mask;
|
uint dummy, dummy2, dummy3;
|
||||||
|
|
||||||
#ifndef __GNUC__
|
#ifndef __GNUC__
|
||||||
#error "another compiler than GCC is currently not supported in 64bit mode"
|
#error "another compiler than GCC is currently not supported in 64bit mode"
|
||||||
|
@ -639,16 +590,12 @@ namespace ttmath
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
|
|
||||||
"push %%rdx \n"
|
|
||||||
"push %%rsi \n"
|
|
||||||
"push %%rdi \n"
|
|
||||||
|
|
||||||
"movq %%rcx, %%rsi \n"
|
"movq %%rcx, %%rsi \n"
|
||||||
"movq $64, %%rcx \n"
|
"movq $64, %%rcx \n"
|
||||||
"subq %%rsi, %%rcx \n"
|
"subq %%rsi, %%rcx \n"
|
||||||
"movq $-1, %%rdx \n"
|
"movq $-1, %%rdx \n"
|
||||||
"shlq %%cl, %%rdx \n"
|
"shlq %%cl, %%rdx \n"
|
||||||
"movq %%rdx, %[amask] \n"
|
"movq %%rdx, %%R8 \n"
|
||||||
"movq %%rsi, %%rcx \n"
|
"movq %%rsi, %%rcx \n"
|
||||||
|
|
||||||
"xorq %%rdx, %%rdx \n"
|
"xorq %%rdx, %%rdx \n"
|
||||||
|
@ -657,13 +604,13 @@ namespace ttmath
|
||||||
"decq %%rdx \n"
|
"decq %%rdx \n"
|
||||||
|
|
||||||
"orq %%rax, %%rax \n"
|
"orq %%rax, %%rax \n"
|
||||||
"cmovnz %[amask], %%rsi \n"
|
"cmovnz %%R8, %%rsi \n"
|
||||||
|
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"rorq %%cl, (%%rbx,%%rdx,8) \n"
|
"rorq %%cl, (%%rbx,%%rdx,8) \n"
|
||||||
|
|
||||||
"movq (%%rbx,%%rdx,8), %%rax \n"
|
"movq (%%rbx,%%rdx,8), %%rax \n"
|
||||||
"andq %[amask], %%rax \n"
|
"andq %%R8, %%rax \n"
|
||||||
"xorq %%rax, (%%rbx,%%rdx,8) \n"
|
"xorq %%rax, (%%rbx,%%rdx,8) \n"
|
||||||
"orq %%rsi, (%%rbx,%%rdx,8) \n"
|
"orq %%rsi, (%%rbx,%%rdx,8) \n"
|
||||||
"movq %%rax, %%rsi \n"
|
"movq %%rax, %%rsi \n"
|
||||||
|
@ -675,13 +622,9 @@ namespace ttmath
|
||||||
"rolq $1, %%rax \n"
|
"rolq $1, %%rax \n"
|
||||||
"andq $1, %%rax \n"
|
"andq $1, %%rax \n"
|
||||||
|
|
||||||
"pop %%rdi \n"
|
: "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
|
||||||
"pop %%rsi \n"
|
: "0" (c), "1" (b), "b" (p1), "c" (bits)
|
||||||
"pop %%rdx \n"
|
: "%r8", "cc", "memory" );
|
||||||
|
|
||||||
: "=a" (c)
|
|
||||||
: "0" (c), "D" (b), "b" (p1), "c" (bits), [amask] "m" (mask)
|
|
||||||
: "cc", "memory" );
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue