changed: minor changes throughout the asm code

the code should now be a little faster


git-svn-id: svn://ttmath.org/publicrep/ttmath/trunk@149 e52654a7-88a9-db11-a3e9-0013d4bc506e
This commit is contained in:
Tomasz Sowa 2009-05-17 00:04:42 +00:00
parent eaa19dd46a
commit 5e5a106605
2 changed files with 236 additions and 265 deletions

View File

@ -77,9 +77,9 @@ namespace ttmath
template<uint value_size> template<uint value_size>
uint UInt<value_size>::Add(const UInt<value_size> & ss2, uint c) uint UInt<value_size>::Add(const UInt<value_size> & ss2, uint c)
{ {
register uint b = value_size; uint b = value_size;
register uint * p1 = table; uint * p1 = table;
register uint * p2 = const_cast<uint*>(ss2.table); uint * p2 = const_cast<uint*>(ss2.table);
// we don't have to use TTMATH_REFERENCE_ASSERT here // we don't have to use TTMATH_REFERENCE_ASSERT here
// this algorithm doesn't require it // this algorithm doesn't require it
@ -101,10 +101,9 @@ namespace ttmath
mov ebx,[p1] mov ebx,[p1]
mov esi,[p2] mov esi,[p2]
xor eax,eax // eax=0 xor edx,edx // edx=0
mov edx,eax // edx=0 mov eax,[c]
neg eax // CF=1 if rax!=0 , CF=0 if rax==0
sub eax,[c] // CF=c
p: p:
mov eax,[esi+edx*4] mov eax,[esi+edx*4]
@ -114,9 +113,8 @@ namespace ttmath
dec ecx dec ecx
jnz p jnz p
setc al adc ecx, ecx
movzx edx, al mov [c], ecx
mov [c], edx
pop esi pop esi
pop edx pop edx
@ -131,35 +129,27 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2;
// this part should be compiled with gcc // this part should be compiled with gcc
__asm__ __volatile__( __asm__ __volatile__(
"push %%ecx \n"
"xorl %%eax, %%eax \n"
"movl %%eax, %%edx \n"
"subl %%edi, %%eax \n"
"xorl %%edx, %%edx \n"
"negl %%eax \n" // CF=1 if rax!=0 , CF=0 if rax==0
"1: \n" "1: \n"
"movl (%%esi,%%edx,4),%%eax \n" "movl (%%esi,%%edx,4), %%eax \n"
"adcl %%eax, (%%ebx,%%edx,4) \n" "adcl %%eax, (%%ebx,%%edx,4) \n"
"incl %%edx \n" "incl %%edx \n"
"decl %%ecx \n" "decl %%ecx \n"
"jnz 1b \n" "jnz 1b \n"
"setc %%al \n" "adc %%ecx, %%ecx \n"
"movzx %%al,%%edx \n"
"pop %%ecx \n"
: "=d" (c)
: "D" (c), "c" (b), "b" (p1), "S" (p2)
: "%eax", "cc", "memory" );
: "=c" (c), "=a" (dummy), "=d" (dummy2)
: "0" (b), "1" (c), "b" (p1), "S" (p2)
: "cc", "memory" );
#endif #endif
TTMATH_LOG("UInt::Add") TTMATH_LOG("UInt::Add")
@ -191,9 +181,9 @@ namespace ttmath
template<uint value_size> template<uint value_size>
uint UInt<value_size>::AddInt(uint value, uint index) uint UInt<value_size>::AddInt(uint value, uint index)
{ {
register uint b = value_size; uint b = value_size;
register uint * p1 = table; uint * p1 = table;
register uint c; uint c;
TTMATH_ASSERT( index < value_size ) TTMATH_ASSERT( index < value_size )
@ -238,11 +228,10 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2;
__asm__ __volatile__( __asm__ __volatile__(
"push %%eax \n"
"push %%ecx \n"
"subl %%edx, %%ecx \n" "subl %%edx, %%ecx \n"
"1: \n" "1: \n"
@ -258,11 +247,8 @@ namespace ttmath
"setc %%al \n" "setc %%al \n"
"movzx %%al, %%edx \n" "movzx %%al, %%edx \n"
"pop %%ecx \n" : "=d" (c), "=a" (dummy), "=c" (dummy2)
"pop %%eax \n" : "0" (index), "1" (value), "2" (b), "b" (p1)
: "=d" (c)
: "a" (value), "c" (b), "0" (index), "b" (p1)
: "cc", "memory" ); : "cc", "memory" );
#endif #endif
@ -308,9 +294,9 @@ namespace ttmath
template<uint value_size> template<uint value_size>
uint UInt<value_size>::AddTwoInts(uint x2, uint x1, uint index) uint UInt<value_size>::AddTwoInts(uint x2, uint x1, uint index)
{ {
register uint b = value_size; uint b = value_size;
register uint * p1 = table; uint * p1 = table;
register uint c; uint c;
TTMATH_ASSERT( index < value_size - 1 ) TTMATH_ASSERT( index < value_size - 1 )
@ -359,11 +345,10 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2;
__asm__ __volatile__( __asm__ __volatile__(
"push %%ecx \n"
"push %%edx \n"
"subl %%edx, %%ecx \n" "subl %%edx, %%ecx \n"
"addl %%esi, (%%ebx,%%edx,4) \n" "addl %%esi, (%%ebx,%%edx,4) \n"
@ -383,11 +368,8 @@ namespace ttmath
"setc %%al \n" "setc %%al \n"
"movzx %%al, %%eax \n" "movzx %%al, %%eax \n"
"pop %%edx \n" : "=a" (c), "=c" (dummy), "=d" (dummy2)
"pop %%ecx \n" : "0" (x2), "1" (b), "2" (index), "b" (p1), "S" (x1)
: "=a" (c)
: "c" (b), "d" (index), "b" (p1), "S" (x1), "0" (x2)
: "cc", "memory" ); : "cc", "memory" );
#endif #endif
@ -456,10 +438,9 @@ namespace ttmath
or ebx, ebx or ebx, ebx
jz end jz end
xor ebx, ebx xor ebx, ebx // ebx = 0
sub ebx, ecx // setting cf from ecx neg ecx // setting cf from ecx
mov ecx, [rest] // ecx is != 0 mov ecx, [rest] // ecx is != 0
mov ebx, 0
p2: p2:
mov eax, [esi+edx*4] mov eax, [esi+edx*4]
adc eax, ebx adc eax, ebx
@ -503,10 +484,9 @@ namespace ttmath
"or %%eax, %%eax \n" "or %%eax, %%eax \n"
"jz 3f \n" "jz 3f \n"
"xor %%ebx, %%ebx \n" "xor %%ebx, %%ebx \n" // ebx = 0
"sub %%ecx, %%ebx \n" // setting cf from ecx "neg %%ecx \n" // setting cf from ecx
"mov %%eax, %%ecx \n" // ecx=rest and is != 0 "mov %%eax, %%ecx \n" // ecx=rest and is != 0
"mov $0, %%ebx \n"
"2: \n" "2: \n"
"mov (%%esi, %%edx, 4), %%eax \n" "mov (%%esi, %%edx, 4), %%eax \n"
"adc %%ebx, %%eax \n" "adc %%ebx, %%eax \n"
@ -542,9 +522,9 @@ namespace ttmath
template<uint value_size> template<uint value_size>
uint UInt<value_size>::Sub(const UInt<value_size> & ss2, uint c) uint UInt<value_size>::Sub(const UInt<value_size> & ss2, uint c)
{ {
register uint b = value_size; uint b = value_size;
register uint * p1 = table; uint * p1 = table;
register uint * p2 = const_cast<uint*>(ss2.table); uint * p2 = const_cast<uint*>(ss2.table);
// we don't have to use TTMATH_REFERENCE_ASSERT here // we don't have to use TTMATH_REFERENCE_ASSERT here
// this algorithm doesn't require it // this algorithm doesn't require it
@ -564,22 +544,20 @@ namespace ttmath
mov ebx,[p1] mov ebx,[p1]
mov esi,[p2] mov esi,[p2]
xor eax, eax xor edx,edx // edx=0
mov edx, eax mov eax,[c]
neg eax // CF=1 if rax!=0 , CF=0 if rax==0
sub eax, [c]
p: p:
mov eax, [esi+edx*4] mov eax,[esi+edx*4]
sbb [ebx+edx*4], eax sbb [ebx+edx*4],eax
inc edx inc edx
dec ecx dec ecx
jnz p jnz p
setc al adc ecx, ecx
movzx edx, al mov [c], ecx
mov [c], edx
pop esi pop esi
pop edx pop edx
@ -592,30 +570,26 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
__asm__ __volatile__( uint dummy, dummy2;
"push %%ecx \n"
"xorl %%eax, %%eax \n"
"movl %%eax, %%edx \n"
"subl %%edi, %%eax \n"
__asm__ __volatile__(
"xorl %%edx, %%edx \n"
"negl %%eax \n" // CF=1 if rax!=0 , CF=0 if rax==0
"1: \n" "1: \n"
"movl (%%esi,%%edx,4),%%eax \n" "movl (%%esi,%%edx,4), %%eax \n"
"sbbl %%eax, (%%ebx,%%edx,4) \n" "sbbl %%eax, (%%ebx,%%edx,4) \n"
"incl %%edx \n" "incl %%edx \n"
"decl %%ecx \n" "decl %%ecx \n"
"jnz 1b \n" "jnz 1b \n"
"setc %%al \n" "adc %%ecx, %%ecx \n"
"movzx %%al,%%edx \n"
"pop %%ecx \n" : "=c" (c), "=a" (dummy), "=d" (dummy2)
: "0" (b), "1" (c), "b" (p1), "S" (p2)
: "=d" (c) : "cc", "memory" );
: "D" (c), "c" (b), "b" (p1), "S" (p2)
: "%eax", "cc", "memory" );
#endif #endif
@ -649,13 +623,14 @@ namespace ttmath
template<uint value_size> template<uint value_size>
uint UInt<value_size>::SubInt(uint value, uint index) uint UInt<value_size>::SubInt(uint value, uint index)
{ {
register uint b = value_size; uint b = value_size;
register uint * p1 = table; uint * p1 = table;
register uint c; uint c;
TTMATH_ASSERT( index < value_size ) TTMATH_ASSERT( index < value_size )
#ifndef __GNUC__ #ifndef __GNUC__
__asm __asm
{ {
push eax push eax
@ -690,15 +665,15 @@ namespace ttmath
pop ebx pop ebx
pop eax pop eax
} }
#endif #endif
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2;
__asm__ __volatile__( __asm__ __volatile__(
"push %%eax \n"
"push %%ecx \n"
"subl %%edx, %%ecx \n" "subl %%edx, %%ecx \n"
"1: \n" "1: \n"
@ -714,11 +689,8 @@ namespace ttmath
"setc %%al \n" "setc %%al \n"
"movzx %%al, %%edx \n" "movzx %%al, %%edx \n"
"pop %%ecx \n" : "=d" (c), "=a" (dummy), "=c" (dummy2)
"pop %%eax \n" : "0" (index), "1" (value), "2" (b), "b" (p1)
: "=d" (c)
: "a" (value), "c" (b), "0" (index), "b" (p1)
: "cc", "memory" ); : "cc", "memory" );
#endif #endif
@ -793,10 +765,9 @@ namespace ttmath
or ebx, ebx or ebx, ebx
jz end jz end
xor ebx, ebx xor ebx, ebx // ebx = 0
sub ebx, ecx // setting cf from ecx neg ecx // setting cf from ecx
mov ecx, [rest] // ecx is != 0 mov ecx, [rest] // ecx is != 0
mov ebx, 0
p2: p2:
mov eax, [esi+edx*4] mov eax, [esi+edx*4]
sbb eax, ebx sbb eax, ebx
@ -840,10 +811,9 @@ namespace ttmath
"or %%eax, %%eax \n" "or %%eax, %%eax \n"
"jz 3f \n" "jz 3f \n"
"xor %%ebx, %%ebx \n" "xor %%ebx, %%ebx \n" // ebx = 0
"sub %%ecx, %%ebx \n" // setting cf from ecx "neg %%ecx \n" // setting cf from ecx
"mov %%eax, %%ecx \n" // ecx=rest and is != 0 "mov %%eax, %%ecx \n" // ecx=rest and is != 0
"mov $0, %%ebx \n"
"2: \n" "2: \n"
"mov (%%esi, %%edx, 4), %%eax \n" "mov (%%esi, %%edx, 4), %%eax \n"
"sbb %%ebx, %%eax \n" "sbb %%ebx, %%eax \n"
@ -884,8 +854,8 @@ namespace ttmath
template<uint value_size> template<uint value_size>
uint UInt<value_size>::Rcl2_one(uint c) uint UInt<value_size>::Rcl2_one(uint c)
{ {
register sint b = value_size; uint b = value_size;
register uint * p1 = table; uint * p1 = table;
#ifndef __GNUC__ #ifndef __GNUC__
__asm __asm
@ -895,11 +865,9 @@ namespace ttmath
push edx push edx
mov ebx, [p1] mov ebx, [p1]
xor edx, edx xor edx, edx
mov ecx, edx mov ecx, [c]
sub ecx, [c] neg ecx
mov ecx, [b] mov ecx, [b]
p: p:
@ -909,10 +877,8 @@ namespace ttmath
dec ecx dec ecx
jnz p jnz p
setc dl adc ecx, ecx
movzx edx, dl mov [c], ecx
mov [c], edx
pop edx pop edx
pop ecx pop ecx
@ -922,13 +888,12 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2;
__asm__ __volatile__( __asm__ __volatile__(
"push %%edx \n"
"push %%ecx \n"
"xorl %%edx, %%edx \n" // edx=0 "xorl %%edx, %%edx \n" // edx=0
"neg %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0 "negl %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0
"1: \n" "1: \n"
"rcll $1, (%%ebx, %%edx, 4) \n" "rcll $1, (%%ebx, %%edx, 4) \n"
@ -937,14 +902,10 @@ namespace ttmath
"decl %%ecx \n" "decl %%ecx \n"
"jnz 1b \n" "jnz 1b \n"
"setc %%al \n" "adcl %%ecx, %%ecx \n"
"movzx %%al, %%eax \n"
"pop %%ecx \n" : "=c" (c), "=a" (dummy), "=d" (dummy2)
"pop %%edx \n" : "0" (b), "1" (c), "b" (p1)
: "=a" (c)
: "0" (c), "c" (b), "b" (p1)
: "cc", "memory" ); : "cc", "memory" );
#endif #endif
@ -971,8 +932,8 @@ namespace ttmath
template<uint value_size> template<uint value_size>
uint UInt<value_size>::Rcr2_one(uint c) uint UInt<value_size>::Rcr2_one(uint c)
{ {
register sint b = value_size; uint b = value_size;
register uint * p1 = table; uint * p1 = table;
#ifndef __GNUC__ #ifndef __GNUC__
__asm __asm
@ -981,10 +942,8 @@ namespace ttmath
push ecx push ecx
mov ebx, [p1] mov ebx, [p1]
mov ecx, [c]
xor ecx, ecx neg ecx
sub ecx, [c]
mov ecx, [b] mov ecx, [b]
p: p:
@ -993,8 +952,7 @@ namespace ttmath
dec ecx dec ecx
jnz p jnz p
setc cl adc ecx, ecx
movzx ecx, cl
mov [c], ecx mov [c], ecx
pop ecx pop ecx
@ -1004,11 +962,11 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy;
__asm__ __volatile__( __asm__ __volatile__(
"push %%ecx \n" "negl %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0
"neg %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0
"1: \n" "1: \n"
"rcrl $1, -4(%%ebx, %%ecx, 4) \n" "rcrl $1, -4(%%ebx, %%ecx, 4) \n"
@ -1016,13 +974,10 @@ namespace ttmath
"decl %%ecx \n" "decl %%ecx \n"
"jnz 1b \n" "jnz 1b \n"
"setc %%al \n" "adcl %%ecx, %%ecx \n"
"movzx %%al, %%eax \n"
"pop %%ecx \n" : "=c" (c), "=a" (dummy)
: "0" (b), "1" (c), "b" (p1)
: "=a" (c)
: "0" (c), "c" (b), "b" (p1)
: "cc", "memory" ); : "cc", "memory" );
#endif #endif
@ -1034,6 +989,13 @@ namespace ttmath
#ifdef _MSC_VER
#pragma warning (disable : 4731)
//warning C4731: frame pointer register 'ebp' modified by inline assembly code
#endif
/*! /*!
this method moves all bits into the left hand side this method moves all bits into the left hand side
return value <- this <- c return value <- this <- c
@ -1051,9 +1013,8 @@ namespace ttmath
{ {
TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT ) TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
register sint b = value_size; uint b = value_size;
register uint * p1 = table; uint * p1 = table;
register uint mask;
#ifndef __GNUC__ #ifndef __GNUC__
__asm __asm
@ -1064,6 +1025,7 @@ namespace ttmath
push edx push edx
push esi push esi
push edi push edi
push ebp
mov edi, [b] mov edi, [b]
@ -1071,23 +1033,23 @@ namespace ttmath
sub ecx, [bits] sub ecx, [bits]
mov edx, -1 mov edx, -1
shr edx, cl shr edx, cl
mov [mask], edx
mov ecx, [bits] mov ecx, [bits]
mov ebx, [p1] mov ebx, [p1]
xor edx, edx // edx = 0
mov esi, edx // old value = 0
mov eax, [c] mov eax, [c]
mov ebp, edx // ebp = mask (modified ebp - don't read/write to variables)
xor edx, edx // edx = 0
mov esi, edx
or eax, eax or eax, eax
cmovnz esi, [mask] // if c then old value = mask cmovnz esi, ebp // if(c) esi=mask else esi=0
p: p:
rol dword ptr [ebx+edx*4], cl rol dword ptr [ebx+edx*4], cl
mov eax, [ebx+edx*4] mov eax, [ebx+edx*4]
and eax, [mask] and eax, ebp
xor [ebx+edx*4], eax // clearing bits xor [ebx+edx*4], eax // clearing bits
or [ebx+edx*4], esi // saving old value or [ebx+edx*4], esi // saving old value
mov esi, eax mov esi, eax
@ -1096,6 +1058,8 @@ namespace ttmath
dec edi dec edi
jnz p jnz p
pop ebp // restoring ebp
and eax, 1 and eax, 1
mov [c], eax mov [c], eax
@ -1110,31 +1074,30 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2, dummy3;
__asm__ __volatile__( __asm__ __volatile__(
"push %%edx \n" "push %%ebp \n"
"push %%esi \n"
"push %%edi \n"
"movl %%ecx, %%esi \n" "movl %%ecx, %%esi \n"
"movl $32, %%ecx \n" "movl $32, %%ecx \n"
"subl %%esi, %%ecx \n" "subl %%esi, %%ecx \n" // ecx = 32 - bits
"movl $-1, %%edx \n" "movl $-1, %%edx \n" // edx = -1 (all bits set to one)
"shrl %%cl, %%edx \n" "shrl %%cl, %%edx \n" // shifting (0 -> edx -> cf) (cl times)
"movl %%edx, %[amask] \n" "movl %%edx, %%ebp \n" // ebp = edx = mask
"movl %%esi, %%ecx \n" "movl %%esi, %%ecx \n"
"xorl %%edx, %%edx \n" "xorl %%edx, %%edx \n"
"movl %%edx, %%esi \n" "movl %%edx, %%esi \n"
"orl %%eax, %%eax \n" "orl %%eax, %%eax \n"
"cmovnz %[amask], %%esi \n" "cmovnz %%ebp, %%esi \n" // if(c) esi=mask else esi=0
"1: \n" "1: \n"
"roll %%cl, (%%ebx,%%edx,4) \n" "roll %%cl, (%%ebx,%%edx,4) \n"
"movl (%%ebx,%%edx,4), %%eax \n" "movl (%%ebx,%%edx,4), %%eax \n"
"andl %[amask], %%eax \n" "andl %%ebp, %%eax \n"
"xorl %%eax, (%%ebx,%%edx,4) \n" "xorl %%eax, (%%ebx,%%edx,4) \n"
"orl %%esi, (%%ebx,%%edx,4) \n" "orl %%esi, (%%ebx,%%edx,4) \n"
"movl %%eax, %%esi \n" "movl %%eax, %%esi \n"
@ -1145,12 +1108,10 @@ namespace ttmath
"and $1, %%eax \n" "and $1, %%eax \n"
"pop %%edi \n" "pop %%ebp \n"
"pop %%esi \n"
"pop %%edx \n"
: "=a" (c) : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
: "0" (c), "D" (b), "b" (p1), "c" (bits), [amask] "m" (mask) : "0" (c), "1" (b), "b" (p1), "c" (bits)
: "cc", "memory" ); : "cc", "memory" );
#endif #endif
@ -1180,9 +1141,8 @@ namespace ttmath
{ {
TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT ) TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
register sint b = value_size; uint b = value_size;
register uint * p1 = table; uint * p1 = table;
register uint mask;
#ifndef __GNUC__ #ifndef __GNUC__
__asm __asm
@ -1193,6 +1153,7 @@ namespace ttmath
push edx push edx
push esi push esi
push edi push edi
push ebp
mov edi, [b] mov edi, [b]
@ -1200,25 +1161,25 @@ namespace ttmath
sub ecx, [bits] sub ecx, [bits]
mov edx, -1 mov edx, -1
shl edx, cl shl edx, cl
mov [mask], edx
mov ecx, [bits] mov ecx, [bits]
mov ebx, [p1] mov ebx, [p1]
xor edx, edx // edx = 0
mov esi, edx // old value = 0
add edx, edi
dec edx // edx - is pointing at the last word
mov eax, [c] mov eax, [c]
mov ebp, edx // ebp = mask (modified ebp - don't read/write to variables)
xor edx, edx // edx = 0
mov esi, edx
add edx, edi
dec edx // edx is pointing at the end of the table (on last word)
or eax, eax or eax, eax
cmovnz esi, [mask] // if c then old value = mask cmovnz esi, ebp // if(c) esi=mask else esi=0
p: p:
ror dword ptr [ebx+edx*4], cl ror dword ptr [ebx+edx*4], cl
mov eax, [ebx+edx*4] mov eax, [ebx+edx*4]
and eax, [mask] and eax, ebp
xor [ebx+edx*4], eax // clearing bits xor [ebx+edx*4], eax // clearing bits
or [ebx+edx*4], esi // saving old value or [ebx+edx*4], esi // saving old value
mov esi, eax mov esi, eax
@ -1227,7 +1188,9 @@ namespace ttmath
dec edi dec edi
jnz p jnz p
rol eax, 1 // 31bit will be first pop ebp // restoring ebp
rol eax, 1 // 31bit will be first
and eax, 1 and eax, 1
mov [c], eax mov [c], eax
@ -1242,33 +1205,32 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2, dummy3;
__asm__ __volatile__( __asm__ __volatile__(
"push %%edx \n" "push %%ebp \n"
"push %%esi \n"
"push %%edi \n"
"movl %%ecx, %%esi \n" "movl %%ecx, %%esi \n"
"movl $32, %%ecx \n" "movl $32, %%ecx \n"
"subl %%esi, %%ecx \n" "subl %%esi, %%ecx \n" // ecx = 32 - bits
"movl $-1, %%edx \n" "movl $-1, %%edx \n" // edx = -1 (all bits set to one)
"shll %%cl, %%edx \n" "shll %%cl, %%edx \n" // shifting (cf <- edx <- 0) (cl times)
"movl %%edx, %[amask] \n" "movl %%edx, %%ebp \n" // ebp = edx = mask
"movl %%esi, %%ecx \n" "movl %%esi, %%ecx \n"
"xorl %%edx, %%edx \n" "xorl %%edx, %%edx \n"
"movl %%edx, %%esi \n" "movl %%edx, %%esi \n"
"addl %%edi, %%edx \n" "addl %%edi, %%edx \n"
"decl %%edx \n" "decl %%edx \n" // edx is pointing at the end of the table (on last word)
"orl %%eax, %%eax \n" "orl %%eax, %%eax \n"
"cmovnz %[amask], %%esi \n" "cmovnz %%ebp, %%esi \n" // if(c) esi=mask else esi=0
"1: \n" "1: \n"
"rorl %%cl, (%%ebx,%%edx,4) \n" "rorl %%cl, (%%ebx,%%edx,4) \n"
"movl (%%ebx,%%edx,4), %%eax \n" "movl (%%ebx,%%edx,4), %%eax \n"
"andl %[amask], %%eax \n" "andl %%ebp, %%eax \n"
"xorl %%eax, (%%ebx,%%edx,4) \n" "xorl %%eax, (%%ebx,%%edx,4) \n"
"orl %%esi, (%%ebx,%%edx,4) \n" "orl %%esi, (%%ebx,%%edx,4) \n"
"movl %%eax, %%esi \n" "movl %%eax, %%esi \n"
@ -1280,12 +1242,10 @@ namespace ttmath
"roll $1, %%eax \n" "roll $1, %%eax \n"
"andl $1, %%eax \n" "andl $1, %%eax \n"
"pop %%edi \n" "pop %%ebp \n"
"pop %%esi \n"
"pop %%edx \n"
: "=a" (c) : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
: "0" (c), "D" (b), "b" (p1), "c" (bits), [amask] "m" (mask) : "0" (c), "1" (b), "b" (p1), "c" (bits)
: "cc", "memory" ); : "cc", "memory" );
#endif #endif
@ -1296,6 +1256,10 @@ namespace ttmath
} }
#ifdef _MSC_VER
#pragma warning (default : 4731)
#endif
/* /*
this method returns the number of the highest set bit in one 32-bit word this method returns the number of the highest set bit in one 32-bit word
@ -1304,7 +1268,7 @@ namespace ttmath
template<uint value_size> template<uint value_size>
sint UInt<value_size>::FindLeadingBitInWord(uint x) sint UInt<value_size>::FindLeadingBitInWord(uint x)
{ {
register sint result; sint result;
#ifndef __GNUC__ #ifndef __GNUC__
__asm __asm
@ -1324,16 +1288,17 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
__asm__ __volatile__( uint dummy;
"bsrl %1, %0 \n" __asm__ (
"jnz 1f \n"
"movl $-1, %0 \n" "movl $-1, %1 \n"
"1: \n" "bsrl %2, %0 \n"
"cmovz %1, %0 \n"
: "=R" (result)
: "R" (x) : "=r" (result), "=&r" (dummy)
: "cc" ); : "r" (x)
: "cc" );
#endif #endif
@ -1384,15 +1349,14 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
__asm__ __volatile__( __asm__ (
"btsl %%ebx, %%eax \n" "btsl %%ebx, %%eax \n"
"setc %%bl \n" "setc %%bl \n"
"movzx %%bl, %%ebx \n" "movzx %%bl, %%ebx \n"
: "=a" (v), "=b" (old_bit) : "=a" (v), "=b" (old_bit)
: "0" (v), "1" (bit) : "0" (v), "1" (bit)
: "cc" ); : "cc" );
#endif #endif
@ -1423,8 +1387,8 @@ namespace ttmath
this has no effect in visual studio but it's useful when this has no effect in visual studio but it's useful when
using gcc and options like -Ox using gcc and options like -Ox
*/ */
register uint result1_; uint result1_;
register uint result2_; uint result2_;
#ifndef __GNUC__ #ifndef __GNUC__
@ -1448,12 +1412,12 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
__asm__ __volatile__( __asm__ (
"mull %%edx \n" "mull %%edx \n"
: "=a" (result1_), "=d" (result2_) : "=a" (result1_), "=d" (result2_)
: "0" (a), "1" (b) : "0" (a), "1" (b)
: "cc" ); : "cc" );
#endif #endif
@ -1491,8 +1455,8 @@ namespace ttmath
template<uint value_size> template<uint value_size>
void UInt<value_size>::DivTwoWords(uint a, uint b, uint c, uint * r, uint * rest) void UInt<value_size>::DivTwoWords(uint a, uint b, uint c, uint * r, uint * rest)
{ {
register uint r_; uint r_;
register uint rest_; uint rest_;
/* /*
these variables have similar meaning like those in these variables have similar meaning like those in
the multiplication algorithm MulTwoWords the multiplication algorithm MulTwoWords
@ -1521,12 +1485,12 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
__asm__ __volatile__( __asm__ (
"divl %%ecx \n" "divl %%ecx \n"
: "=a" (r_), "=d" (rest_) : "=a" (r_), "=d" (rest_)
: "d" (a), "a" (b), "c" (c) : "0" (b), "1" (a), "c" (c)
: "cc" ); : "cc" );
#endif #endif

View File

@ -78,7 +78,6 @@ namespace ttmath
uint b = value_size; uint b = value_size;
uint * p1 = table; uint * p1 = table;
const uint * p2 = ss2.table; const uint * p2 = ss2.table;
uint dummy, dummy2;
// we don't have to use TTMATH_REFERENCE_ASSERT here // we don't have to use TTMATH_REFERENCE_ASSERT here
// this algorithm doesn't require it // this algorithm doesn't require it
@ -88,13 +87,15 @@ namespace ttmath
#endif #endif
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2;
/* /*
this part should be compiled with gcc this part should be compiled with gcc
*/ */
__asm__ __volatile__( __asm__ __volatile__(
"xorq %%rdx, %%rdx \n" "xorq %%rdx, %%rdx \n"
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0 "negq %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
"1: \n" "1: \n"
"movq (%%rsi,%%rdx,8), %%rax \n" "movq (%%rsi,%%rdx,8), %%rax \n"
@ -107,7 +108,7 @@ namespace ttmath
"adcq %%rcx, %%rcx \n" "adcq %%rcx, %%rcx \n"
: "=c" (c), "=a" (dummy), "=d" (dummy2) : "=c" (c), "=a" (dummy), "=d" (dummy2)
: "0" (b), "1" (c), "b" (p1), "S" (p2) : "0" (b), "1" (c), "b" (p1), "S" (p2)
: "cc", "memory" ); : "cc", "memory" );
#endif #endif
@ -145,7 +146,6 @@ namespace ttmath
uint b = value_size; uint b = value_size;
uint * p1 = table; uint * p1 = table;
uint c; uint c;
uint dummy, dummy2;
TTMATH_ASSERT( index < value_size ) TTMATH_ASSERT( index < value_size )
@ -154,7 +154,8 @@ namespace ttmath
#endif #endif
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2;
__asm__ __volatile__( __asm__ __volatile__(
"subq %%rdx, %%rcx \n" "subq %%rdx, %%rcx \n"
@ -172,8 +173,8 @@ namespace ttmath
"setc %%al \n" "setc %%al \n"
"movzx %%al, %%rdx \n" "movzx %%al, %%rdx \n"
: "=d" (c), "=a" (dummy), "=c" (dummy2) : "=d" (c), "=a" (dummy), "=c" (dummy2)
: "a" (value), "c" (b), "0" (index), "b" (p1) : "0" (index), "1" (value), "2" (b), "b" (p1)
: "cc", "memory" ); : "cc", "memory" );
#endif #endif
@ -223,7 +224,6 @@ namespace ttmath
uint b = value_size; uint b = value_size;
uint * p1 = table; uint * p1 = table;
uint c; uint c;
uint dummy, dummy2;
TTMATH_ASSERT( index < value_size - 1 ) TTMATH_ASSERT( index < value_size - 1 )
@ -232,6 +232,8 @@ namespace ttmath
#endif #endif
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2;
__asm__ __volatile__( __asm__ __volatile__(
"subq %%rdx, %%rcx \n" "subq %%rdx, %%rcx \n"
@ -254,7 +256,7 @@ namespace ttmath
"movzx %%al, %%rax \n" "movzx %%al, %%rax \n"
: "=a" (c), "=c" (dummy), "=d" (dummy2) : "=a" (c), "=c" (dummy), "=d" (dummy2)
: "1" (b), "2" (index), "b" (p1), "S" (x1), "0" (x2) : "0" (x2), "1" (b), "2" (index), "b" (p1), "S" (x1)
: "cc", "memory" ); : "cc", "memory" );
#endif #endif
@ -299,10 +301,10 @@ namespace ttmath
#endif #endif
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy1, dummy2, dummy3;
// this part should be compiled with gcc // this part should be compiled with gcc
uint dummy1, dummy2, dummy3;
__asm__ __volatile__( __asm__ __volatile__(
"mov %%rdx, %%r8 \n" "mov %%rdx, %%r8 \n"
"xor %%rdx, %%rdx \n" // rdx = 0, cf = 0 "xor %%rdx, %%rdx \n" // rdx = 0, cf = 0
@ -320,10 +322,9 @@ namespace ttmath
"or %%r8, %%r8 \n" "or %%r8, %%r8 \n"
"jz 3f \n" "jz 3f \n"
"xor %%rbx, %%rbx \n" "xor %%rbx, %%rbx \n" // ebx = 0
"sub %%rcx, %%rbx \n" // setting cf from rcx "neg %%rcx \n" // setting cf from rcx
"mov %%r8, %%rcx \n" // rcx=rest and is != 0 "mov %%r8, %%rcx \n" // rcx=rest and is != 0
"mov $0, %%rbx \n"
"2: \n" "2: \n"
"mov (%%rsi, %%rdx, 8), %%rax \n" "mov (%%rsi, %%rdx, 8), %%rax \n"
"adc %%rbx, %%rax \n" "adc %%rbx, %%rax \n"
@ -367,7 +368,7 @@ namespace ttmath
uint b = value_size; uint b = value_size;
uint * p1 = table; uint * p1 = table;
const uint * p2 = ss2.table; const uint * p2 = ss2.table;
uint dummy, dummy2;
// we don't have to use TTMATH_REFERENCE_ASSERT here // we don't have to use TTMATH_REFERENCE_ASSERT here
// this algorithm doesn't require it // this algorithm doesn't require it
@ -377,10 +378,12 @@ namespace ttmath
#endif #endif
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2;
__asm__ __volatile__( __asm__ __volatile__(
"xorq %%rdx, %%rdx \n" "xorq %%rdx, %%rdx \n"
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0 "negq %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
"1: \n" "1: \n"
"movq (%%rsi,%%rdx,8), %%rax \n" "movq (%%rsi,%%rdx,8), %%rax \n"
@ -393,10 +396,9 @@ namespace ttmath
"adcq %%rcx, %%rcx \n" "adcq %%rcx, %%rcx \n"
: "=c" (c), "=a" (dummy), "=d" (dummy2) : "=c" (c), "=a" (dummy), "=d" (dummy2)
: "0" (b), "1" (c), "b" (p1), "S" (p2) : "0" (b), "1" (c), "b" (p1), "S" (p2)
: "cc", "memory" ); : "cc", "memory" );
#endif #endif
TTMATH_LOG("UInt::Sub") TTMATH_LOG("UInt::Sub")
@ -456,8 +458,8 @@ namespace ttmath
"setc %%al \n" "setc %%al \n"
"movzx %%al, %%rdx \n" "movzx %%al, %%rdx \n"
: "=d" (c), "=a" (dummy), "=c" (dummy2) : "=d" (c), "=a" (dummy), "=c" (dummy2)
: "1" (value), "2" (b), "0" (index), "b" (p1) : "0" (index), "1" (value), "2" (b), "b" (p1)
: "cc", "memory" ); : "cc", "memory" );
#endif #endif
@ -528,10 +530,9 @@ namespace ttmath
"or %%r8, %%r8 \n" "or %%r8, %%r8 \n"
"jz 3f \n" "jz 3f \n"
"xor %%rbx, %%rbx \n" "xor %%rbx, %%rbx \n" // ebx = 0
"sub %%rcx, %%rbx \n" // setting cf from rcx "neg %%rcx \n" // setting cf from rcx
"mov %%r8, %%rcx \n" // rcx=rest and is != 0 "mov %%r8, %%rcx \n" // rcx=rest and is != 0
"mov $0, %%rbx \n"
"2: \n" "2: \n"
"mov (%%rsi, %%rdx, 8), %%rax \n" "mov (%%rsi, %%rdx, 8), %%rax \n"
"sbb %%rbx, %%rax \n" "sbb %%rbx, %%rax \n"
@ -576,17 +577,19 @@ namespace ttmath
{ {
sint b = value_size; sint b = value_size;
uint * p1 = table; uint * p1 = table;
uint dummy, dummy2;
#ifndef __GNUC__ #ifndef __GNUC__
#error "another compiler than GCC is currently not supported in 64bit mode" #error "another compiler than GCC is currently not supported in 64bit mode"
#endif #endif
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2;
__asm__ __volatile__( __asm__ __volatile__(
"xorq %%rdx, %%rdx \n" // rdx=0 "xorq %%rdx, %%rdx \n" // rdx=0
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0 "negq %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
"1: \n" "1: \n"
"rclq $1, (%%rbx, %%rdx, 8) \n" "rclq $1, (%%rbx, %%rdx, 8) \n"
@ -598,7 +601,7 @@ namespace ttmath
"adcq %%rcx, %%rcx \n" "adcq %%rcx, %%rcx \n"
: "=c" (c), "=a" (dummy), "=d" (dummy2) : "=c" (c), "=a" (dummy), "=d" (dummy2)
: "1" (c), "0" (b), "b" (p1) : "0" (b), "1" (c), "b" (p1)
: "cc", "memory" ); : "cc", "memory" );
#endif #endif
@ -628,16 +631,18 @@ namespace ttmath
{ {
sint b = value_size; sint b = value_size;
uint * p1 = table; uint * p1 = table;
uint dummy;
#ifndef __GNUC__ #ifndef __GNUC__
#error "another compiler than GCC is currently not supported in 64bit mode" #error "another compiler than GCC is currently not supported in 64bit mode"
#endif #endif
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy;
__asm__ __volatile__( __asm__ __volatile__(
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0 "negq %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
"1: \n" "1: \n"
"rcrq $1, -8(%%rbx, %%rcx, 8) \n" "rcrq $1, -8(%%rbx, %%rcx, 8) \n"
@ -648,7 +653,7 @@ namespace ttmath
"adcq %%rcx, %%rcx \n" "adcq %%rcx, %%rcx \n"
: "=c" (c), "=a" (dummy) : "=c" (c), "=a" (dummy)
: "1" (c), "0" (b), "b" (p1) : "0" (b), "1" (c), "b" (p1)
: "cc", "memory" ); : "cc", "memory" );
#endif #endif
@ -681,13 +686,15 @@ namespace ttmath
uint b = value_size; uint b = value_size;
uint * p1 = table; uint * p1 = table;
uint dummy, dummy2, dummy3;
#ifndef __GNUC__ #ifndef __GNUC__
#error "another compiler than GCC is currently not supported in 64bit mode" #error "another compiler than GCC is currently not supported in 64bit mode"
#endif #endif
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2, dummy3;
__asm__ __volatile__( __asm__ __volatile__(
"movq %%rcx, %%rsi \n" "movq %%rcx, %%rsi \n"
@ -700,7 +707,6 @@ namespace ttmath
"xorq %%rdx, %%rdx \n" "xorq %%rdx, %%rdx \n"
"movq %%rdx, %%rsi \n" "movq %%rdx, %%rsi \n"
"orq %%rax, %%rax \n" "orq %%rax, %%rax \n"
"cmovnz %%r8, %%rsi \n" "cmovnz %%r8, %%rsi \n"
@ -720,7 +726,7 @@ namespace ttmath
"and $1, %%rax \n" "and $1, %%rax \n"
: "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3) : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
: "0" (c), "1" (b), "b" (p1), "c" (bits) : "0" (c), "1" (b), "b" (p1), "c" (bits)
: "%r8", "cc", "memory" ); : "%r8", "cc", "memory" );
#endif #endif
@ -774,7 +780,6 @@ namespace ttmath
"movq %%rdx, %%rsi \n" "movq %%rdx, %%rsi \n"
"addq %%rdi, %%rdx \n" "addq %%rdi, %%rdx \n"
"decq %%rdx \n" "decq %%rdx \n"
"orq %%rax, %%rax \n" "orq %%rax, %%rax \n"
"cmovnz %%R8, %%rsi \n" "cmovnz %%R8, %%rsi \n"
@ -782,7 +787,7 @@ namespace ttmath
"rorq %%cl, (%%rbx,%%rdx,8) \n" "rorq %%cl, (%%rbx,%%rdx,8) \n"
"movq (%%rbx,%%rdx,8), %%rax \n" "movq (%%rbx,%%rdx,8), %%rax \n"
"andq %%R8, %%rax \n" "andq %%R8, %%rax \n"
"xorq %%rax, (%%rbx,%%rdx,8) \n" "xorq %%rax, (%%rbx,%%rdx,8) \n"
"orq %%rsi, (%%rbx,%%rdx,8) \n" "orq %%rsi, (%%rbx,%%rdx,8) \n"
"movq %%rax, %%rsi \n" "movq %%rax, %%rsi \n"
@ -815,23 +820,25 @@ namespace ttmath
template<uint value_size> template<uint value_size>
sint UInt<value_size>::FindLeadingBitInWord(uint x) sint UInt<value_size>::FindLeadingBitInWord(uint x)
{ {
register sint result; sint result;
#ifndef __GNUC__ #ifndef __GNUC__
#error "another compiler than GCC is currently not supported in 64bit mode" #error "another compiler than GCC is currently not supported in 64bit mode"
#endif #endif
#ifdef __GNUC__ #ifdef __GNUC__
__asm__ __volatile__( uint dummy;
"bsrq %1, %0 \n" __asm__ (
"jnz 1f \n"
"movq $-1, %0 \n" "movq $-1, %1 \n"
"1: \n" "bsrq %2, %0 \n"
"cmovz %1, %0 \n"
: "=R" (result)
: "R" (x) : "=r" (result), "=&r" (dummy)
: "cc" ); : "r" (x)
: "cc" );
#endif #endif
@ -867,15 +874,15 @@ namespace ttmath
#endif #endif
#ifdef __GNUC__ #ifdef __GNUC__
__asm__ __volatile__(
__asm__ (
"btsq %%rbx, %%rax \n" "btsq %%rbx, %%rax \n"
"setc %%bl \n" "setc %%bl \n"
"movzx %%bl, %%rbx \n" "movzx %%bl, %%rbx \n"
: "=a" (v), "=b" (old_bit) : "=a" (v), "=b" (old_bit)
: "0" (v), "1" (bit) : "0" (v), "1" (bit)
: "cc" ); : "cc" );
#endif #endif
@ -914,8 +921,8 @@ namespace ttmath
this has no effect in visual studio but it's usefull when this has no effect in visual studio but it's usefull when
using gcc and options like -O using gcc and options like -O
*/ */
register uint result1_; uint result1_;
register uint result2_; uint result2_;
#ifndef __GNUC__ #ifndef __GNUC__
#error "another compiler than GCC is currently not supported in 64bit mode" #error "another compiler than GCC is currently not supported in 64bit mode"
@ -923,12 +930,12 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
__asm__ __volatile__( __asm__ (
"mulq %%rdx \n" "mulq %%rdx \n"
: "=a" (result1_), "=d" (result2_) : "=a" (result1_), "=d" (result2_)
: "0" (a), "1" (b) : "0" (a), "1" (b)
: "cc" ); : "cc" );
#endif #endif
@ -965,8 +972,8 @@ namespace ttmath
template<uint value_size> template<uint value_size>
void UInt<value_size>::DivTwoWords(uint a,uint b, uint c, uint * r, uint * rest) void UInt<value_size>::DivTwoWords(uint a,uint b, uint c, uint * r, uint * rest)
{ {
register uint r_; uint r_;
register uint rest_; uint rest_;
/* /*
these variables have similar meaning like those in these variables have similar meaning like those in
the multiplication algorithm MulTwoWords the multiplication algorithm MulTwoWords
@ -980,7 +987,7 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
__asm__ __volatile__( __asm__ (
"divq %%rcx \n" "divq %%rcx \n"