some optimisations made in assembler code by thomasbraby at zoom.co.uk

(not verified yet)
modified files: ttmathuint.h ttmathuint64.h
I've changed the Intel syntax a little (it didn't want to compile)


git-svn-id: svn://ttmath.org/publicrep/ttmath/trunk@78 e52654a7-88a9-db11-a3e9-0013d4bc506e
This commit is contained in:
Tomasz Sowa 2008-10-17 09:57:36 +00:00
parent 98c2379182
commit cdd95f602c
2 changed files with 179 additions and 147 deletions

View File

@ -262,29 +262,32 @@ public:
mov ebx,[p1] mov ebx,[p1]
mov edx,[p2] mov edx,[p2]
mov eax,0 xor eax,eax
sub eax,[c] sub eax,[c]
lahf
p: p:
sahf
mov eax,[ebx] mov eax,[ebx]
adc eax,[edx] adc eax,[edx]
mov [ebx],eax mov [ebx],eax
lahf
add ebx,4
add edx,4
inc ebx sub ecx,1
inc ebx jnz p
inc ebx
inc ebx
inc edx test ah,1
inc edx setnz al
inc edx
inc edx //
// movzx dword ptr [c],al
loop p //
movzx edx, al
mov eax,0 mov [c], edx
adc eax,eax //
mov [c],eax
pop edx pop edx
pop ecx pop ecx
@ -304,29 +307,26 @@ public:
"push %%ecx \n" "push %%ecx \n"
"push %%edx \n" "push %%edx \n"
"movl $0, %%eax \n" "xorl %%eax, %%eax \n"
"subl %%esi, %%eax \n" "subl %%esi, %%eax \n"
"lahf \n"
"1: \n" "1: \n"
"sahf \n"
"movl (%%ebx),%%eax \n" "movl (%%ebx),%%eax \n"
"adcl (%%edx),%%eax \n" "adcl (%%edx),%%eax \n"
"movl %%eax,(%%ebx) \n" "movl %%eax,(%%ebx) \n"
"lahf \n"
"inc %%ebx \n" "add $4,%%ebx \n"
"inc %%ebx \n" "add $4,%%edx \n"
"inc %%ebx \n"
"inc %%ebx \n"
"inc %%edx \n"
"inc %%edx \n"
"inc %%edx \n"
"inc %%edx \n"
"loop 1b \n"
"movl $0, %%eax \n" "subl $1,%%ecx \n"
"adcl %%eax,%%eax \n" "jnz 1b \n"
"movl %%eax, %%esi \n"
"test $1,%%ah \n"
"setnz %%al \n"
"movzx %%al,%%esi \n"
"pop %%edx \n" "pop %%edx \n"
"pop %%ecx \n" "pop %%ecx \n"
@ -375,6 +375,7 @@ public:
push ebx push ebx
push ecx push ecx
push edx push edx
push edi
mov ecx, [b] mov ecx, [b]
sub ecx, [index] sub ecx, [index]
@ -385,28 +386,37 @@ public:
lea ebx, [eax+4*edx] lea ebx, [eax+4*edx]
mov edx, [value] mov edx, [value]
mov edi,1
clc clc
lahf
p: p:
sahf ; restore flags
mov eax, [ebx] mov eax, [ebx]
adc eax, edx adc eax, edx
mov [ebx], eax mov [ebx], eax
lahf ; save flags
jnc end
mov edx, 0
inc ebx cmovnc ecx,edi
inc ebx xor edx,edx
inc ebx add ebx,4
inc ebx
loop p sub ecx,1
jnz p
end: // end:
mov eax,0 test ah,1
adc eax,eax setnz al
mov [c],eax
//
// movzx dword ptr [c],al
//
movzx edx, al
mov [c], edx
//
pop edi
pop edx pop edx
pop ecx pop ecx
pop ebx pop ebx
@ -421,35 +431,38 @@ public:
"push %%ebx \n" "push %%ebx \n"
"push %%ecx \n" "push %%ecx \n"
"push %%edx \n" "push %%edx \n"
"push %%edi \n"
"subl %%edx, %%ecx \n" "subl %%edx, %%ecx \n"
"leal (%%ebx,%%edx,4), %%ebx \n" "leal (%%ebx,%%edx,4), %%ebx \n"
"movl %%esi, %%edx \n" "movl %%esi, %%edx \n"
"movl $1, %%edi \n"
"clc \n" "clc \n"
"lahf \n"
"1: \n" "1: \n"
"sahf \n"
"movl (%%ebx), %%eax \n" "movl (%%ebx), %%eax \n"
"adcl %%edx, %%eax \n" "adcl %%edx, %%eax \n"
"movl %%eax, (%%ebx) \n" "movl %%eax, (%%ebx) \n"
"lahf \n"
"jnc 2f \n" "cmovnc %%edi,%%ecx \n"
"movl $0, %%edx \n" "xorl %%edx, %%edx \n"
"inc %%ebx \n" "addl $4,%%ebx \n"
"inc %%ebx \n"
"inc %%ebx \n"
"inc %%ebx \n"
"loop 1b \n" "subl $1,%%ecx \n"
"jnz 1b \n"
"2: \n"
"movl $0, %%eax \n" "test $1,%%ah \n"
"adcl %%eax,%%eax \n" "setnz %%al \n"
"movzx %%al,%%eax \n"
"pop %%edi \n"
"pop %%edx \n" "pop %%edx \n"
"pop %%ecx \n" "pop %%ecx \n"
"pop %%ebx \n" "pop %%ebx \n"
@ -518,19 +531,18 @@ public:
lea ebx, [eax+4*edx] lea ebx, [eax+4*edx]
mov edx, 0 xor edx,edx
mov eax, [ebx] mov eax, [ebx]
add eax, [x1] add eax, [x1]
mov [ebx], eax mov [ebx], eax
inc ebx setc al
inc ebx movzx eax,al
inc ebx add ebx,4
inc ebx
mov eax, [ebx] add eax, [ebx]
adc eax, [x2] add eax, [x2]
mov [ebx], eax mov [ebx], eax
jnc end jnc end
@ -554,9 +566,14 @@ public:
end: end:
mov eax,0 setc al
adc eax,eax
mov [c],eax //
// movzx dword ptr [c],al
//
movzx edx, al
mov [c], edx
//
pop edx pop edx
pop ecx pop ecx
@ -577,7 +594,7 @@ public:
"leal (%%ebx,%%edx,4), %%ebx \n" "leal (%%ebx,%%edx,4), %%ebx \n"
"movl $0, %%edx \n" "xorl %%edx, %%edx \n"
"movl (%%ebx), %%eax \n" "movl (%%ebx), %%eax \n"
"addl %%esi, %%eax \n" "addl %%esi, %%eax \n"
@ -613,8 +630,8 @@ public:
"2: \n" "2: \n"
"movl $0, %%eax \n" "setc %%al \n"
"adcl %%eax,%%eax \n" "movzx %%al,%%eax \n"
"pop %%edx \n" "pop %%edx \n"
"pop %%ecx \n" "pop %%ecx \n"
@ -929,23 +946,29 @@ private:
mov ecx, [b] mov ecx, [b]
mov ebx, [p1] mov ebx, [p1]
lahf
p: p:
sahf
rcl dword ptr[ebx],1 rcl dword ptr[ebx],1
lahf
inc ebx add ebx,4
inc ebx
inc ebx
inc ebx
loop p sub ecx,1
jnz p
dec edx
sub edx,1
jnz a jnz a
mov eax,0 sahf
adc eax,eax setc al
mov [c],eax
//
// movzx dword ptr [c],al
//
movzx edx, al
mov [c], edx
//
pop edx pop edx
pop ecx pop ecx
@ -968,25 +991,26 @@ private:
"push %%ebx \n" "push %%ebx \n"
"push %%ecx \n" "push %%ecx \n"
"lahf \n"
"1: \n" "1: \n"
"sahf \n"
"rcll $1,(%%ebx) \n" "rcll $1,(%%ebx) \n"
"lahf \n"
"inc %%ebx \n"
"inc %%ebx \n" "addl $4,%%ebx \n"
"inc %%ebx \n"
"inc %%ebx \n" "subl $1,%%ecx \n"
"jnz 1b \n"
"loop 1b \n"
"pop %%ecx \n" "pop %%ecx \n"
"pop %%ebx \n" "pop %%ebx \n"
"decl %%esi \n" "subl $1,%%esi \n"
"jnz 2b \n" "jnz 2b \n"
"movl $0, %%edx \n" "sahf \n"
"adcl %%edx, %%edx \n" "setc %%dl \n"
"movzx %%dl, %%edx \n"
"pop %%esi \n" "pop %%esi \n"
@ -1043,23 +1067,29 @@ private:
mov ecx,[b] mov ecx,[b]
lea ebx,[ebx+4*ecx] lea ebx,[ebx+4*ecx]
lahf
p: p:
dec ebx sub ebx,4
dec ebx
dec ebx
dec ebx
sahf
rcr dword ptr [ebx],1 rcr dword ptr [ebx],1
lahf
loop p sub ecx,1
jnz p
dec edx sub edx,1
jnz a jnz a
mov eax,0 sahf
adc eax,eax setc al
mov [c],eax
//
// movzx dword ptr [c],al
//
movzx edx, al
mov [c], edx
//
pop edx pop edx
pop ecx pop ecx
@ -1085,25 +1115,27 @@ private:
"xorl %%eax, %%eax \n" "xorl %%eax, %%eax \n"
"subl %%edx, %%eax \n" "subl %%edx, %%eax \n"
"lahf \n"
"1: \n" "1: \n"
"dec %%ebx \n" "subl $4,%%ebx \n"
"dec %%ebx \n"
"dec %%ebx \n" "sahf \n"
"dec %%ebx \n"
"rcrl $1,(%%ebx) \n" "rcrl $1,(%%ebx) \n"
"lahf \n"
"loop 1b \n"
"subl $1,%%ecx \n"
"jnz 1b \n"
"pop %%ecx \n" "pop %%ecx \n"
"pop %%ebx \n" "pop %%ebx \n"
"decl %%esi \n" "subl $1,%%esi \n"
"jnz 2b \n" "jnz 2b \n"
"movl $0, %%edx \n" "sahf \n"
"adcl %%edx, %%edx \n" "setc %%dl \n"
"movzx %%dl, %%edx \n"
"pop %%esi \n" "pop %%esi \n"
@ -1365,13 +1397,17 @@ public:
__asm __asm
{ {
push eax push eax
push edx
and edx,-1
bsr eax, x bsr eax, x
jnz found cmovz eax,edx
mov eax, -1
found:
mov result, eax mov result, eax
//
pop edx
//
pop eax pop eax
} }
#endif #endif
@ -1380,10 +1416,11 @@ public:
#ifdef __GNUC__ #ifdef __GNUC__
__asm__ __volatile__( __asm__ __volatile__(
"push %%edx \n"
"andl $-1,%%edx \n"
"bsrl %1, %0 \n" "bsrl %1, %0 \n"
"jnz 1f \n" "cmovz %%edx,%0 \n"
"movl $-1, %0 \n" "pop %%edx \n"
"1: \n"
: "=R" (result) : "=R" (result)
: "R" (x) : "R" (x)
@ -1594,7 +1631,7 @@ public:
that value pointed with result1 and result2 has changed that value pointed with result1 and result2 has changed
this has no effect in visual studio but it's usefull when this has no effect in visual studio but it's usefull when
using gcc and options like -O using gcc and options like -Ox
*/ */
register uint result1_; register uint result1_;
register uint result2_; register uint result2_;
@ -2736,7 +2773,7 @@ public:
/*! /*!
* *
* convertion method * conversion method
* *
*/ */

View File

@ -680,29 +680,26 @@ namespace ttmath
"push %%rbx \n" "push %%rbx \n"
"push %%rcx \n" "push %%rcx \n"
"lahf \n"
"1: \n" "1: \n"
"sahf \n"
"rclq $1,(%%rbx) \n" "rclq $1,(%%rbx) \n"
"lahf \n"
"inc %%rbx \n"
"inc %%rbx \n" "addq $8,%%rbx \n"
"inc %%rbx \n"
"inc %%rbx \n" "subq $1,%%rcx \n"
"inc %%rbx \n"
"inc %%rbx \n"
"inc %%rbx \n"
"inc %%rbx \n"
"loop 1b \n" "loop 1b \n"
"pop %%rcx \n" "pop %%rcx \n"
"pop %%rbx \n" "pop %%rbx \n"
"decq %%rsi \n" "subq $1,%%rsi \n"
"jnz 2b \n" "jnz 2b \n"
"movq $0, %%rdx \n" "xor %%rdx,%%rdx \n"
"adcq %%rdx, %%rdx \n" "sahf \n"
"setc %%dl \n"
"pop %%rsi \n" "pop %%rsi \n"
@ -765,29 +762,27 @@ namespace ttmath
"xorq %%rax, %%rax \n" "xorq %%rax, %%rax \n"
"subq %%rdx, %%rax \n" "subq %%rdx, %%rax \n"
"lahf \n"
"1: \n" "1: \n"
"dec %%rbx \n" "subq $8, %%rbx \n"
"dec %%rbx \n"
"dec %%rbx \n" "sahf \n"
"dec %%rbx \n"
"dec %%rbx \n"
"dec %%rbx \n"
"dec %%rbx \n"
"dec %%rbx \n"
"rcrq $1,(%%rbx) \n" "rcrq $1,(%%rbx) \n"
"lahf \n"
"loop 1b \n"
"subq $1,%%rcx \n"
"jnz 1b \n"
"pop %%rcx \n" "pop %%rcx \n"
"pop %%rbx \n" "pop %%rbx \n"
"decq %%rsi \n" "subq $1,%%rsi \n"
"jnz 2b \n" "jnz 2b \n"
"movq $0, %%rdx \n" "xor %%rdx,%%rdx \n"
"adcq %%rdx,%%rdx \n" "sahf \n"
"setc %%dl \n"
"pop %%rsi \n" "pop %%rsi \n"
@ -820,11 +815,11 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
__asm__ __volatile__( __asm__ __volatile__(
"push %%rdx \n"
"andq $-1,%%rdx \n"
"bsrq %%rbx, %%rax \n" "bsrq %%rbx, %%rax \n"
"jnz 1f \n" "cmovz %%rdx,%%rax \n"
"movq $-1, %%rax \n" "pop %%rdx \n"
"1: \n"
: "=a" (result) : "=a" (result)
: "b" (x) : "b" (x)