changed: asm code in UInt::Add, UInt::AddInt, AddTwoInts

32 and 64 bits, much faster now
added:   tests for UInt::AddInt
fixed:   tests: test_lahf() returned an incorrect value on 32-bit platforms


git-svn-id: svn://ttmath.org/publicrep/ttmath/trunk@82 e52654a7-88a9-db11-a3e9-0013d4bc506e
This commit is contained in:
Tomasz Sowa 2008-10-25 20:05:51 +00:00
parent f1115a2ce9
commit cfd719cca2
7 changed files with 280 additions and 320 deletions

View File

@ -1,6 +1,6 @@
CC = g++
o = main.o uinttest.o
CFLAGS = -Wall -pedantic
CFLAGS = -Wall
ttmath = ..
name = tests

View File

@ -40,6 +40,7 @@
#include <ttmath/ttmath.h>
#include "uinttest.h"
//#include <windows.h>
const char uint_tests_file[] = "tests.uint32";
@ -90,9 +91,13 @@ bool test_lahf()
std::cout << "fail" << std::endl;
return false;
#endif
return false;
// 32bit platform
return true;
}
@ -117,7 +122,5 @@ using namespace ttmath;
return 0;
}

View File

@ -22,6 +22,42 @@ add 192 192 6277101735386680763835789423207666416102355444464034
add 192 192 6277101735386680763835789423207666416102355444464034512895 0 6277101735386680763835789423207666416102355444464034512895 0
# AddInt
# min_bits max_bits bits_per_int a b(int) index result carry
addint 32 0 32 0 0 0 0 0
addint 32 0 32 1000 2000 0 3000 0
addint 64 0 32 562342345 1423445 1 6113650284997065 0
addint 64 0 32 5342342455 3423553423 0 8765895878 0
addint 96 0 32 478895734 46756734 2 862509505820513898647477878 0
addint 128 0 32 27370506140054471803784984408165997441 24543 3 27372450636847059393422542757339093889 0
addint 128 128 32 340282366841711102552375003685868034945 2234543 3 177038656721750864719686733515479937 1
addint 160 160 32 1461501637330902918124457471805283415910032366465 3 3 158457126631793409034731674497 1
addint 192 0 32 6277101735386680763835789423128439055191355840718134336385 3354 1 6277101735386680763835789423128439055191355855123454647169 0
addint 192 192 32 6277101735386680763835789423128439055191355840718134336385 3354 5 4901876491607848387655079701569502248322251848964993 1
addint 64 0 64 0 0 0 0 0
addint 64 0 64 5342342 345534234 0 350876576 0
addint 64 0 64 5342342455 34235534234 0 39577876689 0
addint 64 64 64 18446744073709550615 2000 0 999 1
addint 128 0 64 42895062544824211012058135 3453234 0 42895062544824211015511369 0
addint 128 0 64 42895062544824211012058135 456234234 1 8458931214807741031021280279 0
addint 128 128 64 340282366920938426569886460012664978455 45623 1 804702316727431770143767 1
addint 192 192 64 6277101735386680763835789423207666379208867297044931279895 45623234 1 841563227924816702308613143 1
addint 192 192 64 6277101735386679588840776445207152040176347835149297122327 45623234 2 15523607057094857017675614218510090830281178135 1
addint 192 192 64 6277101735386680763835789423207666416102355444464034512895 1 0 0 1

View File

@ -46,9 +46,9 @@ void UIntTest::set_file_name(const std::string & f)
}
int UIntTest::read_int()
uuint UIntTest::read_uint()
{
int result = 0;
uuint result = 0;
skip_white_characters();
@ -60,39 +60,55 @@ return result;
template<unsigned int type_size>
void UIntTest::test_add()
bool UIntTest::check_minmax_bits(int type_size)
{
using namespace ttmath;
UInt<type_size> a,b,result, new_result;
int min_bits = read_int();
int max_bits = read_int();
int min_bits = read_uint();
int max_bits = read_uint();
if( min_bits != 0 && type_size * TTMATH_BITS_PER_UINT < (unsigned int)min_bits )
return;
return false;
if( max_bits != 0 && type_size * TTMATH_BITS_PER_UINT > (unsigned int)max_bits )
return;
return false;
a.FromString(pline, 10, &pline);
b.FromString(pline, 10, &pline);
result.FromString(pline, 10, &pline);
int carry = read_int();
return true;
}
std::cerr << '[' << row << "] Add<" << type_size << ">: ";
/*
	row filter used by tests whose data depends on the machine word size

	the min_bits/max_bits filter (check_minmax_bits) is applied first;
	only when it passes is one more number read from the row
	(the bits_per_int column) and compared with TTMATH_BITS_PER_UINT

	returns true when both checks pass, false otherwise
*/
bool UIntTest::check_minmax_bits_bitperint(int type_size)
{
	if( check_minmax_bits(type_size) )
	{
		// read only after the size filter has accepted the row
		int word_bits = read_uint();

		return TTMATH_BITS_PER_UINT == word_bits;
	}

	return false;
}
bool UIntTest::check_end()
{
skip_white_characters();
if( *pline!='#' && *pline!= 0 )
{
std::cerr << "syntax error" << std::endl;
return;
return false;
}
new_result = a;
int new_carry = new_result.Add(b);
bool ok = true;
return true;
}
template<uuint type_size>
bool UIntTest::check_result_carry(const ttmath::UInt<type_size> & result, const ttmath::UInt<type_size> & new_result,
int carry, int new_carry)
{
bool ok = true;
if( new_carry != carry )
{
@ -106,15 +122,67 @@ using namespace ttmath;
ok = false;
}
if( ok )
{
std::cerr << "ok" << std::endl;
}
return ok;
}
template<uuint type_size>
void UIntTest::test_add()
{
UInt<type_size> a,b,result, new_result;
if( !check_minmax_bits(type_size) )
return;
a.FromString(pline, 10, &pline);
b.FromString(pline, 10, &pline);
result.FromString(pline, 10, &pline);
int carry = read_uint();
std::cerr << '[' << row << "] Add<" << type_size << ">: ";
if( !check_end() )
return;
new_result = a;
int new_carry = new_result.Add(b);
if( check_result_carry(result, new_result, carry, new_carry) )
std::cerr << "ok" << std::endl;
}
template<uuint type_size>
void UIntTest::test_addint()
{
UInt<type_size> a, result, new_result;
if( !check_minmax_bits_bitperint(type_size) )
return;
a.FromString(pline, 10, &pline);
uuint b = read_uint();
uuint index = read_uint();
result.FromString(pline, 10, &pline);
int carry = read_uint();
std::cerr << '[' << row << "] AddInt<" << type_size << ">: ";
if( !check_end() )
return;
new_result = a;
int new_carry = new_result.AddInt(b, index);
if( check_result_carry(result, new_result, carry, new_carry) )
std::cerr << "ok" << std::endl;
}
int UIntTest::upper_char(int c)
{
if( c>='a' && c<='z' )
@ -191,6 +259,19 @@ const char * p = pline;
pline = p; test_add<9>();
}
else
if( method == "ADDINT" )
{
pline = p; test_addint<1>();
pline = p; test_addint<2>();
pline = p; test_addint<3>();
pline = p; test_addint<4>();
pline = p; test_addint<5>();
pline = p; test_addint<6>();
pline = p; test_addint<7>();
pline = p; test_addint<8>();
pline = p; test_addint<9>();
}
else
{
std::cerr << '[' << row << "] ";
std::cerr << "method " << method << " is not supported" << std::endl;

View File

@ -45,7 +45,8 @@
#include <ttmath/ttmath.h>
using namespace ttmath;
typedef ttmath::uint uuint;
class UIntTest
@ -70,8 +71,11 @@ public:
void go();
template<unsigned int type_size>
void test_add();
template<uuint type_size> void test_add();
template<uuint type_size> void test_addint();
template<uuint type_size> bool check_result_carry(const ttmath::UInt<type_size> & result, const ttmath::UInt<type_size> & new_result,
int carry, int new_carry);
int upper_char(int c);
@ -80,8 +84,10 @@ void skip_white_characters();
bool read_method();
void test_method();
bool check_line();
int read_int();
uuint read_uint();
bool check_minmax_bits(int type_size);
bool check_minmax_bits_bitperint(int type_size);
bool check_end();
};

View File

@ -249,48 +249,46 @@ public:
#ifndef __GNUC__
// this part might be compiled with for example visual c
__asm
{
push eax
push ebx
push ecx
push edx
push esi
mov ecx,[b]
mov ebx,[p1]
mov edx,[p2]
mov esi,[p2]
xor eax,eax
sub eax,[c]
xor eax,eax // eax=0
mov edx,eax // edx=0
sub eax,[c] // CF=c
lahf // flags -> AH (flags: SF ZF AF PF CF)
p:
sahf // AH -> flags (flags: SF ZF AF PF CF)
mov eax,[ebx]
adc eax,[edx]
mov [ebx],eax
lahf
add ebx,4
add edx,4
mov eax,[esi+edx*4]
adc [ebx+edx*4],eax
inc edx
dec ecx
jnz p
// checking carry from the last word
// CF = bit 0
test ah,1
setnz al
setc al
movzx edx, al
mov [c], edx
pop esi
pop edx
pop ecx
pop ebx
pop eax
}
#endif
@ -300,37 +298,28 @@ public:
__asm__ __volatile__(
"push %%ebx \n"
"push %%ecx \n"
"push %%edx \n"
"push %%ecx \n"
"xorl %%eax, %%eax \n"
"subl %%esi, %%eax \n"
"xorl %%eax, %%eax \n"
"movl %%eax, %%edx \n"
"subl %%edi, %%eax \n"
"lahf \n"
"1: \n"
"sahf \n"
"movl (%%ebx),%%eax \n"
"adcl (%%edx),%%eax \n"
"movl %%eax,(%%ebx) \n"
"lahf \n"
"add $4,%%ebx \n"
"add $4,%%edx \n"
"decl %%ecx \n"
"jnz 1b \n"
"1: \n"
"movl (%%esi,%%edx,4),%%eax \n"
"adcl %%eax, (%%ebx,%%edx,4) \n"
"incl %%edx \n"
"decl %%ecx \n"
"jnz 1b \n"
"test $1,%%ah \n"
"setnz %%al \n"
"movzx %%al,%%esi \n"
"setc %%al \n"
"movzx %%al,%%edx \n"
"pop %%edx \n"
"pop %%ecx \n"
"pop %%ebx \n"
"pop %%ecx \n"
: "=S" (c)
: "0" (c), "c" (b), "b" (p1), "d" (p2)
: "=d" (c)
: "D" (c), "c" (b), "b" (p1), "S" (p2)
: "%eax", "cc", "memory" );
#endif
@ -366,106 +355,71 @@ public:
register uint c;
#ifndef __GNUC__
__asm
{
push eax
push ebx
push ecx
push edx
push edi
mov ecx, [b]
sub ecx, [index]
mov edx, [index]
mov eax, [p1]
lea ebx, [eax+4*edx]
mov edx, [value]
mov ebx, [p1]
mov edi,1
mov eax, [value]
clc
lahf
p:
sahf ; restore flags
mov eax, [ebx]
adc eax, edx
mov [ebx], eax
lahf ; save flags
add [ebx+edx*4], eax
jnc end
cmovnc ecx,edi
xor edx,edx
add ebx,4
sub ecx,1
mov eax, 1
inc edx
dec ecx
jnz p
// end:
test ah,1
setnz al
//
// movzx dword ptr [c],al
//
end:
setc al
movzx edx, al
mov [c], edx
//
pop edi
pop edx
pop ecx
pop ebx
pop eax
}
#endif
#ifdef __GNUC__
__asm__ __volatile__(
"push %%ebx \n"
"push %%eax \n"
"push %%ecx \n"
"push %%edx \n"
"push %%edi \n"
"subl %%edx, %%ecx \n"
"leal (%%ebx,%%edx,4), %%ebx \n"
"movl %%esi, %%edx \n"
"movl $1, %%edi \n"
"clc \n"
"lahf \n"
"1: \n"
"sahf \n"
"movl (%%ebx), %%eax \n"
"adcl %%edx, %%eax \n"
"movl %%eax, (%%ebx) \n"
"lahf \n"
"cmovnc %%edi,%%ecx \n"
"xorl %%edx, %%edx \n"
"addl $4,%%ebx \n"
"subl $1,%%ecx \n"
"addl %%eax, (%%ebx,%%edx,4) \n"
"jnc 2f \n"
"movl $1, %%eax \n"
"incl %%edx \n"
"decl %%ecx \n"
"jnz 1b \n"
"2: \n"
"setc %%al \n"
"movzx %%al, %%edx \n"
"test $1,%%ah \n"
"setnz %%al \n"
"movzx %%al,%%eax \n"
"pop %%edi \n"
"pop %%edx \n"
"pop %%ecx \n"
"pop %%ebx \n"
"pop %%eax \n"
: "=a" (c)
: "c" (b), "d" (index), "b" (p1), "S" (value)
: "=d" (c)
: "a" (value), "c" (b), "0" (index), "b" (p1)
: "cc", "memory" );
#endif
@ -523,59 +477,35 @@ public:
mov ecx, [b]
sub ecx, [index]
mov ebx, [p1]
mov edx, [index]
mov eax, [p1]
mov eax, [x1]
add [ebx+edx*4], eax
inc edx
dec ecx
mov eax, [x2]
lea ebx, [eax+4*edx]
xor edx,edx
mov eax, [ebx]
add eax, [x1]
mov [ebx], eax
setc al
movzx eax,al
add ebx,4
add eax, [ebx]
add eax, [x2]
mov [ebx], eax
jnc end
dec ecx
dec ecx
jz end
p:
inc ebx
inc ebx
inc ebx
inc ebx
mov eax,[ebx]
adc eax, edx
mov [ebx], eax
adc [ebx+edx*4], eax
jnc end
loop p
mov eax, 0
inc edx
dec ecx
jnz p
end:
setc al
//
// movzx dword ptr [c],al
//
movzx edx, al
mov [c], edx
//
pop edx
pop ecx
pop ebx
pop eax
}
#endif
@ -583,59 +513,33 @@ public:
#ifdef __GNUC__
__asm__ __volatile__(
"push %%ebx \n"
"push %%ecx \n"
"push %%edx \n"
"subl %%edx, %%ecx \n"
"leal (%%ebx,%%edx,4), %%ebx \n"
"xorl %%edx, %%edx \n"
"movl (%%ebx), %%eax \n"
"addl %%esi, %%eax \n"
"movl %%eax, (%%ebx) \n"
"inc %%ebx \n"
"inc %%ebx \n"
"inc %%ebx \n"
"inc %%ebx \n"
"movl (%%ebx), %%eax \n"
"adcl %%edi, %%eax \n"
"movl %%eax, (%%ebx) \n"
"jnc 2f \n"
"dec %%ecx \n"
"dec %%ecx \n"
"jz 2f \n"
"addl %%esi, (%%ebx,%%edx,4) \n"
"incl %%edx \n"
"decl %%ecx \n"
"1: \n"
"inc %%ebx \n"
"inc %%ebx \n"
"inc %%ebx \n"
"inc %%ebx \n"
"movl (%%ebx), %%eax \n"
"adcl %%edx, %%eax \n"
"movl %%eax, (%%ebx) \n"
"adcl %%eax, (%%ebx,%%edx,4) \n"
"jnc 2f \n"
"loop 1b \n"
"mov $0, %%eax \n"
"incl %%edx \n"
"decl %%ecx \n"
"jnz 1b \n"
"2: \n"
"setc %%al \n"
"movzx %%al,%%eax \n"
"movzx %%al, %%eax \n"
"pop %%edx \n"
"pop %%ecx \n"
"pop %%ebx \n"
: "=a" (c)
: "c" (b), "d" (index), "b" (p1), "S" (x1), "D" (x2)
: "c" (b), "d" (index), "b" (p1), "S" (x1), "0" (x2)
: "cc", "memory" );
#endif

View File

@ -221,47 +221,28 @@ namespace ttmath
*/
__asm__ __volatile__(
"push %%rbx \n"
"push %%rcx \n"
"push %%rdx \n"
"push %%rcx \n"
"xorq %%rax, %%rax \n"
"subq %%rsi, %%rax \n"
"xorq %%rax, %%rax \n"
"movq %%rax, %%rdx \n"
"subq %%rdi, %%rax \n"
//"lahf \n"
// in order to use this instruction one needs to pass the -msahf option to GCC,
// but my compiler (gcc version 4.2.1) has no such option,
// so at the moment I'm using the opcode of this instruction directly.
// In the future this can simply be changed into 'lahf'
".byte 0x9f \n"
"1: \n"
//"sahf \n"
".byte 0x9e \n"
"1: \n"
"movq (%%rsi,%%rdx,8),%%rax \n"
"adcq %%rax, (%%rbx,%%rdx,8) \n"
"movq (%%rbx),%%rax \n"
"adcq (%%rdx),%%rax \n"
"movq %%rax,(%%rbx) \n"
"incq %%rdx \n"
"decq %%rcx \n"
"jnz 1b \n"
//"lahf \n"
".byte 0x9f \n"
"setc %%al \n"
"movzx %%al,%%rdx \n"
"addq $8, %%rbx \n"
"addq $8, %%rdx \n"
"decq %%rcx \n"
"jnz 1b \n"
"pop %%rcx \n"
"test $1, %%ah \n"
"setnz %%al \n"
"movzx %%al, %%rsi \n"
"pop %%rdx \n"
"pop %%rcx \n"
"pop %%rbx \n"
: "=S" (c)
: "0" (c), "c" (b), "b" (p1), "d" (p2)
: "=d" (c)
: "D" (c), "c" (b), "b" (p1), "S" (p2)
: "%rax", "cc", "memory" );
#endif
@ -305,49 +286,32 @@ namespace ttmath
#endif
#ifdef __GNUC__
__asm__ __volatile__(
"push %%rbx \n"
"push %%rax \n"
"push %%rcx \n"
"push %%rdx \n"
"subq %%rdx, %%rcx \n"
"leaq (%%rbx,%%rdx,8), %%rbx \n"
"movq %%rsi, %%rdx \n"
"clc \n"
"1: \n"
"movq (%%rbx), %%rax \n"
"adcq %%rdx, %%rax \n"
"movq %%rax, (%%rbx) \n"
"addq %%rax, (%%rbx,%%rdx,8) \n"
"jnc 2f \n"
"movq $0, %%rdx \n"
"inc %%rbx \n"
"inc %%rbx \n"
"inc %%rbx \n"
"inc %%rbx \n"
"inc %%rbx \n"
"inc %%rbx \n"
"inc %%rbx \n"
"inc %%rbx \n"
"loop 1b \n"
"movq $1, %%rax \n"
"incq %%rdx \n"
"decq %%rcx \n"
"jnz 1b \n"
"2: \n"
"setc %%al \n"
"movzx %%al, %%rdx \n"
"movq $0, %%rax \n"
"adcq %%rax,%%rax \n"
"pop %%rdx \n"
"pop %%rcx \n"
"pop %%rbx \n"
"pop %%rax \n"
: "=a" (c)
: "c" (b), "d" (index), "b" (p1), "S" (value)
: "=d" (c)
: "a" (value), "c" (b), "0" (index), "b" (p1)
: "cc", "memory" );
#endif
@ -404,67 +368,33 @@ namespace ttmath
#ifdef __GNUC__
__asm__ __volatile__(
"push %%rbx \n"
"push %%rcx \n"
"push %%rdx \n"
"subq %%rdx, %%rcx \n"
"leaq (%%rbx,%%rdx,8), %%rbx \n"
"movq $0, %%rdx \n"
"movq (%%rbx), %%rax \n"
"addq %%rsi, %%rax \n"
"movq %%rax, (%%rbx) \n"
"inc %%rbx \n"
"inc %%rbx \n"
"inc %%rbx \n"
"inc %%rbx \n"
"inc %%rbx \n"
"inc %%rbx \n"
"inc %%rbx \n"
"inc %%rbx \n"
"movq (%%rbx), %%rax \n"
"adcq %%rdi, %%rax \n"
"movq %%rax, (%%rbx) \n"
"jnc 2f \n"
"dec %%rcx \n"
"dec %%rcx \n"
"jz 2f \n"
"addq %%rsi, (%%rbx,%%rdx,8) \n"
"incq %%rdx \n"
"decq %%rcx \n"
"1: \n"
"inc %%rbx \n"
"inc %%rbx \n"
"inc %%rbx \n"
"inc %%rbx \n"
"inc %%rbx \n"
"inc %%rbx \n"
"inc %%rbx \n"
"inc %%rbx \n"
"movq (%%rbx), %%rax \n"
"adcq %%rdx, %%rax \n"
"movq %%rax, (%%rbx) \n"
"adcq %%rax, (%%rbx,%%rdx,8) \n"
"jnc 2f \n"
"loop 1b \n"
"mov $0, %%rax \n"
"incq %%rdx \n"
"decq %%rcx \n"
"jnz 1b \n"
"2: \n"
"movq $0, %%rax \n"
"adcq %%rax,%%rax \n"
"setc %%al \n"
"movzx %%al, %%rax \n"
"pop %%rdx \n"
"pop %%rcx \n"
"pop %%rbx \n"
: "=a" (c)
: "c" (b), "d" (index), "b" (p1), "S" (x1), "D" (x2)
: "c" (b), "d" (index), "b" (p1), "S" (x1), "0" (x2)
: "cc", "memory" );
#endif