From cfd719cca2bed1c8074eb5d49668a227224f8274 Mon Sep 17 00:00:00 2001 From: Tomasz Sowa Date: Sat, 25 Oct 2008 20:05:51 +0000 Subject: [PATCH] changed: asm code in UInt::Add, UInt::AddInt, AddTwoInts 32 and 64 bits, much faster now added: tests for UInt::AddInt fixed: tests: test_lahf() returned incorrect value for 32bit platform git-svn-id: svn://ttmath.org/publicrep/ttmath/trunk@82 e52654a7-88a9-db11-a3e9-0013d4bc506e --- tests/Makefile | 2 +- tests/main.cpp | 9 +- tests/tests.uint32 | 36 ++++++ tests/uinttest.cpp | 131 ++++++++++++++++----- tests/uinttest.h | 16 ++- ttmath/ttmathuint.h | 256 +++++++++++++----------------------------- ttmath/ttmathuint64.h | 150 +++++++------------------ 7 files changed, 280 insertions(+), 320 deletions(-) diff --git a/tests/Makefile b/tests/Makefile index 73e0c07..3985d27 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -1,6 +1,6 @@ CC = g++ o = main.o uinttest.o -CFLAGS = -Wall -pedantic +CFLAGS = -Wall ttmath = .. name = tests diff --git a/tests/main.cpp b/tests/main.cpp index ed2482e..1305c31 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -40,6 +40,7 @@ #include #include "uinttest.h" +//#include const char uint_tests_file[] = "tests.uint32"; @@ -90,9 +91,13 @@ bool test_lahf() std::cout << "fail" << std::endl; + return false; + #endif -return false; + + // 32bit platform +return true; } @@ -117,7 +122,5 @@ using namespace ttmath; - - return 0; } diff --git a/tests/tests.uint32 b/tests/tests.uint32 index 7aeabd6..2787792 100644 --- a/tests/tests.uint32 +++ b/tests/tests.uint32 @@ -22,6 +22,42 @@ add 192 192 6277101735386680763835789423207666416102355444464034 add 192 192 6277101735386680763835789423207666416102355444464034512895 0 6277101735386680763835789423207666416102355444464034512895 0 +# AddInt + +# min_bits max_bits bits_per_int a b(int) index result carry +addint 32 0 32 0 0 0 0 0 +addint 32 0 32 1000 2000 0 3000 0 +addint 64 0 32 562342345 1423445 1 6113650284997065 0 +addint 64 0 32 5342342455 3423553423 0 8765895878 0 +addint 96 0 32 478895734 46756734 2 862509505820513898647477878 0 +addint 128 0 32 27370506140054471803784984408165997441 24543 3 27372450636847059393422542757339093889 0 +addint 128 128 32 340282366841711102552375003685868034945 2234543 3 177038656721750864719686733515479937 1 +addint 160 160 32 1461501637330902918124457471805283415910032366465 3 3 158457126631793409034731674497 1 +addint 192 0 32 6277101735386680763835789423128439055191355840718134336385 3354 1 6277101735386680763835789423128439055191355855123454647169 0 +addint 192 192 32 6277101735386680763835789423128439055191355840718134336385 3354 5 4901876491607848387655079701569502248322251848964993 1 + +addint 64 0 64 0 0 0 0 0 +addint 64 0 64 5342342 345534234 0 350876576 0 +addint 64 0 64 5342342455 34235534234 0 39577876689 0 +addint 64 64 64 18446744073709550615 2000 0 999 1 +addint 128 0 64 42895062544824211012058135 3453234 0 42895062544824211015511369 0 +addint 128 0 64 42895062544824211012058135 456234234 1 8458931214807741031021280279 0 +addint 128 128 64 340282366920938426569886460012664978455 45623 1 804702316727431770143767 1 +addint 192 192 64 6277101735386680763835789423207666379208867297044931279895 45623234 1 841563227924816702308613143 1 +addint 192 192 64 6277101735386679588840776445207152040176347835149297122327 45623234 2 15523607057094857017675614218510090830281178135 1 +addint 192 192 64 6277101735386680763835789423207666416102355444464034512895 1 0 0 1 + + + + + + + + + + + + diff --git a/tests/uinttest.cpp b/tests/uinttest.cpp index 3fd41d6..b4748b0 100644 --- a/tests/uinttest.cpp +++ b/tests/uinttest.cpp @@ -46,9 +46,9 @@ void UIntTest::set_file_name(const std::string & f) } -int UIntTest::read_int() +uuint UIntTest::read_uint() { -int result = 0; +uuint result = 0; skip_white_characters(); @@ -60,39 +60,55 @@ return result; -template -void UIntTest::test_add() +bool UIntTest::check_minmax_bits(int type_size) { -using namespace ttmath; - - UInt a,b,result, new_result; - - int min_bits = read_int(); - int max_bits = read_int(); + int min_bits = read_uint(); + int max_bits = read_uint(); if( min_bits != 0 && type_size * TTMATH_BITS_PER_UINT < (unsigned int)min_bits ) - return; + return false; if( max_bits != 0 && type_size * TTMATH_BITS_PER_UINT > (unsigned int)max_bits ) - return; + return false; - a.FromString(pline, 10, &pline); - b.FromString(pline, 10, &pline); - result.FromString(pline, 10, &pline); - int carry = read_int(); +return true; +} - std::cerr << '[' << row << "] Add<" << type_size << ">: "; +bool UIntTest::check_minmax_bits_bitperint(int type_size) +{ + if( !check_minmax_bits(type_size) ) + return false; + + int bits = read_uint(); + + if( TTMATH_BITS_PER_UINT != bits ) + return false; + +return true; +} + + + +bool UIntTest::check_end() +{ skip_white_characters(); + if( *pline!='#' && *pline!= 0 ) { std::cerr << "syntax error" << std::endl; - return; + return false; } - new_result = a; - int new_carry = new_result.Add(b); - bool ok = true; +return true; +} + + +template +bool UIntTest::check_result_carry(const ttmath::UInt & result, const ttmath::UInt & new_result, + int carry, int new_carry) +{ +bool ok = true; if( new_carry != carry ) { @@ -106,15 +122,67 @@ using namespace ttmath; ok = false; } - if( ok ) - { - std::cerr << "ok" << std::endl; - } +return ok; } + + +template +void UIntTest::test_add() +{ + UInt a,b,result, new_result; + + if( !check_minmax_bits(type_size) ) + return; + + a.FromString(pline, 10, &pline); + b.FromString(pline, 10, &pline); + result.FromString(pline, 10, &pline); + int carry = read_uint(); + + std::cerr << '[' << row << "] Add<" << type_size << ">: "; + + if( !check_end() ) + return; + + new_result = a; + int new_carry = new_result.Add(b); + + if( check_result_carry(result, new_result, carry, new_carry) ) + std::cerr << "ok" << std::endl; +} + + +template +void UIntTest::test_addint() +{ + UInt a, result, new_result; + + if( !check_minmax_bits_bitperint(type_size) ) + return; + + a.FromString(pline, 10, &pline); + uuint b = read_uint(); + uuint index = read_uint(); + result.FromString(pline, 10, &pline); + int carry = read_uint(); + + std::cerr << '[' << row << "] AddInt<" << type_size << ">: "; + + if( !check_end() ) + return; + + new_result = a; + int new_carry = new_result.AddInt(b, index); + + if( check_result_carry(result, new_result, carry, new_carry) ) + std::cerr << "ok" << std::endl; +} + + int UIntTest::upper_char(int c) { if( c>='a' && c<='z' ) @@ -191,6 +259,19 @@ const char * p = pline; pline = p; test_add<9>(); } else + if( method == "ADDINT" ) + { + pline = p; test_addint<1>(); + pline = p; test_addint<2>(); + pline = p; test_addint<3>(); + pline = p; test_addint<4>(); + pline = p; test_addint<5>(); + pline = p; test_addint<6>(); + pline = p; test_addint<7>(); + pline = p; test_addint<8>(); + pline = p; test_addint<9>(); + } + else { std::cerr << '[' << row << "] "; std::cerr << "method " << method << " is not supported" << std::endl; diff --git a/tests/uinttest.h b/tests/uinttest.h index 69933de..6947180 100644 --- a/tests/uinttest.h +++ b/tests/uinttest.h @@ -45,7 +45,8 @@ #include - +using namespace ttmath; +typedef ttmath::uint uuint; class UIntTest @@ -70,8 +71,11 @@ public: void go(); - template - void test_add(); + template void test_add(); + template void test_addint(); + + template bool check_result_carry(const ttmath::UInt & result, const ttmath::UInt & new_result, + int carry, int new_carry); int upper_char(int c); @@ -80,8 +84,10 @@ void skip_white_characters(); bool read_method(); void test_method(); bool check_line(); -int read_int(); - +uuint read_uint(); +bool check_minmax_bits(int type_size); +bool check_minmax_bits_bitperint(int type_size); +bool check_end(); }; diff --git a/ttmath/ttmathuint.h b/ttmath/ttmathuint.h index 0046361..06bbd3b 100644 --- a/ttmath/ttmathuint.h +++ b/ttmath/ttmathuint.h @@ -249,48 +249,46 @@ public: #ifndef __GNUC__ // this part might be compiled with for example visual c - + __asm { push eax push ebx push ecx push edx + push esi mov ecx,[b] mov ebx,[p1] - mov edx,[p2] + mov esi,[p2] - xor eax,eax - sub eax,[c] + xor eax,eax // eax=0 + mov edx,eax // edx=0 + + sub eax,[c] // CF=c - lahf // flags -> AH (flags: SF ZF AF PF CF) p: - sahf // AH -> flags (flags: SF ZF AF PF CF) - mov eax,[ebx] - adc eax,[edx] - mov [ebx],eax - lahf - - add ebx,4 - add edx,4 + mov eax,[esi+edx*4] + adc [ebx+edx*4],eax + inc edx dec ecx jnz p - // checking carry from the last word - // CF = bit 0 - test ah,1 - setnz al + setc al movzx edx, al mov [c], edx + pop esi pop edx pop ecx pop ebx pop eax } + + + #endif @@ -300,37 +298,28 @@ public: __asm__ __volatile__( - "push %%ebx \n" - "push %%ecx \n" - "push %%edx \n" + "push %%ecx \n" - "xorl %%eax, %%eax \n" - "subl %%esi, %%eax \n" + "xorl %%eax, %%eax \n" + "movl %%eax, %%edx \n" + "subl %%edi, %%eax \n" - "lahf \n" - "1: \n" - "sahf \n" - "movl (%%ebx),%%eax \n" - "adcl (%%edx),%%eax \n" - "movl %%eax,(%%ebx) \n" - "lahf \n" - - "add $4,%%ebx \n" - "add $4,%%edx \n" - "decl %%ecx \n" - "jnz 1b \n" + "1: \n" + "movl (%%esi,%%edx,4),%%eax \n" + "adcl %%eax, (%%ebx,%%edx,4) \n" + + "incl %%edx \n" + "decl %%ecx \n" + "jnz 1b \n" - "test $1,%%ah \n" - "setnz %%al \n" - "movzx %%al,%%esi \n" + "setc %%al \n" + "movzx %%al,%%edx \n" - "pop %%edx \n" - "pop %%ecx \n" - "pop %%ebx \n" + "pop %%ecx \n" - : "=S" (c) - : "0" (c), "c" (b), "b" (p1), "d" (p2) + : "=d" (c) + : "D" (c), "c" (b), "b" (p1), "S" (p2) : "%eax", "cc", "memory" ); #endif @@ -366,106 +355,71 @@ public: register uint c; #ifndef __GNUC__ + __asm { push eax push ebx push ecx push edx - push edi mov ecx, [b] sub ecx, [index] mov edx, [index] - mov eax, [p1] - - lea ebx, [eax+4*edx] - mov edx, [value] + mov ebx, [p1] - mov edi,1 + mov eax, [value] - clc - lahf p: - sahf ; restore flags - mov eax, [ebx] - adc eax, edx - mov [ebx], eax - lahf ; save flags + add [ebx+edx*4], eax + jnc end - cmovnc ecx,edi - xor edx,edx - add ebx,4 - - sub ecx,1 + mov eax, 1 + inc edx + dec ecx jnz p -// end: - - test ah,1 - setnz al - - // - // movzx dword ptr [c],al - // + end: + setc al movzx edx, al mov [c], edx - // - pop edi pop edx pop ecx pop ebx pop eax } + #endif #ifdef __GNUC__ __asm__ __volatile__( - "push %%ebx \n" + "push %%eax \n" "push %%ecx \n" - "push %%edx \n" - "push %%edi \n" "subl %%edx, %%ecx \n" - "leal (%%ebx,%%edx,4), %%ebx \n" - - "movl %%esi, %%edx \n" - "movl $1, %%edi \n" - "clc \n" - "lahf \n" "1: \n" - "sahf \n" - "movl (%%ebx), %%eax \n" - "adcl %%edx, %%eax \n" - "movl %%eax, (%%ebx) \n" - "lahf \n" - - "cmovnc %%edi,%%ecx \n" - - "xorl %%edx, %%edx \n" - - "addl $4,%%ebx \n" - - "subl $1,%%ecx \n" + "addl %%eax, (%%ebx,%%edx,4) \n" + "jnc 2f \n" + + "movl $1, %%eax \n" + "incl %%edx \n" + "decl %%ecx \n" "jnz 1b \n" + "2: \n" + "setc %%al \n" + "movzx %%al, %%edx \n" - "test $1,%%ah \n" - "setnz %%al \n" - "movzx %%al,%%eax \n" - - "pop %%edi \n" - "pop %%edx \n" "pop %%ecx \n" - "pop %%ebx \n" + "pop %%eax \n" - : "=a" (c) - : "c" (b), "d" (index), "b" (p1), "S" (value) + : "=d" (c) + : "a" (value), "c" (b), "0" (index), "b" (p1) : "cc", "memory" ); #endif @@ -523,59 +477,35 @@ public: mov ecx, [b] sub ecx, [index] + mov ebx, [p1] mov edx, [index] - mov eax, [p1] + + mov eax, [x1] + add [ebx+edx*4], eax + inc edx + dec ecx + + mov eax, [x2] - lea ebx, [eax+4*edx] - - xor edx,edx - - mov eax, [ebx] - add eax, [x1] - mov [ebx], eax - - setc al - movzx eax,al - add ebx,4 - - add eax, [ebx] - add eax, [x2] - mov [ebx], eax - jnc end - - dec ecx - dec ecx - jz end - p: - inc ebx - inc ebx - inc ebx - inc ebx - - mov eax,[ebx] - adc eax, edx - mov [ebx], eax - + adc [ebx+edx*4], eax jnc end - loop p + mov eax, 0 + inc edx + dec ecx + jnz p end: - setc al - - // - // movzx dword ptr [c],al - // movzx edx, al mov [c], edx - // - + pop edx pop ecx pop ebx pop eax + } #endif @@ -583,59 +513,33 @@ public: #ifdef __GNUC__ __asm__ __volatile__( - "push %%ebx \n" "push %%ecx \n" "push %%edx \n" "subl %%edx, %%ecx \n" - "leal (%%ebx,%%edx,4), %%ebx \n" - - "xorl %%edx, %%edx \n" - - "movl (%%ebx), %%eax \n" - "addl %%esi, %%eax \n" - "movl %%eax, (%%ebx) \n" - - "inc %%ebx \n" - "inc %%ebx \n" - "inc %%ebx \n" - "inc %%ebx \n" - - "movl (%%ebx), %%eax \n" - "adcl %%edi, %%eax \n" - "movl %%eax, (%%ebx) \n" - "jnc 2f \n" - - "dec %%ecx \n" - "dec %%ecx \n" - "jz 2f \n" + "addl %%esi, (%%ebx,%%edx,4) \n" + "incl %%edx \n" + "decl %%ecx \n" "1: \n" - "inc %%ebx \n" - "inc %%ebx \n" - "inc %%ebx \n" - "inc %%ebx \n" - - "movl (%%ebx), %%eax \n" - "adcl %%edx, %%eax \n" - "movl %%eax, (%%ebx) \n" - + "adcl %%eax, (%%ebx,%%edx,4) \n" "jnc 2f \n" - "loop 1b \n" + "mov $0, %%eax \n" + "incl %%edx \n" + "decl %%ecx \n" + "jnz 1b \n" "2: \n" - "setc %%al \n" - "movzx %%al,%%eax \n" + "movzx %%al, %%eax \n" "pop %%edx \n" "pop %%ecx \n" - "pop %%ebx \n" : "=a" (c) - : "c" (b), "d" (index), "b" (p1), "S" (x1), "D" (x2) + : "c" (b), "d" (index), "b" (p1), "S" (x1), "0" (x2) : "cc", "memory" ); #endif diff --git a/ttmath/ttmathuint64.h b/ttmath/ttmathuint64.h index 02adb0f..ec33c73 100644 --- a/ttmath/ttmathuint64.h +++ b/ttmath/ttmathuint64.h @@ -221,47 +221,28 @@ namespace ttmath */ __asm__ __volatile__( - "push %%rbx \n" - "push %%rcx \n" - "push %%rdx \n" + "push %%rcx \n" - "xorq %%rax, %%rax \n" - "subq %%rsi, %%rax \n" + "xorq %%rax, %%rax \n" + "movq %%rax, %%rdx \n" + "subq %%rdi, %%rax \n" - //"lahf \n" - // in order to use this instruction one need to use -msahf option of the GCC - // but in my compiler (gcc version 4.2.1) there is no such option - // at the moment I'm using the opcode of this instruction - // In the future this can be simply change into 'lahf' - ".byte 0x9f \n" - "1: \n" - //"sahf \n" - ".byte 0x9e \n" + "1: \n" + "movq (%%rsi,%%rdx,8),%%rax \n" + "adcq %%rax, (%%rbx,%%rdx,8) \n" - "movq (%%rbx),%%rax \n" - "adcq (%%rdx),%%rax \n" - "movq %%rax,(%%rbx) \n" + "incq %%rdx \n" + "decq %%rcx \n" + "jnz 1b \n" - //"lahf \n" - ".byte 0x9f \n" + "setc %%al \n" + "movzx %%al,%%rdx \n" - "addq $8, %%rbx \n" - "addq $8, %%rdx \n" - - "decq %%rcx \n" - "jnz 1b \n" + "pop %%rcx \n" - "test $1, %%ah \n" - "setnz %%al \n" - "movzx %%al, %%rsi \n" - - "pop %%rdx \n" - "pop %%rcx \n" - "pop %%rbx \n" - - : "=S" (c) - : "0" (c), "c" (b), "b" (p1), "d" (p2) + : "=d" (c) + : "D" (c), "c" (b), "b" (p1), "S" (p2) : "%rax", "cc", "memory" ); #endif @@ -305,49 +286,32 @@ namespace ttmath #endif #ifdef __GNUC__ + __asm__ __volatile__( - "push %%rbx \n" + + "push %%rax \n" "push %%rcx \n" - "push %%rdx \n" "subq %%rdx, %%rcx \n" - "leaq (%%rbx,%%rdx,8), %%rbx \n" - - "movq %%rsi, %%rdx \n" - "clc \n" "1: \n" - - "movq (%%rbx), %%rax \n" - "adcq %%rdx, %%rax \n" - "movq %%rax, (%%rbx) \n" - + "addq %%rax, (%%rbx,%%rdx,8) \n" "jnc 2f \n" - - "movq $0, %%rdx \n" - - "inc %%rbx \n" - "inc %%rbx \n" - "inc %%rbx \n" - "inc %%rbx \n" - "inc %%rbx \n" - "inc %%rbx \n" - "inc %%rbx \n" - "inc %%rbx \n" - - "loop 1b \n" + + "movq $1, %%rax \n" + "incq %%rdx \n" + "decq %%rcx \n" + "jnz 1b \n" "2: \n" + "setc %%al \n" + "movzx %%al, %%rdx \n" - "movq $0, %%rax \n" - "adcq %%rax,%%rax \n" - - "pop %%rdx \n" "pop %%rcx \n" - "pop %%rbx \n" + "pop %%rax \n" - : "=a" (c) - : "c" (b), "d" (index), "b" (p1), "S" (value) + : "=d" (c) + : "a" (value), "c" (b), "0" (index), "b" (p1) : "cc", "memory" ); #endif @@ -404,67 +368,33 @@ namespace ttmath #ifdef __GNUC__ __asm__ __volatile__( - "push %%rbx \n" "push %%rcx \n" "push %%rdx \n" "subq %%rdx, %%rcx \n" - "leaq (%%rbx,%%rdx,8), %%rbx \n" - - "movq $0, %%rdx \n" - - "movq (%%rbx), %%rax \n" - "addq %%rsi, %%rax \n" - "movq %%rax, (%%rbx) \n" - - "inc %%rbx \n" - "inc %%rbx \n" - "inc %%rbx \n" - "inc %%rbx \n" - "inc %%rbx \n" - "inc %%rbx \n" - "inc %%rbx \n" - "inc %%rbx \n" - - "movq (%%rbx), %%rax \n" - "adcq %%rdi, %%rax \n" - "movq %%rax, (%%rbx) \n" - "jnc 2f \n" - - "dec %%rcx \n" - "dec %%rcx \n" - "jz 2f \n" + "addq %%rsi, (%%rbx,%%rdx,8) \n" + "incq %%rdx \n" + "decq %%rcx \n" "1: \n" - "inc %%rbx \n" - "inc %%rbx \n" - "inc %%rbx \n" - "inc %%rbx \n" - "inc %%rbx \n" - "inc %%rbx \n" - "inc %%rbx \n" - "inc %%rbx \n" - - "movq (%%rbx), %%rax \n" - "adcq %%rdx, %%rax \n" - "movq %%rax, (%%rbx) \n" - + "adcq %%rax, (%%rbx,%%rdx,8) \n" "jnc 2f \n" - "loop 1b \n" + "mov $0, %%rax \n" + "incq %%rdx \n" + "decq %%rcx \n" + "jnz 1b \n" "2: \n" - - "movq $0, %%rax \n" - "adcq %%rax,%%rax \n" + "setc %%al \n" + "movzx %%al, %%rax \n" "pop %%rdx \n" "pop %%rcx \n" - "pop %%rbx \n" : "=a" (c) - : "c" (b), "d" (index), "b" (p1), "S" (x1), "D" (x2) + : "c" (b), "d" (index), "b" (p1), "S" (x1), "0" (x2) : "cc", "memory" ); #endif