diff --git a/CHANGELOG b/CHANGELOG
index 762b2d0..f25d0e6 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,4 +1,4 @@
-Version 0.9.0 prerelease (2009.09.05):
+Version 0.9.0 prerelease (2009.09.07):
 * added:   support for wide characters (wchar_t)
            wide characters are used when the macro TTMATH_USE_WCHAR is defined
            this macro is defined automatically when UNICODE or _UNICODE is defined
@@ -22,6 +22,9 @@ Version 0.9.0 prerelease (2009.09.05):
            and use the TTMATH_MULTITHREADS_HELPER macro somewhere in your *.cpp file
 * added:   Big::AboutEqual(const Big & ss2, int nBitsToIgnore = 4)
            the last nBitsToIgnore bits from the mantissas will be skipped when comparing
+* added:   x86_64 asm code for the Microsoft Visual C++ compiler
+           file: ttmathuint_x86_64_msvc.asm
+           (this file has to be assembled and linked separately because MS VC does not support inline assembly in x86_64 mode)
 * changed: Factorial() is using the Gamma() function now
 * removed: Parser<>::SetFactorialMax() method
            factorial() is so fast now that the method is no longer needed
diff --git a/ttmath/ttmathbig.h b/ttmath/ttmathbig.h
index 1688c30..ddbd382 100644
--- a/ttmath/ttmathbig.h
+++ b/ttmath/ttmathbig.h
@@ -3916,75 +3916,75 @@ public:
 	}
 
-	bool AboutEqual(const Big & ss2, int nBitsToIgnore = 4) const
-	{
-		// we should check the mantissas beforehand because sometimes we can have
-		// a mantissa set to zero but in the exponent something another value
-		// (maybe we've forgotten about calling CorrectZero() ?)
-		if( mantissa.IsZero() )
-		{
-			if( ss2.mantissa.IsZero() )
-				return true;
-
-			return(ss2.AboutEqual(*this,nBitsToIgnore));
-		}
-
-		if( ss2.mantissa.IsZero() )
-		{
-			return(this->exponent <= uint(2*(-sint(man*TTMATH_BITS_PER_UINT))+nBitsToIgnore));
-		}
-
-		// exponents may not differ much!
-		ttmath::Int<exp> expdiff(this->exponent - ss2.exponent);
-
-		// they may differ one if for example mantissa1=0x80000000, mantissa2=0xffffffff
-		if( ttmath::Abs(expdiff) > 1 )
-			return(false);
-
-		// calculate the 'difference' mantissa
-		ttmath::UInt<man> man1(this->mantissa);
-		ttmath::UInt<man> man2(ss2.mantissa);
-		ttmath::UInt<man> mandiff;
-
-		switch( expdiff.ToInt() )
-		{
-			case +1:
-				man2.Rcr(1,0);
-				mandiff = man1;
-				mandiff.Sub(man2);
-				break;
-			case -1:
-				man1.Rcr(1,0);
-				mandiff = man2;
-				mandiff.Sub(man1);
-				break;
-			default:
-				if( man2 > man1 )
-				{
-					mandiff = man2;
-					mandiff.Sub(man1);
-				}
-				else
-				{
-					mandiff = man1;
-					mandiff.Sub(man2);
-				}
-				break;
-		}
-
-		// faster to mask the bits!
-		TTMATH_ASSERT( nBitsToIgnore < TTMATH_BITS_PER_UINT );
-
-		for( int n = man-1; n > 0; --n )
-		{
-			if( mandiff.table[n] != 0 )
-				return(false);
-		}
-
-		uint nMask = ~((1 << nBitsToIgnore) - 1);
-
-		return((mandiff.table[0] & nMask) == 0);
-	}
+	bool AboutEqual(const Big & ss2, int nBitsToIgnore = 4) const
+	{
+		// check the mantissas beforehand because sometimes a mantissa can be
+		// set to zero while the exponent still holds some other value
+		// (maybe we've forgotten about calling CorrectZero() ?)
+		if( mantissa.IsZero() )
+		{
+			if( ss2.mantissa.IsZero() )
+				return true;
+
+			return(ss2.AboutEqual(*this,nBitsToIgnore));
+		}
+
+		if( ss2.mantissa.IsZero() )
+		{
+			return(this->exponent <= uint(2*(-sint(man*TTMATH_BITS_PER_UINT))+nBitsToIgnore));
+		}
+
+		// the exponents may not differ much!
+		ttmath::Int<exp> expdiff(this->exponent - ss2.exponent);
+
+		// they may differ by one if, for example, mantissa1=0x80000000 and mantissa2=0xffffffff
+		if( ttmath::Abs(expdiff) > 1 )
+			return(false);
+
+		// calculate the 'difference' mantissa
+		ttmath::UInt<man> man1(this->mantissa);
+		ttmath::UInt<man> man2(ss2.mantissa);
+		ttmath::UInt<man> mandiff;
+
+		switch( expdiff.ToInt() )
+		{
+			case +1:
+				man2.Rcr(1,0);
+				mandiff = man1;
+				mandiff.Sub(man2);
+				break;
+			case -1:
+				man1.Rcr(1,0);
+				mandiff = man2;
+				mandiff.Sub(man1);
+				break;
+			default:
+				if( man2 > man1 )
+				{
+					mandiff = man2;
+					mandiff.Sub(man1);
+				}
+				else
+				{
+					mandiff = man1;
+					mandiff.Sub(man2);
+				}
+				break;
+		}
+
+		// it is faster to mask the bits!
+		TTMATH_ASSERT( nBitsToIgnore < TTMATH_BITS_PER_UINT );
+
+		for( int n = man-1; n > 0; --n )
+		{
+			if( mandiff.table[n] != 0 )
+				return(false);
+		}
+
+		uint nMask = ~((uint(1) << nBitsToIgnore) - 1);
+
+		return((mandiff.table[0] & nMask) == 0);
+	}
 
 
 	bool operator<(const Big & ss2) const
diff --git a/ttmath/ttmathtypes.h b/ttmath/ttmathtypes.h
index b69e4f6..c91f6fe 100644
--- a/ttmath/ttmathtypes.h
+++ b/ttmath/ttmathtypes.h
@@ -162,8 +162,14 @@ namespace ttmath
 	/*!
 	on 64bit platforms one word (uint, sint) will be equal 64bits
 	*/
-	typedef unsigned long uint;
-	typedef signed   long sint;
+	#ifdef _MSC_VER
+		/* in VC the 'long' type has 32 bits even in 64bit mode; __int64 is a VC extension */
+		typedef unsigned __int64 uint;
+		typedef signed   __int64 sint;
+	#else
+		typedef unsigned long uint;
+		typedef signed   long sint;
+	#endif
 
 	/*!
 	on 64bit platform we do not define ulint
diff --git a/ttmath/ttmathuint.h b/ttmath/ttmathuint.h
index 35f501c..37e591c 100644
--- a/ttmath/ttmathuint.h
+++ b/ttmath/ttmathuint.h
@@ -3297,6 +3297,17 @@ public:
 	static uint SetBitInWord(uint & value, uint bit);
 	static void MulTwoWords(uint a, uint b, uint * result_high, uint * result_low);
 	static void DivTwoWords(uint a,uint b, uint c, uint * r, uint * rest);
+
+
+	/* temporarily */
+	#ifndef TTMATH_NOASM
+	#ifdef TTMATH_PLATFORM64
+	#ifdef _MSC_VER
+		static uint AddTwoWords(uint a, uint b, uint carry, uint * result);
+		static uint SubTwoWords(uint a, uint b, uint carry, uint * result);
+	#endif
+	#endif
+	#endif
 };
diff --git a/ttmath/ttmathuint_x86_64.h b/ttmath/ttmathuint_x86_64.h
index adbe803..d4ae8c8 100644
--- a/ttmath/ttmathuint_x86_64.h
+++ b/ttmath/ttmathuint_x86_64.h
@@ -51,10 +51,33 @@
 this file is included at the end of ttmathuint.h
 */
 
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
 namespace ttmath
 {
+	#ifdef _MSC_VER
+
+	extern "C"
+	{
+		uint __fastcall adc_x64(uint* p1, const uint* p2, uint nSize, uint c);
+		uint __fastcall addindexed_x64(uint* p1, uint nSize, uint nPos, uint nValue);
+		uint __fastcall addindexed2_x64(uint* p1, uint nSize, uint nPos, uint nValue1, uint nValue2);
+		uint __fastcall sbb_x64(uint* p1, const uint* p2, uint nSize, uint c);
+		uint __fastcall subindexed_x64(uint* p1, uint nSize, uint nPos, uint nValue);
+		uint __fastcall rcl_x64(uint* p1, uint nSize, uint nLowestBit);
+		uint __fastcall rcr_x64(uint* p1, uint nSize, uint nLowestBit);
+		uint __fastcall div_x64(uint* pnValHi, uint* pnValLo, uint nDiv);
+		uint __fastcall rcl2_x64(uint* p1, uint nSize, uint nBits, uint c);
+		uint __fastcall rcr2_x64(uint* p1, uint nSize, uint nBits, uint c);
+	}
+	#endif
+
+
 	/*!
 	*
 	*	basic mathematic functions
 	*
@@ -82,8 +105,12 @@ namespace ttmath
 		// we don't have to use TTMATH_REFERENCE_ASSERT here
 		// this algorithm doesn't require it
 
-	#ifndef __GNUC__
-	#error "another compiler than GCC is currently not supported in 64bit mode"
+	#if !defined(__GNUC__) && !defined(_MSC_VER)
+	#error "compilers other than GCC and Microsoft VC are currently not supported in 64bit mode; you can compile with the TTMATH_NOASM macro"
+	#endif
+
+	#ifdef _MSC_VER
+		c = adc_x64(p1,p2,b,c);
 	#endif
 
 	#ifdef __GNUC__
@@ -149,10 +176,16 @@ namespace ttmath
 
 	TTMATH_ASSERT( index < value_size )
 
-	#ifndef __GNUC__
-	#error "another compiler than GCC is currently not supported in 64bit mode"
+	#if !defined(__GNUC__) && !defined(_MSC_VER)
+	#error "compilers other than GCC and Microsoft VC are currently not supported in 64bit mode; you can compile with the TTMATH_NOASM macro"
 	#endif
+
+	#ifdef _MSC_VER
+		c = addindexed_x64(p1,b,index,value);
+	#endif
+
+	#ifdef __GNUC__
 
 	uint dummy, dummy2;
@@ -227,10 +260,16 @@ namespace ttmath
 
 	TTMATH_ASSERT( index < value_size - 1 )
 
-	#ifndef __GNUC__
-	#error "another compiler than GCC is currently not supported in 64bit mode"
+	#if !defined(__GNUC__) && !defined(_MSC_VER)
+	#error "compilers other than GCC and Microsoft VC are currently not supported in 64bit mode; you can compile with the TTMATH_NOASM macro"
 	#endif
+
+	#ifdef _MSC_VER
+		c = addindexed2_x64(p1,b,index,x1,x2);
+	#endif
+
+	#ifdef __GNUC__
 
 	uint dummy, dummy2;
@@ -288,6 +327,9 @@ namespace ttmath
 	of course the carry is propagated and will be returned from the last item
 	(this method is used by the Karatsuba multiplication algorithm)
 	*/
+
+#ifndef _MSC_VER
+
 	template<uint value_size>
 	uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
 	{
@@ -296,10 +338,16 @@ namespace ttmath
 	uint rest  = ss1_size - ss2_size;
 	uint c;
 
-	#ifndef __GNUC__
-	#error "another compiler than GCC is currently not supported in 64bit mode"
+	#if !defined(__GNUC__) && !defined(_MSC_VER)
+	#error "compilers other than GCC and Microsoft VC are currently not supported in 64bit mode; you can compile with the TTMATH_NOASM macro"
+	#endif
+
+	#ifdef _MSC_VER
+
+	#endif
+
+	#ifdef __GNUC__
 
 	uint dummy1, dummy2, dummy3;
@@ -348,8 +396,27 @@ namespace ttmath
 
 	return c;
 	}
 
+#else
+
+	/* temporarily */
+	template<uint value_size>
+	uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
+	{
+	uint i, c = 0;
+
+	TTMATH_ASSERT( ss1_size >= ss2_size )
+
+	for(i=0 ; i<ss2_size ; ++i)
+		c = AddTwoWords(ss1[i], ss2[i], c, &result[i]);
+
+	for( ; i<ss1_size ; ++i)
+		c = AddTwoWords(ss1[i], 0, c, &result[i]);
+
+	return c;
+	}
+#endif
+
+
+#ifndef _MSC_VER
+
 	template<uint value_size>
 	uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
 	{
@@ -501,16 +584,22 @@ namespace ttmath
 	uint rest  = ss1_size - ss2_size;
 	uint c;
 
-	#ifndef __GNUC__
-	#error "another compiler than GCC is currently not supported in 64bit mode"
+	#if !defined(__GNUC__) && !defined(_MSC_VER)
+	#error "compilers other than GCC and Microsoft VC are currently not supported in 64bit mode; you can compile with the TTMATH_NOASM macro"
 	#endif
+
+	#ifdef _MSC_VER
+
+	#endif
+
+	#ifdef __GNUC__
 
-	/*
-	the asm code is nearly the same as in AddVector
-	only two instructions 'adc' are changed to 'sbb'
-	*/
+
+	// the asm code is nearly the same as in AddVector,
+	// only two 'adc' instructions are changed to 'sbb'
+
 	uint dummy1, dummy2, dummy3;
 
 	__asm__ __volatile__(
@@ -556,6 +645,27 @@ namespace ttmath
 
 	return c;
 	}
 
+#else
+
+	/* temporarily */
+	template<uint value_size>
+	uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
+	{
+	uint i, c = 0;
+
+	TTMATH_ASSERT( ss1_size >= ss2_size )
+
+	for(i=0 ; i<ss2_size ; ++i)
+		c = SubTwoWords(ss1[i], ss2[i], c, &result[i]);
+
+	for( ; i<ss1_size ; ++i)
+		c = SubTwoWords(ss1[i], 0, c, &result[i]);
+
+	return c;
+	}
+
+
+	/* temporarily */
+	template<uint value_size>
+	uint UInt<value_size>::AddTwoWords(uint a, uint b, uint carry, uint * result)
+	{
+	uint temp;
+
+	if( carry == 0 )
+	{
+		temp = a + b;
+
+		if( temp < a )
+			carry = 1;
+	}
+	else
+	{
+		carry = 1;
+		temp = a + b + carry;
+
+		if( temp > a )	// !(temp<=a)
+			carry = 0;
+	}
+
+	*result = temp;
+
+	return carry;
+	}
+
+
+	/* temporarily */
+	template<uint value_size>
+	uint UInt<value_size>::SubTwoWords(uint a, uint b, uint carry, uint * result)
+	{
+	if( carry == 0 )
+	{
+		*result = a - b;
+
+		if( a < b )
+			carry = 1;
+	}
+	else
+	{
+		carry = 1;
+		*result = a - b - carry;
+
+		if( a > b )	// !(a <= b)
+			carry = 0;
+	}
+
+	return carry;
+	}
+
+#endif
+
 } //namespace
diff --git a/ttmath/ttmathuint_x86_64_msvc.asm b/ttmath/ttmathuint_x86_64_msvc.asm
new file mode 100644
index 0000000..34e68c5
--- /dev/null
+++ b/ttmath/ttmathuint_x86_64_msvc.asm
@@ -0,0 +1,430 @@
+;
+; This file is a part of TTMath Bignum Library
+; and is distributed under the (new) BSD licence.
+; Author: Christian Kaiser <>
+;
+
+;
+; Copyright (c) 2009, Christian Kaiser
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are met:
+;
+;  * Redistributions of source code must retain the above copyright notice,
+;    this list of conditions and the following disclaimer.
+;
+;  * Redistributions in binary form must reproduce the above copyright
+;    notice, this list of conditions and the following disclaimer in the
+;    documentation and/or other materials provided with the distribution.
+;
+;  * Neither the name Tomasz Sowa nor the names of contributors to this
+;    project may be used to endorse or promote products derived
+;    from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+; ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+; THE POSSIBILITY OF SUCH DAMAGE.
+;
+
+;
+; compile with debug info:    ml64.exe /c /Zd /Zi ttmathuint_x86_64_msvc.asm
+; compile without debug info: ml64.exe /c ttmathuint_x86_64_msvc.asm
+; this creates the ttmathuint_x86_64_msvc.obj file, which can be linked with your program
+;
+
+PUBLIC	adc_x64
+PUBLIC	addindexed_x64
+PUBLIC	addindexed2_x64
+
+PUBLIC	sbb_x64
+PUBLIC	subindexed_x64
+
+PUBLIC	rcl_x64
+PUBLIC	rcr_x64
+
+PUBLIC	rcl2_x64
+PUBLIC	rcr2_x64
+
+PUBLIC	div_x64
+
+;
+; "rax, rcx, rdx, r8-r11 are volatile."
+; "rbx, rbp, rdi, rsi, r12-r15 are nonvolatile."
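+;
+; the routines below rely on the Microsoft x64 calling convention:
+; the first four integer arguments are passed in rcx, rdx, r8 and r9,
+; any further argument is passed on the stack (see addindexed2_x64),
+; and the return value is passed back in rax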
+;
+
+
+.CODE
+
+	ALIGN 8
+
+;----------------------------------------
+
+adc_x64 PROC
+	; rcx = p1
+	; rdx = p2
+	; r8  = nSize
+	; r9  = nCarry
+
+	xor	rax, rax
+	xor	r11, r11
+	sub	rax, r9				; sets CARRY if r9 != 0
+
+	ALIGN 16
+loop1:
+	mov	rax, qword ptr [rdx + r11 * 8]
+	adc	qword ptr [rcx + r11 * 8], rax
+	lea	r11, [r11+1]
+	dec	r8
+	jnz	loop1
+
+	setc	al
+	movzx	rax, al
+
+	ret
+
+adc_x64 ENDP
+
+;----------------------------------------
+
+	ALIGN 8
+
+;----------------------------------------
+
+addindexed_x64 PROC
+
+	; rcx = p1
+	; rdx = nSize
+	; r8  = nPos
+	; r9  = nValue
+
+	xor	rax, rax			; rax = result
+	sub	rdx, r8				; rdx = remaining count of uints
+
+	add	qword ptr [rcx + r8 * 8], r9
+	jc	next1
+
+	ret
+
+next1:
+	mov	r9, 1
+
+	ALIGN 16
+loop1:
+	dec	rdx
+	jz	done_with_cy
+	lea	r8, [r8+1]
+	add	qword ptr [rcx + r8 * 8], r9
+	jc	loop1
+
+	ret
+
+done_with_cy:
+	lea	rax, [rax+1]			; rax = 1
+
+	ret
+
+addindexed_x64 ENDP
+
+;----------------------------------------
+
+	ALIGN 8
+
+;----------------------------------------
+
+addindexed2_x64 PROC
+
+	; rcx = p1 (pointer)
+	; rdx = b  (value size)
+	; r8  = nPos
+	; r9  = nValue1
+	; [rsp+0x28] = nValue2 (the fifth argument lives on the stack)
+
+	xor	rax, rax			; return value
+	mov	r11, rcx			; table
+	sub	rdx, r8				; rdx = remaining count of uints
+	mov	r10, [rsp+028h]			; r10 = nValue2
+
+	add	qword ptr [r11 + r8 * 8], r9
+	lea	r8, [r8+1]
+	lea	rdx, [rdx-1]
+	adc	qword ptr [r11 + r8 * 8], r10
+	jc	next
+	ret
+
+	ALIGN 16
+loop1:
+	lea	r8, [r8+1]
+	add	qword ptr [r11 + r8 * 8], 1
+	jc	next
+	ret
+
+next:
+	dec	rdx				; dec does not modify the carry flag
+	jnz	loop1
+	lea	rax, [rax+1]
+	ret
+
+addindexed2_x64 ENDP
+
+;----------------------------------------
+
+	ALIGN 8
+
+;----------------------------------------
+
+sbb_x64 PROC
+
+	; rcx = p1
+	; rdx = p2
+	; r8  = nCount
+	; r9  = nCarry
+
+	xor	rax, rax
+	xor	r11, r11
+	sub	rax, r9				; sets CARRY if r9 != 0
+
+	ALIGN 16
+loop1:
+	mov	rax, qword ptr [rdx + r11 * 8]
+	sbb	qword ptr [rcx + r11 * 8], rax
+	lea	r11, [r11+1]
+	dec	r8
+	jnz	loop1
+
+	setc	al
+	movzx	rax, al
+
+	ret
+
+sbb_x64 ENDP
+
+;----------------------------------------
+
+	ALIGN 8
+
+;----------------------------------------
+
+subindexed_x64 PROC
+	; rcx = p1
+	; rdx = nSize
+	; r8  = nPos
+	; r9  = nValue
+
+	sub	rdx, r8				; rdx = remaining count of uints
+
+	ALIGN 16
+loop1:
+	sub	qword ptr [rcx + r8 * 8], r9
+	jnc	done
+
+	lea	r8, [r8+1]
+	mov	r9, 1
+	dec	rdx
+	jnz	loop1
+	jc	return_1			; most of the times there will be NO carry (I hope)
+
+done:
+	xor	rax, rax
+	ret
+
+return_1:
+	mov	rax, 1
+	ret
+
+subindexed_x64 ENDP
+
+;----------------------------------------
+
+	ALIGN 8
+
+;----------------------------------------
+
+rcl_x64 PROC
+	; rcx = p1
+	; rdx = b
+	; r8  = nLowestBit
+
+	mov	r11, rcx			; table
+	xor	r10, r10
+	neg	r8				; CY set if r8 <> 0
+
+	ALIGN 16
+loop1:
+	rcl	qword ptr [r11 + r10 * 8], 1
+	lea	r10, [r10+1]
+	dec	rdx
+	jnz	loop1
+
+	setc	al
+	movzx	rax, al
+
+	ret
+
+rcl_x64 ENDP
+
+;----------------------------------------
+
+	ALIGN 8
+
+;----------------------------------------
+
+rcr_x64 PROC
+	; rcx = p1
+	; rdx = nSize
+	; r8  = nLowestBit
+
+	xor	r10, r10
+	neg	r8				; CY set if r8 <> 0
+
+	ALIGN 16
+loop1:
+	rcr	qword ptr -8[rcx + rdx * 8], 1
+	dec	rdx
+	jnz	loop1
+
+	setc	al
+	movzx	rax, al
+
+	ret
+
+rcr_x64 ENDP
+
+;----------------------------------------
+
+	ALIGN 8
+
+;----------------------------------------
+
+div_x64 PROC
+
+	; rcx = &Hi
+	; rdx = &Lo
+	; r8  = nDiv
+
+	mov	r11, rcx
+	mov	r10, rdx
+
+	mov	rdx, qword ptr [r11]
+	mov	rax, qword ptr [r10]
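+
+	; rdx:rax now holds the 128-bit dividend;
+	; 'div r8' leaves the quotient in rax and the remainder in rdx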
+	div	r8
+	mov	qword ptr [r10], rdx		; remainder
+	mov	qword ptr [r11], rax		; value
+
+	ret
+
+div_x64 ENDP
+
+;----------------------------------------
+
+	ALIGN 8
+
+;----------------------------------------
+
+rcl2_x64 PROC
+	; rcx = p1
+	; rdx = nSize
+	; r8  = bits
+	; r9  = c
+
+	push	rbx
+
+	mov	r10, rcx			; r10 = p1
+	xor	rax, rax
+
+	mov	rcx, 64
+	sub	rcx, r8
+
+	mov	r11, -1
+	shr	r11, cl				; r11 = mask
+
+	mov	rcx, r8				; rcx = count of bits
+
+	mov	rbx, rax			; rbx = old value = 0
+	or	r9, r9
+	cmovnz	rbx, r11			; if (c) then old value = mask
+
+	mov	r9, rax				; r9 = index (0..nSize-1)
+
+	ALIGN 16
+loop1:
+	rol	qword ptr [r10+r9*8], cl
+	mov	rax, qword ptr [r10+r9*8]
+	and	rax, r11
+	xor	qword ptr [r10+r9*8], rax
+	or	qword ptr [r10+r9*8], rbx
+	mov	rbx, rax
+
+	lea	r9, [r9+1]
+	dec	rdx
+
+	jnz	loop1
+
+	and	rax, 1
+	pop	rbx
+	ret
+
+rcl2_x64 ENDP
+
+;----------------------------------------
+
+	ALIGN 8
+
+;----------------------------------------
+
+rcr2_x64 PROC
+	; rcx = p1
+	; rdx = nSize
+	; r8  = bits
+	; r9  = c
+
+	push	rbx
+	mov	r10, rcx			; r10 = p1
+	xor	rax, rax
+
+	mov	rcx, 64
+	sub	rcx, r8
+
+	mov	r11, -1
+	shl	r11, cl				; r11 = mask
+
+	mov	rcx, r8				; rcx = count of bits
+
+	mov	rbx, rax			; rbx = old value = 0
+	or	r9, r9
+	cmovnz	rbx, r11			; if (c) then old value = mask
+
+	mov	r9, rdx				; r9 = index (0..nSize-1)
+	lea	r9, [r9-1]
+
+	ALIGN 16
+loop1:
+	ror	qword ptr [r10+r9*8], cl
+	mov	rax, qword ptr [r10+r9*8]
+	and	rax, r11
+	xor	qword ptr [r10+r9*8], rax
+	or	qword ptr [r10+r9*8], rbx
+	mov	rbx, rax
+
+	lea	r9, [r9-1]
+	dec	rdx
+
+	jnz	loop1
+
+	rol	rax, 1
+	and	rax, 1
+	pop	rbx
+
+	ret
+
+rcr2_x64 ENDP
+
+END
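
A minimal usage sketch (not part of the patch itself): assuming ttmathuint_x86_64_msvc.asm has been assembled with ml64 /c and the resulting .obj is linked in, the new MSVC 64-bit code paths are exercised through the normal ttmath API; the file name test.cpp and the Big<1,2> sizes are arbitrary choices:

    // cl /O2 /EHsc test.cpp ttmathuint_x86_64_msvc.obj
    #include <ttmath/ttmath.h>
    #include <iostream>

    int main()
    {
        // Big<exponent words, mantissa words>: 64-bit exponent, 128-bit mantissa
        ttmath::Big<1,2> a, b;

        a = 1;
        a /= 3;        // 0.333... rounded to the mantissa width
        a *= 3;        // may now differ from 1 only in the trailing mantissa bits
        b = 1;

        // operator== compares exactly, while the new AboutEqual() skips
        // the last nBitsToIgnore (default 4) bits of the mantissas
        std::cout << (a == b)        << std::endl;
        std::cout << a.AboutEqual(b) << std::endl;

        return 0;
    }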