merged: x86_64 asm code for Microsoft Visual compiler
file: ttmathuint_x86_64_msvc.asm from chk branch (original was: ttmathuint_x86_amd64_msvc.asm) (this file should be compiled first because MS VC doesn't support inline assembler in x86_64 mode) git-svn-id: svn://ttmath.org/publicrep/ttmath/trunk@187 e52654a7-88a9-db11-a3e9-0013d4bc506e
This commit is contained in:
parent
0d71b0cec2
commit
28964d30f7
|
@ -1,4 +1,4 @@
|
|||
Version 0.9.0 prerelease (2009.09.05):
|
||||
Version 0.9.0 prerelease (2009.09.07):
|
||||
* added: support for wide characters (wchar_t)
|
||||
wide characters are used when macro TTMATH_USE_WCHAR is defined
|
||||
this macro is defined automatically when there is macro UNICODE or _UNICODE defined
|
||||
|
@ -22,6 +22,9 @@ Version 0.9.0 prerelease (2009.09.05):
|
|||
and use TTMATH_MULTITHREADS_HELPER macro somewhere in your *.cpp file
|
||||
* added: Big::AboutEqual(const Big<exp,man> & ss2, int nBitsToIgnore = 4)
|
||||
the last nBitsToIgnore bits from mantissas will be skipped when comparing
|
||||
* added: x86_64 asm code for Microsoft Visual compiler
|
||||
file: ttmathuint_x86_64_msvc.asm
|
||||
(this file should be compiled first because MS VC doesn't support inline assembler in x86_64 mode)
|
||||
* changed: Factorial() is using the Gamma() function now
|
||||
* removed: Parser<>::SetFactorialMax() method
|
||||
the factorial() is such a fast now that we don't need the method longer
|
||||
|
|
|
@ -3916,75 +3916,75 @@ public:
|
|||
}
|
||||
|
||||
|
||||
bool AboutEqual(const Big<exp,man> & ss2, int nBitsToIgnore = 4) const
|
||||
{
|
||||
// we should check the mantissas beforehand because sometimes we can have
|
||||
// a mantissa set to zero but in the exponent something another value
|
||||
// (maybe we've forgotten about calling CorrectZero() ?)
|
||||
if( mantissa.IsZero() )
|
||||
{
|
||||
if( ss2.mantissa.IsZero() )
|
||||
return true;
|
||||
|
||||
return(ss2.AboutEqual(*this,nBitsToIgnore));
|
||||
}
|
||||
|
||||
if( ss2.mantissa.IsZero() )
|
||||
{
|
||||
return(this->exponent <= uint(2*(-sint(man*TTMATH_BITS_PER_UINT))+nBitsToIgnore));
|
||||
}
|
||||
|
||||
// exponents may not differ much!
|
||||
ttmath::Int<exp> expdiff(this->exponent - ss2.exponent);
|
||||
|
||||
// they may differ one if for example mantissa1=0x80000000, mantissa2=0xffffffff
|
||||
if( ttmath::Abs(expdiff) > 1 )
|
||||
return(false);
|
||||
|
||||
// calculate the 'difference' mantissa
|
||||
ttmath::UInt<man> man1(this->mantissa);
|
||||
ttmath::UInt<man> man2(ss2.mantissa);
|
||||
ttmath::UInt<man> mandiff;
|
||||
|
||||
switch( expdiff.ToInt() )
|
||||
{
|
||||
case +1:
|
||||
man2.Rcr(1,0);
|
||||
mandiff = man1;
|
||||
mandiff.Sub(man2);
|
||||
break;
|
||||
case -1:
|
||||
man1.Rcr(1,0);
|
||||
mandiff = man2;
|
||||
mandiff.Sub(man1);
|
||||
break;
|
||||
default:
|
||||
if( man2 > man1 )
|
||||
{
|
||||
mandiff = man2;
|
||||
mandiff.Sub(man1);
|
||||
}
|
||||
else
|
||||
{
|
||||
mandiff = man1;
|
||||
mandiff.Sub(man2);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// faster to mask the bits!
|
||||
TTMATH_ASSERT( nBitsToIgnore < TTMATH_BITS_PER_UINT );
|
||||
|
||||
for( int n = man-1; n > 0; --n )
|
||||
{
|
||||
if( mandiff.table[n] != 0 )
|
||||
return(false);
|
||||
}
|
||||
|
||||
uint nMask = ~((1 << nBitsToIgnore) - 1);
|
||||
|
||||
return((mandiff.table[0] & nMask) == 0);
|
||||
}
|
||||
bool AboutEqual(const Big<exp,man> & ss2, int nBitsToIgnore = 4) const
|
||||
{
|
||||
// we should check the mantissas beforehand because sometimes we can have
|
||||
// a mantissa set to zero but in the exponent something another value
|
||||
// (maybe we've forgotten about calling CorrectZero() ?)
|
||||
if( mantissa.IsZero() )
|
||||
{
|
||||
if( ss2.mantissa.IsZero() )
|
||||
return true;
|
||||
|
||||
return(ss2.AboutEqual(*this,nBitsToIgnore));
|
||||
}
|
||||
|
||||
if( ss2.mantissa.IsZero() )
|
||||
{
|
||||
return(this->exponent <= uint(2*(-sint(man*TTMATH_BITS_PER_UINT))+nBitsToIgnore));
|
||||
}
|
||||
|
||||
// exponents may not differ much!
|
||||
ttmath::Int<exp> expdiff(this->exponent - ss2.exponent);
|
||||
|
||||
// they may differ one if for example mantissa1=0x80000000, mantissa2=0xffffffff
|
||||
if( ttmath::Abs(expdiff) > 1 )
|
||||
return(false);
|
||||
|
||||
// calculate the 'difference' mantissa
|
||||
ttmath::UInt<man> man1(this->mantissa);
|
||||
ttmath::UInt<man> man2(ss2.mantissa);
|
||||
ttmath::UInt<man> mandiff;
|
||||
|
||||
switch( expdiff.ToInt() )
|
||||
{
|
||||
case +1:
|
||||
man2.Rcr(1,0);
|
||||
mandiff = man1;
|
||||
mandiff.Sub(man2);
|
||||
break;
|
||||
case -1:
|
||||
man1.Rcr(1,0);
|
||||
mandiff = man2;
|
||||
mandiff.Sub(man1);
|
||||
break;
|
||||
default:
|
||||
if( man2 > man1 )
|
||||
{
|
||||
mandiff = man2;
|
||||
mandiff.Sub(man1);
|
||||
}
|
||||
else
|
||||
{
|
||||
mandiff = man1;
|
||||
mandiff.Sub(man2);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// faster to mask the bits!
|
||||
TTMATH_ASSERT( nBitsToIgnore < TTMATH_BITS_PER_UINT );
|
||||
|
||||
for( int n = man-1; n > 0; --n )
|
||||
{
|
||||
if( mandiff.table[n] != 0 )
|
||||
return(false);
|
||||
}
|
||||
|
||||
uint nMask = ~((1 << nBitsToIgnore) - 1);
|
||||
|
||||
return((mandiff.table[0] & nMask) == 0);
|
||||
}
|
||||
|
||||
|
||||
bool operator<(const Big<exp,man> & ss2) const
|
||||
|
|
|
@ -162,8 +162,14 @@ namespace ttmath
|
|||
/*!
|
||||
on 64bit platforms one word (uint, sint) will be equal 64bits
|
||||
*/
|
||||
typedef unsigned long uint;
|
||||
typedef signed long sint;
|
||||
#ifdef _MSC_VER
|
||||
/* in VC 'long' type has 32 bits, __int64 is VC extension */
|
||||
typedef unsigned __int64 uint;
|
||||
typedef signed __int64 sint;
|
||||
#else
|
||||
typedef unsigned long uint;
|
||||
typedef signed long sint;
|
||||
#endif
|
||||
|
||||
/*!
|
||||
on 64bit platform we do not define ulint
|
||||
|
|
|
@ -3297,6 +3297,17 @@ public:
|
|||
static uint SetBitInWord(uint & value, uint bit);
|
||||
static void MulTwoWords(uint a, uint b, uint * result_high, uint * result_low);
|
||||
static void DivTwoWords(uint a,uint b, uint c, uint * r, uint * rest);
|
||||
|
||||
|
||||
/* temporarily */
|
||||
#ifndef TTMATH_NOASM
|
||||
#ifdef TTMATH_PLATFORM64
|
||||
#ifdef _MSC_VER
|
||||
static uint AddTwoWords(uint a, uint b, uint carry, uint * result);
|
||||
static uint SubTwoWords(uint a, uint b, uint carry, uint * result);
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -51,10 +51,33 @@
|
|||
this file is included at the end of ttmathuint.h
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
|
||||
namespace ttmath
|
||||
{
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
||||
extern "C"
|
||||
{
|
||||
uint __fastcall adc_x64(uint* p1, const uint* p2, uint nSize, uint c);
|
||||
uint __fastcall addindexed_x64(uint* p1, uint nSize, uint nPos, uint nValue);
|
||||
uint __fastcall addindexed2_x64(uint* p1, uint nSize, uint nPos, uint nValue1, uint nValue2);
|
||||
uint __fastcall sbb_x64(uint* p1, const uint* p2, uint nSize, uint c);
|
||||
uint __fastcall subindexed_x64(uint* p1, uint nSize, uint nPos, uint nValue);
|
||||
uint __fastcall rcl_x64(uint* p1, uint nSize, uint nLowestBit);
|
||||
uint __fastcall rcr_x64(uint* p1, uint nSize, uint nLowestBit);
|
||||
uint __fastcall div_x64(uint* pnValHi, uint* pnValLo, uint nDiv);
|
||||
uint __fastcall rcl2_x64(uint* p1, uint nSize, uint nBits, uint c);
|
||||
uint __fastcall rcr2_x64(uint* p1, uint nSize, uint nBits, uint c);
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
*
|
||||
* basic mathematic functions
|
||||
|
@ -82,8 +105,12 @@ namespace ttmath
|
|||
// we don't have to use TTMATH_REFERENCE_ASSERT here
|
||||
// this algorithm doesn't require it
|
||||
|
||||
#ifndef __GNUC__
|
||||
#error "another compiler than GCC is currently not supported in 64bit mode"
|
||||
#if !defined(__GNUC__) && !defined(_MSC_VER)
|
||||
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
c = adc_x64(p1,p2,b,c);
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
|
@ -149,10 +176,16 @@ namespace ttmath
|
|||
|
||||
TTMATH_ASSERT( index < value_size )
|
||||
|
||||
#ifndef __GNUC__
|
||||
#error "another compiler than GCC is currently not supported in 64bit mode"
|
||||
#if !defined(__GNUC__) && !defined(_MSC_VER)
|
||||
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
c = addindexed_x64(p1,b,index,value);
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __GNUC__
|
||||
uint dummy, dummy2;
|
||||
|
||||
|
@ -227,10 +260,16 @@ namespace ttmath
|
|||
|
||||
TTMATH_ASSERT( index < value_size - 1 )
|
||||
|
||||
#ifndef __GNUC__
|
||||
#error "another compiler than GCC is currently not supported in 64bit mode"
|
||||
#if !defined(__GNUC__) && !defined(_MSC_VER)
|
||||
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
c = addindexed2_x64(p1,b,index,x1,x2);
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __GNUC__
|
||||
uint dummy, dummy2;
|
||||
|
||||
|
@ -288,6 +327,9 @@ namespace ttmath
|
|||
of course the carry is propagated and will be returned from the last item
|
||||
(this method is used by the Karatsuba multiplication algorithm)
|
||||
*/
|
||||
|
||||
#ifndef _MSC_VER
|
||||
|
||||
template<uint value_size>
|
||||
uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
|
||||
{
|
||||
|
@ -296,10 +338,16 @@ namespace ttmath
|
|||
uint rest = ss1_size - ss2_size;
|
||||
uint c;
|
||||
|
||||
#ifndef __GNUC__
|
||||
#error "another compiler than GCC is currently not supported in 64bit mode"
|
||||
#if !defined(__GNUC__) && !defined(_MSC_VER)
|
||||
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __GNUC__
|
||||
uint dummy1, dummy2, dummy3;
|
||||
|
||||
|
@ -348,8 +396,27 @@ namespace ttmath
|
|||
return c;
|
||||
}
|
||||
|
||||
#else
|
||||
/* temporarily */
|
||||
template<uint value_size>
|
||||
uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
|
||||
{
|
||||
uint i, c = 0;
|
||||
|
||||
TTMATH_ASSERT( ss1_size >= ss2_size )
|
||||
|
||||
for(i=0 ; i<ss2_size ; ++i)
|
||||
c = AddTwoWords(ss1[i], ss2[i], c, &result[i]);
|
||||
|
||||
for( ; i<ss1_size ; ++i)
|
||||
c = AddTwoWords(ss1[i], 0, c, &result[i]);
|
||||
|
||||
TTMATH_LOG("UInt::AddVector")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/*!
|
||||
|
@ -373,10 +440,16 @@ namespace ttmath
|
|||
// we don't have to use TTMATH_REFERENCE_ASSERT here
|
||||
// this algorithm doesn't require it
|
||||
|
||||
#ifndef __GNUC__
|
||||
#error "another compiler than GCC is currently not supported in 64bit mode"
|
||||
#if !defined(__GNUC__) && !defined(_MSC_VER)
|
||||
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
c = sbb_x64(p1,p2,b,c);
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __GNUC__
|
||||
uint dummy, dummy2;
|
||||
|
||||
|
@ -432,15 +505,22 @@ namespace ttmath
|
|||
uint b = value_size;
|
||||
uint * p1 = table;
|
||||
uint c;
|
||||
uint dummy, dummy2;
|
||||
|
||||
TTMATH_ASSERT( index < value_size )
|
||||
|
||||
#ifndef __GNUC__
|
||||
#error "another compiler than GCC is currently not supported in 64bit mode"
|
||||
#if !defined(__GNUC__) && !defined(_MSC_VER)
|
||||
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
c = subindexed_x64(p1,b,index,value);
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __GNUC__
|
||||
uint dummy, dummy2;
|
||||
|
||||
__asm__ __volatile__(
|
||||
|
||||
"subq %%rdx, %%rcx \n"
|
||||
|
@ -493,6 +573,9 @@ namespace ttmath
|
|||
of course the carry (borrow) is propagated and will be returned from the last item
|
||||
(this method is used by the Karatsuba multiplication algorithm)
|
||||
*/
|
||||
|
||||
#ifndef _MSC_VER
|
||||
|
||||
template<uint value_size>
|
||||
uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
|
||||
{
|
||||
|
@ -501,16 +584,22 @@ namespace ttmath
|
|||
uint rest = ss1_size - ss2_size;
|
||||
uint c;
|
||||
|
||||
#ifndef __GNUC__
|
||||
#error "another compiler than GCC is currently not supported in 64bit mode"
|
||||
#if !defined(__GNUC__) && !defined(_MSC_VER)
|
||||
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __GNUC__
|
||||
|
||||
/*
|
||||
the asm code is nearly the same as in AddVector
|
||||
only two instructions 'adc' are changed to 'sbb'
|
||||
*/
|
||||
|
||||
// the asm code is nearly the same as in AddVector
|
||||
// only two instructions 'adc' are changed to 'sbb'
|
||||
|
||||
uint dummy1, dummy2, dummy3;
|
||||
|
||||
__asm__ __volatile__(
|
||||
|
@ -556,6 +645,27 @@ namespace ttmath
|
|||
return c;
|
||||
}
|
||||
|
||||
#else
|
||||
/* temporarily */
|
||||
template<uint value_size>
|
||||
uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
|
||||
{
|
||||
uint i, c = 0;
|
||||
|
||||
TTMATH_ASSERT( ss1_size >= ss2_size )
|
||||
|
||||
for(i=0 ; i<ss2_size ; ++i)
|
||||
c = SubTwoWords(ss1[i], ss2[i], c, &result[i]);
|
||||
|
||||
for( ; i<ss1_size ; ++i)
|
||||
c = SubTwoWords(ss1[i], 0, c, &result[i]);
|
||||
|
||||
TTMATH_LOG("UInt::SubVector")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/*!
|
||||
|
@ -579,10 +689,16 @@ namespace ttmath
|
|||
uint * p1 = table;
|
||||
|
||||
|
||||
#ifndef __GNUC__
|
||||
#error "another compiler than GCC is currently not supported in 64bit mode"
|
||||
#if !defined(__GNUC__) && !defined(_MSC_VER)
|
||||
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
c = rcl_x64(p1,b,c);
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __GNUC__
|
||||
uint dummy, dummy2;
|
||||
|
||||
|
@ -633,10 +749,16 @@ namespace ttmath
|
|||
uint * p1 = table;
|
||||
|
||||
|
||||
#ifndef __GNUC__
|
||||
#error "another compiler than GCC is currently not supported in 64bit mode"
|
||||
#if !defined(__GNUC__) && !defined(_MSC_VER)
|
||||
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
c = rcr_x64(p1,b,c);
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __GNUC__
|
||||
uint dummy;
|
||||
|
||||
|
@ -688,10 +810,16 @@ namespace ttmath
|
|||
uint * p1 = table;
|
||||
|
||||
|
||||
#ifndef __GNUC__
|
||||
#error "another compiler than GCC is currently not supported in 64bit mode"
|
||||
#if !defined(__GNUC__) && !defined(_MSC_VER)
|
||||
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
c = rcl2_x64(p1,b,bits,c);
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __GNUC__
|
||||
uint dummy, dummy2, dummy3;
|
||||
|
||||
|
@ -758,14 +886,20 @@ namespace ttmath
|
|||
|
||||
sint b = value_size;
|
||||
uint * p1 = table;
|
||||
uint dummy, dummy2, dummy3;
|
||||
|
||||
#ifndef __GNUC__
|
||||
#error "another compiler than GCC is currently not supported in 64bit mode"
|
||||
#if !defined(__GNUC__) && !defined(_MSC_VER)
|
||||
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
c = rcr2_x64(p1,b,bits,c);
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __GNUC__
|
||||
uint dummy, dummy2, dummy3;
|
||||
|
||||
__asm__ __volatile__(
|
||||
|
||||
"movq %%rcx, %%rsi \n"
|
||||
|
@ -823,10 +957,23 @@ namespace ttmath
|
|||
sint result;
|
||||
|
||||
|
||||
#ifndef __GNUC__
|
||||
#error "another compiler than GCC is currently not supported in 64bit mode"
|
||||
#if !defined(__GNUC__) && !defined(_MSC_VER)
|
||||
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
||||
unsigned long nIndex = 0;
|
||||
|
||||
if( _BitScanReverse64(&nIndex,x) == 0 )
|
||||
result = -1;
|
||||
else
|
||||
result = nIndex;
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __GNUC__
|
||||
uint dummy;
|
||||
|
||||
|
@ -868,11 +1015,16 @@ namespace ttmath
|
|||
uint old_bit;
|
||||
uint v = value;
|
||||
|
||||
|
||||
#ifndef __GNUC__
|
||||
#error "another compiler than GCC is currently not supported in 64bit mode"
|
||||
#if !defined(__GNUC__) && !defined(_MSC_VER)
|
||||
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
old_bit = _bittestandset64((__int64*)&value,bit) != 0;
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __GNUC__
|
||||
|
||||
__asm__ (
|
||||
|
@ -924,10 +1076,16 @@ namespace ttmath
|
|||
uint result1_;
|
||||
uint result2_;
|
||||
|
||||
#ifndef __GNUC__
|
||||
#error "another compiler than GCC is currently not supported in 64bit mode"
|
||||
#if !defined(__GNUC__) && !defined(_MSC_VER)
|
||||
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
result1_ = _umul128(a,b,&result2_);
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __GNUC__
|
||||
|
||||
__asm__ (
|
||||
|
@ -981,10 +1139,20 @@ namespace ttmath
|
|||
|
||||
TTMATH_ASSERT( c != 0 )
|
||||
|
||||
#ifndef __GNUC__
|
||||
#error "another compiler than GCC is currently not supported in 64bit mode"
|
||||
#if !defined(__GNUC__) && !defined(_MSC_VER)
|
||||
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
||||
div_x64(&a,&b,c);
|
||||
r_ = a;
|
||||
rest_ = b;
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __GNUC__
|
||||
|
||||
__asm__ (
|
||||
|
@ -1003,6 +1171,59 @@ namespace ttmath
|
|||
}
|
||||
|
||||
|
||||
/* temporarily */
|
||||
template<uint value_size>
|
||||
uint UInt<value_size>::AddTwoWords(uint a, uint b, uint carry, uint * result)
|
||||
{
|
||||
uint temp;
|
||||
|
||||
if( carry == 0 )
|
||||
{
|
||||
temp = a + b;
|
||||
|
||||
if( temp < a )
|
||||
carry = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
carry = 1;
|
||||
temp = a + b + carry;
|
||||
|
||||
if( temp > a ) // !(temp<=a)
|
||||
carry = 0;
|
||||
}
|
||||
|
||||
*result = temp;
|
||||
|
||||
return carry;
|
||||
}
|
||||
|
||||
|
||||
/* temporarily */
|
||||
template<uint value_size>
|
||||
uint UInt<value_size>::SubTwoWords(uint a, uint b, uint carry, uint * result)
|
||||
{
|
||||
if( carry == 0 )
|
||||
{
|
||||
*result = a - b;
|
||||
|
||||
if( a < b )
|
||||
carry = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
carry = 1;
|
||||
*result = a - b - carry;
|
||||
|
||||
if( a > b ) // !(a <= b )
|
||||
carry = 0;
|
||||
}
|
||||
|
||||
return carry;
|
||||
}
|
||||
|
||||
|
||||
|
||||
} //namespace
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,430 @@
|
|||
;
|
||||
; This file is a part of TTMath Bignum Library
|
||||
; and is distributed under the (new) BSD licence.
|
||||
; Author: Christian Kaiser <>
|
||||
;
|
||||
|
||||
;
|
||||
; Copyright (c) 2009, Christian Kaiser
|
||||
; All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions are met:
|
||||
;
|
||||
; * Redistributions of source code must retain the above copyright notice,
|
||||
; this list of conditions and the following disclaimer.
|
||||
;
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in the
|
||||
; documentation and/or other materials provided with the distribution.
|
||||
;
|
||||
; * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
; project may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
; ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
; THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;
|
||||
|
||||
;
|
||||
; compile with debug info: ml64.exe /Zd /Zi ttmathuint_x86_64_msvc.asm
|
||||
; compile without debug info: ml64.exe ttmathuint_x86_64_msvc.asm
|
||||
; this create ttmathuint_x86_64_msvc.obj file which can be linked with your program
|
||||
;
|
||||
|
||||
PUBLIC adc_x64
|
||||
PUBLIC addindexed_x64
|
||||
PUBLIC addindexed2_x64
|
||||
|
||||
PUBLIC sbb_x64
|
||||
PUBLIC subindexed_x64
|
||||
|
||||
PUBLIC rcl_x64
|
||||
PUBLIC rcr_x64
|
||||
|
||||
PUBLIC rcl2_x64
|
||||
PUBLIC rcr2_x64
|
||||
|
||||
PUBLIC div_x64
|
||||
|
||||
;
|
||||
; "rax, rcx, rdx, r8-r11 are volatile."
|
||||
; "rbx, rbp, rdi, rsi, r12-r15 are nonvolatile."
|
||||
;
|
||||
|
||||
|
||||
.CODE
|
||||
|
||||
ALIGN 8
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
adc_x64 PROC
	; carry-propagating vector add: p1[i] += p2[i] for i in [0, nSize)
	; returns the final carry (0 or 1)
	;
	; rcx = p1
	; rdx = p2
	; r8 = nSize
	; r9 = nCarry

	xor	rax, rax
	xor	r11, r11
	sub	rax, r9			; sets CARRY if r9 != 0

	ALIGN 16
loop1:
	mov	rax, qword ptr [rdx + r11 * 8]
	adc	qword ptr [rcx + r11 * 8], rax
	lea	r11, [r11+1]		; lea does not touch CF; dec leaves CF alone too,
	dec	r8			; so the carry chain survives across iterations
	jnz	loop1

	setc	al
	movzx	rax, al			; rax = carry out (0 or 1)

	ret

adc_x64 ENDP
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
ALIGN 8
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
addindexed_x64 PROC

	; adds nValue to the word at index nPos and propagates the carry upwards;
	; returns 1 if a carry leaves the top of the table, else 0
	;
	; rcx = p1
	; rdx = nSize
	; r8 = nPos
	; r9 = nValue

	xor	rax, rax		; rax = result
	sub	rdx, r8			; rdx = remaining count of uints

	add	qword ptr [rcx + r8 * 8], r9
	jc	next1

	ret				; common case: no carry, return 0

next1:
	mov	r9, 1			; from now on only the carry (1) is added

	ALIGN 16
loop1:
	dec	rdx
	jz	done_with_cy		; carry fell off the last word
	lea	r8, [r8+1]
	add	qword ptr [rcx + r8 * 8], r9
	jc	loop1

	ret

done_with_cy:
	lea	rax, [rax+1]		; rax = 1

	ret

addindexed_x64 ENDP
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
ALIGN 8
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
addindexed2_x64 PROC

	; adds nValue1 to the word at index nPos and nValue2 to the word at
	; nPos+1, then propagates the carry upwards;
	; returns 1 if a carry leaves the top of the table, else 0
	;
	; rcx = p1 (pointer)
	; rdx = b (value size)
	; r8 = nPos
	; r9 = nValue1
	; [rsp+0x28] = nValue2 (5th argument: passed on the stack
	;                       just above the 32-byte shadow space)

	xor	rax, rax		; return value
	mov	r11, rcx		; table
	sub	rdx, r8			; rdx = remaining count of uints
	mov	r10, [rsp+028h]		; r10 = nValue2
					; (bugfix: was [esp+028h] - using the 32bit
					; stack pointer truncates the address in
					; 64bit code)

	add	qword ptr [r11 + r8 * 8], r9
	lea	r8, [r8+1]		; lea keeps CF for the following adc
	lea	rdx, [rdx-1]
	adc	qword ptr [r11 + r8 * 8], r10
	jc	next
	ret

	ALIGN 16
loop1:
	lea	r8, [r8+1]
	add	qword ptr [r11 + r8 * 8], 1
	jc	next
	ret

next:
	dec	rdx			; does not modify CY too...
	jnz	loop1
	lea	rax, [rax+1]		; carry left the table: return 1
	ret

addindexed2_x64 ENDP
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
ALIGN 8
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
sbb_x64 PROC

	; borrow-propagating vector subtract: p1[i] -= p2[i] for i in [0, nCount)
	; returns the final borrow (0 or 1)
	;
	; rcx = p1
	; rdx = p2
	; r8 = nCount
	; r9 = nCarry

	xor	rax, rax
	xor	r11, r11
	sub	rax, r9			; sets CARRY if r9 != 0

	ALIGN 16
loop1:
	mov	rax, qword ptr [rdx + r11 * 8]
	sbb	qword ptr [rcx + r11 * 8], rax
	lea	r11, [r11+1]		; lea does not touch CF; dec leaves CF alone too,
	dec	r8			; so the borrow chain survives across iterations
	jnz	loop1

	setc	al
	movzx	rax, al			; rax = borrow out (0 or 1)

	ret

sbb_x64 ENDP
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
ALIGN 8
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
subindexed_x64 PROC
	; subtracts nValue from the word at index nPos and propagates the
	; borrow upwards; returns 1 if a borrow leaves the top of the table, else 0
	;
	; rcx = p1
	; rdx = nSize
	; r8 = nPos
	; r9 = nValue

	sub	rdx, r8			; rdx = remaining count of uints

	ALIGN 16
loop1:
	sub	qword ptr [rcx + r8 * 8], r9
	jnc	done

	lea	r8, [r8+1]		; lea/mov keep CF; dec leaves CF alone too
	mov	r9, 1			; from now on only the borrow (1) is subtracted
	dec	rdx
	jnz	loop1
	jc	return_1		; most of the times, there will be NO carry (I hope)

done:
	xor	rax, rax
	ret

return_1:
	mov	rax, 1
	ret

subindexed_x64 ENDP
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
ALIGN 8
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
rcl_x64 PROC
	; shifts the whole table left by one bit, nLowestBit (0/1) is shifted
	; into bit 0 of the first word; returns the bit shifted out at the top
	;
	; rcx = p1
	; rdx = b
	; r8 = nLowestBit

	mov	r11, rcx		; table
	xor	r10, r10
	neg	r8			; CY set if r8 <> 0

	ALIGN 16
loop1:
	rcl	qword ptr [r11 + r10 * 8], 1
	lea	r10, [r10+1]		; lea does not touch CF; dec leaves CF alone too,
	dec	rdx			; so the rotated-out bit flows into the next word
	jnz	loop1

	setc	al
	movzx	rax, al			; rax = bit shifted out (0 or 1)

	ret

rcl_x64 ENDP
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
ALIGN 8
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
rcr_x64 PROC
	; shifts the whole table right by one bit, processed from the top word
	; downwards; nLowestBit (0/1) is shifted into the top bit;
	; returns the bit shifted out at the bottom
	;
	; rcx = p1
	; rdx = nSize
	; r8 = nLowestBit

	xor	r10, r10		; NOTE(review): r10 appears unused in this
					; routine - looks like a leftover from rcl_x64
	neg	r8			; CY set if r8 <> 0

	ALIGN 16
loop1:
	rcr	qword ptr -8[rcx + rdx * 8], 1
	dec	rdx			; dec leaves CF alone, preserving the chain
	jnz	loop1

	setc	al
	movzx	rax, al			; rax = bit shifted out (0 or 1)

	ret

rcr_x64 ENDP
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
ALIGN 8
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
div_x64 PROC

	; 128/64 bit unsigned division of (*pnValHi : *pnValLo) by nDiv;
	; the quotient is stored in *pnValHi, the remainder in *pnValLo
	;
	; rcx = &Hi
	; rdx = &Lo
	; r8 = nDiv

	mov	r11, rcx
	mov	r10, rdx		; 'div' clobbers rdx, so save the pointer first

	mov	rdx, qword ptr [r11]	; rdx:rax = 128bit dividend
	mov	rax, qword ptr [r10]
	div	r8
	mov	qword ptr [r10], rdx	; remainder
	mov	qword ptr [r11], rax	; value

	ret

div_x64 ENDP
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
ALIGN 8
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
rcl2_x64 PROC
	; shifts the whole table left by 'bits' bits in a single pass;
	; if c != 0 the vacated low bits are filled with ones;
	; returns the last bit shifted out at the top (0 or 1)
	;
	; rcx = p1
	; rdx = nSize
	; r8 = bits
	; r9 = c

	push	rbx			; rbx is nonvolatile and must be preserved

	mov	r10, rcx		; r10 = p1
	xor	rax, rax

	mov	rcx, 64
	sub	rcx, r8

	mov	r11, -1
	shr	r11, cl			; r11 = mask (lowest 'bits' bits set)

	mov	rcx, r8			; rcx = count of bits

	mov	rbx, rax		; rbx = old value = 0
	or	r9, r9
	cmovnz	rbx, r11		; if (c) then old value = mask

	mov	r9, rax			; r9 = index (0..nSize-1)

	ALIGN 16
loop1:
	; rotate the word, then replace its wrapped-around low bits (extracted
	; into rax) with the bits carried in from the previous word (rbx)
	rcl	qword ptr [r10+r9*8], cl
	mov	rax, qword ptr [r10+r9*8]
	and	rax, r11
	xor	qword ptr [r10+r9*8], rax
	or	qword ptr [r10+r9*8], rbx
	mov	rbx, rax		; pass the extracted bits up to the next word

	lea	r9, [r9+1]
	dec	rdx

	jnz	loop1

	and	rax, 1			; after the rotate the last bit shifted out of
					; the top word sits in bit 0 of rax
	pop	rbx
	ret

rcl2_x64 ENDP
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
ALIGN 8
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
rcr2_x64 PROC
	; shifts the whole table right by 'bits' bits in a single pass,
	; processed from the top word downwards;
	; if c != 0 the vacated high bits are filled with ones;
	; returns the last bit shifted out at the bottom (0 or 1)
	;
	; rcx = p1
	; rdx = nSize
	; r8 = bits
	; r9 = c

	push	rbx			; rbx is nonvolatile and must be preserved
	mov	r10, rcx		; r10 = p1
	xor	rax, rax

	mov	rcx, 64
	sub	rcx, r8

	mov	r11, -1
	shl	r11, cl			; r11 = mask (highest 'bits' bits set)

	mov	rcx, r8			; rcx = count of bits

	mov	rbx, rax		; rbx = old value = 0
	or	r9, r9
	cmovnz	rbx, r11		; if (c) then old value = mask

	mov	r9, rdx			; r9 = index (0..nSize-1)
	lea	r9, [r9-1]		; start at the top word

	ALIGN 16
loop1:
	; rotate the word, then replace its wrapped-around high bits (extracted
	; into rax) with the bits carried in from the word above (rbx)
	ror	qword ptr [r10+r9*8], cl
	mov	rax, qword ptr [r10+r9*8]
	and	rax, r11
	xor	qword ptr [r10+r9*8], rax
	or	qword ptr [r10+r9*8], rbx
	mov	rbx, rax		; pass the extracted bits down to the next word

	lea	r9, [r9-1]
	dec	rdx

	jnz	loop1

	rol	rax, 1			; move the top bit of rax (the last bit
	and	rax, 1			; shifted out) into bit 0 and isolate it
	pop	rbx

	ret

rcr2_x64 ENDP
|
||||
|
||||
END
|
Loading…
Reference in New Issue