- fixed a bug in 64 bit ASM for MSVC
git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@181 e52654a7-88a9-db11-a3e9-0013d4bc506e
This commit is contained in:
parent
51b2c974a1
commit
e102086f80
|
@ -3869,47 +3869,66 @@ public:
|
|||
// we should check the mantissas beforehand because sometimes we can have
|
||||
// a mantissa set to zero but some other (non-zero) value in the exponent
|
||||
// (maybe we've forgotten about calling CorrectZero() ?)
|
||||
if( mantissa.IsZero() && ss2.mantissa.IsZero())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
if( mantissa.IsZero())
|
||||
{
|
||||
if (ss2.mantissa.IsZero())
|
||||
return true;
|
||||
return(ss2.AboutEqual(*this,nBitsToIgnore));
|
||||
}
|
||||
|
||||
if( IsSign() != ss2.IsSign() )
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (ss2.mantissa.IsZero())
|
||||
{
|
||||
return(this->exponent <= uint(2*(-sint(man*TTMATH_BITS_PER_UINT))+nBitsToIgnore));
|
||||
}
|
||||
|
||||
if( exponent==ss2.exponent )
|
||||
{
|
||||
if (mantissa == ss2.mantissa)
|
||||
{
|
||||
return(true);
|
||||
}
|
||||
if( IsSign() != ss2.IsSign() )
|
||||
{
|
||||
// we need to check the difference (both might be around Zero)
|
||||
Big<exp,man> temp(*this);
|
||||
// exponents may not differ much!
|
||||
ttmath::Int<exp> expdiff(this->exponent - ss2.exponent);
|
||||
|
||||
temp.Sub(ss2);
|
||||
// they may differ by one if, for example, mantissa1=0x80000000 and mantissa2=0xffffffff
|
||||
if (ttmath::Abs(expdiff) > 1)
|
||||
return(false);
|
||||
|
||||
Int<exp> exponent_diff(exponent - temp.exponent);
|
||||
// calculate the 'difference' mantissa
|
||||
ttmath::UInt<man> man1(this->mantissa);
|
||||
ttmath::UInt<man> man2(ss2.mantissa);
|
||||
ttmath::UInt<man> mandiff;
|
||||
|
||||
return(exponent_diff > man*TTMATH_BITS_PER_UINT-nBitsToIgnore);
|
||||
}
|
||||
switch (expdiff.ToInt())
|
||||
{
|
||||
case +1:
|
||||
man2.Rcr(1,0);
|
||||
mandiff = man1;
|
||||
mandiff.Sub(man2);
|
||||
break;
|
||||
case -1:
|
||||
man1.Rcr(1,0);
|
||||
mandiff = man2;
|
||||
mandiff.Sub(man1);
|
||||
break;
|
||||
case 0:
|
||||
if (man2 > man1)
|
||||
{
|
||||
mandiff = man2;
|
||||
mandiff.Sub(man1);
|
||||
}
|
||||
else
|
||||
{
|
||||
mandiff = man1;
|
||||
mandiff.Sub(man2);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// faster to mask the bits!
|
||||
ASSERT(nBitsToIgnore < TTMATH_BITS_PER_UINT);
|
||||
// faster to mask the bits!
|
||||
ASSERT(nBitsToIgnore < TTMATH_BITS_PER_UINT);
|
||||
|
||||
for (int n = man-1; n > 0; --n)
|
||||
{
|
||||
if (mantissa.table[n] != ss2.mantissa.table[n])
|
||||
return(false);
|
||||
}
|
||||
uint nMask = ~((1 << nBitsToIgnore) - 1);
|
||||
return((mantissa.table[0] & nMask) == (ss2.mantissa.table[0] & nMask));
|
||||
}
|
||||
|
||||
return false;
|
||||
for (int n = man-1; n > 0; --n)
|
||||
{
|
||||
if (mandiff.table[n] != 0)
|
||||
return(false);
|
||||
}
|
||||
uint nMask = ~((1 << nBitsToIgnore) - 1);
|
||||
return((mandiff.table[0] & nMask) == 0);
|
||||
}
|
||||
|
||||
bool operator<(const Big<exp,man> & ss2) const
|
||||
|
|
|
@ -871,6 +871,83 @@ namespace ttmath
|
|||
u3 = sub_res_low_.u_.low;
|
||||
}
|
||||
|
||||
/*!
|
||||
this static method adds one vector to the other
|
||||
'ss1' is larger in size or equal to 'ss2'
|
||||
|
||||
ss1 points to the first (larger) vector
|
||||
ss2 points to the second vector
|
||||
ss1_size - size of the ss1 (and size of the result too)
|
||||
ss2_size - size of the ss2
|
||||
result - is the result vector (which has size the same as ss1: ss1_size)
|
||||
|
||||
Example: ss1_size is 5, ss2_size is 3
|
||||
ss1: ss2: result (output):
|
||||
5 1 5+1
|
||||
4 3 4+3
|
||||
2 7 2+7
|
||||
6 6
|
||||
9 9
|
||||
of course the carry is propagated and will be returned from the last item
|
||||
(this method is used by the Karatsuba multiplication algorithm)
|
||||
*/
|
||||
template<uint value_size>
|
||||
uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
|
||||
{
|
||||
uint i, c = 0;
|
||||
|
||||
TTMATH_ASSERT( ss1_size >= ss2_size )
|
||||
|
||||
for(i=0 ; i<ss2_size ; ++i)
|
||||
c = AddTwoWords(ss1[i], ss2[i], c, &result[i]);
|
||||
|
||||
for( ; i<ss1_size ; ++i)
|
||||
c = AddTwoWords(ss1[i], 0, c, &result[i]);
|
||||
|
||||
TTMATH_LOG("UInt::AddVector")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/*!
|
||||
this static method subtracts one vector from the other
|
||||
'ss1' is larger in size or equal to 'ss2'
|
||||
|
||||
ss1 points to the first (larger) vector
|
||||
ss2 points to the second vector
|
||||
ss1_size - size of the ss1 (and size of the result too)
|
||||
ss2_size - size of the ss2
|
||||
result - is the result vector (which has size the same as ss1: ss1_size)
|
||||
|
||||
Example: ss1_size is 5, ss2_size is 3
|
||||
ss1: ss2: result (output):
|
||||
5 1 5-1
|
||||
4 3 4-3
|
||||
2 7 2-7
|
||||
6 6-1 (the borrow from previous item)
|
||||
9 9
|
||||
return (carry): 0
|
||||
of course the carry (borrow) is propagated and will be returned from the last item
|
||||
(this method is used by the Karatsuba multiplication algorithm)
|
||||
*/
|
||||
template<uint value_size>
|
||||
uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
|
||||
{
|
||||
uint i, c = 0;
|
||||
|
||||
TTMATH_ASSERT( ss1_size >= ss2_size )
|
||||
|
||||
for(i=0 ; i<ss2_size ; ++i)
|
||||
c = SubTwoWords(ss1[i], ss2[i], c, &result[i]);
|
||||
|
||||
for( ; i<ss1_size ; ++i)
|
||||
c = SubTwoWords(ss1[i], 0, c, &result[i]);
|
||||
|
||||
TTMATH_LOG("UInt::SubVector")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
#endif // #ifdef TTMATH_PLATFORM64
|
||||
|
||||
|
||||
|
|
|
@ -42,7 +42,7 @@
|
|||
#ifndef TTMATH_NOASM
|
||||
#ifdef TTMATH_PLATFORM32
|
||||
|
||||
#pragma message("TTMATH_ASM")
|
||||
#pragma message("TTMATH_ASM32")
|
||||
|
||||
/*!
|
||||
\file ttmathuint_x86.h
|
||||
|
|
|
@ -39,10 +39,10 @@
|
|||
#ifndef headerfilettmathuint_x86_64
|
||||
#define headerfilettmathuint_x86_64
|
||||
|
||||
|
||||
#ifndef TTMATH_NOASM
|
||||
#ifdef TTMATH_PLATFORM64
|
||||
|
||||
#pragma message("TTMATH_ASM64")
|
||||
/*!
|
||||
\file ttmathuint_x86_64.h
|
||||
\brief template class UInt<uint> with assembler code for 64bit x86_64 processors
|
||||
|
@ -50,6 +50,9 @@
|
|||
this file is included at the end of ttmathuint.h
|
||||
*/
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
|
||||
|
||||
namespace ttmath
|
||||
{
|
||||
|
@ -244,6 +247,30 @@ namespace ttmath
|
|||
*/
|
||||
template<uint value_size>
|
||||
uint UInt<value_size>::AddTwoInts(uint x2, uint x1, uint index)
|
||||
#if 0
|
||||
{
|
||||
uint i, c;
|
||||
|
||||
TTMATH_ASSERT( index < value_size )
|
||||
|
||||
printf("add %Id + %Id\n",x1,x2);
|
||||
for(int i=index ; i<value_size ; ++i)
|
||||
printf("%d: %Id\n",i,table[i]);
|
||||
|
||||
c = AddTwoWords(table[index], x1, 0, &table[index]);
|
||||
c = AddTwoWords(table[index+1], x2, c, &table[index+1]);
|
||||
|
||||
for(i=index+2 ; i<value_size && c ; ++i)
|
||||
c = AddTwoWords(table[i], 0, c, &table[i]);
|
||||
for(i=index ; i<value_size ; ++i)
|
||||
printf("%d: %Id\n",i,table[i]);
|
||||
printf(" -> %d\n",c);
|
||||
|
||||
TTMATH_LOG("UInt::AddTwoInts")
|
||||
|
||||
return c;
|
||||
}
|
||||
#else
|
||||
{
|
||||
uint b = value_size;
|
||||
uint * p1 = table;
|
||||
|
@ -253,7 +280,14 @@ namespace ttmath
|
|||
|
||||
#ifndef __GNUC__
|
||||
#if defined(_M_X64)
|
||||
c = addindexed2_x64(p1,b,index,x2,x1);
|
||||
//printf("add %Id + %Id\n",x1,x2);
|
||||
//for(int i=index ; i<value_size ; ++i)
|
||||
// printf("%d: %Id\n",i,table[i]);
|
||||
//if (table[0] == 1265784741359897913) DebugBreak();
|
||||
c = addindexed2_x64(p1,b,index,x1,x2);
|
||||
//for(int i=index ; i<value_size ; ++i)
|
||||
// printf("%d: %Id\n",i,table[i]);
|
||||
//printf(" -> %d\n",c);
|
||||
#else
|
||||
#error "another compiler than GCC is currently not supported in 64bit mode"
|
||||
#endif
|
||||
|
@ -289,10 +323,12 @@ namespace ttmath
|
|||
|
||||
#endif
|
||||
|
||||
|
||||
TTMATH_LOG("UInt64::AddTwoInts")
|
||||
|
||||
return c;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -98,8 +98,8 @@ addindexed_x64 ENDP
|
|||
|
||||
addindexed2_x64 PROC
|
||||
|
||||
; rcx = p1
|
||||
; rdx = b
|
||||
; rcx = p1 (pointer)
|
||||
; rdx = b (value size)
|
||||
; r8 = nPos
|
||||
; r9 = nValue1
|
||||
; [esp+0x28] = nValue2
|
||||
|
@ -109,26 +109,23 @@ addindexed2_x64 PROC
|
|||
sub rdx, r8 ; rdx = remaining count of uints
|
||||
mov r10, [esp+028h] ; r10 = nValue2
|
||||
|
||||
add qword ptr [r11 + r8 * 8], r10
|
||||
add qword ptr [r11 + r8 * 8], r9
|
||||
lea r8, [r8+1]
|
||||
lea rdx, [rdx-1]
|
||||
adc qword ptr [r11 + r8 * 8], r10
|
||||
jc next
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
loop1:
|
||||
adc qword ptr [r11 + r8 * 8], r9
|
||||
lea r8, [r8+1]
|
||||
add qword ptr [r11 + r8 * 8], 1
|
||||
jc next
|
||||
ret
|
||||
|
||||
next:
|
||||
lea r8, [r8+1]
|
||||
xor r9, r9 ; set to 0 -> cy still set!
|
||||
dec rdx
|
||||
dec rdx ; does not modify CY too...
|
||||
jnz loop1
|
||||
jc return_1 ; most of the times, there will be NO carry (I hope)
|
||||
|
||||
done:
|
||||
ret
|
||||
|
||||
return_1:
|
||||
lea rax, [rax+1]
|
||||
ret
|
||||
|
||||
|
@ -138,8 +135,6 @@ addindexed2_x64 ENDP
|
|||
|
||||
ALIGN 8
|
||||
|
||||
ALIGN 8
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
sbb_x64 PROC
|
||||
|
|
Loading…
Reference in New Issue