- fixed a bug in 64 bit ASM for MSVC
git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@181 e52654a7-88a9-db11-a3e9-0013d4bc506e
This commit is contained in:
parent
51b2c974a1
commit
e102086f80
|
@ -3869,47 +3869,66 @@ public:
|
||||||
// we should check the mantissas beforehand because sometimes we can have
|
// we should check the mantissas beforehand because sometimes we can have
|
||||||
// a mantissa set to zero but some other (non-zero) value left in the exponent
|
// a mantissa set to zero but some other (non-zero) value left in the exponent
|
||||||
// (maybe we've forgotten about calling CorrectZero() ?)
|
// (maybe we've forgotten about calling CorrectZero() ?)
|
||||||
if( mantissa.IsZero() && ss2.mantissa.IsZero())
|
if( mantissa.IsZero())
|
||||||
{
|
{
|
||||||
return true;
|
if (ss2.mantissa.IsZero())
|
||||||
}
|
return true;
|
||||||
|
return(ss2.AboutEqual(*this,nBitsToIgnore));
|
||||||
if( IsSign() != ss2.IsSign() )
|
}
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if( exponent==ss2.exponent )
|
if (ss2.mantissa.IsZero())
|
||||||
{
|
{
|
||||||
if (mantissa == ss2.mantissa)
|
return(this->exponent <= uint(2*(-sint(man*TTMATH_BITS_PER_UINT))+nBitsToIgnore));
|
||||||
{
|
}
|
||||||
return(true);
|
|
||||||
}
|
// exponents may not differ much!
|
||||||
if( IsSign() != ss2.IsSign() )
|
ttmath::Int<exp> expdiff(this->exponent - ss2.exponent);
|
||||||
{
|
|
||||||
// we need to check the difference (both might be around Zero)
|
// they may differ by one if, for example, mantissa1=0x80000000, mantissa2=0xffffffff
|
||||||
Big<exp,man> temp(*this);
|
if (ttmath::Abs(expdiff) > 1)
|
||||||
|
return(false);
|
||||||
temp.Sub(ss2);
|
|
||||||
|
|
||||||
Int<exp> exponent_diff(exponent - temp.exponent);
|
// calculate the 'difference' mantissa
|
||||||
|
ttmath::UInt<man> man1(this->mantissa);
|
||||||
return(exponent_diff > man*TTMATH_BITS_PER_UINT-nBitsToIgnore);
|
ttmath::UInt<man> man2(ss2.mantissa);
|
||||||
}
|
ttmath::UInt<man> mandiff;
|
||||||
|
|
||||||
// faster to mask the bits!
|
switch (expdiff.ToInt())
|
||||||
ASSERT(nBitsToIgnore < TTMATH_BITS_PER_UINT);
|
{
|
||||||
|
case +1:
|
||||||
|
man2.Rcr(1,0);
|
||||||
|
mandiff = man1;
|
||||||
|
mandiff.Sub(man2);
|
||||||
|
break;
|
||||||
|
case -1:
|
||||||
|
man1.Rcr(1,0);
|
||||||
|
mandiff = man2;
|
||||||
|
mandiff.Sub(man1);
|
||||||
|
break;
|
||||||
|
case 0:
|
||||||
|
if (man2 > man1)
|
||||||
|
{
|
||||||
|
mandiff = man2;
|
||||||
|
mandiff.Sub(man1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
mandiff = man1;
|
||||||
|
mandiff.Sub(man2);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// faster to mask the bits!
|
||||||
|
ASSERT(nBitsToIgnore < TTMATH_BITS_PER_UINT);
|
||||||
|
|
||||||
for (int n = man-1; n > 0; --n)
|
for (int n = man-1; n > 0; --n)
|
||||||
{
|
{
|
||||||
if (mantissa.table[n] != ss2.mantissa.table[n])
|
if (mandiff.table[n] != 0)
|
||||||
return(false);
|
return(false);
|
||||||
}
|
}
|
||||||
uint nMask = ~((1 << nBitsToIgnore) - 1);
|
uint nMask = ~((1 << nBitsToIgnore) - 1);
|
||||||
return((mantissa.table[0] & nMask) == (ss2.mantissa.table[0] & nMask));
|
return((mandiff.table[0] & nMask) == 0);
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool operator<(const Big<exp,man> & ss2) const
|
bool operator<(const Big<exp,man> & ss2) const
|
||||||
|
|
|
@ -871,6 +871,83 @@ namespace ttmath
|
||||||
u3 = sub_res_low_.u_.low;
|
u3 = sub_res_low_.u_.low;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
this static method adds one vector to the other
|
||||||
|
'ss1' is larger in size or equal to 'ss2'
|
||||||
|
|
||||||
|
ss1 points to the first (larger) vector
|
||||||
|
ss2 points to the second vector
|
||||||
|
ss1_size - size of the ss1 (and size of the result too)
|
||||||
|
ss2_size - size of the ss2
|
||||||
|
result - is the result vector (which has size the same as ss1: ss1_size)
|
||||||
|
|
||||||
|
Example: ss1_size is 5, ss2_size is 3
|
||||||
|
ss1: ss2: result (output):
|
||||||
|
5 1 5+1
|
||||||
|
4 3 4+3
|
||||||
|
2 7 2+7
|
||||||
|
6 6
|
||||||
|
9 9
|
||||||
|
of course the carry is propagated and will be returned from the last item
|
||||||
|
(this method is used by the Karatsuba multiplication algorithm)
|
||||||
|
*/
|
||||||
|
template<uint value_size>
|
||||||
|
uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
|
||||||
|
{
|
||||||
|
uint i, c = 0;
|
||||||
|
|
||||||
|
TTMATH_ASSERT( ss1_size >= ss2_size )
|
||||||
|
|
||||||
|
for(i=0 ; i<ss2_size ; ++i)
|
||||||
|
c = AddTwoWords(ss1[i], ss2[i], c, &result[i]);
|
||||||
|
|
||||||
|
for( ; i<ss1_size ; ++i)
|
||||||
|
c = AddTwoWords(ss1[i], 0, c, &result[i]);
|
||||||
|
|
||||||
|
TTMATH_LOG("UInt::AddVector")
|
||||||
|
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
this static method subtracts one vector from the other
|
||||||
|
'ss1' is larger in size or equal to 'ss2'
|
||||||
|
|
||||||
|
ss1 points to the first (larger) vector
|
||||||
|
ss2 points to the second vector
|
||||||
|
ss1_size - size of the ss1 (and size of the result too)
|
||||||
|
ss2_size - size of the ss2
|
||||||
|
result - is the result vector (which has size the same as ss1: ss1_size)
|
||||||
|
|
||||||
|
Example: ss1_size is 5, ss2_size is 3
|
||||||
|
ss1: ss2: result (output):
|
||||||
|
5 1 5-1
|
||||||
|
4 3 4-3
|
||||||
|
2 7 2-7
|
||||||
|
6 6-1 (the borrow from previous item)
|
||||||
|
9 9
|
||||||
|
return (carry): 0
|
||||||
|
of course the carry (borrow) is propagated and will be returned from the last item
|
||||||
|
(this method is used by the Karatsuba multiplication algorithm)
|
||||||
|
*/
|
||||||
|
template<uint value_size>
|
||||||
|
uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
|
||||||
|
{
|
||||||
|
uint i, c = 0;
|
||||||
|
|
||||||
|
TTMATH_ASSERT( ss1_size >= ss2_size )
|
||||||
|
|
||||||
|
for(i=0 ; i<ss2_size ; ++i)
|
||||||
|
c = SubTwoWords(ss1[i], ss2[i], c, &result[i]);
|
||||||
|
|
||||||
|
for( ; i<ss1_size ; ++i)
|
||||||
|
c = SubTwoWords(ss1[i], 0, c, &result[i]);
|
||||||
|
|
||||||
|
TTMATH_LOG("UInt::SubVector")
|
||||||
|
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
#endif // #ifdef TTMATH_PLATFORM64
|
#endif // #ifdef TTMATH_PLATFORM64
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -42,7 +42,7 @@
|
||||||
#ifndef TTMATH_NOASM
|
#ifndef TTMATH_NOASM
|
||||||
#ifdef TTMATH_PLATFORM32
|
#ifdef TTMATH_PLATFORM32
|
||||||
|
|
||||||
#pragma message("TTMATH_ASM")
|
#pragma message("TTMATH_ASM32")
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
\file ttmathuint_x86.h
|
\file ttmathuint_x86.h
|
||||||
|
|
|
@ -4,20 +4,20 @@
|
||||||
* Author: Tomasz Sowa <t.sowa@slimaczek.pl>
|
* Author: Tomasz Sowa <t.sowa@slimaczek.pl>
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2006-2009, Tomasz Sowa
|
* Copyright (c) 2006-2009, Tomasz Sowa
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
*
|
*
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
* this list of conditions and the following disclaimer.
|
* this list of conditions and the following disclaimer.
|
||||||
*
|
*
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
* documentation and/or other materials provided with the distribution.
|
* documentation and/or other materials provided with the distribution.
|
||||||
*
|
*
|
||||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||||
* project may be used to endorse or promote products derived
|
* project may be used to endorse or promote products derived
|
||||||
* from this software without specific prior written permission.
|
* from this software without specific prior written permission.
|
||||||
|
@ -39,10 +39,10 @@
|
||||||
#ifndef headerfilettmathuint_x86_64
|
#ifndef headerfilettmathuint_x86_64
|
||||||
#define headerfilettmathuint_x86_64
|
#define headerfilettmathuint_x86_64
|
||||||
|
|
||||||
|
|
||||||
#ifndef TTMATH_NOASM
|
#ifndef TTMATH_NOASM
|
||||||
#ifdef TTMATH_PLATFORM64
|
#ifdef TTMATH_PLATFORM64
|
||||||
|
|
||||||
|
#pragma message("TTMATH_ASM64")
|
||||||
/*!
|
/*!
|
||||||
\file ttmathuint_x86_64.h
|
\file ttmathuint_x86_64.h
|
||||||
\brief template class UInt<uint> with assembler code for 64bit x86_64 processors
|
\brief template class UInt<uint> with assembler code for 64bit x86_64 processors
|
||||||
|
@ -50,6 +50,9 @@
|
||||||
this file is included at the end of ttmathuint.h
|
this file is included at the end of ttmathuint.h
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#define WIN32_LEAN_AND_MEAN
|
||||||
|
#include <windows.h>
|
||||||
|
|
||||||
|
|
||||||
namespace ttmath
|
namespace ttmath
|
||||||
{
|
{
|
||||||
|
@ -113,14 +116,14 @@ namespace ttmath
|
||||||
this part should be compiled with gcc
|
this part should be compiled with gcc
|
||||||
*/
|
*/
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
|
|
||||||
"xorq %%rdx, %%rdx \n"
|
"xorq %%rdx, %%rdx \n"
|
||||||
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
|
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
|
||||||
|
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movq (%%rsi,%%rdx,8), %%rax \n"
|
"movq (%%rsi,%%rdx,8), %%rax \n"
|
||||||
"adcq %%rax, (%%rbx,%%rdx,8) \n"
|
"adcq %%rax, (%%rbx,%%rdx,8) \n"
|
||||||
|
|
||||||
"incq %%rdx \n"
|
"incq %%rdx \n"
|
||||||
"decq %%rcx \n"
|
"decq %%rcx \n"
|
||||||
"jnz 1b \n"
|
"jnz 1b \n"
|
||||||
|
@ -134,7 +137,7 @@ namespace ttmath
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
TTMATH_LOG("UInt64::Add")
|
TTMATH_LOG("UInt64::Add")
|
||||||
|
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -150,7 +153,7 @@ namespace ttmath
|
||||||
if we've got (value_size=3):
|
if we've got (value_size=3):
|
||||||
table[0] = 10;
|
table[0] = 10;
|
||||||
table[1] = 30;
|
table[1] = 30;
|
||||||
table[2] = 5;
|
table[2] = 5;
|
||||||
and we call:
|
and we call:
|
||||||
AddInt(2,1)
|
AddInt(2,1)
|
||||||
then it'll be:
|
then it'll be:
|
||||||
|
@ -187,7 +190,7 @@ namespace ttmath
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"addq %%rax, (%%rbx,%%rdx,8) \n"
|
"addq %%rax, (%%rbx,%%rdx,8) \n"
|
||||||
"jnc 2f \n"
|
"jnc 2f \n"
|
||||||
|
|
||||||
"movq $1, %%rax \n"
|
"movq $1, %%rax \n"
|
||||||
"incq %%rdx \n"
|
"incq %%rdx \n"
|
||||||
"decq %%rcx \n"
|
"decq %%rcx \n"
|
||||||
|
@ -204,7 +207,7 @@ namespace ttmath
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
TTMATH_LOG("UInt64::AddInt")
|
TTMATH_LOG("UInt64::AddInt")
|
||||||
|
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -236,14 +239,38 @@ namespace ttmath
|
||||||
table[1] = 4 + x1 = 14
|
table[1] = 4 + x1 = 14
|
||||||
table[2] = 5 + x2 = 25
|
table[2] = 5 + x2 = 25
|
||||||
table[3] = 6
|
table[3] = 6
|
||||||
|
|
||||||
and no carry at the end of table[3]
|
and no carry at the end of table[3]
|
||||||
|
|
||||||
(of course if there was a carry in table[2](5+20) then
|
(of course if there was a carry in table[2](5+20) then
|
||||||
this carry would be passed to the table[3] etc.)
|
this carry would be passed to the table[3] etc.)
|
||||||
*/
|
*/
|
||||||
template<uint value_size>
|
template<uint value_size>
|
||||||
uint UInt<value_size>::AddTwoInts(uint x2, uint x1, uint index)
|
uint UInt<value_size>::AddTwoInts(uint x2, uint x1, uint index)
|
||||||
|
#if 0
|
||||||
|
{
|
||||||
|
uint i, c;
|
||||||
|
|
||||||
|
TTMATH_ASSERT( index < value_size )
|
||||||
|
|
||||||
|
printf("add %Id + %Id\n",x1,x2);
|
||||||
|
for(int i=index ; i<value_size ; ++i)
|
||||||
|
printf("%d: %Id\n",i,table[i]);
|
||||||
|
|
||||||
|
c = AddTwoWords(table[index], x1, 0, &table[index]);
|
||||||
|
c = AddTwoWords(table[index+1], x2, c, &table[index+1]);
|
||||||
|
|
||||||
|
for(i=index+2 ; i<value_size && c ; ++i)
|
||||||
|
c = AddTwoWords(table[i], 0, c, &table[i]);
|
||||||
|
for(i=index ; i<value_size ; ++i)
|
||||||
|
printf("%d: %Id\n",i,table[i]);
|
||||||
|
printf(" -> %d\n",c);
|
||||||
|
|
||||||
|
TTMATH_LOG("UInt::AddTwoInts")
|
||||||
|
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
#else
|
||||||
{
|
{
|
||||||
uint b = value_size;
|
uint b = value_size;
|
||||||
uint * p1 = table;
|
uint * p1 = table;
|
||||||
|
@ -253,7 +280,14 @@ namespace ttmath
|
||||||
|
|
||||||
#ifndef __GNUC__
|
#ifndef __GNUC__
|
||||||
#if defined(_M_X64)
|
#if defined(_M_X64)
|
||||||
c = addindexed2_x64(p1,b,index,x2,x1);
|
//printf("add %Id + %Id\n",x1,x2);
|
||||||
|
//for(int i=index ; i<value_size ; ++i)
|
||||||
|
// printf("%d: %Id\n",i,table[i]);
|
||||||
|
//if (table[0] == 1265784741359897913) DebugBreak();
|
||||||
|
c = addindexed2_x64(p1,b,index,x1,x2);
|
||||||
|
//for(int i=index ; i<value_size ; ++i)
|
||||||
|
// printf("%d: %Id\n",i,table[i]);
|
||||||
|
//printf(" -> %d\n",c);
|
||||||
#else
|
#else
|
||||||
#error "another compiler than GCC is currently not supported in 64bit mode"
|
#error "another compiler than GCC is currently not supported in 64bit mode"
|
||||||
#endif
|
#endif
|
||||||
|
@ -261,11 +295,11 @@ namespace ttmath
|
||||||
|
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
uint dummy, dummy2;
|
uint dummy, dummy2;
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
|
|
||||||
"subq %%rdx, %%rcx \n"
|
"subq %%rdx, %%rcx \n"
|
||||||
|
|
||||||
"addq %%rsi, (%%rbx,%%rdx,8) \n"
|
"addq %%rsi, (%%rbx,%%rdx,8) \n"
|
||||||
"incq %%rdx \n"
|
"incq %%rdx \n"
|
||||||
"decq %%rcx \n"
|
"decq %%rcx \n"
|
||||||
|
@ -289,10 +323,12 @@ namespace ttmath
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
TTMATH_LOG("UInt64::AddTwoInts")
|
TTMATH_LOG("UInt64::AddTwoInts")
|
||||||
|
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -328,16 +364,16 @@ namespace ttmath
|
||||||
|
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
uint dummy, dummy2;
|
uint dummy, dummy2;
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
|
|
||||||
"xorq %%rdx, %%rdx \n"
|
"xorq %%rdx, %%rdx \n"
|
||||||
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
|
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
|
||||||
|
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movq (%%rsi,%%rdx,8), %%rax \n"
|
"movq (%%rsi,%%rdx,8), %%rax \n"
|
||||||
"sbbq %%rax, (%%rbx,%%rdx,8) \n"
|
"sbbq %%rax, (%%rbx,%%rdx,8) \n"
|
||||||
|
|
||||||
"incq %%rdx \n"
|
"incq %%rdx \n"
|
||||||
"decq %%rcx \n"
|
"decq %%rcx \n"
|
||||||
"jnz 1b \n"
|
"jnz 1b \n"
|
||||||
|
@ -366,7 +402,7 @@ namespace ttmath
|
||||||
if we've got (value_size=3):
|
if we've got (value_size=3):
|
||||||
table[0] = 10;
|
table[0] = 10;
|
||||||
table[1] = 30;
|
table[1] = 30;
|
||||||
table[2] = 5;
|
table[2] = 5;
|
||||||
and we call:
|
and we call:
|
||||||
SubInt(2,1)
|
SubInt(2,1)
|
||||||
then it'll be:
|
then it'll be:
|
||||||
|
@ -395,15 +431,15 @@ namespace ttmath
|
||||||
|
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
uint dummy, dummy2;
|
uint dummy, dummy2;
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
|
|
||||||
"subq %%rdx, %%rcx \n"
|
"subq %%rdx, %%rcx \n"
|
||||||
|
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"subq %%rax, (%%rbx,%%rdx,8) \n"
|
"subq %%rax, (%%rbx,%%rdx,8) \n"
|
||||||
"jnc 2f \n"
|
"jnc 2f \n"
|
||||||
|
|
||||||
"movq $1, %%rax \n"
|
"movq $1, %%rax \n"
|
||||||
"incq %%rdx \n"
|
"incq %%rdx \n"
|
||||||
"decq %%rcx \n"
|
"decq %%rcx \n"
|
||||||
|
@ -436,7 +472,7 @@ namespace ttmath
|
||||||
for example:
|
for example:
|
||||||
let this is 001010000
|
let this is 001010000
|
||||||
after Rcl2_one(1) there'll be 010100001 and Rcl2_one returns 0
|
after Rcl2_one(1) there'll be 010100001 and Rcl2_one returns 0
|
||||||
|
|
||||||
***this method is created only on a 64bit platform***
|
***this method is created only on a 64bit platform***
|
||||||
*/
|
*/
|
||||||
template<uint value_size>
|
template<uint value_size>
|
||||||
|
@ -455,9 +491,9 @@ namespace ttmath
|
||||||
|
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
uint dummy, dummy2;
|
uint dummy, dummy2;
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
|
|
||||||
"xorq %%rdx, %%rdx \n" // rdx=0
|
"xorq %%rdx, %%rdx \n" // rdx=0
|
||||||
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
|
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
|
||||||
|
|
||||||
|
@ -473,7 +509,7 @@ namespace ttmath
|
||||||
: "=c" (c), "=a" (dummy), "=d" (dummy2)
|
: "=c" (c), "=a" (dummy), "=d" (dummy2)
|
||||||
: "1" (c), "0" (b), "b" (p1)
|
: "1" (c), "0" (b), "b" (p1)
|
||||||
: "cc", "memory" );
|
: "cc", "memory" );
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
TTMATH_LOG("UInt64::Rcl2_one")
|
TTMATH_LOG("UInt64::Rcl2_one")
|
||||||
|
@ -512,7 +548,7 @@ namespace ttmath
|
||||||
|
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
uint dummy;
|
uint dummy;
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
|
|
||||||
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
|
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
|
||||||
|
@ -549,7 +585,7 @@ namespace ttmath
|
||||||
for example:
|
for example:
|
||||||
let this is 001010000
|
let this is 001010000
|
||||||
after Rcl2(3, 1) there'll be 010000111 and Rcl2 returns 1
|
after Rcl2(3, 1) there'll be 010000111 and Rcl2 returns 1
|
||||||
|
|
||||||
***this method is created only on a 64bit platform***
|
***this method is created only on a 64bit platform***
|
||||||
*/
|
*/
|
||||||
template<uint value_size>
|
template<uint value_size>
|
||||||
|
@ -570,9 +606,9 @@ namespace ttmath
|
||||||
|
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
uint dummy, dummy2, dummy3;
|
uint dummy, dummy2, dummy3;
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
|
|
||||||
"movq %%rcx, %%rsi \n"
|
"movq %%rcx, %%rsi \n"
|
||||||
"movq $64, %%rcx \n"
|
"movq $64, %%rcx \n"
|
||||||
"subq %%rsi, %%rcx \n"
|
"subq %%rsi, %%rcx \n"
|
||||||
|
@ -595,11 +631,11 @@ namespace ttmath
|
||||||
"xorq %%rax, (%%rbx,%%rdx,8) \n"
|
"xorq %%rax, (%%rbx,%%rdx,8) \n"
|
||||||
"orq %%rsi, (%%rbx,%%rdx,8) \n"
|
"orq %%rsi, (%%rbx,%%rdx,8) \n"
|
||||||
"movq %%rax, %%rsi \n"
|
"movq %%rax, %%rsi \n"
|
||||||
|
|
||||||
"incq %%rdx \n"
|
"incq %%rdx \n"
|
||||||
"decq %%rdi \n"
|
"decq %%rdi \n"
|
||||||
"jnz 1b \n"
|
"jnz 1b \n"
|
||||||
|
|
||||||
"and $1, %%rax \n"
|
"and $1, %%rax \n"
|
||||||
|
|
||||||
: "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
|
: "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
|
||||||
|
@ -647,7 +683,7 @@ namespace ttmath
|
||||||
|
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
uint dummy, dummy2, dummy3;
|
uint dummy, dummy2, dummy3;
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
|
|
||||||
"movq %%rcx, %%rsi \n"
|
"movq %%rcx, %%rsi \n"
|
||||||
|
@ -674,11 +710,11 @@ namespace ttmath
|
||||||
"xorq %%rax, (%%rbx,%%rdx,8) \n"
|
"xorq %%rax, (%%rbx,%%rdx,8) \n"
|
||||||
"orq %%rsi, (%%rbx,%%rdx,8) \n"
|
"orq %%rsi, (%%rbx,%%rdx,8) \n"
|
||||||
"movq %%rax, %%rsi \n"
|
"movq %%rax, %%rsi \n"
|
||||||
|
|
||||||
"decq %%rdx \n"
|
"decq %%rdx \n"
|
||||||
"decq %%rdi \n"
|
"decq %%rdi \n"
|
||||||
"jnz 1b \n"
|
"jnz 1b \n"
|
||||||
|
|
||||||
"rolq $1, %%rax \n"
|
"rolq $1, %%rax \n"
|
||||||
"andq $1, %%rax \n"
|
"andq $1, %%rax \n"
|
||||||
|
|
||||||
|
@ -754,7 +790,7 @@ namespace ttmath
|
||||||
uint UInt<value_size>::SetBitInWord(uint & value, uint bit)
|
uint UInt<value_size>::SetBitInWord(uint & value, uint bit)
|
||||||
{
|
{
|
||||||
TTMATH_ASSERT( bit < TTMATH_BITS_PER_UINT )
|
TTMATH_ASSERT( bit < TTMATH_BITS_PER_UINT )
|
||||||
|
|
||||||
uint old_bit;
|
uint old_bit;
|
||||||
uint v = value;
|
uint v = value;
|
||||||
|
|
||||||
|
@ -778,7 +814,7 @@ namespace ttmath
|
||||||
|
|
||||||
"setc %%bl \n"
|
"setc %%bl \n"
|
||||||
"movzx %%bl, %%rbx \n"
|
"movzx %%bl, %%rbx \n"
|
||||||
|
|
||||||
: "=a" (v), "=b" (old_bit)
|
: "=a" (v), "=b" (old_bit)
|
||||||
: "0" (v), "1" (bit)
|
: "0" (v), "1" (bit)
|
||||||
: "cc" );
|
: "cc" );
|
||||||
|
@ -803,7 +839,7 @@ namespace ttmath
|
||||||
multiplication: result2:result1 = a * b
|
multiplication: result2:result1 = a * b
|
||||||
result2 - higher word
|
result2 - higher word
|
||||||
result1 - lower word of the result
|
result1 - lower word of the result
|
||||||
|
|
||||||
this method never returns a carry
|
this method never returns a carry
|
||||||
|
|
||||||
***this method is created only on a 64bit platform***
|
***this method is created only on a 64bit platform***
|
||||||
|
@ -834,7 +870,7 @@ namespace ttmath
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
|
|
||||||
"mulq %%rdx \n"
|
"mulq %%rdx \n"
|
||||||
|
|
||||||
: "=a" (result1_), "=d" (result2_)
|
: "=a" (result1_), "=d" (result2_)
|
||||||
|
@ -857,13 +893,13 @@ namespace ttmath
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef __GNUC__
|
#ifndef __GNUC__
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
this method calculates 128bits word a:b / 64bits c (a higher, b lower word)
|
this method calculates 128bits word a:b / 64bits c (a higher, b lower word)
|
||||||
r = a:b / c and rest - remainder
|
r = a:b / c and rest - remainder
|
||||||
|
|
||||||
***this method is created only on a 64bit platform***
|
***this method is created only on a 64bit platform***
|
||||||
|
|
||||||
*
|
*
|
||||||
|
@ -896,7 +932,7 @@ namespace ttmath
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
|
|
||||||
"divq %%rcx \n"
|
"divq %%rcx \n"
|
||||||
|
@ -986,7 +1022,7 @@ namespace ttmath
|
||||||
uint i, c = 0;
|
uint i, c = 0;
|
||||||
|
|
||||||
TTMATH_ASSERT( ss1_size >= ss2_size )
|
TTMATH_ASSERT( ss1_size >= ss2_size )
|
||||||
|
|
||||||
for(i=0 ; i<ss2_size ; ++i)
|
for(i=0 ; i<ss2_size ; ++i)
|
||||||
c = AddTwoWords(ss1[i], ss2[i], c, &result[i]);
|
c = AddTwoWords(ss1[i], ss2[i], c, &result[i]);
|
||||||
|
|
||||||
|
@ -1025,7 +1061,7 @@ namespace ttmath
|
||||||
uint i, c = 0;
|
uint i, c = 0;
|
||||||
|
|
||||||
TTMATH_ASSERT( ss1_size >= ss2_size )
|
TTMATH_ASSERT( ss1_size >= ss2_size )
|
||||||
|
|
||||||
for(i=0 ; i<ss2_size ; ++i)
|
for(i=0 ; i<ss2_size ; ++i)
|
||||||
c = SubTwoWords(ss1[i], ss2[i], c, &result[i]);
|
c = SubTwoWords(ss1[i], ss2[i], c, &result[i]);
|
||||||
|
|
||||||
|
|
|
@ -19,7 +19,7 @@ PUBLIC div_x64
|
||||||
;
|
;
|
||||||
|
|
||||||
.CODE
|
.CODE
|
||||||
|
|
||||||
ALIGN 8
|
ALIGN 8
|
||||||
|
|
||||||
;----------------------------------------
|
;----------------------------------------
|
||||||
|
@ -33,20 +33,20 @@ adc_x64 PROC
|
||||||
xor rax, rax
|
xor rax, rax
|
||||||
xor r11, r11
|
xor r11, r11
|
||||||
sub rax, r9 ; sets CARRY if r9 != 0
|
sub rax, r9 ; sets CARRY if r9 != 0
|
||||||
|
|
||||||
ALIGN 16
|
ALIGN 16
|
||||||
loop1:
|
loop1:
|
||||||
mov rax,qword ptr [rdx + r11 * 8]
|
mov rax,qword ptr [rdx + r11 * 8]
|
||||||
adc qword ptr [rcx + r11 * 8], rax
|
adc qword ptr [rcx + r11 * 8], rax
|
||||||
lea r11, [r11+1]
|
lea r11, [r11+1]
|
||||||
dec r8
|
dec r8
|
||||||
jnz loop1
|
jnz loop1
|
||||||
|
|
||||||
setc al
|
setc al
|
||||||
movzx rax, al
|
movzx rax, al
|
||||||
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
adc_x64 ENDP
|
adc_x64 ENDP
|
||||||
|
|
||||||
;----------------------------------------
|
;----------------------------------------
|
||||||
|
@ -80,14 +80,14 @@ loop1:
|
||||||
lea r8, [r8+1]
|
lea r8, [r8+1]
|
||||||
add qword ptr [rcx + r8 * 8], r9
|
add qword ptr [rcx + r8 * 8], r9
|
||||||
jc loop1
|
jc loop1
|
||||||
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
done_with_cy:
|
done_with_cy:
|
||||||
lea rax, [rax+1] ; rax = 1
|
lea rax, [rax+1] ; rax = 1
|
||||||
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
addindexed_x64 ENDP
|
addindexed_x64 ENDP
|
||||||
|
|
||||||
;----------------------------------------
|
;----------------------------------------
|
||||||
|
@ -98,8 +98,8 @@ addindexed_x64 ENDP
|
||||||
|
|
||||||
addindexed2_x64 PROC
|
addindexed2_x64 PROC
|
||||||
|
|
||||||
; rcx = p1
|
; rcx = p1 (pointer)
|
||||||
; rdx = b
|
; rdx = b (value size)
|
||||||
; r8 = nPos
|
; r8 = nPos
|
||||||
; r9 = nValue1
|
; r9 = nValue1
|
||||||
; [esp+0x28] = nValue2
|
; [esp+0x28] = nValue2
|
||||||
|
@ -109,26 +109,23 @@ addindexed2_x64 PROC
|
||||||
sub rdx, r8 ; rdx = remaining count of uints
|
sub rdx, r8 ; rdx = remaining count of uints
|
||||||
mov r10, [esp+028h] ; r10 = nValue2
|
mov r10, [esp+028h] ; r10 = nValue2
|
||||||
|
|
||||||
add qword ptr [r11 + r8 * 8], r10
|
add qword ptr [r11 + r8 * 8], r9
|
||||||
lea r8, [r8+1]
|
lea r8, [r8+1]
|
||||||
|
lea rdx, [rdx-1]
|
||||||
|
adc qword ptr [r11 + r8 * 8], r10
|
||||||
|
jc next
|
||||||
|
ret
|
||||||
|
|
||||||
ALIGN 16
|
ALIGN 16
|
||||||
loop1:
|
loop1:
|
||||||
adc qword ptr [r11 + r8 * 8], r9
|
lea r8, [r8+1]
|
||||||
|
add qword ptr [r11 + r8 * 8], 1
|
||||||
jc next
|
jc next
|
||||||
ret
|
ret
|
||||||
|
|
||||||
next:
|
|
||||||
lea r8, [r8+1]
|
|
||||||
xor r9, r9 ; set to 0 -> cy still set!
|
|
||||||
dec rdx
|
|
||||||
jnz loop1
|
|
||||||
jc return_1 ; most of the times, there will be NO carry (I hope)
|
|
||||||
|
|
||||||
done:
|
next:
|
||||||
ret
|
dec rdx ; does not modify CY too...
|
||||||
|
jnz loop1
|
||||||
return_1:
|
|
||||||
lea rax, [rax+1]
|
lea rax, [rax+1]
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
@ -138,8 +135,6 @@ addindexed2_x64 ENDP
|
||||||
|
|
||||||
ALIGN 8
|
ALIGN 8
|
||||||
|
|
||||||
ALIGN 8
|
|
||||||
|
|
||||||
;----------------------------------------
|
;----------------------------------------
|
||||||
|
|
||||||
sbb_x64 PROC
|
sbb_x64 PROC
|
||||||
|
@ -152,15 +147,15 @@ sbb_x64 PROC
|
||||||
xor rax, rax
|
xor rax, rax
|
||||||
xor r11, r11
|
xor r11, r11
|
||||||
sub rax, r9 ; sets CARRY if r9 != 0
|
sub rax, r9 ; sets CARRY if r9 != 0
|
||||||
|
|
||||||
ALIGN 16
|
ALIGN 16
|
||||||
loop1:
|
loop1:
|
||||||
mov rax,qword ptr [rdx + r11 * 8]
|
mov rax,qword ptr [rdx + r11 * 8]
|
||||||
sbb qword ptr [rcx + r11 * 8], rax
|
sbb qword ptr [rcx + r11 * 8], rax
|
||||||
lea r11, [r11+1]
|
lea r11, [r11+1]
|
||||||
dec r8
|
dec r8
|
||||||
jnz loop1
|
jnz loop1
|
||||||
|
|
||||||
setc al
|
setc al
|
||||||
movzx rax, al
|
movzx rax, al
|
||||||
|
|
||||||
|
@ -181,12 +176,12 @@ subindexed_x64 PROC
|
||||||
; r9 = nValue
|
; r9 = nValue
|
||||||
|
|
||||||
sub rdx, r8 ; rdx = remaining count of uints
|
sub rdx, r8 ; rdx = remaining count of uints
|
||||||
|
|
||||||
ALIGN 16
|
ALIGN 16
|
||||||
loop1:
|
loop1:
|
||||||
sub qword ptr [rcx + r8 * 8], r9
|
sub qword ptr [rcx + r8 * 8], r9
|
||||||
jnc done
|
jnc done
|
||||||
|
|
||||||
lea r8, [r8+1]
|
lea r8, [r8+1]
|
||||||
mov r9, 1
|
mov r9, 1
|
||||||
dec rdx
|
dec rdx
|
||||||
|
@ -196,7 +191,7 @@ loop1:
|
||||||
done:
|
done:
|
||||||
xor rax, rax
|
xor rax, rax
|
||||||
ret
|
ret
|
||||||
|
|
||||||
return_1:
|
return_1:
|
||||||
mov rax, 1
|
mov rax, 1
|
||||||
ret
|
ret
|
||||||
|
@ -217,17 +212,17 @@ rcl_x64 PROC
|
||||||
mov r11, rcx ; table
|
mov r11, rcx ; table
|
||||||
xor r10, r10
|
xor r10, r10
|
||||||
neg r8 ; CY set if r8 <> 0
|
neg r8 ; CY set if r8 <> 0
|
||||||
|
|
||||||
ALIGN 16
|
ALIGN 16
|
||||||
loop1:
|
loop1:
|
||||||
rcl qword ptr [r11 + r10 * 8], 1
|
rcl qword ptr [r11 + r10 * 8], 1
|
||||||
lea r10, [r10+1]
|
lea r10, [r10+1]
|
||||||
dec rdx
|
dec rdx
|
||||||
jnz loop1
|
jnz loop1
|
||||||
|
|
||||||
setc al
|
setc al
|
||||||
movzx rax, al
|
movzx rax, al
|
||||||
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
rcl_x64 ENDP
|
rcl_x64 ENDP
|
||||||
|
@ -245,16 +240,16 @@ rcr_x64 PROC
|
||||||
|
|
||||||
xor r10, r10
|
xor r10, r10
|
||||||
neg r8 ; CY set if r8 <> 0
|
neg r8 ; CY set if r8 <> 0
|
||||||
|
|
||||||
ALIGN 16
|
ALIGN 16
|
||||||
loop1:
|
loop1:
|
||||||
rcr qword ptr -8[rcx + rdx * 8], 1
|
rcr qword ptr -8[rcx + rdx * 8], 1
|
||||||
dec rdx
|
dec rdx
|
||||||
jnz loop1
|
jnz loop1
|
||||||
|
|
||||||
setc al
|
setc al
|
||||||
movzx rax, al
|
movzx rax, al
|
||||||
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
rcr_x64 ENDP
|
rcr_x64 ENDP
|
||||||
|
@ -270,7 +265,7 @@ div_x64 PROC
|
||||||
; rcx = &Hi
|
; rcx = &Hi
|
||||||
; rdx = &Lo
|
; rdx = &Lo
|
||||||
; r8 = nDiv
|
; r8 = nDiv
|
||||||
|
|
||||||
mov r11, rcx
|
mov r11, rcx
|
||||||
mov r10, rdx
|
mov r10, rdx
|
||||||
|
|
||||||
|
@ -295,21 +290,21 @@ rcl2_x64 PROC
|
||||||
; rdx = nSize
|
; rdx = nSize
|
||||||
; r8 = bits
|
; r8 = bits
|
||||||
; r9 = c
|
; r9 = c
|
||||||
|
|
||||||
push rbx
|
push rbx
|
||||||
|
|
||||||
mov r10, rcx ; r10 = p1
|
mov r10, rcx ; r10 = p1
|
||||||
xor rax, rax
|
xor rax, rax
|
||||||
|
|
||||||
mov rcx, 64
|
mov rcx, 64
|
||||||
sub rcx, r8
|
sub rcx, r8
|
||||||
|
|
||||||
mov r11, -1
|
mov r11, -1
|
||||||
shr r11, cl ; r11 = mask
|
shr r11, cl ; r11 = mask
|
||||||
|
|
||||||
mov rcx, r8 ; rcx = count of bits
|
mov rcx, r8 ; rcx = count of bits
|
||||||
|
|
||||||
mov rbx, rax ; rbx = old value = 0
|
mov rbx, rax ; rbx = old value = 0
|
||||||
or r9, r9
|
or r9, r9
|
||||||
cmovnz rbx, r11 ; if (c) then old value = mask
|
cmovnz rbx, r11 ; if (c) then old value = mask
|
||||||
|
|
||||||
|
@ -323,7 +318,7 @@ loop1:
|
||||||
xor qword ptr [r10+r9*8], rax
|
xor qword ptr [r10+r9*8], rax
|
||||||
or qword ptr [r10+r9*8], rbx
|
or qword ptr [r10+r9*8], rbx
|
||||||
mov rbx, rax
|
mov rbx, rax
|
||||||
|
|
||||||
lea r9, [r9+1]
|
lea r9, [r9+1]
|
||||||
dec rdx
|
dec rdx
|
||||||
|
|
||||||
|
@ -332,8 +327,8 @@ loop1:
|
||||||
and rax, 1
|
and rax, 1
|
||||||
pop rbx
|
pop rbx
|
||||||
ret
|
ret
|
||||||
|
|
||||||
rcl2_x64 ENDP
|
rcl2_x64 ENDP
|
||||||
|
|
||||||
;----------------------------------------
|
;----------------------------------------
|
||||||
|
|
||||||
|
@ -346,20 +341,20 @@ rcr2_x64 PROC
|
||||||
; rdx = nSize
|
; rdx = nSize
|
||||||
; r8 = bits
|
; r8 = bits
|
||||||
; r9 = c
|
; r9 = c
|
||||||
|
|
||||||
push rbx
|
push rbx
|
||||||
mov r10, rcx ; r10 = p1
|
mov r10, rcx ; r10 = p1
|
||||||
xor rax, rax
|
xor rax, rax
|
||||||
|
|
||||||
mov rcx, 64
|
mov rcx, 64
|
||||||
sub rcx, r8
|
sub rcx, r8
|
||||||
|
|
||||||
mov r11, -1
|
mov r11, -1
|
||||||
shl r11, cl ; r11 = mask
|
shl r11, cl ; r11 = mask
|
||||||
|
|
||||||
mov rcx, r8 ; rcx = count of bits
|
mov rcx, r8 ; rcx = count of bits
|
||||||
|
|
||||||
mov rbx, rax ; rbx = old value = 0
|
mov rbx, rax ; rbx = old value = 0
|
||||||
or r9, r9
|
or r9, r9
|
||||||
cmovnz rbx, r11 ; if (c) then old value = mask
|
cmovnz rbx, r11 ; if (c) then old value = mask
|
||||||
|
|
||||||
|
@ -374,18 +369,18 @@ loop1:
|
||||||
xor qword ptr [r10+r9*8], rax
|
xor qword ptr [r10+r9*8], rax
|
||||||
or qword ptr [r10+r9*8], rbx
|
or qword ptr [r10+r9*8], rbx
|
||||||
mov rbx, rax
|
mov rbx, rax
|
||||||
|
|
||||||
lea r9, [r9-1]
|
lea r9, [r9-1]
|
||||||
dec rdx
|
dec rdx
|
||||||
|
|
||||||
jnz loop1
|
jnz loop1
|
||||||
|
|
||||||
rol rax, 1
|
rol rax, 1
|
||||||
and rax, 1
|
and rax, 1
|
||||||
pop rbx
|
pop rbx
|
||||||
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
rcr2_x64 ENDP
|
rcr2_x64 ENDP
|
||||||
|
|
||||||
END
|
END
|
||||||
|
|
Loading…
Reference in New Issue