added: uint UInt::Mul3(const UInt<value_size> & ss2)
void UInt::Mul3Big(const UInt<value_size> & ss2, UInt<value_size*2> & result) a new multiplication algorithm: Karatsuba multiplication, on a vector UInt<100> with all items different from zero this algorithm is faster about 3 times than Mul2Big(), and on a vector UInt<1000> with all items different from zero this algorithm is faster more than 5 times than Mul2Big() (measured on 32bit platform with GCC 4.3.3 with -O3 and -DTTMATH_RELEASE) added: uint MulFastest(const UInt<value_size> & ss2) void MulFastestBig(const UInt<value_size> & ss2, UInt<value_size*2> & result) those methods are trying to select the fastest multiplication algorithm changed: uint Mul(const UInt<value_size> & ss2, uint algorithm = 100) void MulBig(const UInt<value_size> & ss2, UInt<value_size*2> & result, uint algorithm = 100) those methods by default use MulFastest() and MulFastestBig() changed: changed a little Mul2Big() to cooperate with Mul3Big() changed: names of methods in macros TTMATH_LOG() added: uint AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result) uint SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result) three forms: asm x86, asm x86_64, no-asm those methods are used by the Karatsuba multiplication algorithm git-svn-id: svn://ttmath.org/publicrep/ttmath/trunk@148 e52654a7-88a9-db11-a3e9-0013d4bc506e
This commit is contained in:
parent
939d0f7519
commit
eaa19dd46a
21
CHANGELOG
21
CHANGELOG
|
@ -1,4 +1,4 @@
|
|||
Version 0.8.5 (2009.05.11):
|
||||
Version 0.8.5 prerelease (2009.05.15):
|
||||
* fixed: Big::Mod(x) didn't correctly return a carry
|
||||
and the result was sometimes very big (even greater than x)
|
||||
* fixed: global function Mod(x) didn't set an ErrorCode object
|
||||
|
@ -11,6 +11,25 @@ Version 0.8.5 (2009.05.11):
|
|||
the same is to Cos() function
|
||||
* changed: PrepareSin(x) is using Big::Mod() now when reducing 2PI period
|
||||
should be a little accurate especially on a very big 'x'
|
||||
* changed: uint Mul(const UInt<value_size> & ss2, uint algorithm = 100)
|
||||
void MulBig(const UInt<value_size> & ss2, UInt<value_size*2> & result, uint algorithm = 100)
|
||||
those methods by default use MulFastest() and MulFastestBig()
|
||||
* changed: changed a little Mul2Big() to cooperate with Mul3Big()
|
||||
* added: uint UInt::Mul3(const UInt<value_size> & ss2)
|
||||
void UInt::Mul3Big(const UInt<value_size> & ss2, UInt<value_size*2> & result)
|
||||
a new multiplication algorithm: Karatsuba multiplication,
|
||||
on a vector UInt<100> with all items different from zero this algorithm is faster
|
||||
about 3 times than Mul2Big(), and on a vector UInt<1000> with all items different from
|
||||
zero this algorithm is faster more than 5 times than Mul2Big()
|
||||
(measured on 32bit platform with GCC 4.3.3 with -O3 and -DTTMATH_RELEASE)
|
||||
* added: uint MulFastest(const UInt<value_size> & ss2)
|
||||
void MulFastestBig(const UInt<value_size> & ss2, UInt<value_size*2> & result)
|
||||
those methods are trying to select the fastest multiplication algorithm
|
||||
* added: uint AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
|
||||
uint SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
|
||||
three forms: asm x86, asm x86_64, no-asm
|
||||
those methods are used by the Karatsuba multiplication algorithm
|
||||
|
||||
|
||||
Version 0.8.4 (2009.05.08):
|
||||
* fixed: UInt::DivInt() didn't check whether the divisor is zero
|
||||
|
|
|
@ -1642,7 +1642,7 @@ public:
|
|||
// MS Visual Express 2005 reports a warning (in the lines with 'uint man_diff = ...'):
|
||||
// warning C4307: '*' : integral constant overflow
|
||||
// but we're using 'if( man > another_man )' and 'if( man < another_man )' and there'll be no such a situation here
|
||||
#ifndef __GNUC__
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning( disable: 4307 )
|
||||
#endif
|
||||
|
||||
|
@ -1658,7 +1658,7 @@ public:
|
|||
c += exponent.AddInt(man_diff, 0);
|
||||
}
|
||||
|
||||
#ifndef __GNUC__
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning( default: 4307 )
|
||||
#endif
|
||||
|
||||
|
@ -3426,9 +3426,9 @@ private:
|
|||
|
||||
it is called when the base is 10 and some digits were read before
|
||||
*/
|
||||
int FromString_ReadScientificIfExists(const char * & source)
|
||||
uint FromString_ReadScientificIfExists(const char * & source)
|
||||
{
|
||||
int c = 0;
|
||||
uint c = 0;
|
||||
|
||||
bool scientific_read = false;
|
||||
const char * before_scientific = source;
|
||||
|
|
|
@ -237,6 +237,17 @@ namespace ttmath
|
|||
|
||||
|
||||
|
||||
/*!
|
||||
this is a limit when calculating Karatsuba multiplication
|
||||
if the size of a vector is smaller than TTMATH_USE_KARATSUBA_MULTIPLICATION_FROM_SIZE
|
||||
the Karatsuba algorithm will use standard schoolbook multiplication
|
||||
*/
|
||||
#ifdef __GNUC__
|
||||
#define TTMATH_USE_KARATSUBA_MULTIPLICATION_FROM_SIZE 3
|
||||
#else
|
||||
#define TTMATH_USE_KARATSUBA_MULTIPLICATION_FROM_SIZE 5
|
||||
#endif
|
||||
|
||||
|
||||
namespace ttmath
|
||||
{
|
||||
|
|
|
@ -840,8 +840,10 @@ public:
|
|||
|
||||
/*!
|
||||
the multiplication 'this' = 'this' * ss2
|
||||
|
||||
algorithm: 100 - means automatically choose the fastest algorithm
|
||||
*/
|
||||
uint Mul(const UInt<value_size> & ss2, uint algorithm = 2)
|
||||
uint Mul(const UInt<value_size> & ss2, uint algorithm = 100)
|
||||
{
|
||||
switch( algorithm )
|
||||
{
|
||||
|
@ -849,8 +851,14 @@ public:
|
|||
return Mul1(ss2);
|
||||
|
||||
case 2:
|
||||
default:
|
||||
return Mul2(ss2);
|
||||
|
||||
case 3:
|
||||
return Mul3(ss2);
|
||||
|
||||
case 100:
|
||||
default:
|
||||
return MulFastest(ss2);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -860,10 +868,12 @@ public:
|
|||
|
||||
since the 'result' is twice bigger than 'this' and 'ss2'
|
||||
this method never returns a carry
|
||||
|
||||
algorithm: 100 - means automatically choose the fastest algorithm
|
||||
*/
|
||||
void MulBig(const UInt<value_size> & ss2,
|
||||
UInt<value_size*2> & result,
|
||||
uint algorithm = 2)
|
||||
uint algorithm = 100)
|
||||
{
|
||||
switch( algorithm )
|
||||
{
|
||||
|
@ -871,8 +881,14 @@ public:
|
|||
return Mul1Big(ss2, result);
|
||||
|
||||
case 2:
|
||||
default:
|
||||
return Mul2Big(ss2, result);
|
||||
|
||||
case 3:
|
||||
return Mul3Big(ss2, result);
|
||||
|
||||
case 100:
|
||||
default:
|
||||
return MulFastestBig(ss2, result);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -964,7 +980,7 @@ public:
|
|||
uint Mul2(const UInt<value_size> & ss2)
|
||||
{
|
||||
UInt<value_size*2> result;
|
||||
uint i;
|
||||
uint i, c = 0;
|
||||
|
||||
Mul2Big(ss2, result);
|
||||
|
||||
|
@ -975,11 +991,14 @@ public:
|
|||
// testing carry
|
||||
for( ; i<value_size*2 ; ++i)
|
||||
if( result.table[i] != 0 )
|
||||
return 1;
|
||||
{
|
||||
c = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
TTMATH_LOG("UInt::Mul2")
|
||||
|
||||
return 0;
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
|
@ -991,44 +1010,385 @@ public:
|
|||
*/
|
||||
void Mul2Big(const UInt<value_size> & ss2, UInt<value_size*2> & result)
|
||||
{
|
||||
uint r2,r1;
|
||||
uint x1size=value_size, x2size=value_size;
|
||||
uint x1start=0, x2start=0;
|
||||
|
||||
result.SetZero();
|
||||
|
||||
if( value_size > 2 )
|
||||
{
|
||||
// if the value_size is smaller than or equal to 2
|
||||
// there is no sense to set x1size (and others) to another values
|
||||
|
||||
for(x1size=value_size ; x1size>0 && table[x1size-1]==0 ; --x1size);
|
||||
for(x2size=value_size ; x2size>0 && ss2.table[x2size-1]==0 ; --x2size);
|
||||
|
||||
if( x1size==0 || x2size==0 )
|
||||
{
|
||||
TTMATH_LOG("UInt::Mul2Big")
|
||||
return;
|
||||
}
|
||||
|
||||
for(x1start=0 ; x1start<x1size && table[x1start]==0 ; ++x1start);
|
||||
for(x2start=0 ; x2start<x2size && ss2.table[x2start]==0 ; ++x2start);
|
||||
}
|
||||
|
||||
for(uint x1=x1start ; x1<x1size ; ++x1)
|
||||
{
|
||||
for(uint x2=x2start ; x2<x2size ; ++x2)
|
||||
{
|
||||
MulTwoWords(table[x1], ss2.table[x2], &r2, &r1);
|
||||
result.AddTwoInts(r2,r1,x2+x1);
|
||||
// here will never be a carry
|
||||
}
|
||||
}
|
||||
Mul2Big2<value_size>(table, ss2.table, result);
|
||||
|
||||
TTMATH_LOG("UInt::Mul2Big")
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
|
||||
/*!
|
||||
an auxiliary method for calculating the multiplication
|
||||
|
||||
arguments we're taking as pointers (this is to improve the Mul3Big2()- avoiding
|
||||
unnecessary copying objects), the result should be taken as a pointer too,
|
||||
but at the moment there is no method AddTwoInts() which can operate on pointers
|
||||
*/
|
||||
template<uint ss_size>
|
||||
void Mul2Big2(const uint * ss1, const uint * ss2, UInt<ss_size*2> & result)
|
||||
{
|
||||
uint x1size = ss_size, x2size = ss_size;
|
||||
uint x1start = 0, x2start = 0;
|
||||
|
||||
if( ss_size > 2 )
|
||||
{
|
||||
// if the ss_size is smaller than or equal to 2
|
||||
// there is no sense to set x1size (and others) to another values
|
||||
|
||||
for(x1size=ss_size ; x1size>0 && ss1[x1size-1]==0 ; --x1size);
|
||||
for(x2size=ss_size ; x2size>0 && ss2[x2size-1]==0 ; --x2size);
|
||||
|
||||
for(x1start=0 ; x1start<x1size && ss1[x1start]==0 ; ++x1start);
|
||||
for(x2start=0 ; x2start<x2size && ss2[x2start]==0 ; ++x2start);
|
||||
}
|
||||
|
||||
Mul2Big3<ss_size>(ss1, ss2, result, x1start, x1size, x2start, x2size);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary method for calculating the multiplication
|
||||
*/
|
||||
template<uint ss_size>
|
||||
void Mul2Big3(const uint * ss1, const uint * ss2, UInt<ss_size*2> & result, uint x1start, uint x1size, uint x2start, uint x2size)
|
||||
{
|
||||
uint r2, r1;
|
||||
|
||||
result.SetZero();
|
||||
|
||||
if( x1size==0 || x2size==0 )
|
||||
return;
|
||||
|
||||
for(uint x1=x1start ; x1<x1size ; ++x1)
|
||||
{
|
||||
for(uint x2=x2start ; x2<x2size ; ++x2)
|
||||
{
|
||||
MulTwoWords(ss1[x1], ss2[x2], &r2, &r1);
|
||||
result.AddTwoInts(r2, r1, x2+x1);
|
||||
// here will never be a carry
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public:
|
||||
|
||||
|
||||
/*!
|
||||
multiplication: this = this * ss2
|
||||
|
||||
This is Karatsuba Multiplication algorithm, we're using it when value_size is greater than
|
||||
or equal to TTMATH_USE_KARATSUBA_MULTIPLICATION_FROM_SIZE macro (defined in ttmathuint.h).
|
||||
If value_size is smaller then we're using Mul2Big() instead.
|
||||
|
||||
Karatsuba multiplication:
|
||||
Assume we have:
|
||||
this = x = x1*B^m + x0
|
||||
ss2 = y = y1*B^m + y0
|
||||
where x0 and y0 are less than B^m
|
||||
the product from multiplication we can show as:
|
||||
x*y = (x1*B^m + x0)(y1*B^m + y0) = z2*B^(2m) + z1*B^m + z0
|
||||
where
|
||||
z2 = x1*y1
|
||||
z1 = x1*y0 + x0*y1
|
||||
z0 = x0*y0
|
||||
this is standard schoolbook algorithm with O(n^2), Karatsuba observed that z1 can be given in other form:
|
||||
z1 = (x1 + x0)*(y1 + y0) - z2 - z0 / z1 = (x1*y1 + x1*y0 + x0*y1 + x0*y0) - x1*y1 - x0*y0 = x1*y0 + x0*y1 /
|
||||
and to calculate the multiplication we need only three multiplications (with some additions and subtractions)
|
||||
|
||||
Our objects 'this' and 'ss2' we divide into two parts and by using recurrence we calculate the multiplication.
|
||||
Karatsuba multiplication has O( n^(ln(3)/ln(2)) )
|
||||
*/
|
||||
uint Mul3(const UInt<value_size> & ss2)
|
||||
{
|
||||
UInt<value_size*2> result;
|
||||
uint i, c = 0;
|
||||
|
||||
Mul3Big(ss2, result);
|
||||
|
||||
// copying result
|
||||
for(i=0 ; i<value_size ; ++i)
|
||||
table[i] = result.table[i];
|
||||
|
||||
// testing carry
|
||||
for( ; i<value_size*2 ; ++i)
|
||||
if( result.table[i] != 0 )
|
||||
{
|
||||
c = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
TTMATH_LOG("UInt::Mul3")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
multiplication: result = this * ss2
|
||||
|
||||
result is twice bigger than this and ss2,
|
||||
this method never returns carry,
|
||||
(Karatsuba multiplication)
|
||||
*/
|
||||
void Mul3Big(const UInt<value_size> & ss2, UInt<value_size*2> & result)
|
||||
{
|
||||
Mul3Big2<value_size>(table, ss2.table, result.table);
|
||||
|
||||
TTMATH_LOG("UInt::Mul3Big")
|
||||
}
|
||||
|
||||
|
||||
|
||||
private:
|
||||
|
||||
/*!
|
||||
an auxiliary method for calculating the Karatsuba multiplication
|
||||
|
||||
result_size is equal ss_size*2
|
||||
*/
|
||||
template<uint ss_size>
|
||||
void Mul3Big2(const uint * ss1, const uint * ss2, uint * result)
|
||||
{
|
||||
const uint * x1, * x0, * y1, * y0;
|
||||
|
||||
|
||||
if( ss_size>1 && ss_size<TTMATH_USE_KARATSUBA_MULTIPLICATION_FROM_SIZE )
|
||||
{
|
||||
UInt<ss_size*2> res;
|
||||
Mul2Big2<ss_size>(ss1, ss2, res);
|
||||
|
||||
for(uint i=0 ; i<ss_size*2 ; ++i)
|
||||
result[i] = res.table[i];
|
||||
|
||||
return;
|
||||
}
|
||||
else
|
||||
if( ss_size == 1 )
|
||||
{
|
||||
return MulTwoWords(*ss1, *ss2, &result[1], &result[0]);
|
||||
}
|
||||
|
||||
|
||||
if( (ss_size & 1) == 1 )
|
||||
{
|
||||
// ss_size is odd
|
||||
x0 = ss1;
|
||||
y0 = ss2;
|
||||
x1 = ss1 + ss_size / 2 + 1;
|
||||
y1 = ss2 + ss_size / 2 + 1;
|
||||
|
||||
// the second vectors (x1 and y1) are smaller about one from the first ones (x0 and y0)
|
||||
Mul3Big3<ss_size/2 + 1, ss_size/2, ss_size*2>(x1, x0, y1, y0, result);
|
||||
}
|
||||
else
|
||||
{
|
||||
// ss_size is even
|
||||
x0 = ss1;
|
||||
y0 = ss2;
|
||||
x1 = ss1 + ss_size / 2;
|
||||
y1 = ss2 + ss_size / 2;
|
||||
|
||||
// all four vectors (x0 x1 y0 y1) are equal in size
|
||||
Mul3Big3<ss_size/2, ss_size/2, ss_size*2>(x1, x0, y1, y0, result);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable : 4717)
|
||||
//warning C4717: recursive on all control paths, function will cause runtime stack overflow
|
||||
//we have the stop point in Mul3Big2() method
|
||||
#endif
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary method for calculating the Karatsuba multiplication
|
||||
|
||||
x = x1*B^m + x0
|
||||
y = y1*B^m + y0
|
||||
|
||||
first_size - is the size of vectors: x0 and y0
|
||||
second_size - is the size of vectors: x1 and y1 (can be either equal first_size or smaller about one from first_size)
|
||||
|
||||
x*y = (x1*B^m + x0)(y1*B^m + y0) = z2*B^(2m) + z1*B^m + z0
|
||||
where
|
||||
z0 = x0*y0
|
||||
z2 = x1*y1
|
||||
z1 = (x1 + x0)*(y1 + y0) - z2 - z0
|
||||
*/
|
||||
template<uint first_size, uint second_size, uint result_size>
|
||||
void Mul3Big3(const uint * x1, const uint * x0, const uint * y1, const uint * y0, uint * result)
|
||||
{
|
||||
uint i, c, xc, yc;
|
||||
|
||||
UInt<first_size> temp, temp2;
|
||||
UInt<first_size*3> z1;
|
||||
|
||||
// z0 and z2 we store directly in the result (we don't use any temporary variables)
|
||||
Mul3Big2<first_size>(x0, y0, result); // z0
|
||||
Mul3Big2<second_size>(x1, y1, result+first_size*2); // z2
|
||||
|
||||
// now we calculate z1
|
||||
// temp = (x0 + x1)
|
||||
// temp2 = (y0 + y1)
|
||||
// we're using temp and temp2 with UInt<first_size>, although there can be a carry but
|
||||
// we simple remember it in xc and yc (xc and yc can be either 0 or 1),
|
||||
// and (x0 + x1)*(y0 + y1) we calculate in this way (schoolbook algorithm):
|
||||
//
|
||||
// xc | temp
|
||||
// yc | temp2
|
||||
// --------------------
|
||||
// (temp * temp2)
|
||||
// xc*temp2 |
|
||||
// yc*temp |
|
||||
// xc*yc |
|
||||
// ---------- z1 --------
|
||||
//
|
||||
// and the result is never larger in size than 3*first_size
|
||||
|
||||
xc = AddVector(x0, x1, first_size, second_size, temp.table);
|
||||
yc = AddVector(y0, y1, first_size, second_size, temp2.table);
|
||||
|
||||
Mul3Big2<first_size>(temp.table, temp2.table, z1.table);
|
||||
|
||||
// clearing the rest of z1
|
||||
for(i=first_size*2 ; i<first_size*3 ; ++i)
|
||||
z1.table[i] = 0;
|
||||
|
||||
|
||||
if( xc )
|
||||
{
|
||||
c = AddVector(z1.table+first_size, temp2.table, first_size*3-first_size, first_size, z1.table+first_size);
|
||||
TTMATH_ASSERT( c==0 )
|
||||
}
|
||||
|
||||
if( yc )
|
||||
{
|
||||
c = AddVector(z1.table+first_size, temp.table, first_size*3-first_size, first_size, z1.table+first_size);
|
||||
TTMATH_ASSERT( c==0 )
|
||||
}
|
||||
|
||||
|
||||
if( xc && yc )
|
||||
{
|
||||
for( i=first_size*2 ; i<first_size*3 ; ++i )
|
||||
if( ++z1.table[i] != 0 )
|
||||
break; // break if there was no carry
|
||||
}
|
||||
|
||||
// z1 = z1 - z2
|
||||
c = SubVector(z1.table, result+first_size*2, first_size*3, second_size*2, z1.table);
|
||||
TTMATH_ASSERT(c==0)
|
||||
|
||||
// z1 = z1 - z0
|
||||
c = SubVector(z1.table, result, first_size*3, first_size*2, z1.table);
|
||||
TTMATH_ASSERT(c==0)
|
||||
|
||||
// here we've calculated the z1
|
||||
// now we're adding it to the result
|
||||
|
||||
if( first_size > second_size )
|
||||
{
|
||||
uint z1_size = result_size - first_size;
|
||||
TTMATH_ASSERT( z1_size <= first_size*3 )
|
||||
|
||||
for(i=z1_size ; i<first_size*3 ; ++i)
|
||||
TTMATH_ASSERT( z1.table[i] == 0 )
|
||||
;
|
||||
|
||||
c = AddVector(result+first_size, z1.table, result_size-first_size, z1_size, result+first_size);
|
||||
TTMATH_ASSERT(c==0)
|
||||
}
|
||||
else
|
||||
{
|
||||
c = AddVector(result+first_size, z1.table, result_size-first_size, first_size*3, result+first_size);
|
||||
TTMATH_ASSERT(c==0)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (default : 4717)
|
||||
#endif
|
||||
|
||||
|
||||
public:
|
||||
|
||||
|
||||
/*!
|
||||
multiplication this = this * ss2
|
||||
*/
|
||||
uint MulFastest(const UInt<value_size> & ss2)
|
||||
{
|
||||
UInt<value_size*2> result;
|
||||
uint i, c = 0;
|
||||
|
||||
MulFastestBig(ss2, result);
|
||||
|
||||
// copying result
|
||||
for(i=0 ; i<value_size ; ++i)
|
||||
table[i] = result.table[i];
|
||||
|
||||
// testing carry
|
||||
for( ; i<value_size*2 ; ++i)
|
||||
if( result.table[i] != 0 )
|
||||
{
|
||||
c = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
TTMATH_LOG("UInt::MulFastest")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
/*!
|
||||
multiplication result = this * ss2
|
||||
|
||||
this method is trying to select the fastest algorithm
|
||||
(in the future this method can be improved)
|
||||
*/
|
||||
void MulFastestBig(const UInt<value_size> & ss2, UInt<value_size*2> & result)
|
||||
{
|
||||
if( value_size < TTMATH_USE_KARATSUBA_MULTIPLICATION_FROM_SIZE )
|
||||
return Mul2Big(ss2, result);
|
||||
|
||||
uint x1size = value_size, x2size = value_size;
|
||||
uint x1start = 0, x2start = 0;
|
||||
|
||||
for(x1size=value_size ; x1size>0 && table[x1size-1]==0 ; --x1size);
|
||||
for(x2size=value_size ; x2size>0 && ss2.table[x2size-1]==0 ; --x2size);
|
||||
|
||||
if( x1size==0 || x2size==0 )
|
||||
{
|
||||
// either 'this' or 'ss2' is equal zero - the result is zero too
|
||||
result.SetZero();
|
||||
return;
|
||||
}
|
||||
|
||||
for(x1start=0 ; x1start<x1size && table[x1start]==0 ; ++x1start);
|
||||
for(x2start=0 ; x2start<x2size && ss2.table[x2start]==0 ; ++x2start);
|
||||
|
||||
uint distancex1 = x1size - x1start;
|
||||
uint distancex2 = x2size - x2start;
|
||||
|
||||
if( distancex1 < 3 || distancex2 < 3 )
|
||||
// either 'this' or 'ss2' have only 2 (or 1) item different from zero (side by side)
|
||||
// (this condition in the future can be improved)
|
||||
return Mul2Big3<value_size>(table, ss2.table, result, x1start, x1size, x2start, x2size);
|
||||
|
||||
|
||||
// Karatsuba multiplication
|
||||
Mul3Big(ss2, result);
|
||||
|
||||
TTMATH_LOG("UInt::MulFastestBig")
|
||||
}
|
||||
|
||||
|
||||
/*!
|
||||
|
@ -2909,12 +3269,13 @@ private:
|
|||
uint Rcr2(uint bits, uint c);
|
||||
|
||||
public:
|
||||
|
||||
uint Add(const UInt<value_size> & ss2, uint c=0);
|
||||
uint AddInt(uint value, uint index = 0);
|
||||
uint AddTwoInts(uint x2, uint x1, uint index);
|
||||
static uint AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result);
|
||||
uint Sub(const UInt<value_size> & ss2, uint c=0);
|
||||
uint SubInt(uint value, uint index = 0);
|
||||
static uint SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result);
|
||||
static sint FindLeadingBitInWord(uint x);
|
||||
static uint SetBitInWord(uint & value, uint bit);
|
||||
static void MulTwoWords(uint a, uint b, uint * result_high, uint * result_low);
|
||||
|
@ -2922,6 +3283,23 @@ public:
|
|||
};
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this specialization is needed in order to not confused the compiler "error: ISO C++ forbids zero-size array"
|
||||
when compiling Mul3Big2() method
|
||||
*/
|
||||
template<>
|
||||
class UInt<0>
|
||||
{
|
||||
public:
|
||||
uint table[1];
|
||||
|
||||
void Mul2Big(const UInt<0> &, UInt<0> &) { TTMATH_ASSERT(false) };
|
||||
void SetZero() { TTMATH_ASSERT(false) };
|
||||
uint AddTwoInts(uint, uint, uint) { TTMATH_ASSERT(false) return 0; };
|
||||
};
|
||||
|
||||
|
||||
} //namespace
|
||||
|
||||
|
||||
|
|
|
@ -95,7 +95,7 @@ namespace ttmath
|
|||
for(i=0 ; i<value_size ; ++i)
|
||||
c = AddTwoWords(table[i], ss2.table[i], c, &table[i]);
|
||||
|
||||
TTMATH_LOG("UInt_noasm::Add")
|
||||
TTMATH_LOG("UInt::Add")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
@ -131,7 +131,7 @@ namespace ttmath
|
|||
for(i=index+1 ; i<value_size && c ; ++i)
|
||||
c = AddTwoWords(table[i], 0, c, &table[i]);
|
||||
|
||||
TTMATH_LOG("UInt_noasm::AddInt")
|
||||
TTMATH_LOG("UInt::AddInt")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
@ -184,13 +184,54 @@ namespace ttmath
|
|||
for(i=index+2 ; i<value_size && c ; ++i)
|
||||
c = AddTwoWords(table[i], 0, c, &table[i]);
|
||||
|
||||
TTMATH_LOG("UInt64::AddTwoInts")
|
||||
TTMATH_LOG("UInt::AddTwoInts")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this static method addes one vector to the other
|
||||
'ss1' is larger in size or equal to 'ss2'
|
||||
|
||||
ss1 points to the first (larger) vector
|
||||
ss2 points to the second vector
|
||||
ss1_size - size of the ss1 (and size of the result too)
|
||||
ss2_size - size of the ss2
|
||||
result - is the result vector (which has size the same as ss1: ss1_size)
|
||||
|
||||
Example: ss1_size is 5, ss2_size is 3
|
||||
ss1: ss2: result (output):
|
||||
5 1 5+1
|
||||
4 3 4+3
|
||||
2 7 2+7
|
||||
6 6
|
||||
9 9
|
||||
of course the carry is propagated and will be returned from the last item
|
||||
(this method is used by the Karatsuba multiplication algorithm)
|
||||
*/
|
||||
template<uint value_size>
|
||||
uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
|
||||
{
|
||||
uint i, c = 0;
|
||||
|
||||
TTMATH_ASSERT( ss1_size >= ss2_size )
|
||||
|
||||
for(i=0 ; i<ss2_size ; ++i)
|
||||
c = AddTwoWords(ss1[i], ss2[i], c, &result[i]);
|
||||
|
||||
for( ; i<ss1_size ; ++i)
|
||||
c = AddTwoWords(ss1[i], 0, c, &result[i]);
|
||||
|
||||
TTMATH_LOG("UInt::AddVector")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
template<uint value_size>
|
||||
uint UInt<value_size>::SubTwoWords(uint a, uint b, uint carry, uint * result)
|
||||
{
|
||||
|
@ -232,7 +273,7 @@ namespace ttmath
|
|||
for(i=0 ; i<value_size ; ++i)
|
||||
c = SubTwoWords(table[i], ss2.table[i], c, &table[i]);
|
||||
|
||||
TTMATH_LOG("UInt_noasm::Sub")
|
||||
TTMATH_LOG("UInt::Sub")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
@ -270,12 +311,51 @@ namespace ttmath
|
|||
for(i=index+1 ; i<value_size && c ; ++i)
|
||||
c = SubTwoWords(table[i], 0, c, &table[i]);
|
||||
|
||||
TTMATH_LOG("UInt_noasm::SubInt")
|
||||
TTMATH_LOG("UInt::SubInt")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
/*!
|
||||
this static method subtractes one vector from the other
|
||||
'ss1' is larger in size or equal to 'ss2'
|
||||
|
||||
ss1 points to the first (larger) vector
|
||||
ss2 points to the second vector
|
||||
ss1_size - size of the ss1 (and size of the result too)
|
||||
ss2_size - size of the ss2
|
||||
result - is the result vector (which has size the same as ss1: ss1_size)
|
||||
|
||||
Example: ss1_size is 5, ss2_size is 3
|
||||
ss1: ss2: result (output):
|
||||
5 1 5-1
|
||||
4 3 4-3
|
||||
2 7 2-7
|
||||
6 6-1 (the borrow from previous item)
|
||||
9 9
|
||||
return (carry): 0
|
||||
of course the carry (borrow) is propagated and will be returned from the last item
|
||||
(this method is used by the Karatsuba multiplication algorithm)
|
||||
*/
|
||||
template<uint value_size>
|
||||
uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
|
||||
{
|
||||
uint i, c = 0;
|
||||
|
||||
TTMATH_ASSERT( ss1_size >= ss2_size )
|
||||
|
||||
for(i=0 ; i<ss2_size ; ++i)
|
||||
c = SubTwoWords(ss1[i], ss2[i], c, &result[i]);
|
||||
|
||||
for( ; i<ss1_size ; ++i)
|
||||
c = SubTwoWords(ss1[i], 0, c, &result[i]);
|
||||
|
||||
TTMATH_LOG("UInt::SubVector")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
|
@ -305,7 +385,7 @@ namespace ttmath
|
|||
c = new_c;
|
||||
}
|
||||
|
||||
TTMATH_LOG("UInt64::Rcl2_one")
|
||||
TTMATH_LOG("UInt::Rcl2_one")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
@ -344,7 +424,7 @@ namespace ttmath
|
|||
c = new_c;
|
||||
}
|
||||
|
||||
TTMATH_LOG("UInt64::Rcr2_one")
|
||||
TTMATH_LOG("UInt::Rcr2_one")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
@ -421,7 +501,7 @@ namespace ttmath
|
|||
c = new_c;
|
||||
}
|
||||
|
||||
TTMATH_LOG("UInt64::Rcr2")
|
||||
TTMATH_LOG("UInt::Rcr2")
|
||||
|
||||
return (c & TTMATH_UINT_HIGHEST_BIT) ? 1 : 0;
|
||||
}
|
||||
|
|
|
@ -162,7 +162,7 @@ namespace ttmath
|
|||
|
||||
#endif
|
||||
|
||||
TTMATH_LOG("UInt32::Add")
|
||||
TTMATH_LOG("UInt::Add")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
@ -267,7 +267,7 @@ namespace ttmath
|
|||
|
||||
#endif
|
||||
|
||||
TTMATH_LOG("UInt32::AddInt")
|
||||
TTMATH_LOG("UInt::AddInt")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
@ -392,13 +392,143 @@ namespace ttmath
|
|||
|
||||
#endif
|
||||
|
||||
TTMATH_LOG("UInt32::AddTwoInts")
|
||||
TTMATH_LOG("UInt::AddTwoInts")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this static method addes one vector to the other
|
||||
'ss1' is larger in size or equal to 'ss2'
|
||||
|
||||
ss1 points to the first (larger) vector
|
||||
ss2 points to the second vector
|
||||
ss1_size - size of the ss1 (and size of the result too)
|
||||
ss2_size - size of the ss2
|
||||
result - is the result vector (which has size the same as ss1: ss1_size)
|
||||
|
||||
Example: ss1_size is 5, ss2_size is 3
|
||||
ss1: ss2: result (output):
|
||||
5 1 5+1
|
||||
4 3 4+3
|
||||
2 7 2+7
|
||||
6 6
|
||||
9 9
|
||||
of course the carry is propagated and will be returned from the last item
|
||||
(this method is used by the Karatsuba multiplication algorithm)
|
||||
*/
|
||||
template<uint value_size>
|
||||
uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
|
||||
{
|
||||
TTMATH_ASSERT( ss1_size >= ss2_size )
|
||||
|
||||
uint rest = ss1_size - ss2_size;
|
||||
uint c;
|
||||
|
||||
#ifndef __GNUC__
|
||||
|
||||
// this part might be compiled with for example visual c
|
||||
__asm
|
||||
{
|
||||
pushad
|
||||
|
||||
mov ecx, [ss2_size]
|
||||
xor edx, edx // edx = 0, cf = 0
|
||||
|
||||
mov esi, [ss1]
|
||||
mov ebx, [ss2]
|
||||
mov edi, [result]
|
||||
|
||||
p:
|
||||
mov eax, [esi+edx*4]
|
||||
adc eax, [ebx+edx*4]
|
||||
mov [edi+edx*4], eax
|
||||
|
||||
inc edx
|
||||
dec ecx
|
||||
jnz p
|
||||
|
||||
adc ecx, ecx // ecx has the cf state
|
||||
|
||||
mov ebx, [rest]
|
||||
or ebx, ebx
|
||||
jz end
|
||||
|
||||
xor ebx, ebx
|
||||
sub ebx, ecx // setting cf from ecx
|
||||
mov ecx, [rest] // ecx is != 0
|
||||
mov ebx, 0
|
||||
p2:
|
||||
mov eax, [esi+edx*4]
|
||||
adc eax, ebx
|
||||
mov [edi+edx*4], eax
|
||||
|
||||
inc edx
|
||||
dec ecx
|
||||
jnz p2
|
||||
|
||||
adc ecx, ecx
|
||||
|
||||
end:
|
||||
mov [c], ecx
|
||||
|
||||
popad
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __GNUC__
|
||||
|
||||
// this part should be compiled with gcc
|
||||
uint dummy1, dummy2, dummy3;
|
||||
|
||||
__asm__ __volatile__(
|
||||
"push %%edx \n"
|
||||
"xor %%edx, %%edx \n" // edx = 0, cf = 0
|
||||
"1: \n"
|
||||
"mov (%%esi,%%edx,4), %%eax \n"
|
||||
"adc (%%ebx,%%edx,4), %%eax \n"
|
||||
"mov %%eax, (%%edi,%%edx,4) \n"
|
||||
|
||||
"inc %%edx \n"
|
||||
"dec %%ecx \n"
|
||||
"jnz 1b \n"
|
||||
|
||||
"adc %%ecx, %%ecx \n" // ecx has the cf state
|
||||
"pop %%eax \n" // eax = rest
|
||||
|
||||
"or %%eax, %%eax \n"
|
||||
"jz 3f \n"
|
||||
|
||||
"xor %%ebx, %%ebx \n"
|
||||
"sub %%ecx, %%ebx \n" // setting cf from ecx
|
||||
"mov %%eax, %%ecx \n" // ecx=rest and is != 0
|
||||
"mov $0, %%ebx \n"
|
||||
"2: \n"
|
||||
"mov (%%esi, %%edx, 4), %%eax \n"
|
||||
"adc %%ebx, %%eax \n"
|
||||
"mov %%eax, (%%edi, %%edx, 4) \n"
|
||||
|
||||
"inc %%edx \n"
|
||||
"dec %%ecx \n"
|
||||
"jnz 2b \n"
|
||||
|
||||
"adc %%ecx, %%ecx \n"
|
||||
"3: \n"
|
||||
|
||||
: "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3)
|
||||
: "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result)
|
||||
: "cc", "memory" );
|
||||
|
||||
#endif
|
||||
|
||||
TTMATH_LOG("UInt::AddVector")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
/*!
|
||||
|
@ -489,7 +619,7 @@ namespace ttmath
|
|||
|
||||
#endif
|
||||
|
||||
TTMATH_LOG("UInt32::Sub")
|
||||
TTMATH_LOG("UInt::Sub")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
@ -593,13 +723,152 @@ namespace ttmath
|
|||
|
||||
#endif
|
||||
|
||||
TTMATH_LOG("UInt32::SubInt")
|
||||
TTMATH_LOG("UInt::SubInt")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this static method subtractes one vector from the other
|
||||
'ss1' is larger in size or equal to 'ss2'
|
||||
|
||||
ss1 points to the first (larger) vector
|
||||
ss2 points to the second vector
|
||||
ss1_size - size of the ss1 (and size of the result too)
|
||||
ss2_size - size of the ss2
|
||||
result - is the result vector (which has size the same as ss1: ss1_size)
|
||||
|
||||
Example: ss1_size is 5, ss2_size is 3
|
||||
ss1: ss2: result (output):
|
||||
5 1 5-1
|
||||
4 3 4-3
|
||||
2 7 2-7
|
||||
6 6-1 (the borrow from previous item)
|
||||
9 9
|
||||
return (carry): 0
|
||||
of course the carry (borrow) is propagated and will be returned from the last item
|
||||
(this method is used by the Karatsuba multiplication algorithm)
|
||||
*/
|
||||
template<uint value_size>
|
||||
uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
|
||||
{
|
||||
TTMATH_ASSERT( ss1_size >= ss2_size )
|
||||
|
||||
uint rest = ss1_size - ss2_size;
|
||||
uint c;
|
||||
|
||||
#ifndef __GNUC__
|
||||
|
||||
// this part might be compiled with for example visual c
|
||||
|
||||
/*
|
||||
the asm code is nearly the same as in AddVector
|
||||
only two instructions 'adc' are changed to 'sbb'
|
||||
*/
|
||||
__asm
|
||||
{
|
||||
pushad
|
||||
|
||||
mov ecx, [ss2_size]
|
||||
xor edx, edx // edx = 0, cf = 0
|
||||
|
||||
mov esi, [ss1]
|
||||
mov ebx, [ss2]
|
||||
mov edi, [result]
|
||||
|
||||
p:
|
||||
mov eax, [esi+edx*4]
|
||||
sbb eax, [ebx+edx*4]
|
||||
mov [edi+edx*4], eax
|
||||
|
||||
inc edx
|
||||
dec ecx
|
||||
jnz p
|
||||
|
||||
adc ecx, ecx // ecx has the cf state
|
||||
|
||||
mov ebx, [rest]
|
||||
or ebx, ebx
|
||||
jz end
|
||||
|
||||
xor ebx, ebx
|
||||
sub ebx, ecx // setting cf from ecx
|
||||
mov ecx, [rest] // ecx is != 0
|
||||
mov ebx, 0
|
||||
p2:
|
||||
mov eax, [esi+edx*4]
|
||||
sbb eax, ebx
|
||||
mov [edi+edx*4], eax
|
||||
|
||||
inc edx
|
||||
dec ecx
|
||||
jnz p2
|
||||
|
||||
adc ecx, ecx
|
||||
|
||||
end:
|
||||
mov [c], ecx
|
||||
|
||||
popad
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __GNUC__
|
||||
|
||||
// this part should be compiled with gcc
|
||||
uint dummy1, dummy2, dummy3;
|
||||
|
||||
__asm__ __volatile__(
|
||||
"push %%edx \n"
|
||||
"xor %%edx, %%edx \n" // edx = 0, cf = 0
|
||||
"1: \n"
|
||||
"mov (%%esi,%%edx,4), %%eax \n"
|
||||
"sbb (%%ebx,%%edx,4), %%eax \n"
|
||||
"mov %%eax, (%%edi,%%edx,4) \n"
|
||||
|
||||
"inc %%edx \n"
|
||||
"dec %%ecx \n"
|
||||
"jnz 1b \n"
|
||||
|
||||
"adc %%ecx, %%ecx \n" // ecx has the cf state
|
||||
"pop %%eax \n" // eax = rest
|
||||
|
||||
"or %%eax, %%eax \n"
|
||||
"jz 3f \n"
|
||||
|
||||
"xor %%ebx, %%ebx \n"
|
||||
"sub %%ecx, %%ebx \n" // setting cf from ecx
|
||||
"mov %%eax, %%ecx \n" // ecx=rest and is != 0
|
||||
"mov $0, %%ebx \n"
|
||||
"2: \n"
|
||||
"mov (%%esi, %%edx, 4), %%eax \n"
|
||||
"sbb %%ebx, %%eax \n"
|
||||
"mov %%eax, (%%edi, %%edx, 4) \n"
|
||||
|
||||
"inc %%edx \n"
|
||||
"dec %%ecx \n"
|
||||
"jnz 2b \n"
|
||||
|
||||
"adc %%ecx, %%ecx \n"
|
||||
"3: \n"
|
||||
|
||||
: "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3)
|
||||
: "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result)
|
||||
: "cc", "memory" );
|
||||
|
||||
#endif
|
||||
|
||||
TTMATH_LOG("UInt::SubVector")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this method moves all bits into the left hand side
|
||||
return value <- this <- c
|
||||
|
@ -680,7 +949,7 @@ namespace ttmath
|
|||
|
||||
#endif
|
||||
|
||||
TTMATH_LOG("UInt32::Rcl2_one")
|
||||
TTMATH_LOG("UInt::Rcl2_one")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
@ -758,7 +1027,7 @@ namespace ttmath
|
|||
|
||||
#endif
|
||||
|
||||
TTMATH_LOG("UInt32::Rcr2_one")
|
||||
TTMATH_LOG("UInt::Rcr2_one")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
@ -886,7 +1155,7 @@ namespace ttmath
|
|||
|
||||
#endif
|
||||
|
||||
TTMATH_LOG("UInt32::Rcl2")
|
||||
TTMATH_LOG("UInt::Rcl2")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
@ -1021,7 +1290,7 @@ namespace ttmath
|
|||
|
||||
#endif
|
||||
|
||||
TTMATH_LOG("UInt32::Rcr2")
|
||||
TTMATH_LOG("UInt::Rcr2")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
|
|
@ -112,7 +112,7 @@ namespace ttmath
|
|||
|
||||
#endif
|
||||
|
||||
TTMATH_LOG("UInt64::Add")
|
||||
TTMATH_LOG("UInt::Add")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
@ -178,7 +178,7 @@ namespace ttmath
|
|||
|
||||
#endif
|
||||
|
||||
TTMATH_LOG("UInt64::AddInt")
|
||||
TTMATH_LOG("UInt::AddInt")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
@ -259,7 +259,90 @@ namespace ttmath
|
|||
|
||||
#endif
|
||||
|
||||
TTMATH_LOG("UInt64::AddTwoInts")
|
||||
TTMATH_LOG("UInt::AddTwoInts")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this static method addes one vector to the other
|
||||
'ss1' is larger in size or equal to 'ss2'
|
||||
|
||||
ss1 points to the first (larger) vector
|
||||
ss2 points to the second vector
|
||||
ss1_size - size of the ss1 (and size of the result too)
|
||||
ss2_size - size of the ss2
|
||||
result - is the result vector (which has size the same as ss1: ss1_size)
|
||||
|
||||
Example: ss1_size is 5, ss2_size is 3
|
||||
ss1: ss2: result (output):
|
||||
5 1 5+1
|
||||
4 3 4+3
|
||||
2 7 2+7
|
||||
6 6
|
||||
9 9
|
||||
of course the carry is propagated and will be returned from the last item
|
||||
(this method is used by the Karatsuba multiplication algorithm)
|
||||
*/
|
||||
template<uint value_size>
|
||||
uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
|
||||
{
|
||||
TTMATH_ASSERT( ss1_size >= ss2_size )
|
||||
|
||||
uint rest = ss1_size - ss2_size;
|
||||
uint c;
|
||||
|
||||
#ifndef __GNUC__
|
||||
#error "another compiler than GCC is currently not supported in 64bit mode"
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
|
||||
// this part should be compiled with gcc
|
||||
uint dummy1, dummy2, dummy3;
|
||||
|
||||
__asm__ __volatile__(
|
||||
"mov %%rdx, %%r8 \n"
|
||||
"xor %%rdx, %%rdx \n" // rdx = 0, cf = 0
|
||||
"1: \n"
|
||||
"mov (%%rsi,%%rdx,8), %%rax \n"
|
||||
"adc (%%rbx,%%rdx,8), %%rax \n"
|
||||
"mov %%rax, (%%rdi,%%rdx,8) \n"
|
||||
|
||||
"inc %%rdx \n"
|
||||
"dec %%rcx \n"
|
||||
"jnz 1b \n"
|
||||
|
||||
"adc %%rcx, %%rcx \n" // rcx has the cf state
|
||||
|
||||
"or %%r8, %%r8 \n"
|
||||
"jz 3f \n"
|
||||
|
||||
"xor %%rbx, %%rbx \n"
|
||||
"sub %%rcx, %%rbx \n" // setting cf from rcx
|
||||
"mov %%r8, %%rcx \n" // rcx=rest and is != 0
|
||||
"mov $0, %%rbx \n"
|
||||
"2: \n"
|
||||
"mov (%%rsi, %%rdx, 8), %%rax \n"
|
||||
"adc %%rbx, %%rax \n"
|
||||
"mov %%rax, (%%rdi, %%rdx, 8) \n"
|
||||
|
||||
"inc %%rdx \n"
|
||||
"dec %%rcx \n"
|
||||
"jnz 2b \n"
|
||||
|
||||
"adc %%rcx, %%rcx \n"
|
||||
"3: \n"
|
||||
|
||||
: "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3)
|
||||
: "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result)
|
||||
: "%r8", "cc", "memory" );
|
||||
|
||||
#endif
|
||||
|
||||
TTMATH_LOG("UInt::AddVector")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
@ -316,7 +399,7 @@ namespace ttmath
|
|||
|
||||
#endif
|
||||
|
||||
TTMATH_LOG("UInt64::Sub")
|
||||
TTMATH_LOG("UInt::Sub")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
@ -379,12 +462,101 @@ namespace ttmath
|
|||
|
||||
#endif
|
||||
|
||||
TTMATH_LOG("UInt64::SubInt")
|
||||
TTMATH_LOG("UInt::SubInt")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this static method subtractes one vector from the other
|
||||
'ss1' is larger in size or equal to 'ss2'
|
||||
|
||||
ss1 points to the first (larger) vector
|
||||
ss2 points to the second vector
|
||||
ss1_size - size of the ss1 (and size of the result too)
|
||||
ss2_size - size of the ss2
|
||||
result - is the result vector (which has size the same as ss1: ss1_size)
|
||||
|
||||
Example: ss1_size is 5, ss2_size is 3
|
||||
ss1: ss2: result (output):
|
||||
5 1 5-1
|
||||
4 3 4-3
|
||||
2 7 2-7
|
||||
6 6-1 (the borrow from previous item)
|
||||
9 9
|
||||
return (carry): 0
|
||||
of course the carry (borrow) is propagated and will be returned from the last item
|
||||
(this method is used by the Karatsuba multiplication algorithm)
|
||||
*/
|
||||
template<uint value_size>
|
||||
uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
|
||||
{
|
||||
TTMATH_ASSERT( ss1_size >= ss2_size )
|
||||
|
||||
uint rest = ss1_size - ss2_size;
|
||||
uint c;
|
||||
|
||||
#ifndef __GNUC__
|
||||
#error "another compiler than GCC is currently not supported in 64bit mode"
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
|
||||
/*
|
||||
the asm code is nearly the same as in AddVector
|
||||
only two instructions 'adc' are changed to 'sbb'
|
||||
*/
|
||||
uint dummy1, dummy2, dummy3;
|
||||
|
||||
__asm__ __volatile__(
|
||||
"mov %%rdx, %%r8 \n"
|
||||
"xor %%rdx, %%rdx \n" // rdx = 0, cf = 0
|
||||
"1: \n"
|
||||
"mov (%%rsi,%%rdx,8), %%rax \n"
|
||||
"sbb (%%rbx,%%rdx,8), %%rax \n"
|
||||
"mov %%rax, (%%rdi,%%rdx,8) \n"
|
||||
|
||||
"inc %%rdx \n"
|
||||
"dec %%rcx \n"
|
||||
"jnz 1b \n"
|
||||
|
||||
"adc %%rcx, %%rcx \n" // rcx has the cf state
|
||||
|
||||
"or %%r8, %%r8 \n"
|
||||
"jz 3f \n"
|
||||
|
||||
"xor %%rbx, %%rbx \n"
|
||||
"sub %%rcx, %%rbx \n" // setting cf from rcx
|
||||
"mov %%r8, %%rcx \n" // rcx=rest and is != 0
|
||||
"mov $0, %%rbx \n"
|
||||
"2: \n"
|
||||
"mov (%%rsi, %%rdx, 8), %%rax \n"
|
||||
"sbb %%rbx, %%rax \n"
|
||||
"mov %%rax, (%%rdi, %%rdx, 8) \n"
|
||||
|
||||
"inc %%rdx \n"
|
||||
"dec %%rcx \n"
|
||||
"jnz 2b \n"
|
||||
|
||||
"adc %%rcx, %%rcx \n"
|
||||
"3: \n"
|
||||
|
||||
: "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3)
|
||||
: "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result)
|
||||
: "%r8", "cc", "memory" );
|
||||
|
||||
#endif
|
||||
|
||||
TTMATH_LOG("UInt::SubVector")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this method moves all bits into the left hand side
|
||||
return value <- this <- c
|
||||
|
@ -431,7 +603,7 @@ namespace ttmath
|
|||
|
||||
#endif
|
||||
|
||||
TTMATH_LOG("UInt64::Rcl2_one")
|
||||
TTMATH_LOG("UInt::Rcl2_one")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
@ -481,7 +653,7 @@ namespace ttmath
|
|||
|
||||
#endif
|
||||
|
||||
TTMATH_LOG("UInt64::Rcr2_one")
|
||||
TTMATH_LOG("UInt::Rcr2_one")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
@ -553,7 +725,7 @@ namespace ttmath
|
|||
|
||||
#endif
|
||||
|
||||
TTMATH_LOG("UInt64::Rcl2")
|
||||
TTMATH_LOG("UInt::Rcl2")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
@ -628,7 +800,7 @@ namespace ttmath
|
|||
|
||||
#endif
|
||||
|
||||
TTMATH_LOG("UInt64::Rcr2")
|
||||
TTMATH_LOG("UInt::Rcr2")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue