Merged against the current original ttmath trunk

git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@170 e52654a7-88a9-db11-a3e9-0013d4bc506e
This commit is contained in:
Christian Kaiser 2009-06-25 11:07:55 +00:00
commit de64608eba
8 changed files with 1396 additions and 346 deletions

View File

@ -1,4 +1,4 @@
Version 0.8.5 prerelease (2009.05.15): Version 0.8.5 (2009.06.16):
* fixed: Big::Mod(x) didn't correctly return a carry * fixed: Big::Mod(x) didn't correctly return a carry
and the result was sometimes very big (even greater than x) and the result was sometimes very big (even greater than x)
* fixed: global function Mod(x) didn't set an ErrorCode object * fixed: global function Mod(x) didn't set an ErrorCode object
@ -7,7 +7,7 @@ Version 0.8.5 prerelease (2009.05.15):
* changed: function Sin(x) to Sin(x, ErrorCode * err=0) * changed: function Sin(x) to Sin(x, ErrorCode * err=0)
when x was very big the function returns zero when x was very big the function returns zero
now it sets ErrorCode object to err_overflow now it sets ErrorCode object to err_overflow
and the result is undefined and the result has a NaN flag set
the same is to Cos() function the same is to Cos() function
* changed: PrepareSin(x) is using Big::Mod() now when reducing 2PI period * changed: PrepareSin(x) is using Big::Mod() now when reducing 2PI period
should be a little accurate especially on a very big 'x' should be a little accurate especially on a very big 'x'
@ -29,6 +29,31 @@ Version 0.8.5 prerelease (2009.05.15):
uint SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result) uint SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
three forms: asm x86, asm x86_64, no-asm three forms: asm x86, asm x86_64, no-asm
those methods are used by the Karatsuba multiplication algorithm those methods are used by the Karatsuba multiplication algorithm
* added: to Big<> class: support for NaN flag (Not a Number)
bool Big::IsNan() - returns true if the NaN flag is set
void Big::SetNan() - sets the NaN flag
The NaN flag is set by default after creating an object:
Big<1, 2> a; // NaN is set (it means the object has not a valid number)
std::cout << a; // cout gives "NaN"
a = 123; // now NaN is not set
std::cout << a; // cout gives "123"
The NaN is set if there was a carry during calculations
a.Mul(very_big_value); // a will have a NaN set
The NaN is set if an argument is NaN too
b.SetNan();
a.Add(b); // a will have NaN because b has NaN too
If you try to do something on a NaN object, the result is a NaN too
a.SetNan();
a.Add(2); // a is still a NaN
The NaN is set if you use incorrect arguments
a.Ln(-10); // a will have the NaN flag
The only way to clear the NaN flag is to assign a correct value or other correct object,
supposing 'a' has NaN flag, to remove the flag you can either:
a = 10;
a.FromInt(30);
a.SetOne();
a.FromBig(other_object_without_nan);
etc.
Version 0.8.4 (2009.05.08): Version 0.8.4 (2009.05.08):

View File

@ -1,4 +1,3 @@
/* /*
* This file is a part of TTMath Bignum Library * This file is a part of TTMath Bignum Library
* and is distributed under the (new) BSD licence. * and is distributed under the (new) BSD licence.
@ -99,6 +98,14 @@ namespace ttmath
template<class ValueType> template<class ValueType>
ValueType Round(const ValueType & x, ErrorCode * err = 0) ValueType Round(const ValueType & x, ErrorCode * err = 0)
{ {
if( x.IsNan() )
{
if( err )
*err = err_improper_argument;
return x; // NaN
}
ValueType result( x ); ValueType result( x );
uint c = result.Round(); uint c = result.Round();
@ -124,6 +131,14 @@ namespace ttmath
template<class ValueType> template<class ValueType>
ValueType Ceil(const ValueType & x, ErrorCode * err = 0) ValueType Ceil(const ValueType & x, ErrorCode * err = 0)
{ {
if( x.IsNan() )
{
if( err )
*err = err_improper_argument;
return x; // NaN
}
ValueType result(x); ValueType result(x);
uint c = 0; uint c = 0;
@ -163,6 +178,14 @@ namespace ttmath
template<class ValueType> template<class ValueType>
ValueType Floor(const ValueType & x, ErrorCode * err = 0) ValueType Floor(const ValueType & x, ErrorCode * err = 0)
{ {
if( x.IsNan() )
{
if( err )
*err = err_improper_argument;
return x; // NaN
}
ValueType result(x); ValueType result(x);
uint c = 0; uint c = 0;
@ -203,8 +226,15 @@ namespace ttmath
template<class ValueType> template<class ValueType>
ValueType Ln(const ValueType & x, ErrorCode * err = 0) ValueType Ln(const ValueType & x, ErrorCode * err = 0)
{ {
ValueType result; if( x.IsNan() )
{
if( err )
*err = err_improper_argument;
return x; // NaN
}
ValueType result;
uint state = result.Ln(x); uint state = result.Ln(x);
if( err ) if( err )
@ -237,8 +267,15 @@ namespace ttmath
template<class ValueType> template<class ValueType>
ValueType Log(const ValueType & x, const ValueType & base, ErrorCode * err = 0) ValueType Log(const ValueType & x, const ValueType & base, ErrorCode * err = 0)
{ {
ValueType result; if( x.IsNan() || base.IsNan() )
{
if( err )
*err = err_improper_argument;
return ValueType(); // default NaN
}
ValueType result;
uint state = result.Log(x, base); uint state = result.Log(x, base);
if( err ) if( err )
@ -271,8 +308,15 @@ namespace ttmath
template<class ValueType> template<class ValueType>
ValueType Exp(const ValueType & x, ErrorCode * err = 0) ValueType Exp(const ValueType & x, ErrorCode * err = 0)
{ {
ValueType result; if( x.IsNan() )
{
if( err )
*err = err_improper_argument;
return x; // NaN
}
ValueType result;
uint c = result.Exp(x); uint c = result.Exp(x);
if( err ) if( err )
@ -468,6 +512,14 @@ namespace ttmath
ValueType one, result; ValueType one, result;
bool change_sign; bool change_sign;
if( x.IsNan() )
{
if( err )
*err = err_improper_argument;
return result; // NaN is set by default
}
if( err ) if( err )
*err = err_ok; *err = err_ok;
@ -479,7 +531,7 @@ namespace ttmath
// result has NaN flag set by default // result has NaN flag set by default
if( err ) if( err )
*err = err_overflow; // maybe another error code? *err = err_overflow; // maybe another error code? err_improper_argument?
return result; // NaN is set by default return result; // NaN is set by default
} }
@ -511,6 +563,14 @@ namespace ttmath
template<class ValueType> template<class ValueType>
ValueType Cos(ValueType x, ErrorCode * err = 0) ValueType Cos(ValueType x, ErrorCode * err = 0)
{ {
if( x.IsNan() )
{
if( err )
*err = err_improper_argument;
return x; // NaN
}
ValueType pi05; ValueType pi05;
pi05.Set05Pi(); pi05.Set05Pi();
@ -783,6 +843,14 @@ namespace ttmath
one.SetOne(); one.SetOne();
bool change_sign = false; bool change_sign = false;
if( x.IsNan() )
{
if( err )
*err = err_improper_argument;
return result; // NaN is set by default
}
if( x.GreaterWithoutSignThan(one) ) if( x.GreaterWithoutSignThan(one) )
{ {
if( err ) if( err )
@ -1005,6 +1073,9 @@ namespace ttmath
one.SetOne(); one.SetOne();
bool change_sign = false; bool change_sign = false;
if( x.IsNan() )
return result; // NaN is set by default
// if x is negative we're using the formula: // if x is negative we're using the formula:
// atan(-x) = -atan(x) // atan(-x) = -atan(x)
if( x.IsSign() ) if( x.IsSign() )
@ -1085,6 +1156,14 @@ namespace ttmath
template<class ValueType> template<class ValueType>
ValueType Sinh(const ValueType & x, ErrorCode * err = 0) ValueType Sinh(const ValueType & x, ErrorCode * err = 0)
{ {
if( x.IsNan() )
{
if( err )
*err = err_improper_argument;
return x; // NaN
}
ValueType ex, emx; ValueType ex, emx;
uint c = 0; uint c = 0;
@ -1109,6 +1188,14 @@ namespace ttmath
template<class ValueType> template<class ValueType>
ValueType Cosh(const ValueType & x, ErrorCode * err = 0) ValueType Cosh(const ValueType & x, ErrorCode * err = 0)
{ {
if( x.IsNan() )
{
if( err )
*err = err_improper_argument;
return x; // NaN
}
ValueType ex, emx; ValueType ex, emx;
uint c = 0; uint c = 0;
@ -1133,6 +1220,14 @@ namespace ttmath
template<class ValueType> template<class ValueType>
ValueType Tanh(const ValueType & x, ErrorCode * err = 0) ValueType Tanh(const ValueType & x, ErrorCode * err = 0)
{ {
if( x.IsNan() )
{
if( err )
*err = err_improper_argument;
return x; // NaN
}
ValueType ex, emx, nominator, denominator; ValueType ex, emx, nominator, denominator;
uint c = 0; uint c = 0;
@ -1173,6 +1268,14 @@ namespace ttmath
template<class ValueType> template<class ValueType>
ValueType Coth(const ValueType & x, ErrorCode * err = 0) ValueType Coth(const ValueType & x, ErrorCode * err = 0)
{ {
if( x.IsNan() )
{
if( err )
*err = err_improper_argument;
return x; // NaN
}
if( x.IsZero() ) if( x.IsZero() )
{ {
if( err ) if( err )
@ -1230,6 +1333,14 @@ namespace ttmath
template<class ValueType> template<class ValueType>
ValueType ASinh(const ValueType & x, ErrorCode * err = 0) ValueType ASinh(const ValueType & x, ErrorCode * err = 0)
{ {
if( x.IsNan() )
{
if( err )
*err = err_improper_argument;
return x; // NaN
}
ValueType xx(x), one, result; ValueType xx(x), one, result;
uint c = 0; uint c = 0;
one.SetOne(); one.SetOne();
@ -1258,6 +1369,14 @@ namespace ttmath
template<class ValueType> template<class ValueType>
ValueType ACosh(const ValueType & x, ErrorCode * err = 0) ValueType ACosh(const ValueType & x, ErrorCode * err = 0)
{ {
if( x.IsNan() )
{
if( err )
*err = err_improper_argument;
return x; // NaN
}
ValueType xx(x), one, result; ValueType xx(x), one, result;
uint c = 0; uint c = 0;
one.SetOne(); one.SetOne();
@ -1299,6 +1418,14 @@ namespace ttmath
template<class ValueType> template<class ValueType>
ValueType ATanh(const ValueType & x, ErrorCode * err = 0) ValueType ATanh(const ValueType & x, ErrorCode * err = 0)
{ {
if( x.IsNan() )
{
if( err )
*err = err_improper_argument;
return x; // NaN
}
ValueType nominator(x), denominator, one, result; ValueType nominator(x), denominator, one, result;
uint c = 0; uint c = 0;
one.SetOne(); one.SetOne();
@ -1344,6 +1471,14 @@ namespace ttmath
template<class ValueType> template<class ValueType>
ValueType ACoth(const ValueType & x, ErrorCode * err = 0) ValueType ACoth(const ValueType & x, ErrorCode * err = 0)
{ {
if( x.IsNan() )
{
if( err )
*err = err_improper_argument;
return x; // NaN
}
ValueType nominator(x), denominator(x), one, result; ValueType nominator(x), denominator(x), one, result;
uint c = 0; uint c = 0;
one.SetOne(); one.SetOne();
@ -1402,6 +1537,14 @@ namespace ttmath
ValueType result, temp; ValueType result, temp;
uint c = 0; uint c = 0;
if( x.IsNan() )
{
if( err )
*err = err_improper_argument;
return result; // NaN is set by default
}
result = x; result = x;
// it is better to make division first and then multiplication // it is better to make division first and then multiplication
@ -1430,6 +1573,14 @@ namespace ttmath
ValueType result, delimiter; ValueType result, delimiter;
uint c = 0; uint c = 0;
if( x.IsNan() )
{
if( err )
*err = err_improper_argument;
return result; // NaN is set by default
}
result = 180; result = 180;
c += result.Mul(x); c += result.Mul(x);
@ -1467,7 +1618,7 @@ namespace ttmath
ValueType delimiter, multipler; ValueType delimiter, multipler;
uint c = 0; uint c = 0;
if( m.IsSign() || s.IsSign() ) if( d.IsNan() || m.IsNan() || s.IsNan() || m.IsSign() || s.IsSign() )
{ {
if( err ) if( err )
*err = err_improper_argument; *err = err_improper_argument;
@ -1521,6 +1672,14 @@ namespace ttmath
ValueType result, temp; ValueType result, temp;
uint c = 0; uint c = 0;
if( x.IsNan() )
{
if( err )
*err = err_improper_argument;
return result; // NaN is set by default
}
result = x; result = x;
// it is better to make division first and then multiplication // it is better to make division first and then multiplication
@ -1549,6 +1708,14 @@ namespace ttmath
ValueType result, delimiter; ValueType result, delimiter;
uint c = 0; uint c = 0;
if( x.IsNan() )
{
if( err )
*err = err_improper_argument;
return result; // NaN is set by default
}
result = 200; result = 200;
c += result.Mul(x); c += result.Mul(x);
@ -1573,6 +1740,14 @@ namespace ttmath
ValueType result, temp; ValueType result, temp;
uint c = 0; uint c = 0;
if( x.IsNan() )
{
if( err )
*err = err_improper_argument;
return result; // NaN is set by default
}
result = x; result = x;
temp = 200; temp = 200;
@ -1615,6 +1790,14 @@ namespace ttmath
ValueType result, temp; ValueType result, temp;
uint c = 0; uint c = 0;
if( x.IsNan() )
{
if( err )
*err = err_improper_argument;
return result; // NaN is set by default
}
result = x; result = x;
temp = 180; temp = 180;
@ -1648,7 +1831,7 @@ namespace ttmath
template<class ValueType> template<class ValueType>
ValueType Sqrt(ValueType x, ErrorCode * err = 0) ValueType Sqrt(ValueType x, ErrorCode * err = 0)
{ {
if( x.IsSign() ) if( x.IsNan() || x.IsSign() )
{ {
if( err ) if( err )
*err = err_improper_argument; *err = err_improper_argument;
@ -1730,7 +1913,7 @@ namespace ttmath
template<class ValueType> template<class ValueType>
bool RootCheckIndexOne(ValueType & x, const ValueType & index, ErrorCode * err) bool RootCheckIndexOne(const ValueType & index, ErrorCode * err)
{ {
ValueType one; ValueType one;
one.SetOne(); one.SetOne();
@ -1772,11 +1955,12 @@ namespace ttmath
template<class ValueType> template<class ValueType>
bool RootCheckXZero(ValueType & x, const ValueType & index, ErrorCode * err) bool RootCheckXZero(ValueType & x, ErrorCode * err)
{ {
if( x.IsZero() ) if( x.IsZero() )
{ {
// root(0;index) is zero (if index!=0) // root(0;index) is zero (if index!=0)
// RootCheckIndexZero() must be called beforehand
x.SetZero(); x.SetZero();
if( err ) if( err )
@ -1843,11 +2027,19 @@ namespace ttmath
{ {
using namespace auxiliaryfunctions; using namespace auxiliaryfunctions;
if( x.IsNan() || index.IsNan() )
{
if( err )
*err = err_improper_argument;
return ValueType(); // NaN is set by default
}
if( RootCheckIndexSign(x, index, err) ) return x; if( RootCheckIndexSign(x, index, err) ) return x;
if( RootCheckIndexZero(x, index, err) ) return x; if( RootCheckIndexZero(x, index, err) ) return x;
if( RootCheckIndexOne (x, index, err) ) return x; if( RootCheckIndexOne ( index, err) ) return x;
if( RootCheckIndexFrac(x, index, err) ) return x; if( RootCheckIndexFrac(x, index, err) ) return x;
if( RootCheckXZero(x, index, err) ) return x; if( RootCheckXZero (x, err) ) return x;
// index integer and index!=0 // index integer and index!=0
// x!=0 // x!=0
@ -1898,14 +2090,17 @@ namespace ttmath
while( !carry && multipler<maxvalue ) while( !carry && multipler<maxvalue )
{ {
// !! the test here we don't have to make in all iterations (performance) if( stop && (multipler & 127)==0 ) // it means 'stop && (multipler % 128)==0'
if( stop && stop->WasStopSignal() ) {
// after each 128 iterations we make a test
if( stop->WasStopSignal() )
{ {
if( err ) if( err )
*err = err_interrupt; *err = err_interrupt;
return 2; return 2;
} }
}
++multipler; ++multipler;
carry += result.MulUInt(multipler); carry += result.MulUInt(multipler);
@ -1928,20 +2123,25 @@ namespace ttmath
one.SetOne(); one.SetOne();
uint carry = 0; uint carry = 0;
uint iter = 1; // only for testing the stop object
while( !carry && multipler < x ) while( !carry && multipler < x )
{ {
// !! the test here we don't have to make in all iterations (performance) if( stop && (iter & 31)==0 ) // it means 'stop && (iter % 32)==0'
if( stop && stop->WasStopSignal() ) {
// after each 32 iterations we make a test
if( stop->WasStopSignal() )
{ {
if( err ) if( err )
*err = err_interrupt; *err = err_interrupt;
return 2; return 2;
} }
}
carry += multipler.Add(one); carry += multipler.Add(one);
carry += result.Mul(multipler); carry += result.Mul(multipler);
++iter;
} }
if( err ) if( err )
@ -1968,18 +2168,16 @@ namespace ttmath
static History<ValueType> history; static History<ValueType> history;
ValueType result; ValueType result;
result.SetOne(); if( x.IsNan() || x.IsSign() )
if( x.IsSign() )
{ {
if( err ) if( err )
*err = err_improper_argument; *err = err_improper_argument;
result.SetNan(); return result; // NaN set by default
return result;
} }
result.SetOne();
if( !x.exponent.IsSign() && !x.exponent.IsZero() ) if( !x.exponent.IsSign() && !x.exponent.IsZero() )
{ {
// when x.exponent>0 there's no sense to calculate the formula // when x.exponent>0 there's no sense to calculate the formula
@ -2065,6 +2263,14 @@ namespace ttmath
template<class ValueType> template<class ValueType>
ValueType Mod(ValueType a, const ValueType & b, ErrorCode * err = 0) ValueType Mod(ValueType a, const ValueType & b, ErrorCode * err = 0)
{ {
if( a.IsNan() || b.IsNan() )
{
if( err )
*err = err_improper_argument;
return ValueType(); // NaN is set by default
}
uint c = a.Mod(b); uint c = a.Mod(b);
if( err ) if( err )
@ -2084,4 +2290,11 @@ namespace ttmath
*/ */
#include "ttmathparser.h" #include "ttmathparser.h"
#ifdef _MSC_VER
#pragma warning( default: 4127 )
//warning C4127: conditional expression is constant
#endif
#endif #endif

View File

@ -143,9 +143,8 @@ public:
return 0; return 0;
uint comp = mantissa.CompensationToLeft(); uint comp = mantissa.CompensationToLeft();
uint c = exponent.Sub( comp );
return CheckCarry(c); return exponent.Sub( comp );
} }
@ -540,6 +539,7 @@ public:
/* /*
we only have to test the mantissa we only have to test the mantissa
also we don't check the NaN flag also we don't check the NaN flag
(maybe this method should return false if there is NaN flag set?)
*/ */
return mantissa.IsZero(); return mantissa.IsZero();
} }
@ -587,6 +587,7 @@ public:
*/ */
void Sgn() void Sgn()
{ {
// we have to check the NaN flag, because the next SetOne() method would clear it
if( IsNan() ) if( IsNan() )
return; return;
@ -622,6 +623,7 @@ public:
/*! /*!
this method changes the sign this method changes the sign
when there is a value of zero then the sign is not changed
e.g. e.g.
-1 -> 1 -1 -> 1
@ -629,6 +631,8 @@ public:
*/ */
void ChangeSign() void ChangeSign()
{ {
// we don't have to check the NaN flag here
if( info & TTMATH_BIG_SIGN ) if( info & TTMATH_BIG_SIGN )
{ {
info &= ~TTMATH_BIG_SIGN; info &= ~TTMATH_BIG_SIGN;
@ -708,7 +712,14 @@ public:
// there shouldn't be a carry here because // there shouldn't be a carry here because
// (1) (2) guarantee that the mantissa of this // (1) (2) guarantee that the mantissa of this
// is greater than or equal to the mantissa of the ss2 // is greater than or equal to the mantissa of the ss2
TTMATH_VERIFY( mantissa.Sub(ss2.mantissa) >= 0 )
#ifdef TTMATH_DEBUG
// this is to get rid of a warning saying that c_temp is not used
uint c_temp = /* mantissa.Sub(ss2.mantissa); */
#endif
mantissa.Sub(ss2.mantissa);
TTMATH_ASSERT( c_temp == 0 )
} }
c += Standardizing(); c += Standardizing();
@ -1393,19 +1404,17 @@ public:
denominator.SetOne(); denominator.SetOne();
denominator_i.SetOne(); denominator_i.SetOne();
// every 'step_test' times we make a test
const uint step_test = 5;
uint i; uint i;
old_value = *this; old_value = *this;
// we begin from 1 in order to not testing at the beginning // we begin from 1 in order to not test at the beginning
#ifdef TTMATH_CONSTANTSGENERATOR #ifdef TTMATH_CONSTANTSGENERATOR
for(i=1 ; true ; ++i) for(i=1 ; true ; ++i)
#else #else
for(i=1 ; i<=TTMATH_ARITHMETIC_MAX_LOOP ; ++i) for(i=1 ; i<=TTMATH_ARITHMETIC_MAX_LOOP ; ++i)
#endif #endif
{ {
bool testing = ((i % step_test) == 0); bool testing = ((i & 3) == 0); // it means '(i % 4) == 0'
next_part = numerator; next_part = numerator;
@ -1418,13 +1427,15 @@ public:
// there shouldn't be a carry here // there shouldn't be a carry here
Add( next_part ); Add( next_part );
if( testing && old_value==*this ) if( testing )
{
if( old_value == *this )
// we've added next few parts of the formula but the result // we've added next few parts of the formula but the result
// is still the same then we break the loop // is still the same then we break the loop
break; break;
else else
old_value = *this; old_value = *this;
}
// we set the denominator and the numerator for a next part of the formula // we set the denominator and the numerator for a next part of the formula
if( denominator_i.Add(one) ) if( denominator_i.Add(one) )
@ -1559,20 +1570,17 @@ public:
SetZero(); SetZero();
old_value = *this; old_value = *this;
// every 'step_test' times we make a test
const uint step_test = 5;
uint i; uint i;
#ifdef TTMATH_CONSTANTSGENERATOR #ifdef TTMATH_CONSTANTSGENERATOR
for(i=1 ; true ; ++i) for(i=1 ; true ; ++i)
#else #else
// we begin from 1 in order to not testing at the beginning // we begin from 1 in order to not test at the beginning
for(i=1 ; i<=TTMATH_ARITHMETIC_MAX_LOOP ; ++i) for(i=1 ; i<=TTMATH_ARITHMETIC_MAX_LOOP ; ++i)
#endif #endif
{ {
bool testing = ((i % step_test) == 0); bool testing = ((i & 3) == 0); // it means '(i % 4) == 0'
next_part = x1; next_part = x1;
@ -1585,12 +1593,15 @@ public:
// there shouldn't be a carry here // there shouldn't be a carry here
Add(next_part); Add(next_part);
if( testing && old_value == *this ) if( testing )
{
if( old_value == *this )
// we've added next (step_test) parts of the formula but the result // we've added next (step_test) parts of the formula but the result
// is still the same then we break the loop // is still the same then we break the loop
break; break;
else else
old_value = *this; old_value = *this;
}
if( x1.Mul(x2) ) if( x1.Mul(x2) )
// if there is a carry here the result we return as good // if there is a carry here the result we return as good
@ -2021,9 +2032,10 @@ public:
// If E=2047 and F is zero and S is 1, then V=-Infinity // If E=2047 and F is zero and S is 1, then V=-Infinity
// If E=2047 and F is zero and S is 0, then V=Infinity // If E=2047 and F is zero and S is 0, then V=Infinity
// at the moment we do not support NaN, -Infinity and +Infinity // we do not support -Infinity and +Infinity
// we assume that there is always NaN
SetZero(); SetNan();
} }
else else
if( e > 0 ) if( e > 0 )
@ -2134,9 +2146,10 @@ public:
// If E=2047 and F is zero and S is 1, then V=-Infinity // If E=2047 and F is zero and S is 1, then V=-Infinity
// If E=2047 and F is zero and S is 0, then V=Infinity // If E=2047 and F is zero and S is 0, then V=Infinity
// at the moment we do not support NaN, -Infinity and +Infinity // we do not support -Infinity and +Infinity
// we assume that there is always NaN
SetZero(); SetNan();
} }
else else
if( e > 0 ) if( e > 0 )
@ -2226,12 +2239,19 @@ public:
return 0; return 0;
} }
if( IsNan() )
{
result = ToDouble_SetDouble( false, 2047, 0, false, true);
return 0;
}
sint e_correction = sint(man*TTMATH_BITS_PER_UINT) - 1; sint e_correction = sint(man*TTMATH_BITS_PER_UINT) - 1;
if( exponent >= 1024 - e_correction ) if( exponent >= 1024 - e_correction )
{ {
// +/- infinity // +/- infinity
result = ToDouble_SetDouble( IsSign(), 2047, 0, true); result = ToDouble_SetDouble( 0, 2047, 0, true);
return 1; return 1;
} }
@ -2266,7 +2286,7 @@ private:
#ifdef TTMATH_PLATFORM32 #ifdef TTMATH_PLATFORM32
// 32bit platforms // 32bit platforms
double ToDouble_SetDouble(bool is_sign, uint e, sint move, bool infinity = false) const double ToDouble_SetDouble(bool is_sign, uint e, sint move, bool infinity = false, bool nan = false) const
{ {
union union
{ {
@ -2281,6 +2301,12 @@ private:
temp.u[1] |= (e << 20) & 0x7FF00000u; temp.u[1] |= (e << 20) & 0x7FF00000u;
if( nan )
{
temp.u[0] |= 1;
return temp.d;
}
if( infinity ) if( infinity )
return temp.d; return temp.d;
@ -2303,7 +2329,7 @@ private:
#else #else
// 64bit platforms // 64bit platforms
double ToDouble_SetDouble(bool is_sign, uint e, sint move, bool infinity = false) const double ToDouble_SetDouble(bool is_sign, uint e, sint move, bool infinity = false, bool nan = false) const
{ {
union union
{ {
@ -2318,6 +2344,12 @@ private:
temp.u |= (e << 52) & 0x7FF0000000000000ul; temp.u |= (e << 52) & 0x7FF0000000000000ul;
if( nan )
{
temp.u |= 1;
return temp.d;
}
if( infinity ) if( infinity )
return temp.d; return temp.d;
@ -2600,7 +2632,12 @@ public:
*/ */
Big() Big()
{ {
SetNan(); info = TTMATH_BIG_NAN;
/*
we're directly setting 'info' (instead of calling SetNan())
in order to get rid of a warning saying that 'info' is uninitialized
*/
} }
@ -2682,7 +2719,8 @@ public:
output: output:
return value: return value:
0 - ok and 'result' will be an object of type tstr_t which holds the value 0 - ok and 'result' will be an object of type tstr_t which holds the value
1 - if there was a carry 1 - if there was a carry (shoudn't be in a normal situation - if is that means there
is somewhere an error in the library)
*/ */
uint ToString( tstr_t & result, uint ToString( tstr_t & result,
uint base = 10, uint base = 10,
@ -3100,7 +3138,7 @@ private:
else else
was_carry = false; was_carry = false;
new_man[i] = UInt<man>::DigitToChar( digit ); new_man[i] = static_cast<char>( UInt<man>::DigitToChar(digit) );
} }
if( i<0 && was_carry ) if( i<0 && was_carry )
@ -3826,6 +3864,25 @@ public:
return false; return false;
} }
bool IsNearZero() const
{
// we should check the mantissas beforehand because sometimes we can have
// a mantissa set to zero but in the exponent something another value
// (maybe we've forgotten about calling CorrectZero() ?)
if( mantissa.IsZero() )
{
return true;
}
UInt<man> m(mantissa);
m.Rcr(man*3); // pi * thumb rule...
return(m.IsZero());
return false;
}
bool operator<(const Big<exp,man> & ss2) const bool operator<(const Big<exp,man> & ss2) const
{ {
@ -3857,6 +3914,14 @@ public:
} }
bool operator^=(const Big<exp,man> & ss2) const
{
if( IsSign() != ss2.IsSign() )
return false;
return AboutEqualWithoutSign( ss2 );
}
bool operator>(const Big<exp,man> & ss2) const bool operator>(const Big<exp,man> & ss2) const

View File

@ -1,5 +1,5 @@
/* /*
* This file is a part of TTMath Mathematical Library * This file is a part of TTMath Bignum Library
* and is distributed under the (new) BSD licence. * and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@slimaczek.pl> * Author: Tomasz Sowa <t.sowa@slimaczek.pl>
*/ */
@ -973,7 +973,7 @@ void Not(int sindex, int amount_of_args, ValueType & result)
void DegToRad(int sindex, int amount_of_args, ValueType & result) void DegToRad(int sindex, int amount_of_args, ValueType & result)
{ {
ErrorCode err; ErrorCode err = err_ok;
if( amount_of_args == 1 ) if( amount_of_args == 1 )
{ {
@ -1052,7 +1052,7 @@ void RadToGrad(int sindex, int amount_of_args, ValueType & result)
void DegToGrad(int sindex, int amount_of_args, ValueType & result) void DegToGrad(int sindex, int amount_of_args, ValueType & result)
{ {
ErrorCode err; ErrorCode err = err_ok;
if( amount_of_args == 1 ) if( amount_of_args == 1 )
{ {
@ -1555,7 +1555,7 @@ int character;
do do
{ {
result += character; result += static_cast<char>( character );
character = * ++pstring; character = * ++pstring;
} }
while( (character>='a' && character<='z') || while( (character>='a' && character<='z') ||

View File

@ -64,8 +64,8 @@
*/ */
#define TTMATH_MAJOR_VER 0 #define TTMATH_MAJOR_VER 0
#define TTMATH_MINOR_VER 8 #define TTMATH_MINOR_VER 8
#define TTMATH_REVISION_VER 4 #define TTMATH_REVISION_VER 5
#define TTMATH_PRERELEASE_VER 1 #define TTMATH_PRERELEASE_VER 0
/*! /*!
@ -232,6 +232,16 @@ namespace ttmath
/*!
this is a limit when calculating Karatsuba multiplication
if the size of a vector is smaller than TTMATH_USE_KARATSUBA_MULTIPLICATION_FROM_SIZE
the Karatsuba algorithm will use standard schoolbook multiplication
*/
#ifdef __GNUC__
#define TTMATH_USE_KARATSUBA_MULTIPLICATION_FROM_SIZE 3
#else
#define TTMATH_USE_KARATSUBA_MULTIPLICATION_FROM_SIZE 5
#endif
namespace ttmath namespace ttmath
{ {

View File

@ -40,6 +40,7 @@
#ifndef headerfilettmathuint #ifndef headerfilettmathuint
#define headerfilettmathuint #define headerfilettmathuint
/*! /*!
\file ttmathuint.h \file ttmathuint.h
\brief template class UInt<uint> \brief template class UInt<uint>
@ -48,6 +49,7 @@
#include <iostream> #include <iostream>
#include <iomanip> #include <iomanip>
#include "ttmathtypes.h" #include "ttmathtypes.h"
#if defined(_MSC_VER) #if defined(_MSC_VER)
@ -842,8 +844,10 @@ public:
/*! /*!
the multiplication 'this' = 'this' * ss2 the multiplication 'this' = 'this' * ss2
algorithm: 100 - means automatically choose the fastest algorithm
*/ */
uint Mul(const UInt<value_size> & ss2, uint algorithm = 2) uint Mul(const UInt<value_size> & ss2, uint algorithm = 100)
{ {
switch( algorithm ) switch( algorithm )
{ {
@ -851,8 +855,14 @@ public:
return Mul1(ss2); return Mul1(ss2);
case 2: case 2:
default:
return Mul2(ss2); return Mul2(ss2);
case 3:
return Mul3(ss2);
case 100:
default:
return MulFastest(ss2);
} }
} }
@ -862,10 +872,12 @@ public:
since the 'result' is twice bigger than 'this' and 'ss2' since the 'result' is twice bigger than 'this' and 'ss2'
this method never returns a carry this method never returns a carry
algorithm: 100 - means automatically choose the fastest algorithm
*/ */
void MulBig(const UInt<value_size> & ss2, void MulBig(const UInt<value_size> & ss2,
UInt<value_size*2> & result, UInt<value_size*2> & result,
uint algorithm = 2) uint algorithm = 100)
{ {
switch( algorithm ) switch( algorithm )
{ {
@ -873,8 +885,14 @@ public:
return Mul1Big(ss2, result); return Mul1Big(ss2, result);
case 2: case 2:
default:
return Mul2Big(ss2, result); return Mul2Big(ss2, result);
case 3:
return Mul3Big(ss2, result);
case 100:
default:
return MulFastestBig(ss2, result);
} }
} }
@ -966,23 +984,25 @@ public:
uint Mul2(const UInt<value_size> & ss2) uint Mul2(const UInt<value_size> & ss2)
{ {
UInt<value_size*2> result; UInt<value_size*2> result;
uint i; uint i, c = 0;
Mul2Big(ss2, result); Mul2Big(ss2, result);
// copying result // copying result
memcpy(table,result.table,sizeof(table)); for(i=0 ; i<value_size ; ++i)
//for(i=0 ; i<value_size ; ++i) table[i] = result.table[i];
// table[i] = result.table[i];
// testing carry // testing carry
for(i = value_size ; i<value_size*2 ; ++i) for( ; i<value_size*2 ; ++i)
if( result.table[i] != 0 ) if( result.table[i] != 0 )
return 1; {
c = 1;
break;
}
TTMATH_LOG("UInt::Mul2") TTMATH_LOG("UInt::Mul2")
return 0; return c;
} }
@ -994,44 +1014,385 @@ public:
*/ */
void Mul2Big(const UInt<value_size> & ss2, UInt<value_size*2> & result) void Mul2Big(const UInt<value_size> & ss2, UInt<value_size*2> & result)
{ {
uint r2,r1; Mul2Big2<value_size>(table, ss2.table, result);
uint x1size=value_size, x2size=value_size;
TTMATH_LOG("UInt::Mul2Big")
}
private:
/*!
an auxiliary method for calculating the multiplication
arguments we're taking as pointers (this is to improve the Mul3Big2()- avoiding
unnecessary copying objects), the result should be taken as a pointer too,
but at the moment there is no method AddTwoInts() which can operate on pointers
*/
template<uint ss_size>
void Mul2Big2(const uint * ss1, const uint * ss2, UInt<ss_size*2> & result)
{
uint x1size = ss_size, x2size = ss_size;
uint x1start = 0, x2start = 0; uint x1start = 0, x2start = 0;
if( ss_size > 2 )
{
// if the ss_size is smaller than or equal to 2
// there is no sense to set x1size (and others) to another values
for(x1size=ss_size ; x1size>0 && ss1[x1size-1]==0 ; --x1size);
for(x2size=ss_size ; x2size>0 && ss2[x2size-1]==0 ; --x2size);
for(x1start=0 ; x1start<x1size && ss1[x1start]==0 ; ++x1start);
for(x2start=0 ; x2start<x2size && ss2[x2start]==0 ; ++x2start);
}
Mul2Big3<ss_size>(ss1, ss2, result, x1start, x1size, x2start, x2size);
}
/*!
an auxiliary method for calculating the multiplication
*/
template<uint ss_size>
void Mul2Big3(const uint * ss1, const uint * ss2, UInt<ss_size*2> & result, uint x1start, uint x1size, uint x2start, uint x2size)
{
uint r2, r1;
result.SetZero(); result.SetZero();
if( value_size > 2 ) if( x1size==0 || x2size==0 )
return;
for(uint x1=x1start ; x1<x1size ; ++x1)
{ {
// if the value_size is smaller than or equal to 2 for(uint x2=x2start ; x2<x2size ; ++x2)
// there is no sense to set x1size (and others) to another values {
MulTwoWords(ss1[x1], ss2[x2], &r2, &r1);
result.AddTwoInts(r2, r1, x2+x1);
// here will never be a carry
}
}
}
public:
/*!
multiplication: this = this * ss2
This is Karatsuba Multiplication algorithm, we're using it when value_size is greater than
or equal to TTMATH_USE_KARATSUBA_MULTIPLICATION_FROM_SIZE macro (defined in ttmathuint.h).
If value_size is smaller then we're using Mul2Big() instead.
Karatsuba multiplication:
Assume we have:
this = x = x1*B^m + x0
ss2 = y = y1*B^m + y0
where x0 and y0 are less than B^m
the product from multiplication we can show as:
x*y = (x1*B^m + x0)(y1*B^m + y0) = z2*B^(2m) + z1*B^m + z0
where
z2 = x1*y1
z1 = x1*y0 + x0*y1
z0 = x0*y0
this is standard schoolbook algorithm with O(n^2), Karatsuba observed that z1 can be given in other form:
z1 = (x1 + x0)*(y1 + y0) - z2 - z0 / z1 = (x1*y1 + x1*y0 + x0*y1 + x0*y0) - x1*y1 - x0*y0 = x1*y0 + x0*y1 /
and to calculate the multiplication we need only three multiplications (with some additions and subtractions)
Our objects 'this' and 'ss2' we divide into two parts and by using recurrence we calculate the multiplication.
Karatsuba multiplication has O( n^(ln(3)/ln(2)) )
*/
uint Mul3(const UInt<value_size> & ss2)
{
UInt<value_size*2> result;
uint i, c = 0;
Mul3Big(ss2, result);
// copying result
for(i=0 ; i<value_size ; ++i)
table[i] = result.table[i];
// testing carry
for( ; i<value_size*2 ; ++i)
if( result.table[i] != 0 )
{
c = 1;
break;
}
TTMATH_LOG("UInt::Mul3")
return c;
}
/*!
multiplication: result = this * ss2
result is twice bigger than this and ss2,
this method never returns carry,
(Karatsuba multiplication)
*/
void Mul3Big(const UInt<value_size> & ss2, UInt<value_size*2> & result)
{
Mul3Big2<value_size>(table, ss2.table, result.table);
TTMATH_LOG("UInt::Mul3Big")
}
private:
/*!
an auxiliary method for calculating the Karatsuba multiplication
result_size is equal ss_size*2
*/
template<uint ss_size>
void Mul3Big2(const uint * ss1, const uint * ss2, uint * result)
{
const uint * x1, * x0, * y1, * y0;
if( ss_size>1 && ss_size<TTMATH_USE_KARATSUBA_MULTIPLICATION_FROM_SIZE )
{
UInt<ss_size*2> res;
Mul2Big2<ss_size>(ss1, ss2, res);
for(uint i=0 ; i<ss_size*2 ; ++i)
result[i] = res.table[i];
return;
}
else
if( ss_size == 1 )
{
return MulTwoWords(*ss1, *ss2, &result[1], &result[0]);
}
if( (ss_size & 1) == 1 )
{
// ss_size is odd
x0 = ss1;
y0 = ss2;
x1 = ss1 + ss_size / 2 + 1;
y1 = ss2 + ss_size / 2 + 1;
// the second vectors (x1 and y1) are smaller about one from the first ones (x0 and y0)
Mul3Big3<ss_size/2 + 1, ss_size/2, ss_size*2>(x1, x0, y1, y0, result);
}
else
{
// ss_size is even
x0 = ss1;
y0 = ss2;
x1 = ss1 + ss_size / 2;
y1 = ss2 + ss_size / 2;
// all four vectors (x0 x1 y0 y1) are equal in size
Mul3Big3<ss_size/2, ss_size/2, ss_size*2>(x1, x0, y1, y0, result);
}
}
#ifdef _MSC_VER
#pragma warning (disable : 4717)
//warning C4717: recursive on all control paths, function will cause runtime stack overflow
//we have the stop point in Mul3Big2() method
#endif
/*!
an auxiliary method for calculating the Karatsuba multiplication
x = x1*B^m + x0
y = y1*B^m + y0
first_size - is the size of vectors: x0 and y0
second_size - is the size of vectors: x1 and y1 (can be either equal first_size or smaller about one from first_size)
x*y = (x1*B^m + x0)(y1*B^m + y0) = z2*B^(2m) + z1*B^m + z0
where
z0 = x0*y0
z2 = x1*y1
z1 = (x1 + x0)*(y1 + y0) - z2 - z0
*/
template<uint first_size, uint second_size, uint result_size>
void Mul3Big3(const uint * x1, const uint * x0, const uint * y1, const uint * y0, uint * result)
{
uint i, c, xc, yc;
UInt<first_size> temp, temp2;
UInt<first_size*3> z1;
// z0 and z2 we store directly in the result (we don't use any temporary variables)
Mul3Big2<first_size>(x0, y0, result); // z0
Mul3Big2<second_size>(x1, y1, result+first_size*2); // z2
// now we calculate z1
// temp = (x0 + x1)
// temp2 = (y0 + y1)
// we're using temp and temp2 with UInt<first_size>, although there can be a carry but
// we simple remember it in xc and yc (xc and yc can be either 0 or 1),
// and (x0 + x1)*(y0 + y1) we calculate in this way (schoolbook algorithm):
//
// xc | temp
// yc | temp2
// --------------------
// (temp * temp2)
// xc*temp2 |
// yc*temp |
// xc*yc |
// ---------- z1 --------
//
// and the result is never larger in size than 3*first_size
xc = AddVector(x0, x1, first_size, second_size, temp.table);
yc = AddVector(y0, y1, first_size, second_size, temp2.table);
Mul3Big2<first_size>(temp.table, temp2.table, z1.table);
// clearing the rest of z1
for(i=first_size*2 ; i<first_size*3 ; ++i)
z1.table[i] = 0;
if( xc )
{
c = AddVector(z1.table+first_size, temp2.table, first_size*3-first_size, first_size, z1.table+first_size);
TTMATH_ASSERT( c==0 )
}
if( yc )
{
c = AddVector(z1.table+first_size, temp.table, first_size*3-first_size, first_size, z1.table+first_size);
TTMATH_ASSERT( c==0 )
}
if( xc && yc )
{
for( i=first_size*2 ; i<first_size*3 ; ++i )
if( ++z1.table[i] != 0 )
break; // break if there was no carry
}
// z1 = z1 - z2
c = SubVector(z1.table, result+first_size*2, first_size*3, second_size*2, z1.table);
TTMATH_ASSERT(c==0)
// z1 = z1 - z0
c = SubVector(z1.table, result, first_size*3, first_size*2, z1.table);
TTMATH_ASSERT(c==0)
// here we've calculated the z1
// now we're adding it to the result
if( first_size > second_size )
{
uint z1_size = result_size - first_size;
TTMATH_ASSERT( z1_size <= first_size*3 )
for(i=z1_size ; i<first_size*3 ; ++i)
TTMATH_ASSERT( z1.table[i] == 0 )
;
c = AddVector(result+first_size, z1.table, result_size-first_size, z1_size, result+first_size);
TTMATH_ASSERT(c==0)
}
else
{
c = AddVector(result+first_size, z1.table, result_size-first_size, first_size*3, result+first_size);
TTMATH_ASSERT(c==0)
}
}
#ifdef _MSC_VER
#pragma warning (default : 4717)
#endif
public:
/*!
multiplication this = this * ss2
*/
uint MulFastest(const UInt<value_size> & ss2)
{
UInt<value_size*2> result;
uint i, c = 0;
MulFastestBig(ss2, result);
// copying result
for(i=0 ; i<value_size ; ++i)
table[i] = result.table[i];
// testing carry
for( ; i<value_size*2 ; ++i)
if( result.table[i] != 0 )
{
c = 1;
break;
}
TTMATH_LOG("UInt::MulFastest")
return c;
}
/*!
multiplication result = this * ss2
this method is trying to select the fastest algorithm
(in the future this method can be improved)
*/
void MulFastestBig(const UInt<value_size> & ss2, UInt<value_size*2> & result)
{
if( value_size < TTMATH_USE_KARATSUBA_MULTIPLICATION_FROM_SIZE )
return Mul2Big(ss2, result);
uint x1size = value_size, x2size = value_size;
uint x1start = 0, x2start = 0;
for(x1size=value_size ; x1size>0 && table[x1size-1]==0 ; --x1size); for(x1size=value_size ; x1size>0 && table[x1size-1]==0 ; --x1size);
for(x2size=value_size ; x2size>0 && ss2.table[x2size-1]==0 ; --x2size); for(x2size=value_size ; x2size>0 && ss2.table[x2size-1]==0 ; --x2size);
if( x1size==0 || x2size==0 ) if( x1size==0 || x2size==0 )
{ {
TTMATH_LOG("UInt::Mul2Big") // either 'this' or 'ss2' is equal zero - the result is zero too
result.SetZero();
return; return;
} }
for(x1start=0 ; x1start<x1size && table[x1start]==0 ; ++x1start); for(x1start=0 ; x1start<x1size && table[x1start]==0 ; ++x1start);
for(x2start=0 ; x2start<x2size && ss2.table[x2start]==0 ; ++x2start); for(x2start=0 ; x2start<x2size && ss2.table[x2start]==0 ; ++x2start);
}
for(uint x1=x1start ; x1<x1size ; ++x1) uint distancex1 = x1size - x1start;
{ uint distancex2 = x2size - x2start;
for(uint x2=x2start ; x2<x2size ; ++x2)
{
MulTwoWords(table[x1], ss2.table[x2], &r2, &r1);
result.AddTwoInts(r2,r1,x2+x1);
// here will never be a carry
}
}
TTMATH_LOG("UInt::Mul2Big") if( distancex1 < 3 || distancex2 < 3 )
} // either 'this' or 'ss2' have only 2 (or 1) item different from zero (side by side)
// (this condition in the future can be improved)
return Mul2Big3<value_size>(table, ss2.table, result, x1start, x1size, x2start, x2size);
// Karatsuba multiplication
Mul3Big(ss2, result);
TTMATH_LOG("UInt::MulFastestBig")
}
/*! /*!
@ -1456,7 +1817,7 @@ private:
if( Div2_DivisorGreaterOrEqual( divisor, remainder, if( Div2_DivisorGreaterOrEqual( divisor, remainder,
table_id, index, table_id, index,
divisor_table_id, divisor_index) ) divisor_index) )
{ {
TTMATH_LOG("UInt::Div2_FindLeadingBitsAndCheck") TTMATH_LOG("UInt::Div2_FindLeadingBitsAndCheck")
return 0; return 0;
@ -1476,7 +1837,7 @@ private:
bool Div2_DivisorGreaterOrEqual( const UInt<value_size> & divisor, bool Div2_DivisorGreaterOrEqual( const UInt<value_size> & divisor,
UInt<value_size> * remainder, UInt<value_size> * remainder,
uint table_id, uint index, uint table_id, uint index,
uint /*divisor_table_id*/, uint divisor_index ) uint divisor_index )
{ {
if( divisor_index > index ) if( divisor_index > index )
{ {
@ -2349,7 +2710,7 @@ public:
do do
{ {
temp.DivInt(b, &rem); temp.DivInt(b, &rem);
character = DigitToChar( rem ); character = static_cast<char>( DigitToChar(rem) );
result.insert(result.begin(), character); result.insert(result.begin(), character);
} }
while( !temp.IsZero() ); while( !temp.IsZero() );
@ -2877,10 +3238,13 @@ public:
ttmathuint_noasm.h ttmathuint_noasm.h
*/ */
#if defined(TTMATH_NOASM) #if defined(TTMATH_NOASM) || !defined(__GNUC__)
// not yet implemented in 64 bit ASM
static uint AddTwoWords(uint a, uint b, uint carry, uint * result); static uint AddTwoWords(uint a, uint b, uint carry, uint * result);
static uint SubTwoWords(uint a, uint b, uint carry, uint * result); static uint SubTwoWords(uint a, uint b, uint carry, uint * result);
#endif
#if defined(TTMATH_NOASM)
#ifdef TTMATH_PLATFORM64 #ifdef TTMATH_PLATFORM64
union uint_ union uint_
@ -2906,19 +3270,19 @@ public:
private: private:
public: // !!! chwilowo public
uint Rcl2_one(uint c); uint Rcl2_one(uint c);
uint Rcr2_one(uint c); uint Rcr2_one(uint c);
uint Rcl2(uint bits, uint c); uint Rcl2(uint bits, uint c);
uint Rcr2(uint bits, uint c); uint Rcr2(uint bits, uint c);
public: public:
uint Add(const UInt<value_size> & ss2, uint c=0); uint Add(const UInt<value_size> & ss2, uint c=0);
uint AddInt(uint value, uint index = 0); uint AddInt(uint value, uint index = 0);
uint AddTwoInts(uint x2, uint x1, uint index); uint AddTwoInts(uint x2, uint x1, uint index);
static uint AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result);
uint Sub(const UInt<value_size> & ss2, uint c=0); uint Sub(const UInt<value_size> & ss2, uint c=0);
uint SubInt(uint value, uint index = 0); uint SubInt(uint value, uint index = 0);
static uint SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result);
static sint FindLeadingBitInWord(uint x); static sint FindLeadingBitInWord(uint x);
static uint SetBitInWord(uint & value, uint bit); static uint SetBitInWord(uint & value, uint bit);
static void MulTwoWords(uint a, uint b, uint * result_high, uint * result_low); static void MulTwoWords(uint a, uint b, uint * result_high, uint * result_low);
@ -2926,6 +3290,23 @@ public:
}; };
/*!
this specialization is needed in order to not confused the compiler "error: ISO C++ forbids zero-size array"
when compiling Mul3Big2() method
*/
template<>
class UInt<0>
{
public:
uint table[1];
void Mul2Big(const UInt<0> &, UInt<0> &) { TTMATH_ASSERT(false) };
void SetZero() { TTMATH_ASSERT(false) };
uint AddTwoInts(uint, uint, uint) { TTMATH_ASSERT(false) return 0; };
};
} //namespace } //namespace
#if defined(_MSC_VER) #if defined(_MSC_VER)

View File

@ -75,9 +75,9 @@ namespace ttmath
template<uint value_size> template<uint value_size>
uint UInt<value_size>::Add(const UInt<value_size> & ss2, uint c) uint UInt<value_size>::Add(const UInt<value_size> & ss2, uint c)
{ {
register uint b = value_size; uint b = value_size;
register uint * p1 = table; uint * p1 = table;
register uint * p2 = const_cast<uint*>(ss2.table); uint * p2 = const_cast<uint*>(ss2.table);
// we don't have to use TTMATH_REFERENCE_ASSERT here // we don't have to use TTMATH_REFERENCE_ASSERT here
// this algorithm doesn't require it // this algorithm doesn't require it
@ -96,13 +96,13 @@ namespace ttmath
sub eax,[c] // CF=c sub eax,[c] // CF=c
ALIGN 16 ALIGN 16
p: ttmath_loop:
mov eax,[esi+edx*4+0] mov eax,[esi+edx*4+0]
adc [ebx+edx*4+0],eax adc [ebx+edx*4+0],eax
lea edx, [edx+1] // inc edx, but faster (no flags dependencies) lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
dec ecx dec ecx
jnz p jnz ttmath_loop
setc al setc al
movzx eax, al movzx eax, al
@ -112,17 +112,13 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2;
// this part should be compiled with gcc // this part should be compiled with gcc
__asm__ __volatile__( __asm__ __volatile__(
"push %%ecx \n" "xorl %%edx, %%edx \n"
"negl %%eax \n" // CF=1 if rax!=0 , CF=0 if rax==0
"xorl %%eax, %%eax \n"
"movl %%eax, %%edx \n"
"subl %%edi, %%eax \n"
"1: \n" "1: \n"
"movl (%%esi,%%edx,4), %%eax \n" "movl (%%esi,%%edx,4), %%eax \n"
@ -132,14 +128,11 @@ namespace ttmath
"decl %%ecx \n" "decl %%ecx \n"
"jnz 1b \n" "jnz 1b \n"
"setc %%al \n" "adc %%ecx, %%ecx \n"
"movzx %%al,%%edx \n"
"pop %%ecx \n" : "=c" (c), "=a" (dummy), "=d" (dummy2)
: "0" (b), "1" (c), "b" (p1), "S" (p2)
: "=d" (c) : "cc", "memory" );
: "D" (c), "c" (b), "b" (p1), "S" (p2)
: "%eax", "cc", "memory" );
#endif #endif
TTMATH_LOG("UInt::Add") TTMATH_LOG("UInt::Add")
@ -189,16 +182,16 @@ namespace ttmath
mov eax, [value] mov eax, [value]
ALIGN 16 ALIGN 16
p: ttmath_loop:
add [ebx+edx*4], eax add [ebx+edx*4], eax
jnc end jnc ttmath_end
mov eax, 1 mov eax, 1
lea edx, [edx+1] // inc edx, but faster (no flags dependencies) lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
dec ecx dec ecx
jnz p jnz ttmath_loop
end: ttmath_end:
setc al setc al
movzx eax, al movzx eax, al
mov [c], eax mov [c], eax
@ -207,10 +200,9 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
__asm__ __volatile__( uint dummy, dummy2;
"push %%eax \n" __asm__ __volatile__(
"push %%ecx \n"
"subl %%edx, %%ecx \n" "subl %%edx, %%ecx \n"
@ -227,12 +219,10 @@ namespace ttmath
"setc %%al \n" "setc %%al \n"
"movzx %%al, %%edx \n" "movzx %%al, %%edx \n"
"pop %%ecx \n" : "=d" (c), "=a" (dummy), "=c" (dummy2)
"pop %%eax \n" : "0" (index), "1" (value), "2" (b), "b" (p1)
: "=d" (c)
: "a" (value), "c" (b), "0" (index), "b" (p1)
: "cc", "memory" ); : "cc", "memory" );
#endif #endif
TTMATH_LOG("UInt::AddInt") TTMATH_LOG("UInt::AddInt")
@ -297,16 +287,16 @@ namespace ttmath
mov eax, [x2] mov eax, [x2]
ALIGN 16 ALIGN 16
p: ttmath_loop:
adc [ebx+edx*4+4], eax adc [ebx+edx*4+4], eax
jnc end jnc ttmath_end
mov eax, 0 mov eax, 0
lea edx, [edx+1] // inc edx, but faster (no flags dependencies) lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
dec ecx dec ecx
jnz p jnz ttmath_loop
end: ttmath_end:
setc al setc al
movzx eax, al movzx eax, al
mov [c], eax mov [c], eax
@ -338,12 +328,10 @@ namespace ttmath
"setc %%al \n" "setc %%al \n"
"movzx %%al, %%eax \n" "movzx %%al, %%eax \n"
"pop %%edx \n" : "=a" (c), "=c" (dummy), "=d" (dummy2)
"pop %%ecx \n" : "0" (x2), "1" (b), "2" (index), "b" (p1), "S" (x1)
: "=a" (c)
: "c" (b), "d" (index), "b" (p1), "S" (x1), "0" (x2)
: "cc", "memory" ); : "cc", "memory" );
#endif #endif
TTMATH_LOG("UInt::AddTwoInts") TTMATH_LOG("UInt::AddTwoInts")
@ -353,6 +341,131 @@ namespace ttmath
/*!
this static method addes one vector to the other
'ss1' is larger in size or equal to 'ss2'
ss1 points to the first (larger) vector
ss2 points to the second vector
ss1_size - size of the ss1 (and size of the result too)
ss2_size - size of the ss2
result - is the result vector (which has size the same as ss1: ss1_size)
Example: ss1_size is 5, ss2_size is 3
ss1: ss2: result (output):
5 1 5+1
4 3 4+3
2 7 2+7
6 6
9 9
of course the carry is propagated and will be returned from the last item
(this method is used by the Karatsuba multiplication algorithm)
*/
template<uint value_size>
uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
{
TTMATH_ASSERT( ss1_size >= ss2_size )
uint rest = ss1_size - ss2_size;
uint c;
#ifndef __GNUC__
// this part might be compiled with for example visual c
__asm
{
mov ecx, [ss2_size]
xor edx, edx // edx = 0, cf = 0
mov esi, [ss1]
mov ebx, [ss2]
mov edi, [result]
ALIGN 16
ttmath_loop:
mov eax, [esi+edx*4]
adc eax, [ebx+edx*4]
mov [edi+edx*4], eax
lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
dec ecx
jnz ttmath_loop
adc ecx, ecx // ecx has the cf state
mov ebx, [rest]
or ebx, ebx
jz ttmath_end
xor ebx, ebx // ebx = 0
neg ecx // setting cf from ecx
mov ecx, [rest] // ecx is != 0
ttmath_loop2:
mov eax, [esi+edx*4]
adc eax, ebx
mov [edi+edx*4], eax
lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
dec ecx
jnz ttmath_loop2
adc ecx, ecx
ttmath_end:
mov [c], ecx
}
#endif
#ifdef __GNUC__
// this part should be compiled with gcc
uint dummy1, dummy2, dummy3;
__asm__ __volatile__(
"push %%edx \n"
"xor %%edx, %%edx \n" // edx = 0, cf = 0
"1: \n"
"mov (%%esi,%%edx,4), %%eax \n"
"adc (%%ebx,%%edx,4), %%eax \n"
"mov %%eax, (%%edi,%%edx,4) \n"
"inc %%edx \n"
"dec %%ecx \n"
"jnz 1b \n"
"adc %%ecx, %%ecx \n" // ecx has the cf state
"pop %%eax \n" // eax = rest
"or %%eax, %%eax \n"
"jz 3f \n"
"xor %%ebx, %%ebx \n" // ebx = 0
"neg %%ecx \n" // setting cf from ecx
"mov %%eax, %%ecx \n" // ecx=rest and is != 0
"2: \n"
"mov (%%esi, %%edx, 4), %%eax \n"
"adc %%ebx, %%eax \n"
"mov %%eax, (%%edi, %%edx, 4) \n"
"inc %%edx \n"
"dec %%ecx \n"
"jnz 2b \n"
"adc %%ecx, %%ecx \n"
"3: \n"
: "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3)
: "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result)
: "cc", "memory" );
#endif
TTMATH_LOG("UInt::AddVector")
return c;
}
/*! /*!
@ -366,9 +479,9 @@ namespace ttmath
template<uint value_size> template<uint value_size>
uint UInt<value_size>::Sub(const UInt<value_size> & ss2, uint c) uint UInt<value_size>::Sub(const UInt<value_size> & ss2, uint c)
{ {
register uint b = value_size; uint b = value_size;
register uint * p1 = table; uint * p1 = table;
register uint * p2 = const_cast<uint*>(ss2.table); uint * p2 = const_cast<uint*>(ss2.table);
// we don't have to use TTMATH_REFERENCE_ASSERT here // we don't have to use TTMATH_REFERENCE_ASSERT here
// this algorithm doesn't require it // this algorithm doesn't require it
@ -387,13 +500,13 @@ namespace ttmath
sub eax, [c] sub eax, [c]
ALIGN 16 ALIGN 16
p: ttmath_loop:
mov eax, [esi+edx*4] mov eax, [esi+edx*4]
sbb [ebx+edx*4], eax sbb [ebx+edx*4], eax
lea edx, [edx+1] // inc edx, but faster (no flags dependencies) lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
dec ecx dec ecx
jnz p jnz ttmath_loop
setc al setc al
movzx eax, al movzx eax, al
@ -404,13 +517,12 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2;
__asm__ __volatile__( __asm__ __volatile__(
"push %%ecx \n"
"xorl %%eax, %%eax \n"
"movl %%eax, %%edx \n"
"subl %%edi, %%eax \n"
"xorl %%edx, %%edx \n"
"negl %%eax \n" // CF=1 if rax!=0 , CF=0 if rax==0
"1: \n" "1: \n"
"movl (%%esi,%%edx,4), %%eax \n" "movl (%%esi,%%edx,4), %%eax \n"
@ -420,14 +532,11 @@ namespace ttmath
"decl %%ecx \n" "decl %%ecx \n"
"jnz 1b \n" "jnz 1b \n"
"setc %%al \n" "adc %%ecx, %%ecx \n"
"movzx %%al,%%edx \n"
"pop %%ecx \n" : "=c" (c), "=a" (dummy), "=d" (dummy2)
: "0" (b), "1" (c), "b" (p1), "S" (p2)
: "=d" (c) : "cc", "memory" );
: "D" (c), "c" (b), "b" (p1), "S" (p2)
: "%eax", "cc", "memory" );
#endif #endif
@ -479,16 +588,16 @@ namespace ttmath
mov eax, [value] mov eax, [value]
ALIGN 16 ALIGN 16
p: ttmath_loop:
sub [ebx+edx*4], eax sub [ebx+edx*4], eax
jnc end jnc ttmath_end
mov eax, 1 mov eax, 1
lea edx, [edx+1] // inc edx, but faster (no flags dependencies) lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
dec ecx dec ecx
jnz p jnz ttmath_loop
end: ttmath_end:
setc al setc al
movzx eax, al movzx eax, al
mov [c], eax mov [c], eax
@ -497,10 +606,9 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
__asm__ __volatile__( uint dummy, dummy2;
"push %%eax \n" __asm__ __volatile__(
"push %%ecx \n"
"subl %%edx, %%ecx \n" "subl %%edx, %%ecx \n"
@ -517,12 +625,10 @@ namespace ttmath
"setc %%al \n" "setc %%al \n"
"movzx %%al, %%edx \n" "movzx %%al, %%edx \n"
"pop %%ecx \n" : "=d" (c), "=a" (dummy), "=c" (dummy2)
"pop %%eax \n" : "0" (index), "1" (value), "2" (b), "b" (p1)
: "=d" (c)
: "a" (value), "c" (b), "0" (index), "b" (p1)
: "cc", "memory" ); : "cc", "memory" );
#endif #endif
TTMATH_LOG("UInt::SubInt") TTMATH_LOG("UInt::SubInt")
@ -532,6 +638,141 @@ namespace ttmath
/*!
this static method subtractes one vector from the other
'ss1' is larger in size or equal to 'ss2'
ss1 points to the first (larger) vector
ss2 points to the second vector
ss1_size - size of the ss1 (and size of the result too)
ss2_size - size of the ss2
result - is the result vector (which has size the same as ss1: ss1_size)
Example: ss1_size is 5, ss2_size is 3
ss1: ss2: result (output):
5 1 5-1
4 3 4-3
2 7 2-7
6 6-1 (the borrow from previous item)
9 9
return (carry): 0
of course the carry (borrow) is propagated and will be returned from the last item
(this method is used by the Karatsuba multiplication algorithm)
*/
template<uint value_size>
uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
{
TTMATH_ASSERT( ss1_size >= ss2_size )
uint rest = ss1_size - ss2_size;
uint c;
#ifndef __GNUC__
// this part might be compiled with for example visual c
/*
the asm code is nearly the same as in AddVector
only two instructions 'adc' are changed to 'sbb'
*/
__asm
{
mov ecx, [ss2_size]
xor edx, edx // edx = 0, cf = 0
mov esi, [ss1]
mov ebx, [ss2]
mov edi, [result]
ttmath_loop:
mov eax, [esi+edx*4]
sbb eax, [ebx+edx*4]
mov [edi+edx*4], eax
lea edx, [edx+1]
dec ecx
jnz ttmath_loop
adc ecx, ecx // ecx has the cf state
mov ebx, [rest]
or ebx, ebx
jz ttmath_end
xor ebx, ebx // ebx = 0
neg ecx // setting cf from ecx
mov ecx, [rest] // ecx is != 0
ttmath_loop2:
mov eax, [esi+edx*4]
sbb eax, ebx
mov [edi+edx*4], eax
lea edx, [edx+1]
dec ecx
jnz ttmath_loop2
adc ecx, ecx
ttmath_end:
mov [c], ecx
}
#endif
#ifdef __GNUC__
// this part should be compiled with gcc
uint dummy1, dummy2, dummy3;
__asm__ __volatile__(
"push %%edx \n"
"xor %%edx, %%edx \n" // edx = 0, cf = 0
"1: \n"
"mov (%%esi,%%edx,4), %%eax \n"
"sbb (%%ebx,%%edx,4), %%eax \n"
"mov %%eax, (%%edi,%%edx,4) \n"
"inc %%edx \n"
"dec %%ecx \n"
"jnz 1b \n"
"adc %%ecx, %%ecx \n" // ecx has the cf state
"pop %%eax \n" // eax = rest
"or %%eax, %%eax \n"
"jz 3f \n"
"xor %%ebx, %%ebx \n" // ebx = 0
"neg %%ecx \n" // setting cf from ecx
"mov %%eax, %%ecx \n" // ecx=rest and is != 0
"2: \n"
"mov (%%esi, %%edx, 4), %%eax \n"
"sbb %%ebx, %%eax \n"
"mov %%eax, (%%edi, %%edx, 4) \n"
"inc %%edx \n"
"dec %%ecx \n"
"jnz 2b \n"
"adc %%ecx, %%ecx \n"
"3: \n"
: "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3)
: "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result)
: "cc", "memory" );
#endif
TTMATH_LOG("UInt::SubVector")
return c;
}
/*! /*!
this method moves all bits into the left hand side this method moves all bits into the left hand side
return value <- this <- c return value <- this <- c
@ -547,8 +788,8 @@ namespace ttmath
template<uint value_size> template<uint value_size>
uint UInt<value_size>::Rcl2_one(uint c) uint UInt<value_size>::Rcl2_one(uint c)
{ {
register sint b = value_size; uint b = value_size;
register uint * p1 = table; uint * p1 = table;
#ifndef __GNUC__ #ifndef __GNUC__
__asm __asm
@ -562,12 +803,12 @@ namespace ttmath
mov ecx, [b] mov ecx, [b]
ALIGN 16 ALIGN 16
p: ttmath_loop:
rcl dword ptr [ebx+edx*4], 1 rcl dword ptr [ebx+edx*4], 1
lea edx, [edx+1] // inc edx, but faster (no flags dependencies) lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
dec ecx dec ecx
jnz p jnz ttmath_loop
setc al setc al
movzx eax, al movzx eax, al
@ -577,13 +818,12 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2;
__asm__ __volatile__( __asm__ __volatile__(
"push %%edx \n"
"push %%ecx \n"
"xorl %%edx, %%edx \n" // edx=0 "xorl %%edx, %%edx \n" // edx=0
"neg %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0 "negl %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0
"1: \n" "1: \n"
"rcll $1, (%%ebx, %%edx, 4) \n" "rcll $1, (%%ebx, %%edx, 4) \n"
@ -592,15 +832,12 @@ namespace ttmath
"decl %%ecx \n" "decl %%ecx \n"
"jnz 1b \n" "jnz 1b \n"
"setc %%al \n" "adcl %%ecx, %%ecx \n"
"movzx %%al, %%eax \n"
"pop %%ecx \n" : "=c" (c), "=a" (dummy), "=d" (dummy2)
"pop %%edx \n" : "0" (b), "1" (c), "b" (p1)
: "=a" (c)
: "0" (c), "c" (b), "b" (p1)
: "cc", "memory" ); : "cc", "memory" );
#endif #endif
TTMATH_LOG("UInt::Rcl2_one") TTMATH_LOG("UInt::Rcl2_one")
@ -625,8 +862,8 @@ namespace ttmath
template<uint value_size> template<uint value_size>
uint UInt<value_size>::Rcr2_one(uint c) uint UInt<value_size>::Rcr2_one(uint c)
{ {
register sint b = value_size; uint b = value_size;
register uint * p1 = table; uint * p1 = table;
#ifndef __GNUC__ #ifndef __GNUC__
__asm __asm
@ -638,11 +875,11 @@ namespace ttmath
mov ecx, [b] mov ecx, [b]
ALIGN 16 ALIGN 16
p: ttmath_loop:
rcr dword ptr [ebx+ecx*4-4], 1 rcr dword ptr [ebx+ecx*4-4], 1
dec ecx dec ecx
jnz p jnz ttmath_loop
setc al setc al
movzx eax, al movzx eax, al
@ -652,11 +889,11 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy;
__asm__ __volatile__( __asm__ __volatile__(
"push %%ecx \n" "negl %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0
"neg %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0
"1: \n" "1: \n"
"rcrl $1, -4(%%ebx, %%ecx, 4) \n" "rcrl $1, -4(%%ebx, %%ecx, 4) \n"
@ -664,14 +901,12 @@ namespace ttmath
"decl %%ecx \n" "decl %%ecx \n"
"jnz 1b \n" "jnz 1b \n"
"setc %%al \n" "adcl %%ecx, %%ecx \n"
"movzx %%al, %%eax \n"
"pop %%ecx \n" : "=c" (c), "=a" (dummy)
: "0" (b), "1" (c), "b" (p1)
: "=a" (c)
: "0" (c), "c" (b), "b" (p1)
: "cc", "memory" ); : "cc", "memory" );
#endif #endif
TTMATH_LOG("UInt::Rcr2_one") TTMATH_LOG("UInt::Rcr2_one")
@ -724,7 +959,7 @@ namespace ttmath
cmovnz esi, [mask] // if c then old value = mask cmovnz esi, [mask] // if c then old value = mask
ALIGN 16 ALIGN 16
p: ttmath_loop:
rol dword ptr [ebx+edx*4], cl rol dword ptr [ebx+edx*4], cl
mov eax, [ebx+edx*4] mov eax, [ebx+edx*4]
@ -735,7 +970,7 @@ namespace ttmath
lea edx, [edx+1] // inc edx, but faster (no flags dependencies) lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
dec edi dec edi
jnz p jnz ttmath_loop
and eax, 1 and eax, 1
mov dword ptr [c], eax mov dword ptr [c], eax
@ -744,31 +979,30 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2, dummy3;
__asm__ __volatile__( __asm__ __volatile__(
"push %%edx \n" "push %%ebp \n"
"push %%esi \n"
"push %%edi \n"
"movl %%ecx, %%esi \n" "movl %%ecx, %%esi \n"
"movl $32, %%ecx \n" "movl $32, %%ecx \n"
"subl %%esi, %%ecx \n" "subl %%esi, %%ecx \n" // ecx = 32 - bits
"movl $-1, %%edx \n" "movl $-1, %%edx \n" // edx = -1 (all bits set to one)
"shrl %%cl, %%edx \n" "shrl %%cl, %%edx \n" // shifting (0 -> edx -> cf) (cl times)
"movl %%edx, %[amask] \n" "movl %%edx, %%ebp \n" // ebp = edx = mask
"movl %%esi, %%ecx \n" "movl %%esi, %%ecx \n"
"xorl %%edx, %%edx \n" "xorl %%edx, %%edx \n"
"movl %%edx, %%esi \n" "movl %%edx, %%esi \n"
"orl %%eax, %%eax \n" "orl %%eax, %%eax \n"
"cmovnz %[amask], %%esi \n" "cmovnz %%ebp, %%esi \n" // if(c) esi=mask else esi=0
"1: \n" "1: \n"
"roll %%cl, (%%ebx,%%edx,4) \n" "roll %%cl, (%%ebx,%%edx,4) \n"
"movl (%%ebx,%%edx,4), %%eax \n" "movl (%%ebx,%%edx,4), %%eax \n"
"andl %[amask], %%eax \n" "andl %%ebp, %%eax \n"
"xorl %%eax, (%%ebx,%%edx,4) \n" "xorl %%eax, (%%ebx,%%edx,4) \n"
"orl %%esi, (%%ebx,%%edx,4) \n" "orl %%esi, (%%ebx,%%edx,4) \n"
"movl %%eax, %%esi \n" "movl %%eax, %%esi \n"
@ -779,13 +1013,12 @@ namespace ttmath
"and $1, %%eax \n" "and $1, %%eax \n"
"pop %%edi \n" "pop %%ebp \n"
"pop %%esi \n"
"pop %%edx \n"
: "=a" (c) : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
: "0" (c), "D" (b), "b" (p1), "c" (bits), [amask] "m" (mask) : "0" (c), "1" (b), "b" (p1), "c" (bits)
: "cc", "memory" ); : "cc", "memory" );
#endif #endif
TTMATH_LOG("UInt::Rcl2") TTMATH_LOG("UInt::Rcl2")
@ -813,9 +1046,9 @@ namespace ttmath
{ {
TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT ) TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
register sint b = value_size; uint b = value_size;
register uint * p1 = table; uint * p1 = table;
register uint mask; uint mask;
#ifndef __GNUC__ #ifndef __GNUC__
__asm __asm
@ -841,7 +1074,7 @@ namespace ttmath
cmovnz esi, [mask] // if c then old value = mask cmovnz esi, [mask] // if c then old value = mask
ALIGN 16 ALIGN 16
p: ttmath_loop:
ror dword ptr [ebx+edx*4], cl ror dword ptr [ebx+edx*4], cl
mov eax, [ebx+edx*4] mov eax, [ebx+edx*4]
@ -852,7 +1085,7 @@ namespace ttmath
dec edx dec edx
dec edi dec edi
jnz p jnz ttmath_loop
rol eax, 1 // bit 31 will be bit 0 rol eax, 1 // bit 31 will be bit 0
and eax, 1 and eax, 1
@ -863,33 +1096,32 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2, dummy3;
__asm__ __volatile__( __asm__ __volatile__(
"push %%edx \n" "push %%ebp \n"
"push %%esi \n"
"push %%edi \n"
"movl %%ecx, %%esi \n" "movl %%ecx, %%esi \n"
"movl $32, %%ecx \n" "movl $32, %%ecx \n"
"subl %%esi, %%ecx \n" "subl %%esi, %%ecx \n" // ecx = 32 - bits
"movl $-1, %%edx \n" "movl $-1, %%edx \n" // edx = -1 (all bits set to one)
"shll %%cl, %%edx \n" "shll %%cl, %%edx \n" // shifting (cf <- edx <- 0) (cl times)
"movl %%edx, %[amask] \n" "movl %%edx, %%ebp \n" // ebp = edx = mask
"movl %%esi, %%ecx \n" "movl %%esi, %%ecx \n"
"xorl %%edx, %%edx \n" "xorl %%edx, %%edx \n"
"movl %%edx, %%esi \n" "movl %%edx, %%esi \n"
"addl %%edi, %%edx \n" "addl %%edi, %%edx \n"
"decl %%edx \n" "decl %%edx \n" // edx is pointing at the end of the table (on last word)
"orl %%eax, %%eax \n" "orl %%eax, %%eax \n"
"cmovnz %[amask], %%esi \n" "cmovnz %%ebp, %%esi \n" // if(c) esi=mask else esi=0
"1: \n" "1: \n"
"rorl %%cl, (%%ebx,%%edx,4) \n" "rorl %%cl, (%%ebx,%%edx,4) \n"
"movl (%%ebx,%%edx,4), %%eax \n" "movl (%%ebx,%%edx,4), %%eax \n"
"andl %[amask], %%eax \n" "andl %%ebp, %%eax \n"
"xorl %%eax, (%%ebx,%%edx,4) \n" "xorl %%eax, (%%ebx,%%edx,4) \n"
"orl %%esi, (%%ebx,%%edx,4) \n" "orl %%esi, (%%ebx,%%edx,4) \n"
"movl %%eax, %%esi \n" "movl %%eax, %%esi \n"
@ -901,13 +1133,12 @@ namespace ttmath
"roll $1, %%eax \n" "roll $1, %%eax \n"
"andl $1, %%eax \n" "andl $1, %%eax \n"
"pop %%edi \n" "pop %%ebp \n"
"pop %%esi \n"
"pop %%edx \n"
: "=a" (c) : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
: "0" (c), "D" (b), "b" (p1), "c" (bits), [amask] "m" (mask) : "0" (c), "1" (b), "b" (p1), "c" (bits)
: "cc", "memory" ); : "cc", "memory" );
#endif #endif
TTMATH_LOG("UInt::Rcr2") TTMATH_LOG("UInt::Rcr2")
@ -939,15 +1170,16 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
__asm__ __volatile__( uint dummy;
"bsrl %1, %0 \n" __asm__ (
"jnz 1f \n"
"movl $-1, %0 \n"
"1: \n"
: "=R" (result) "movl $-1, %1 \n"
: "R" (x) "bsrl %2, %0 \n"
"cmovz %1, %0 \n"
: "=r" (result), "=&r" (dummy)
: "r" (x)
: "cc" ); : "cc" );
#endif #endif
@ -974,8 +1206,8 @@ namespace ttmath
{ {
TTMATH_ASSERT( bit < TTMATH_BITS_PER_UINT ) TTMATH_ASSERT( bit < TTMATH_BITS_PER_UINT )
uint v = value;
uint old_bit; uint old_bit;
uint v = value;
#ifndef __GNUC__ #ifndef __GNUC__
__asm __asm
@ -993,17 +1225,16 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
__asm__ (
__asm__ __volatile__(
"btsl %%ebx, %%eax \n" "btsl %%ebx, %%eax \n"
"setc %%bl \n" "setc %%bl \n"
"movzx %%bl, %%ebx \n" "movzx %%bl, %%ebx \n"
: "=a" (v), "=b" (old_bit) : "=a" (v), "=b" (old_bit)
: "0" (v), "1" (bit) : "0" (v), "1" (bit)
: "cc" ); : "cc" );
#endif #endif
value = v; value = v;
@ -1031,8 +1262,8 @@ namespace ttmath
this has no effect in visual studio but it's useful when this has no effect in visual studio but it's useful when
using gcc and options like -Ox using gcc and options like -Ox
*/ */
register uint result1_; uint result1_;
register uint result2_; uint result2_;
#ifndef __GNUC__ #ifndef __GNUC__
@ -1050,7 +1281,7 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
__asm__ __volatile__( __asm__ (
"mull %%edx \n" "mull %%edx \n"
@ -1093,8 +1324,8 @@ namespace ttmath
template<uint value_size> template<uint value_size>
void UInt<value_size>::DivTwoWords(uint a, uint b, uint c, uint * r, uint * rest) void UInt<value_size>::DivTwoWords(uint a, uint b, uint c, uint * r, uint * rest)
{ {
register uint r_; uint r_;
register uint rest_; uint rest_;
/* /*
these variables have similar meaning like those in these variables have similar meaning like those in
the multiplication algorithm MulTwoWords the multiplication algorithm MulTwoWords
@ -1117,12 +1348,12 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
__asm__ __volatile__( __asm__ (
"divl %%ecx \n" "divl %%ecx \n"
: "=a" (r_), "=d" (rest_) : "=a" (r_), "=d" (rest_)
: "d" (a), "a" (b), "c" (c) : "0" (b), "1" (a), "c" (c)
: "cc" ); : "cc" );
#endif #endif

View File

@ -911,6 +911,131 @@ namespace ttmath
*rest = rest_; *rest = rest_;
} }
template<uint value_size>
uint UInt<value_size>::AddTwoWords(uint a, uint b, uint carry, uint * result)
{
uint temp;
if( carry == 0 )
{
temp = a + b;
if( temp < a )
carry = 1;
}
else
{
carry = 1;
temp = a + b + carry;
if( temp > a ) // !(temp<=a)
carry = 0;
}
*result = temp;
return carry;
}
template<uint value_size>
uint UInt<value_size>::SubTwoWords(uint a, uint b, uint carry, uint * result)
{
if( carry == 0 )
{
*result = a - b;
if( a < b )
carry = 1;
}
else
{
carry = 1;
*result = a - b - carry;
if( a > b ) // !(a <= b )
carry = 0;
}
return carry;
}
/*!
this static method addes one vector to the other
'ss1' is larger in size or equal to 'ss2'
ss1 points to the first (larger) vector
ss2 points to the second vector
ss1_size - size of the ss1 (and size of the result too)
ss2_size - size of the ss2
result - is the result vector (which has size the same as ss1: ss1_size)
Example: ss1_size is 5, ss2_size is 3
ss1: ss2: result (output):
5 1 5+1
4 3 4+3
2 7 2+7
6 6
9 9
of course the carry is propagated and will be returned from the last item
(this method is used by the Karatsuba multiplication algorithm)
*/
template<uint value_size>
uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
{
uint i, c = 0;
TTMATH_ASSERT( ss1_size >= ss2_size )
for(i=0 ; i<ss2_size ; ++i)
c = AddTwoWords(ss1[i], ss2[i], c, &result[i]);
for( ; i<ss1_size ; ++i)
c = AddTwoWords(ss1[i], 0, c, &result[i]);
TTMATH_LOG("UInt::AddVector")
return c;
}
/*!
this static method subtractes one vector from the other
'ss1' is larger in size or equal to 'ss2'
ss1 points to the first (larger) vector
ss2 points to the second vector
ss1_size - size of the ss1 (and size of the result too)
ss2_size - size of the ss2
result - is the result vector (which has size the same as ss1: ss1_size)
Example: ss1_size is 5, ss2_size is 3
ss1: ss2: result (output):
5 1 5-1
4 3 4-3
2 7 2-7
6 6-1 (the borrow from previous item)
9 9
return (carry): 0
of course the carry (borrow) is propagated and will be returned from the last item
(this method is used by the Karatsuba multiplication algorithm)
*/
template<uint value_size>
uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
{
uint i, c = 0;
TTMATH_ASSERT( ss1_size >= ss2_size )
for(i=0 ; i<ss2_size ; ++i)
c = SubTwoWords(ss1[i], ss2[i], c, &result[i]);
for( ; i<ss1_size ; ++i)
c = SubTwoWords(ss1[i], 0, c, &result[i]);
TTMATH_LOG("UInt::SubVector")
return c;
}
} //namespace } //namespace