fixed: Big::ToDouble(double &) always set +INF (infinity)

when the value was too large (even for negative values)
       (it should set -INF in such a case)
added: some missing methods for converting
       for UInt<>, Int<> and Big<> classes:
       uint ToUInt()
       sint ToInt()
       ToUInt(uint32_t &)
       ToInt(uint32_t &)
       ToInt(int32_t &)
       ToUInt(uint64_t &)
       ToInt(uint64_t &)
       ToInt(int64_t &)
       FromUInt(uint32_t &)
       FromInt(uint32_t &)
       FromInt(int32_t &)
       FromUInt(uint64_t &)
       FromInt(uint64_t &)
       FromInt(int64_t &)
       and appropriate constructors and operators
       *** versions for 64 bit platforms are not tested yet ***
added: double Big::ToDouble() /there was only Big::ToDouble(double &) /
       uint Big::ToFloat(float &)
       float Big::ToFloat()



git-svn-id: svn://ttmath.org/publicrep/ttmath/trunk@316 e52654a7-88a9-db11-a3e9-0013d4bc506e
This commit is contained in:
Tomasz Sowa 2010-09-22 19:43:12 +00:00
parent b028896118
commit a40e951923
6 changed files with 419 additions and 68 deletions

View File

@ -1,4 +1,4 @@
Version 0.9.2 prerelease (2010.09.21): Version 0.9.2 prerelease (2010.09.22):
* fixed: Big::Add() sometimes incorrectly rounded the last bit from its mantissa * fixed: Big::Add() sometimes incorrectly rounded the last bit from its mantissa
* fixed: Big::BigAnd() Big::BigOr() Big::BigXor() should have set NaN * fixed: Big::BigAnd() Big::BigOr() Big::BigXor() should have set NaN
when the argument was negative (they only returned 2) when the argument was negative (they only returned 2)
@ -8,6 +8,9 @@ Version 0.9.2 prerelease (2010.09.21):
in some cases when in the output string the exponent should be equal zero in some cases when in the output string the exponent should be equal zero
the method changes the exponent to one so the last digit from the mantissa the method changes the exponent to one so the last digit from the mantissa
was lost was lost
* fixed: Big::ToDouble(double &) always set +INF (infinity)
when the value was too large (even for negative values)
(it should set -INF in such a case)
* added: some missing operators * added: some missing operators
UInt::operator~() /* bitwise neg */ UInt::operator~() /* bitwise neg */
UInt::operator&() /* bitwise and */ UInt::operator&() /* bitwise and */
@ -54,6 +57,26 @@ Version 0.9.2 prerelease (2010.09.21):
but you can force using asm version (the same asm as for Microsoft Visual) but you can force using asm version (the same asm as for Microsoft Visual)
by defining TTMATH_FORCEASM macro by defining TTMATH_FORCEASM macro
you have to be sure that your compiler accept such an asm format you have to be sure that your compiler accept such an asm format
* added: some missing methods for converting
for UInt<>, Int<> and Big<> classes:
uint ToUInt()
sint ToInt()
ToUInt(uint32_t &)
ToInt(uint32_t &)
ToInt(int32_t &)
ToUInt(uint64_t &)
ToInt(uint64_t &)
ToInt(int64_t &)
FromUInt(uint32_t &)
FromInt(uint32_t &)
FromInt(int32_t &)
FromUInt(uint64_t &)
FromInt(uint64_t &)
FromInt(int64_t &)
and appropriate constructors and operators
* added: double Big::ToDouble() /there was only Big::ToDouble(double &) /
uint Big::ToFloat(float &)
float Big::ToFloat()
* changed: now asm version is available only on x86 and amd64 * changed: now asm version is available only on x86 and amd64
(and only for GCC and MS VC compilers) (and only for GCC and MS VC compilers)
* removed: macro TTMATH_RELEASE * removed: macro TTMATH_RELEASE

View File

@ -88,13 +88,13 @@ a = 123456.543456
b = 98767878.124322 b = 98767878.124322
a + b = 98891334.667778 a + b = 98891334.667778
a - b = -98644421.580866 a - b = -98644421.580866
a * b = 12193540837712.2708 a * b = 12193540837712.27076
a / b = 0.00124996654580957646 a / b = 0.00124996654580957646
Calculating with a carry Calculating with a carry
a = 1.624801256066640878e+646457012 a = 1.6248012560666408782e+646457012
b = 456.319999999999993 b = 456.319999999999993
a + b = 1.624801256066640878e+646457012 a + b = 1.6248012560666408782e+646457012
a - b = 1.624801256066640878e+646457012 a - b = 1.6248012560666408782e+646457012
a * b = (carry) a * b = (carry)
a / b = 3.560661939136222174e+646457009 a / b = 3.560661939136222174e+646457009
*/ */

View File

@ -102,12 +102,12 @@ b = 98767878.124322
a + b = 98891334.667778 a + b = 98891334.667778
a - b = -98644421.580866 a - b = -98644421.580866
a * b = 12193540837712.270763536832 a * b = 12193540837712.270763536832
a / b = 0.001249966545809576460596448526166860913 a / b = 0.0012499665458095764605964485261668609133
Calculating with a carry Calculating with a carry
a = 2.3495345545711177736883282090959505003e+2776511644261678604 a = 2.34953455457111777368832820909595050034e+2776511644261678604
b = 456.3199999999999931787897367030382156 b = 456.3199999999999931787897367030382156
a + b = 2.3495345545711177736883282090959505003e+2776511644261678604 a + b = 2.34953455457111777368832820909595050034e+2776511644261678604
a - b = 2.3495345545711177736883282090959505003e+2776511644261678604 a - b = 2.34953455457111777368832820909595050034e+2776511644261678604
a * b = (carry) a * b = (carry)
a / b = 5.1488748127873374141170361292780486452e+2776511644261678601 a / b = 5.1488748127873374141170361292780486452e+2776511644261678601
*/ */

View File

@ -29,7 +29,7 @@ const char equation[] = " (34 + 24) * 123 - 34.32 ^ 6 * sin(2.56) - atan(10)";
/* /*
the result (on 32 bit platform): the result (on 32 bit platform):
-897705014.52573107 -897705014.525731067
*/ */

View File

@ -2260,22 +2260,19 @@ public:
} }
/*! private:
this method converts 'this' into 'result'
/*!
an auxiliary method for converting 'this' into 'result'
if the value is too big this method returns a carry (1) if the value is too big this method returns a carry (1)
*/ */
uint ToUInt(uint & result, bool test_sign = true) const uint ToUIntOrInt(uint & result) const
{ {
result = 0; result = 0;
if( IsZero() ) if( IsZero() )
return 0; return 0;
if( test_sign && IsSign() )
// the result should be positive
return 1;
sint maxbit = -sint(man*TTMATH_BITS_PER_UINT); sint maxbit = -sint(man*TTMATH_BITS_PER_UINT);
if( exponent > maxbit + sint(TTMATH_BITS_PER_UINT) ) if( exponent > maxbit + sint(TTMATH_BITS_PER_UINT) )
@ -2287,21 +2284,74 @@ public:
// our value is from the range of (-1,1) and we return zero // our value is from the range of (-1,1) and we return zero
return 0; return 0;
UInt<man> mantissa_temp(mantissa);
// exponent is from a range of (maxbit, maxbit + sint(TTMATH_BITS_PER_UINT) > // exponent is from a range of (maxbit, maxbit + sint(TTMATH_BITS_PER_UINT) >
// and [maxbit + sint(TTMATH_BITS_PER_UINT] <= 0
sint how_many_bits = exponent.ToInt(); sint how_many_bits = exponent.ToInt();
// how_many_bits is negative, we'll make it positive // how_many_bits is negative, we'll make it positive
how_many_bits = -how_many_bits; how_many_bits = -how_many_bits;
// we're taking into account only the last word in a mantissa table result = (mantissa.table[man-1] >> (how_many_bits % TTMATH_BITS_PER_UINT));
mantissa_temp.Rcr( how_many_bits % TTMATH_BITS_PER_UINT, 0 );
result = mantissa_temp.table[ man-1 ];
return 0; return 0;
} }
public:
/*!
    returns the value of 'this' converted to uint

    the carry reported by ToUInt(uint &) is discarded here,
    use that overload directly when the carry has to be checked
*/
uint ToUInt() const
{
    uint converted;
    ToUInt(converted);

    return converted;
}
/*!
    stores the value of 'this' in 'result'

    returns 1 (a carry) when the auxiliary conversion ToUIntOrInt() reports
    a carry (the value is too big) or when 'this' has the sign set,
    otherwise returns 0
*/
uint ToUInt(uint & result) const
{
    // the sign is tested only when the conversion itself succeeded
    const bool failed = ( ToUIntOrInt(result) != 0 ) || IsSign();

    return failed ? 1 : 0;
}
/*!
    returns the value of 'this' converted to sint

    the carry reported by ToInt(sint &) is discarded here,
    use that overload directly when the carry has to be checked
*/
sint ToInt() const
{
    sint converted;
    ToInt(converted);

    return converted;
}
/*!
    stores the value of 'this' in the unsigned 'result'

    this overload only forwards to ToUInt(uint &) and returns its carry
    (1 if the value is too big)
*/
uint ToInt(uint & result) const
{
    const uint carry = ToUInt(result);

    return carry;
}
/*! /*!
this method converts 'this' into 'result' this method converts 'this' into 'result'
@ -2310,36 +2360,35 @@ public:
*/ */
uint ToInt(sint & result) const uint ToInt(sint & result) const
{ {
result = 0; uint result_uint;
uint result_uint;
if( ToUInt(result_uint, false) ) uint c = ToUIntOrInt(result_uint);
result = sint(result_uint);
if( c )
return 1; return 1;
result = static_cast<sint>( result_uint ); uint mask = 0;
// the exception for the minimal value
if( IsSign() && result_uint == TTMATH_UINT_HIGHEST_BIT )
return 0;
if( (result_uint & TTMATH_UINT_HIGHEST_BIT) != 0 )
// the value is too big
return 1;
if( IsSign() ) if( IsSign() )
{
mask = TTMATH_UINT_MAX_VALUE;
result = -result; result = -result;
}
return 0; return ((result & TTMATH_UINT_HIGHEST_BIT) == (mask & TTMATH_UINT_HIGHEST_BIT)) ? 0 : 1;
} }
private:
/*! /*!
this method converts 'this' into 'result' an auxiliary method for converting 'this' into 'result'
if the value is too big this method returns a carry (1) if the value is too big this method returns a carry (1)
*/ */
template<uint int_size> template<uint int_size>
uint ToInt(Int<int_size> & result) const uint ToUIntOrInt(UInt<int_size> & result) const
{ {
result.SetZero(); result.SetZero();
@ -2350,7 +2399,7 @@ public:
if( exponent > maxbit + sint(int_size*TTMATH_BITS_PER_UINT) ) if( exponent > maxbit + sint(int_size*TTMATH_BITS_PER_UINT) )
// if exponent > (maxbit + sint(int_size*TTMATH_BITS_PER_UINT)) the value can't be passed // if exponent > (maxbit + sint(int_size*TTMATH_BITS_PER_UINT)) the value can't be passed
// into the 'Int<int_size>' type (it's too big) // into the 'UInt<int_size>' type (it's too big)
return 1; return 1;
if( exponent <= maxbit ) if( exponent <= maxbit )
@ -2374,30 +2423,79 @@ public:
{ {
uint index = how_many_bits / TTMATH_BITS_PER_UINT; uint index = how_many_bits / TTMATH_BITS_PER_UINT;
for(uint i=0 ; i<man ; ++i) if( index + (man-1) < int_size )
result.table[index+i] = mantissa.table[i]; {
// above 'if' is always true
// this is only to get rid of a warning "warning: array subscript is above array bounds"
// (from gcc)
// we checked the condition there: "if( exponent > maxbit + sint(int_size*TTMATH_BITS_PER_UINT) )"
// but gcc doesn't understand our types - exponent is Int<>
for(uint i=0 ; i<man ; ++i)
result.table[index+i] = mantissa.table[i];
}
result.Rcl( how_many_bits % TTMATH_BITS_PER_UINT, 0 ); result.Rcl( how_many_bits % TTMATH_BITS_PER_UINT, 0 );
} }
// the exception for the minimal value return 0;
if( IsSign() ) }
{
Int<int_size> min;
min.SetMin();
if( result == min )
return 0;
}
if( (result.table[int_size-1] & TTMATH_UINT_HIGHEST_BIT) != 0 ) public:
// the value is too big
/*!
this method converts 'this' into 'result'
if the value is too big this method returns a carry (1)
*/
template<uint int_size>
uint ToUInt(UInt<int_size> & result) const
{
uint c = ToUIntOrInt(result);
if( c )
return 1; return 1;
if( IsSign() ) if( IsSign() )
result.ChangeSign(); return 1;
}
return 0;
/*!
    stores the value of 'this' in the unsigned 'result'

    this overload only forwards to ToUInt(UInt<int_size> &) and returns its carry
    (1 if the value is too big)
*/
template<uint int_size>
uint ToInt(UInt<int_size> & result) const
{
    const uint carry = ToUInt(result);

    return carry;
}
/*!
this method converts 'this' into 'result'
if the value is too big this method returns a carry (1)
*/
template<uint int_size>
uint ToInt(Int<int_size> & result) const
{
uint c = ToUIntOrInt(result);
if( c )
return 1;
uint mask = 0;
if( IsSign() )
{
result.ChangeSign();
mask = TTMATH_UINT_MAX_VALUE;
}
return ((result.table[int_size-1] & TTMATH_UINT_HIGHEST_BIT) == (mask & TTMATH_UINT_HIGHEST_BIT))? 0 : 1;
} }
@ -2439,7 +2537,7 @@ public:
/*! /*!
a method for converting 'sint' to this class a method for converting 'sint' to this class
*/ */
void FromInt(sint value) uint FromInt(sint value)
{ {
bool is_sign = false; bool is_sign = false;
@ -2453,6 +2551,8 @@ public:
if( is_sign ) if( is_sign )
SetSign(); SetSign();
return 0;
} }
@ -2487,12 +2587,8 @@ public:
#ifdef TTMATH_PLATFORM32 #ifdef TTMATH_PLATFORM32
void FromDouble(double value) uint FromDouble(double value)
{ {
// sizeof(double) should be 8 (64 bits), this is actually not a runtime
// error but I leave it at the moment as is
TTMATH_ASSERT( sizeof(double) == 8 )
// I am not sure what will be on a platform which has // I am not sure what will be on a platform which has
// a different endianness... but we use this library only // a different endianness... but we use this library only
// on x86 and amd (intel) 64 bits (as there's a lot of assembler code) // on x86 and amd (intel) 64 bits (as there's a lot of assembler code)
@ -2562,6 +2658,8 @@ public:
SetZero(); SetZero();
} }
} }
return 0; // never be a carry
} }
@ -2602,12 +2700,8 @@ private:
public: public:
// 64bit platforms // 64bit platforms
void FromDouble(double value) uint FromDouble(double value)
{ {
// sizeof(double) should be 8 (64 bits), this is actually not a runtime
// error but I leave it at the moment as is
TTMATH_ASSERT( sizeof(double) == 8 )
// I am not sure what will be on a plaltform which has // I am not sure what will be on a plaltform which has
// a different endianness... but we use this library only // a different endianness... but we use this library only
// on x86 and amd (intel) 64 bits (as there's a lot of assembler code) // on x86 and amd (intel) 64 bits (as there's a lot of assembler code)
@ -2671,6 +2765,8 @@ public:
SetZero(); SetZero();
} }
} }
return 0; // never be a carry
} }
private: private:
@ -2698,6 +2794,139 @@ private:
public: public:
/*!
    sets this object from a 'float'

    the value is first widened to 'double' and then passed to FromDouble(),
    the result of FromDouble() is returned
*/
uint FromFloat(float value)
{
    const double widened = value;

    return FromDouble(widened);
}
/*!
    returns the value of this object as a 'double'

    if the value is too big:
      the returned value will be +/-infinity (depending on the sign)
    if the value is too small:
      the returned value will be 0

    the carry reported by ToDouble(double &) is discarded here
*/
double ToDouble() const
{
    double converted;
    ToDouble(converted);

    return converted;
}
private:
/*!
    an auxiliary method to check if the float value is +/-infinity
    we provide this method because isinf(float) is only in C99 language

    description taken from: http://www.psc.edu/general/software/packages/ieee/ieee.php

    The IEEE single precision floating point standard representation requires a 32 bit word,
    which may be represented as numbered from 0 to 31, left to right.
    The first bit is the sign bit, S, the next eight bits are the exponent bits, 'E',
    and the final 23 bits are the fraction 'F':

      S EEEEEEEE FFFFFFFFFFFFFFFFFFFFFFF
      0 1      8 9                    31

    The value V represented by the word may be determined as follows:

      * If E=255 and F is nonzero, then V=NaN ("Not a number")
      * If E=255 and F is zero and S is 1, then V=-Infinity
      * If E=255 and F is zero and S is 0, then V=Infinity
      * If 0<E<255 then V=(-1)**S * 2 ** (E-127) * (1.F) where "1.F" is intended to represent
        the binary number created by prefixing F with an implicit leading 1 and a binary point.
      * If E=0 and F is nonzero, then V=(-1)**S * 2 ** (-126) * (0.F) These are "unnormalized" values.
      * If E=0 and F is zero and S is 1, then V=-0
      * If E=0 and F is zero and S is 0, then V=0

    returns true only for +/-infinity (false for NaN and for every finite value)
*/
bool IsInf(float value) const
{
    // the bit pattern is read through 'unsigned int' and not through 'uint':
    // 'uint' is presumably 64 bits wide on a 64 bit platform, so it would be
    // wider than the float stored in the union and some of the bytes read
    // would never have been written
    // NOTE(review): assumes 'unsigned int' is 32 bits wide (as on x86 and amd64) - confirm for other targets
    union
    {
        float d;
        unsigned int u;
    } temp;

    temp.d = value;

    // E == 255 (all exponent bits set)
    if( ((temp.u >> 23) & 0xff) == 0xff )
    {
        // F == 0 (the fraction is empty)
        if( (temp.u & 0x7FFFFF) == 0 )
            return true; // +/- infinity
    }

    return false;
}
public:
/*!
    returns the value of this object as a 'float'

    if the value is too big:
      the returned value will be +/-infinity (depending on the sign)
    if the value is too small:
      the returned value will be 0

    the carry reported by ToFloat(float &) is discarded here
*/
float ToFloat() const
{
    float converted;
    ToFloat(converted);

    return converted;
}
/*!
    stores the value of this object in the 'float' 'result'

    the value is converted to 'double' first (ToDouble) and then narrowed to 'float'

    the method returns 1 when:
      - ToDouble() reported a carry, or
      - the narrowed value is +/-infinity, or
      - the narrowed value is zero although the 'double' value was not
        (the value was too small for 'float')
    otherwise it returns 0
*/
uint ToFloat(float & result) const
{
    double as_double;
    const uint carry = ToDouble(as_double);

    result = static_cast<float>(as_double);

    // -0.0f compares equal to 0.0f, so every zero ends up as a positive zero
    if( result == -0.0f )
        result = 0.0f;

    // the double held a non-zero value but nothing was left after narrowing
    const bool too_small = ( result == 0.0f ) && ( as_double != 0.0 );

    // although as_double can have a correct value
    // the float can be an infinity after narrowing
    if( carry || IsInf(result) || too_small )
        return 1;

    return 0;
}
/*! /*!
this method converts from this class into the 'double' this method converts from this class into the 'double'
@ -2711,10 +2940,6 @@ public:
*/ */
uint ToDouble(double & result) const uint ToDouble(double & result) const
{ {
// sizeof(double) should be 8 (64 bits), this is actually not a runtime
// error but I leave it at the moment as is
TTMATH_ASSERT( sizeof(double) == 8 )
if( IsZero() ) if( IsZero() )
{ {
result = 0.0; result = 0.0;
@ -2733,7 +2958,7 @@ public:
if( exponent >= 1024 - e_correction ) if( exponent >= 1024 - e_correction )
{ {
// +/- infinity // +/- infinity
result = ToDouble_SetDouble( 0, 2047, 0, true); result = ToDouble_SetDouble( IsSign(), 2047, 0, true);
return 1; return 1;
} }
@ -2872,6 +3097,17 @@ public:
} }
/*!
    assigns a 'float' value to this object (by calling FromFloat)
    and returns a reference to this object
*/
Big<exp, man> & operator=(float value)
{
    this->FromFloat(value);

    return *this;
}
/*! /*!
an operator= for converting 'double' to this class an operator= for converting 'double' to this class
*/ */
@ -2909,6 +3145,14 @@ public:
} }
/*!
a constructor for converting 'float' to this class
*/
Big(float value)
{
FromFloat(value);
}
#ifdef TTMATH_PLATFORM32 #ifdef TTMATH_PLATFORM32

View File

@ -1134,6 +1134,30 @@ public:
} }
/*!
    stores the value in 'result' (uint type)

    the conversion itself is done by UInt<value_size>::ToUInt(uint &)

    for value_size == 1 the method returns 1 (a carry) when the highest bit
    of 'result' is set and 0 otherwise,
    for other sizes the carry from the UInt<> method is returned
*/
uint ToUInt(uint & result) const
{
    const uint carry = UInt<value_size>::ToUInt(result);

    if( value_size != 1 )
        return carry;

    return ( (result & TTMATH_UINT_HIGHEST_BIT) != 0 ) ? 1 : 0;
}
/*!
    stores the value in the unsigned 'result'

    this overload only forwards to ToUInt(uint &) and returns its carry
*/
uint ToInt(uint & result) const
{
    const uint carry = ToUInt(result);

    return carry;
}
/*! /*!
this method converts the value to sint type this method converts the value to sint type
@ -1157,6 +1181,36 @@ public:
#ifdef TTMATH_PLATFORM32 #ifdef TTMATH_PLATFORM32
/*!
    stores the value in 'result' (ulint type - 64 bit unsigned integer)

    the conversion itself is done by UInt<value_size>::ToUInt(ulint &)

    for value_size equal 1 or 2 the method returns 1 (a carry) when the highest bit
    of the most significant word of the table is set and 0 otherwise,
    for other sizes the carry from the UInt<> method is returned

    *** this method is created only on a 32 bit platform ***
*/
uint ToUInt(ulint & result) const
{
    const uint carry = UInt<value_size>::ToUInt(result);

    if( value_size == 1 || value_size == 2 )
    {
        // table[0] for one word, table[1] for two words
        const uint top_word = UInt<value_size>::table[value_size - 1];

        return ( (top_word & TTMATH_UINT_HIGHEST_BIT) != 0 ) ? 1 : 0;
    }

    return carry;
}
/*!
    stores the value in the unsigned 'result' (ulint type - 64 bit unsigned integer)

    this overload only forwards to ToUInt(ulint &) and returns its carry

    *** this method is created only on a 32 bit platform ***
*/
uint ToInt(ulint & result) const
{
    const uint carry = ToUInt(result);

    return carry;
}
/*! /*!
this method converts the value to slint type (64 bit signed integer) this method converts the value to slint type (64 bit signed integer)
can return a carry if the value is too long to store it in slint type can return a carry if the value is too long to store it in slint type
@ -1196,7 +1250,37 @@ public:
#ifdef TTMATH_PLATFORM64 #ifdef TTMATH_PLATFORM64
/*! /*!
this method converts the value to a 32 signed integer this method converts the value to a 32 bit unsigned integer
can return a carry if the value is too long to store it in this type
*** this method is created only on a 64 bit platform ***
*/
uint ToUInt(unsigned int & result) const
{
    // !! need testing

    // the conversion itself is done by the UInt<> method,
    // a carry from it is passed on as 1
    uint c = UInt<value_size>::ToUInt(result);

    if( c )
        return 1;

    // a carry is reported only when this Int<> object is negative
    // (the highest bit of its most significant word is set),
    // testing bit 31 of 'result' would wrongly reject the values
    // 2^31 ... 2^32-1 which fit in a 32 bit unsigned integer
    return ( (UInt<value_size>::table[value_size-1] & TTMATH_UINT_HIGHEST_BIT) != 0 ) ? 1 : 0;
}
/*!
    stores the value in the unsigned 'result' (a 32 bit unsigned integer)

    this overload only forwards to ToUInt(unsigned int &) and returns its carry

    *** this method is created only on a 64 bit platform ***
*/
uint ToInt(unsigned int & result) const
{
    // !! need testing
    const uint carry = ToUInt(result);

    return carry;
}
/*!
this method converts the value to a 32 bit signed integer
can return a carry if the value is too long to store it in this type can return a carry if the value is too long to store it in this type
*** this method is created only on a 64 bit platform *** *** this method is created only on a 64 bit platform ***
*/ */