Compare commits

...

21 Commits
master ... chk

Author SHA1 Message Date
Christian Kaiser 51e938eaa7 - update to current root trunk's version
- update to root trunk's UNICODE support

git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@182 e52654a7-88a9-db11-a3e9-0013d4bc506e
2009-07-29 10:46:48 +00:00
Christian Kaiser e102086f80 - fixed a bug in 64 bit ASM for MSVC
git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@181 e52654a7-88a9-db11-a3e9-0013d4bc506e
2009-07-28 16:34:04 +00:00
Christian Kaiser 51b2c974a1 - changed "AboutEqualWithoutSign()" to "AboutEqual()" because we need to take the sign into account!
git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@173 e52654a7-88a9-db11-a3e9-0013d4bc506e
2009-06-26 15:24:27 +00:00
Christian Kaiser 5597373093 - "streamlined" ttmathconfig.h a bit:
a) Unicode support if TTMATH_USE_WCHAR is set (compiler must know wchar_t etc, of course)
  b) threading synchronisation uses WIN32 instead of __MSVC__ define, as this is OS dependent, not compiler dependent

git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@172 e52654a7-88a9-db11-a3e9-0013d4bc506e
2009-06-26 11:14:51 +00:00
Christian Kaiser de58378488 - added AboutEqualWithoutSign() to big<> to allow 'suppression' of some unexpected results (that are perfectly logical though, given the possibly unrepresentable nature of binary representation of decimals) like
big<>("10.456466") * 2 == big<>("20.912932")

resulting in FALSE result.

git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@171 e52654a7-88a9-db11-a3e9-0013d4bc506e
2009-06-25 14:11:17 +00:00
Christian Kaiser de64608eba Merged against the current original ttmath trunk
git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@170 e52654a7-88a9-db11-a3e9-0013d4bc506e
2009-06-25 11:07:55 +00:00
Christian Kaiser be8913866a - 32 bit ASM code and ASSERTS did not work as the ASM code put its result in EAX, but the ASSERT afterwards did destroy the EAX's contents, of course.
git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@155 e52654a7-88a9-db11-a3e9-0013d4bc506e
2009-05-28 14:42:19 +00:00
Christian Kaiser b31d34ebdd - fixed a bug in ttmath.h (missing closing brace in Cos())
git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@154 e52654a7-88a9-db11-a3e9-0013d4bc506e
2009-05-28 11:52:31 +00:00
Christian Kaiser be821b59dd - optimizations
git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@153 e52654a7-88a9-db11-a3e9-0013d4bc506e
2009-05-28 11:31:29 +00:00
Christian Kaiser de1e7ac957 more optimizations for MSVC assembler (parallelism, prefetch optimization, loop alignment, ...)
git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@151 e52654a7-88a9-db11-a3e9-0013d4bc506e
2009-05-20 08:48:51 +00:00
Christian Kaiser fdc292e91a current chk version - too many changes on both sides for now ;-(
git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@150 e52654a7-88a9-db11-a3e9-0013d4bc506e
2009-05-19 10:50:41 +00:00
Christian Kaiser 9b576ddbe2 - corrected 64 bit assembler code (ebx was not preserved)
- minor optimization

git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@147 e52654a7-88a9-db11-a3e9-0013d4bc506e
2009-05-15 14:42:43 +00:00
Christian Kaiser a8c3a506ea MSVC ASM improvements (no register saves necessary, as this is done automatically by the C compiler)
git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@146 e52654a7-88a9-db11-a3e9-0013d4bc506e
2009-05-14 12:59:12 +00:00
Christian Kaiser 3ba94dca90 git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@145 e52654a7-88a9-db11-a3e9-0013d4bc506e 2009-05-11 12:30:05 +00:00
Christian Kaiser cae50cd425 - merged Tomasz' version 0.8.5
git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@144 e52654a7-88a9-db11-a3e9-0013d4bc506e
2009-05-11 12:25:25 +00:00
Christian Kaiser 00e39d3608 added thread-safety to static history buffers (factorial and logarithm) for MSVC
git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@135 e52654a7-88a9-db11-a3e9-0013d4bc506e
2009-05-07 11:37:10 +00:00
Christian Kaiser 37379d2f1f - fulfills test file log diff (32 and 64 bit)
- macro for issuing the debug output to something else than std::out if specified


git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@134 e52654a7-88a9-db11-a3e9-0013d4bc506e
2009-05-07 09:33:57 +00:00
Christian Kaiser d7b67e4d47 - minor changes for ASSERT macros
- some more "unification" of 32 and 64 bits in typedefs
- use of 'char' instead of 'unsigned char', as I may hope that 'char' usually is set to 'unsigned' in most development environments

git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@133 e52654a7-88a9-db11-a3e9-0013d4bc506e
2009-05-06 15:47:15 +00:00
Christian Kaiser c91bd24e98 - support for MS specific code (__int64 etc) and warnings
- support for AMD64 assembler (not thoroughly tested)
- support for UNICODE I/O (strings and streams)

git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@132 e52654a7-88a9-db11-a3e9-0013d4bc506e
2009-05-06 15:11:29 +00:00
Christian Kaiser cbc12db22f dummy commit (user/password checking)
git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@131 e52654a7-88a9-db11-a3e9-0013d4bc506e
2009-05-06 13:24:00 +00:00
Tomasz Sowa 3e9bd5b093 creating a chk branch for ChristianK
git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@130 e52654a7-88a9-db11-a3e9-0013d4bc506e
2009-05-06 13:16:56 +00:00
11 changed files with 16524 additions and 15986 deletions

View File

@ -64,6 +64,7 @@ namespace ttmath
*/ */
/* /*
* *
* functions for rounding * functions for rounding
@ -2609,6 +2610,7 @@ namespace ttmath
ValueType result; ValueType result;
ErrorCode err_tmp; ErrorCode err_tmp;
if( n.IsNan() ) if( n.IsNan() )
{ {
if( err ) if( err )
@ -2617,6 +2619,8 @@ namespace ttmath
return result; // NaN is set by default return result; // NaN is set by default
} }
TTMATH_USE_THREADSAFE_OBJ(cgamma.history);
if( cgamma.history.Get(n, result, err_tmp) ) if( cgamma.history.Get(n, result, err_tmp) )
{ {
if( err ) if( err )

View File

@ -38,6 +38,8 @@
#ifndef headerfilettmathbig #ifndef headerfilettmathbig
#define headerfilettmathbig #define headerfilettmathbig
#include "ttmathconfig.h"
/*! /*!
\file ttmathbig.h \file ttmathbig.h
\brief A Class for representing floating point numbers \brief A Class for representing floating point numbers
@ -47,6 +49,10 @@
#include <iostream> #include <iostream>
#if defined(_MSC_VER)
#pragma warning(disable:4127) // conditional expression is constant
#endif
namespace ttmath namespace ttmath
{ {
@ -81,7 +87,7 @@ public:
Int<exp> exponent; Int<exp> exponent;
UInt<man> mantissa; UInt<man> mantissa;
unsigned char info; tt_char info;
/*! /*!
@ -420,7 +426,7 @@ public:
// (TTMATH_BUILTIN_VARIABLES_SIZE on 32bit platform should have the value 256, // (TTMATH_BUILTIN_VARIABLES_SIZE on 32bit platform should have the value 256,
// and on 64bit platform value 128 (256/2=128)) // and on 64bit platform value 128 (256/2=128))
mantissa.SetFromTable(temp_table, sizeof(temp_table) / sizeof(int)); mantissa.SetFromTable(temp_table, sizeof(temp_table) / sizeof(unsigned int));
exponent = -sint(man)*sint(TTMATH_BITS_PER_UINT); exponent = -sint(man)*sint(TTMATH_BITS_PER_UINT);
info = 0; info = 0;
} }
@ -1028,7 +1034,7 @@ public:
UInt<man*2> man1; UInt<man*2> man1;
UInt<man*2> man2; UInt<man*2> man2;
uint i,c = 0; uint i,c;
if( IsNan() || ss2.IsNan() || ss2.IsZero() ) if( IsNan() || ss2.IsNan() || ss2.IsZero() )
return CheckCarry(1); return CheckCarry(1);
@ -1049,9 +1055,7 @@ public:
i = man1.CompensationToLeft(); i = man1.CompensationToLeft();
if( i ) c = exponent.Sub(i);
c += exponent.Sub(i);
c += exponent.Sub(ss2.exponent); c += exponent.Sub(ss2.exponent);
for(i=0 ; i<man ; ++i) for(i=0 ; i<man ; ++i)
@ -1073,7 +1077,7 @@ public:
e.g. e.g.
12.6 mod 3 = 0.6 because 12.6 = 3*4 + 0.6 12.6 mod 3 = 0.6 because 12.6 = 3*4 + 0.6
-12.6 mod 3 = -0.6 bacause -12.6 = 3*(-4) + (-0.6) -12.6 mod 3 = -0.6
12.6 mod -3 = 0.6 12.6 mod -3 = 0.6
-12.6 mod -3 = -0.6 -12.6 mod -3 = -0.6
@ -1107,7 +1111,6 @@ public:
/*! /*!
power this = this ^ pow power this = this ^ pow
(pow without a sign) (pow without a sign)
@ -1766,7 +1769,7 @@ public:
// MS Visual Express 2005 reports a warning (in the lines with 'uint man_diff = ...'): // MS Visual Express 2005 reports a warning (in the lines with 'uint man_diff = ...'):
// warning C4307: '*' : integral constant overflow // warning C4307: '*' : integral constant overflow
// but we're using 'if( man > another_man )' and 'if( man < another_man )' and there'll be no such situation here // but we're using 'if( man > another_man )' and 'if( man < another_man )' and there'll be no such situation here
#ifdef _MSC_VER #ifndef __GNUC__
#pragma warning( disable: 4307 ) #pragma warning( disable: 4307 )
#endif #endif
@ -1782,7 +1785,7 @@ public:
c += exponent.AddInt(man_diff, 0); c += exponent.AddInt(man_diff, 0);
} }
#ifdef _MSC_VER #ifndef __GNUC__
#pragma warning( default: 4307 ) #pragma warning( default: 4307 )
#endif #endif
@ -2007,7 +2010,7 @@ public:
// error but I leave it at the moment as is // error but I leave it at the moment as is
TTMATH_ASSERT( sizeof(double) == 8 ) TTMATH_ASSERT( sizeof(double) == 8 )
// I am not sure what will be on a platform which has // I am not sure what will be on a platform which has
// a different endianness... but we use this library only // a different endianness... but we use this library only
// on x86 and amd (intel) 64 bits (as there's a lot of assembler code) // on x86 and amd (intel) 64 bits (as there's a lot of assembler code)
union union
@ -2172,7 +2175,7 @@ public:
// then V=(-1)**S * 2 ** (-1022) * (0.F) // then V=(-1)**S * 2 ** (-1022) * (0.F)
// These are "unnormalized" values. // These are "unnormalized" values.
FromDouble_SetExpAndMan(bool(temp.u & 0x8000000000000000ul), FromDouble_SetExpAndMan((temp.u & 0x8000000000000000ul) != 0,
e - 1022 - man*TTMATH_BITS_PER_UINT + 1, 0, m); e - 1022 - man*TTMATH_BITS_PER_UINT + 1, 0, m);
Standardizing(); Standardizing();
} }
@ -2667,6 +2670,18 @@ public:
operator=(value); operator=(value);
} }
class LogHistory
{
public:
Big<exp,man> val[15];
LogHistory()
{
for (int i = 0; i < 15; ++i)
val[i].SetZero();
}
TTMATH_IMPLEMENT_THREADSAFE_OBJ
};
/*! /*!
a method for converting the value into a string with a base equal 'base' a method for converting the value into a string with a base equal 'base'
@ -2961,11 +2976,12 @@ private:
// (LnSurrounding1() will return one immediately) // (LnSurrounding1() will return one immediately)
uint c = Ln(x); uint c = Ln(x);
// warning! this 'static' is not thread safe
static Big<exp,man> log_history[15] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
uint index = base - 2; uint index = base - 2;
if( log_history[index].IsZero() ) static LogHistory log_history;
TTMATH_USE_THREADSAFE_OBJ(log_history);
if( log_history.val[index].IsZero() )
{ {
// we don't have 'base' in 'log_history' then we calculate it now // we don't have 'base' in 'log_history' then we calculate it now
@ -2983,14 +2999,14 @@ private:
// the next time we'll get the 'Ln(base)' from the history, // the next time we'll get the 'Ln(base)' from the history,
// this 'log_history' can have (16-2+1) items max // this 'log_history' can have (16-2+1) items max
log_history[index] = temp; log_history.val[index] = temp;
c += Div(temp); c += Div(temp);
} }
else else
{ {
// we've calculated the 'Ln(base)' beforehand and we're getting it now // we've calculated the 'Ln(base)' beforehand and we're getting it now
c += Div( log_history[index] ); c += Div( log_history.val[index] );
} }
return (c==0)? 0 : 1; return (c==0)? 0 : 1;
@ -3848,6 +3864,72 @@ public:
return false; return false;
} }
bool AboutEqual(const Big<exp,man> & ss2, int nBitsToIgnore = 4) const
{
// we should check the mantissas beforehand because sometimes we can have
// a mantissa set to zero but in the exponent something another value
// (maybe we've forgotten about calling CorrectZero() ?)
if( mantissa.IsZero())
{
if (ss2.mantissa.IsZero())
return true;
return(ss2.AboutEqual(*this,nBitsToIgnore));
}
if (ss2.mantissa.IsZero())
{
return(this->exponent <= uint(2*(-sint(man*TTMATH_BITS_PER_UINT))+nBitsToIgnore));
}
// exponents may not differ much!
ttmath::Int<exp> expdiff(this->exponent - ss2.exponent);
// they may differ one if for example mantissa1=0x80000000, mantissa2=0xffffffff
if (ttmath::Abs(expdiff) > 1)
return(false);
// calculate the 'difference' mantissa
ttmath::UInt<man> man1(this->mantissa);
ttmath::UInt<man> man2(ss2.mantissa);
ttmath::UInt<man> mandiff;
switch (expdiff.ToInt())
{
case +1:
man2.Rcr(1,0);
mandiff = man1;
mandiff.Sub(man2);
break;
case -1:
man1.Rcr(1,0);
mandiff = man2;
mandiff.Sub(man1);
break;
default:
if (man2 > man1)
{
mandiff = man2;
mandiff.Sub(man1);
}
else
{
mandiff = man1;
mandiff.Sub(man2);
}
break;
}
// faster to mask the bits!
ASSERT(nBitsToIgnore < TTMATH_BITS_PER_UINT);
for (int n = man-1; n > 0; --n)
{
if (mandiff.table[n] != 0)
return(false);
}
uint nMask = ~((1 << nBitsToIgnore) - 1);
return((mandiff.table[0] & nMask) == 0);
}
bool operator<(const Big<exp,man> & ss2) const bool operator<(const Big<exp,man> & ss2) const
{ {
@ -3879,8 +3961,6 @@ public:
} }
bool operator>(const Big<exp,man> & ss2) const bool operator>(const Big<exp,man> & ss2) const
{ {
if( IsSign() && !ss2.IsSign() ) if( IsSign() && !ss2.IsSign() )
@ -4253,6 +4333,9 @@ public:
}; };
#if defined(_MSC_VER)
#pragma warning(default:4127) // conditional expression is constant
#endif
} // namespace } // namespace

110
ttmath/ttmathconfig.h Normal file
View File

@ -0,0 +1,110 @@
/*
* This file is a part of TTMath Bignum Library
* and is distributed under the PNG licence.
* Author: Christian Kaiser <chk@online.de>
*/
/*
Copyright (c) 2009 Christian Kaiser
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source
distribution.
*/
#ifndef headerfilettmathconfig
#define headerfilettmathconfig
#pragma once

#include <sstream>

// <windows.h> has to be included at global scope: when it is included inside
// 'namespace ttmath' the whole Win32 API is declared as ttmath::... and,
// because of the include guards in the system headers, another include of
// <windows.h> elsewhere in the same translation unit declares nothing at all
#if defined(WIN32)
	#include <windows.h>
#endif

namespace ttmath
{

#if defined(WIN32)
	#if defined(_MT)

		/*!
			a wrapper around a Win32 CRITICAL_SECTION

			the critical section is initialized in the constructor and deleted
			in the destructor; Enter() and Leave() are const (the section is
			'mutable') so a const object can be locked too

			objects of this class cannot be copied or assigned
			(use clsCritObj to lock a clsCrit for the time of a scope)
		*/
		class clsCrit
		{
		private:
			mutable CRITICAL_SECTION _Crit;

			// inhibit copy (easy mistake to do; use clsCritObj instead!!!)
			// declared only: an empty body would let a member create
			// a copy with an uninitialized CRITICAL_SECTION
			clsCrit(const clsCrit&);
			clsCrit& operator=(const clsCrit& rhs); // inhibit assignment

		public:
			clsCrit(void)
			{
				InitializeCriticalSection(&_Crit);
			}

			virtual ~clsCrit(void)
			{
				DeleteCriticalSection(&_Crit);
			}

			void Enter(void) const
			{
				EnterCriticalSection(&_Crit);
			}

			void Leave(void) const
			{
				LeaveCriticalSection(&_Crit);
			}
		};


		/*!
			a scope guard for clsCrit: the constructor enters the critical
			section and the destructor leaves it

			a guard cannot be copied or assigned (a copy would leave
			the critical section a second time)
		*/
		class clsCritObj
		{
		private:
			const clsCrit& _Crit;

			clsCritObj(const clsCritObj&);                 // not applicable
			clsCritObj& operator=(const clsCritObj& rhs);  // not applicable

		public:
			clsCritObj(const clsCrit& Sync)
				: _Crit(Sync)
			{
				_Crit.Enter();
			}

			~clsCritObj(void)
			{
				_Crit.Leave();
			}
		};

		// put this macro into a class body: it adds a private critical section
		// and a public conversion to clsCrit& (used by TTMATH_USE_THREADSAFE_OBJ);
		// the macro leaves the class in its 'public:' section
		// (names are fully qualified so the macros work outside of namespace ttmath too)
		#define TTMATH_IMPLEMENT_THREADSAFE_OBJ					\
			private:											\
				ttmath::clsCrit CritSect;						\
			public:												\
				operator ttmath::clsCrit&()						\
				{												\
					return(CritSect);							\
				}

		// locks the object 'c' until the end of the current scope
		// (it defines a local variable called 'lock')
		#define TTMATH_USE_THREADSAFE_OBJ(c)	ttmath::clsCritObj lock(c)

	#endif
#else	// defined(WIN32)
	// not Windows world: no threading synchronization for now
#endif

#if !defined(TTMATH_IMPLEMENT_THREADSAFE_OBJ)
	// if we don't know about serialization, make it a no-op
	#define TTMATH_IMPLEMENT_THREADSAFE_OBJ	/* */
	#define TTMATH_USE_THREADSAFE_OBJ(c)	/* */
#endif

} // namespace

#endif // headerfilettmathconfig

View File

@ -47,6 +47,10 @@
#include "ttmathuint.h" #include "ttmathuint.h"
#if defined(_MSC_VER)
#pragma warning(disable:4127) // conditional expression is constant
#endif
namespace ttmath namespace ttmath
{ {
@ -641,8 +645,14 @@ public:
// there can be a carry here when the size of this value is equal one word // there can be a carry here when the size of this value is equal one word
// and the 'value' has the highest bit set // and the 'value' has the highest bit set
#if defined(_MSC_VER)
#pragma warning(disable:4127) // conditional expression is constant
#endif
if( value_size==1 && (value & TTMATH_UINT_HIGHEST_BIT)!=0 ) if( value_size==1 && (value & TTMATH_UINT_HIGHEST_BIT)!=0 )
return 1; return 1;
#if defined(_MSC_VER)
#pragma warning(default:4127) // conditional expression is constant
#endif
return 0; return 0;
} }
@ -1327,4 +1337,9 @@ public:
} // namespace } // namespace
#if defined(_MSC_VER)
#pragma warning(default:4127) // conditional expression is constant
#endif
#endif #endif

View File

@ -120,6 +120,7 @@ namespace ttmath
typedef unsigned int uint; typedef unsigned int uint;
typedef signed int sint; typedef signed int sint;
/*! /*!
this type is twice bigger than uint this type is twice bigger than uint
(64bit on a 32bit platforms) (64bit on a 32bit platforms)
@ -128,43 +129,39 @@ namespace ttmath
but it is defined in C99 and in upcoming C++0x /3.9.1 (2)/ and many compilers support it but it is defined in C99 and in upcoming C++0x /3.9.1 (2)/ and many compilers support it
this type is used in UInt::MulTwoWords and UInt::DivTwoWords when macro TTMATH_NOASM is defined this type is used in UInt::MulTwoWords and UInt::DivTwoWords when macro TTMATH_NOASM is defined
but only on a 32bit platform
*/ */
#ifdef TTMATH_NOASM
typedef unsigned long long int ulint; typedef unsigned long long int ulint;
#endif
/*!
how many bits there are in the uint type
*/
#define TTMATH_BITS_PER_UINT 32u
/*! /*!
the mask for the highest bit in the unsigned 32bit word (2^31) the mask for the highest bit in the unsigned 32bit word (2^31)
*/ */
#define TTMATH_UINT_HIGHEST_BIT 2147483648u const uint TTMATH_UINT_HIGHEST_BIT = 0x80000000ul;
/*! /*!
the max value of the unsigned 32bit word (2^32 - 1) the max value of the unsigned 32bit word (2^32 - 1)
(all bits equal one) (all bits equal one)
*/ */
#define TTMATH_UINT_MAX_VALUE 4294967295u const uint TTMATH_UINT_MAX_VALUE = 0xfffffffful;
/*! /*!
the number of words (32bit words on 32bit platform) the number of words (32bit words on 32bit platform)
which are kept in built-in variables for a Big<> type which are kept in built-in variables for a Big<> type
(these variables are defined in ttmathbig.h) (these variables are defined in ttmathbig.h)
*/ */
#define TTMATH_BUILTIN_VARIABLES_SIZE 256u const uint TTMATH_BUILTIN_VARIABLES_SIZE = 256u;
#else #else
/*! /*!
on 64bit platforms one word (uint, sint) will be equal 64bits on 64bit platforms one word (uint, sint) will be equal 64bits
*/ */
typedef unsigned long uint; #if defined(_MSC_VER)
typedef signed long sint; typedef unsigned __int64 uint;
typedef signed __int64 sint;
#else
typedef unsigned long long uint;
typedef signed long long sint;
#endif
/*! /*!
on 64bit platform we do not define ulint on 64bit platform we do not define ulint
sizeof(long long) is 8 (64bit) but we need 128bit sizeof(long long) is 8 (64bit) but we need 128bit
@ -174,30 +171,28 @@ namespace ttmath
*/ */
//typedef unsigned long long int ulint; //typedef unsigned long long int ulint;
/*!
how many bits there are in the uint type
*/
#define TTMATH_BITS_PER_UINT 64ul
/*! /*!
the mask for the highest bit in the unsigned 64bit word (2^63) the mask for the highest bit in the unsigned 64bit word (2^63)
*/ */
#define TTMATH_UINT_HIGHEST_BIT 9223372036854775808ul const uint TTMATH_UINT_HIGHEST_BIT = 0x8000000000000000ul;
/*! /*!
the max value of the unsigned 64bit word (2^64 - 1) the max value of the unsigned 64bit word (2^64 - 1)
(all bits equal one) (all bits equal one)
*/ */
#define TTMATH_UINT_MAX_VALUE 18446744073709551615ul const uint TTMATH_UINT_MAX_VALUE = 0xfffffffffffffffful;
/*! /*!
the number of words (64bit words on 64bit platforms) the number of words (64bit words on 64bit platforms)
which are kept in built-in variables for a Big<> type which are kept in built-in variables for a Big<> type
(these variables are defined in ttmathbig.h) (these variables are defined in ttmathbig.h)
*/ */
#define TTMATH_BUILTIN_VARIABLES_SIZE 128ul const uint TTMATH_BUILTIN_VARIABLES_SIZE = 128ul;
#endif #endif
const uint TTMATH_BITS_PER_UINT = (sizeof(uint)*8);
} }
@ -279,7 +274,6 @@ namespace ttmath
#endif #endif
/*! /*!
this is a special value used when calculating the Gamma(x) function this is a special value used when calculating the Gamma(x) function
if x is greater than this value then the Gamma(x) will be calculated using if x is greater than this value then the Gamma(x) will be calculated using
@ -476,6 +470,9 @@ namespace ttmath
#define TTMATH_ASSERT(expression) \ #define TTMATH_ASSERT(expression) \
if( !(expression) ) throw ttmath::RuntimeError(TTMATH_FILE, __LINE__); if( !(expression) ) throw ttmath::RuntimeError(TTMATH_FILE, __LINE__);
#define TTMATH_VERIFY(expression) \
if( !(expression) ) throw ttmath::RuntimeError(TTMATH_TEXT(__FILE__), __LINE__);
#else #else
#define TTMATH_REFERENCE_ASSERT(expression) \ #define TTMATH_REFERENCE_ASSERT(expression) \
@ -483,14 +480,20 @@ namespace ttmath
#define TTMATH_ASSERT(expression) \ #define TTMATH_ASSERT(expression) \
if( !(expression) ) throw RuntimeError(); if( !(expression) ) throw RuntimeError();
#define TTMATH_VERIFY(expression) \
if( !(expression) ) throw RuntimeError();
#endif #endif
#else #else
#define TTMATH_REFERENCE_ASSERT(expression) #define TTMATH_REFERENCE_ASSERT(expression)
#define TTMATH_ASSERT(expression) #define TTMATH_ASSERT(expression)
#define TTMATH_VERIFY(expression) (void)(expression);
#endif #endif
#if !defined(LOG_PRINTF)
#define LOG_PRINTF printf
#endif
#ifdef TTMATH_DEBUG_LOG #ifdef TTMATH_DEBUG_LOG

View File

@ -52,7 +52,9 @@
#include "ttmathtypes.h" #include "ttmathtypes.h"
#if defined(_MSC_VER)
#pragma warning(disable:4127) // conditional expression is constant
#endif
/*! /*!
\brief a namespace for the TTMath library \brief a namespace for the TTMath library
@ -1969,7 +1971,7 @@ private:
for(uint i = j+1 ; i<value_size ; ++i) for(uint i = j+1 ; i<value_size ; ++i)
q.table[i] = 0; q.table[i] = 0;
while( true ) for (;;)
{ {
u1 = table[j+n-1]; u1 = table[j+n-1];
u0 = table[j+n-2]; u0 = table[j+n-2];
@ -2286,7 +2288,7 @@ public:
*/ */
bool IsTheLowestBitSet() const bool IsTheLowestBitSet() const
{ {
return (*table & 1) != 0; return (table[0] & 1) != 0;
} }
@ -2370,7 +2372,7 @@ public:
/*! /*!
this method converts a digit into a char this method converts a digit into a tt_char
digit should be from <0,F> digit should be from <0,F>
(we don't have to get a base) (we don't have to get a base)
@ -2380,12 +2382,12 @@ public:
10 -> A 10 -> A
15 -> F 15 -> F
*/ */
static uint DigitToChar(uint digit) static tt_char DigitToChar(uint digit)
{ {
if( digit < 10 ) if( digit < 10 )
return digit + '0'; return (tt_char)(digit + '0');
return digit - 10 + 'A'; return((tt_char)(digit - 10 + 'A'));
} }
@ -3251,7 +3253,6 @@ public:
ttmathuint_noasm.h ttmathuint_noasm.h
*/ */
#ifdef TTMATH_NOASM
static uint AddTwoWords(uint a, uint b, uint carry, uint * result); static uint AddTwoWords(uint a, uint b, uint carry, uint * result);
static uint SubTwoWords(uint a, uint b, uint carry, uint * result); static uint SubTwoWords(uint a, uint b, uint carry, uint * result);
@ -3276,8 +3277,6 @@ public:
static void MultiplySubtract(uint_ & u_, unsigned int & u3, unsigned int & q, uint_ v_); static void MultiplySubtract(uint_ & u_, unsigned int & u3, unsigned int & q, uint_ v_);
#endif // TTMATH_PLATFORM64 #endif // TTMATH_PLATFORM64
#endif // TTMATH_NOASM
private: private:
uint Rcl2_one(uint c); uint Rcl2_one(uint c);
@ -3319,6 +3318,10 @@ public:
} //namespace } //namespace
#if defined(_MSC_VER)
#pragma warning(default:4127) // conditional expression is constant
#endif
#include "ttmathuint_x86.h" #include "ttmathuint_x86.h"
#include "ttmathuint_x86_64.h" #include "ttmathuint_x86_64.h"

View File

@ -41,6 +41,8 @@
#ifdef TTMATH_NOASM #ifdef TTMATH_NOASM
#pragma message("TTMATH_NOASM")
/*! /*!
\file ttmathuint_noasm.h \file ttmathuint_noasm.h
\brief template class UInt<uint> with methods without any assembler code \brief template class UInt<uint> with methods without any assembler code

View File

@ -36,14 +36,13 @@
*/ */
#ifndef headerfilettmathuint_x86 #ifndef headerfilettmathuint_x86
#define headerfilettmathuint_x86 #define headerfilettmathuint_x86
#ifndef TTMATH_NOASM #ifndef TTMATH_NOASM
#ifdef TTMATH_PLATFORM32 #ifdef TTMATH_PLATFORM32
#pragma message("TTMATH_ASM32")
/*! /*!
\file ttmathuint_x86.h \file ttmathuint_x86.h
@ -66,7 +65,6 @@ namespace ttmath
* *
*/ */
/*! /*!
adding ss2 to the this and adding carry if it's defined adding ss2 to the this and adding carry if it's defined
(this = this + ss2 + c) (this = this + ss2 + c)
@ -85,46 +83,31 @@ namespace ttmath
// this algorithm doesn't require it // this algorithm doesn't require it
#ifndef __GNUC__ #ifndef __GNUC__
// this part might be compiled with for example visual c // this part might be compiled with for example visual c
__asm __asm
{ {
push eax xor eax,eax // eax=0
push ebx xor edx,edx // edx=0
push ecx
push edx
push esi
mov ecx,[b] mov ecx,[b]
mov ebx,[p1] mov ebx,[p1]
mov esi,[p2] mov esi,[p2]
xor edx,edx // edx=0 sub eax,[c] // CF=c
mov eax,[c]
neg eax // CF=1 if rax!=0 , CF=0 if rax==0
ALIGN 16
ttmath_loop: ttmath_loop:
mov eax,[esi+edx*4] mov eax,[esi+edx*4+0]
adc [ebx+edx*4],eax adc [ebx+edx*4+0],eax
inc edx lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
dec ecx dec ecx
jnz ttmath_loop jnz ttmath_loop
adc ecx, ecx setc al
mov [c], ecx movzx eax, al
mov [c], eax
pop esi
pop edx
pop ecx
pop ebx
pop eax
} }
#endif #endif
@ -188,14 +171,8 @@ namespace ttmath
TTMATH_ASSERT( index < value_size ) TTMATH_ASSERT( index < value_size )
#ifndef __GNUC__ #ifndef __GNUC__
__asm __asm
{ {
push eax
push ebx
push ecx
push edx
mov ecx, [b] mov ecx, [b]
sub ecx, [index] sub ecx, [index]
@ -204,26 +181,21 @@ namespace ttmath
mov eax, [value] mov eax, [value]
ALIGN 16
ttmath_loop: ttmath_loop:
add [ebx+edx*4], eax add [ebx+edx*4], eax
jnc ttmath_end jnc ttmath_end
mov eax, 1 mov eax, 1
inc edx lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
dec ecx dec ecx
jnz ttmath_loop jnz ttmath_loop
ttmath_end: ttmath_end:
setc al setc al
movzx edx, al movzx eax, al
mov [c], edx mov [c], eax
pop edx
pop ecx
pop ebx
pop eax
} }
#endif #endif
@ -303,52 +275,40 @@ namespace ttmath
#ifndef __GNUC__ #ifndef __GNUC__
__asm __asm
{ {
push eax
push ebx
push ecx
push edx
mov ecx, [b] mov ecx, [b]
sub ecx, [index]
mov ebx, [p1]
mov edx, [index] mov edx, [index]
mov ebx, [p1]
mov eax, [x1] mov eax, [x1]
sub ecx, edx // max uints to add (value_size - index)
add [ebx+edx*4], eax add [ebx+edx*4], eax
inc edx lea ecx, [ecx-1]
dec ecx
mov eax, [x2] mov eax, [x2]
ALIGN 16
ttmath_loop: ttmath_loop:
adc [ebx+edx*4], eax adc [ebx+edx*4+4], eax
jnc ttmath_end jnc ttmath_end
mov eax, 0 mov eax, 0
inc edx lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
dec ecx dec ecx
jnz ttmath_loop jnz ttmath_loop
ttmath_end: ttmath_end:
setc al setc al
movzx edx, al movzx eax, al
mov [c], edx mov [c], eax
pop edx
pop ecx
pop ebx
pop eax
} }
#endif #endif
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2;
__asm__ __volatile__( __asm__ __volatile__(
"push %%ecx \n"
"push %%edx \n"
"subl %%edx, %%ecx \n" "subl %%edx, %%ecx \n"
"addl %%esi, (%%ebx,%%edx,4) \n" "addl %%esi, (%%ebx,%%edx,4) \n"
@ -414,21 +374,19 @@ namespace ttmath
// this part might be compiled with for example visual c // this part might be compiled with for example visual c
__asm __asm
{ {
pushad
mov ecx, [ss2_size] mov ecx, [ss2_size]
xor edx, edx // edx = 0, cf = 0 xor edx, edx // edx = 0, cf = 0
mov esi, [ss1] mov esi, [ss1]
mov ebx, [ss2] mov ebx, [ss2]
mov edi, [result] mov edi, [result]
ALIGN 16
ttmath_loop: ttmath_loop:
mov eax, [esi+edx*4] mov eax, [esi+edx*4]
adc eax, [ebx+edx*4] adc eax, [ebx+edx*4]
mov [edi+edx*4], eax mov [edi+edx*4], eax
inc edx lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
dec ecx dec ecx
jnz ttmath_loop jnz ttmath_loop
@ -447,7 +405,7 @@ namespace ttmath
adc eax, ebx adc eax, ebx
mov [edi+edx*4], eax mov [edi+edx*4], eax
inc edx lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
dec ecx dec ecx
jnz ttmath_loop2 jnz ttmath_loop2
@ -455,8 +413,6 @@ namespace ttmath
ttmath_end: ttmath_end:
mov [c], ecx mov [c], ecx
popad
} }
#endif #endif
@ -529,40 +485,30 @@ namespace ttmath
// this algorithm doesn't require it // this algorithm doesn't require it
#ifndef __GNUC__ #ifndef __GNUC__
__asm __asm
{ {
push eax
push ebx
push ecx
push edx
push esi
mov ecx,[b] mov ecx,[b]
mov ebx,[p1] mov ebx,[p1]
mov esi,[p2] mov esi,[p2]
xor edx,edx // edx=0 xor eax, eax
mov eax,[c] mov edx, eax
neg eax // CF=1 if rax!=0 , CF=0 if rax==0
sub eax, [c]
ALIGN 16
ttmath_loop: ttmath_loop:
mov eax, [esi+edx*4] mov eax, [esi+edx*4]
sbb [ebx+edx*4], eax sbb [ebx+edx*4], eax
inc edx lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
dec ecx dec ecx
jnz ttmath_loop jnz ttmath_loop
adc ecx, ecx setc al
mov [c], ecx movzx eax, al
mov [c], eax
pop esi
pop edx
pop ecx
pop ebx
pop eax
} }
#endif #endif
@ -629,14 +575,8 @@ namespace ttmath
TTMATH_ASSERT( index < value_size ) TTMATH_ASSERT( index < value_size )
#ifndef __GNUC__ #ifndef __GNUC__
__asm __asm
{ {
push eax
push ebx
push ecx
push edx
mov ecx, [b] mov ecx, [b]
sub ecx, [index] sub ecx, [index]
@ -645,26 +585,21 @@ namespace ttmath
mov eax, [value] mov eax, [value]
ALIGN 16
ttmath_loop: ttmath_loop:
sub [ebx+edx*4], eax sub [ebx+edx*4], eax
jnc ttmath_end jnc ttmath_end
mov eax, 1 mov eax, 1
inc edx lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
dec ecx dec ecx
jnz ttmath_loop jnz ttmath_loop
ttmath_end: ttmath_end:
setc al setc al
movzx edx, al movzx eax, al
mov [c], edx mov [c], eax
pop edx
pop ecx
pop ebx
pop eax
} }
#endif #endif
@ -740,7 +675,6 @@ namespace ttmath
*/ */
__asm __asm
{ {
pushad
mov ecx, [ss2_size] mov ecx, [ss2_size]
xor edx, edx // edx = 0, cf = 0 xor edx, edx // edx = 0, cf = 0
@ -754,7 +688,7 @@ namespace ttmath
sbb eax, [ebx+edx*4] sbb eax, [ebx+edx*4]
mov [edi+edx*4], eax mov [edi+edx*4], eax
inc edx lea edx, [edx+1]
dec ecx dec ecx
jnz ttmath_loop jnz ttmath_loop
@ -773,7 +707,7 @@ namespace ttmath
sbb eax, ebx sbb eax, ebx
mov [edi+edx*4], eax mov [edi+edx*4], eax
inc edx lea edx, [edx+1]
dec ecx dec ecx
jnz ttmath_loop2 jnz ttmath_loop2
@ -781,8 +715,6 @@ namespace ttmath
ttmath_end: ttmath_end:
mov [c], ecx mov [c], ecx
popad
} }
#endif #endif
@ -858,29 +790,25 @@ namespace ttmath
#ifndef __GNUC__ #ifndef __GNUC__
__asm __asm
{ {
push ebx
push ecx
push edx
mov ebx, [p1] mov ebx, [p1]
xor edx, edx xor edx, edx
mov ecx, [c] mov ecx, edx
neg ecx sub ecx, [c]
mov ecx, [b] mov ecx, [b]
ALIGN 16
ttmath_loop: ttmath_loop:
rcl dword ptr [ebx+edx*4], 1 rcl dword ptr [ebx+edx*4], 1
inc edx lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
dec ecx dec ecx
jnz ttmath_loop jnz ttmath_loop
adc ecx, ecx setc al
mov [c], ecx movzx eax, al
mov [c], eax
pop edx
pop ecx
pop ebx
} }
#endif #endif
@ -936,25 +864,22 @@ namespace ttmath
#ifndef __GNUC__ #ifndef __GNUC__
__asm __asm
{ {
push ebx xor ecx, ecx
push ecx sub ecx, [c]
mov ebx, [p1] mov ebx, [p1]
mov ecx, [c]
neg ecx
mov ecx, [b] mov ecx, [b]
ALIGN 16
ttmath_loop: ttmath_loop:
rcr dword ptr [ebx+ecx*4-4], 1 rcr dword ptr [ebx+ecx*4-4], 1
dec ecx dec ecx
jnz ttmath_loop jnz ttmath_loop
adc ecx, ecx setc al
mov [c], ecx movzx eax, al
mov [c], eax
pop ecx
pop ebx
} }
#endif #endif
@ -987,13 +912,6 @@ namespace ttmath
#ifdef _MSC_VER
#pragma warning (disable : 4731)
//warning C4731: frame pointer register 'ebp' modified by inline assembly code
#endif
/*! /*!
this method moves all bits into the left hand side this method moves all bits into the left hand side
return value <- this <- c return value <- this <- c
@ -1011,62 +929,47 @@ namespace ttmath
{ {
TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT ) TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
uint b = value_size; register sint b = value_size;
uint * p1 = table; register uint * p1 = table;
register uint mask;
#ifndef __GNUC__ #ifndef __GNUC__
__asm __asm
{ {
push eax
push ebx
push ecx
push edx
push esi
push edi
push ebp
mov edi, [b] mov edi, [b]
mov ecx, 32 mov ecx, 32
sub ecx, [bits] sub ecx, [bits]
mov edx, -1 mov edx, -1
shr edx, cl shr edx, cl
mov [mask], edx
mov ecx, [bits] mov ecx, [bits]
mov ebx, [p1] mov ebx, [p1]
mov eax, [c]
mov ebp, edx // ebp = mask (modified ebp - don't read/write to variables)
xor edx, edx // edx = 0 xor edx, edx // edx = 0
mov esi, edx mov esi, edx // old value = 0
or eax, eax
cmovnz esi, ebp // if(c) esi=mask else esi=0
mov eax, [c]
or eax, eax
cmovnz esi, [mask] // if c then old value = mask
ALIGN 16
ttmath_loop: ttmath_loop:
rol dword ptr [ebx+edx*4], cl rol dword ptr [ebx+edx*4], cl
mov eax, [ebx+edx*4] mov eax, [ebx+edx*4]
and eax, ebp and eax, [mask]
xor [ebx+edx*4], eax // clearing bits xor [ebx+edx*4], eax // clearing bits
or [ebx+edx*4], esi // saving old value or [ebx+edx*4], esi // saving old value
mov esi, eax mov esi, eax
inc edx lea edx, [edx+1] // inc edx, but faster (no flags dependencies)
dec edi dec edi
jnz ttmath_loop jnz ttmath_loop
pop ebp // restoring ebp
and eax, 1 and eax, 1
mov [c], eax mov dword ptr [c], eax
pop edi
pop esi
pop edx
pop ecx
pop ebx
pop eax
} }
#endif #endif
@ -1141,43 +1044,37 @@ namespace ttmath
uint b = value_size; uint b = value_size;
uint * p1 = table; uint * p1 = table;
uint mask;
#ifndef __GNUC__ #ifndef __GNUC__
__asm __asm
{ {
push eax
push ebx
push ecx
push edx
push esi
push edi
push ebp
mov edi, [b] mov edi, [b]
mov ecx, 32 mov ecx, 32
sub ecx, [bits] sub ecx, [bits]
mov edx, -1 mov edx, -1
shl edx, cl shl edx, cl
mov [mask], edx
mov ecx, [bits] mov ecx, [bits]
mov ebx, [p1] mov ebx, [p1]
mov eax, [c]
mov ebp, edx // ebp = mask (modified ebp - don't read/write to variables)
xor edx, edx // edx = 0 xor edx, edx // edx = 0
mov esi, edx mov esi, edx // old value = 0
add edx, edi add edx, edi
dec edx // edx is pointing at the end of the table (on last word) dec edx // edx - is pointing at the last word
or eax, eax
cmovnz esi, ebp // if(c) esi=mask else esi=0
mov eax, [c]
or eax, eax
cmovnz esi, [mask] // if c then old value = mask
ALIGN 16
ttmath_loop: ttmath_loop:
ror dword ptr [ebx+edx*4], cl ror dword ptr [ebx+edx*4], cl
mov eax, [ebx+edx*4] mov eax, [ebx+edx*4]
and eax, ebp and eax, [mask]
xor [ebx+edx*4], eax // clearing bits xor [ebx+edx*4], eax // clearing bits
or [ebx+edx*4], esi // saving old value or [ebx+edx*4], esi // saving old value
mov esi, eax mov esi, eax
@ -1186,18 +1083,10 @@ namespace ttmath
dec edi dec edi
jnz ttmath_loop jnz ttmath_loop
pop ebp // restoring ebp rol eax, 1 // bit 31 will be bit 0
rol eax, 1 // 31bit will be first
and eax, 1 and eax, 1
mov [c], eax
pop edi mov dword ptr [c], eax
pop esi
pop edx
pop ecx
pop ebx
pop eax
} }
#endif #endif
@ -1254,10 +1143,6 @@ namespace ttmath
} }
#ifdef _MSC_VER
#pragma warning (default : 4731)
#endif
/* /*
this method returns the number of the highest set bit in one 32-bit word this method returns the number of the highest set bit in one 32-bit word
@ -1271,16 +1156,11 @@ namespace ttmath
#ifndef __GNUC__ #ifndef __GNUC__
__asm __asm
{ {
push eax
push edx
mov edx,-1 mov edx,-1
bsr eax,[x] bsr eax,[x]
cmovz eax,edx cmovz eax,edx
mov [result], eax
pop edx mov [result], eax
pop eax
} }
#endif #endif
@ -1328,9 +1208,6 @@ namespace ttmath
#ifndef __GNUC__ #ifndef __GNUC__
__asm __asm
{ {
push ebx
push eax
mov eax, [v] mov eax, [v]
mov ebx, [bit] mov ebx, [bit]
bts eax, ebx bts eax, ebx
@ -1339,9 +1216,6 @@ namespace ttmath
setc bl setc bl
movzx ebx, bl movzx ebx, bl
mov [old_bit], ebx mov [old_bit], ebx
pop eax
pop ebx
} }
#endif #endif
@ -1360,7 +1234,6 @@ namespace ttmath
#endif #endif
value = v; value = v;
return old_bit; return old_bit;
} }
@ -1392,17 +1265,11 @@ namespace ttmath
__asm __asm
{ {
push eax
push edx
mov eax, [a] mov eax, [a]
mul dword ptr [b] mul dword ptr [b]
mov [result2_], edx mov [result2_], edx
mov [result1_], eax mov [result1_], eax
pop edx
pop eax
} }
#endif #endif
@ -1465,18 +1332,12 @@ namespace ttmath
#ifndef __GNUC__ #ifndef __GNUC__
__asm __asm
{ {
push eax
push edx
mov edx, [a] mov edx, [a]
mov eax, [b] mov eax, [b]
div dword ptr [c] div dword ptr [c]
mov [r_], eax mov [r_], eax
mov [rest_], edx mov [rest_], edx
pop edx
pop eax
} }
#endif #endif

View File

@ -39,11 +39,10 @@
#ifndef headerfilettmathuint_x86_64 #ifndef headerfilettmathuint_x86_64
#define headerfilettmathuint_x86_64 #define headerfilettmathuint_x86_64
#ifndef TTMATH_NOASM #ifndef TTMATH_NOASM
#ifdef TTMATH_PLATFORM64 #ifdef TTMATH_PLATFORM64
#pragma message("TTMATH_ASM64")
/*! /*!
\file ttmathuint_x86_64.h \file ttmathuint_x86_64.h
\brief template class UInt<uint> with assembler code for 64bit x86_64 processors \brief template class UInt<uint> with assembler code for 64bit x86_64 processors
@ -51,10 +50,31 @@
this file is included at the end of ttmathuint.h this file is included at the end of ttmathuint.h
*/ */
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
namespace ttmath namespace ttmath
{ {
#if defined(_M_X64)
// NOTE(review): this #include sits inside 'namespace ttmath' -- confirm that
// pulling <intrin.h> in at namespace scope is intended.
#include <intrin.h>
// Prototypes of the hand-written 64-bit routines used by the MSVC code paths
// below. All of them work on arrays of 64-bit words.
extern "C"
{
// p1[i] += p2[i] + carry over nSize words; c != 0 is the incoming carry; returns the outgoing carry (0 or 1)
uint __fastcall adc_x64(uint* p1, const uint* p2, uint nSize, uint c);
// adds nValue to p1[nPos] and ripples the carry upwards; returns 1 when the carry leaves the last word
uint __fastcall addindexed_x64(uint* p1, uint nSize, uint nPos, uint nValue);
// adds nValue1 to p1[nPos] and nValue2 (plus carry) to p1[nPos+1], then ripples the carry upwards
uint __fastcall addindexed2_x64(uint* p1, uint nSize, uint nPos, uint nValue1, uint nValue2);
// p1[i] -= p2[i] + borrow over nSize words; c != 0 is the incoming borrow; returns the outgoing borrow (0 or 1)
uint __fastcall sbb_x64(uint* p1, const uint* p2, uint nSize, uint c);
// subtracts nValue from p1[nPos] and ripples the borrow upwards; returns 1 when the borrow leaves the last word
uint __fastcall subindexed_x64(uint* p1, uint nSize, uint nPos, uint nValue);
// shifts the whole table left by one bit; nLowestBit != 0 enters at bit 0 of p1[0]; returns the bit shifted out
uint __fastcall rcl_x64(uint* p1, uint nSize, uint nLowestBit);
// shifts the whole table right by one bit; a nonzero third argument enters at the top bit of
// p1[nSize-1] (despite the parameter name); returns the bit shifted out
uint __fastcall rcr_x64(uint* p1, uint nSize, uint nLowestBit);
// divides the 128-bit value *pnValHi:*pnValLo by nDiv; on return *pnValHi holds the quotient
// and *pnValLo the remainder
uint __fastcall div_x64(uint* pnValHi, uint* pnValLo, uint nDiv);
// shifts the whole table left by nBits; c != 0 fills the vacated low bits with ones; returns the last bit shifted out
uint __fastcall rcl2_x64(uint* p1, uint nSize, uint nBits, uint c);
// shifts the whole table right by nBits; c != 0 fills the vacated high bits with ones; returns the last bit shifted out
uint __fastcall rcr2_x64(uint* p1, uint nSize, uint nBits, uint c);
};
#endif
/*! /*!
* *
* basic mathematic functions * basic mathematic functions
@ -83,12 +103,15 @@ namespace ttmath
// this algorithm doesn't require it // this algorithm doesn't require it
#ifndef __GNUC__ #ifndef __GNUC__
#if defined(_M_X64)
c = adc_x64(p1,p2,b,c);
#else
#error "another compiler than GCC is currently not supported in 64bit mode" #error "another compiler than GCC is currently not supported in 64bit mode"
#endif #endif
#endif
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2; uint dummy, dummy2;
/* /*
this part should be compiled with gcc this part should be compiled with gcc
*/ */
@ -150,8 +173,12 @@ namespace ttmath
TTMATH_ASSERT( index < value_size ) TTMATH_ASSERT( index < value_size )
#ifndef __GNUC__ #ifndef __GNUC__
#if defined(_M_X64)
c = addindexed_x64(p1,b,index,value);
#else
#error "another compiler than GCC is currently not supported in 64bit mode" #error "another compiler than GCC is currently not supported in 64bit mode"
#endif #endif
#endif
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2; uint dummy, dummy2;
@ -220,6 +247,30 @@ namespace ttmath
*/ */
template<uint value_size> template<uint value_size>
uint UInt<value_size>::AddTwoInts(uint x2, uint x1, uint index) uint UInt<value_size>::AddTwoInts(uint x2, uint x1, uint index)
#if 0
{
uint i, c;
TTMATH_ASSERT( index < value_size )
printf("add %Id + %Id\n",x1,x2);
for(int i=index ; i<value_size ; ++i)
printf("%d: %Id\n",i,table[i]);
c = AddTwoWords(table[index], x1, 0, &table[index]);
c = AddTwoWords(table[index+1], x2, c, &table[index+1]);
for(i=index+2 ; i<value_size && c ; ++i)
c = AddTwoWords(table[i], 0, c, &table[i]);
for(i=index ; i<value_size ; ++i)
printf("%d: %Id\n",i,table[i]);
printf(" -> %d\n",c);
TTMATH_LOG("UInt::AddTwoInts")
return c;
}
#else
{ {
uint b = value_size; uint b = value_size;
uint * p1 = table; uint * p1 = table;
@ -228,8 +279,19 @@ namespace ttmath
TTMATH_ASSERT( index < value_size - 1 ) TTMATH_ASSERT( index < value_size - 1 )
#ifndef __GNUC__ #ifndef __GNUC__
#if defined(_M_X64)
//printf("add %Id + %Id\n",x1,x2);
//for(int i=index ; i<value_size ; ++i)
// printf("%d: %Id\n",i,table[i]);
//if (table[0] == 1265784741359897913) DebugBreak();
c = addindexed2_x64(p1,b,index,x1,x2);
//for(int i=index ; i<value_size ; ++i)
// printf("%d: %Id\n",i,table[i]);
//printf(" -> %d\n",c);
#else
#error "another compiler than GCC is currently not supported in 64bit mode" #error "another compiler than GCC is currently not supported in 64bit mode"
#endif #endif
#endif
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2; uint dummy, dummy2;
@ -265,89 +327,8 @@ namespace ttmath
return c; return c;
} }
/*!
this static method addes one vector to the other
'ss1' is larger in size or equal to 'ss2'
ss1 points to the first (larger) vector
ss2 points to the second vector
ss1_size - size of the ss1 (and size of the result too)
ss2_size - size of the ss2
result - is the result vector (which has size the same as ss1: ss1_size)
Example: ss1_size is 5, ss2_size is 3
ss1: ss2: result (output):
5 1 5+1
4 3 4+3
2 7 2+7
6 6
9 9
of course the carry is propagated and will be returned from the last item
(this method is used by the Karatsuba multiplication algorithm)
*/
template<uint value_size>
uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
{
TTMATH_ASSERT( ss1_size >= ss2_size )
uint rest = ss1_size - ss2_size;
uint c;
#ifndef __GNUC__
#error "another compiler than GCC is currently not supported in 64bit mode"
#endif #endif
#ifdef __GNUC__
uint dummy1, dummy2, dummy3;
// this part should be compiled with gcc
__asm__ __volatile__(
"mov %%rdx, %%r8 \n"
"xor %%rdx, %%rdx \n" // rdx = 0, cf = 0
"1: \n"
"mov (%%rsi,%%rdx,8), %%rax \n"
"adc (%%rbx,%%rdx,8), %%rax \n"
"mov %%rax, (%%rdi,%%rdx,8) \n"
"inc %%rdx \n"
"dec %%rcx \n"
"jnz 1b \n"
"adc %%rcx, %%rcx \n" // rcx has the cf state
"or %%r8, %%r8 \n"
"jz 3f \n"
"xor %%rbx, %%rbx \n" // ebx = 0
"neg %%rcx \n" // setting cf from rcx
"mov %%r8, %%rcx \n" // rcx=rest and is != 0
"2: \n"
"mov (%%rsi, %%rdx, 8), %%rax \n"
"adc %%rbx, %%rax \n"
"mov %%rax, (%%rdi, %%rdx, 8) \n"
"inc %%rdx \n"
"dec %%rcx \n"
"jnz 2b \n"
"adc %%rcx, %%rcx \n"
"3: \n"
: "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3)
: "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result)
: "%r8", "cc", "memory" );
#endif
TTMATH_LOG("UInt::AddVector")
return c;
}
@ -369,13 +350,16 @@ namespace ttmath
uint * p1 = table; uint * p1 = table;
const uint * p2 = ss2.table; const uint * p2 = ss2.table;
// we don't have to use TTMATH_REFERENCE_ASSERT here // we don't have to use TTMATH_REFERENCE_ASSERT here
// this algorithm doesn't require it // this algorithm doesn't require it
#ifndef __GNUC__ #ifndef __GNUC__
#if defined(_M_X64)
c = sbb_x64(p1,p2,b,c);
#else
#error "another compiler than GCC is currently not supported in 64bit mode" #error "another compiler than GCC is currently not supported in 64bit mode"
#endif #endif
#endif
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2; uint dummy, dummy2;
@ -399,6 +383,7 @@ namespace ttmath
: "0" (b), "1" (c), "b" (p1), "S" (p2) : "0" (b), "1" (c), "b" (p1), "S" (p2)
: "cc", "memory" ); : "cc", "memory" );
#endif #endif
TTMATH_LOG("UInt::Sub") TTMATH_LOG("UInt::Sub")
@ -432,15 +417,20 @@ namespace ttmath
uint b = value_size; uint b = value_size;
uint * p1 = table; uint * p1 = table;
uint c; uint c;
uint dummy, dummy2;
TTMATH_ASSERT( index < value_size ) TTMATH_ASSERT( index < value_size )
#ifndef __GNUC__ #ifndef __GNUC__
#if defined(_M_X64)
c = subindexed_x64(p1,b,index,value);
#else
#error "another compiler than GCC is currently not supported in 64bit mode" #error "another compiler than GCC is currently not supported in 64bit mode"
#endif #endif
#endif
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2;
__asm__ __volatile__( __asm__ __volatile__(
"subq %%rdx, %%rcx \n" "subq %%rdx, %%rcx \n"
@ -464,100 +454,12 @@ namespace ttmath
#endif #endif
TTMATH_LOG("UInt::SubInt") TTMATH_LOG("UInt64::SubInt")
return c; return c;
} }
/*!
this static method subtractes one vector from the other
'ss1' is larger in size or equal to 'ss2'
ss1 points to the first (larger) vector
ss2 points to the second vector
ss1_size - size of the ss1 (and size of the result too)
ss2_size - size of the ss2
result - is the result vector (which has size the same as ss1: ss1_size)
Example: ss1_size is 5, ss2_size is 3
ss1: ss2: result (output):
5 1 5-1
4 3 4-3
2 7 2-7
6 6-1 (the borrow from previous item)
9 9
return (carry): 0
of course the carry (borrow) is propagated and will be returned from the last item
(this method is used by the Karatsuba multiplication algorithm)
*/
template<uint value_size>
uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
{
TTMATH_ASSERT( ss1_size >= ss2_size )
uint rest = ss1_size - ss2_size;
uint c;
#ifndef __GNUC__
#error "another compiler than GCC is currently not supported in 64bit mode"
#endif
#ifdef __GNUC__
/*
the asm code is nearly the same as in AddVector
only two instructions 'adc' are changed to 'sbb'
*/
uint dummy1, dummy2, dummy3;
__asm__ __volatile__(
"mov %%rdx, %%r8 \n"
"xor %%rdx, %%rdx \n" // rdx = 0, cf = 0
"1: \n"
"mov (%%rsi,%%rdx,8), %%rax \n"
"sbb (%%rbx,%%rdx,8), %%rax \n"
"mov %%rax, (%%rdi,%%rdx,8) \n"
"inc %%rdx \n"
"dec %%rcx \n"
"jnz 1b \n"
"adc %%rcx, %%rcx \n" // rcx has the cf state
"or %%r8, %%r8 \n"
"jz 3f \n"
"xor %%rbx, %%rbx \n" // ebx = 0
"neg %%rcx \n" // setting cf from rcx
"mov %%r8, %%rcx \n" // rcx=rest and is != 0
"2: \n"
"mov (%%rsi, %%rdx, 8), %%rax \n"
"sbb %%rbx, %%rax \n"
"mov %%rax, (%%rdi, %%rdx, 8) \n"
"inc %%rdx \n"
"dec %%rcx \n"
"jnz 2b \n"
"adc %%rcx, %%rcx \n"
"3: \n"
: "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3)
: "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result)
: "%r8", "cc", "memory" );
#endif
TTMATH_LOG("UInt::SubVector")
return c;
}
/*! /*!
this method moves all bits into the left hand side this method moves all bits into the left hand side
return value <- this <- c return value <- this <- c
@ -578,10 +480,13 @@ namespace ttmath
sint b = value_size; sint b = value_size;
uint * p1 = table; uint * p1 = table;
#ifndef __GNUC__ #ifndef __GNUC__
#if defined(_M_X64)
c = rcl_x64(p1,b,c);
#else
#error "another compiler than GCC is currently not supported in 64bit mode" #error "another compiler than GCC is currently not supported in 64bit mode"
#endif #endif
#endif
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2; uint dummy, dummy2;
@ -632,10 +537,13 @@ namespace ttmath
sint b = value_size; sint b = value_size;
uint * p1 = table; uint * p1 = table;
#ifndef __GNUC__ #ifndef __GNUC__
#if defined(_M_X64)
c = rcr_x64(p1,b,c);
#else
#error "another compiler than GCC is currently not supported in 64bit mode" #error "another compiler than GCC is currently not supported in 64bit mode"
#endif #endif
#endif
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy; uint dummy;
@ -687,10 +595,13 @@ namespace ttmath
uint b = value_size; uint b = value_size;
uint * p1 = table; uint * p1 = table;
#ifndef __GNUC__ #ifndef __GNUC__
#if defined(_M_X64)
c = rcl2_x64(p1,b,bits,c);
#else
#error "another compiler than GCC is currently not supported in 64bit mode" #error "another compiler than GCC is currently not supported in 64bit mode"
#endif #endif
#endif
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2, dummy3; uint dummy, dummy2, dummy3;
@ -707,6 +618,7 @@ namespace ttmath
"xorq %%rdx, %%rdx \n" "xorq %%rdx, %%rdx \n"
"movq %%rdx, %%rsi \n" "movq %%rdx, %%rsi \n"
"orq %%rax, %%rax \n" "orq %%rax, %%rax \n"
"cmovnz %%r8, %%rsi \n" "cmovnz %%r8, %%rsi \n"
@ -758,14 +670,19 @@ namespace ttmath
sint b = value_size; sint b = value_size;
uint * p1 = table; uint * p1 = table;
uint dummy, dummy2, dummy3;
#ifndef __GNUC__ #ifndef __GNUC__
#if defined(_M_X64)
c = rcr2_x64(p1,b,bits,c);
#else
#error "another compiler than GCC is currently not supported in 64bit mode" #error "another compiler than GCC is currently not supported in 64bit mode"
#endif #endif
#endif
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2, dummy3;
__asm__ __volatile__( __asm__ __volatile__(
"movq %%rcx, %%rsi \n" "movq %%rcx, %%rsi \n"
@ -780,6 +697,7 @@ namespace ttmath
"movq %%rdx, %%rsi \n" "movq %%rdx, %%rsi \n"
"addq %%rdi, %%rdx \n" "addq %%rdi, %%rdx \n"
"decq %%rdx \n" "decq %%rdx \n"
"orq %%rax, %%rax \n" "orq %%rax, %%rax \n"
"cmovnz %%R8, %%rsi \n" "cmovnz %%R8, %%rsi \n"
@ -820,12 +738,20 @@ namespace ttmath
template<uint value_size> template<uint value_size>
sint UInt<value_size>::FindLeadingBitInWord(uint x) sint UInt<value_size>::FindLeadingBitInWord(uint x)
{ {
sint result; register sint result;
#ifndef __GNUC__ #ifndef __GNUC__
#if defined(_MSC_VER)
unsigned long nIndex(0);
if (_BitScanReverse64(&nIndex,x) == 0)
result = -1;
else
result = nIndex;
#else
#error "another compiler than GCC is currently not supported in 64bit mode" #error "another compiler than GCC is currently not supported in 64bit mode"
#endif #endif
#endif
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy; uint dummy;
@ -870,8 +796,16 @@ namespace ttmath
#ifndef __GNUC__ #ifndef __GNUC__
#if defined(_MSC_VER)
#if defined(TTMATH_PLATFORM64)
old_bit = _bittestandset64((__int64*)&value,bit) != 0;
#else
old_bit = _bittestandset((long*)&value,bit) != 0;
#endif
#else
#error "another compiler than GCC is currently not supported in 64bit mode" #error "another compiler than GCC is currently not supported in 64bit mode"
#endif #endif
#endif
#ifdef __GNUC__ #ifdef __GNUC__
@ -925,8 +859,12 @@ namespace ttmath
uint result2_; uint result2_;
#ifndef __GNUC__ #ifndef __GNUC__
#if defined(_MSC_VER)
result1_ = _umul128(a,b,&result2_);
#else
#error "another compiler than GCC is currently not supported in 64bit mode" #error "another compiler than GCC is currently not supported in 64bit mode"
#endif #endif
#endif
#ifdef __GNUC__ #ifdef __GNUC__
@ -955,6 +893,7 @@ namespace ttmath
* *
*/ */
#ifndef __GNUC__
/*! /*!
this method calculates 64bits word a:b / 32bits c (a higher, b lower word) this method calculates 64bits word a:b / 32bits c (a higher, b lower word)
@ -982,8 +921,14 @@ namespace ttmath
TTMATH_ASSERT( c != 0 ) TTMATH_ASSERT( c != 0 )
#ifndef __GNUC__ #ifndef __GNUC__
#if defined(_MSC_VER)
div_x64(&a,&b,c);
r_ = a;
rest_ = b;
#else
#error "another compiler than GCC is currently not supported in 64bit mode" #error "another compiler than GCC is currently not supported in 64bit mode"
#endif #endif
#endif
#ifdef __GNUC__ #ifdef __GNUC__
@ -1002,6 +947,132 @@ namespace ttmath
*rest = rest_; *rest = rest_;
} }
template<uint value_size>
uint UInt<value_size>::AddTwoWords(uint a, uint b, uint carry, uint * result)
{
	/*
		adds a + b + (carry ? 1 : 0)

		the wrapped sum is stored in *result,
		the return value is the outgoing carry (0 or 1)
	*/
	const bool carry_in = (carry != 0);
	const uint sum      = carry_in ? (a + b + 1) : (a + b);

	// without an incoming carry the addition wrapped only if the sum fell below 'a';
	// with an incoming carry 'sum == a' means a wrap as well
	const bool carry_out = carry_in ? (sum <= a) : (sum < a);

	*result = sum;

return carry_out ? 1 : 0;
}
template<uint value_size>
uint UInt<value_size>::SubTwoWords(uint a, uint b, uint carry, uint * result)
{
	/*
		subtracts a - b - (carry ? 1 : 0)

		the wrapped difference is stored in *result,
		the return value is the outgoing borrow (0 or 1)
	*/
	const bool borrow_in = (carry != 0);

	// with an incoming borrow 'a == b' underflows too
	const bool borrow_out = borrow_in ? (a <= b) : (a < b);

	*result = borrow_in ? (a - b - 1) : (a - b);

return borrow_out ? 1 : 0;
}
/*!
this static method adds one vector to the other
'ss1' is larger in size or equal to 'ss2'
ss1 points to the first (larger) vector
ss2 points to the second vector
ss1_size - size of the ss1 (and size of the result too)
ss2_size - size of the ss2
result - is the result vector (which has size the same as ss1: ss1_size)
Example: ss1_size is 5, ss2_size is 3
ss1: ss2: result (output):
5 1 5+1
4 3 4+3
2 7 2+7
6 6
9 9
of course the carry is propagated and will be returned from the last item
(this method is used by the Karatsuba multiplication algorithm)
*/
template<uint value_size>
uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
{
	uint pos   = 0;
	uint carry = 0;

	TTMATH_ASSERT( ss1_size >= ss2_size )

	// words which exist in both vectors
	while( pos < ss2_size )
	{
		carry = AddTwoWords(ss1[pos], ss2[pos], carry, result + pos);
		++pos;
	}

	// the rest of the longer vector: only the carry is left to be added
	while( pos < ss1_size )
	{
		carry = AddTwoWords(ss1[pos], 0, carry, result + pos);
		++pos;
	}

	TTMATH_LOG("UInt::AddVector")

return carry;
}
/*!
this static method subtracts one vector from the other
'ss1' is larger in size or equal to 'ss2'
ss1 points to the first (larger) vector
ss2 points to the second vector
ss1_size - size of the ss1 (and size of the result too)
ss2_size - size of the ss2
result - is the result vector (which has size the same as ss1: ss1_size)
Example: ss1_size is 5, ss2_size is 3
ss1: ss2: result (output):
5 1 5-1
4 3 4-3
2 7 2-7
6 6-1 (the borrow from previous item)
9 9
return (carry): 0
of course the carry (borrow) is propagated and will be returned from the last item
(this method is used by the Karatsuba multiplication algorithm)
*/
template<uint value_size>
uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
{
	uint pos    = 0;
	uint borrow = 0;

	TTMATH_ASSERT( ss1_size >= ss2_size )

	// words which exist in both vectors
	while( pos < ss2_size )
	{
		borrow = SubTwoWords(ss1[pos], ss2[pos], borrow, result + pos);
		++pos;
	}

	// the rest of the longer vector: only the borrow is left to be subtracted
	while( pos < ss1_size )
	{
		borrow = SubTwoWords(ss1[pos], 0, borrow, result + pos);
		++pos;
	}

	TTMATH_LOG("UInt::SubVector")

return borrow;
}
#endif // #ifndef __GNUC__
} //namespace } //namespace

View File

@ -0,0 +1,386 @@
;
; MASM-syntax x86-64 routines operating on arrays of 64-bit words.
; They are the implementations behind the extern "C" *_x64 prototypes that
; ttmathuint_x86_64.h declares for the MSVC (_M_X64) build.
;
; Arguments arrive in rcx, rdx, r8, r9 (a fifth one on the stack) and the
; result is returned in rax.
; NOTE(review): presumably assembled with ml64 -- confirm against the build files.
;
; Exported symbols:
PUBLIC adc_x64
PUBLIC addindexed_x64
PUBLIC addindexed2_x64
PUBLIC sbb_x64
PUBLIC subindexed_x64
PUBLIC rcl_x64
PUBLIC rcr_x64
PUBLIC rcl2_x64
PUBLIC rcr2_x64
PUBLIC div_x64
;
; Register usage rules the routines below have to respect:
; "rax, rcx, rdx, r8-r11 are volatile."
; "rbx, rbp, rdi, rsi, r12-r15 are nonvolatile."
;
.CODE
ALIGN 8
;----------------------------------------
adc_x64 PROC
        ; uint adc_x64(uint* p1, const uint* p2, uint nSize, uint c)
        ;
        ; In:    rcx = p1     (destination words, updated in place)
        ;        rdx = p2     (source words)
        ;        r8  = nSize  (word count; tested only after the first word is processed)
        ;        r9  = c      (incoming carry, any nonzero value counts as 1)
        ; Out:   rax = carry out of the last word (0 or 1)
        ; Clobb: r8, r9, r10, flags

        xor     r10d, r10d                      ; r10 = word index = 0
        neg     r9                              ; CF = (c != 0)

        ALIGN 16
next_word:
        mov     rax, qword ptr [rdx + r10 * 8]
        adc     qword ptr [rcx + r10 * 8], rax  ; p1[i] += p2[i] + CF
        lea     r10, [r10 + 1]                  ; ++i without touching CF
        dec     r8                              ; dec leaves CF alone as well
        jnz     next_word

        setc    al
        movzx   eax, al                         ; 32-bit write zero-extends into rax
        ret
adc_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
addindexed_x64 PROC
        ; uint addindexed_x64(uint* p1, uint nSize, uint nPos, uint nValue)
        ;
        ; In:    rcx = p1
        ;        rdx = nSize
        ;        r8  = nPos
        ;        r9  = nValue
        ; Out:   rax = 1 if the carry ran past the last word, otherwise 0
        ; Clobb: rdx, r10, flags

        sub     rdx, r8                         ; rdx = words from nPos up to the end
        lea     r10, [rcx + r8 * 8]             ; r10 -> p1[nPos]
        xor     eax, eax                        ; result = 0 unless proven otherwise

        add     qword ptr [r10], r9
        jnc     finished                        ; common case: no carry to propagate

        ALIGN 16
ripple:
        dec     rdx
        jz      overflow                        ; no word left to absorb the carry
        lea     r10, [r10 + 8]
        add     qword ptr [r10], 1
        jc      ripple

finished:
        ret

overflow:
        mov     eax, 1
        ret
addindexed_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
addindexed2_x64 PROC
        ; uint addindexed2_x64(uint* p1, uint nSize, uint nPos, uint nValue1, uint nValue2)
        ;
        ; Adds nValue1 to p1[nPos] and nValue2 (plus carry) to p1[nPos+1], then
        ; ripples any remaining carry through the following words.
        ;
        ; In:    rcx        = p1
        ;        rdx        = nSize
        ;        r8         = nPos    (p1[nPos+1] is written unconditionally, so nPos+1 must be a valid index)
        ;        r9         = nValue1
        ;        [rsp+28h]  = nValue2 (fifth argument: 8 bytes return address + 32 bytes
        ;                     of register home space sit below it)
        ; Out:   rax = 1 if the carry ran past the last word, otherwise 0
        ; Clobb: rdx, r8, r10, r11, flags

        xor     eax, eax                        ; return value = 0
        mov     r11, rcx                        ; r11 = table
        sub     rdx, r8                         ; rdx = words from nPos up to the end

        ; The stack slot has to be addressed through the full 64-bit rsp;
        ; an esp-based address would drop the upper half of the stack pointer.
        mov     r10, qword ptr [rsp + 28h]      ; r10 = nValue2

        add     qword ptr [r11 + r8 * 8], r9
        lea     r8, [r8 + 1]                    ; lea keeps CF from the add alive
        lea     rdx, [rdx - 1]
        adc     qword ptr [r11 + r8 * 8], r10
        jc      next
        ret

        ALIGN 16
loop1:
        lea     r8, [r8 + 1]
        add     qword ptr [r11 + r8 * 8], 1
        jc      next
        ret

next:
        dec     rdx                             ; one word less that could absorb the carry
        jnz     loop1
        lea     rax, [rax + 1]                  ; carry left the table: return 1
        ret
addindexed2_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
sbb_x64 PROC
        ; uint sbb_x64(uint* p1, const uint* p2, uint nSize, uint c)
        ;
        ; In:    rcx = p1     (minuend words, updated in place)
        ;        rdx = p2     (subtrahend words)
        ;        r8  = nSize  (word count; tested only after the first word is processed)
        ;        r9  = c      (incoming borrow, any nonzero value counts as 1)
        ; Out:   rax = borrow out of the last word (0 or 1)
        ; Clobb: r8, r9, r10, flags

        xor     r10d, r10d                      ; r10 = word index = 0
        neg     r9                              ; CF = (c != 0)

        ALIGN 16
next_word:
        mov     rax, qword ptr [rdx + r10 * 8]
        sbb     qword ptr [rcx + r10 * 8], rax  ; p1[i] -= p2[i] + CF
        lea     r10, [r10 + 1]                  ; ++i without touching CF
        dec     r8                              ; dec leaves CF alone as well
        jnz     next_word

        setc    al
        movzx   eax, al                         ; 32-bit write zero-extends into rax
        ret
sbb_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
subindexed_x64 PROC
        ; uint subindexed_x64(uint* p1, uint nSize, uint nPos, uint nValue)
        ;
        ; In:    rcx = p1
        ;        rdx = nSize
        ;        r8  = nPos
        ;        r9  = nValue
        ; Out:   rax = 1 if the borrow ran past the last word, otherwise 0
        ; Clobb: rdx, r9, r10, flags

        sub     rdx, r8                         ; rdx = words from nPos up to the end
        lea     r10, [rcx + r8 * 8]             ; r10 -> p1[nPos]

        ALIGN 16
borrow_loop:
        sub     qword ptr [r10], r9
        jnc     no_borrow                       ; common case: nothing left to propagate
        lea     r10, [r10 + 8]
        mov     r9d, 1                          ; every following word only loses the borrow
        dec     rdx
        jnz     borrow_loop

        ; Reached only with a borrow still pending and no word left to take it.
        mov     eax, 1
        ret

no_borrow:
        xor     eax, eax
        ret
subindexed_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
rcl_x64 PROC
        ; uint rcl_x64(uint* p1, uint nSize, uint nLowestBit)
        ;
        ; Shifts the whole table left by one bit, lowest word first.
        ;
        ; In:    rcx = p1
        ;        rdx = nSize       (word count; tested only after the first word is processed)
        ;        r8  = nLowestBit  (nonzero: a 1 is shifted into bit 0 of p1[0])
        ; Out:   rax = bit shifted out of the top of the last word (0 or 1)
        ; Clobb: rdx, r8, r9, flags

        xor     r9d, r9d                        ; r9 = word index = 0
        neg     r8                              ; CF = (nLowestBit != 0)

        ALIGN 16
shift_word:
        rcl     qword ptr [rcx + r9 * 8], 1     ; CF enters at bit 0, bit 63 leaves into CF
        lea     r9, [r9 + 1]                    ; ++i without touching CF
        dec     rdx                             ; dec leaves CF alone as well
        jnz     shift_word

        setc    al
        movzx   eax, al                         ; 32-bit write zero-extends into rax
        ret
rcl_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
rcr_x64 PROC
        ; uint rcr_x64(uint* p1, uint nSize, uint nLowestBit)
        ;
        ; Shifts the whole table right by one bit, highest word first.
        ;
        ; In:    rcx = p1
        ;        rdx = nSize       (word count; doubles as the 1-based index of the
        ;                           word being processed)
        ;        r8  = nLowestBit  (nonzero: a 1 is shifted into the top bit of
        ;                           p1[nSize-1], despite the parameter name)
        ; Out:   rax = bit shifted out of bit 0 of p1[0] (0 or 1)
        ; Clobb: rdx, r8, flags

        neg     r8                              ; CF = (nLowestBit != 0)

        ALIGN 16
loop1:
        rcr     qword ptr [rcx + rdx * 8 - 8], 1 ; CF enters at bit 63, bit 0 leaves into CF
        dec     rdx                             ; dec leaves CF alone
        jnz     loop1

        setc    al
        movzx   eax, al                         ; 32-bit write zero-extends into rax
        ret
rcr_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
div_x64 PROC
        ; uint div_x64(uint* pnValHi, uint* pnValLo, uint nDiv)
        ;
        ; Divides the 128-bit value *pnValHi:*pnValLo by nDiv.
        ;
        ; In:    rcx = pnValHi  (in: upper 64 bits of the dividend, out: quotient)
        ;        rdx = pnValLo  (in: lower 64 bits of the dividend, out: remainder)
        ;        r8  = nDiv
        ; Out:   rax = quotient (same value that is stored to *pnValHi)
        ; Clobb: rdx, r9, flags
        ;
        ; 'div' raises a divide error when nDiv is zero or when the quotient
        ; does not fit into 64 bits (i.e. *pnValHi >= nDiv).

        mov     r9, rdx                         ; keep &Lo, rdx becomes the upper dividend half
        mov     rdx, qword ptr [rcx]
        mov     rax, qword ptr [r9]

        div     r8                              ; rax = quotient, rdx = remainder

        mov     qword ptr [r9], rdx             ; remainder first, quotient second
        mov     qword ptr [rcx], rax
        ret
div_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
rcl2_x64 PROC
        ; uint rcl2_x64(uint* p1, uint nSize, uint nBits, uint c)
        ;
        ; Shifts the whole table left by nBits, lowest word first. Each word is
        ; rotated; the bits that wrapped around are cut off and handed to the
        ; next word.
        ;
        ; In:    rcx = p1
        ;        rdx = nSize  (word count; tested only after the first word is processed)
        ;        r8  = nBits  (expected to be 1..63 -- TODO confirm the caller asserts this)
        ;        r9  = c      (nonzero: the vacated low bits of p1[0] are filled with ones)
        ; Out:   rax = last bit shifted out of the top word (0 or 1)
        ; Clobb: rcx, rdx, r8, r9, r10, r11, flags
        ;
        ; Only volatile registers are used and rsp is left alone, so this is a
        ; leaf routine and a plain PROC without unwind data is sufficient.

        mov     r10, rcx                        ; r10 = p1 (cl is needed as shift count)
        xor     eax, eax

        mov     ecx, 64
        sub     rcx, r8                         ; cl = 64 - nBits
        mov     r11, -1
        shr     r11, cl                         ; r11 = mask = lowest nBits bits set
        mov     rcx, r8                         ; cl = nBits; r8 is free from here on

        mov     r8, rax                         ; r8 = bits carried into the current word = 0
        test    r9, r9
        cmovnz  r8, r11                         ; if (c) carried-in bits = mask

        mov     r9, rax                         ; r9 = word index (0..nSize-1)

        ALIGN 16
loop1:
        rol     qword ptr [r10 + r9 * 8], cl
        mov     rax, qword ptr [r10 + r9 * 8]
        and     rax, r11                        ; rax = bits that wrapped around
        xor     qword ptr [r10 + r9 * 8], rax   ; clear them in the word ...
        or      qword ptr [r10 + r9 * 8], r8    ; ... and insert the previous word's bits
        mov     r8, rax

        lea     r9, [r9 + 1]
        dec     rdx
        jnz     loop1

        and     eax, 1                          ; lowest wrapped bit = last bit shifted out
        ret
rcl2_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
rcr2_x64 PROC
        ; uint rcr2_x64(uint* p1, uint nSize, uint nBits, uint c)
        ;
        ; Shifts the whole table right by nBits, highest word first. Each word
        ; is rotated; the bits that wrapped around are cut off and handed to
        ; the next lower word.
        ;
        ; In:    rcx = p1
        ;        rdx = nSize  (word count; tested only after the first word is processed)
        ;        r8  = nBits  (expected to be 1..63 -- TODO confirm the caller asserts this)
        ;        r9  = c      (nonzero: the vacated high bits of p1[nSize-1] are filled with ones)
        ; Out:   rax = last bit shifted out of the lowest word (0 or 1)
        ; Clobb: rcx, rdx, r8, r9, r10, r11, flags
        ;
        ; Only volatile registers are used and rsp is left alone, so this is a
        ; leaf routine and a plain PROC without unwind data is sufficient.

        mov     r10, rcx                        ; r10 = p1 (cl is needed as shift count)
        xor     eax, eax

        mov     ecx, 64
        sub     rcx, r8                         ; cl = 64 - nBits
        mov     r11, -1
        shl     r11, cl                         ; r11 = mask = highest nBits bits set
        mov     rcx, r8                         ; cl = nBits; r8 is free from here on

        mov     r8, rax                         ; r8 = bits carried into the current word = 0
        test    r9, r9
        cmovnz  r8, r11                         ; if (c) carried-in bits = mask

        lea     r9, [rdx - 1]                   ; r9 = word index, starting at nSize-1

        ALIGN 16
loop1:
        ror     qword ptr [r10 + r9 * 8], cl
        mov     rax, qword ptr [r10 + r9 * 8]
        and     rax, r11                        ; rax = bits that wrapped around
        xor     qword ptr [r10 + r9 * 8], rax   ; clear them in the word ...
        or      qword ptr [r10 + r9 * 8], r8    ; ... and insert the previous word's bits
        mov     r8, rax

        lea     r9, [r9 - 1]
        dec     rdx
        jnz     loop1

        rol     rax, 1                          ; bit 63 (last bit shifted out) -> bit 0
        and     eax, 1
        ret
rcr2_x64 ENDP
END