merged: x86_64 asm code for the Microsoft Visual C++ compiler

file: ttmathuint_x86_64_msvc.asm from the chk branch (original was: ttmathuint_x86_amd64_msvc.asm)
        (this file has to be compiled separately because MS VC doesn't support inline assembly in x86_64 mode)



git-svn-id: svn://ttmath.org/publicrep/ttmath/trunk@187 e52654a7-88a9-db11-a3e9-0013d4bc506e
Tomasz Sowa 2009-09-07 02:03:00 +00:00
parent 0d71b0cec2
commit 28964d30f7
6 changed files with 780 additions and 109 deletions

View File

@ -1,4 +1,4 @@
Version 0.9.0 prerelease (2009.09.05):
Version 0.9.0 prerelease (2009.09.07):
* added: support for wide characters (wchar_t)
wide characters are used when the TTMATH_USE_WCHAR macro is defined
this macro is defined automatically when the UNICODE or _UNICODE macro is defined
@ -22,6 +22,9 @@ Version 0.9.0 prerelease (2009.09.05):
and use TTMATH_MULTITHREADS_HELPER macro somewhere in your *.cpp file
* added: Big::AboutEqual(const Big<exp,man> & ss2, int nBitsToIgnore = 4)
the last nBitsToIgnore bits of the mantissas are skipped when comparing (see the short usage sketch after this list)
* added: x86_64 asm code for the Microsoft Visual C++ compiler
file: ttmathuint_x86_64_msvc.asm
(this file has to be compiled separately because MS VC doesn't support inline assembly in x86_64 mode)
* changed: Factorial() now uses the Gamma() function
* removed: Parser<>::SetFactorialMax() method
Factorial() is so fast now that this method is no longer needed
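
A minimal usage sketch for the AboutEqual() entry above (an editorial aside, not part of the changelog; the include path, the template sizes and the 1/3 round-trip are only an illustrative choice):

#include <ttmath/ttmath.h>

bool about_equal_example()
{
    typedef ttmath::Big<1, 2> BigFloat;   // 1 word of exponent, 2 words of mantissa

    BigFloat one, three, x;
    one   = 1;
    three = 3;

    // 1/3 multiplied back by 3 differs from 1 at most in the last few mantissa bits
    // (rounding), so the default AboutEqual() (last 4 bits ignored) reports equality
    x = one;
    x.Div(three);
    x.Mul(three);

    return x.AboutEqual(one);
}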

View File

@ -3916,75 +3916,75 @@ public:
}
bool AboutEqual(const Big<exp,man> & ss2, int nBitsToIgnore = 4) const
{
// we should check the mantissas beforehand because sometimes we can have
// a mantissa set to zero but some other value left in the exponent
// (maybe we forgot to call CorrectZero()?)
if( mantissa.IsZero() )
{
if( ss2.mantissa.IsZero() )
return true;
return(ss2.AboutEqual(*this,nBitsToIgnore));
}
if( ss2.mantissa.IsZero() )
{
return(this->exponent <= uint(2*(-sint(man*TTMATH_BITS_PER_UINT))+nBitsToIgnore));
}
// exponents may not differ much!
ttmath::Int<exp> expdiff(this->exponent - ss2.exponent);
// they may differ by one if, for example, mantissa1=0x80000000 and mantissa2=0xffffffff
if( ttmath::Abs(expdiff) > 1 )
return(false);
// calculate the 'difference' mantissa
ttmath::UInt<man> man1(this->mantissa);
ttmath::UInt<man> man2(ss2.mantissa);
ttmath::UInt<man> mandiff;
switch( expdiff.ToInt() )
{
case +1:
man2.Rcr(1,0);
mandiff = man1;
mandiff.Sub(man2);
break;
case -1:
man1.Rcr(1,0);
mandiff = man2;
mandiff.Sub(man1);
break;
default:
if( man2 > man1 )
{
mandiff = man2;
mandiff.Sub(man1);
}
else
{
mandiff = man1;
mandiff.Sub(man2);
}
break;
}
// faster to mask the bits!
TTMATH_ASSERT( nBitsToIgnore < TTMATH_BITS_PER_UINT );
for( int n = man-1; n > 0; --n )
{
if( mandiff.table[n] != 0 )
return(false);
}
uint nMask = ~((uint(1) << nBitsToIgnore) - 1);
return((mandiff.table[0] & nMask) == 0);
}
bool operator<(const Big<exp,man> & ss2) const

View File

@ -162,8 +162,14 @@ namespace ttmath
/*!
on 64bit platforms one word (uint, sint) is 64 bits wide
*/
typedef unsigned long uint;
typedef signed long sint;
#ifdef _MSC_VER
/* in VC the 'long' type is 32 bits wide; __int64 is a VC extension */
typedef unsigned __int64 uint;
typedef signed __int64 sint;
#else
typedef unsigned long uint;
typedef signed long sint;
#endif
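A tiny compile-time check (an editorial sketch, not part of the patch) of the assumption stated in the comment above, that one word is 64 bits wide here:

// fails to compile if ttmath::uint is not 64 bits wide on this platform
typedef char ttmath_uint_must_be_64_bits[ sizeof(ttmath::uint) == 8 ? 1 : -1 ];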
/*!
on 64bit platform we do not define ulint

View File

@ -3297,6 +3297,17 @@ public:
static uint SetBitInWord(uint & value, uint bit);
static void MulTwoWords(uint a, uint b, uint * result_high, uint * result_low);
static void DivTwoWords(uint a,uint b, uint c, uint * r, uint * rest);
/* temporarily */
#ifndef TTMATH_NOASM
#ifdef TTMATH_PLATFORM64
#ifdef _MSC_VER
static uint AddTwoWords(uint a, uint b, uint carry, uint * result);
static uint SubTwoWords(uint a, uint b, uint carry, uint * result);
#endif
#endif
#endif
};

View File

@ -51,10 +51,33 @@
this file is included at the end of ttmathuint.h
*/
#ifdef _MSC_VER
#include <intrin.h>
#endif
namespace ttmath
{
#ifdef _MSC_VER
extern "C"
{
uint __fastcall adc_x64(uint* p1, const uint* p2, uint nSize, uint c);
uint __fastcall addindexed_x64(uint* p1, uint nSize, uint nPos, uint nValue);
uint __fastcall addindexed2_x64(uint* p1, uint nSize, uint nPos, uint nValue1, uint nValue2);
uint __fastcall sbb_x64(uint* p1, const uint* p2, uint nSize, uint c);
uint __fastcall subindexed_x64(uint* p1, uint nSize, uint nPos, uint nValue);
uint __fastcall rcl_x64(uint* p1, uint nSize, uint nLowestBit);
uint __fastcall rcr_x64(uint* p1, uint nSize, uint nLowestBit);
uint __fastcall div_x64(uint* pnValHi, uint* pnValLo, uint nDiv);
uint __fastcall rcl2_x64(uint* p1, uint nSize, uint nBits, uint c);
uint __fastcall rcr2_x64(uint* p1, uint nSize, uint nBits, uint c);
};
#endif
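An editorial sketch (not part of the patch) of how these externs behave; adc_x64(p1, p2, nSize, c) computes p1 += p2 + c word by word and returns the outgoing carry (the values below are made up; in normal use only the UInt<> methods call these routines):

#ifdef _MSC_VER
ttmath::uint adc_x64_sketch()
{
    ttmath::uint p1[3] = { ttmath::uint(0) - 1, 0, 5 };   // first word: the maximum 64-bit value
    ttmath::uint p2[3] = { 1, 0, 0 };

    // afterwards p1 == { 0, 1, 5 }: the overflow of the first word is carried into the second;
    // the return value is the carry out of the last word, 0 in this case
    return ttmath::adc_x64(p1, p2, 3, 0);
}
#endif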
/*!
*
* basic mathematical functions
@ -82,8 +105,12 @@ namespace ttmath
// we don't have to use TTMATH_REFERENCE_ASSERT here
// this algorithm doesn't require it
#ifndef __GNUC__
#error "another compiler than GCC is currently not supported in 64bit mode"
#if !defined(__GNUC__) && !defined(_MSC_VER)
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
#endif
#ifdef _MSC_VER
c = adc_x64(p1,p2,b,c);
#endif
#ifdef __GNUC__
@ -149,10 +176,16 @@ namespace ttmath
TTMATH_ASSERT( index < value_size )
#ifndef __GNUC__
#error "another compiler than GCC is currently not supported in 64bit mode"
#if !defined(__GNUC__) && !defined(_MSC_VER)
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
#endif
#ifdef _MSC_VER
c = addindexed_x64(p1,b,index,value);
#endif
#ifdef __GNUC__
uint dummy, dummy2;
@ -227,10 +260,16 @@ namespace ttmath
TTMATH_ASSERT( index < value_size - 1 )
#ifndef __GNUC__
#error "another compiler than GCC is currently not supported in 64bit mode"
#if !defined(__GNUC__) && !defined(_MSC_VER)
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
#endif
#ifdef _MSC_VER
c = addindexed2_x64(p1,b,index,x1,x2);
#endif
#ifdef __GNUC__
uint dummy, dummy2;
@ -288,6 +327,9 @@ namespace ttmath
of course the carry is propagated and will be returned from the last item
(this method is used by the Karatsuba multiplication algorithm)
*/
#ifndef _MSC_VER
template<uint value_size>
uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
{
@ -296,10 +338,16 @@ namespace ttmath
uint rest = ss1_size - ss2_size;
uint c;
#ifndef __GNUC__
#error "another compiler than GCC is currently not supported in 64bit mode"
#if !defined(__GNUC__) && !defined(_MSC_VER)
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
#endif
#ifdef _MSC_VER
#endif
#ifdef __GNUC__
uint dummy1, dummy2, dummy3;
@ -348,8 +396,27 @@ namespace ttmath
return c;
}
#else
/* temporarily */
template<uint value_size>
uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
{
uint i, c = 0;
TTMATH_ASSERT( ss1_size >= ss2_size )
for(i=0 ; i<ss2_size ; ++i)
c = AddTwoWords(ss1[i], ss2[i], c, &result[i]);
for( ; i<ss1_size ; ++i)
c = AddTwoWords(ss1[i], 0, c, &result[i]);
TTMATH_LOG("UInt::AddVector")
return c;
}
#endif
/*!
@ -373,10 +440,16 @@ namespace ttmath
// we don't have to use TTMATH_REFERENCE_ASSERT here
// this algorithm doesn't require it
#ifndef __GNUC__
#error "another compiler than GCC is currently not supported in 64bit mode"
#if !defined(__GNUC__) && !defined(_MSC_VER)
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
#endif
#ifdef _MSC_VER
c = sbb_x64(p1,p2,b,c);
#endif
#ifdef __GNUC__
uint dummy, dummy2;
@ -432,15 +505,22 @@ namespace ttmath
uint b = value_size;
uint * p1 = table;
uint c;
uint dummy, dummy2;
TTMATH_ASSERT( index < value_size )
#ifndef __GNUC__
#error "another compiler than GCC is currently not supported in 64bit mode"
#if !defined(__GNUC__) && !defined(_MSC_VER)
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
#endif
#ifdef _MSC_VER
c = subindexed_x64(p1,b,index,value);
#endif
#ifdef __GNUC__
uint dummy, dummy2;
__asm__ __volatile__(
"subq %%rdx, %%rcx \n"
@ -493,6 +573,9 @@ namespace ttmath
of course the carry (borrow) is propagated and will be returned from the last item
(this method is used by the Karatsuba multiplication algorithm)
*/
#ifndef _MSC_VER
template<uint value_size>
uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
{
@ -501,16 +584,22 @@ namespace ttmath
uint rest = ss1_size - ss2_size;
uint c;
#ifndef __GNUC__
#error "another compiler than GCC is currently not supported in 64bit mode"
#if !defined(__GNUC__) && !defined(_MSC_VER)
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
#endif
#ifdef _MSC_VER
#endif
#ifdef __GNUC__
/*
the asm code is nearly the same as in AddVector
only two instructions 'adc' are changed to 'sbb'
*/
// the asm code is nearly the same as in AddVector;
// only the two 'adc' instructions are changed to 'sbb'
uint dummy1, dummy2, dummy3;
__asm__ __volatile__(
@ -556,6 +645,27 @@ namespace ttmath
return c;
}
#else
/* temporarily */
template<uint value_size>
uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
{
uint i, c = 0;
TTMATH_ASSERT( ss1_size >= ss2_size )
for(i=0 ; i<ss2_size ; ++i)
c = SubTwoWords(ss1[i], ss2[i], c, &result[i]);
for( ; i<ss1_size ; ++i)
c = SubTwoWords(ss1[i], 0, c, &result[i]);
TTMATH_LOG("UInt::SubVector")
return c;
}
#endif
/*!
@ -579,10 +689,16 @@ namespace ttmath
uint * p1 = table;
#ifndef __GNUC__
#error "another compiler than GCC is currently not supported in 64bit mode"
#if !defined(__GNUC__) && !defined(_MSC_VER)
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
#endif
#ifdef _MSC_VER
c = rcl_x64(p1,b,c);
#endif
#ifdef __GNUC__
uint dummy, dummy2;
@ -633,10 +749,16 @@ namespace ttmath
uint * p1 = table;
#ifndef __GNUC__
#error "another compiler than GCC is currently not supported in 64bit mode"
#if !defined(__GNUC__) && !defined(_MSC_VER)
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
#endif
#ifdef _MSC_VER
c = rcr_x64(p1,b,c);
#endif
#ifdef __GNUC__
uint dummy;
@ -688,10 +810,16 @@ namespace ttmath
uint * p1 = table;
#ifndef __GNUC__
#error "another compiler than GCC is currently not supported in 64bit mode"
#if !defined(__GNUC__) && !defined(_MSC_VER)
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
#endif
#ifdef _MSC_VER
c = rcl2_x64(p1,b,bits,c);
#endif
#ifdef __GNUC__
uint dummy, dummy2, dummy3;
@ -758,14 +886,20 @@ namespace ttmath
sint b = value_size;
uint * p1 = table;
uint dummy, dummy2, dummy3;
#ifndef __GNUC__
#error "another compiler than GCC is currently not supported in 64bit mode"
#if !defined(__GNUC__) && !defined(_MSC_VER)
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
#endif
#ifdef _MSC_VER
c = rcr2_x64(p1,b,bits,c);
#endif
#ifdef __GNUC__
uint dummy, dummy2, dummy3;
__asm__ __volatile__(
"movq %%rcx, %%rsi \n"
@ -823,10 +957,23 @@ namespace ttmath
sint result;
#ifndef __GNUC__
#error "another compiler than GCC is currently not supported in 64bit mode"
#if !defined(__GNUC__) && !defined(_MSC_VER)
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
#endif
#ifdef _MSC_VER
unsigned long nIndex = 0;
if( _BitScanReverse64(&nIndex,x) == 0 )
result = -1;
else
result = nIndex;
#endif
#ifdef __GNUC__
uint dummy;
@ -868,11 +1015,16 @@ namespace ttmath
uint old_bit;
uint v = value;
#ifndef __GNUC__
#error "another compiler than GCC is currently not supported in 64bit mode"
#if !defined(__GNUC__) && !defined(_MSC_VER)
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
#endif
#ifdef _MSC_VER
old_bit = _bittestandset64((__int64*)&value,bit) != 0;
#endif
#ifdef __GNUC__
__asm__ (
@ -924,10 +1076,16 @@ namespace ttmath
uint result1_;
uint result2_;
#ifndef __GNUC__
#error "another compiler than GCC is currently not supported in 64bit mode"
#if !defined(__GNUC__) && !defined(_MSC_VER)
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
#endif
#ifdef _MSC_VER
result1_ = _umul128(a,b,&result2_);
#endif
#ifdef __GNUC__
__asm__ (
@ -981,10 +1139,20 @@ namespace ttmath
TTMATH_ASSERT( c != 0 )
#ifndef __GNUC__
#error "another compiler than GCC is currently not supported in 64bit mode"
#if !defined(__GNUC__) && !defined(_MSC_VER)
#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
#endif
#ifdef _MSC_VER
div_x64(&a,&b,c);
r_ = a;
rest_ = b;
#endif
#ifdef __GNUC__
__asm__ (
@ -1003,6 +1171,59 @@ namespace ttmath
}
/* temporarily */
template<uint value_size>
uint UInt<value_size>::AddTwoWords(uint a, uint b, uint carry, uint * result)
{
uint temp;
if( carry == 0 )
{
temp = a + b;
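// unsigned addition wraps around modulo 2^64 here, so after an overflow the sum is smaller than either operand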
if( temp < a )
carry = 1;
}
else
{
carry = 1;
temp = a + b + carry;
if( temp > a ) // !(temp<=a)
carry = 0;
}
*result = temp;
return carry;
}
/* temporarily */
template<uint value_size>
uint UInt<value_size>::SubTwoWords(uint a, uint b, uint carry, uint * result)
{
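// 'carry' acts as a borrow: it is returned set when the subtraction wraps below zero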
if( carry == 0 )
{
*result = a - b;
if( a < b )
carry = 1;
}
else
{
carry = 1;
*result = a - b - carry;
if( a > b ) // !(a <= b )
carry = 0;
}
return carry;
}
} //namespace

View File

@ -0,0 +1,430 @@
;
; This file is a part of TTMath Bignum Library
; and is distributed under the (new) BSD licence.
; Author: Christian Kaiser <>
;
;
; Copyright (c) 2009, Christian Kaiser
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions are met:
;
; * Redistributions of source code must retain the above copyright notice,
; this list of conditions and the following disclaimer.
;
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in the
; documentation and/or other materials provided with the distribution.
;
; * Neither the name Tomasz Sowa nor the names of contributors to this
; project may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
; ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
; THE POSSIBILITY OF SUCH DAMAGE.
;
;
; compile with debug info: ml64.exe /c /Zd /Zi ttmathuint_x86_64_msvc.asm
; compile without debug info: ml64.exe /c ttmathuint_x86_64_msvc.asm
; this creates the ttmathuint_x86_64_msvc.obj file which can then be linked with your program
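; for example: cl your_program.cpp ttmathuint_x86_64_msvc.obj   (add any other compiler options you need)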
;
PUBLIC adc_x64
PUBLIC addindexed_x64
PUBLIC addindexed2_x64
PUBLIC sbb_x64
PUBLIC subindexed_x64
PUBLIC rcl_x64
PUBLIC rcr_x64
PUBLIC rcl2_x64
PUBLIC rcr2_x64
PUBLIC div_x64
;
; "rax, rcx, rdx, r8-r11 are volatile."
; "rbx, rbp, rdi, rsi, r12-r15 are nonvolatile."
;
.CODE
ALIGN 8
;----------------------------------------
adc_x64 PROC
; rcx = p1
; rdx = p2
; r8 = nSize
; r9 = nCarry
xor rax, rax
xor r11, r11
sub rax, r9 ; sets CARRY if r9 != 0
ALIGN 16
loop1:
mov rax,qword ptr [rdx + r11 * 8]
adc qword ptr [rcx + r11 * 8], rax
lea r11, [r11+1]
dec r8
jnz loop1
setc al
movzx rax, al
ret
adc_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
addindexed_x64 PROC
; rcx = p1
; rdx = nSize
; r8 = nPos
; r9 = nValue
xor rax, rax ; rax = result
sub rdx, r8 ; rdx = remaining count of uints
add qword ptr [rcx + r8 * 8], r9
jc next1
ret
next1:
mov r9, 1
ALIGN 16
loop1:
dec rdx
jz done_with_cy
lea r8, [r8+1]
add qword ptr [rcx + r8 * 8], r9
jc loop1
ret
done_with_cy:
lea rax, [rax+1] ; rax = 1
ret
addindexed_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
addindexed2_x64 PROC
; rcx = p1 (pointer)
; rdx = b (value size)
; r8 = nPos
; r9 = nValue1
; [rsp+0x28] = nValue2
xor rax, rax ; return value
mov r11, rcx ; table
sub rdx, r8 ; rdx = remaining count of uints
mov r10, [rsp+028h] ; r10 = nValue2
add qword ptr [r11 + r8 * 8], r9
lea r8, [r8+1]
lea rdx, [rdx-1]
adc qword ptr [r11 + r8 * 8], r10
jc next
ret
ALIGN 16
loop1:
lea r8, [r8+1]
add qword ptr [r11 + r8 * 8], 1
jc next
ret
next:
dec rdx ; dec does not modify the carry flag either
jnz loop1
lea rax, [rax+1]
ret
addindexed2_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
sbb_x64 PROC
; rcx = p1
; rdx = p2
; r8 = nCount
; r9 = nCarry
xor rax, rax
xor r11, r11
sub rax, r9 ; sets CARRY if r9 != 0
ALIGN 16
loop1:
mov rax,qword ptr [rdx + r11 * 8]
sbb qword ptr [rcx + r11 * 8], rax
lea r11, [r11+1]
dec r8
jnz loop1
setc al
movzx rax, al
ret
sbb_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
subindexed_x64 PROC
; rcx = p1
; rdx = nSize
; r8 = nPos
; r9 = nValue
sub rdx, r8 ; rdx = remaining count of uints
ALIGN 16
loop1:
sub qword ptr [rcx + r8 * 8], r9
jnc done
lea r8, [r8+1]
mov r9, 1
dec rdx
jnz loop1
jc return_1 ; most of the time there will be NO carry (I hope)
done:
xor rax, rax
ret
return_1:
mov rax, 1
ret
subindexed_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
rcl_x64 PROC
; rcx = p1
; rdx = b
; r8 = nLowestBit
mov r11, rcx ; table
xor r10, r10
neg r8 ; CY set if r8 <> 0
ALIGN 16
loop1:
rcl qword ptr [r11 + r10 * 8], 1
lea r10, [r10+1]
dec rdx
jnz loop1
setc al
movzx rax, al
ret
rcl_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
rcr_x64 PROC
; rcx = p1
; rdx = nSize
; r8 = nLowestBit
xor r10, r10
neg r8 ; CY set if r8 <> 0
ALIGN 16
loop1:
rcr qword ptr -8[rcx + rdx * 8], 1
dec rdx
jnz loop1
setc al
movzx rax, al
ret
rcr_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
div_x64 PROC
; rcx = &Hi
; rdx = &Lo
; r8 = nDiv
mov r11, rcx
mov r10, rdx
mov rdx, qword ptr [r11]
mov rax, qword ptr [r10]
div r8
mov qword ptr [r10], rdx ; remainder
mov qword ptr [r11], rax ; value
ret
div_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
rcl2_x64 PROC
; rcx = p1
; rdx = nSize
; r8 = bits
; r9 = c
push rbx
mov r10, rcx ; r10 = p1
xor rax, rax
mov rcx, 64
sub rcx, r8
mov r11, -1
shr r11, cl ; r11 = mask
mov rcx, r8 ; rcx = count of bits
mov rbx, rax ; rbx = old value = 0
or r9, r9
cmovnz rbx, r11 ; if (c) then old value = mask
mov r9, rax ; r9 = index (0..nSize-1)
ALIGN 16
loop1:
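; each word is rotated left by cl bits; the low cl bits that wrapped around
; are replaced with the bits carried out of the previous (less significant) word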
rol qword ptr [r10+r9*8], cl
mov rax, qword ptr [r10+r9*8]
and rax, r11
xor qword ptr [r10+r9*8], rax
or qword ptr [r10+r9*8], rbx
mov rbx, rax
lea r9, [r9+1]
dec rdx
jnz loop1
and rax, 1
pop rbx
ret
rcl2_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
rcr2_x64 PROC
; rcx = p1
; rdx = nSize
; r8 = bits
; r9 = c
push rbx
mov r10, rcx ; r10 = p1
xor rax, rax
mov rcx, 64
sub rcx, r8
mov r11, -1
shl r11, cl ; r11 = mask
mov rcx, r8 ; rcx = count of bits
mov rbx, rax ; rbx = old value = 0
or r9, r9
cmovnz rbx, r11 ; if (c) then old value = mask
mov r9, rdx ; r9 = index (0..nSize-1)
lea r9, [r9-1]
ALIGN 16
loop1:
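; each word is rotated right by cl bits; the high cl bits that wrapped around
; are replaced with the bits carried out of the previous (more significant) word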
ror qword ptr [r10+r9*8], cl
mov rax, qword ptr [r10+r9*8]
and rax, r11
xor qword ptr [r10+r9*8], rax
or qword ptr [r10+r9*8], rbx
mov rbx, rax
lea r9, [r9-1]
dec rdx
jnz loop1
rol rax, 1
and rax, 1
pop rbx
ret
rcr2_x64 ENDP
END