merged: x86_64 asm code for Microsoft Visual compiler

file: ttmathuint_x86_64_msvc.asm from chk branch (original was: ttmathuint_x86_amd64_msvc.asm)
        (this file should be compiled first because MS VC doesn't support inline assembler in x86_64 mode) 



git-svn-id: svn://ttmath.org/publicrep/ttmath/trunk@187 e52654a7-88a9-db11-a3e9-0013d4bc506e
Tomasz Sowa 2009-09-07 02:03:00 +00:00
parent 0d71b0cec2
commit 28964d30f7
6 changed files with 780 additions and 109 deletions

View File

@@ -1,4 +1,4 @@
-Version 0.9.0 prerelease (2009.09.05):
+Version 0.9.0 prerelease (2009.09.07):
 * added: support for wide characters (wchar_t)
          wide characters are used when macro TTMATH_USE_WCHAR is defined
          this macro is defined automatically when there is macro UNICODE or _UNICODE defined
@@ -22,6 +22,9 @@ Version 0.9.0 prerelease (2009.09.05):
          and use TTMATH_MULTITHREADS_HELPER macro somewhere in your *.cpp file
 * added: Big::AboutEqual(const Big<exp,man> & ss2, int nBitsToIgnore = 4)
          the last nBitsToIgnore bits from mantissas will be skipped when comparing
+* added: x86_64 asm code for Microsoft Visual compiler
+         file: ttmathuint_x86_64_msvc.asm
+         (this file should be compiled first because MS VC doesn't support inline assembler in x86_64 mode)
 * changed: Factorial() is using the Gamma() function now
 * removed: Parser<>::SetFactorialMax() method
            the factorial() is so fast now that we don't need the method any longer

View File

@@ -3916,75 +3916,75 @@ public:
 	}

 	bool AboutEqual(const Big<exp,man> & ss2, int nBitsToIgnore = 4) const
 	{
 		// we should check the mantissas beforehand because sometimes we can have
 		// a mantissa set to zero but some other value in the exponent
 		// (maybe we've forgotten about calling CorrectZero() ?)
 		if( mantissa.IsZero() )
 		{
 			if( ss2.mantissa.IsZero() )
 				return true;

 			return(ss2.AboutEqual(*this,nBitsToIgnore));
 		}

 		if( ss2.mantissa.IsZero() )
 		{
 			return(this->exponent <= uint(2*(-sint(man*TTMATH_BITS_PER_UINT))+nBitsToIgnore));
 		}

 		// exponents may not differ much!
 		ttmath::Int<exp> expdiff(this->exponent - ss2.exponent);

 		// they may differ by one if for example mantissa1=0x80000000, mantissa2=0xffffffff
 		if( ttmath::Abs(expdiff) > 1 )
 			return(false);

 		// calculate the 'difference' mantissa
 		ttmath::UInt<man> man1(this->mantissa);
 		ttmath::UInt<man> man2(ss2.mantissa);
 		ttmath::UInt<man> mandiff;

 		switch( expdiff.ToInt() )
 		{
 			case +1:
 				man2.Rcr(1,0);
 				mandiff = man1;
 				mandiff.Sub(man2);
 				break;

 			case -1:
 				man1.Rcr(1,0);
 				mandiff = man2;
 				mandiff.Sub(man1);
 				break;

 			default:
 				if( man2 > man1 )
 				{
 					mandiff = man2;
 					mandiff.Sub(man1);
 				}
 				else
 				{
 					mandiff = man1;
 					mandiff.Sub(man2);
 				}
 				break;
 		}

 		// faster to mask the bits!
 		TTMATH_ASSERT( nBitsToIgnore < TTMATH_BITS_PER_UINT );

 		for( int n = man-1; n > 0; --n )
 		{
 			if( mandiff.table[n] != 0 )
 				return(false);
 		}

 		uint nMask = ~((1 << nBitsToIgnore) - 1);

 		return((mandiff.table[0] & nMask) == 0);
 	}
bool operator<(const Big<exp,man> & ss2) const bool operator<(const Big<exp,man> & ss2) const
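As a quick orientation for the new method, a minimal usage sketch (hypothetical values; assumes the ttmath headers are on the include path as ttmath/ttmath.h):

#include <ttmath/ttmath.h>
#include <iostream>

int main()
{
	ttmath::Big<1,2> a, b;
	a = "1.0";
	b = "1.0";

	// perturb the lowest 3 bits of b's mantissa (mantissa and table are public)
	b.mantissa.table[0] |= 7;

	// prints "about equal": with the default nBitsToIgnore = 4
	// the last 4 mantissa bits are masked out before comparing
	std::cout << (a.AboutEqual(b) ? "about equal" : "different") << std::endl;

	return 0;
}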

View File

@@ -162,8 +162,14 @@ namespace ttmath
 	/*!
 		on 64bit platforms one word (uint, sint) will be equal 64bits
 	*/
-	typedef unsigned long uint;
-	typedef signed long sint;
+	#ifdef _MSC_VER
+		/* in VC 'long' type has 32 bits, __int64 is VC extension */
+		typedef unsigned __int64 uint;
+		typedef signed __int64 sint;
+	#else
+		typedef unsigned long uint;
+		typedef signed long sint;
+	#endif

 	/*!
 		on 64bit platform we do not define ulint
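The reason for this change, in short: 64-bit Windows is LLP64, so 'long' stays 32 bits there and only the __int64 extension gives a 64-bit word, while 64-bit GCC targets are LP64, where 'long' is already 64 bits. A standalone sketch that makes the difference visible (hypothetical, not part of the patch):

#include <cstdio>

int main()
{
#ifdef _MSC_VER
	/* LLP64: long = 4 bytes, __int64 = 8 bytes */
	printf("long: %u bytes, __int64: %u bytes\n",
	       (unsigned)sizeof(long), (unsigned)sizeof(__int64));
#else
	/* LP64 (64-bit GCC): long = 8 bytes */
	printf("long: %u bytes\n", (unsigned)sizeof(long));
#endif

	return 0;
}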

View File

@@ -3297,6 +3297,17 @@ public:
 	static uint SetBitInWord(uint & value, uint bit);
 	static void MulTwoWords(uint a, uint b, uint * result_high, uint * result_low);
 	static void DivTwoWords(uint a,uint b, uint c, uint * r, uint * rest);

+	/* temporarily */
+#ifndef TTMATH_NOASM
+#ifdef TTMATH_PLATFORM64
+	#ifdef _MSC_VER
+		static uint AddTwoWords(uint a, uint b, uint carry, uint * result);
+		static uint SubTwoWords(uint a, uint b, uint carry, uint * result);
+	#endif
+#endif
+#endif
+
 };

View File

@@ -51,10 +51,33 @@
 	this file is included at the end of ttmathuint.h
 */

+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
 namespace ttmath
 {

+#ifdef _MSC_VER
+
+	extern "C"
+	{
+	uint __fastcall adc_x64(uint* p1, const uint* p2, uint nSize, uint c);
+	uint __fastcall addindexed_x64(uint* p1, uint nSize, uint nPos, uint nValue);
+	uint __fastcall addindexed2_x64(uint* p1, uint nSize, uint nPos, uint nValue1, uint nValue2);
+	uint __fastcall sbb_x64(uint* p1, const uint* p2, uint nSize, uint c);
+	uint __fastcall subindexed_x64(uint* p1, uint nSize, uint nPos, uint nValue);
+	uint __fastcall rcl_x64(uint* p1, uint nSize, uint nLowestBit);
+	uint __fastcall rcr_x64(uint* p1, uint nSize, uint nLowestBit);
+	uint __fastcall div_x64(uint* pnValHi, uint* pnValLo, uint nDiv);
+	uint __fastcall rcl2_x64(uint* p1, uint nSize, uint nBits, uint c);
+	uint __fastcall rcr2_x64(uint* p1, uint nSize, uint nBits, uint c);
+	};
+
+#endif
+
 	/*!
 	 *
 	 *  basic mathematic functions
@@ -82,8 +105,12 @@ namespace ttmath
 	// we don't have to use TTMATH_REFERENCE_ASSERT here
 	// this algorithm doesn't require it

-	#ifndef __GNUC__
-	#error "another compiler than GCC is currently not supported in 64bit mode"
+	#if !defined(__GNUC__) && !defined(_MSC_VER)
+	#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
+	#endif
+
+	#ifdef _MSC_VER
+		c = adc_x64(p1,p2,b,c);
 	#endif

 	#ifdef __GNUC__
@@ -149,10 +176,16 @@ namespace ttmath
 	TTMATH_ASSERT( index < value_size )

-	#ifndef __GNUC__
-	#error "another compiler than GCC is currently not supported in 64bit mode"
+	#if !defined(__GNUC__) && !defined(_MSC_VER)
+	#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
 	#endif

+	#ifdef _MSC_VER
+		c = addindexed_x64(p1,b,index,value);
+	#endif
+
 	#ifdef __GNUC__
 	uint dummy, dummy2;
@@ -227,10 +260,16 @@ namespace ttmath
 	TTMATH_ASSERT( index < value_size - 1 )

-	#ifndef __GNUC__
-	#error "another compiler than GCC is currently not supported in 64bit mode"
+	#if !defined(__GNUC__) && !defined(_MSC_VER)
+	#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
 	#endif

+	#ifdef _MSC_VER
+		c = addindexed2_x64(p1,b,index,x1,x2);
+	#endif
+
 	#ifdef __GNUC__
 	uint dummy, dummy2;
@@ -288,6 +327,9 @@ namespace ttmath
 	of course the carry is propagated and will be returned from the last item
 	(this method is used by the Karatsuba multiplication algorithm)
 	*/
+#ifndef _MSC_VER
+
 	template<uint value_size>
 	uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
 	{
@@ -296,10 +338,16 @@ namespace ttmath
 	uint rest = ss1_size - ss2_size;
 	uint c;

-	#ifndef __GNUC__
-	#error "another compiler than GCC is currently not supported in 64bit mode"
+	#if !defined(__GNUC__) && !defined(_MSC_VER)
+	#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
 	#endif

+	#ifdef _MSC_VER
+	#endif
+
 	#ifdef __GNUC__
 	uint dummy1, dummy2, dummy3;
@@ -348,8 +396,27 @@ namespace ttmath
 	return c;
 	}

+#else
+
+	/* temporarily */
+	template<uint value_size>
+	uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
+	{
+		uint i, c = 0;
+
+		TTMATH_ASSERT( ss1_size >= ss2_size )
+
+		for(i=0 ; i<ss2_size ; ++i)
+			c = AddTwoWords(ss1[i], ss2[i], c, &result[i]);
+
+		for( ; i<ss1_size ; ++i)
+			c = AddTwoWords(ss1[i], 0, c, &result[i]);
+
+		TTMATH_LOG("UInt::AddVector")
+
+		return c;
+	}
+
+#endif

 	/*!
@@ -373,10 +440,16 @@ namespace ttmath
 	// we don't have to use TTMATH_REFERENCE_ASSERT here
 	// this algorithm doesn't require it

-	#ifndef __GNUC__
-	#error "another compiler than GCC is currently not supported in 64bit mode"
+	#if !defined(__GNUC__) && !defined(_MSC_VER)
+	#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
 	#endif

+	#ifdef _MSC_VER
+		c = sbb_x64(p1,p2,b,c);
+	#endif
+
 	#ifdef __GNUC__
 	uint dummy, dummy2;
@@ -432,15 +505,22 @@ namespace ttmath
 	uint b = value_size;
 	uint * p1 = table;
 	uint c;
-	uint dummy, dummy2;

 	TTMATH_ASSERT( index < value_size )

-	#ifndef __GNUC__
-	#error "another compiler than GCC is currently not supported in 64bit mode"
+	#if !defined(__GNUC__) && !defined(_MSC_VER)
+	#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
 	#endif

+	#ifdef _MSC_VER
+		c = subindexed_x64(p1,b,index,value);
+	#endif
+
 	#ifdef __GNUC__
+	uint dummy, dummy2;

 	__asm__ __volatile__(

 		"subq %%rdx, %%rcx \n"
@@ -493,6 +573,9 @@ namespace ttmath
 	of course the carry (borrow) is propagated and will be returned from the last item
 	(this method is used by the Karatsuba multiplication algorithm)
 	*/
+#ifndef _MSC_VER
+
 	template<uint value_size>
 	uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
 	{
@@ -501,16 +584,22 @@ namespace ttmath
 	uint rest = ss1_size - ss2_size;
 	uint c;

-	#ifndef __GNUC__
-	#error "another compiler than GCC is currently not supported in 64bit mode"
+	#if !defined(__GNUC__) && !defined(_MSC_VER)
+	#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
 	#endif

+	#ifdef _MSC_VER
+	#endif
+
 	#ifdef __GNUC__

-	/*
-	the asm code is nearly the same as in AddVector
-	only two instructions 'adc' are changed to 'sbb'
-	*/
+	// the asm code is nearly the same as in AddVector
+	// only two instructions 'adc' are changed to 'sbb'
+
 	uint dummy1, dummy2, dummy3;

 	__asm__ __volatile__(
@@ -556,6 +645,27 @@ namespace ttmath
 	return c;
 	}

+#else
+
+	/* temporarily */
+	template<uint value_size>
+	uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
+	{
+		uint i, c = 0;
+
+		TTMATH_ASSERT( ss1_size >= ss2_size )
+
+		for(i=0 ; i<ss2_size ; ++i)
+			c = SubTwoWords(ss1[i], ss2[i], c, &result[i]);
+
+		for( ; i<ss1_size ; ++i)
+			c = SubTwoWords(ss1[i], 0, c, &result[i]);
+
+		TTMATH_LOG("UInt::SubVector")
+
+		return c;
+	}
+
+#endif

 	/*!
@@ -579,10 +689,16 @@ namespace ttmath
 	uint * p1 = table;

-	#ifndef __GNUC__
-	#error "another compiler than GCC is currently not supported in 64bit mode"
+	#if !defined(__GNUC__) && !defined(_MSC_VER)
+	#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
 	#endif

+	#ifdef _MSC_VER
+		c = rcl_x64(p1,b,c);
+	#endif
+
 	#ifdef __GNUC__
 	uint dummy, dummy2;
@@ -633,10 +749,16 @@ namespace ttmath
 	uint * p1 = table;

-	#ifndef __GNUC__
-	#error "another compiler than GCC is currently not supported in 64bit mode"
+	#if !defined(__GNUC__) && !defined(_MSC_VER)
+	#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
 	#endif

+	#ifdef _MSC_VER
+		c = rcr_x64(p1,b,c);
+	#endif
+
 	#ifdef __GNUC__
 	uint dummy;
@@ -688,10 +810,16 @@ namespace ttmath
 	uint * p1 = table;

-	#ifndef __GNUC__
-	#error "another compiler than GCC is currently not supported in 64bit mode"
+	#if !defined(__GNUC__) && !defined(_MSC_VER)
+	#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
 	#endif

+	#ifdef _MSC_VER
+		c = rcl2_x64(p1,b,bits,c);
+	#endif
+
 	#ifdef __GNUC__
 	uint dummy, dummy2, dummy3;
@@ -758,14 +886,20 @@ namespace ttmath
 	sint b = value_size;
 	uint * p1 = table;
-	uint dummy, dummy2, dummy3;

-	#ifndef __GNUC__
-	#error "another compiler than GCC is currently not supported in 64bit mode"
+	#if !defined(__GNUC__) && !defined(_MSC_VER)
+	#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
+	#endif
+
+	#ifdef _MSC_VER
+		c = rcr2_x64(p1,b,bits,c);
 	#endif

 	#ifdef __GNUC__
+	uint dummy, dummy2, dummy3;

 	__asm__ __volatile__(

 		"movq %%rcx, %%rsi \n"
@@ -823,10 +957,23 @@ namespace ttmath
 	sint result;

-	#ifndef __GNUC__
-	#error "another compiler than GCC is currently not supported in 64bit mode"
+	#if !defined(__GNUC__) && !defined(_MSC_VER)
+	#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
 	#endif

+	#ifdef _MSC_VER
+
+		unsigned long nIndex = 0;
+
+		if( _BitScanReverse64(&nIndex,x) == 0 )
+			result = -1;
+		else
+			result = nIndex;
+
+	#endif
+
 	#ifdef __GNUC__
 	uint dummy;
@@ -868,11 +1015,16 @@ namespace ttmath
 	uint old_bit;
 	uint v = value;

-	#ifndef __GNUC__
-	#error "another compiler than GCC is currently not supported in 64bit mode"
+	#if !defined(__GNUC__) && !defined(_MSC_VER)
+	#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
 	#endif

+	#ifdef _MSC_VER
+		old_bit = _bittestandset64((__int64*)&value,bit) != 0;
+	#endif
+
 	#ifdef __GNUC__

 	__asm__ (
@@ -924,10 +1076,16 @@ namespace ttmath
 	uint result1_;
 	uint result2_;

-	#ifndef __GNUC__
-	#error "another compiler than GCC is currently not supported in 64bit mode"
+	#if !defined(__GNUC__) && !defined(_MSC_VER)
+	#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
 	#endif

+	#ifdef _MSC_VER
+		result1_ = _umul128(a,b,&result2_);
+	#endif
+
 	#ifdef __GNUC__

 	__asm__ (
@@ -981,10 +1139,20 @@ namespace ttmath
 	TTMATH_ASSERT( c != 0 )

-	#ifndef __GNUC__
-	#error "another compiler than GCC is currently not supported in 64bit mode"
+	#if !defined(__GNUC__) && !defined(_MSC_VER)
+	#error "another compiler than GCC or Microsoft VC is currently not supported in 64bit mode, you can compile with TTMATH_NOASM macro"
 	#endif

+	#ifdef _MSC_VER
+
+		div_x64(&a,&b,c);
+		r_ = a;
+		rest_ = b;
+
+	#endif
+
 	#ifdef __GNUC__

 	__asm__ (
@@ -1003,6 +1171,59 @@ namespace ttmath
 	}

+
+	/* temporarily */
+	template<uint value_size>
+	uint UInt<value_size>::AddTwoWords(uint a, uint b, uint carry, uint * result)
+	{
+		uint temp;
+
+		if( carry == 0 )
+		{
+			temp = a + b;
+
+			if( temp < a )
+				carry = 1;
+		}
+		else
+		{
+			carry = 1;
+			temp = a + b + carry;
+
+			if( temp > a ) // !(temp<=a)
+				carry = 0;
+		}
+
+		*result = temp;
+
+		return carry;
+	}
+
+
+	/* temporarily */
+	template<uint value_size>
+	uint UInt<value_size>::SubTwoWords(uint a, uint b, uint carry, uint * result)
+	{
+		if( carry == 0 )
+		{
+			*result = a - b;
+
+			if( a < b )
+				carry = 1;
+		}
+		else
+		{
+			carry = 1;
+			*result = a - b - carry;
+
+			if( a > b ) // !(a <= b )
+				carry = 0;
+		}
+
+		return carry;
+	}
+
} //namespace } //namespace
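A standalone model of the carry rule that AddTwoWords/SubTwoWords implement (a sketch only; it collapses the two branches above into one expression and uses unsigned long long in place of ttmath's uint):

#include <cstdio>

typedef unsigned long long u64;

/* same contract as UInt<>::AddTwoWords above:
   *result = a + b + carry (mod 2^64), returns the carry out (0 or 1) */
u64 add_two_words(u64 a, u64 b, u64 carry, u64 * result)
{
	u64 temp = a + b + carry;

	/* with carry == 0 the sum wrapped iff temp < a;
	   with carry == 1 it wrapped iff temp <= a (b + 1 may itself wrap to 0) */
	u64 carry_out = (carry == 0) ? (temp < a) : (temp <= a);

	*result = temp;
	return carry_out;
}

int main()
{
	u64 r;
	u64 c = add_two_words(0xFFFFFFFFFFFFFFFFULL, 1, 0, &r);

	printf("r = %llu, carry = %llu\n", r, c); /* prints: r = 0, carry = 1 */

	return 0;
}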

View File

@@ -0,0 +1,430 @@
;
; This file is a part of TTMath Bignum Library
; and is distributed under the (new) BSD licence.
; Author: Christian Kaiser <>
;
;
; Copyright (c) 2009, Christian Kaiser
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions are met:
;
; * Redistributions of source code must retain the above copyright notice,
; this list of conditions and the following disclaimer.
;
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in the
; documentation and/or other materials provided with the distribution.
;
; * Neither the name Tomasz Sowa nor the names of contributors to this
; project may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
; ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
; THE POSSIBILITY OF SUCH DAMAGE.
;
;
; compile with debug info: ml64.exe /Zd /Zi ttmathuint_x86_64_msvc.asm
; compile without debug info: ml64.exe ttmathuint_x86_64_msvc.asm
; this creates the ttmathuint_x86_64_msvc.obj file, which can be linked with your program
;
PUBLIC adc_x64
PUBLIC addindexed_x64
PUBLIC addindexed2_x64
PUBLIC sbb_x64
PUBLIC subindexed_x64
PUBLIC rcl_x64
PUBLIC rcr_x64
PUBLIC rcl2_x64
PUBLIC rcr2_x64
PUBLIC div_x64
;
; "rax, rcx, rdx, r8-r11 are volatile."
; "rbx, rbp, rdi, rsi, r12-r15 are nonvolatile."
;
.CODE
ALIGN 8
;----------------------------------------
adc_x64 PROC
; rcx = p1
; rdx = p2
; r8 = nSize
; r9 = nCarry
xor rax, rax
xor r11, r11
sub rax, r9 ; sets CARRY if r9 != 0
ALIGN 16
loop1:
mov rax,qword ptr [rdx + r11 * 8]
adc qword ptr [rcx + r11 * 8], rax
lea r11, [r11+1]
dec r8
jnz loop1
setc al
movzx rax, al
ret
adc_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
addindexed_x64 PROC
; rcx = p1
; rdx = nSize
; r8 = nPos
; r9 = nValue
xor rax, rax ; rax = result
sub rdx, r8 ; rdx = remaining count of uints
add qword ptr [rcx + r8 * 8], r9
jc next1
ret
next1:
mov r9, 1
ALIGN 16
loop1:
dec rdx
jz done_with_cy
lea r8, [r8+1]
add qword ptr [rcx + r8 * 8], r9
jc loop1
ret
done_with_cy:
lea rax, [rax+1] ; rax = 1
ret
addindexed_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
addindexed2_x64 PROC
; rcx = p1 (pointer)
; rdx = b (value size)
; r8 = nPos
; r9 = nValue1
; [rsp+0x28] = nValue2 (5th argument, passed on the stack)
xor rax, rax ; return value
mov r11, rcx ; table
sub rdx, r8 ; rdx = remaining count of uints
mov r10, [rsp+028h] ; r10 = nValue2 (rsp, not esp: esp would truncate the stack address in 64-bit code)
add qword ptr [r11 + r8 * 8], r9
lea r8, [r8+1]
lea rdx, [rdx-1]
adc qword ptr [r11 + r8 * 8], r10
jc next
ret
ALIGN 16
loop1:
lea r8, [r8+1]
add qword ptr [r11 + r8 * 8], 1
jc next
ret
next:
dec rdx ; dec does not modify CY either
jnz loop1
lea rax, [rax+1]
ret
addindexed2_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
sbb_x64 PROC
; rcx = p1
; rdx = p2
; r8 = nCount
; r9 = nCarry
xor rax, rax
xor r11, r11
sub rax, r9 ; sets CARRY if r9 != 0
ALIGN 16
loop1:
mov rax,qword ptr [rdx + r11 * 8]
sbb qword ptr [rcx + r11 * 8], rax
lea r11, [r11+1]
dec r8
jnz loop1
setc al
movzx rax, al
ret
sbb_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
subindexed_x64 PROC
; rcx = p1
; rdx = nSize
; r8 = nPos
; r9 = nValue
sub rdx, r8 ; rdx = remaining count of uints
ALIGN 16
loop1:
sub qword ptr [rcx + r8 * 8], r9
jnc done
lea r8, [r8+1]
mov r9, 1
dec rdx
jnz loop1
jc return_1 ; most of the times, there will be NO carry (I hope)
done:
xor rax, rax
ret
return_1:
mov rax, 1
ret
subindexed_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
rcl_x64 PROC
; rcx = p1
; rdx = b
; r8 = nLowestBit
mov r11, rcx ; table
xor r10, r10
neg r8 ; CY set if r8 <> 0
ALIGN 16
loop1:
rcl qword ptr [r11 + r10 * 8], 1
lea r10, [r10+1]
dec rdx
jnz loop1
setc al
movzx rax, al
ret
rcl_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
rcr_x64 PROC
; rcx = p1
; rdx = nSize
; r8 = nLowestBit
xor r10, r10
neg r8 ; CY set if r8 <> 0
ALIGN 16
loop1:
rcr qword ptr -8[rcx + rdx * 8], 1
dec rdx
jnz loop1
setc al
movzx rax, al
ret
rcr_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
div_x64 PROC
; rcx = &Hi
; rdx = &Lo
; r8 = nDiv
mov r11, rcx
mov r10, rdx
mov rdx, qword ptr [r11]
mov rax, qword ptr [r10]
div r8
mov qword ptr [r10], rdx ; remainder
mov qword ptr [r11], rax ; value
ret
div_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
rcl2_x64 PROC
; rcx = p1
; rdx = nSize
; r8 = bits
; r9 = c
push rbx
mov r10, rcx ; r10 = p1
xor rax, rax
mov rcx, 64
sub rcx, r8
mov r11, -1
shr r11, cl ; r11 = mask
mov rcx, r8 ; rcx = count of bits
mov rbx, rax ; rbx = old value = 0
or r9, r9
cmovnz rbx, r11 ; if (c) then old value = mask
mov r9, rax ; r9 = index (0..nSize-1)
ALIGN 16
loop1:
rol qword ptr [r10+r9*8], cl
mov rax, qword ptr [r10+r9*8]
and rax, r11
xor qword ptr [r10+r9*8], rax
or qword ptr [r10+r9*8], rbx
mov rbx, rax
lea r9, [r9+1]
dec rdx
jnz loop1
and rax, 1
pop rbx
ret
rcl2_x64 ENDP
;----------------------------------------
ALIGN 8
;----------------------------------------
rcr2_x64 PROC
; rcx = p1
; rdx = nSize
; r8 = bits
; r9 = c
push rbx
mov r10, rcx ; r10 = p1
xor rax, rax
mov rcx, 64
sub rcx, r8
mov r11, -1
shl r11, cl ; r11 = mask
mov rcx, r8 ; rcx = count of bits
mov rbx, rax ; rbx = old value = 0
or r9, r9
cmovnz rbx, r11 ; if (c) then old value = mask
mov r9, rdx ; r9 = index (0..nSize-1)
lea r9, [r9-1]
ALIGN 16
loop1:
ror qword ptr [r10+r9*8], cl
mov rax, qword ptr [r10+r9*8]
and rax, r11
xor qword ptr [r10+r9*8], rax
or qword ptr [r10+r9*8], rbx
mov rbx, rax
lea r9, [r9-1]
dec rdx
jnz loop1
rol rax, 1
and rax, 1
pop rbx
ret
rcr2_x64 ENDP
END
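For illustration, a sketch of calling one of these routines directly from C++ after linking ttmathuint_x86_64_msvc.obj; the declaration mirrors the extern "C" block added to ttmathuint_x86_64.h above, and the local uint typedef is an assumption for a standalone build:

#include <cstdio>

typedef unsigned __int64 uint;

/* implemented in ttmathuint_x86_64_msvc.obj */
extern "C" uint __fastcall adc_x64(uint* p1, const uint* p2, uint nSize, uint c);

int main()
{
	/* p1 += p2 over a two-word (128-bit) value, carry-in = 0 */
	uint p1[2] = { 0xFFFFFFFFFFFFFFFFULL, 0 };
	uint p2[2] = { 1, 0 };

	uint carry = adc_x64(p1, p2, 2, 0);

	/* expected: p1[0] = 0, p1[1] = 1, carry = 0 */
	printf("p1[0] = %llu, p1[1] = %llu, carry = %llu\n", p1[0], p1[1], carry);

	return 0;
}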