- fixed a bug in 64 bit ASM for MSVC

git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@181 e52654a7-88a9-db11-a3e9-0013d4bc506e
This commit is contained in:
Christian Kaiser 2009-07-28 16:34:04 +00:00
parent 51b2c974a1
commit e102086f80
5 changed files with 271 additions and 144 deletions

View File

@ -3869,47 +3869,66 @@ public:
// we should check the mantissas beforehand because sometimes we can have // we should check the mantissas beforehand because sometimes we can have
// a mantissa set to zero but some other value left in the exponent // a mantissa set to zero but some other value left in the exponent
// (maybe we've forgotten about calling CorrectZero() ?) // (maybe we've forgotten about calling CorrectZero() ?)
if( mantissa.IsZero() && ss2.mantissa.IsZero()) if( mantissa.IsZero())
{ {
return true; if (ss2.mantissa.IsZero())
} return true;
return(ss2.AboutEqual(*this,nBitsToIgnore));
if( IsSign() != ss2.IsSign() ) }
{
return false;
}
if( exponent==ss2.exponent ) if (ss2.mantissa.IsZero())
{ {
if (mantissa == ss2.mantissa) return(this->exponent <= uint(2*(-sint(man*TTMATH_BITS_PER_UINT))+nBitsToIgnore));
{ }
return(true);
} // exponents may not differ much!
if( IsSign() != ss2.IsSign() ) ttmath::Int<exp> expdiff(this->exponent - ss2.exponent);
{
// we need to check the difference (both might be around Zero) // they may differ one if for example mantissa1=0x80000000, mantissa2=0xffffffff
Big<exp,man> temp(*this); if (ttmath::Abs(expdiff) > 1)
return(false);
temp.Sub(ss2);
Int<exp> exponent_diff(exponent - temp.exponent); // calculate the 'difference' mantissa
ttmath::UInt<man> man1(this->mantissa);
return(exponent_diff > man*TTMATH_BITS_PER_UINT-nBitsToIgnore); ttmath::UInt<man> man2(ss2.mantissa);
} ttmath::UInt<man> mandiff;
// faster to mask the bits! switch (expdiff.ToInt())
ASSERT(nBitsToIgnore < TTMATH_BITS_PER_UINT); {
case +1:
man2.Rcr(1,0);
mandiff = man1;
mandiff.Sub(man2);
break;
case -1:
man1.Rcr(1,0);
mandiff = man2;
mandiff.Sub(man1);
break;
case 0:
if (man2 > man1)
{
mandiff = man2;
mandiff.Sub(man1);
}
else
{
mandiff = man1;
mandiff.Sub(man2);
}
break;
}
// faster to mask the bits!
ASSERT(nBitsToIgnore < TTMATH_BITS_PER_UINT);
for (int n = man-1; n > 0; --n) for (int n = man-1; n > 0; --n)
{ {
if (mantissa.table[n] != ss2.mantissa.table[n]) if (mandiff.table[n] != 0)
return(false); return(false);
} }
uint nMask = ~((1 << nBitsToIgnore) - 1); uint nMask = ~((1 << nBitsToIgnore) - 1);
return((mantissa.table[0] & nMask) == (ss2.mantissa.table[0] & nMask)); return((mandiff.table[0] & nMask) == 0);
}
return false;
} }
bool operator<(const Big<exp,man> & ss2) const bool operator<(const Big<exp,man> & ss2) const

View File

@ -871,6 +871,83 @@ namespace ttmath
u3 = sub_res_low_.u_.low; u3 = sub_res_low_.u_.low;
} }
/*!
	this static method adds one vector to another, word by word

	ss1      - pointer to the first vector (the larger one, or of equal size)
	ss2      - pointer to the second vector
	ss1_size - number of words in ss1 (the result has the same size)
	ss2_size - number of words in ss2 (must not be greater than ss1_size)
	result   - pointer to the output vector (ss1_size words)

	Example: ss1_size is 5, ss2_size is 3

	ss1:   ss2:   result (output):
	  5      1      5+1
	  4      3      4+3
	  2      7      2+7
	  6             6
	  9             9

	the carry is propagated from word to word and the carry
	from the last word is returned

	(this method is used by the Karatsuba multiplication algorithm)
*/
template<uint value_size>
uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
{
	uint carry = 0;
	uint pos   = 0;

	TTMATH_ASSERT( ss1_size >= ss2_size )

	// the part where both vectors have a word
	while( pos < ss2_size )
	{
		carry = AddTwoWords(ss1[pos], ss2[pos], carry, result + pos);
		++pos;
	}

	// the rest of ss1: there is nothing more in ss2, only the carry is passed on
	while( pos < ss1_size )
	{
		carry = AddTwoWords(ss1[pos], 0, carry, result + pos);
		++pos;
	}

	TTMATH_LOG("UInt::AddVector")

	return carry;
}
/*!
	this static method subtracts one vector from another, word by word

	ss1      - pointer to the first vector (the larger one, or of equal size)
	ss2      - pointer to the second vector
	ss1_size - number of words in ss1 (the result has the same size)
	ss2_size - number of words in ss2 (must not be greater than ss1_size)
	result   - pointer to the output vector (ss1_size words)

	Example: ss1_size is 5, ss2_size is 3

	ss1:   ss2:   result (output):
	  5      1      5-1
	  4      3      4-3
	  2      7      2-7
	  6             6-1  (the borrow from the previous item)
	  9             9

	return (carry): 0

	the carry (borrow) is propagated from word to word and the carry
	from the last word is returned

	(this method is used by the Karatsuba multiplication algorithm)
*/
template<uint value_size>
uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
{
	uint borrow = 0;
	uint pos    = 0;

	TTMATH_ASSERT( ss1_size >= ss2_size )

	// the part where both vectors have a word
	while( pos < ss2_size )
	{
		borrow = SubTwoWords(ss1[pos], ss2[pos], borrow, result + pos);
		++pos;
	}

	// the rest of ss1: there is nothing more in ss2, only the borrow is passed on
	while( pos < ss1_size )
	{
		borrow = SubTwoWords(ss1[pos], 0, borrow, result + pos);
		++pos;
	}

	TTMATH_LOG("UInt::SubVector")

	return borrow;
}
#endif // #ifdef TTMATH_PLATFORM64 #endif // #ifdef TTMATH_PLATFORM64

View File

@ -42,7 +42,7 @@
#ifndef TTMATH_NOASM #ifndef TTMATH_NOASM
#ifdef TTMATH_PLATFORM32 #ifdef TTMATH_PLATFORM32
#pragma message("TTMATH_ASM") #pragma message("TTMATH_ASM32")
/*! /*!
\file ttmathuint_x86.h \file ttmathuint_x86.h

View File

@ -4,20 +4,20 @@
* Author: Tomasz Sowa <t.sowa@slimaczek.pl> * Author: Tomasz Sowa <t.sowa@slimaczek.pl>
*/ */
/* /*
* Copyright (c) 2006-2009, Tomasz Sowa * Copyright (c) 2006-2009, Tomasz Sowa
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
* *
* * Redistributions of source code must retain the above copyright notice, * * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer. * this list of conditions and the following disclaimer.
* *
* * Redistributions in binary form must reproduce the above copyright * * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution. * documentation and/or other materials provided with the distribution.
* *
* * Neither the name Tomasz Sowa nor the names of contributors to this * * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived * project may be used to endorse or promote products derived
* from this software without specific prior written permission. * from this software without specific prior written permission.
@ -39,10 +39,10 @@
#ifndef headerfilettmathuint_x86_64 #ifndef headerfilettmathuint_x86_64
#define headerfilettmathuint_x86_64 #define headerfilettmathuint_x86_64
#ifndef TTMATH_NOASM #ifndef TTMATH_NOASM
#ifdef TTMATH_PLATFORM64 #ifdef TTMATH_PLATFORM64
#pragma message("TTMATH_ASM64")
/*! /*!
\file ttmathuint_x86_64.h \file ttmathuint_x86_64.h
\brief template class UInt<uint> with assembler code for 64bit x86_64 processors \brief template class UInt<uint> with assembler code for 64bit x86_64 processors
@ -50,6 +50,9 @@
this file is included at the end of ttmathuint.h this file is included at the end of ttmathuint.h
*/ */
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
namespace ttmath namespace ttmath
{ {
@ -113,14 +116,14 @@ namespace ttmath
this part should be compiled with gcc this part should be compiled with gcc
*/ */
__asm__ __volatile__( __asm__ __volatile__(
"xorq %%rdx, %%rdx \n" "xorq %%rdx, %%rdx \n"
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0 "neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
"1: \n" "1: \n"
"movq (%%rsi,%%rdx,8), %%rax \n" "movq (%%rsi,%%rdx,8), %%rax \n"
"adcq %%rax, (%%rbx,%%rdx,8) \n" "adcq %%rax, (%%rbx,%%rdx,8) \n"
"incq %%rdx \n" "incq %%rdx \n"
"decq %%rcx \n" "decq %%rcx \n"
"jnz 1b \n" "jnz 1b \n"
@ -134,7 +137,7 @@ namespace ttmath
#endif #endif
TTMATH_LOG("UInt64::Add") TTMATH_LOG("UInt64::Add")
return c; return c;
} }
@ -150,7 +153,7 @@ namespace ttmath
if we've got (value_size=3): if we've got (value_size=3):
table[0] = 10; table[0] = 10;
table[1] = 30; table[1] = 30;
table[2] = 5; table[2] = 5;
and we call: and we call:
AddInt(2,1) AddInt(2,1)
then it'll be: then it'll be:
@ -187,7 +190,7 @@ namespace ttmath
"1: \n" "1: \n"
"addq %%rax, (%%rbx,%%rdx,8) \n" "addq %%rax, (%%rbx,%%rdx,8) \n"
"jnc 2f \n" "jnc 2f \n"
"movq $1, %%rax \n" "movq $1, %%rax \n"
"incq %%rdx \n" "incq %%rdx \n"
"decq %%rcx \n" "decq %%rcx \n"
@ -204,7 +207,7 @@ namespace ttmath
#endif #endif
TTMATH_LOG("UInt64::AddInt") TTMATH_LOG("UInt64::AddInt")
return c; return c;
} }
@ -236,14 +239,38 @@ namespace ttmath
table[1] = 4 + x1 = 14 table[1] = 4 + x1 = 14
table[2] = 5 + x2 = 25 table[2] = 5 + x2 = 25
table[3] = 6 table[3] = 6
and no carry at the end of table[3] and no carry at the end of table[3]
(of course if there was a carry in table[2](5+20) then (of course if there was a carry in table[2](5+20) then
this carry would be passed to the table[3] etc.) this carry would be passed to the table[3] etc.)
*/ */
template<uint value_size> template<uint value_size>
uint UInt<value_size>::AddTwoInts(uint x2, uint x1, uint index) uint UInt<value_size>::AddTwoInts(uint x2, uint x1, uint index)
#if 0
{
uint i, c;
TTMATH_ASSERT( index < value_size )
printf("add %Id + %Id\n",x1,x2);
for(int i=index ; i<value_size ; ++i)
printf("%d: %Id\n",i,table[i]);
c = AddTwoWords(table[index], x1, 0, &table[index]);
c = AddTwoWords(table[index+1], x2, c, &table[index+1]);
for(i=index+2 ; i<value_size && c ; ++i)
c = AddTwoWords(table[i], 0, c, &table[i]);
for(i=index ; i<value_size ; ++i)
printf("%d: %Id\n",i,table[i]);
printf(" -> %d\n",c);
TTMATH_LOG("UInt::AddTwoInts")
return c;
}
#else
{ {
uint b = value_size; uint b = value_size;
uint * p1 = table; uint * p1 = table;
@ -253,7 +280,14 @@ namespace ttmath
#ifndef __GNUC__ #ifndef __GNUC__
#if defined(_M_X64) #if defined(_M_X64)
c = addindexed2_x64(p1,b,index,x2,x1); //printf("add %Id + %Id\n",x1,x2);
//for(int i=index ; i<value_size ; ++i)
// printf("%d: %Id\n",i,table[i]);
//if (table[0] == 1265784741359897913) DebugBreak();
c = addindexed2_x64(p1,b,index,x1,x2);
//for(int i=index ; i<value_size ; ++i)
// printf("%d: %Id\n",i,table[i]);
//printf(" -> %d\n",c);
#else #else
#error "another compiler than GCC is currently not supported in 64bit mode" #error "another compiler than GCC is currently not supported in 64bit mode"
#endif #endif
@ -261,11 +295,11 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2; uint dummy, dummy2;
__asm__ __volatile__( __asm__ __volatile__(
"subq %%rdx, %%rcx \n" "subq %%rdx, %%rcx \n"
"addq %%rsi, (%%rbx,%%rdx,8) \n" "addq %%rsi, (%%rbx,%%rdx,8) \n"
"incq %%rdx \n" "incq %%rdx \n"
"decq %%rcx \n" "decq %%rcx \n"
@ -289,10 +323,12 @@ namespace ttmath
#endif #endif
TTMATH_LOG("UInt64::AddTwoInts") TTMATH_LOG("UInt64::AddTwoInts")
return c; return c;
} }
#endif
@ -328,16 +364,16 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2; uint dummy, dummy2;
__asm__ __volatile__( __asm__ __volatile__(
"xorq %%rdx, %%rdx \n" "xorq %%rdx, %%rdx \n"
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0 "neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
"1: \n" "1: \n"
"movq (%%rsi,%%rdx,8), %%rax \n" "movq (%%rsi,%%rdx,8), %%rax \n"
"sbbq %%rax, (%%rbx,%%rdx,8) \n" "sbbq %%rax, (%%rbx,%%rdx,8) \n"
"incq %%rdx \n" "incq %%rdx \n"
"decq %%rcx \n" "decq %%rcx \n"
"jnz 1b \n" "jnz 1b \n"
@ -366,7 +402,7 @@ namespace ttmath
if we've got (value_size=3): if we've got (value_size=3):
table[0] = 10; table[0] = 10;
table[1] = 30; table[1] = 30;
table[2] = 5; table[2] = 5;
and we call: and we call:
SubInt(2,1) SubInt(2,1)
then it'll be: then it'll be:
@ -395,15 +431,15 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2; uint dummy, dummy2;
__asm__ __volatile__( __asm__ __volatile__(
"subq %%rdx, %%rcx \n" "subq %%rdx, %%rcx \n"
"1: \n" "1: \n"
"subq %%rax, (%%rbx,%%rdx,8) \n" "subq %%rax, (%%rbx,%%rdx,8) \n"
"jnc 2f \n" "jnc 2f \n"
"movq $1, %%rax \n" "movq $1, %%rax \n"
"incq %%rdx \n" "incq %%rdx \n"
"decq %%rcx \n" "decq %%rcx \n"
@ -436,7 +472,7 @@ namespace ttmath
for example: for example:
let this is 001010000 let this is 001010000
after Rcl2_one(1) there'll be 010100001 and Rcl2_one returns 0 after Rcl2_one(1) there'll be 010100001 and Rcl2_one returns 0
***this method is created only on a 64bit platform*** ***this method is created only on a 64bit platform***
*/ */
template<uint value_size> template<uint value_size>
@ -455,9 +491,9 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2; uint dummy, dummy2;
__asm__ __volatile__( __asm__ __volatile__(
"xorq %%rdx, %%rdx \n" // rdx=0 "xorq %%rdx, %%rdx \n" // rdx=0
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0 "neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
@ -473,7 +509,7 @@ namespace ttmath
: "=c" (c), "=a" (dummy), "=d" (dummy2) : "=c" (c), "=a" (dummy), "=d" (dummy2)
: "1" (c), "0" (b), "b" (p1) : "1" (c), "0" (b), "b" (p1)
: "cc", "memory" ); : "cc", "memory" );
#endif #endif
TTMATH_LOG("UInt64::Rcl2_one") TTMATH_LOG("UInt64::Rcl2_one")
@ -512,7 +548,7 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy; uint dummy;
__asm__ __volatile__( __asm__ __volatile__(
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0 "neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
@ -549,7 +585,7 @@ namespace ttmath
for example: for example:
let this is 001010000 let this is 001010000
after Rcl2(3, 1) there'll be 010000111 and Rcl2 returns 1 after Rcl2(3, 1) there'll be 010000111 and Rcl2 returns 1
***this method is created only on a 64bit platform*** ***this method is created only on a 64bit platform***
*/ */
template<uint value_size> template<uint value_size>
@ -570,9 +606,9 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2, dummy3; uint dummy, dummy2, dummy3;
__asm__ __volatile__( __asm__ __volatile__(
"movq %%rcx, %%rsi \n" "movq %%rcx, %%rsi \n"
"movq $64, %%rcx \n" "movq $64, %%rcx \n"
"subq %%rsi, %%rcx \n" "subq %%rsi, %%rcx \n"
@ -595,11 +631,11 @@ namespace ttmath
"xorq %%rax, (%%rbx,%%rdx,8) \n" "xorq %%rax, (%%rbx,%%rdx,8) \n"
"orq %%rsi, (%%rbx,%%rdx,8) \n" "orq %%rsi, (%%rbx,%%rdx,8) \n"
"movq %%rax, %%rsi \n" "movq %%rax, %%rsi \n"
"incq %%rdx \n" "incq %%rdx \n"
"decq %%rdi \n" "decq %%rdi \n"
"jnz 1b \n" "jnz 1b \n"
"and $1, %%rax \n" "and $1, %%rax \n"
: "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3) : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
@ -647,7 +683,7 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
uint dummy, dummy2, dummy3; uint dummy, dummy2, dummy3;
__asm__ __volatile__( __asm__ __volatile__(
"movq %%rcx, %%rsi \n" "movq %%rcx, %%rsi \n"
@ -674,11 +710,11 @@ namespace ttmath
"xorq %%rax, (%%rbx,%%rdx,8) \n" "xorq %%rax, (%%rbx,%%rdx,8) \n"
"orq %%rsi, (%%rbx,%%rdx,8) \n" "orq %%rsi, (%%rbx,%%rdx,8) \n"
"movq %%rax, %%rsi \n" "movq %%rax, %%rsi \n"
"decq %%rdx \n" "decq %%rdx \n"
"decq %%rdi \n" "decq %%rdi \n"
"jnz 1b \n" "jnz 1b \n"
"rolq $1, %%rax \n" "rolq $1, %%rax \n"
"andq $1, %%rax \n" "andq $1, %%rax \n"
@ -754,7 +790,7 @@ namespace ttmath
uint UInt<value_size>::SetBitInWord(uint & value, uint bit) uint UInt<value_size>::SetBitInWord(uint & value, uint bit)
{ {
TTMATH_ASSERT( bit < TTMATH_BITS_PER_UINT ) TTMATH_ASSERT( bit < TTMATH_BITS_PER_UINT )
uint old_bit; uint old_bit;
uint v = value; uint v = value;
@ -778,7 +814,7 @@ namespace ttmath
"setc %%bl \n" "setc %%bl \n"
"movzx %%bl, %%rbx \n" "movzx %%bl, %%rbx \n"
: "=a" (v), "=b" (old_bit) : "=a" (v), "=b" (old_bit)
: "0" (v), "1" (bit) : "0" (v), "1" (bit)
: "cc" ); : "cc" );
@ -803,7 +839,7 @@ namespace ttmath
multiplication: result2:result1 = a * b multiplication: result2:result1 = a * b
result2 - higher word result2 - higher word
result1 - lower word of the result result1 - lower word of the result
this method never returns a carry this method never returns a carry
***this method is created only on a 64bit platform*** ***this method is created only on a 64bit platform***
@ -834,7 +870,7 @@ namespace ttmath
#ifdef __GNUC__ #ifdef __GNUC__
__asm__ __volatile__( __asm__ __volatile__(
"mulq %%rdx \n" "mulq %%rdx \n"
: "=a" (result1_), "=d" (result2_) : "=a" (result1_), "=d" (result2_)
@ -857,13 +893,13 @@ namespace ttmath
* *
* *
*/ */
#ifndef __GNUC__ #ifndef __GNUC__
/*! /*!
this method calculates 64bits word a:b / 32bits c (a higher, b lower word) this method calculates 64bits word a:b / 32bits c (a higher, b lower word)
r = a:b / c and rest - remainder r = a:b / c and rest - remainder
***this method is created only on a 64bit platform*** ***this method is created only on a 64bit platform***
* *
@ -896,7 +932,7 @@ namespace ttmath
#endif #endif
#ifdef __GNUC__ #ifdef __GNUC__
__asm__ __volatile__( __asm__ __volatile__(
"divq %%rcx \n" "divq %%rcx \n"
@ -986,7 +1022,7 @@ namespace ttmath
uint i, c = 0; uint i, c = 0;
TTMATH_ASSERT( ss1_size >= ss2_size ) TTMATH_ASSERT( ss1_size >= ss2_size )
for(i=0 ; i<ss2_size ; ++i) for(i=0 ; i<ss2_size ; ++i)
c = AddTwoWords(ss1[i], ss2[i], c, &result[i]); c = AddTwoWords(ss1[i], ss2[i], c, &result[i]);
@ -1025,7 +1061,7 @@ namespace ttmath
uint i, c = 0; uint i, c = 0;
TTMATH_ASSERT( ss1_size >= ss2_size ) TTMATH_ASSERT( ss1_size >= ss2_size )
for(i=0 ; i<ss2_size ; ++i) for(i=0 ; i<ss2_size ; ++i)
c = SubTwoWords(ss1[i], ss2[i], c, &result[i]); c = SubTwoWords(ss1[i], ss2[i], c, &result[i]);

View File

@ -19,7 +19,7 @@ PUBLIC div_x64
; ;
.CODE .CODE
ALIGN 8 ALIGN 8
;---------------------------------------- ;----------------------------------------
@ -33,20 +33,20 @@ adc_x64 PROC
xor rax, rax xor rax, rax
xor r11, r11 xor r11, r11
sub rax, r9 ; sets CARRY if r9 != 0 sub rax, r9 ; sets CARRY if r9 != 0
ALIGN 16 ALIGN 16
loop1: loop1:
mov rax,qword ptr [rdx + r11 * 8] mov rax,qword ptr [rdx + r11 * 8]
adc qword ptr [rcx + r11 * 8], rax adc qword ptr [rcx + r11 * 8], rax
lea r11, [r11+1] lea r11, [r11+1]
dec r8 dec r8
jnz loop1 jnz loop1
setc al setc al
movzx rax, al movzx rax, al
ret ret
adc_x64 ENDP adc_x64 ENDP
;---------------------------------------- ;----------------------------------------
@ -80,14 +80,14 @@ loop1:
lea r8, [r8+1] lea r8, [r8+1]
add qword ptr [rcx + r8 * 8], r9 add qword ptr [rcx + r8 * 8], r9
jc loop1 jc loop1
ret ret
done_with_cy: done_with_cy:
lea rax, [rax+1] ; rax = 1 lea rax, [rax+1] ; rax = 1
ret ret
addindexed_x64 ENDP addindexed_x64 ENDP
;---------------------------------------- ;----------------------------------------
@ -98,8 +98,8 @@ addindexed_x64 ENDP
addindexed2_x64 PROC addindexed2_x64 PROC
; rcx = p1 ; rcx = p1 (pointer)
; rdx = b ; rdx = b (value size)
; r8 = nPos ; r8 = nPos
; r9 = nValue1 ; r9 = nValue1
; [esp+0x28] = nValue2 ; [esp+0x28] = nValue2
@ -109,26 +109,23 @@ addindexed2_x64 PROC
sub rdx, r8 ; rdx = remaining count of uints sub rdx, r8 ; rdx = remaining count of uints
mov r10, [esp+028h] ; r10 = nValue2 mov r10, [esp+028h] ; r10 = nValue2
add qword ptr [r11 + r8 * 8], r10 add qword ptr [r11 + r8 * 8], r9
lea r8, [r8+1] lea r8, [r8+1]
lea rdx, [rdx-1]
adc qword ptr [r11 + r8 * 8], r10
jc next
ret
ALIGN 16 ALIGN 16
loop1: loop1:
adc qword ptr [r11 + r8 * 8], r9 lea r8, [r8+1]
add qword ptr [r11 + r8 * 8], 1
jc next jc next
ret ret
next:
lea r8, [r8+1]
xor r9, r9 ; set to 0 -> cy still set!
dec rdx
jnz loop1
jc return_1 ; most of the times, there will be NO carry (I hope)
done: next:
ret dec rdx ; does not modify CY too...
jnz loop1
return_1:
lea rax, [rax+1] lea rax, [rax+1]
ret ret
@ -138,8 +135,6 @@ addindexed2_x64 ENDP
ALIGN 8 ALIGN 8
ALIGN 8
;---------------------------------------- ;----------------------------------------
sbb_x64 PROC sbb_x64 PROC
@ -152,15 +147,15 @@ sbb_x64 PROC
xor rax, rax xor rax, rax
xor r11, r11 xor r11, r11
sub rax, r9 ; sets CARRY if r9 != 0 sub rax, r9 ; sets CARRY if r9 != 0
ALIGN 16 ALIGN 16
loop1: loop1:
mov rax,qword ptr [rdx + r11 * 8] mov rax,qword ptr [rdx + r11 * 8]
sbb qword ptr [rcx + r11 * 8], rax sbb qword ptr [rcx + r11 * 8], rax
lea r11, [r11+1] lea r11, [r11+1]
dec r8 dec r8
jnz loop1 jnz loop1
setc al setc al
movzx rax, al movzx rax, al
@ -181,12 +176,12 @@ subindexed_x64 PROC
; r9 = nValue ; r9 = nValue
sub rdx, r8 ; rdx = remaining count of uints sub rdx, r8 ; rdx = remaining count of uints
ALIGN 16 ALIGN 16
loop1: loop1:
sub qword ptr [rcx + r8 * 8], r9 sub qword ptr [rcx + r8 * 8], r9
jnc done jnc done
lea r8, [r8+1] lea r8, [r8+1]
mov r9, 1 mov r9, 1
dec rdx dec rdx
@ -196,7 +191,7 @@ loop1:
done: done:
xor rax, rax xor rax, rax
ret ret
return_1: return_1:
mov rax, 1 mov rax, 1
ret ret
@ -217,17 +212,17 @@ rcl_x64 PROC
mov r11, rcx ; table mov r11, rcx ; table
xor r10, r10 xor r10, r10
neg r8 ; CY set if r8 <> 0 neg r8 ; CY set if r8 <> 0
ALIGN 16 ALIGN 16
loop1: loop1:
rcl qword ptr [r11 + r10 * 8], 1 rcl qword ptr [r11 + r10 * 8], 1
lea r10, [r10+1] lea r10, [r10+1]
dec rdx dec rdx
jnz loop1 jnz loop1
setc al setc al
movzx rax, al movzx rax, al
ret ret
rcl_x64 ENDP rcl_x64 ENDP
@ -245,16 +240,16 @@ rcr_x64 PROC
xor r10, r10 xor r10, r10
neg r8 ; CY set if r8 <> 0 neg r8 ; CY set if r8 <> 0
ALIGN 16 ALIGN 16
loop1: loop1:
rcr qword ptr -8[rcx + rdx * 8], 1 rcr qword ptr -8[rcx + rdx * 8], 1
dec rdx dec rdx
jnz loop1 jnz loop1
setc al setc al
movzx rax, al movzx rax, al
ret ret
rcr_x64 ENDP rcr_x64 ENDP
@ -270,7 +265,7 @@ div_x64 PROC
; rcx = &Hi ; rcx = &Hi
; rdx = &Lo ; rdx = &Lo
; r8 = nDiv ; r8 = nDiv
mov r11, rcx mov r11, rcx
mov r10, rdx mov r10, rdx
@ -295,21 +290,21 @@ rcl2_x64 PROC
; rdx = nSize ; rdx = nSize
; r8 = bits ; r8 = bits
; r9 = c ; r9 = c
push rbx push rbx
mov r10, rcx ; r10 = p1 mov r10, rcx ; r10 = p1
xor rax, rax xor rax, rax
mov rcx, 64 mov rcx, 64
sub rcx, r8 sub rcx, r8
mov r11, -1 mov r11, -1
shr r11, cl ; r11 = mask shr r11, cl ; r11 = mask
mov rcx, r8 ; rcx = count of bits mov rcx, r8 ; rcx = count of bits
mov rbx, rax ; rbx = old value = 0 mov rbx, rax ; rbx = old value = 0
or r9, r9 or r9, r9
cmovnz rbx, r11 ; if (c) then old value = mask cmovnz rbx, r11 ; if (c) then old value = mask
@ -323,7 +318,7 @@ loop1:
xor qword ptr [r10+r9*8], rax xor qword ptr [r10+r9*8], rax
or qword ptr [r10+r9*8], rbx or qword ptr [r10+r9*8], rbx
mov rbx, rax mov rbx, rax
lea r9, [r9+1] lea r9, [r9+1]
dec rdx dec rdx
@ -332,8 +327,8 @@ loop1:
and rax, 1 and rax, 1
pop rbx pop rbx
ret ret
rcl2_x64 ENDP rcl2_x64 ENDP
;---------------------------------------- ;----------------------------------------
@ -346,20 +341,20 @@ rcr2_x64 PROC
; rdx = nSize ; rdx = nSize
; r8 = bits ; r8 = bits
; r9 = c ; r9 = c
push rbx push rbx
mov r10, rcx ; r10 = p1 mov r10, rcx ; r10 = p1
xor rax, rax xor rax, rax
mov rcx, 64 mov rcx, 64
sub rcx, r8 sub rcx, r8
mov r11, -1 mov r11, -1
shl r11, cl ; r11 = mask shl r11, cl ; r11 = mask
mov rcx, r8 ; rcx = count of bits mov rcx, r8 ; rcx = count of bits
mov rbx, rax ; rbx = old value = 0 mov rbx, rax ; rbx = old value = 0
or r9, r9 or r9, r9
cmovnz rbx, r11 ; if (c) then old value = mask cmovnz rbx, r11 ; if (c) then old value = mask
@ -374,18 +369,18 @@ loop1:
xor qword ptr [r10+r9*8], rax xor qword ptr [r10+r9*8], rax
or qword ptr [r10+r9*8], rbx or qword ptr [r10+r9*8], rbx
mov rbx, rax mov rbx, rax
lea r9, [r9-1] lea r9, [r9-1]
dec rdx dec rdx
jnz loop1 jnz loop1
rol rax, 1 rol rax, 1
and rax, 1 and rax, 1
pop rbx pop rbx
ret ret
rcr2_x64 ENDP rcr2_x64 ENDP
END END