- fixed a bug in 64 bit ASM for MSVC
git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@181 e52654a7-88a9-db11-a3e9-0013d4bc506e
This commit is contained in:
parent
51b2c974a1
commit
e102086f80
|
@ -3869,47 +3869,66 @@ public:
|
|||
// we should check the mantissas beforehand because sometimes we can have
|
||||
// a mantissa set to zero while the exponent still holds some other value
|
||||
// (maybe we've forgotten about calling CorrectZero() ?)
|
||||
if( mantissa.IsZero() && ss2.mantissa.IsZero())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
if( IsSign() != ss2.IsSign() )
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if( mantissa.IsZero())
|
||||
{
|
||||
if (ss2.mantissa.IsZero())
|
||||
return true;
|
||||
return(ss2.AboutEqual(*this,nBitsToIgnore));
|
||||
}
|
||||
|
||||
if( exponent==ss2.exponent )
|
||||
{
|
||||
if (mantissa == ss2.mantissa)
|
||||
{
|
||||
return(true);
|
||||
}
|
||||
if( IsSign() != ss2.IsSign() )
|
||||
{
|
||||
// we need to check the difference (both might be around Zero)
|
||||
Big<exp,man> temp(*this);
|
||||
|
||||
temp.Sub(ss2);
|
||||
if (ss2.mantissa.IsZero())
|
||||
{
|
||||
return(this->exponent <= uint(2*(-sint(man*TTMATH_BITS_PER_UINT))+nBitsToIgnore));
|
||||
}
|
||||
|
||||
// exponents may not differ much!
|
||||
ttmath::Int<exp> expdiff(this->exponent - ss2.exponent);
|
||||
|
||||
// they may differ one if for example mantissa1=0x80000000, mantissa2=0xffffffff
|
||||
if (ttmath::Abs(expdiff) > 1)
|
||||
return(false);
|
||||
|
||||
Int<exp> exponent_diff(exponent - temp.exponent);
|
||||
|
||||
return(exponent_diff > man*TTMATH_BITS_PER_UINT-nBitsToIgnore);
|
||||
}
|
||||
|
||||
// faster to mask the bits!
|
||||
ASSERT(nBitsToIgnore < TTMATH_BITS_PER_UINT);
|
||||
// calculate the 'difference' mantissa
|
||||
ttmath::UInt<man> man1(this->mantissa);
|
||||
ttmath::UInt<man> man2(ss2.mantissa);
|
||||
ttmath::UInt<man> mandiff;
|
||||
|
||||
switch (expdiff.ToInt())
|
||||
{
|
||||
case +1:
|
||||
man2.Rcr(1,0);
|
||||
mandiff = man1;
|
||||
mandiff.Sub(man2);
|
||||
break;
|
||||
case -1:
|
||||
man1.Rcr(1,0);
|
||||
mandiff = man2;
|
||||
mandiff.Sub(man1);
|
||||
break;
|
||||
case 0:
|
||||
if (man2 > man1)
|
||||
{
|
||||
mandiff = man2;
|
||||
mandiff.Sub(man1);
|
||||
}
|
||||
else
|
||||
{
|
||||
mandiff = man1;
|
||||
mandiff.Sub(man2);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// faster to mask the bits!
|
||||
ASSERT(nBitsToIgnore < TTMATH_BITS_PER_UINT);
|
||||
|
||||
for (int n = man-1; n > 0; --n)
|
||||
{
|
||||
if (mantissa.table[n] != ss2.mantissa.table[n])
|
||||
return(false);
|
||||
}
|
||||
uint nMask = ~((1 << nBitsToIgnore) - 1);
|
||||
return((mantissa.table[0] & nMask) == (ss2.mantissa.table[0] & nMask));
|
||||
}
|
||||
|
||||
return false;
|
||||
for (int n = man-1; n > 0; --n)
|
||||
{
|
||||
if (mandiff.table[n] != 0)
|
||||
return(false);
|
||||
}
|
||||
uint nMask = ~((1 << nBitsToIgnore) - 1);
|
||||
return((mandiff.table[0] & nMask) == 0);
|
||||
}
|
||||
|
||||
bool operator<(const Big<exp,man> & ss2) const
|
||||
|
|
|
@ -871,6 +871,83 @@ namespace ttmath
|
|||
u3 = sub_res_low_.u_.low;
|
||||
}
|
||||
|
||||
/*!
|
||||
this static method adds one vector to the other
|
||||
'ss1' is larger in size or equal to 'ss2'
|
||||
|
||||
ss1 points to the first (larger) vector
|
||||
ss2 points to the second vector
|
||||
ss1_size - size of the ss1 (and size of the result too)
|
||||
ss2_size - size of the ss2
|
||||
result - is the result vector (which has size the same as ss1: ss1_size)
|
||||
|
||||
Example: ss1_size is 5, ss2_size is 3
|
||||
ss1: ss2: result (output):
|
||||
5 1 5+1
|
||||
4 3 4+3
|
||||
2 7 2+7
|
||||
6 6
|
||||
9 9
|
||||
of course the carry is propagated and will be returned from the last item
|
||||
(this method is used by the Karatsuba multiplication algorithm)
|
||||
*/
|
||||
template<uint value_size>
|
||||
uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
|
||||
{
|
||||
uint i, c = 0;
|
||||
|
||||
TTMATH_ASSERT( ss1_size >= ss2_size )
|
||||
|
||||
for(i=0 ; i<ss2_size ; ++i)
|
||||
c = AddTwoWords(ss1[i], ss2[i], c, &result[i]);
|
||||
|
||||
for( ; i<ss1_size ; ++i)
|
||||
c = AddTwoWords(ss1[i], 0, c, &result[i]);
|
||||
|
||||
TTMATH_LOG("UInt::AddVector")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/*!
|
||||
this static method subtracts one vector from the other
|
||||
'ss1' is larger in size or equal to 'ss2'
|
||||
|
||||
ss1 points to the first (larger) vector
|
||||
ss2 points to the second vector
|
||||
ss1_size - size of the ss1 (and size of the result too)
|
||||
ss2_size - size of the ss2
|
||||
result - is the result vector (which has size the same as ss1: ss1_size)
|
||||
|
||||
Example: ss1_size is 5, ss2_size is 3
|
||||
ss1: ss2: result (output):
|
||||
5 1 5-1
|
||||
4 3 4-3
|
||||
2 7 2-7
|
||||
6 6-1 (the borrow from previous item)
|
||||
9 9
|
||||
return (carry): 0
|
||||
of course the carry (borrow) is propagated and will be returned from the last item
|
||||
(this method is used by the Karatsuba multiplication algorithm)
|
||||
*/
|
||||
template<uint value_size>
|
||||
uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
|
||||
{
|
||||
uint i, c = 0;
|
||||
|
||||
TTMATH_ASSERT( ss1_size >= ss2_size )
|
||||
|
||||
for(i=0 ; i<ss2_size ; ++i)
|
||||
c = SubTwoWords(ss1[i], ss2[i], c, &result[i]);
|
||||
|
||||
for( ; i<ss1_size ; ++i)
|
||||
c = SubTwoWords(ss1[i], 0, c, &result[i]);
|
||||
|
||||
TTMATH_LOG("UInt::SubVector")
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
#endif // #ifdef TTMATH_PLATFORM64
|
||||
|
||||
|
||||
|
|
|
@ -42,7 +42,7 @@
|
|||
#ifndef TTMATH_NOASM
|
||||
#ifdef TTMATH_PLATFORM32
|
||||
|
||||
#pragma message("TTMATH_ASM")
|
||||
#pragma message("TTMATH_ASM32")
|
||||
|
||||
/*!
|
||||
\file ttmathuint_x86.h
|
||||
|
|
|
@ -4,20 +4,20 @@
|
|||
* Author: Tomasz Sowa <t.sowa@slimaczek.pl>
|
||||
*/
|
||||
|
||||
/*
|
||||
/*
|
||||
* Copyright (c) 2006-2009, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
|
@ -39,10 +39,10 @@
|
|||
#ifndef headerfilettmathuint_x86_64
|
||||
#define headerfilettmathuint_x86_64
|
||||
|
||||
|
||||
#ifndef TTMATH_NOASM
|
||||
#ifdef TTMATH_PLATFORM64
|
||||
|
||||
#pragma message("TTMATH_ASM64")
|
||||
/*!
|
||||
\file ttmathuint_x86_64.h
|
||||
\brief template class UInt<uint> with assembler code for 64bit x86_64 processors
|
||||
|
@ -50,6 +50,9 @@
|
|||
this file is included at the end of ttmathuint.h
|
||||
*/
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
|
||||
|
||||
namespace ttmath
|
||||
{
|
||||
|
@ -113,14 +116,14 @@ namespace ttmath
|
|||
this part should be compiled with gcc
|
||||
*/
|
||||
__asm__ __volatile__(
|
||||
|
||||
|
||||
"xorq %%rdx, %%rdx \n"
|
||||
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
|
||||
|
||||
"1: \n"
|
||||
"movq (%%rsi,%%rdx,8), %%rax \n"
|
||||
"adcq %%rax, (%%rbx,%%rdx,8) \n"
|
||||
|
||||
|
||||
"incq %%rdx \n"
|
||||
"decq %%rcx \n"
|
||||
"jnz 1b \n"
|
||||
|
@ -134,7 +137,7 @@ namespace ttmath
|
|||
#endif
|
||||
|
||||
TTMATH_LOG("UInt64::Add")
|
||||
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
|
@ -150,7 +153,7 @@ namespace ttmath
|
|||
if we've got (value_size=3):
|
||||
table[0] = 10;
|
||||
table[1] = 30;
|
||||
table[2] = 5;
|
||||
table[2] = 5;
|
||||
and we call:
|
||||
AddInt(2,1)
|
||||
then it'll be:
|
||||
|
@ -187,7 +190,7 @@ namespace ttmath
|
|||
"1: \n"
|
||||
"addq %%rax, (%%rbx,%%rdx,8) \n"
|
||||
"jnc 2f \n"
|
||||
|
||||
|
||||
"movq $1, %%rax \n"
|
||||
"incq %%rdx \n"
|
||||
"decq %%rcx \n"
|
||||
|
@ -204,7 +207,7 @@ namespace ttmath
|
|||
#endif
|
||||
|
||||
TTMATH_LOG("UInt64::AddInt")
|
||||
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
|
@ -236,14 +239,38 @@ namespace ttmath
|
|||
table[1] = 4 + x1 = 14
|
||||
table[2] = 5 + x2 = 25
|
||||
table[3] = 6
|
||||
|
||||
|
||||
and no carry at the end of table[3]
|
||||
|
||||
(of course if there was a carry in table[2](5+20) then
|
||||
(of course if there was a carry in table[2](5+20) then
|
||||
this carry would be passed to the table[3] etc.)
|
||||
*/
|
||||
template<uint value_size>
|
||||
uint UInt<value_size>::AddTwoInts(uint x2, uint x1, uint index)
|
||||
#if 0
|
||||
{
|
||||
uint i, c;
|
||||
|
||||
TTMATH_ASSERT( index < value_size )
|
||||
|
||||
printf("add %Id + %Id\n",x1,x2);
|
||||
for(int i=index ; i<value_size ; ++i)
|
||||
printf("%d: %Id\n",i,table[i]);
|
||||
|
||||
c = AddTwoWords(table[index], x1, 0, &table[index]);
|
||||
c = AddTwoWords(table[index+1], x2, c, &table[index+1]);
|
||||
|
||||
for(i=index+2 ; i<value_size && c ; ++i)
|
||||
c = AddTwoWords(table[i], 0, c, &table[i]);
|
||||
for(i=index ; i<value_size ; ++i)
|
||||
printf("%d: %Id\n",i,table[i]);
|
||||
printf(" -> %d\n",c);
|
||||
|
||||
TTMATH_LOG("UInt::AddTwoInts")
|
||||
|
||||
return c;
|
||||
}
|
||||
#else
|
||||
{
|
||||
uint b = value_size;
|
||||
uint * p1 = table;
|
||||
|
@ -253,7 +280,14 @@ namespace ttmath
|
|||
|
||||
#ifndef __GNUC__
|
||||
#if defined(_M_X64)
|
||||
c = addindexed2_x64(p1,b,index,x2,x1);
|
||||
//printf("add %Id + %Id\n",x1,x2);
|
||||
//for(int i=index ; i<value_size ; ++i)
|
||||
// printf("%d: %Id\n",i,table[i]);
|
||||
//if (table[0] == 1265784741359897913) DebugBreak();
|
||||
c = addindexed2_x64(p1,b,index,x1,x2);
|
||||
//for(int i=index ; i<value_size ; ++i)
|
||||
// printf("%d: %Id\n",i,table[i]);
|
||||
//printf(" -> %d\n",c);
|
||||
#else
|
||||
#error "another compiler than GCC is currently not supported in 64bit mode"
|
||||
#endif
|
||||
|
@ -261,11 +295,11 @@ namespace ttmath
|
|||
|
||||
#ifdef __GNUC__
|
||||
uint dummy, dummy2;
|
||||
|
||||
|
||||
__asm__ __volatile__(
|
||||
|
||||
|
||||
"subq %%rdx, %%rcx \n"
|
||||
|
||||
|
||||
"addq %%rsi, (%%rbx,%%rdx,8) \n"
|
||||
"incq %%rdx \n"
|
||||
"decq %%rcx \n"
|
||||
|
@ -289,10 +323,12 @@ namespace ttmath
|
|||
|
||||
#endif
|
||||
|
||||
|
||||
TTMATH_LOG("UInt64::AddTwoInts")
|
||||
|
||||
return c;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
@ -328,16 +364,16 @@ namespace ttmath
|
|||
|
||||
#ifdef __GNUC__
|
||||
uint dummy, dummy2;
|
||||
|
||||
|
||||
__asm__ __volatile__(
|
||||
|
||||
|
||||
"xorq %%rdx, %%rdx \n"
|
||||
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
|
||||
|
||||
"1: \n"
|
||||
"movq (%%rsi,%%rdx,8), %%rax \n"
|
||||
"sbbq %%rax, (%%rbx,%%rdx,8) \n"
|
||||
|
||||
|
||||
"incq %%rdx \n"
|
||||
"decq %%rcx \n"
|
||||
"jnz 1b \n"
|
||||
|
@ -366,7 +402,7 @@ namespace ttmath
|
|||
if we've got (value_size=3):
|
||||
table[0] = 10;
|
||||
table[1] = 30;
|
||||
table[2] = 5;
|
||||
table[2] = 5;
|
||||
and we call:
|
||||
SubInt(2,1)
|
||||
then it'll be:
|
||||
|
@ -395,15 +431,15 @@ namespace ttmath
|
|||
|
||||
#ifdef __GNUC__
|
||||
uint dummy, dummy2;
|
||||
|
||||
|
||||
__asm__ __volatile__(
|
||||
|
||||
|
||||
"subq %%rdx, %%rcx \n"
|
||||
|
||||
"1: \n"
|
||||
"subq %%rax, (%%rbx,%%rdx,8) \n"
|
||||
"jnc 2f \n"
|
||||
|
||||
|
||||
"movq $1, %%rax \n"
|
||||
"incq %%rdx \n"
|
||||
"decq %%rcx \n"
|
||||
|
@ -436,7 +472,7 @@ namespace ttmath
|
|||
for example:
|
||||
let this is 001010000
|
||||
after Rcl2_one(1) there'll be 010100001 and Rcl2_one returns 0
|
||||
|
||||
|
||||
***this method is created only on a 64bit platform***
|
||||
*/
|
||||
template<uint value_size>
|
||||
|
@ -455,9 +491,9 @@ namespace ttmath
|
|||
|
||||
#ifdef __GNUC__
|
||||
uint dummy, dummy2;
|
||||
|
||||
|
||||
__asm__ __volatile__(
|
||||
|
||||
|
||||
"xorq %%rdx, %%rdx \n" // rdx=0
|
||||
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
|
||||
|
||||
|
@ -473,7 +509,7 @@ namespace ttmath
|
|||
: "=c" (c), "=a" (dummy), "=d" (dummy2)
|
||||
: "1" (c), "0" (b), "b" (p1)
|
||||
: "cc", "memory" );
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
TTMATH_LOG("UInt64::Rcl2_one")
|
||||
|
@ -512,7 +548,7 @@ namespace ttmath
|
|||
|
||||
#ifdef __GNUC__
|
||||
uint dummy;
|
||||
|
||||
|
||||
__asm__ __volatile__(
|
||||
|
||||
"neg %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
|
||||
|
@ -549,7 +585,7 @@ namespace ttmath
|
|||
for example:
|
||||
let this is 001010000
|
||||
after Rcl2(3, 1) there'll be 010000111 and Rcl2 returns 1
|
||||
|
||||
|
||||
***this method is created only on a 64bit platform***
|
||||
*/
|
||||
template<uint value_size>
|
||||
|
@ -570,9 +606,9 @@ namespace ttmath
|
|||
|
||||
#ifdef __GNUC__
|
||||
uint dummy, dummy2, dummy3;
|
||||
|
||||
|
||||
__asm__ __volatile__(
|
||||
|
||||
|
||||
"movq %%rcx, %%rsi \n"
|
||||
"movq $64, %%rcx \n"
|
||||
"subq %%rsi, %%rcx \n"
|
||||
|
@ -595,11 +631,11 @@ namespace ttmath
|
|||
"xorq %%rax, (%%rbx,%%rdx,8) \n"
|
||||
"orq %%rsi, (%%rbx,%%rdx,8) \n"
|
||||
"movq %%rax, %%rsi \n"
|
||||
|
||||
|
||||
"incq %%rdx \n"
|
||||
"decq %%rdi \n"
|
||||
"jnz 1b \n"
|
||||
|
||||
|
||||
"and $1, %%rax \n"
|
||||
|
||||
: "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
|
||||
|
@ -647,7 +683,7 @@ namespace ttmath
|
|||
|
||||
#ifdef __GNUC__
|
||||
uint dummy, dummy2, dummy3;
|
||||
|
||||
|
||||
__asm__ __volatile__(
|
||||
|
||||
"movq %%rcx, %%rsi \n"
|
||||
|
@ -674,11 +710,11 @@ namespace ttmath
|
|||
"xorq %%rax, (%%rbx,%%rdx,8) \n"
|
||||
"orq %%rsi, (%%rbx,%%rdx,8) \n"
|
||||
"movq %%rax, %%rsi \n"
|
||||
|
||||
|
||||
"decq %%rdx \n"
|
||||
"decq %%rdi \n"
|
||||
"jnz 1b \n"
|
||||
|
||||
|
||||
"rolq $1, %%rax \n"
|
||||
"andq $1, %%rax \n"
|
||||
|
||||
|
@ -754,7 +790,7 @@ namespace ttmath
|
|||
uint UInt<value_size>::SetBitInWord(uint & value, uint bit)
|
||||
{
|
||||
TTMATH_ASSERT( bit < TTMATH_BITS_PER_UINT )
|
||||
|
||||
|
||||
uint old_bit;
|
||||
uint v = value;
|
||||
|
||||
|
@ -778,7 +814,7 @@ namespace ttmath
|
|||
|
||||
"setc %%bl \n"
|
||||
"movzx %%bl, %%rbx \n"
|
||||
|
||||
|
||||
: "=a" (v), "=b" (old_bit)
|
||||
: "0" (v), "1" (bit)
|
||||
: "cc" );
|
||||
|
@ -803,7 +839,7 @@ namespace ttmath
|
|||
multiplication: result2:result1 = a * b
|
||||
result2 - higher word
|
||||
result1 - lower word of the result
|
||||
|
||||
|
||||
this method never returns a carry
|
||||
|
||||
***this method is created only on a 64bit platform***
|
||||
|
@ -834,7 +870,7 @@ namespace ttmath
|
|||
#ifdef __GNUC__
|
||||
|
||||
__asm__ __volatile__(
|
||||
|
||||
|
||||
"mulq %%rdx \n"
|
||||
|
||||
: "=a" (result1_), "=d" (result2_)
|
||||
|
@ -857,13 +893,13 @@ namespace ttmath
|
|||
*
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef __GNUC__
|
||||
|
||||
/*!
|
||||
this method calculates 64bits word a:b / 32bits c (a higher, b lower word)
|
||||
r = a:b / c and rest - remainder
|
||||
|
||||
|
||||
***this method is created only on a 64bit platform***
|
||||
|
||||
*
|
||||
|
@ -896,7 +932,7 @@ namespace ttmath
|
|||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
|
||||
|
||||
__asm__ __volatile__(
|
||||
|
||||
"divq %%rcx \n"
|
||||
|
@ -986,7 +1022,7 @@ namespace ttmath
|
|||
uint i, c = 0;
|
||||
|
||||
TTMATH_ASSERT( ss1_size >= ss2_size )
|
||||
|
||||
|
||||
for(i=0 ; i<ss2_size ; ++i)
|
||||
c = AddTwoWords(ss1[i], ss2[i], c, &result[i]);
|
||||
|
||||
|
@ -1025,7 +1061,7 @@ namespace ttmath
|
|||
uint i, c = 0;
|
||||
|
||||
TTMATH_ASSERT( ss1_size >= ss2_size )
|
||||
|
||||
|
||||
for(i=0 ; i<ss2_size ; ++i)
|
||||
c = SubTwoWords(ss1[i], ss2[i], c, &result[i]);
|
||||
|
||||
|
|
|
@ -19,7 +19,7 @@ PUBLIC div_x64
|
|||
;
|
||||
|
||||
.CODE
|
||||
|
||||
|
||||
ALIGN 8
|
||||
|
||||
;----------------------------------------
|
||||
|
@ -33,20 +33,20 @@ adc_x64 PROC
|
|||
xor rax, rax
|
||||
xor r11, r11
|
||||
sub rax, r9 ; sets CARRY if r9 != 0
|
||||
|
||||
|
||||
ALIGN 16
|
||||
loop1:
|
||||
loop1:
|
||||
mov rax,qword ptr [rdx + r11 * 8]
|
||||
adc qword ptr [rcx + r11 * 8], rax
|
||||
lea r11, [r11+1]
|
||||
dec r8
|
||||
jnz loop1
|
||||
|
||||
|
||||
setc al
|
||||
movzx rax, al
|
||||
|
||||
ret
|
||||
|
||||
|
||||
adc_x64 ENDP
|
||||
|
||||
;----------------------------------------
|
||||
|
@ -80,14 +80,14 @@ loop1:
|
|||
lea r8, [r8+1]
|
||||
add qword ptr [rcx + r8 * 8], r9
|
||||
jc loop1
|
||||
|
||||
|
||||
ret
|
||||
|
||||
|
||||
done_with_cy:
|
||||
lea rax, [rax+1] ; rax = 1
|
||||
|
||||
|
||||
ret
|
||||
|
||||
|
||||
addindexed_x64 ENDP
|
||||
|
||||
;----------------------------------------
|
||||
|
@ -98,8 +98,8 @@ addindexed_x64 ENDP
|
|||
|
||||
addindexed2_x64 PROC
|
||||
|
||||
; rcx = p1
|
||||
; rdx = b
|
||||
; rcx = p1 (pointer)
|
||||
; rdx = b (value size)
|
||||
; r8 = nPos
|
||||
; r9 = nValue1
|
||||
; [esp+0x28] = nValue2
|
||||
|
@ -109,26 +109,23 @@ addindexed2_x64 PROC
|
|||
sub rdx, r8 ; rdx = remaining count of uints
|
||||
mov r10, [esp+028h] ; r10 = nValue2
|
||||
|
||||
add qword ptr [r11 + r8 * 8], r10
|
||||
add qword ptr [r11 + r8 * 8], r9
|
||||
lea r8, [r8+1]
|
||||
lea rdx, [rdx-1]
|
||||
adc qword ptr [r11 + r8 * 8], r10
|
||||
jc next
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
loop1:
|
||||
adc qword ptr [r11 + r8 * 8], r9
|
||||
lea r8, [r8+1]
|
||||
add qword ptr [r11 + r8 * 8], 1
|
||||
jc next
|
||||
ret
|
||||
|
||||
next:
|
||||
lea r8, [r8+1]
|
||||
xor r9, r9 ; set to 0 -> cy still set!
|
||||
dec rdx
|
||||
jnz loop1
|
||||
jc return_1 ; most of the times, there will be NO carry (I hope)
|
||||
|
||||
done:
|
||||
ret
|
||||
|
||||
return_1:
|
||||
next:
|
||||
dec rdx ; does not modify CY too...
|
||||
jnz loop1
|
||||
lea rax, [rax+1]
|
||||
ret
|
||||
|
||||
|
@ -138,8 +135,6 @@ addindexed2_x64 ENDP
|
|||
|
||||
ALIGN 8
|
||||
|
||||
ALIGN 8
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
sbb_x64 PROC
|
||||
|
@ -152,15 +147,15 @@ sbb_x64 PROC
|
|||
xor rax, rax
|
||||
xor r11, r11
|
||||
sub rax, r9 ; sets CARRY if r9 != 0
|
||||
|
||||
|
||||
ALIGN 16
|
||||
loop1:
|
||||
loop1:
|
||||
mov rax,qword ptr [rdx + r11 * 8]
|
||||
sbb qword ptr [rcx + r11 * 8], rax
|
||||
lea r11, [r11+1]
|
||||
dec r8
|
||||
jnz loop1
|
||||
|
||||
|
||||
setc al
|
||||
movzx rax, al
|
||||
|
||||
|
@ -181,12 +176,12 @@ subindexed_x64 PROC
|
|||
; r9 = nValue
|
||||
|
||||
sub rdx, r8 ; rdx = remaining count of uints
|
||||
|
||||
|
||||
ALIGN 16
|
||||
loop1:
|
||||
sub qword ptr [rcx + r8 * 8], r9
|
||||
jnc done
|
||||
|
||||
|
||||
lea r8, [r8+1]
|
||||
mov r9, 1
|
||||
dec rdx
|
||||
|
@ -196,7 +191,7 @@ loop1:
|
|||
done:
|
||||
xor rax, rax
|
||||
ret
|
||||
|
||||
|
||||
return_1:
|
||||
mov rax, 1
|
||||
ret
|
||||
|
@ -217,17 +212,17 @@ rcl_x64 PROC
|
|||
mov r11, rcx ; table
|
||||
xor r10, r10
|
||||
neg r8 ; CY set if r8 <> 0
|
||||
|
||||
|
||||
ALIGN 16
|
||||
loop1:
|
||||
rcl qword ptr [r11 + r10 * 8], 1
|
||||
lea r10, [r10+1]
|
||||
dec rdx
|
||||
jnz loop1
|
||||
|
||||
|
||||
setc al
|
||||
movzx rax, al
|
||||
|
||||
|
||||
ret
|
||||
|
||||
rcl_x64 ENDP
|
||||
|
@ -245,16 +240,16 @@ rcr_x64 PROC
|
|||
|
||||
xor r10, r10
|
||||
neg r8 ; CY set if r8 <> 0
|
||||
|
||||
|
||||
ALIGN 16
|
||||
loop1:
|
||||
rcr qword ptr -8[rcx + rdx * 8], 1
|
||||
dec rdx
|
||||
jnz loop1
|
||||
|
||||
|
||||
setc al
|
||||
movzx rax, al
|
||||
|
||||
|
||||
ret
|
||||
|
||||
rcr_x64 ENDP
|
||||
|
@ -270,7 +265,7 @@ div_x64 PROC
|
|||
; rcx = &Hi
|
||||
; rdx = &Lo
|
||||
; r8 = nDiv
|
||||
|
||||
|
||||
mov r11, rcx
|
||||
mov r10, rdx
|
||||
|
||||
|
@ -295,21 +290,21 @@ rcl2_x64 PROC
|
|||
; rdx = nSize
|
||||
; r8 = bits
|
||||
; r9 = c
|
||||
|
||||
|
||||
push rbx
|
||||
|
||||
|
||||
mov r10, rcx ; r10 = p1
|
||||
xor rax, rax
|
||||
|
||||
xor rax, rax
|
||||
|
||||
mov rcx, 64
|
||||
sub rcx, r8
|
||||
|
||||
|
||||
mov r11, -1
|
||||
shr r11, cl ; r11 = mask
|
||||
|
||||
mov rcx, r8 ; rcx = count of bits
|
||||
|
||||
mov rbx, rax ; rbx = old value = 0
|
||||
mov rbx, rax ; rbx = old value = 0
|
||||
or r9, r9
|
||||
cmovnz rbx, r11 ; if (c) then old value = mask
|
||||
|
||||
|
@ -323,7 +318,7 @@ loop1:
|
|||
xor qword ptr [r10+r9*8], rax
|
||||
or qword ptr [r10+r9*8], rbx
|
||||
mov rbx, rax
|
||||
|
||||
|
||||
lea r9, [r9+1]
|
||||
dec rdx
|
||||
|
||||
|
@ -332,8 +327,8 @@ loop1:
|
|||
and rax, 1
|
||||
pop rbx
|
||||
ret
|
||||
|
||||
rcl2_x64 ENDP
|
||||
|
||||
rcl2_x64 ENDP
|
||||
|
||||
;----------------------------------------
|
||||
|
||||
|
@ -346,20 +341,20 @@ rcr2_x64 PROC
|
|||
; rdx = nSize
|
||||
; r8 = bits
|
||||
; r9 = c
|
||||
|
||||
|
||||
push rbx
|
||||
mov r10, rcx ; r10 = p1
|
||||
xor rax, rax
|
||||
|
||||
xor rax, rax
|
||||
|
||||
mov rcx, 64
|
||||
sub rcx, r8
|
||||
|
||||
|
||||
mov r11, -1
|
||||
shl r11, cl ; r11 = mask
|
||||
|
||||
mov rcx, r8 ; rcx = count of bits
|
||||
|
||||
mov rbx, rax ; rbx = old value = 0
|
||||
mov rbx, rax ; rbx = old value = 0
|
||||
or r9, r9
|
||||
cmovnz rbx, r11 ; if (c) then old value = mask
|
||||
|
||||
|
@ -374,18 +369,18 @@ loop1:
|
|||
xor qword ptr [r10+r9*8], rax
|
||||
or qword ptr [r10+r9*8], rbx
|
||||
mov rbx, rax
|
||||
|
||||
|
||||
lea r9, [r9-1]
|
||||
dec rdx
|
||||
|
||||
jnz loop1
|
||||
|
||||
|
||||
rol rax, 1
|
||||
and rax, 1
|
||||
pop rbx
|
||||
|
||||
|
||||
ret
|
||||
|
||||
rcr2_x64 ENDP
|
||||
|
||||
rcr2_x64 ENDP
|
||||
|
||||
END
|
||||
|
|
Loading…
Reference in New Issue