more optimizations for MSVC assembler (parallelism, prefetch optimization, loop alignment, ...)
git-svn-id: svn://ttmath.org/publicrep/ttmath/branches/chk@151 e52654a7-88a9-db11-a3e9-0013d4bc506e
This commit is contained in:
@@ -3434,7 +3434,7 @@ private:
|
|||||||
*/
|
*/
|
||||||
int FromString_ReadScientificIfExists(const tchar_t * & source)
|
int FromString_ReadScientificIfExists(const tchar_t * & source)
|
||||||
{
|
{
|
||||||
int c = 0;
|
uint c = 0;
|
||||||
|
|
||||||
bool scientific_read = false;
|
bool scientific_read = false;
|
||||||
const tchar_t * before_scientific = source;
|
const tchar_t * before_scientific = source;
|
||||||
|
@@ -37,6 +37,7 @@ namespace ttmath
|
|||||||
{
|
{
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
|
#include <windows.h>
|
||||||
#if defined(_UNICODE)
|
#if defined(_UNICODE)
|
||||||
typedef wchar_t tchar_t;
|
typedef wchar_t tchar_t;
|
||||||
typedef std::wstring tstr_t;
|
typedef std::wstring tstr_t;
|
||||||
@@ -71,20 +72,20 @@ namespace ttmath
|
|||||||
public:
|
public:
|
||||||
clsCrit(void)
|
clsCrit(void)
|
||||||
{
|
{
|
||||||
::InitializeCriticalSection(&_Crit);
|
InitializeCriticalSection(&_Crit);
|
||||||
}
|
}
|
||||||
virtual ~clsCrit(void)
|
virtual ~clsCrit(void)
|
||||||
{
|
{
|
||||||
::DeleteCriticalSection(&_Crit);
|
DeleteCriticalSection(&_Crit);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Enter(void) const
|
void Enter(void) const
|
||||||
{
|
{
|
||||||
::EnterCriticalSection(&_Crit);
|
EnterCriticalSection(&_Crit);
|
||||||
}
|
}
|
||||||
void Leave(void) const
|
void Leave(void) const
|
||||||
{
|
{
|
||||||
::LeaveCriticalSection(&_Crit);
|
LeaveCriticalSection(&_Crit);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
@@ -31,23 +31,21 @@ adc_x64 PROC
|
|||||||
; r9 = nCarry
|
; r9 = nCarry
|
||||||
|
|
||||||
xor rax, rax
|
xor rax, rax
|
||||||
mov r11, 0
|
xor r11, r11
|
||||||
sub rax, r9 ; sets CARRY if r9 != 0
|
sub rax, r9 ; sets CARRY if r9 != 0
|
||||||
|
|
||||||
|
ALIGN 16
|
||||||
loop1:
|
loop1:
|
||||||
mov rax,qword ptr [rdx + r11 * 8]
|
mov rax,qword ptr [rdx + r11 * 8]
|
||||||
adc qword ptr [rcx + r11 * 8], rax
|
adc qword ptr [rcx + r11 * 8], rax
|
||||||
inc r11
|
lea r11, [r11+1]
|
||||||
dec r8
|
dec r8
|
||||||
|
|
||||||
jnz loop1
|
jnz loop1
|
||||||
|
|
||||||
jc return_1 ; most of the times, there will be NO carry (I hope)
|
setc al
|
||||||
xor rax, rax
|
movzx rax, al
|
||||||
ret
|
|
||||||
|
|
||||||
return_1:
|
|
||||||
mov rax, 1
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
adc_x64 ENDP
|
adc_x64 ENDP
|
||||||
@@ -66,24 +64,22 @@ addindexed_x64 PROC
|
|||||||
; r9 = nValue
|
; r9 = nValue
|
||||||
|
|
||||||
sub rdx, r8 ; rdx = remaining count of uints
|
sub rdx, r8 ; rdx = remaining count of uints
|
||||||
|
ALIGN 16
|
||||||
loop1:
|
loop1:
|
||||||
add qword ptr [rcx + r8 * 8], r9
|
add qword ptr [rcx + r8 * 8], r9
|
||||||
jnc done
|
jnc done
|
||||||
|
|
||||||
inc r8
|
lea r8, [r8+1]
|
||||||
mov r9, 1
|
mov r9, 1
|
||||||
dec rdx
|
dec rdx
|
||||||
jnz loop1
|
jnz loop1
|
||||||
|
|
||||||
done:
|
done:
|
||||||
jc return_1 ; most of the times, there will be NO carry (I hope)
|
setc al
|
||||||
xor rax, rax
|
movzx rax, al
|
||||||
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
return_1:
|
|
||||||
mov rax, 1
|
|
||||||
ret
|
|
||||||
|
|
||||||
addindexed_x64 ENDP
|
addindexed_x64 ENDP
|
||||||
|
|
||||||
;----------------------------------------
|
;----------------------------------------
|
||||||
@@ -100,28 +96,32 @@ addindexed2_x64 PROC
|
|||||||
; r9 = nValue1
|
; r9 = nValue1
|
||||||
; [esp+0x28] = nValue2
|
; [esp+0x28] = nValue2
|
||||||
|
|
||||||
|
xor rax, rax ; return value
|
||||||
mov r11, rcx ; table
|
mov r11, rcx ; table
|
||||||
sub rdx, r8 ; rdx = remaining count of uints
|
sub rdx, r8 ; rdx = remaining count of uints
|
||||||
mov r10, [esp+028h] ; r10 = nValue2
|
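mov r10, [esp+028h] ; r10 = nValue2 -- NOTE(review): esp is only the low 32 bits of rsp, so this address is truncated in 64-bit code; should be [rsp+028h]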
|
||||||
|
|
||||||
add qword ptr [r11 + r8 * 8], r10
|
add qword ptr [r11 + r8 * 8], r10
|
||||||
inc r8
|
lea r8, [r8+1]
|
||||||
|
|
||||||
|
ALIGN 16
|
||||||
loop1:
|
loop1:
|
||||||
adc qword ptr [r11 + r8 * 8], r9
|
adc qword ptr [r11 + r8 * 8], r9
|
||||||
jnc done
|
jc next
|
||||||
|
ret
|
||||||
|
|
||||||
inc r8
|
next:
|
||||||
mov r9, 0 ; set to 0 -> cy still set!
|
lea r8, [r8+1]
|
||||||
|
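xor r9, r9 ; set to 0 -- NOTE(review): unlike the former "mov r9, 0", xor CLEARS cy, so the carry from the adc above is lost here; use mov (flag-neutral) to keep cy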
|
||||||
dec rdx
|
dec rdx
|
||||||
jnz loop1
|
jnz loop1
|
||||||
jc return_1 ; most of the times, there will be NO carry (I hope)
|
jc return_1 ; most of the times, there will be NO carry (I hope)
|
||||||
|
|
||||||
done:
|
done:
|
||||||
xor rax, rax
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
return_1:
|
return_1:
|
||||||
mov rax, 1
|
lea rax, [rax+1]
|
||||||
ret
|
ret
|
||||||
|
|
||||||
addindexed2_x64 ENDP
|
addindexed2_x64 ENDP
|
||||||
@@ -142,23 +142,20 @@ sbb_x64 PROC
|
|||||||
; r9 = nCarry
|
; r9 = nCarry
|
||||||
|
|
||||||
xor rax, rax
|
xor rax, rax
|
||||||
mov r11, 0
|
xor r11, r11
|
||||||
sub rax, r9 ; sets CARRY if r9 != 0
|
sub rax, r9 ; sets CARRY if r9 != 0
|
||||||
|
|
||||||
|
ALIGN 16
|
||||||
loop1:
|
loop1:
|
||||||
mov rax,qword ptr [rdx + r11 * 8]
|
mov rax,qword ptr [rdx + r11 * 8]
|
||||||
sbb qword ptr [rcx + r11 * 8], rax
|
sbb qword ptr [rcx + r11 * 8], rax
|
||||||
inc r11
|
lea r11, [r11+1]
|
||||||
dec r8
|
dec r8
|
||||||
|
|
||||||
jnz loop1
|
jnz loop1
|
||||||
|
|
||||||
jc return_1 ; most of the times, there will be NO carry (I hope)
|
setc al
|
||||||
xor rax, rax
|
movzx rax, al
|
||||||
ret
|
|
||||||
|
|
||||||
return_1:
|
|
||||||
mov rax, 1
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
sbb_x64 ENDP
|
sbb_x64 ENDP
|
||||||
@@ -176,11 +173,13 @@ subindexed_x64 PROC
|
|||||||
; r9 = nValue
|
; r9 = nValue
|
||||||
|
|
||||||
sub rdx, r8 ; rdx = remaining count of uints
|
sub rdx, r8 ; rdx = remaining count of uints
|
||||||
|
|
||||||
|
ALIGN 16
|
||||||
loop1:
|
loop1:
|
||||||
sub qword ptr [rcx + r8 * 8], r9
|
sub qword ptr [rcx + r8 * 8], r9
|
||||||
jnc done
|
jnc done
|
||||||
|
|
||||||
inc r8
|
lea r8, [r8+1]
|
||||||
mov r9, 1
|
mov r9, 1
|
||||||
dec rdx
|
dec rdx
|
||||||
jnz loop1
|
jnz loop1
|
||||||
@@ -210,9 +209,11 @@ rcl_x64 PROC
|
|||||||
mov r11, rcx ; table
|
mov r11, rcx ; table
|
||||||
xor r10, r10
|
xor r10, r10
|
||||||
neg r8 ; CY set if r8 <> 0
|
neg r8 ; CY set if r8 <> 0
|
||||||
|
|
||||||
|
ALIGN 16
|
||||||
loop1:
|
loop1:
|
||||||
rcl qword ptr [r11 + r10 * 8], 1
|
rcl qword ptr [r11 + r10 * 8], 1
|
||||||
inc r10
|
lea r10, [r10+1]
|
||||||
dec rdx
|
dec rdx
|
||||||
jnz loop1
|
jnz loop1
|
||||||
|
|
||||||
@@ -236,6 +237,8 @@ rcr_x64 PROC
|
|||||||
|
|
||||||
xor r10, r10
|
xor r10, r10
|
||||||
neg r8 ; CY set if r8 <> 0
|
neg r8 ; CY set if r8 <> 0
|
||||||
|
|
||||||
|
ALIGN 16
|
||||||
loop1:
|
loop1:
|
||||||
rcr qword ptr -8[rcx + rdx * 8], 1
|
rcr qword ptr -8[rcx + rdx * 8], 1
|
||||||
dec rdx
|
dec rdx
|
||||||
@@ -304,6 +307,7 @@ rcl2_x64 PROC
|
|||||||
|
|
||||||
mov r9, rax ; r9 = index (0..nSize-1)
|
mov r9, rax ; r9 = index (0..nSize-1)
|
||||||
|
|
||||||
|
ALIGN 16
|
||||||
loop1:
|
loop1:
|
||||||
rol qword ptr [r10+r9*8], cl
|
rol qword ptr [r10+r9*8], cl
|
||||||
mov rax, qword ptr [r10+r9*8]
|
mov rax, qword ptr [r10+r9*8]
|
||||||
@@ -312,7 +316,7 @@ loop1:
|
|||||||
or qword ptr [r10+r9*8], rbx
|
or qword ptr [r10+r9*8], rbx
|
||||||
mov rbx, rax
|
mov rbx, rax
|
||||||
|
|
||||||
inc r9
|
lea r9, [r9+1]
|
||||||
dec rdx
|
dec rdx
|
||||||
|
|
||||||
jnz loop1
|
jnz loop1
|
||||||
@@ -352,8 +356,9 @@ rcr2_x64 PROC
|
|||||||
cmovnz rbx, r11 ; if (c) then old value = mask
|
cmovnz rbx, r11 ; if (c) then old value = mask
|
||||||
|
|
||||||
mov r9, rdx ; r9 = index (0..nSize-1)
|
mov r9, rdx ; r9 = index (0..nSize-1)
|
||||||
dec r9
|
lea r9, [r9-1]
|
||||||
|
|
||||||
|
ALIGN 16
|
||||||
loop1:
|
loop1:
|
||||||
ror qword ptr [r10+r9*8], cl
|
ror qword ptr [r10+r9*8], cl
|
||||||
mov rax, qword ptr [r10+r9*8]
|
mov rax, qword ptr [r10+r9*8]
|
||||||
@@ -362,7 +367,7 @@ loop1:
|
|||||||
or qword ptr [r10+r9*8], rbx
|
or qword ptr [r10+r9*8], rbx
|
||||||
mov rbx, rax
|
mov rbx, rax
|
||||||
|
|
||||||
dec r9
|
lea r9, [r9-1]
|
||||||
dec rdx
|
dec rdx
|
||||||
|
|
||||||
jnz loop1
|
jnz loop1
|
||||||
|
Reference in New Issue
Block a user