Add x86 32/64bit asm code for Scc_hash
This commit is contained in:
parent
9d2e1a9384
commit
b35f5797de
@ -252,6 +252,10 @@ void SumOf16x16BlockOfFrame_c (uint8_t* pRefPicture, const int32_t kiWidth, cons
|
||||
#ifdef X86_ASM
|
||||
extern "C"
|
||||
{
|
||||
void InitializeHashforFeature_sse2 (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
|
||||
uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList);
|
||||
void FillQpelLocationByFeatureValue_sse2 (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight,
|
||||
uint16_t** pFeatureValuePointerList);
|
||||
int32_t SumOf8x8SingleBlock_sse2 (uint8_t* pRef, const int32_t kiRefStride);
|
||||
int32_t SumOf16x16SingleBlock_sse2 (uint8_t* pRef, const int32_t kiRefStride);
|
||||
void SumOf8x8BlockOfFrame_sse2 (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
|
||||
|
@ -107,6 +107,8 @@ void WelsInitMeFunc (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag, bool bScre
|
||||
#if defined (X86_ASM)
|
||||
if (uiCpuFlag & WELS_CPU_SSE2) {
|
||||
//for feature search
|
||||
pFuncList->pfInitializeHashforFeature = InitializeHashforFeature_sse2;
|
||||
pFuncList->pfFillQpelLocationByFeatureValue = FillQpelLocationByFeatureValue_sse2;
|
||||
pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_sse2;
|
||||
pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_sse2;
|
||||
//TODO: is it possible to special-case widths that are a multiple of 8, so as to speed up processing for those widths?
|
||||
|
@ -31,6 +31,16 @@
|
||||
;*************************************************************************/
|
||||
%include "asm_inc.asm"
|
||||
|
||||
;***********************************************************************
; Local Data (Read Only)
;
; Each table is four 16-bit words (8 bytes), sized for a 64-bit movq load.
;***********************************************************************
SECTION .rodata align=16

ALIGN 16
mv_x_inc_x4     dw  16, 16, 16, 16      ; added to the x vector after every group of 4 columns
mv_y_inc_x4     dw   4,  4,  4,  4      ; added to the y vector after every row
mx_x_offset_x4  dw   0,  4,  8, 12      ; starting x values for the first 4 columns of a row
|
||||
|
||||
SECTION .text
|
||||
%ifdef X86_32
|
||||
;**********************************************************************************************************************
|
||||
@ -661,6 +671,159 @@ WIDTH_LOOP_X16_SSE4:
|
||||
%undef tmp_width
|
||||
ret
|
||||
|
||||
|
||||
;-----------------------------------------------------------------------------------------------------------------------------
; void FillQpelLocationByFeatureValue_sse2(uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight, uint16_t** pFeatureValuePointerList)
;
; x86-32 version (cdecl, all arguments on the stack).
; For each 16-bit feature value of the kiWidth x kiHeight map, writes one 32-bit record
; {low word = column*4, high word = row*4} through the write pointer stored in
; pFeatureValuePointerList[feature], then advances that stored pointer by 4 bytes.
;
; Preconditions: kiWidth is a multiple of 4 (four columns are consumed per inner iteration).
; A non-positive kiWidth or kiHeight returns without touching memory.
;
; Register roles:
;   esi = cursor into pFeatureOfBlock     edi = pFeatureValuePointerList
;   ebp = kiWidth                         ecx = columns left in the current row
;   edx = current feature value           ebx = &pFeatureValuePointerList[feature]
;   eax = destination of the record       [esp] = rows left
;   xmm2 = x words for 4 columns, xmm3 = y words, xmm4 = zero,
;   xmm5 = initial x words, xmm6 = y increment, xmm7 = x increment
;-----------------------------------------------------------------------------------------------------------------------------
WELS_EXTERN FillQpelLocationByFeatureValue_sse2
    push    esi
    push    edi
    push    ebx
    push    ebp

    %define _ps         16                  ; push size
    %define _ls         4                   ; local size
    %define sum_ref     esp+_ps+_ls+4
    %define pos_list    esp+_ps+_ls+16
    %define width       esp+_ps+_ls+8
    %define height      esp+_ps+_ls+12
    %define i_height    esp
    sub     esp,    _ls

    mov     esi,    [sum_ref]
    mov     edi,    [pos_list]
    mov     ebp,    [width]
    mov     ebx,    [height]

    ; Nothing to do for an empty map; without this the dec/jnz counters below would wrap around.
    test    ebp,    ebp
    jle     near HASH_FILL_DONE_SSE2
    test    ebx,    ebx
    jle     near HASH_FILL_DONE_SSE2
    mov     [i_height], ebx

    movq    xmm7,   [mv_x_inc_x4]           ; x_qpel inc
    movq    xmm6,   [mv_y_inc_x4]           ; y_qpel inc
    movq    xmm5,   [mx_x_offset_x4]        ; x_qpel vector
    pxor    xmm4,   xmm4
    pxor    xmm3,   xmm3                    ; y_qpel vector

HASH_HEIGHT_LOOP_SSE2:
    movdqa  xmm2,   xmm5                    ; restart x_qpel vector at the row start
    mov     ecx,    ebp

HASH_WIDTH_LOOP_SSE2:
    movq        xmm0,   [esi]               ; load 4 feature values (16 bits each)
    punpcklwd   xmm0,   xmm4                ; zero-extend them to 32 bits
    movdqa      xmm1,   xmm2
    punpcklwd   xmm1,   xmm3                ; interleave into 4 records {x, y}

%rep    3
    movd        edx,    xmm0                ; feature value
    lea         ebx,    [edi+edx*4]         ; &pFeatureValuePointerList[feature]
    mov         eax,    [ebx]
    movd        [eax],  xmm1                ; store the record
    prefetcht0  [eax+4]                     ; warm the next slot (cache misses seen in VTune); unlike a
                                            ; real load this cannot fault past the end of the buffer
    add         eax,    4
    mov         [ebx],  eax                 ; advance the write pointer
    psrldq      xmm1,   4
    psrldq      xmm0,   4
%endrep
    ; fourth column: same steps, no shift needed afterwards
    movd        edx,    xmm0
    lea         ebx,    [edi+edx*4]
    mov         eax,    [ebx]
    movd        [eax],  xmm1
    prefetcht0  [eax+4]
    add         eax,    4
    mov         [ebx],  eax

    paddw       xmm2,   xmm7                ; x += 16 for the next 4 columns
    lea         esi,    [esi+8]
    sub         ecx,    4
    jnz         near HASH_WIDTH_LOOP_SSE2

    paddw       xmm3,   xmm6                ; y += 4 for the next row
    dec         dword [i_height]
    jnz         near HASH_HEIGHT_LOOP_SSE2

HASH_FILL_DONE_SSE2:
    add     esp,    _ls
    %undef  _ps
    %undef  _ls
    %undef  sum_ref
    %undef  pos_list
    %undef  width
    %undef  height
    %undef  i_height
    pop     ebp
    pop     ebx
    pop     edi
    pop     esi
    ret
|
||||
|
||||
;---------------------------------------------------------------------------------------------------------------------------------------------------
; void InitializeHashforFeature_sse2( uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
;                                     uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList )
;
; x86-32 version (cdecl, all arguments on the stack).
; For i in [0, kiListSize): stores the running buffer position into pLocationOfFeature[i] and
; pFeatureValuePointerList[i], then advances the position by pTimesOfFeatureValue[i]*4 bytes.
;
; Entries are processed four at a time with aligned 16-byte accesses (movdqa), so
; pTimesOfFeatureValue, pLocationOfFeature and pFeatureValuePointerList must be 16-byte aligned.
; The remaining kiListSize & 3 entries are handled one by one. A non-positive kiListSize is a no-op.
;
; Register roles:
;   esi = pTimesOfFeatureValue   ebx = running buffer position   edx = loop counter
;   edi = pLocationOfFeature     ebp = pFeatureValuePointerList  ecx = byte offset into the arrays
;---------------------------------------------------------------------------------------------------------------------------------------------------
WELS_EXTERN InitializeHashforFeature_sse2
    push    ebx
    push    esi
    push    edi
    push    ebp
    %define _ps 16                          ; push size
    mov     edi,    [esp+_ps+16]            ; pLocationOfFeature
    mov     ebp,    [esp+_ps+20]            ; pFeatureValuePointerList
    mov     esi,    [esp+_ps+4]             ; pTimesOfFeatureValue
    mov     ebx,    [esp+_ps+8]             ; pBuf
    mov     edx,    [esp+_ps+12]            ; kiListSize

    test    edx,    edx
    jle     near hash_assign_no_rem_sse2    ; empty list: nothing to assign

    sar     edx,    2                       ; number of full groups of 4
    xor     ecx,    ecx
    pxor    xmm7,   xmm7
    test    edx,    edx
    jz      near hash_assign_rem_check_sse2 ; fewer than 4 entries: the dec/jnz loop must not be entered

hash_assign_loop_x4_sse2:
    movdqa  xmm0,   [esi+ecx]
    pslld   xmm0,   2                       ; count -> bytes (4 bytes per recorded position)

    movdqa  xmm1,   xmm0
    pcmpeqd xmm1,   xmm7
    movmskps eax,   xmm1
    cmp     eax,    0x0f
    je      near hash_assign_with_copy_sse2 ; all four counts are zero

%assign x 0
%rep 4
    mov     [edi+ecx+x],    ebx
    mov     [ebp+ecx+x],    ebx
    movd    eax,    xmm0
    add     ebx,    eax
    psrldq  xmm0,   4
%assign x x+4
%endrep
    jmp     near assign_next_sse2

hash_assign_with_copy_sse2:
    ; four empty features share the same position: broadcast it with two vector stores
    movd    xmm1,   ebx
    pshufd  xmm2,   xmm1,   0
    movdqa  [edi+ecx],  xmm2
    movdqa  [ebp+ecx],  xmm2

assign_next_sse2:
    add     ecx,    16
    dec     edx
    jnz     near hash_assign_loop_x4_sse2

hash_assign_rem_check_sse2:
    mov     edx,    [esp+_ps+12]            ; kiListSize
    and     edx,    3
    jz      near hash_assign_no_rem_sse2

hash_assign_loop_x4_rem_sse2:
    mov     [edi+ecx],  ebx
    mov     [ebp+ecx],  ebx
    mov     eax,    [esi+ecx]
    sal     eax,    2
    add     ebx,    eax
    add     ecx,    4
    dec     edx
    jnz     near hash_assign_loop_x4_rem_sse2

hash_assign_no_rem_sse2:
    %undef  _ps
    pop     ebp
    pop     edi
    pop     esi
    pop     ebx
    ret
|
||||
%else
|
||||
|
||||
;**********************************************************************************************************************
|
||||
@ -1222,6 +1385,146 @@ WIDTH_LOOP_X16_SSE4:
|
||||
LOAD_6_PARA_POP
|
||||
ret
|
||||
|
||||
;-----------------------------------------------------------------------------------------------------------------------------
; void FillQpelLocationByFeatureValue_sse2(uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight, uint16_t** pFeatureValuePointerList)
;
; x86-64 version; arguments arrive in r0..r3 through LOAD_4_PARA.
; For each 16-bit feature value of the kiWidth x kiHeight map, writes one 32-bit record
; {low word = column*4, high word = row*4} through the write pointer stored in
; pFeatureValuePointerList[feature], then advances that stored pointer by 4 bytes.
;
; Preconditions: kiWidth is a multiple of 4 (four columns are consumed per inner iteration).
; A non-positive kiWidth or kiHeight returns without touching memory.
;
; Register roles:
;   r0 = cursor into pFeatureOfBlock      r3 = pFeatureValuePointerList
;   r1 = kiWidth                          r4 = columns left in the current row
;   r2 = current feature value            r5 = &pFeatureValuePointerList[feature]
;   r6 = destination of the record        r12 = rows left
;   xmm2 = x words for 4 columns, xmm3 = y words, xmm4 = zero,
;   xmm5 = initial x words, xmm6 = y increment, xmm7 = x increment
;-----------------------------------------------------------------------------------------------------------------------------
WELS_EXTERN FillQpelLocationByFeatureValue_sse2
    %assign push_num 0
    LOAD_4_PARA
    PUSH_XMM 8
    SIGN_EXTENSION r1, r1d
    SIGN_EXTENSION r2, r2d
    push    r12

    ; Nothing to do for an empty map; without this the sub/dec + jnz counters below would wrap around.
    test    r1,     r1
    jle     near HASH_FILL_DONE_SSE2
    test    r2,     r2
    jle     near HASH_FILL_DONE_SSE2
    mov     r12,    r2                      ; r2 is reused as a temporary inside the loop

    movq    xmm7,   [mv_x_inc_x4]           ; x_qpel inc
    movq    xmm6,   [mv_y_inc_x4]           ; y_qpel inc
    movq    xmm5,   [mx_x_offset_x4]        ; x_qpel vector
    pxor    xmm4,   xmm4
    pxor    xmm3,   xmm3                    ; y_qpel vector

HASH_HEIGHT_LOOP_SSE2:
    movdqa  xmm2,   xmm5                    ; restart x_qpel vector at the row start
    mov     r4,     r1

HASH_WIDTH_LOOP_SSE2:
    movq        xmm0,   [r0]                ; load 4 feature values (16 bits each)
    punpcklwd   xmm0,   xmm4                ; zero-extend them to 32 bits
    movdqa      xmm1,   xmm2
    punpcklwd   xmm1,   xmm3                ; interleave into 4 records {x, y}

%rep    3
    movd        r2d,    xmm0                ; feature value (upper half of r2 is cleared)
    lea         r5,     [r3+r2*8]           ; &pFeatureValuePointerList[feature]
    mov         r6,     [r5]
    movd        [r6],   xmm1                ; store the record
    prefetcht0  [r6+4]                      ; warm the next slot (cache misses seen in VTune); unlike a
                                            ; real load this cannot fault past the end of the buffer
    add         r6,     4
    mov         [r5],   r6                  ; advance the write pointer
    psrldq      xmm1,   4
    psrldq      xmm0,   4
%endrep
    ; fourth column: same steps, no shift needed afterwards
    movd        r2d,    xmm0
    lea         r5,     [r3+r2*8]
    mov         r6,     [r5]
    movd        [r6],   xmm1
    prefetcht0  [r6+4]
    add         r6,     4
    mov         [r5],   r6

    paddw       xmm2,   xmm7                ; x += 16 for the next 4 columns
    lea         r0,     [r0+8]
    sub         r4,     4
    jnz         near HASH_WIDTH_LOOP_SSE2

    paddw       xmm3,   xmm6                ; y += 4 for the next row
    dec         r12
    jnz         near HASH_HEIGHT_LOOP_SSE2

HASH_FILL_DONE_SSE2:
    pop     r12
    POP_XMM
    ret
|
||||
|
||||
;---------------------------------------------------------------------------------------------------------------------------------------------------
; void InitializeHashforFeature_sse2( uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
;                                     uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList);
;
; x86-64 version; arguments arrive in r0..r4 through LOAD_5_PARA.
; For i in [0, kiListSize): stores the running buffer position into pLocationOfFeature[i] and
; pFeatureValuePointerList[i], then advances the position by pTimesOfFeatureValue[i]*4 bytes.
;
; Entries are processed four at a time with aligned 16-byte accesses (movdqa), so
; pTimesOfFeatureValue, pLocationOfFeature and pFeatureValuePointerList must be 16-byte aligned.
; The remaining kiListSize & 3 entries are handled one by one. A non-positive kiListSize is a no-op.
;
; Register roles:
;   r0 = pTimesOfFeatureValue    r1 = running buffer position    r2 = groups of 4 left
;   r3 = pLocationOfFeature      r4 = pFeatureValuePointerList   r5 = byte offset into the count array
;   r6 = temporary               r12 = kiListSize / remainder counter
; Pointer tables hold 8-byte entries, hence the [.. + r5*2] addressing against the 4-byte count offset.
;---------------------------------------------------------------------------------------------------------------------------------------------------
WELS_EXTERN InitializeHashforFeature_sse2
    %assign push_num 0
    LOAD_5_PARA
    SIGN_EXTENSION r2, r2d
    push    r12

    test    r2,     r2
    jle     near hash_assign_no_rem_sse2    ; empty list: nothing to assign

    mov     r12,    r2
    sar     r2,     2                       ; number of full groups of 4
    xor     r5,     r5
    pxor    xmm3,   xmm3
    test    r2,     r2
    jz      near hash_assign_rem_check_sse2 ; fewer than 4 entries: the dec/jnz loop must not be entered

hash_assign_loop_x4_sse2:
    movdqa  xmm0,   [r0+r5]
    pslld   xmm0,   2                       ; count -> bytes (4 bytes per recorded position)

    movdqa  xmm1,   xmm0
    pcmpeqd xmm1,   xmm3
    movmskps r6,    xmm1
    cmp     r6,     0x0f
    jz      near hash_assign_with_copy_sse2 ; all four counts are zero

%assign x 0
%rep 4
    mov     [r3+r5*2+x],    r1
    mov     [r4+r5*2+x],    r1
    movd    r6d,    xmm0                    ; zero-extends into r6
    add     r1,     r6
    psrldq  xmm0,   4
%assign x x+8
%endrep
    jmp     near assign_next_sse2

hash_assign_with_copy_sse2:
    ; four empty features share the same position: broadcast the 64-bit pointer and store 4 copies
    movq    xmm1,   r1
    pshufd  xmm2,   xmm1,   01000100b
    movdqa  [r3+r5*2],      xmm2
    movdqa  [r4+r5*2],      xmm2
    movdqa  [r3+r5*2+16],   xmm2
    movdqa  [r4+r5*2+16],   xmm2

assign_next_sse2:
    add     r5,     16
    dec     r2
    jnz     near hash_assign_loop_x4_sse2

hash_assign_rem_check_sse2:
    and     r12,    3
    jz      near hash_assign_no_rem_sse2

hash_assign_loop_x4_rem_sse2:
    mov     [r3+r5*2],  r1
    mov     [r4+r5*2],  r1
    mov     r6d,    [r0+r5]
    sal     r6,     2
    add     r1,     r6
    add     r5,     4
    dec     r12
    jnz     near hash_assign_loop_x4_rem_sse2

hash_assign_no_rem_sse2:
    pop     r12
    ret
|
||||
|
||||
%endif
|
||||
|
||||
;**********************************************************************************************************************************
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include "cpu_core.h"
|
||||
#include "cpu.h"
|
||||
#include "macros.h"
|
||||
#include "ls_defines.h"
|
||||
#include "svc_motion_estimate.h"
|
||||
|
||||
using namespace WelsEnc;
|
||||
@ -77,6 +78,33 @@ void SumOf16x16BlockOfFrame_ref (uint8_t* pRefPicture, const int32_t kiWidth, co
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Reference implementation used as the test anchor.
// Hands every feature value a slice of pBuf: entry i of both pointer tables receives the current
// buffer position, which then moves forward by two uint16_t per counted occurrence of value i.
void InitializeHashforFeature_ref (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
                                   uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList) {
  uint16_t* pCursor = pBuf;
  int32_t iIdx = 0;
  while (iIdx < kiListSize) {
    pFeatureValuePointerList[iIdx] = pCursor;
    pLocationOfFeature[iIdx] = pCursor;
    pCursor += (pTimesOfFeatureValue[iIdx] << 1);
    ++iIdx;
  }
}
|
||||
void FillQpelLocationByFeatureValue_ref (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight,
|
||||
uint16_t** pFeatureValuePointerList) {
|
||||
//assign each pixel's position
|
||||
uint16_t* pSrcPointer = pFeatureOfBlock;
|
||||
int32_t iQpelY = 0;
|
||||
for (int32_t y = 0; y < kiHeight; y++) {
|
||||
for (int32_t x = 0; x < kiWidth; x++) {
|
||||
uint16_t uiFeature = pSrcPointer[x];
|
||||
ST32 (&pFeatureValuePointerList[uiFeature][0], ((iQpelY << 16) | (x << 2)));
|
||||
pFeatureValuePointerList[uiFeature] += 2;
|
||||
}
|
||||
iQpelY += 4;
|
||||
pSrcPointer += kiWidth;
|
||||
}
|
||||
}
|
||||
|
||||
#define GENERATE_SumOfSingleBlock(anchor, method) \
|
||||
TEST (SVC_ME_FunTest, method) {\
|
||||
ENFORCE_STACK_ALIGN_1D (uint8_t, uiRefBuf, 16*320, 16);\
|
||||
@ -136,6 +164,89 @@ delete[] pFeatureOfBlockBuff1; \
|
||||
delete[] pFeatureOfBlockBuff2; \
|
||||
}
|
||||
|
||||
#define GENERATE_InitializeHashforFeature(anchor, method, kiWidth, kiHeight) \
TEST (SVC_ME_FunTest, method##_##kiWidth##x##kiHeight) {\
  /* Feeds anchor and method the same occurrence counts and the same backing buffer, */ \
  /* then expects both pairs of pointer tables to agree entry by entry. */ \
  const int32_t kiFeatureNum = 65536; \
  const int32_t kiRefStride = ((((kiWidth+15)>>4)<<4)+16); \
  const int32_t kiRefSize = ((kiHeight+16)*kiRefStride); \
  ENFORCE_NEW_ALIGN_1D (uint8_t, pRefPicture, pRefPictureBuff, kiRefSize, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t, pFeatureOfBlock, pFeatureOfBlockBuff, (kiWidth*kiHeight), 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t, pLocation1, pLocationBuff1, (kiWidth*kiHeight)*2, 16) \
  ENFORCE_NEW_ALIGN_1D (uint32_t, pTimesOfFeatureValue, pTimesOfFeatureValueBuff, kiFeatureNum, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t*, pLocationFeature0, pLocationFeature0Buff, kiFeatureNum, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t*, pLocationFeature1, pLocationFeature1Buff, kiFeatureNum, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t*, pFeaturePointValueList0, pFeaturePointValueList0Buff, kiFeatureNum, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t*, pFeaturePointValueList1, pFeaturePointValueList1Buff, kiFeatureNum, 16) \
  for (int32_t iRound = 0; iRound < SVC_ME_TEST_NUM; iRound++) { \
    FillWithRandomData (pRefPicture, kiRefSize); \
    memset (pTimesOfFeatureValue, 0, kiFeatureNum * sizeof (uint32_t)); \
    memset (pLocationFeature0, 0, kiFeatureNum * sizeof (uint16_t*)); \
    memset (pFeaturePointValueList0, 0, kiFeatureNum * sizeof (uint16_t*)); \
    memset (pLocationFeature1, 0, kiFeatureNum * sizeof (uint16_t*)); \
    memset (pFeaturePointValueList1, 0, kiFeatureNum * sizeof (uint16_t*)); \
    /* derive real occurrence counts from the random picture */ \
    SumOf8x8BlockOfFrame_c (pRefPicture, kiWidth, kiHeight, kiRefStride, pFeatureOfBlock, pTimesOfFeatureValue); \
    int32_t iActSize = kiFeatureNum; \
    anchor (pTimesOfFeatureValue, pLocation1, iActSize, pLocationFeature0, pFeaturePointValueList0); \
    method (pTimesOfFeatureValue, pLocation1, iActSize, pLocationFeature1, pFeaturePointValueList1); \
    for (int32_t iIdx = 0; iIdx < kiFeatureNum; iIdx++) { \
      EXPECT_EQ (pLocationFeature0[iIdx], pLocationFeature1[iIdx]); \
      EXPECT_EQ (pFeaturePointValueList0[iIdx], pFeaturePointValueList1[iIdx]); \
    } \
  } \
  delete[] pRefPictureBuff; \
  delete[] pFeatureOfBlockBuff; \
  delete[] pLocationBuff1; \
  delete[] pTimesOfFeatureValueBuff; \
  delete[] pLocationFeature0Buff; \
  delete[] pFeaturePointValueList0Buff; \
  delete[] pLocationFeature1Buff; \
  delete[] pFeaturePointValueList1Buff; \
}
|
||||
|
||||
|
||||
#define GENERATE_FillQpelLocationByFeatureValue(anchor, method, kiWidth, kiHeight) \
TEST (SVC_ME_FunTest, method##_##kiWidth##x##kiHeight) {\
  /* Builds two identical hash layouts over separate location buffers, lets anchor fill one */ \
  /* and method fill the other, then expects the two location buffers to be identical. */ \
  const int32_t kiFeatureNum = 65536; \
  const int32_t kiRefStride = ((((kiWidth+15)>>4)<<4)+16); \
  const int32_t kiRefSize = ((kiHeight+16)*kiRefStride); \
  const int32_t kiLocationNum = (kiWidth*kiHeight)*2; \
  ENFORCE_NEW_ALIGN_1D (uint8_t, pRefPicture, pRefPictureBuff, kiRefSize, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t, pFeatureOfBlock, pFeatureOfBlockBuff, (kiWidth*kiHeight), 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t, pLocation1, pLocationBuff1, kiLocationNum, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t, pLocation2, pLocationBuff2, kiLocationNum, 16) \
  ENFORCE_NEW_ALIGN_1D (uint32_t, pTimesOfFeatureValue, pTimesOfFeatureValueBuff, kiFeatureNum, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t*, pLocationFeature0, pLocationFeature0Buff, kiFeatureNum, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t*, pLocationFeature1, pLocationFeature1Buff, kiFeatureNum, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t*, pFeaturePointValueList0, pFeaturePointValueList0Buff, kiFeatureNum, 16) \
  ENFORCE_NEW_ALIGN_1D (uint16_t*, pFeaturePointValueList1, pFeaturePointValueList1Buff, kiFeatureNum, 16) \
  for (int32_t iRound = 0; iRound < SVC_ME_TEST_NUM; iRound++) { \
    FillWithRandomData (pRefPicture, kiRefSize); \
    memset (pTimesOfFeatureValue, 0, kiFeatureNum * sizeof (uint32_t)); \
    memset (pLocationFeature0, 0, kiFeatureNum * sizeof (uint16_t*)); \
    memset (pFeaturePointValueList0, 0, kiFeatureNum * sizeof (uint16_t*)); \
    memset (pLocationFeature1, 0, kiFeatureNum * sizeof (uint16_t*)); \
    memset (pFeaturePointValueList1, 0, kiFeatureNum * sizeof (uint16_t*)); \
    /* derive the feature map and its occurrence counts from the random picture */ \
    SumOf8x8BlockOfFrame_c (pRefPicture, kiWidth, kiHeight, kiRefStride, pFeatureOfBlock, pTimesOfFeatureValue); \
    int32_t iActSize = kiFeatureNum; \
    InitializeHashforFeature_c (pTimesOfFeatureValue, pLocation1, iActSize, pLocationFeature0, pFeaturePointValueList0); \
    InitializeHashforFeature_c (pTimesOfFeatureValue, pLocation2, iActSize, pLocationFeature1, pFeaturePointValueList1); \
    anchor (pFeatureOfBlock, kiWidth, kiHeight, pFeaturePointValueList0); \
    method (pFeatureOfBlock, kiWidth, kiHeight, pFeaturePointValueList1); \
    for (int32_t iIdx = 0; iIdx < kiLocationNum; iIdx++) { \
      EXPECT_EQ (pLocation1[iIdx], pLocation2[iIdx]); \
    } \
  } \
  delete[] pRefPictureBuff; \
  delete[] pFeatureOfBlockBuff; \
  delete[] pLocationBuff1; \
  delete[] pLocationBuff2; \
  delete[] pTimesOfFeatureValueBuff; \
  delete[] pLocationFeature0Buff; \
  delete[] pFeaturePointValueList0Buff; \
  delete[] pLocationFeature1Buff; \
  delete[] pFeaturePointValueList1Buff; \
}
|
||||
|
||||
GENERATE_InitializeHashforFeature (InitializeHashforFeature_ref, InitializeHashforFeature_c, 10, 10)
|
||||
GENERATE_FillQpelLocationByFeatureValue (FillQpelLocationByFeatureValue_ref, FillQpelLocationByFeatureValue_c, 16, 16)
|
||||
#ifdef X86_ASM
|
||||
GENERATE_InitializeHashforFeature (InitializeHashforFeature_ref, InitializeHashforFeature_sse2, 10, 10)
|
||||
GENERATE_FillQpelLocationByFeatureValue (FillQpelLocationByFeatureValue_ref, FillQpelLocationByFeatureValue_sse2, 16, 16)
|
||||
#endif
|
||||
|
||||
GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_c, 1, 1)
|
||||
GENERATE_SumOfFrame (SumOf16x16BlockOfFrame_ref, SumOf16x16BlockOfFrame_c, 1, 1)
|
||||
GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_c, 1, 320)
|
||||
|
Loading…
x
Reference in New Issue
Block a user