Merge pull request #517 from mstorsjo/simplify-x86-asm-func-macro
Fold ALIGN 16 and the function label into WELS_EXTERN
This commit is contained in:
commit
e75cd2298b
@ -342,12 +342,14 @@ BITS 32
|
||||
%endmacro
|
||||
|
||||
;-----------------------------------------------------------------------
; WELS_EXTERN <name>
; Opens an exported function: place it directly before the function body.
;   %1 = function name as written in the assembly source.
; Expands to:
;   - ALIGN 16, so the entry label is emitted on a 16-byte boundary;
;   - a `global` declaration for the symbol;
;   - the entry label itself, so callers of this macro must not emit
;     their own `ALIGN 16` or `<name>:` label.
; When PREFIX is defined, the exported symbol is `_<name>` and <name> is
; %define'd to `_<name>`, so the label below and later uses of <name>
; expand to the underscore-prefixed symbol.
;-----------------------------------------------------------------------
%macro WELS_EXTERN 1
|
||||
ALIGN 16
|
||||
%ifdef PREFIX
|
||||
global _%1
|
||||
%define %1 _%1
|
||||
%else
|
||||
global %1
|
||||
%endif
|
||||
%1:
|
||||
%endmacro
|
||||
|
||||
%macro WELS_AbsW 2
|
||||
|
@ -55,12 +55,10 @@ SECTION .text
|
||||
; refer to "The IA-32 Intel(R) Architecture Software Developers Manual, Volume 2A A-M"
|
||||
; section CPUID - CPU Identification
|
||||
|
||||
WELS_EXTERN WelsCPUIdVerify
|
||||
ALIGN 16
|
||||
;******************************************************************************************
|
||||
; int32_t WelsCPUIdVerify()
|
||||
;******************************************************************************************
|
||||
WelsCPUIdVerify:
|
||||
WELS_EXTERN WelsCPUIdVerify
|
||||
push r1
|
||||
PUSHRFLAGS
|
||||
PUSHRFLAGS
|
||||
@ -73,14 +71,12 @@ WelsCPUIdVerify:
|
||||
pop r1
|
||||
ret
|
||||
|
||||
WELS_EXTERN WelsCPUId
|
||||
ALIGN 16
|
||||
;****************************************************************************************************
|
||||
; void WelsCPUId( int32_t uiIndex, int32_t *pFeatureA, int32_t *pFeatureB, int32_t *pFeatureC, int32_t *pFeatureD )
|
||||
;****************************************************************************************************
|
||||
%ifdef WIN64
|
||||
|
||||
WelsCPUId:
|
||||
WELS_EXTERN WelsCPUId
|
||||
push rbx
|
||||
push rdx
|
||||
|
||||
@ -98,7 +94,7 @@ WelsCPUId:
|
||||
ret
|
||||
|
||||
%elifdef UNIX64
|
||||
WelsCPUId:
|
||||
WELS_EXTERN WelsCPUId
|
||||
push rbx
|
||||
push rcx
|
||||
push rdx
|
||||
@ -118,7 +114,7 @@ WelsCPUId:
|
||||
|
||||
%elifdef X86_32
|
||||
|
||||
WelsCPUId:
|
||||
WELS_EXTERN WelsCPUId
|
||||
push ebx
|
||||
push edi
|
||||
|
||||
@ -143,13 +139,11 @@ WelsCPUId:
|
||||
|
||||
%endif
|
||||
|
||||
WELS_EXTERN WelsCPUSupportAVX
|
||||
; must be called after cpuid with index 1, passing the eax and ecx flags it returned
|
||||
ALIGN 16
|
||||
;****************************************************************************************************
|
||||
; int32_t WelsCPUSupportAVX( uint32_t eax, uint32_t ecx )
|
||||
;****************************************************************************************************
|
||||
WelsCPUSupportAVX:
|
||||
WELS_EXTERN WelsCPUSupportAVX
|
||||
%ifdef WIN64
|
||||
mov eax, ecx
|
||||
mov ecx, edx
|
||||
@ -178,13 +172,11 @@ avx_not_supported:
|
||||
ret
|
||||
|
||||
|
||||
WELS_EXTERN WelsCPUSupportFMA
|
||||
; must be called after cpuid with index 1, passing the eax and ecx flags it returned
|
||||
ALIGN 16
|
||||
;****************************************************************************************************
|
||||
; int32_t WelsCPUSupportFMA( uint32_t eax, uint32_t ecx )
|
||||
;****************************************************************************************************
|
||||
WelsCPUSupportFMA:
|
||||
WELS_EXTERN WelsCPUSupportFMA
|
||||
%ifdef WIN64
|
||||
mov eax, ecx
|
||||
mov ecx, edx
|
||||
@ -211,12 +203,10 @@ fma_not_supported:
|
||||
mov eax, 0
|
||||
ret
|
||||
|
||||
WELS_EXTERN WelsEmms
|
||||
ALIGN 16
|
||||
;******************************************************************************************
|
||||
; void WelsEmms()
|
||||
;******************************************************************************************
|
||||
WelsEmms:
|
||||
WELS_EXTERN WelsEmms
|
||||
emms ; empty mmx technology states
|
||||
ret
|
||||
|
||||
|
@ -61,8 +61,6 @@ SECTION .text
|
||||
|
||||
|
||||
WELS_EXTERN DeblockLumaLt4V_ssse3
|
||||
|
||||
DeblockLumaLt4V_ssse3:
|
||||
push rbp
|
||||
mov r11,[rsp + 16 + 20h] ; pTC
|
||||
sub rsp,1B0h
|
||||
@ -318,9 +316,6 @@ DeblockLumaLt4V_ssse3:
|
||||
|
||||
|
||||
WELS_EXTERN DeblockLumaEq4V_ssse3
|
||||
|
||||
ALIGN 16
|
||||
DeblockLumaEq4V_ssse3:
|
||||
mov rax,rsp
|
||||
push rbx
|
||||
push rbp
|
||||
@ -781,9 +776,6 @@ DeblockLumaEq4V_ssse3:
|
||||
|
||||
|
||||
WELS_EXTERN DeblockChromaLt4V_ssse3
|
||||
|
||||
ALIGN 16
|
||||
DeblockChromaLt4V_ssse3:
|
||||
mov rax,rsp
|
||||
push rbx
|
||||
push rdi
|
||||
@ -943,8 +935,6 @@ DeblockChromaLt4V_ssse3:
|
||||
|
||||
|
||||
WELS_EXTERN DeblockChromaEq4V_ssse3
|
||||
ALIGN 16
|
||||
DeblockChromaEq4V_ssse3:
|
||||
mov rax,rsp
|
||||
push rbx
|
||||
sub rsp,90h
|
||||
@ -1097,8 +1087,6 @@ DeblockChromaEq4V_ssse3:
|
||||
|
||||
|
||||
WELS_EXTERN DeblockChromaEq4H_ssse3
|
||||
ALIGN 16
|
||||
DeblockChromaEq4H_ssse3:
|
||||
mov rax,rsp
|
||||
mov [rax+20h],rbx
|
||||
push rdi
|
||||
@ -1361,8 +1349,6 @@ DeblockChromaEq4H_ssse3:
|
||||
|
||||
|
||||
WELS_EXTERN DeblockChromaLt4H_ssse3
|
||||
ALIGN 16
|
||||
DeblockChromaLt4H_ssse3:
|
||||
mov rax,rsp
|
||||
push rbx
|
||||
push rbp
|
||||
@ -1647,8 +1633,6 @@ DeblockChromaLt4H_ssse3:
|
||||
|
||||
|
||||
WELS_EXTERN DeblockLumaLt4V_ssse3
|
||||
|
||||
DeblockLumaLt4V_ssse3:
|
||||
push rbp
|
||||
mov r11,r8 ; pTC
|
||||
sub rsp,1B0h
|
||||
@ -1904,9 +1888,6 @@ DeblockLumaLt4V_ssse3:
|
||||
|
||||
|
||||
WELS_EXTERN DeblockLumaEq4V_ssse3
|
||||
|
||||
ALIGN 16
|
||||
DeblockLumaEq4V_ssse3:
|
||||
mov rax,rsp
|
||||
push rbx
|
||||
push rbp
|
||||
@ -2366,8 +2347,6 @@ DeblockLumaEq4V_ssse3:
|
||||
ret
|
||||
|
||||
WELS_EXTERN DeblockChromaLt4V_ssse3
|
||||
ALIGN 16
|
||||
DeblockChromaLt4V_ssse3:
|
||||
mov rax,rsp
|
||||
push rbx
|
||||
push rbp
|
||||
@ -2534,8 +2513,6 @@ DeblockChromaLt4V_ssse3:
|
||||
ret
|
||||
|
||||
WELS_EXTERN DeblockChromaEq4V_ssse3
|
||||
|
||||
DeblockChromaEq4V_ssse3:
|
||||
mov rax,rsp
|
||||
push rbx
|
||||
push rbp
|
||||
@ -2685,9 +2662,6 @@ DeblockChromaEq4V_ssse3:
|
||||
ret
|
||||
|
||||
WELS_EXTERN DeblockChromaEq4H_ssse3
|
||||
|
||||
ALIGN 16
|
||||
DeblockChromaEq4H_ssse3:
|
||||
mov rax,rsp
|
||||
push rbx
|
||||
push rbp
|
||||
@ -2960,8 +2934,6 @@ DeblockChromaEq4H_ssse3:
|
||||
|
||||
|
||||
WELS_EXTERN DeblockChromaLt4H_ssse3
|
||||
ALIGN 16
|
||||
DeblockChromaLt4H_ssse3:
|
||||
mov rax,rsp
|
||||
push rbx
|
||||
push rbp
|
||||
@ -3256,9 +3228,6 @@ DeblockChromaLt4H_ssse3:
|
||||
; int32_t iAlpha, int32_t iBeta)
|
||||
;********************************************************************************
|
||||
WELS_EXTERN DeblockChromaEq4V_ssse3
|
||||
|
||||
ALIGN 16
|
||||
DeblockChromaEq4V_ssse3:
|
||||
push ebp
|
||||
mov ebp,esp
|
||||
and esp,0FFFFFFF0h
|
||||
@ -3426,8 +3395,6 @@ DeblockChromaEq4V_ssse3:
|
||||
;*******************************************************************************
|
||||
|
||||
WELS_EXTERN DeblockChromaLt4V_ssse3
|
||||
|
||||
DeblockChromaLt4V_ssse3:
|
||||
push ebp
|
||||
mov ebp,esp
|
||||
and esp,0FFFFFFF0h
|
||||
@ -3629,10 +3596,6 @@ DeblockChromaLt4V_ssse3:
|
||||
;***************************************************************************
|
||||
|
||||
WELS_EXTERN DeblockChromaEq4H_ssse3
|
||||
|
||||
ALIGN 16
|
||||
|
||||
DeblockChromaEq4H_ssse3:
|
||||
push ebp
|
||||
mov ebp,esp
|
||||
and esp,0FFFFFFF0h
|
||||
@ -3914,10 +3877,6 @@ DeblockChromaEq4H_ssse3:
|
||||
;*******************************************************************************
|
||||
|
||||
WELS_EXTERN DeblockChromaLt4H_ssse3
|
||||
|
||||
ALIGN 16
|
||||
|
||||
DeblockChromaLt4H_ssse3:
|
||||
push ebp
|
||||
mov ebp,esp
|
||||
and esp,0FFFFFFF0h
|
||||
@ -4230,10 +4189,6 @@ DeblockChromaLt4H_ssse3:
|
||||
|
||||
|
||||
WELS_EXTERN DeblockLumaLt4V_ssse3
|
||||
|
||||
ALIGN 16
|
||||
|
||||
DeblockLumaLt4V_ssse3:
|
||||
push ebp
|
||||
mov ebp, esp
|
||||
and esp, -16 ; fffffff0H
|
||||
@ -4620,12 +4575,9 @@ DeblockLumaLt4V_ssse3:
|
||||
; int32_t iBeta)
|
||||
;*******************************************************************************
|
||||
|
||||
|
||||
WELS_EXTERN DeblockLumaEq4V_ssse3
|
||||
|
||||
ALIGN 16
|
||||
|
||||
DeblockLumaEq4V_ssse3:
|
||||
|
||||
push ebp
|
||||
mov ebp, esp
|
||||
and esp, -16 ; fffffff0H
|
||||
@ -5174,10 +5126,6 @@ DeblockLumaEq4V_ssse3:
|
||||
;********************************************************************************
|
||||
|
||||
WELS_EXTERN DeblockLumaTransposeH2V_sse2
|
||||
|
||||
ALIGN 16
|
||||
|
||||
DeblockLumaTransposeH2V_sse2:
|
||||
push r3
|
||||
push r4
|
||||
push r5
|
||||
@ -5253,10 +5201,6 @@ DeblockLumaTransposeH2V_sse2:
|
||||
;*******************************************************************************************
|
||||
|
||||
WELS_EXTERN DeblockLumaTransposeV2H_sse2
|
||||
|
||||
ALIGN 16
|
||||
|
||||
DeblockLumaTransposeV2H_sse2:
|
||||
push r3
|
||||
push r4
|
||||
|
||||
|
@ -56,9 +56,6 @@
|
||||
|
||||
SECTION .text
|
||||
|
||||
WELS_EXTERN ExpandPictureLuma_sse2
|
||||
WELS_EXTERN ExpandPictureChromaAlign_sse2 ; for chroma alignment
|
||||
WELS_EXTERN ExpandPictureChromaUnalign_sse2 ; for chroma unalignment
|
||||
|
||||
;;;;;;;expanding result;;;;;;;
|
||||
|
||||
@ -349,14 +346,13 @@ WELS_EXTERN ExpandPictureChromaUnalign_sse2 ; for chroma unalignment
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************----------------
|
||||
; void ExpandPictureLuma_sse2( uint8_t *pDst,
|
||||
; const int32_t iStride,
|
||||
; const int32_t iWidth,
|
||||
; const int32_t iHeight );
|
||||
;***********************************************************************----------------
|
||||
ExpandPictureLuma_sse2:
|
||||
WELS_EXTERN ExpandPictureLuma_sse2
|
||||
|
||||
push r4
|
||||
push r5
|
||||
@ -476,14 +472,13 @@ ExpandPictureLuma_sse2:
|
||||
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************----------------
|
||||
; void ExpandPictureChromaAlign_sse2( uint8_t *pDst,
|
||||
; const int32_t iStride,
|
||||
; const int32_t iWidth,
|
||||
; const int32_t iHeight );
|
||||
;***********************************************************************----------------
|
||||
ExpandPictureChromaAlign_sse2:
|
||||
WELS_EXTERN ExpandPictureChromaAlign_sse2
|
||||
|
||||
push r4
|
||||
push r5
|
||||
@ -602,14 +597,13 @@ ExpandPictureChromaAlign_sse2:
|
||||
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************----------------
|
||||
; void ExpandPictureChromaUnalign_sse2( uint8_t *pDst,
|
||||
; const int32_t iStride,
|
||||
; const int32_t iWidth,
|
||||
; const int32_t iHeight );
|
||||
;***********************************************************************----------------
|
||||
ExpandPictureChromaUnalign_sse2:
|
||||
WELS_EXTERN ExpandPictureChromaUnalign_sse2
|
||||
push r4
|
||||
push r5
|
||||
push r6
|
||||
|
@ -54,12 +54,6 @@
|
||||
|
||||
SECTION .text
|
||||
|
||||
WELS_EXTERN WelsCopy16x16_sse2
|
||||
WELS_EXTERN WelsCopy16x16NotAligned_sse2
|
||||
WELS_EXTERN WelsCopy8x8_mmx
|
||||
WELS_EXTERN WelsCopy16x8NotAligned_sse2 ;
|
||||
WELS_EXTERN WelsCopy8x16_mmx ;
|
||||
WELS_EXTERN UpdateMbMv_sse2 ;
|
||||
|
||||
;***********************************************************************
|
||||
; void WelsCopy16x16_sse2( uint8_t* Dst,
|
||||
@ -67,8 +61,7 @@ WELS_EXTERN UpdateMbMv_sse2 ;
|
||||
; uint8_t* Src,
|
||||
; int32_t iStrideS )
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
WelsCopy16x16_sse2:
|
||||
WELS_EXTERN WelsCopy16x16_sse2
|
||||
|
||||
push r4
|
||||
push r5
|
||||
@ -130,9 +123,8 @@ WelsCopy16x16_sse2:
|
||||
; uint8_t* Src,
|
||||
; int32_t iStrideS )
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
; dst can be aligned to 16 bytes, but pSrc alignment is not guaranteed, 12/29/2011
|
||||
WelsCopy16x16NotAligned_sse2:
|
||||
WELS_EXTERN WelsCopy16x16NotAligned_sse2
|
||||
push r4
|
||||
push r5
|
||||
%assign push_num 2
|
||||
@ -194,8 +186,7 @@ WelsCopy16x16NotAligned_sse2:
|
||||
; uint8_t* Src,
|
||||
; int32_t iStrideS )
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
WelsCopy16x8NotAligned_sse2:
|
||||
WELS_EXTERN WelsCopy16x8NotAligned_sse2
|
||||
push r4
|
||||
push r5
|
||||
%assign push_num 2
|
||||
@ -235,8 +226,7 @@ WelsCopy16x8NotAligned_sse2:
|
||||
; uint8_t* Src,
|
||||
; int32_t iStrideS )
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
WelsCopy8x16_mmx:
|
||||
WELS_EXTERN WelsCopy8x16_mmx
|
||||
%assign push_num 0
|
||||
LOAD_4_PARA
|
||||
|
||||
@ -300,8 +290,7 @@ WelsCopy8x16_mmx:
|
||||
; uint8_t* Src,
|
||||
; int32_t iStrideS )
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
WelsCopy8x8_mmx:
|
||||
WELS_EXTERN WelsCopy8x8_mmx
|
||||
push r4
|
||||
%assign push_num 1
|
||||
LOAD_4_PARA
|
||||
@ -349,8 +338,7 @@ WelsCopy8x8_mmx:
|
||||
;***********************************************************************
|
||||
; void UpdateMbMv_sse2( SMVUnitXY *pMvBuffer, const SMVUnitXY sMv )
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
UpdateMbMv_sse2:
|
||||
WELS_EXTERN UpdateMbMv_sse2
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
@ -373,23 +361,16 @@ UpdateMbMv_sse2:
|
||||
|
||||
SECTION .text
|
||||
|
||||
WELS_EXTERN PixelAvgWidthEq4_mmx
|
||||
WELS_EXTERN PixelAvgWidthEq8_mmx
|
||||
WELS_EXTERN PixelAvgWidthEq16_sse2
|
||||
|
||||
WELS_EXTERN McCopyWidthEq4_mmx
|
||||
WELS_EXTERN McCopyWidthEq8_mmx
|
||||
WELS_EXTERN McCopyWidthEq16_sse2
|
||||
|
||||
|
||||
ALIGN 16
|
||||
|
||||
;*******************************************************************************
|
||||
; void PixelAvgWidthEq4_mmx( uint8_t *pDst, int iDstStride,
|
||||
; uint8_t *pSrcA, int iSrcAStride,
|
||||
; uint8_t *pSrcB, int iSrcBStride,
|
||||
; int iHeight );
|
||||
;*******************************************************************************
|
||||
PixelAvgWidthEq4_mmx:
|
||||
WELS_EXTERN PixelAvgWidthEq4_mmx
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_7_PARA
|
||||
@ -416,14 +397,13 @@ ALIGN 4
|
||||
ret
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void PixelAvgWidthEq8_mmx( uint8_t *pDst, int iDstStride,
|
||||
; uint8_t *pSrcA, int iSrcAStride,
|
||||
; uint8_t *pSrcB, int iSrcBStride,
|
||||
; int iHeight );
|
||||
;*******************************************************************************
|
||||
PixelAvgWidthEq8_mmx:
|
||||
WELS_EXTERN PixelAvgWidthEq8_mmx
|
||||
%assign push_num 0
|
||||
LOAD_7_PARA
|
||||
|
||||
@ -454,14 +434,13 @@ ALIGN 4
|
||||
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void PixelAvgWidthEq16_sse2( uint8_t *pDst, int iDstStride,
|
||||
; uint8_t *pSrcA, int iSrcAStride,
|
||||
; uint8_t *pSrcB, int iSrcBStride,
|
||||
; int iHeight );
|
||||
;*******************************************************************************
|
||||
PixelAvgWidthEq16_sse2:
|
||||
WELS_EXTERN PixelAvgWidthEq16_sse2
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_7_PARA
|
||||
@ -507,12 +486,11 @@ ALIGN 4
|
||||
LOAD_7_PARA_POP
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void McCopyWidthEq4_mmx( uint8_t *pSrc, int iSrcStride,
|
||||
; uint8_t *pDst, int iDstStride, int iHeight )
|
||||
;*******************************************************************************
|
||||
McCopyWidthEq4_mmx:
|
||||
WELS_EXTERN McCopyWidthEq4_mmx
|
||||
push r5
|
||||
%assign push_num 1
|
||||
LOAD_5_PARA
|
||||
@ -535,12 +513,11 @@ ALIGN 4
|
||||
pop r5
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void McCopyWidthEq8_mmx( uint8_t *pSrc, int iSrcStride,
|
||||
; uint8_t *pDst, int iDstStride, int iHeight )
|
||||
;*******************************************************************************
|
||||
McCopyWidthEq8_mmx:
|
||||
WELS_EXTERN McCopyWidthEq8_mmx
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
|
||||
@ -562,7 +539,6 @@ ALIGN 4
|
||||
ret
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void McCopyWidthEq16_sse2( uint8_t *pSrc, int iSrcStride, uint8_t *pDst, int iDstStride, int iHeight )
|
||||
;*******************************************************************************
|
||||
@ -577,7 +553,7 @@ ALIGN 16
|
||||
movq [%1], %2
|
||||
movhps [%1+8], %2
|
||||
%endmacro
|
||||
McCopyWidthEq16_sse2:
|
||||
WELS_EXTERN McCopyWidthEq16_sse2
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
|
@ -65,7 +65,6 @@ h264_d0x20_mmx:
|
||||
|
||||
SECTION .text
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void McChromaWidthEq4_mmx( const uint8_t *src,
|
||||
; int32_t iSrcStride,
|
||||
@ -75,7 +74,6 @@ ALIGN 16
|
||||
; int32_t iHeight );
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN McChromaWidthEq4_mmx
|
||||
McChromaWidthEq4_mmx:
|
||||
%assign push_num 0
|
||||
LOAD_6_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -140,7 +138,6 @@ McChromaWidthEq4_mmx:
|
||||
ret
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void McChromaWidthEq8_sse2( const uint8_t *pSrc,
|
||||
; int32_t iSrcStride,
|
||||
@ -150,7 +147,6 @@ ALIGN 16
|
||||
; int32_t iHeight );
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN McChromaWidthEq8_sse2
|
||||
McChromaWidthEq8_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_6_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -219,7 +215,6 @@ McChromaWidthEq8_sse2:
|
||||
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
; void McChromaWidthEq8_ssse3( const uint8_t *pSrc,
|
||||
; int32_t iSrcStride,
|
||||
@ -229,7 +224,6 @@ ALIGN 16
|
||||
; int32_t iHeight);
|
||||
;***********************************************************************
|
||||
WELS_EXTERN McChromaWidthEq8_ssse3
|
||||
McChromaWidthEq8_ssse3:
|
||||
%assign push_num 0
|
||||
LOAD_6_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
|
@ -71,10 +71,8 @@ h264_mc_hc_32:
|
||||
|
||||
SECTION .text
|
||||
|
||||
WELS_EXTERN McHorVer20WidthEq4_mmx
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void McHorVer20WidthEq4_mmx( const uint8_t *pSrc,
|
||||
; int iSrcStride,
|
||||
@ -82,7 +80,7 @@ ALIGN 16
|
||||
; int iDstStride,
|
||||
; int iHeight)
|
||||
;*******************************************************************************
|
||||
McHorVer20WidthEq4_mmx:
|
||||
WELS_EXTERN McHorVer20WidthEq4_mmx
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -161,12 +159,7 @@ McHorVer20WidthEq4_mmx:
|
||||
;*******************************************************************************
|
||||
|
||||
SECTION .text
|
||||
WELS_EXTERN McHorVer22Width8HorFirst_sse2
|
||||
WELS_EXTERN McHorVer02WidthEq8_sse2
|
||||
WELS_EXTERN McHorVer20WidthEq8_sse2
|
||||
WELS_EXTERN McHorVer20WidthEq16_sse2
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
; void McHorVer22Width8HorFirst_sse2(const int16_t *pSrc,
|
||||
; int16_t iSrcStride,
|
||||
@ -175,7 +168,7 @@ ALIGN 16
|
||||
; int32_t iHeight
|
||||
; )
|
||||
;***********************************************************************
|
||||
McHorVer22Width8HorFirst_sse2:
|
||||
WELS_EXTERN McHorVer22Width8HorFirst_sse2
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -217,7 +210,6 @@ McHorVer22Width8HorFirst_sse2:
|
||||
LOAD_5_PARA_POP
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void McHorVer20WidthEq8_sse2( const uint8_t *pSrc,
|
||||
; int iSrcStride,
|
||||
@ -226,7 +218,7 @@ ALIGN 16
|
||||
; int iHeight,
|
||||
; );
|
||||
;*******************************************************************************
|
||||
McHorVer20WidthEq8_sse2:
|
||||
WELS_EXTERN McHorVer20WidthEq8_sse2
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -272,7 +264,6 @@ McHorVer20WidthEq8_sse2:
|
||||
LOAD_5_PARA_POP
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void McHorVer20WidthEq16_sse2( const uint8_t *pSrc,
|
||||
; int iSrcStride,
|
||||
@ -281,7 +272,7 @@ ALIGN 16
|
||||
; int iHeight,
|
||||
; );
|
||||
;*******************************************************************************
|
||||
McHorVer20WidthEq16_sse2:
|
||||
WELS_EXTERN McHorVer20WidthEq16_sse2
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -361,8 +352,7 @@ McHorVer20WidthEq16_sse2:
|
||||
; int iDstStride,
|
||||
; int iHeight )
|
||||
;*******************************************************************************
|
||||
ALIGN 16
|
||||
McHorVer02WidthEq8_sse2:
|
||||
WELS_EXTERN McHorVer02WidthEq8_sse2
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -443,11 +433,6 @@ McHorVer02WidthEq8_sse2:
|
||||
|
||||
SECTION .text
|
||||
|
||||
WELS_EXTERN McHorVer20Width9Or17_sse2
|
||||
WELS_EXTERN McHorVer02Height9Or17_sse2
|
||||
WELS_EXTERN McHorVer22Width8VerLastAlign_sse2
|
||||
WELS_EXTERN McHorVer22Width8VerLastUnAlign_sse2
|
||||
WELS_EXTERN McHorVer22HorFirst_sse2
|
||||
|
||||
|
||||
;***********************************************************************
|
||||
@ -458,8 +443,7 @@ WELS_EXTERN McHorVer22HorFirst_sse2
|
||||
; int32_t iWidth,
|
||||
; int32_t iHeight )
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
McHorVer02Height9Or17_sse2:
|
||||
WELS_EXTERN McHorVer02Height9Or17_sse2
|
||||
%assign push_num 0
|
||||
LOAD_6_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -583,7 +567,6 @@ McHorVer02Height9Or17_sse2:
|
||||
ret
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
; void McHorVer20Width9Or17_sse2( const uint8_t *pSrc,
|
||||
; int32_t iSrcStride,
|
||||
@ -593,7 +576,7 @@ ALIGN 16
|
||||
; int32_t iHeight
|
||||
; );
|
||||
;***********************************************************************
|
||||
McHorVer20Width9Or17_sse2:
|
||||
WELS_EXTERN McHorVer20Width9Or17_sse2
|
||||
%assign push_num 0
|
||||
LOAD_6_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -742,7 +725,6 @@ McHorVer20Width9Or17_sse2:
|
||||
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
;void McHorVer22HorFirst_sse2
|
||||
; (const uint8_t *pSrc,
|
||||
@ -751,7 +733,7 @@ ALIGN 16
|
||||
; int32_t iTapStride,
|
||||
; int32_t iWidth,int32_t iHeight);
|
||||
;***********************************************************************
|
||||
McHorVer22HorFirst_sse2:
|
||||
WELS_EXTERN McHorVer22HorFirst_sse2
|
||||
%assign push_num 0
|
||||
LOAD_6_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -918,7 +900,7 @@ McHorVer22HorFirst_sse2:
|
||||
; int32_t iHeight);
|
||||
;***********************************************************************
|
||||
|
||||
McHorVer22Width8VerLastAlign_sse2:
|
||||
WELS_EXTERN McHorVer22Width8VerLastAlign_sse2
|
||||
%assign push_num 0
|
||||
LOAD_6_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -1047,7 +1029,7 @@ McHorVer22HorFirst_sse2:
|
||||
; int32_t iHeight);
|
||||
;***********************************************************************
|
||||
|
||||
McHorVer22Width8VerLastUnAlign_sse2:
|
||||
WELS_EXTERN McHorVer22Width8VerLastUnAlign_sse2
|
||||
%assign push_num 0
|
||||
LOAD_6_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
|
@ -156,8 +156,6 @@ SECTION .text
|
||||
;
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsSampleSatd4x4_sse2
|
||||
align 16
|
||||
WelsSampleSatd4x4_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_4_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -229,9 +227,7 @@ WelsSampleSatd4x4_sse2:
|
||||
;int32_t WelsSampleSatd8x8_sse2( uint8_t *, int32_t, uint8_t *, int32_t );
|
||||
;
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsSampleSatd8x8_sse2
|
||||
align 16
|
||||
WelsSampleSatd8x8_sse2:
|
||||
WELS_EXTERN WelsSampleSatd8x8_sse2
|
||||
%assign push_num 0
|
||||
LOAD_4_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -250,9 +246,7 @@ align 16
|
||||
;int32_t WelsSampleSatd8x16_sse2( uint8_t *, int32_t, uint8_t *, int32_t );
|
||||
;
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsSampleSatd8x16_sse2
|
||||
align 16
|
||||
WelsSampleSatd8x16_sse2:
|
||||
WELS_EXTERN WelsSampleSatd8x16_sse2
|
||||
%assign push_num 0
|
||||
LOAD_4_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -277,8 +271,6 @@ align 16
|
||||
;
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsSampleSatd16x8_sse2
|
||||
align 16
|
||||
WelsSampleSatd16x8_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_4_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -308,8 +300,6 @@ WelsSampleSatd16x8_sse2:
|
||||
;
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsSampleSatd16x16_sse2
|
||||
align 16
|
||||
WelsSampleSatd16x16_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_4_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -484,7 +474,6 @@ WelsSampleSatd16x16_sse2:
|
||||
|
||||
%ifdef X86_32
|
||||
WELS_EXTERN WelsIntra16x16Combined3Satd_sse41
|
||||
WelsIntra16x16Combined3Satd_sse41:
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
@ -678,7 +667,6 @@ loop_chroma_satdx3_cb_cr:
|
||||
;to reduce the code size of WelsIntraChroma8x8Combined3Satd_sse41
|
||||
|
||||
WELS_EXTERN WelsIntraChroma8x8Combined3Satd_sse41
|
||||
WelsIntraChroma8x8Combined3Satd_sse41:
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
@ -782,7 +770,6 @@ ret
|
||||
;
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsIntra16x16Combined3Sad_ssse3
|
||||
WelsIntra16x16Combined3Sad_ssse3:
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
@ -987,7 +974,6 @@ return_sad_intra_16x16_x3:
|
||||
;
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsSampleSatd4x4_sse41
|
||||
WelsSampleSatd4x4_sse41:
|
||||
%assign push_num 0
|
||||
LOAD_4_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -1040,8 +1026,6 @@ WelsSampleSatd4x4_sse41:
|
||||
;
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsSampleSatd8x8_sse41
|
||||
align 16
|
||||
WelsSampleSatd8x8_sse41:
|
||||
%ifdef X86_32
|
||||
push r4
|
||||
push r5
|
||||
@ -1072,8 +1056,6 @@ WelsSampleSatd8x8_sse41:
|
||||
;
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsSampleSatd8x16_sse41
|
||||
align 16
|
||||
WelsSampleSatd8x16_sse41:
|
||||
%ifdef X86_32
|
||||
push r4
|
||||
push r5
|
||||
@ -1110,8 +1092,6 @@ loop_get_satd_8x16:
|
||||
;
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsSampleSatd16x8_sse41
|
||||
align 16
|
||||
WelsSampleSatd16x8_sse41:
|
||||
%ifdef X86_32
|
||||
push r4
|
||||
push r5
|
||||
@ -1155,8 +1135,6 @@ WelsSampleSatd16x8_sse41:
|
||||
;***********************************************************************
|
||||
|
||||
WELS_EXTERN WelsSampleSatd16x16_sse41
|
||||
align 16
|
||||
WelsSampleSatd16x16_sse41:
|
||||
%ifdef X86_32
|
||||
push r4
|
||||
push r5
|
||||
@ -1276,8 +1254,6 @@ loop_get_satd_16x16_right:
|
||||
;
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsSampleSad16x16_sse2
|
||||
align 16
|
||||
WelsSampleSad16x16_sse2:
|
||||
%ifdef X86_32
|
||||
push r4
|
||||
push r5
|
||||
@ -1319,8 +1295,6 @@ WelsSampleSad16x16_sse2:
|
||||
;
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsSampleSad16x8_sse2
|
||||
align 16
|
||||
WelsSampleSad16x8_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_4_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -1346,7 +1320,6 @@ WelsSampleSad16x8_sse2:
|
||||
|
||||
|
||||
WELS_EXTERN WelsSampleSad8x16_sse2
|
||||
WelsSampleSad8x16_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_4_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -1377,7 +1350,6 @@ cmp %1, (32-%2)|(%3>>1)
|
||||
%endmacro
|
||||
|
||||
WELS_EXTERN WelsSampleSad8x8_sse21
|
||||
WelsSampleSad8x8_sse21:
|
||||
%assign push_num 0
|
||||
mov r2, arg3
|
||||
push r2
|
||||
@ -1536,7 +1508,6 @@ WelsSampleSad8x8_sse21:
|
||||
paddw xmm7, %4
|
||||
%endmacro
|
||||
WELS_EXTERN WelsSampleSadFour16x16_sse2
|
||||
WelsSampleSadFour16x16_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -1654,7 +1625,6 @@ WelsSampleSadFour16x16_sse2:
|
||||
|
||||
|
||||
WELS_EXTERN WelsSampleSadFour16x8_sse2
|
||||
WelsSampleSadFour16x8_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -1739,7 +1709,6 @@ WelsSampleSadFour16x8_sse2:
|
||||
ret
|
||||
|
||||
WELS_EXTERN WelsSampleSadFour8x16_sse2
|
||||
WelsSampleSadFour8x16_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -1951,7 +1920,6 @@ WelsSampleSadFour8x16_sse2:
|
||||
|
||||
|
||||
WELS_EXTERN WelsSampleSadFour8x8_sse2
|
||||
WelsSampleSadFour8x8_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -2071,7 +2039,6 @@ WelsSampleSadFour8x8_sse2:
|
||||
ret
|
||||
|
||||
WELS_EXTERN WelsSampleSadFour4x4_sse2
|
||||
WelsSampleSadFour4x4_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -2144,13 +2111,10 @@ WelsSampleSadFour4x4_sse2:
|
||||
;
|
||||
;***********************************************************************
|
||||
|
||||
WELS_EXTERN WelsSampleSad4x4_mmx
|
||||
|
||||
align 16
|
||||
;***********************************************************************
|
||||
; int32_t WelsSampleSad4x4_mmx (uint8_t *, int32_t, uint8_t *, int32_t )
|
||||
;***********************************************************************
|
||||
WelsSampleSad4x4_mmx:
|
||||
WELS_EXTERN WelsSampleSad4x4_mmx
|
||||
%assign push_num 0
|
||||
LOAD_4_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
|
@ -142,12 +142,10 @@ SECTION .text
|
||||
|
||||
; , 6/7/2010
|
||||
|
||||
WELS_EXTERN AnalysisVaaInfoIntra_sse2
|
||||
;***********************************************************************
|
||||
; int32_t AnalysisVaaInfoIntra_sse2( uint8_t *pDataY, const int32_t iLineSize );
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
AnalysisVaaInfoIntra_sse2:
|
||||
WELS_EXTERN AnalysisVaaInfoIntra_sse2
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
@ -237,12 +235,10 @@ AnalysisVaaInfoIntra_sse2:
|
||||
|
||||
ret
|
||||
|
||||
WELS_EXTERN AnalysisVaaInfoIntra_ssse3
|
||||
;***********************************************************************
|
||||
; int32_t AnalysisVaaInfoIntra_ssse3( uint8_t *pDataY, const int32_t iLineSize );
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
AnalysisVaaInfoIntra_ssse3:
|
||||
WELS_EXTERN AnalysisVaaInfoIntra_ssse3
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
@ -332,12 +328,10 @@ AnalysisVaaInfoIntra_ssse3:
|
||||
|
||||
ret
|
||||
|
||||
WELS_EXTERN MdInterAnalysisVaaInfo_sse41
|
||||
;***********************************************************************
|
||||
; uint8_t MdInterAnalysisVaaInfo_sse41( int32_t *pSad8x8 )
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
MdInterAnalysisVaaInfo_sse41:
|
||||
WELS_EXTERN MdInterAnalysisVaaInfo_sse41
|
||||
%assign push_num 0
|
||||
LOAD_1_PARA
|
||||
movdqa xmm0,[r0]
|
||||
@ -368,12 +362,10 @@ MdInterAnalysisVaaInfo_sse41:
|
||||
mov retrd, 15
|
||||
ret
|
||||
|
||||
WELS_EXTERN MdInterAnalysisVaaInfo_sse2
|
||||
;***********************************************************************
|
||||
; uint8_t MdInterAnalysisVaaInfo_sse2( int32_t *pSad8x8 )
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
MdInterAnalysisVaaInfo_sse2:
|
||||
WELS_EXTERN MdInterAnalysisVaaInfo_sse2
|
||||
%assign push_num 0
|
||||
LOAD_1_PARA
|
||||
movdqa xmm0, [r0]
|
||||
|
@ -49,13 +49,10 @@
|
||||
SECTION .text
|
||||
|
||||
|
||||
WELS_EXTERN WelsResBlockZero16x16_sse2
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void WelsResBlockZero16x16_sse2(int16_t* pBlock,int32_t iStride)
|
||||
;*******************************************************************************
|
||||
WelsResBlockZero16x16_sse2:
|
||||
WELS_EXTERN WelsResBlockZero16x16_sse2
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -122,13 +119,10 @@ WelsResBlockZero16x16_sse2:
|
||||
ret
|
||||
|
||||
|
||||
WELS_EXTERN WelsResBlockZero8x8_sse2
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void WelsResBlockZero8x8_sse2(int16_t * pBlock, int32_t iStride)
|
||||
;*******************************************************************************
|
||||
WelsResBlockZero8x8_sse2:
|
||||
WELS_EXTERN WelsResBlockZero8x8_sse2
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
|
@ -83,14 +83,11 @@
|
||||
|
||||
SECTION .text
|
||||
|
||||
WELS_EXTERN IdctResAddPred_mmx
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void IdctResAddPred_mmx( uint8_t *pPred, const int32_t kiStride, int16_t *pRs )
|
||||
;*******************************************************************************
|
||||
|
||||
IdctResAddPred_mmx:
|
||||
WELS_EXTERN IdctResAddPred_mmx
|
||||
%assign push_num 0
|
||||
LOAD_3_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
|
@ -177,18 +177,14 @@ sse2_wd_0x02: times 8 dw 0x02
|
||||
;*******************************************************************************
|
||||
|
||||
SECTION .text
|
||||
WELS_EXTERN WelsDecoderI4x4LumaPredH_sse2
|
||||
WELS_EXTERN WelsDecoderI4x4LumaPredDDR_mmx
|
||||
WELS_EXTERN WelsDecoderI16x16LumaPredPlane_sse2
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void WelsDecoderI4x4LumaPredH_sse2(uint8_t *pPred, const int32_t kiStride)
|
||||
;
|
||||
; pPred must align to 16
|
||||
;*******************************************************************************
|
||||
WelsDecoderI4x4LumaPredH_sse2:
|
||||
WELS_EXTERN WelsDecoderI4x4LumaPredH_sse2
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -222,7 +218,7 @@ WelsDecoderI4x4LumaPredH_sse2:
|
||||
;*******************************************************************************
|
||||
; void WelsDecoderI16x16LumaPredPlane_sse2(uint8_t *pPred, const int32_t kiStride);
|
||||
;*******************************************************************************
|
||||
WelsDecoderI16x16LumaPredPlane_sse2:
|
||||
WELS_EXTERN WelsDecoderI16x16LumaPredPlane_sse2
|
||||
push r3
|
||||
push r4
|
||||
%assign push_num 2
|
||||
@ -326,7 +322,6 @@ get_i16x16_luma_pred_plane_sse2_1:
|
||||
%endmacro
|
||||
|
||||
WELS_EXTERN WelsDecoderI16x16LumaPredH_sse2
|
||||
WelsDecoderI16x16LumaPredH_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -350,7 +345,6 @@ WelsDecoderI16x16LumaPredH_sse2:
|
||||
; void WelsDecoderI16x16LumaPredV_sse2(uint8_t *pPred, const int32_t kiStride);
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN WelsDecoderI16x16LumaPredV_sse2
|
||||
WelsDecoderI16x16LumaPredV_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -389,7 +383,6 @@ WelsDecoderI16x16LumaPredV_sse2:
|
||||
; void WelsDecoderIChromaPredPlane_sse2(uint8_t *pPred, const int32_t kiStride);
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN WelsDecoderIChromaPredPlane_sse2
|
||||
WelsDecoderIChromaPredPlane_sse2:
|
||||
push r3
|
||||
push r4
|
||||
%assign push_num 2
|
||||
@ -477,7 +470,6 @@ get_i_chroma_pred_plane_sse2_1:
|
||||
WELSEMMS
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; 0 |1 |2 |3 |4 |
|
||||
; 6 |7 |8 |9 |10|
|
||||
@ -490,7 +482,7 @@ ALIGN 16
|
||||
; void WelsDecoderI4x4LumaPredDDR_mmx(uint8_t *pPred, const int32_t kiStride)
|
||||
;
|
||||
;*******************************************************************************
|
||||
WelsDecoderI4x4LumaPredDDR_mmx:
|
||||
WELS_EXTERN WelsDecoderI4x4LumaPredDDR_mmx
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -536,7 +528,6 @@ WelsDecoderI4x4LumaPredDDR_mmx:
|
||||
ret
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void WelsDecoderIChromaPredH_mmx(uint8_t *pPred, const int32_t kiStride)
|
||||
; copy 8 pixel of 8 line from left
|
||||
@ -560,7 +551,6 @@ ALIGN 16
|
||||
%endmacro
|
||||
|
||||
WELS_EXTERN WelsDecoderIChromaPredH_mmx
|
||||
WelsDecoderIChromaPredH_mmx:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -597,13 +587,11 @@ WelsDecoderIChromaPredH_mmx:
|
||||
ret
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void WelsDecoderIChromaPredV_mmx(uint8_t *pPred, const int32_t kiStride)
|
||||
; copy 8 pixels from top 8 pixels
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN WelsDecoderIChromaPredV_mmx
|
||||
WelsDecoderIChromaPredV_mmx:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -627,7 +615,6 @@ WelsDecoderIChromaPredV_mmx:
|
||||
ret
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; lt|t0|t1|t2|t3|
|
||||
; l0|
|
||||
@ -658,7 +645,6 @@ WelsDecoderIChromaPredV_mmx:
|
||||
; void WelsDecoderI4x4LumaPredHD_mmx(uint8_t *pPred, const int32_t kiStride)
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN WelsDecoderI4x4LumaPredHD_mmx
|
||||
WelsDecoderI4x4LumaPredHD_mmx:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -714,7 +700,6 @@ WelsDecoderI4x4LumaPredHD_mmx:
|
||||
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; lt|t0|t1|t2|t3|
|
||||
; l0|
|
||||
@ -742,7 +727,6 @@ ALIGN 16
|
||||
; void WelsDecoderI4x4LumaPredHU_mmx(uint8_t *pPred, const int32_t kiStride)
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN WelsDecoderI4x4LumaPredHU_mmx
|
||||
WelsDecoderI4x4LumaPredHU_mmx:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -799,7 +783,6 @@ WelsDecoderI4x4LumaPredHU_mmx:
|
||||
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; lt|t0|t1|t2|t3|
|
||||
; l0|
|
||||
@ -829,7 +812,6 @@ ALIGN 16
|
||||
; void WelsDecoderI4x4LumaPredVR_mmx(uint8_t *pPred, const int32_t kiStride)
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN WelsDecoderI4x4LumaPredVR_mmx
|
||||
WelsDecoderI4x4LumaPredVR_mmx:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -889,7 +871,6 @@ WelsDecoderI4x4LumaPredVR_mmx:
|
||||
WELSEMMS
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; lt|t0|t1|t2|t3|t4|t5|t6|t7
|
||||
; l0|
|
||||
@ -917,7 +898,6 @@ ALIGN 16
|
||||
; void WelsDecoderI4x4LumaPredDDL_mmx(uint8_t *pPred, const int32_t kiStride)
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN WelsDecoderI4x4LumaPredDDL_mmx
|
||||
WelsDecoderI4x4LumaPredDDL_mmx:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -956,7 +936,6 @@ WelsDecoderI4x4LumaPredDDL_mmx:
|
||||
ret
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; lt|t0|t1|t2|t3|t4|t5|t6|t7
|
||||
; l0|
|
||||
@ -987,7 +966,6 @@ ALIGN 16
|
||||
; void WelsDecoderI4x4LumaPredVL_mmx(uint8_t *pPred, const int32_t kiStride)
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN WelsDecoderI4x4LumaPredVL_mmx
|
||||
WelsDecoderI4x4LumaPredVL_mmx:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -1023,13 +1001,11 @@ WelsDecoderI4x4LumaPredVL_mmx:
|
||||
WELSEMMS
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
;
|
||||
; void WelsDecoderIChromaPredDc_sse2(uint8_t *pPred, const int32_t kiStride)
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN WelsDecoderIChromaPredDc_sse2
|
||||
WelsDecoderIChromaPredDc_sse2:
|
||||
push r3
|
||||
push r4
|
||||
%assign push_num 2
|
||||
@ -1120,13 +1096,11 @@ WelsDecoderIChromaPredDc_sse2:
|
||||
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
;
|
||||
; void WelsDecoderI16x16LumaPredDc_sse2(uint8_t *pPred, const int32_t kiStride)
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN WelsDecoderI16x16LumaPredDc_sse2
|
||||
WelsDecoderI16x16LumaPredDc_sse2:
|
||||
push r3
|
||||
push r4
|
||||
%assign push_num 2
|
||||
@ -1201,12 +1175,10 @@ WelsDecoderI16x16LumaPredDc_sse2:
|
||||
; for intra prediction as follows, 11/19/2010
|
||||
;*******************************************************************************
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void WelsDecoderI16x16LumaPredDcTop_sse2(uint8_t *pPred, const int32_t kiStride)
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN WelsDecoderI16x16LumaPredDcTop_sse2
|
||||
WelsDecoderI16x16LumaPredDcTop_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -1273,12 +1245,10 @@ WelsDecoderI16x16LumaPredDcTop_sse2:
|
||||
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void WelsDecoderI16x16LumaPredDcNA_sse2(uint8_t *pPred, const int32_t kiStride)
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN WelsDecoderI16x16LumaPredDcNA_sse2
|
||||
WelsDecoderI16x16LumaPredDcNA_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -1308,12 +1278,10 @@ WelsDecoderI16x16LumaPredDcNA_sse2:
|
||||
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void WelsDecoderIChromaPredDcLeft_mmx(uint8_t *pPred, const int32_t kiStride)
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN WelsDecoderIChromaPredDcLeft_mmx
|
||||
WelsDecoderIChromaPredDcLeft_mmx:
|
||||
push r3
|
||||
push r4
|
||||
%assign push_num 2
|
||||
@ -1381,12 +1349,10 @@ WelsDecoderIChromaPredDcLeft_mmx:
|
||||
emms
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void WelsDecoderIChromaPredDcTop_sse2(uint8_t *pPred, const int32_t kiStride)
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN WelsDecoderIChromaPredDcTop_sse2
|
||||
WelsDecoderIChromaPredDcTop_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -1420,12 +1386,10 @@ WelsDecoderIChromaPredDcTop_sse2:
|
||||
movq [r0+r2], xmm0
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void WelsDecoderIChromaPredDcNA_mmx(uint8_t *pPred, const int32_t kiStride)
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN WelsDecoderIChromaPredDcNA_mmx
|
||||
WelsDecoderIChromaPredDcNA_mmx:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
|
@ -323,7 +323,6 @@ SECTION .text
|
||||
;int32_t CavlcParamCal_sse2(int16_t*coffLevel, uint8_t* run, int16_t *Level, int32_t* total_coeffs , int32_t endIdx);
|
||||
;***********************************************************************
|
||||
WELS_EXTERN CavlcParamCal_sse2
|
||||
CavlcParamCal_sse2:
|
||||
push ebx
|
||||
push edi
|
||||
push esi
|
||||
|
@ -130,12 +130,10 @@ SSE2_DeQuant8 dw 10, 13, 10, 13, 13, 16, 13, 16,
|
||||
movd %5, %1
|
||||
%endmacro
|
||||
SECTION .text
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
; void WelsDctT4_mmx( int16_t *pDct[4], uint8_t *pix1, int32_t i_pix1, uint8_t *pix2, int32_t i_pix2 )
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsDctT4_mmx
|
||||
WelsDctT4_mmx:
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r2, r2d
|
||||
@ -163,7 +161,6 @@ WelsDctT4_mmx:
|
||||
; void WelsIDctT4Rec_mmx(uint8_t *rec, int32_t stride, uint8_t *pred, int32_t pred_stride, int16_t *rs)
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsIDctT4Rec_mmx
|
||||
WelsIDctT4Rec_mmx:
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -291,8 +288,6 @@ WelsIDctT4Rec_mmx:
|
||||
; void WelsDctFourT4_sse2(int16_t *pDct, uint8_t *pix1, int32_t i_pix1, uint8_t *pix2, int32_t i_pix2 )
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsDctFourT4_sse2
|
||||
ALIGN 16
|
||||
WelsDctFourT4_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r2, r2d
|
||||
@ -340,8 +335,6 @@ WelsDctFourT4_sse2:
|
||||
; void WelsIDctFourT4Rec_sse2(uint8_t *rec, int32_t stride, uint8_t *pred, int32_t pred_stride, int16_t *rs);
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsIDctFourT4Rec_sse2
|
||||
ALIGN 16
|
||||
WelsIDctFourT4Rec_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -399,8 +392,6 @@ WelsIDctFourT4Rec_sse2:
|
||||
; void WelsIDctRecI16x16Dc_sse2(uint8_t *rec, int32_t stride, uint8_t *pred, int32_t pred_stride, int16_t *dct_dc)
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsIDctRecI16x16Dc_sse2
|
||||
ALIGN 16
|
||||
WelsIDctRecI16x16Dc_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -475,7 +466,6 @@ WelsIDctRecI16x16Dc_sse2:
|
||||
;void WelsHadamardT4Dc_sse2( int16_t *luma_dc, int16_t *pDct)
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsHadamardT4Dc_sse2
|
||||
WelsHadamardT4Dc_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SSE2_Load4Col xmm1, xmm5, xmm6, xmm0, r1
|
||||
|
@ -184,18 +184,13 @@ mmx_0x02: dw 0x02, 0x00, 0x00, 0x00
|
||||
;***********************************************************************
|
||||
|
||||
SECTION .text
|
||||
WELS_EXTERN WelsI4x4LumaPredH_sse2
|
||||
WELS_EXTERN WelsI4x4LumaPredDDR_mmx
|
||||
WELS_EXTERN WelsI4x4LumaPredDc_sse2
|
||||
WELS_EXTERN WelsI16x16LumaPredPlane_sse2
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
; void WelsI4x4LumaPredH_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride)
|
||||
;
|
||||
; pred must align to 16
|
||||
;***********************************************************************
|
||||
WelsI4x4LumaPredH_sse2:
|
||||
WELS_EXTERN WelsI4x4LumaPredH_sse2
|
||||
push r3
|
||||
%assign push_num 1
|
||||
LOAD_3_PARA
|
||||
@ -229,7 +224,7 @@ WelsI4x4LumaPredH_sse2:
|
||||
;***********************************************************************
|
||||
; void WelsI16x16LumaPredPlane_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride);
|
||||
;***********************************************************************
|
||||
WelsI16x16LumaPredPlane_sse2:
|
||||
WELS_EXTERN WelsI16x16LumaPredPlane_sse2
|
||||
push r3
|
||||
push r4
|
||||
%assign push_num 2
|
||||
@ -326,7 +321,6 @@ get_i16x16_luma_pred_plane_sse2_1:
|
||||
%endmacro
|
||||
|
||||
WELS_EXTERN WelsI16x16LumaPredH_sse2
|
||||
WelsI16x16LumaPredH_sse2:
|
||||
push r3
|
||||
%assign push_num 1
|
||||
LOAD_3_PARA
|
||||
@ -357,7 +351,6 @@ WelsI16x16LumaPredH_sse2:
|
||||
; void WelsI16x16LumaPredV_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride);
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsI16x16LumaPredV_sse2
|
||||
WelsI16x16LumaPredV_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_3_PARA
|
||||
SIGN_EXTENSION r2, r2d
|
||||
@ -387,7 +380,6 @@ WelsI16x16LumaPredV_sse2:
|
||||
; void WelsIChromaPredPlane_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride);
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsIChromaPredPlane_sse2
|
||||
WelsIChromaPredPlane_sse2:
|
||||
push r3
|
||||
push r4
|
||||
%assign push_num 2
|
||||
@ -471,7 +463,6 @@ get_i_chroma_pred_plane_sse2_1:
|
||||
WELSEMMS
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
; 0 |1 |2 |3 |4 |
|
||||
; 6 |7 |8 |9 |10|
|
||||
@ -484,7 +475,7 @@ ALIGN 16
|
||||
; void WelsI4x4LumaPredDDR_mmx(uint8_t *pred,uint8_t *pRef,int32_t stride)
|
||||
;
|
||||
;***********************************************************************
|
||||
WelsI4x4LumaPredDDR_mmx:
|
||||
WELS_EXTERN WelsI4x4LumaPredDDR_mmx
|
||||
%assign push_num 0
|
||||
LOAD_3_PARA
|
||||
SIGN_EXTENSION r2, r2d
|
||||
@ -525,7 +516,6 @@ WelsI4x4LumaPredDDR_mmx:
|
||||
WELSEMMS
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
; 0 |1 |2 |3 |4 |
|
||||
; 5 |6 |7 |8 |9 |
|
||||
@ -538,7 +528,7 @@ ALIGN 16
|
||||
; void WelsI4x4LumaPredDc_sse2(uint8_t *pred,uint8_t *pRef,int32_t stride)
|
||||
;
|
||||
;***********************************************************************
|
||||
WelsI4x4LumaPredDc_sse2:
|
||||
WELS_EXTERN WelsI4x4LumaPredDc_sse2
|
||||
push r3
|
||||
push r4
|
||||
%assign push_num 2
|
||||
@ -572,7 +562,6 @@ WelsI4x4LumaPredDc_sse2:
|
||||
pop r3
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
; void WelsIChromaPredH_mmx(uint8_t *pred, uint8_t *pRef, int32_t stride)
|
||||
; copy 8 pixel of 8 line from left
|
||||
@ -598,7 +587,6 @@ ALIGN 16
|
||||
%endmacro
|
||||
|
||||
WELS_EXTERN WelsIChromaPredH_mmx
|
||||
WelsIChromaPredH_mmx:
|
||||
%assign push_num 0
|
||||
LOAD_3_PARA
|
||||
SIGN_EXTENSION r2, r2d
|
||||
@ -629,13 +617,11 @@ WelsIChromaPredH_mmx:
|
||||
WELSEMMS
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
; void WelsI4x4LumaPredV_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride)
|
||||
; copy pixels from top 4 pixels
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsI4x4LumaPredV_sse2
|
||||
WelsI4x4LumaPredV_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_3_PARA
|
||||
SIGN_EXTENSION r2, r2d
|
||||
@ -645,13 +631,11 @@ WelsI4x4LumaPredV_sse2:
|
||||
movdqa [r0], xmm0
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
; void WelsIChromaPredV_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride)
|
||||
; copy 8 pixels from top 8 pixels
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsIChromaPredV_sse2
|
||||
WelsIChromaPredV_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_3_PARA
|
||||
SIGN_EXTENSION r2, r2d
|
||||
@ -665,7 +649,6 @@ WelsIChromaPredV_sse2:
|
||||
movdqa [r0+48], xmm0
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
; lt|t0|t1|t2|t3|
|
||||
; l0|
|
||||
@ -696,7 +679,6 @@ WelsIChromaPredV_sse2:
|
||||
; void WelsI4x4LumaPredHD_mmx(uint8_t *pred,uint8_t *pRef,int32_t stride)
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsI4x4LumaPredHD_mmx
|
||||
WelsI4x4LumaPredHD_mmx:
|
||||
%assign push_num 0
|
||||
LOAD_3_PARA
|
||||
SIGN_EXTENSION r2, r2d
|
||||
@ -747,7 +729,6 @@ WelsI4x4LumaPredHD_mmx:
|
||||
WELSEMMS
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
; lt|t0|t1|t2|t3|
|
||||
; l0|
|
||||
@ -775,7 +756,6 @@ ALIGN 16
|
||||
; void WelsI4x4LumaPredHU_mmx(uint8_t *pred,uint8_t *pRef,int32_t stride)
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsI4x4LumaPredHU_mmx
|
||||
WelsI4x4LumaPredHU_mmx:
|
||||
%assign push_num 0
|
||||
LOAD_3_PARA
|
||||
SIGN_EXTENSION r2, r2d
|
||||
@ -828,7 +808,6 @@ WelsI4x4LumaPredHU_mmx:
|
||||
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
; lt|t0|t1|t2|t3|
|
||||
; l0|
|
||||
@ -858,7 +837,6 @@ ALIGN 16
|
||||
; void WelsI4x4LumaPredVR_mmx(uint8_t *pred,uint8_t *pRef,int32_t stride)
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsI4x4LumaPredVR_mmx
|
||||
WelsI4x4LumaPredVR_mmx:
|
||||
%assign push_num 0
|
||||
LOAD_3_PARA
|
||||
SIGN_EXTENSION r2, r2d
|
||||
@ -916,7 +894,6 @@ WelsI4x4LumaPredVR_mmx:
|
||||
WELSEMMS
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
; lt|t0|t1|t2|t3|t4|t5|t6|t7
|
||||
; l0|
|
||||
@ -944,7 +921,6 @@ ALIGN 16
|
||||
; void WelsI4x4LumaPredDDL_mmx(uint8_t *pred,uint8_t *pRef,int32_t stride)
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsI4x4LumaPredDDL_mmx
|
||||
WelsI4x4LumaPredDDL_mmx:
|
||||
%assign push_num 0
|
||||
LOAD_3_PARA
|
||||
SIGN_EXTENSION r2, r2d
|
||||
@ -981,7 +957,6 @@ WelsI4x4LumaPredDDL_mmx:
|
||||
ret
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
; lt|t0|t1|t2|t3|t4|t5|t6|t7
|
||||
; l0|
|
||||
@ -1012,7 +987,6 @@ ALIGN 16
|
||||
; void WelsI4x4LumaPredVL_mmx(uint8_t *pred,uint8_t *pRef,int32_t stride)
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsI4x4LumaPredVL_mmx
|
||||
WelsI4x4LumaPredVL_mmx:
|
||||
%assign push_num 0
|
||||
LOAD_3_PARA
|
||||
SIGN_EXTENSION r2, r2d
|
||||
@ -1045,13 +1019,11 @@ WelsI4x4LumaPredVL_mmx:
|
||||
WELSEMMS
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
;
|
||||
; void WelsIChromaPredDc_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride)
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsIChromaPredDc_sse2
|
||||
WelsIChromaPredDc_sse2:
|
||||
push r3
|
||||
push r4
|
||||
%assign push_num 2
|
||||
@ -1137,13 +1109,11 @@ WelsIChromaPredDc_sse2:
|
||||
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
;
|
||||
; void WelsI16x16LumaPredDc_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride)
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsI16x16LumaPredDc_sse2
|
||||
WelsI16x16LumaPredDc_sse2:
|
||||
push r3
|
||||
push r4
|
||||
%assign push_num 2
|
||||
@ -1206,8 +1176,6 @@ WelsI16x16LumaPredDc_sse2:
|
||||
;***********************************************************************
|
||||
%ifdef X86_32
|
||||
WELS_EXTERN WelsSampleSatdThree4x4_sse2
|
||||
align 16
|
||||
WelsSampleSatdThree4x4_sse2:
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
|
@ -47,24 +47,20 @@
|
||||
|
||||
SECTION .text
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
;void WelsPrefetchZero_mmx(int8_t const*_A);
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsPrefetchZero_mmx
|
||||
WelsPrefetchZero_mmx:
|
||||
%assign push_num 0
|
||||
LOAD_1_PARA
|
||||
prefetchnta [r0]
|
||||
ret
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
; void WelsSetMemZeroAligned64_sse2(void *dst, int32_t size)
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsSetMemZeroAligned64_sse2
|
||||
WelsSetMemZeroAligned64_sse2:
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
@ -84,12 +80,10 @@ WelsSetMemZeroAligned64_sse2:
|
||||
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
; void WelsSetMemZeroSize64_mmx(void *dst, int32_t size)
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsSetMemZeroSize64_mmx
|
||||
WelsSetMemZeroSize64_mmx:
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
@ -114,12 +108,10 @@ WelsSetMemZeroSize64_mmx:
|
||||
WELSEMMS
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
; void WelsSetMemZeroSize8_mmx(void *dst, int32_t size)
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsSetMemZeroSize8_mmx
|
||||
WelsSetMemZeroSize8_mmx:
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
|
@ -83,8 +83,6 @@ SECTION .text
|
||||
; void WelsQuant4x4_sse2(int16_t *pDct, int16_t* ff, int16_t *mf);
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsQuant4x4_sse2
|
||||
align 16
|
||||
WelsQuant4x4_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_3_PARA
|
||||
movdqa xmm2, [r1]
|
||||
@ -99,8 +97,6 @@ WelsQuant4x4_sse2:
|
||||
;void WelsQuant4x4Dc_sse2(int16_t *pDct, const int16_t ff, int16_t mf);
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsQuant4x4Dc_sse2
|
||||
align 16
|
||||
WelsQuant4x4Dc_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_3_PARA
|
||||
SIGN_EXTENSION r1, r1w
|
||||
@ -118,8 +114,6 @@ WelsQuant4x4Dc_sse2:
|
||||
; void WelsQuantFour4x4_sse2(int16_t *pDct, int16_t* ff, int16_t *mf);
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsQuantFour4x4_sse2
|
||||
align 16
|
||||
WelsQuantFour4x4_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_3_PARA
|
||||
MOVDQ xmm2, [r1]
|
||||
@ -140,8 +134,6 @@ WelsQuantFour4x4_sse2:
|
||||
; void WelsQuantFour4x4Max_sse2(int16_t *pDct, int32_t* f, int16_t *mf, int16_t *max);
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsQuantFour4x4Max_sse2
|
||||
align 16
|
||||
WelsQuantFour4x4Max_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_4_PARA
|
||||
MOVDQ xmm2, [r1]
|
||||
@ -195,8 +187,6 @@ SECTION .text
|
||||
;int32_t WelsHadamardQuant2x2_mmx(int16_t *rs, const int16_t ff, int16_t mf, int16_t * pDct, int16_t * block);
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsHadamardQuant2x2_mmx
|
||||
align 16
|
||||
WelsHadamardQuant2x2_mmx:
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1w
|
||||
@ -253,8 +243,6 @@ WelsHadamardQuant2x2_mmx:
|
||||
;int32_t WelsHadamardQuant2x2Skip_mmx(int16_t *pDct, int16_t ff, int16_t mf);
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsHadamardQuant2x2Skip_mmx
|
||||
align 16
|
||||
WelsHadamardQuant2x2Skip_mmx:
|
||||
%assign push_num 0
|
||||
LOAD_3_PARA
|
||||
SIGN_EXTENSION r1, r1w
|
||||
@ -303,13 +291,10 @@ WelsHadamardQuant2x2Skip_mmx:
|
||||
%endmacro
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
; void WelsDequant4x4_sse2(int16_t *pDct, const uint16_t* mf);
|
||||
;***********************************************************************
|
||||
align 16
|
||||
WELS_EXTERN WelsDequant4x4_sse2
|
||||
WelsDequant4x4_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
|
||||
@ -323,10 +308,7 @@ WelsDequant4x4_sse2:
|
||||
;void WelsDequantFour4x4_sse2(int16_t *pDct, const uint16_t* mf);
|
||||
;***********************************************************************====
|
||||
|
||||
align 16
|
||||
|
||||
WELS_EXTERN WelsDequantFour4x4_sse2
|
||||
WelsDequantFour4x4_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
|
||||
@ -346,8 +328,6 @@ WelsDequantFour4x4_sse2:
|
||||
;void WelsDequantIHadamard4x4_sse2(int16_t *rs, const uint16_t mf);
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsDequantIHadamard4x4_sse2
|
||||
align 16
|
||||
WelsDequantIHadamard4x4_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
%ifndef X86_32
|
||||
|
@ -166,9 +166,7 @@ SECTION .text
|
||||
;***********************************************************************
|
||||
;void WelsScan4x4DcAc_sse2( int16_t level[16], int16_t *pDct )
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
WELS_EXTERN WelsScan4x4DcAc_sse2
|
||||
WelsScan4x4DcAc_sse2:
|
||||
%ifdef X86_32
|
||||
push r3
|
||||
%assign push_num 1
|
||||
@ -200,9 +198,7 @@ WelsScan4x4DcAc_sse2:
|
||||
;***********************************************************************
|
||||
;void WelsScan4x4DcAc_ssse3( int16_t level[16], int16_t *pDct )
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
WELS_EXTERN WelsScan4x4DcAc_ssse3
|
||||
WelsScan4x4DcAc_ssse3:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
movdqa xmm0, [r1]
|
||||
@ -220,9 +216,7 @@ WelsScan4x4DcAc_ssse3:
|
||||
;***********************************************************************
|
||||
;void WelsScan4x4Ac_sse2( int16_t* zig_value, int16_t* pDct )
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
WELS_EXTERN WelsScan4x4Ac_sse2
|
||||
WelsScan4x4Ac_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
movdqa xmm0, [r1]
|
||||
@ -259,9 +253,7 @@ WelsScan4x4Ac_sse2:
|
||||
;***********************************************************************
|
||||
; int32_t WelsCalculateSingleCtr4x4_sse2( int16_t *pDct );
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
WELS_EXTERN WelsCalculateSingleCtr4x4_sse2
|
||||
WelsCalculateSingleCtr4x4_sse2:
|
||||
%ifdef X86_32
|
||||
push r3
|
||||
%assign push_num 1
|
||||
@ -319,9 +311,7 @@ WelsCalculateSingleCtr4x4_sse2:
|
||||
;***********************************************************************
|
||||
; int32_t WelsGetNoneZeroCount_sse2(int16_t* level);
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
WELS_EXTERN WelsGetNoneZeroCount_sse2
|
||||
WelsGetNoneZeroCount_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_1_PARA
|
||||
movdqa xmm0, [r0]
|
||||
|
@ -163,8 +163,6 @@ SECTION .text
|
||||
paddw %3, %2
|
||||
%endmacro
|
||||
|
||||
ALIGN 16
|
||||
WELS_EXTERN BilateralLumaFilter8_sse2
|
||||
;***********************************************************************
|
||||
; BilateralLumaFilter8_sse2(uint8_t *pixels, int stride);
|
||||
;***********************************************************************
|
||||
@ -173,7 +171,7 @@ WELS_EXTERN BilateralLumaFilter8_sse2
|
||||
; 6 7 8
|
||||
; 0: the center point
|
||||
|
||||
BilateralLumaFilter8_sse2:
|
||||
WELS_EXTERN BilateralLumaFilter8_sse2
|
||||
|
||||
push r3
|
||||
%assign push_num 1
|
||||
@ -219,7 +217,6 @@ BilateralLumaFilter8_sse2:
|
||||
|
||||
ret
|
||||
|
||||
WELS_EXTERN WaverageChromaFilter8_sse2
|
||||
;***********************************************************************
|
||||
; void WaverageChromaFilter8_sse2(uint8_t *pixels, int stride);
|
||||
;***********************************************************************
|
||||
@ -230,8 +227,7 @@ WELS_EXTERN WaverageChromaFilter8_sse2
|
||||
;1 2 4 2 1
|
||||
;1 1 2 1 1
|
||||
|
||||
ALIGN 16
|
||||
WaverageChromaFilter8_sse2:
|
||||
WELS_EXTERN WaverageChromaFilter8_sse2
|
||||
|
||||
push r3
|
||||
|
||||
|
@ -66,22 +66,18 @@ shufb_mask_high:
|
||||
db 01h, 80h, 03h, 80h, 05h, 80h, 07h, 80h, 09h, 80h, 0bh, 80h, 0dh, 80h, 0fh, 80h
|
||||
|
||||
|
||||
ALIGN 16
|
||||
|
||||
;***********************************************************************
|
||||
; Code
|
||||
;***********************************************************************
|
||||
|
||||
SECTION .text
|
||||
|
||||
WELS_EXTERN DyadicBilinearDownsamplerWidthx32_sse
|
||||
;***********************************************************************
|
||||
; void DyadicBilinearDownsamplerWidthx32_sse( unsigned char* pDst, const int iDstStride,
|
||||
; unsigned char* pSrc, const int iSrcStride,
|
||||
; const int iSrcWidth, const int iSrcHeight );
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
DyadicBilinearDownsamplerWidthx32_sse:
|
||||
WELS_EXTERN DyadicBilinearDownsamplerWidthx32_sse
|
||||
push ebx
|
||||
push edx
|
||||
push esi
|
||||
@ -227,14 +223,12 @@ DyadicBilinearDownsamplerWidthx32_sse:
|
||||
pop ebx
|
||||
ret
|
||||
|
||||
WELS_EXTERN DyadicBilinearDownsamplerWidthx16_sse
|
||||
;***********************************************************************
|
||||
; void DyadicBilinearDownsamplerWidthx16_sse( unsigned char* pDst, const int iDstStride,
|
||||
; unsigned char* pSrc, const int iSrcStride,
|
||||
; const int iSrcWidth, const int iSrcHeight );
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
DyadicBilinearDownsamplerWidthx16_sse:
|
||||
WELS_EXTERN DyadicBilinearDownsamplerWidthx16_sse
|
||||
push ebx
|
||||
push edx
|
||||
push esi
|
||||
@ -331,14 +325,12 @@ DyadicBilinearDownsamplerWidthx16_sse:
|
||||
pop ebx
|
||||
ret
|
||||
|
||||
WELS_EXTERN DyadicBilinearDownsamplerWidthx8_sse
|
||||
;***********************************************************************
|
||||
; void DyadicBilinearDownsamplerWidthx8_sse( unsigned char* pDst, const int iDstStride,
|
||||
; unsigned char* pSrc, const int iSrcStride,
|
||||
; const int iSrcWidth, const int iSrcHeight );
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
DyadicBilinearDownsamplerWidthx8_sse:
|
||||
WELS_EXTERN DyadicBilinearDownsamplerWidthx8_sse
|
||||
push ebx
|
||||
push edx
|
||||
push esi
|
||||
@ -422,14 +414,12 @@ DyadicBilinearDownsamplerWidthx8_sse:
|
||||
|
||||
|
||||
; got about 50% improvement over DyadicBilinearDownsamplerWidthx32_sse
|
||||
WELS_EXTERN DyadicBilinearDownsamplerWidthx32_ssse3
|
||||
;***********************************************************************
|
||||
; void DyadicBilinearDownsamplerWidthx32_ssse3( unsigned char* pDst, const int iDstStride,
|
||||
; unsigned char* pSrc, const int iSrcStride,
|
||||
; const int iSrcWidth, const int iSrcHeight );
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
DyadicBilinearDownsamplerWidthx32_ssse3:
|
||||
WELS_EXTERN DyadicBilinearDownsamplerWidthx32_ssse3
|
||||
push ebx
|
||||
push edx
|
||||
push esi
|
||||
@ -533,14 +523,12 @@ DyadicBilinearDownsamplerWidthx32_ssse3:
|
||||
pop ebx
|
||||
ret
|
||||
|
||||
WELS_EXTERN DyadicBilinearDownsamplerWidthx16_ssse3
|
||||
;***********************************************************************
|
||||
; void DyadicBilinearDownsamplerWidthx16_ssse3( unsigned char* pDst, const int iDstStride,
|
||||
; unsigned char* pSrc, const int iSrcStride,
|
||||
; const int iSrcWidth, const int iSrcHeight );
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
DyadicBilinearDownsamplerWidthx16_ssse3:
|
||||
WELS_EXTERN DyadicBilinearDownsamplerWidthx16_ssse3
|
||||
push ebx
|
||||
push edx
|
||||
push esi
|
||||
@ -623,14 +611,12 @@ DyadicBilinearDownsamplerWidthx16_ssse3:
|
||||
ret
|
||||
|
||||
; got about 65% improvement over DyadicBilinearDownsamplerWidthx32_sse
|
||||
WELS_EXTERN DyadicBilinearDownsamplerWidthx32_sse4
|
||||
;***********************************************************************
|
||||
; void DyadicBilinearDownsamplerWidthx32_sse4( unsigned char* pDst, const int iDstStride,
|
||||
; unsigned char* pSrc, const int iSrcStride,
|
||||
; const int iSrcWidth, const int iSrcHeight );
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
DyadicBilinearDownsamplerWidthx32_sse4:
|
||||
WELS_EXTERN DyadicBilinearDownsamplerWidthx32_sse4
|
||||
push ebx
|
||||
push edx
|
||||
push esi
|
||||
@ -733,14 +719,12 @@ DyadicBilinearDownsamplerWidthx32_sse4:
|
||||
pop ebx
|
||||
ret
|
||||
|
||||
WELS_EXTERN DyadicBilinearDownsamplerWidthx16_sse4
|
||||
;***********************************************************************
|
||||
; void DyadicBilinearDownsamplerWidthx16_sse4( unsigned char* pDst, const int iDstStride,
|
||||
; unsigned char* pSrc, const int iSrcStride,
|
||||
; const int iSrcWidth, const int iSrcHeight );
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
DyadicBilinearDownsamplerWidthx16_sse4:
|
||||
WELS_EXTERN DyadicBilinearDownsamplerWidthx16_sse4
|
||||
push ebx
|
||||
push edx
|
||||
push esi
|
||||
@ -825,7 +809,6 @@ DyadicBilinearDownsamplerWidthx16_sse4:
|
||||
|
||||
|
||||
|
||||
WELS_EXTERN GeneralBilinearAccurateDownsampler_sse2
|
||||
;**************************************************************************************************************
|
||||
;int GeneralBilinearAccurateDownsampler_sse2( unsigned char* pDst, const int iDstStride, const int iDstWidth, const int iDstHeight,
|
||||
; unsigned char* pSrc, const int iSrcStride, const int iSrcWidth, const int iSrcHeight,
|
||||
@ -833,8 +816,7 @@ WELS_EXTERN GeneralBilinearAccurateDownsampler_sse2
|
||||
;{
|
||||
;**************************************************************************************************************
|
||||
|
||||
ALIGN 16
|
||||
GeneralBilinearAccurateDownsampler_sse2:
|
||||
WELS_EXTERN GeneralBilinearAccurateDownsampler_sse2
|
||||
push ebp
|
||||
push esi
|
||||
push edi
|
||||
@ -1029,7 +1011,6 @@ LAST_ROW_END:
|
||||
|
||||
|
||||
|
||||
WELS_EXTERN GeneralBilinearFastDownsampler_sse2
|
||||
;**************************************************************************************************************
|
||||
;int GeneralBilinearFastDownsampler_sse2( unsigned char* pDst, const int iDstStride, const int iDstWidth, const int iDstHeight,
|
||||
; unsigned char* pSrc, const int iSrcStride, const int iSrcWidth, const int iSrcHeight,
|
||||
@ -1037,8 +1018,7 @@ WELS_EXTERN GeneralBilinearFastDownsampler_sse2
|
||||
;{
|
||||
;**************************************************************************************************************
|
||||
|
||||
ALIGN 16
|
||||
GeneralBilinearFastDownsampler_sse2:
|
||||
WELS_EXTERN GeneralBilinearFastDownsampler_sse2
|
||||
push ebp
|
||||
push esi
|
||||
push edi
|
||||
|
@ -245,12 +245,10 @@ SECTION .text
|
||||
|
||||
%ifdef X86_32
|
||||
|
||||
WELS_EXTERN SampleVariance16x16_sse2
|
||||
;***********************************************************************
|
||||
; void SampleVariance16x16_sse2( uint8_t * y_ref, int32_t y_ref_stride, uint8_t * y_src, int32_t y_src_stride,SMotionTextureUnit* pMotionTexture );
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
SampleVariance16x16_sse2:
|
||||
WELS_EXTERN SampleVariance16x16_sse2
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
@ -347,15 +345,13 @@ SampleVariance16x16_sse2:
|
||||
|
||||
|
||||
|
||||
WELS_EXTERN VAACalcSad_sse2
|
||||
;*************************************************************************************************************
|
||||
;void VAACalcSad_sse2( const uint8_t *cur_data, const uint8_t *ref_data, int32_t iPicWidth, int32_t iPicHeight,
|
||||
; int32_t iPicStride, int32_t *psadframe, int32_t *psad8x8)
|
||||
;*************************************************************************************************************
|
||||
|
||||
|
||||
ALIGN 16
|
||||
VAACalcSad_sse2:
|
||||
WELS_EXTERN VAACalcSad_sse2
|
||||
%define cur_data esp + pushsize + 4
|
||||
%define ref_data esp + pushsize + 8
|
||||
%define iPicWidth esp + pushsize + 12
|
||||
@ -441,12 +437,10 @@ width_loop:
|
||||
|
||||
%else ;64-bit
|
||||
|
||||
WELS_EXTERN SampleVariance16x16_sse2
|
||||
;***********************************************************************
|
||||
; void SampleVariance16x16_sse2( uint8_t * y_ref, int32_t y_ref_stride, uint8_t * y_src, int32_t y_src_stride,SMotionTextureUnit* pMotionTexture );
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
SampleVariance16x16_sse2:
|
||||
WELS_EXTERN SampleVariance16x16_sse2
|
||||
%define SUM r10;[esp]
|
||||
%define SUM_CUR r11;[esp+4]
|
||||
%define SQR r13;[esp+8]
|
||||
@ -539,15 +533,13 @@ SampleVariance16x16_sse2:
|
||||
ret
|
||||
|
||||
|
||||
WELS_EXTERN VAACalcSad_sse2
|
||||
;*************************************************************************************************************
|
||||
;void VAACalcSad_sse2( const uint8_t *cur_data, const uint8_t *ref_data, int32_t iPicWidth, int32_t iPicHeight,
|
||||
; int32_t iPicStride, int32_t *psadframe, int32_t *psad8x8)
|
||||
;*************************************************************************************************************
|
||||
|
||||
|
||||
ALIGN 16
|
||||
VAACalcSad_sse2:
|
||||
WELS_EXTERN VAACalcSad_sse2
|
||||
%define cur_data r0
|
||||
%define ref_data r1
|
||||
%define iPicWidth r2
|
||||
@ -637,15 +629,13 @@ width_loop:
|
||||
|
||||
|
||||
%ifdef X86_32
|
||||
WELS_EXTERN VAACalcSadVar_sse2
|
||||
;*************************************************************************************************************
|
||||
;void VAACalcSadVar_sse2( const uint8_t *cur_data, const uint8_t *ref_data, int32_t iPicWidth, int32_t iPicHeight,
|
||||
; int32_t iPicStride, int32_t *psadframe, int32_t *psad8x8, int32_t *psum16x16, int32_t *psqsum16x16)
|
||||
;*************************************************************************************************************
|
||||
|
||||
|
||||
ALIGN 16
|
||||
VAACalcSadVar_sse2:
|
||||
WELS_EXTERN VAACalcSadVar_sse2
|
||||
%define localsize 8
|
||||
%define cur_data esp + pushsize + localsize + 4
|
||||
%define ref_data esp + pushsize + localsize + 8
|
||||
@ -773,15 +763,13 @@ var_width_loop:
|
||||
|
||||
%else ;64-bit
|
||||
|
||||
WELS_EXTERN VAACalcSadVar_sse2
|
||||
;*************************************************************************************************************
|
||||
;void VAACalcSadVar_sse2( const uint8_t *cur_data, const uint8_t *ref_data, int32_t iPicWidth, int32_t iPicHeight,
|
||||
; int32_t iPicStride, int32_t *psadframe, int32_t *psad8x8, int32_t *psum16x16, int32_t *psqsum16x16)
|
||||
;*************************************************************************************************************
|
||||
|
||||
|
||||
ALIGN 16
|
||||
VAACalcSadVar_sse2:
|
||||
WELS_EXTERN VAACalcSadVar_sse2
|
||||
%define cur_data arg1 ;r0
|
||||
%define ref_data arg2 ;r1
|
||||
%define iPicWidth arg3 ;r2
|
||||
@ -916,15 +904,13 @@ var_width_loop:
|
||||
|
||||
%ifdef X86_32
|
||||
|
||||
WELS_EXTERN VAACalcSadSsd_sse2
|
||||
;*************************************************************************************************************
|
||||
;void VAACalcSadSsd_sse2(const uint8_t *cur_data, const uint8_t *ref_data, int32_t iPicWidth, int32_t iPicHeight,
|
||||
; int32_t iPicStride,int32_t *psadframe, int32_t *psad8x8, int32_t *psum16x16, int32_t *psqsum16x16, int32_t *psqdiff16x16)
|
||||
;*************************************************************************************************************
|
||||
|
||||
|
||||
ALIGN 16
|
||||
VAACalcSadSsd_sse2:
|
||||
WELS_EXTERN VAACalcSadSsd_sse2
|
||||
%define localsize 12
|
||||
%define cur_data esp + pushsize + localsize + 4
|
||||
%define ref_data esp + pushsize + localsize + 8
|
||||
@ -1072,15 +1058,13 @@ sqdiff_width_loop:
|
||||
%else
|
||||
|
||||
|
||||
WELS_EXTERN VAACalcSadSsd_sse2
|
||||
;*************************************************************************************************************
|
||||
;void VAACalcSadSsd_sse2(const uint8_t *cur_data, const uint8_t *ref_data, int32_t iPicWidth, int32_t iPicHeight,
|
||||
; int32_t iPicStride,int32_t *psadframe, int32_t *psad8x8, int32_t *psum16x16, int32_t *psqsum16x16, int32_t *psqdiff16x16)
|
||||
;*************************************************************************************************************
|
||||
|
||||
|
||||
ALIGN 16
|
||||
VAACalcSadSsd_sse2:
|
||||
WELS_EXTERN VAACalcSadSsd_sse2
|
||||
%define localsize 12
|
||||
%define cur_data arg1;r0
|
||||
%define ref_data arg2;r1
|
||||
@ -1236,15 +1220,13 @@ sqdiff_width_loop:
|
||||
%endif
|
||||
|
||||
%ifdef X86_32
|
||||
WELS_EXTERN VAACalcSadBgd_sse2
|
||||
;*************************************************************************************************************
|
||||
;void VAACalcSadBgd_sse2(const uint8_t *cur_data, const uint8_t *ref_data, int32_t iPicWidth, int32_t iPicHeight,
|
||||
; int32_t iPicStride, int32_t *psadframe, int32_t *psad8x8, int32_t *p_sd8x8, uint8_t *p_mad8x8)
|
||||
;*************************************************************************************************************
|
||||
|
||||
|
||||
ALIGN 16
|
||||
VAACalcSadBgd_sse2:
|
||||
WELS_EXTERN VAACalcSadBgd_sse2
|
||||
%define localsize 12
|
||||
%define cur_data esp + pushsize + localsize + 4
|
||||
%define ref_data esp + pushsize + localsize + 8
|
||||
@ -1415,7 +1397,6 @@ bgd_width_loop:
|
||||
|
||||
|
||||
|
||||
WELS_EXTERN VAACalcSadSsdBgd_sse2
|
||||
;*************************************************************************************************************
|
||||
;void VAACalcSadSsdBgd_sse2(const uint8_t *cur_data, const uint8_t *ref_data, int32_t iPicWidth, int32_t iPicHeight,
|
||||
; int32_t iPicStride, int32_t *psadframe, int32_t *psad8x8, int32_t *psum16x16, int32_t *psqsum16x16,
|
||||
@ -1423,8 +1404,7 @@ WELS_EXTERN VAACalcSadSsdBgd_sse2
|
||||
;*************************************************************************************************************
|
||||
|
||||
|
||||
ALIGN 16
|
||||
VAACalcSadSsdBgd_sse2:
|
||||
WELS_EXTERN VAACalcSadSsdBgd_sse2
|
||||
%define localsize 16
|
||||
%define cur_data esp + pushsize + localsize + 4
|
||||
%define ref_data esp + pushsize + localsize + 8
|
||||
@ -1646,15 +1626,13 @@ sqdiff_bgd_width_loop:
|
||||
ret
|
||||
%else
|
||||
|
||||
WELS_EXTERN VAACalcSadBgd_sse2
|
||||
;*************************************************************************************************************
|
||||
;void VAACalcSadBgd_sse2(const uint8_t *cur_data, const uint8_t *ref_data, int32_t iPicWidth, int32_t iPicHeight,
|
||||
; int32_t iPicStride, int32_t *psadframe, int32_t *psad8x8, int32_t *p_sd8x8, uint8_t *p_mad8x8)
|
||||
;*************************************************************************************************************
|
||||
|
||||
|
||||
ALIGN 16
|
||||
VAACalcSadBgd_sse2:
|
||||
WELS_EXTERN VAACalcSadBgd_sse2
|
||||
%define cur_data arg1;
|
||||
%define ref_data arg2;
|
||||
%define iPicWidth arg3;
|
||||
@ -1817,7 +1795,6 @@ bgd_width_loop:
|
||||
|
||||
|
||||
|
||||
WELS_EXTERN VAACalcSadSsdBgd_sse2
|
||||
;*************************************************************************************************************
|
||||
;void VAACalcSadSsdBgd_sse2(const uint8_t *cur_data, const uint8_t *ref_data, int32_t iPicWidth, int32_t iPicHeight,
|
||||
; int32_t iPicStride, int32_t *psadframe, int32_t *psad8x8, int32_t *psum16x16, int32_t *psqsum16x16,
|
||||
@ -1825,8 +1802,7 @@ WELS_EXTERN VAACalcSadSsdBgd_sse2
|
||||
;*************************************************************************************************************
|
||||
|
||||
|
||||
ALIGN 16
|
||||
VAACalcSadSsdBgd_sse2:
|
||||
WELS_EXTERN VAACalcSadSsdBgd_sse2
|
||||
%define cur_data arg1;
|
||||
%define ref_data arg2;
|
||||
%define iPicWidth arg3;
|
||||
|
Loading…
x
Reference in New Issue
Block a user