Remove commented out code for old, 32-bit only x86 assembly function prologues/epilogues
This commit is contained in:
parent
258828f7ec
commit
f96918283f
@ -133,15 +133,6 @@ WelsCopy16x16_sse2:
|
||||
ALIGN 16
|
||||
; dst can be aligned to 16 bytes, but pSrc alignment is not guaranteed, 12/29/2011
|
||||
WelsCopy16x16NotAligned_sse2:
|
||||
;push esi
|
||||
;push edi
|
||||
;push ebx
|
||||
|
||||
;mov edi, [esp+16] ; Dst
|
||||
;mov eax, [esp+20] ; iStrideD
|
||||
;mov esi, [esp+24] ; Src
|
||||
;mov ecx, [esp+28] ; iStrideS
|
||||
|
||||
push r4
|
||||
push r5
|
||||
%assign push_num 2
|
||||
@ -205,15 +196,6 @@ WelsCopy16x16NotAligned_sse2:
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
WelsCopy16x8NotAligned_sse2:
|
||||
;push esi
|
||||
;push edi
|
||||
;push ebx
|
||||
|
||||
;mov edi, [esp+16] ; Dst
|
||||
;mov eax, [esp+20] ; iStrideD
|
||||
;mov esi, [esp+24] ; Src
|
||||
;mov ecx, [esp+28] ; iStrideS
|
||||
|
||||
push r4
|
||||
push r5
|
||||
%assign push_num 2
|
||||
@ -255,13 +237,6 @@ WelsCopy16x8NotAligned_sse2:
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
WelsCopy8x16_mmx:
|
||||
;push ebx
|
||||
|
||||
;mov eax, [esp + 8 ] ;Dst
|
||||
;mov ecx, [esp + 12] ;iStrideD
|
||||
;mov ebx, [esp + 16] ;Src
|
||||
;mov edx, [esp + 20] ;iStrideS
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_4_PARA
|
||||
|
||||
@ -327,13 +302,6 @@ WelsCopy8x16_mmx:
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
WelsCopy8x8_mmx:
|
||||
;push ebx
|
||||
;push esi
|
||||
;mov eax, [esp + 12] ;Dst
|
||||
;mov ecx, [esp + 16] ;iStrideD
|
||||
;mov esi, [esp + 20] ;Src
|
||||
;mov ebx, [esp + 24] ;iStrideS
|
||||
|
||||
push r4
|
||||
%assign push_num 1
|
||||
LOAD_4_PARA
|
||||
@ -373,8 +341,6 @@ WelsCopy8x8_mmx:
|
||||
movq [r0+r1], mm7
|
||||
|
||||
WELSEMMS
|
||||
;pop esi
|
||||
;pop ebx
|
||||
LOAD_4_PARA_POP
|
||||
pop r4
|
||||
ret
|
||||
@ -389,8 +355,6 @@ UpdateMbMv_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
|
||||
;mov eax, [esp+4] ; mv_buffer
|
||||
;movd xmm0, [esp+8] ; _mv
|
||||
movd xmm0, r1d ; _mv
|
||||
pshufd xmm1, xmm0, $00
|
||||
movdqa [r0 ], xmm1
|
||||
@ -472,20 +436,6 @@ ALIGN 16
|
||||
; int iHeight );
|
||||
;*******************************************************************************
|
||||
PixelAvgWidthEq8_mmx:
|
||||
|
||||
;push esi
|
||||
;push edi
|
||||
;push ebp
|
||||
;push ebx
|
||||
|
||||
;mov edi, [esp+20] ; pDst
|
||||
;mov eax, [esp+24] ; iDstStride
|
||||
;mov esi, [esp+28] ; pSrcA
|
||||
;mov ecx, [esp+32] ; iSrcAStride
|
||||
;mov ebp, [esp+36] ; pSrcB
|
||||
;mov edx, [esp+40] ; iSrcBStride
|
||||
;mov ebx, [esp+44] ; iHeight
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_7_PARA
|
||||
|
||||
@ -575,17 +525,6 @@ ALIGN 16
|
||||
; uint8_t *pDst, int iDstStride, int iHeight )
|
||||
;*******************************************************************************
|
||||
McCopyWidthEq4_mmx:
|
||||
;push esi
|
||||
;push edi
|
||||
;push ebx
|
||||
|
||||
|
||||
;mov esi, [esp+16]
|
||||
;mov eax, [esp+20]
|
||||
;mov edi, [esp+24]
|
||||
;mov ecx, [esp+28]
|
||||
;mov edx, [esp+32]
|
||||
|
||||
push r5
|
||||
%assign push_num 1
|
||||
LOAD_5_PARA
|
||||
@ -614,14 +553,6 @@ ALIGN 16
|
||||
; uint8_t *pDst, int iDstStride, int iHeight )
|
||||
;*******************************************************************************
|
||||
McCopyWidthEq8_mmx:
|
||||
;push esi
|
||||
;push edi
|
||||
;mov esi, [esp+12]
|
||||
;mov eax, [esp+16]
|
||||
;mov edi, [esp+20]
|
||||
;mov ecx, [esp+24]
|
||||
;mov edx, [esp+28]
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
|
||||
@ -659,15 +590,6 @@ ALIGN 16
|
||||
movhps [%1+8], %2
|
||||
%endmacro
|
||||
McCopyWidthEq16_sse2:
|
||||
;push esi
|
||||
;push edi
|
||||
|
||||
;mov esi, [esp+12] ; pSrc
|
||||
;mov eax, [esp+16] ; iSrcStride
|
||||
;mov edi, [esp+20] ; pDst
|
||||
;mov edx, [esp+24] ; iDstStride
|
||||
;mov ecx, [esp+28] ; iHeight
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
|
@ -76,18 +76,12 @@ ALIGN 16
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN McChromaWidthEq4_mmx
|
||||
McChromaWidthEq4_mmx:
|
||||
;push esi
|
||||
;push edi
|
||||
;push ebx
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_6_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
SIGN_EXTENSION r3, r3d
|
||||
SIGN_EXTENSION r5, r5d
|
||||
|
||||
;mov eax, [esp +12 + 20]
|
||||
|
||||
movd mm3, [r4]; [eax]
|
||||
WELS_Zero mm7
|
||||
punpcklbw mm3, mm3
|
||||
@ -103,12 +97,6 @@ McChromaWidthEq4_mmx:
|
||||
punpcklbw mm4, mm7
|
||||
punpckhbw mm6, mm7
|
||||
|
||||
;mov esi, [esp +12+ 4]
|
||||
;mov eax, [esp + 12 + 8]
|
||||
;mov edi, [esp + 12 + 12]
|
||||
;mov edx, [esp + 12 + 16]
|
||||
;mov ecx, [esp + 12 + 24]
|
||||
|
||||
lea r4, [r0 + r1] ;lea ebx, [esi + eax]
|
||||
movd mm0, [r0]
|
||||
movd mm1, [r0+1]
|
||||
@ -149,9 +137,6 @@ McChromaWidthEq4_mmx:
|
||||
jnz near .xloop
|
||||
WELSEMMS
|
||||
LOAD_6_PARA_POP
|
||||
;pop ebx
|
||||
;pop edi
|
||||
;pop esi
|
||||
ret
|
||||
|
||||
|
||||
@ -166,17 +151,12 @@ ALIGN 16
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN McChromaWidthEq8_sse2
|
||||
McChromaWidthEq8_sse2:
|
||||
;push esi
|
||||
;push edi
|
||||
;push ebx
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_6_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
SIGN_EXTENSION r3, r3d
|
||||
SIGN_EXTENSION r5, r5d
|
||||
|
||||
;mov eax, [esp +12 + 20]
|
||||
movd xmm3, [r4]
|
||||
WELS_Zero xmm7
|
||||
punpcklbw xmm3, xmm3
|
||||
@ -193,12 +173,6 @@ McChromaWidthEq8_sse2:
|
||||
punpcklbw xmm4, xmm7
|
||||
punpckhbw xmm6, xmm7
|
||||
|
||||
;mov esi, [esp +12+ 4]
|
||||
;mov eax, [esp + 12 + 8]
|
||||
;mov edi, [esp + 12 + 12]
|
||||
;mov edx, [esp + 12 + 16]
|
||||
;mov ecx, [esp + 12 + 24]
|
||||
|
||||
lea r4, [r0 + r1] ;lea ebx, [esi + eax]
|
||||
movq xmm0, [r0]
|
||||
movq xmm1, [r0+1]
|
||||
@ -240,9 +214,6 @@ McChromaWidthEq8_sse2:
|
||||
|
||||
LOAD_6_PARA_POP
|
||||
|
||||
;pop ebx
|
||||
;pop edi
|
||||
;pop esi
|
||||
ret
|
||||
|
||||
|
||||
@ -259,17 +230,12 @@ ALIGN 16
|
||||
;***********************************************************************
|
||||
WELS_EXTERN McChromaWidthEq8_ssse3
|
||||
McChromaWidthEq8_ssse3:
|
||||
;push ebx
|
||||
;push esi
|
||||
;push edi
|
||||
%assign push_num 0
|
||||
LOAD_6_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
SIGN_EXTENSION r3, r3d
|
||||
SIGN_EXTENSION r5, r5d
|
||||
|
||||
;mov eax, [esp + 12 + 20]
|
||||
|
||||
pxor xmm7, xmm7
|
||||
movd xmm5, [r4]
|
||||
punpcklwd xmm5, xmm5
|
||||
@ -278,12 +244,6 @@ McChromaWidthEq8_ssse3:
|
||||
punpcklqdq xmm5, xmm5
|
||||
punpckhqdq xmm6, xmm6
|
||||
|
||||
;mov eax, [esp + 12 + 4]
|
||||
;mov edx, [esp + 12 + 8]
|
||||
;mov esi, [esp + 12 + 12]
|
||||
;mov edi, [esp + 12 + 16]
|
||||
;mov ecx, [esp + 12 + 24]
|
||||
|
||||
sub r2, r3 ;sub esi, edi
|
||||
sub r2, r3
|
||||
movdqa xmm7, [h264_d0x20_sse2]
|
||||
@ -330,10 +290,6 @@ McChromaWidthEq8_ssse3:
|
||||
|
||||
LOAD_6_PARA_POP
|
||||
|
||||
;pop edi
|
||||
;pop esi
|
||||
;pop ebx
|
||||
|
||||
ret
|
||||
|
||||
|
||||
|
@ -83,15 +83,6 @@ ALIGN 16
|
||||
; int iHeight)
|
||||
;*******************************************************************************
|
||||
McHorVer20WidthEq4_mmx:
|
||||
;push esi
|
||||
;push edi
|
||||
|
||||
;mov esi, [esp+12]
|
||||
;mov eax, [esp+16]
|
||||
;mov edi, [esp+20]
|
||||
;mov ecx, [esp+24]
|
||||
;mov edx, [esp+28]
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -185,15 +176,6 @@ ALIGN 16
|
||||
; )
|
||||
;***********************************************************************
|
||||
McHorVer22Width8HorFirst_sse2:
|
||||
;push esi
|
||||
;push edi
|
||||
;push ebx
|
||||
;mov esi, [esp+16] ;pSrc
|
||||
;mov eax, [esp+20] ;iSrcStride
|
||||
;mov edi, [esp+24] ;pDst
|
||||
;mov edx, [esp+28] ;iDstStride
|
||||
;mov ebx, [esp+32] ;iHeight
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -245,15 +227,6 @@ ALIGN 16
|
||||
; );
|
||||
;*******************************************************************************
|
||||
McHorVer20WidthEq8_sse2:
|
||||
;push esi
|
||||
;push edi
|
||||
|
||||
;mov esi, [esp + 12] ;pSrc
|
||||
;mov eax, [esp + 16] ;iSrcStride
|
||||
;mov edi, [esp + 20] ;pDst
|
||||
;mov ecx, [esp + 28] ;iHeight
|
||||
;mov edx, [esp + 24] ;iDstStride
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -309,14 +282,6 @@ ALIGN 16
|
||||
; );
|
||||
;*******************************************************************************
|
||||
McHorVer20WidthEq16_sse2:
|
||||
;push esi
|
||||
;push edi
|
||||
;mov esi, [esp + 12] ;pSrc
|
||||
;mov eax, [esp + 16] ;iSrcStride
|
||||
;mov edi, [esp + 20] ;pDst
|
||||
;mov ecx, [esp + 28] ;iHeight
|
||||
;mov edx, [esp + 24] ;iDstStride
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -398,14 +363,6 @@ McHorVer20WidthEq16_sse2:
|
||||
;*******************************************************************************
|
||||
ALIGN 16
|
||||
McHorVer02WidthEq8_sse2:
|
||||
;push esi
|
||||
;push edi
|
||||
;mov esi, [esp + 12] ;pSrc
|
||||
;mov edx, [esp + 16] ;iSrcStride
|
||||
;mov edi, [esp + 20] ;pDst
|
||||
;mov eax, [esp + 24] ;iDstStride
|
||||
;mov ecx, [esp + 28] ;iHeight
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -503,17 +460,6 @@ WELS_EXTERN McHorVer22HorFirst_sse2
|
||||
;***********************************************************************
|
||||
ALIGN 16
|
||||
McHorVer02Height9Or17_sse2:
|
||||
;push esi
|
||||
;push edi
|
||||
;push ebx
|
||||
|
||||
;mov esi, [esp + 16]
|
||||
;mov edx, [esp + 20]
|
||||
;mov edi, [esp + 24]
|
||||
;mov eax, [esp + 28]
|
||||
;mov ecx, [esp + 36]
|
||||
;mov ebx, [esp + 32]
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_6_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -612,9 +558,6 @@ McHorVer02Height9Or17_sse2:
|
||||
.x_loop_dec:
|
||||
dec r4
|
||||
jz near .xx_exit
|
||||
;mov esi, [esp + 16]
|
||||
;mov edi, [esp + 24]
|
||||
;mov ecx, [esp + 36]
|
||||
%ifdef X86_32
|
||||
mov r0, arg1
|
||||
mov r2, arg3
|
||||
@ -651,16 +594,6 @@ ALIGN 16
|
||||
; );
|
||||
;***********************************************************************
|
||||
McHorVer20Width9Or17_sse2:
|
||||
;push esi
|
||||
;push edi
|
||||
;push ebx
|
||||
;mov esi, [esp+16]
|
||||
;mov eax, [esp+20]
|
||||
;mov edi, [esp+24]
|
||||
;mov edx, [esp+28]
|
||||
;mov ecx, [esp+32]
|
||||
;mov ebx, [esp+36]
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_6_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -819,16 +752,6 @@ ALIGN 16
|
||||
; int32_t iWidth,int32_t iHeight);
|
||||
;***********************************************************************
|
||||
McHorVer22HorFirst_sse2:
|
||||
;push esi
|
||||
;push edi
|
||||
;push ebx
|
||||
;mov esi, [esp+16]
|
||||
;mov eax, [esp+20]
|
||||
;mov edi, [esp+24]
|
||||
;mov edx, [esp+28]
|
||||
;mov ecx, [esp+32]
|
||||
;mov ebx, [esp+36]
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_6_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -996,18 +919,6 @@ McHorVer22HorFirst_sse2:
|
||||
;***********************************************************************
|
||||
|
||||
McHorVer22Width8VerLastAlign_sse2:
|
||||
;push esi
|
||||
;push edi
|
||||
;push ebx
|
||||
;push ebp
|
||||
|
||||
;mov esi, [esp+20]
|
||||
;mov eax, [esp+24]
|
||||
;mov edi, [esp+28]
|
||||
;mov edx, [esp+32]
|
||||
;mov ebx, [esp+36]
|
||||
;mov ecx, [esp+40]
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_6_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -1104,9 +1015,6 @@ McHorVer22HorFirst_sse2:
|
||||
.x_loop_dec:
|
||||
dec r4
|
||||
jz near .exit
|
||||
;mov esi, [esp+20]
|
||||
;mov edi, [esp+28]
|
||||
;mov ecx, [esp+40]
|
||||
%ifdef X86_32
|
||||
mov r0, arg1
|
||||
mov r2, arg3
|
||||
@ -1140,18 +1048,6 @@ McHorVer22HorFirst_sse2:
|
||||
;***********************************************************************
|
||||
|
||||
McHorVer22Width8VerLastUnAlign_sse2:
|
||||
;push esi
|
||||
;push edi
|
||||
;push ebx
|
||||
;push ebp
|
||||
|
||||
;mov esi, [esp+20]
|
||||
;mov eax, [esp+24]
|
||||
;mov edi, [esp+28]
|
||||
;mov edx, [esp+32]
|
||||
;mov ebx, [esp+36]
|
||||
;mov ecx, [esp+40]
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_6_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -1247,9 +1143,6 @@ McHorVer22HorFirst_sse2:
|
||||
.x_loop_dec:
|
||||
dec r4
|
||||
jz near .exit
|
||||
;mov esi, [esp+20]
|
||||
;mov edi, [esp+28]
|
||||
;mov ecx, [esp+40]
|
||||
%ifdef X86_32
|
||||
mov r0, arg1
|
||||
mov r2, arg3
|
||||
|
@ -158,12 +158,6 @@ SECTION .text
|
||||
WELS_EXTERN WelsSampleSatd4x4_sse2
|
||||
align 16
|
||||
WelsSampleSatd4x4_sse2:
|
||||
;push ebx
|
||||
;mov eax, [esp+8]
|
||||
;mov ebx, [esp+12]
|
||||
;mov ecx, [esp+16]
|
||||
;mov edx, [esp+20]
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_4_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -238,12 +232,6 @@ WelsSampleSatd4x4_sse2:
|
||||
WELS_EXTERN WelsSampleSatd8x8_sse2
|
||||
align 16
|
||||
WelsSampleSatd8x8_sse2:
|
||||
;push ebx
|
||||
;mov eax, [esp+8]
|
||||
;mov ebx, [esp+12]
|
||||
;mov ecx, [esp+16]
|
||||
;mov edx, [esp+20]
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_4_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -265,12 +253,6 @@ align 16
|
||||
WELS_EXTERN WelsSampleSatd8x16_sse2
|
||||
align 16
|
||||
WelsSampleSatd8x16_sse2:
|
||||
;push ebx
|
||||
;mov eax, [esp+8]
|
||||
;mov ebx, [esp+12]
|
||||
;mov ecx, [esp+16]
|
||||
;mov edx, [esp+20]
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_4_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -297,12 +279,6 @@ align 16
|
||||
WELS_EXTERN WelsSampleSatd16x8_sse2
|
||||
align 16
|
||||
WelsSampleSatd16x8_sse2:
|
||||
;push ebx
|
||||
;mov eax, [esp+8]
|
||||
;mov ebx, [esp+12]
|
||||
;mov ecx, [esp+16]
|
||||
;mov edx, [esp+20]
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_4_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -316,8 +292,6 @@ WelsSampleSatd16x8_sse2:
|
||||
|
||||
pop r2
|
||||
pop r0
|
||||
;mov eax, [esp+8]
|
||||
;mov ecx, [esp+16]
|
||||
add r0, 8
|
||||
add r2, 8
|
||||
SSE2_GetSatd8x8
|
||||
@ -336,12 +310,6 @@ WelsSampleSatd16x8_sse2:
|
||||
WELS_EXTERN WelsSampleSatd16x16_sse2
|
||||
align 16
|
||||
WelsSampleSatd16x16_sse2:
|
||||
;push ebx
|
||||
;mov eax, [esp+8]
|
||||
;mov ebx, [esp+12]
|
||||
;mov ecx, [esp+16]
|
||||
;mov edx, [esp+20]
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_4_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -358,8 +326,6 @@ WelsSampleSatd16x16_sse2:
|
||||
|
||||
pop r2
|
||||
pop r0
|
||||
;mov eax, [esp+8]
|
||||
;mov ecx, [esp+16]
|
||||
add r0, 8
|
||||
add r2, 8
|
||||
|
||||
@ -1022,12 +988,6 @@ return_sad_intra_16x16_x3:
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsSampleSatd4x4_sse41
|
||||
WelsSampleSatd4x4_sse41:
|
||||
;push ebx
|
||||
;mov eax,[esp+8]
|
||||
;mov ebx,[esp+12]
|
||||
;mov ecx,[esp+16]
|
||||
;mov edx,[esp+20]
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_4_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -1082,13 +1042,6 @@ WelsSampleSatd4x4_sse41:
|
||||
WELS_EXTERN WelsSampleSatd8x8_sse41
|
||||
align 16
|
||||
WelsSampleSatd8x8_sse41:
|
||||
;push ebx
|
||||
;push esi
|
||||
;push edi
|
||||
;mov eax, [esp+16]
|
||||
;mov ebx, [esp+20]
|
||||
;mov ecx, [esp+24]
|
||||
;mov edx, [esp+28]
|
||||
%ifdef X86_32
|
||||
push r4
|
||||
push r5
|
||||
@ -1121,15 +1074,6 @@ WelsSampleSatd8x8_sse41:
|
||||
WELS_EXTERN WelsSampleSatd8x16_sse41
|
||||
align 16
|
||||
WelsSampleSatd8x16_sse41:
|
||||
;push ebx
|
||||
;push esi
|
||||
;push edi
|
||||
;push ebp
|
||||
;%define pushsize 16
|
||||
;mov eax, [esp+pushsize+4]
|
||||
;mov ebx, [esp+pushsize+8]
|
||||
;mov ecx, [esp+pushsize+12]
|
||||
;mov edx, [esp+pushsize+16]
|
||||
%ifdef X86_32
|
||||
push r4
|
||||
push r5
|
||||
@ -1168,13 +1112,6 @@ loop_get_satd_8x16:
|
||||
WELS_EXTERN WelsSampleSatd16x8_sse41
|
||||
align 16
|
||||
WelsSampleSatd16x8_sse41:
|
||||
;push ebx
|
||||
;push esi
|
||||
;push edi
|
||||
;mov eax, [esp+16]
|
||||
;mov ebx, [esp+20]
|
||||
;mov ecx, [esp+24]
|
||||
;mov edx, [esp+28]
|
||||
%ifdef X86_32
|
||||
push r4
|
||||
push r5
|
||||
@ -1197,8 +1134,6 @@ WelsSampleSatd16x8_sse41:
|
||||
|
||||
pop r2
|
||||
pop r0
|
||||
;mov eax, [esp+16]
|
||||
;mov ecx, [esp+24]
|
||||
add r0, 8
|
||||
add r2, 8
|
||||
SSE41_GetSatd8x4
|
||||
@ -1222,15 +1157,6 @@ WelsSampleSatd16x8_sse41:
|
||||
WELS_EXTERN WelsSampleSatd16x16_sse41
|
||||
align 16
|
||||
WelsSampleSatd16x16_sse41:
|
||||
;push ebx
|
||||
;push esi
|
||||
;push edi
|
||||
;push ebp
|
||||
;%define pushsize 16
|
||||
;mov eax, [esp+pushsize+4]
|
||||
;mov ebx, [esp+pushsize+8]
|
||||
;mov ecx, [esp+pushsize+12]
|
||||
;mov edx, [esp+pushsize+16]
|
||||
%ifdef X86_32
|
||||
push r4
|
||||
push r5
|
||||
@ -1259,8 +1185,6 @@ loop_get_satd_16x16_left:
|
||||
|
||||
pop r2
|
||||
pop r0
|
||||
;mov eax, [esp+pushsize+4]
|
||||
;mov ecx, [esp+pushsize+12]
|
||||
add r0, 8
|
||||
add r2, 8
|
||||
mov r6, 0
|
||||
@ -1272,7 +1196,6 @@ loop_get_satd_16x16_right:
|
||||
cmp r6, 4
|
||||
jl loop_get_satd_16x16_right
|
||||
SSSE3_SumWHorizon retrd, xmm6, xmm5, xmm7
|
||||
;%undef pushsize
|
||||
LOAD_4_PARA_POP
|
||||
%ifdef X86_32
|
||||
pop r6
|
||||
@ -1355,14 +1278,6 @@ loop_get_satd_16x16_right:
|
||||
WELS_EXTERN WelsSampleSad16x16_sse2
|
||||
align 16
|
||||
WelsSampleSad16x16_sse2:
|
||||
;push ebx
|
||||
;push edi
|
||||
;push esi
|
||||
;%define _STACK_SIZE 12
|
||||
;mov eax, [esp+_STACK_SIZE+4 ]
|
||||
;mov ebx, [esp+_STACK_SIZE+8 ]
|
||||
;mov ecx, [esp+_STACK_SIZE+12]
|
||||
;mov edx, [esp+_STACK_SIZE+16]
|
||||
%ifdef X86_32
|
||||
push r4
|
||||
push r5
|
||||
@ -1406,12 +1321,6 @@ WelsSampleSad16x16_sse2:
|
||||
WELS_EXTERN WelsSampleSad16x8_sse2
|
||||
align 16
|
||||
WelsSampleSad16x8_sse2:
|
||||
;push ebx
|
||||
;mov eax, [esp+8]
|
||||
;mov ebx, [esp+12]
|
||||
;mov ecx, [esp+16]
|
||||
;mov edx, [esp+20]
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_4_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -1438,12 +1347,6 @@ WelsSampleSad16x8_sse2:
|
||||
|
||||
WELS_EXTERN WelsSampleSad8x16_sse2
|
||||
WelsSampleSad8x16_sse2:
|
||||
;push ebx
|
||||
;mov eax, [esp+8]
|
||||
;mov ebx, [esp+12]
|
||||
;mov ecx, [esp+16]
|
||||
;mov edx, [esp+20]
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_4_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -1475,15 +1378,6 @@ cmp %1, (32-%2)|(%3>>1)
|
||||
|
||||
WELS_EXTERN WelsSampleSad8x8_sse21
|
||||
WelsSampleSad8x8_sse21:
|
||||
;mov ecx, [esp+12]
|
||||
;mov edx, ecx
|
||||
;CACHE_SPLIT_CHECK edx, 8, 64
|
||||
;jle near .pixel_sad_8x8_nsplit
|
||||
;push ebx
|
||||
;push edi
|
||||
;mov eax, [esp+12]
|
||||
;mov ebx, [esp+16]
|
||||
|
||||
%assign push_num 0
|
||||
mov r2, arg3
|
||||
push r2
|
||||
@ -1596,10 +1490,6 @@ WelsSampleSad8x8_sse21:
|
||||
jmp .return
|
||||
|
||||
.pixel_sad_8x8_nsplit:
|
||||
;push ebx
|
||||
;mov eax, [esp+8]
|
||||
;mov ebx, [esp+12]
|
||||
;mov edx, [esp+20]
|
||||
|
||||
pop r2
|
||||
%assign push_num 0
|
||||
@ -1647,12 +1537,6 @@ WelsSampleSad8x8_sse21:
|
||||
%endmacro
|
||||
WELS_EXTERN WelsSampleSadFour16x16_sse2
|
||||
WelsSampleSadFour16x16_sse2:
|
||||
;push ebx
|
||||
;mov eax, [esp+8]
|
||||
;mov ebx, [esp+12]
|
||||
;mov ecx, [esp+16]
|
||||
;mov edx, [esp+20]
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -1753,7 +1637,6 @@ WelsSampleSadFour16x16_sse2:
|
||||
psadbw xmm0, xmm3
|
||||
paddw xmm5, xmm0
|
||||
|
||||
;mov ecx, [esp+24]
|
||||
movhlps xmm0, xmm4
|
||||
paddw xmm4, xmm0
|
||||
movhlps xmm0, xmm5
|
||||
@ -1772,13 +1655,6 @@ WelsSampleSadFour16x16_sse2:
|
||||
|
||||
WELS_EXTERN WelsSampleSadFour16x8_sse2
|
||||
WelsSampleSadFour16x8_sse2:
|
||||
;push ebx
|
||||
;push edi
|
||||
;mov eax, [esp+12]
|
||||
;mov ebx, [esp+16]
|
||||
;mov edi, [esp+20]
|
||||
;mov edx, [esp+24]
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -1847,7 +1723,6 @@ WelsSampleSadFour16x8_sse2:
|
||||
psadbw xmm1, xmm3
|
||||
paddw xmm5, xmm1
|
||||
|
||||
;mov edi, [esp+28]
|
||||
movhlps xmm0, xmm4
|
||||
paddw xmm4, xmm0
|
||||
movhlps xmm0, xmm5
|
||||
@ -1865,13 +1740,6 @@ WelsSampleSadFour16x8_sse2:
|
||||
|
||||
WELS_EXTERN WelsSampleSadFour8x16_sse2
|
||||
WelsSampleSadFour8x16_sse2:
|
||||
;push ebx
|
||||
;push edi
|
||||
;mov eax, [esp+12]
|
||||
;mov ebx, [esp+16]
|
||||
;mov edi, [esp+20]
|
||||
;mov edx, [esp+24]
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -2066,7 +1934,6 @@ WelsSampleSadFour8x16_sse2:
|
||||
psadbw xmm0, xmm3
|
||||
paddw xmm5, xmm0
|
||||
|
||||
;mov edi, [esp+28]
|
||||
movhlps xmm0, xmm4
|
||||
paddw xmm4, xmm0
|
||||
movhlps xmm0, xmm5
|
||||
@ -2085,13 +1952,6 @@ WelsSampleSadFour8x16_sse2:
|
||||
|
||||
WELS_EXTERN WelsSampleSadFour8x8_sse2
|
||||
WelsSampleSadFour8x8_sse2:
|
||||
;push ebx
|
||||
;push edi
|
||||
;mov eax, [esp+12]
|
||||
;mov ebx, [esp+16]
|
||||
;mov edi, [esp+20]
|
||||
;mov edx, [esp+24]
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -2195,7 +2055,6 @@ WelsSampleSadFour8x8_sse2:
|
||||
psadbw xmm0, xmm3
|
||||
paddw xmm5, xmm0
|
||||
|
||||
;mov edi, [esp+28]
|
||||
movhlps xmm0, xmm4
|
||||
paddw xmm4, xmm0
|
||||
movhlps xmm0, xmm5
|
||||
@ -2213,13 +2072,6 @@ WelsSampleSadFour8x8_sse2:
|
||||
|
||||
WELS_EXTERN WelsSampleSadFour4x4_sse2
|
||||
WelsSampleSadFour4x4_sse2:
|
||||
;push ebx
|
||||
;push edi
|
||||
;mov eax, [esp+12]
|
||||
;mov ebx, [esp+16]
|
||||
;mov edi, [esp+20]
|
||||
;mov edx, [esp+24]
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -2279,7 +2131,6 @@ WelsSampleSadFour4x4_sse2:
|
||||
paddw xmm3, xmm0
|
||||
movhlps xmm0, xmm4
|
||||
paddw xmm4, xmm0
|
||||
;mov edi, [esp+28]
|
||||
punpckldq xmm1, xmm4
|
||||
punpckldq xmm2, xmm3
|
||||
punpcklqdq xmm1, xmm2
|
||||
@ -2300,17 +2151,6 @@ align 16
|
||||
; int32_t WelsSampleSad4x4_mmx (uint8_t *, int32_t, uint8_t *, int32_t )
|
||||
;***********************************************************************
|
||||
WelsSampleSad4x4_mmx:
|
||||
;push ebx
|
||||
;%define pushsize 4
|
||||
;%define pix1address esp+pushsize+4
|
||||
;%define pix1stride esp+pushsize+8
|
||||
;%define pix2address esp+pushsize+12
|
||||
;%define pix2stride esp+pushsize+16
|
||||
;mov eax, [pix1address]
|
||||
;mov ebx, [pix1stride ]
|
||||
;mov ecx, [pix2address]
|
||||
;mov edx, [pix2stride ]
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_4_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
|
@ -56,15 +56,10 @@ ALIGN 16
|
||||
; void WelsResBlockZero16x16_sse2(int16_t* pBlock,int32_t iStride)
|
||||
;*******************************************************************************
|
||||
WelsResBlockZero16x16_sse2:
|
||||
;push r0
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
;mov r0, [esp+08h]
|
||||
;mov r1, [esp+0ch]
|
||||
;lea r1, [r1*2]
|
||||
lea r1, [r1*2]
|
||||
;lea r2, [r1*3]
|
||||
lea r2, [r1*3]
|
||||
|
||||
pxor xmm7, xmm7
|
||||
@ -124,7 +119,6 @@ WelsResBlockZero16x16_sse2:
|
||||
movdqa [r0+r2], xmm7
|
||||
movdqa [r0+r2+10h], xmm7
|
||||
|
||||
;pop r0
|
||||
ret
|
||||
|
||||
|
||||
@ -135,12 +129,9 @@ ALIGN 16
|
||||
; void WelsResBlockZero8x8_sse2(int16_t * pBlock, int32_t iStride)
|
||||
;*******************************************************************************
|
||||
WelsResBlockZero8x8_sse2:
|
||||
;push r0
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
;mov r0, [esp+08h]
|
||||
;mov r1, [esp+0ch]
|
||||
lea r1, [r1*2]
|
||||
lea r2, [r1*3]
|
||||
|
||||
@ -158,6 +149,5 @@ WelsResBlockZero8x8_sse2:
|
||||
movdqa [r0+r2], xmm7
|
||||
|
||||
|
||||
;pop r0
|
||||
ret
|
||||
|
||||
|
@ -192,8 +192,6 @@ WelsDecoderI4x4LumaPredH_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
;mov eax, [esp+4] ;pPred
|
||||
;mov ecx, [esp+8] ;kiStride
|
||||
|
||||
movzx r2, byte [r0-1]
|
||||
movd xmm0, r2d
|
||||
@ -225,16 +223,12 @@ WelsDecoderI4x4LumaPredH_sse2:
|
||||
; void WelsDecoderI16x16LumaPredPlane_sse2(uint8_t *pPred, const int32_t kiStride);
|
||||
;*******************************************************************************
|
||||
WelsDecoderI16x16LumaPredPlane_sse2:
|
||||
;%define pushsize 4
|
||||
push r3
|
||||
push r4
|
||||
%assign push_num 2
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
mov r4, r0 ; save r0 in r4
|
||||
;push esi
|
||||
;mov esi, [esp + pushsize + 4]
|
||||
;mov ecx, [esp + pushsize + 8]
|
||||
sub r0, 1
|
||||
sub r0, r1
|
||||
|
||||
@ -286,7 +280,6 @@ WelsDecoderI16x16LumaPredPlane_sse2:
|
||||
sar r2, 6 ; c = (5 * V + 32) >> 6;
|
||||
SSE2_Copy8Times xmm4, r2d ; xmm4 = c,c,c,c,c,c,c,c
|
||||
|
||||
;mov esi, [esp + pushsize + 4]
|
||||
mov r0, r4
|
||||
add r3, 16
|
||||
imul r2, -7
|
||||
@ -313,7 +306,6 @@ get_i16x16_luma_pred_plane_sse2_1:
|
||||
cmp r2, 16
|
||||
jnz get_i16x16_luma_pred_plane_sse2_1
|
||||
|
||||
;pop esi
|
||||
pop r4
|
||||
pop r3
|
||||
ret
|
||||
@ -338,8 +330,6 @@ WelsDecoderI16x16LumaPredH_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
;mov eax, [esp+4] ; pPred
|
||||
;mov ecx, [esp+8] ; kiStride
|
||||
|
||||
COPY_16_TIMES r0, xmm0
|
||||
movdqa [r0], xmm0
|
||||
@ -364,8 +354,6 @@ WelsDecoderI16x16LumaPredV_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
;mov edx, [esp+4] ; pPred
|
||||
;mov ecx, [esp+8] ; kiStride
|
||||
|
||||
sub r0, r1
|
||||
movdqa xmm0, [r0]
|
||||
@ -402,16 +390,12 @@ WelsDecoderI16x16LumaPredV_sse2:
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN WelsDecoderIChromaPredPlane_sse2
|
||||
WelsDecoderIChromaPredPlane_sse2:
|
||||
;%define pushsize 4
|
||||
push r3
|
||||
push r4
|
||||
%assign push_num 2
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
mov r4, r0
|
||||
;push esi
|
||||
;mov esi, [esp + pushsize + 4] ;pPred
|
||||
;mov ecx, [esp + pushsize + 8] ;kiStride
|
||||
sub r0, 1
|
||||
sub r0, r1
|
||||
|
||||
@ -466,7 +450,6 @@ WelsDecoderIChromaPredPlane_sse2:
|
||||
sar r2, 5 ; c = (17 * V + 16) >> 5;
|
||||
SSE2_Copy8Times xmm4, r2d ; mm4 = c,c,c,c,c,c,c,c
|
||||
|
||||
;mov esi, [esp + pushsize + 4]
|
||||
mov r0, r4
|
||||
add r3, 16
|
||||
imul r2, -3
|
||||
@ -489,7 +472,6 @@ get_i_chroma_pred_plane_sse2_1:
|
||||
cmp r2, 8
|
||||
jnz get_i_chroma_pred_plane_sse2_1
|
||||
|
||||
;pop esi
|
||||
pop r4
|
||||
pop r3
|
||||
WELSEMMS
|
||||
@ -513,9 +495,6 @@ WelsDecoderI4x4LumaPredDDR_mmx:
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
mov r2, r0
|
||||
;mov edx,[esp+4] ;pPred
|
||||
;mov eax,edx
|
||||
;mov ecx,[esp+8] ;kiStride
|
||||
|
||||
movq mm1,[r2+r1-8] ;get value of 11,decreasing 8 is trying to improve the performance of movq mm1[8] = 11
|
||||
movq mm2,[r2-8] ;get value of 6 mm2[8] = 6
|
||||
@ -586,9 +565,6 @@ WelsDecoderIChromaPredH_mmx:
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
mov r2, r0
|
||||
;mov edx, [esp+4] ;pPred
|
||||
;mov eax, edx
|
||||
;mov ecx, [esp+8] ;kiStride
|
||||
|
||||
movq mm0, [r2-8]
|
||||
psrlq mm0, 38h
|
||||
@ -631,8 +607,6 @@ WelsDecoderIChromaPredV_mmx:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
;mov eax, [esp+4] ;pPred
|
||||
;mov ecx, [esp+8] ;kiStride
|
||||
|
||||
sub r0, r1
|
||||
movq mm0, [r0]
|
||||
@ -689,9 +663,6 @@ WelsDecoderI4x4LumaPredHD_mmx:
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
mov r2, r0
|
||||
;mov edx, [esp+4] ; pPred
|
||||
;mov eax, edx
|
||||
;mov ecx, [esp+8] ; kiStride
|
||||
sub r2, r1
|
||||
movd mm0, [r2-1] ; mm0 = [xx xx xx xx t2 t1 t0 lt]
|
||||
psllq mm0, 20h ; mm0 = [t2 t1 t0 lt xx xx xx xx]
|
||||
@ -776,9 +747,6 @@ WelsDecoderI4x4LumaPredHU_mmx:
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
mov r2, r0
|
||||
;mov edx, [esp+4] ; pPred
|
||||
;mov eax, edx
|
||||
;mov ecx, [esp+8] ; kiStride
|
||||
|
||||
movd mm0, [r2-4] ; mm0[3] = l0
|
||||
punpcklbw mm0, [r2+r1-4] ; mm0[7] = l1, mm0[6] = l0
|
||||
@ -866,9 +834,6 @@ WelsDecoderI4x4LumaPredVR_mmx:
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
mov r2, r0
|
||||
;mov edx, [esp+4] ; pPred
|
||||
;mov eax, edx
|
||||
;mov ecx, [esp+8] ; kiStride
|
||||
sub r2, r1
|
||||
movq mm0, [r2-1] ; mm0 = [xx xx xx t3 t2 t1 t0 lt]
|
||||
psllq mm0, 18h ; mm0 = [t3 t2 t1 t0 lt xx xx xx]
|
||||
@ -957,9 +922,6 @@ WelsDecoderI4x4LumaPredDDL_mmx:
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
mov r2, r0
|
||||
;mov edx, [esp+4] ; pPred
|
||||
;mov eax, edx
|
||||
;mov ecx, [esp+8] ; kiStride
|
||||
sub r2, r1
|
||||
movq mm0, [r2] ; mm0 = [t7 t6 t5 t4 t3 t2 t1 t0]
|
||||
movq mm1, mm0
|
||||
@ -1030,9 +992,6 @@ WelsDecoderI4x4LumaPredVL_mmx:
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
mov r2, r0
|
||||
;mov edx, [esp+4] ; pPred
|
||||
;mov eax, edx
|
||||
;mov ecx, [esp+8] ; kiStride
|
||||
|
||||
sub r2, r1
|
||||
movq mm0, [r2] ; mm0 = [t7 t6 t5 t4 t3 t2 t1 t0]
|
||||
@ -1077,9 +1036,6 @@ WelsDecoderIChromaPredDc_sse2:
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
mov r4, r0
|
||||
;push ebx
|
||||
;mov eax, [esp+8] ; pPred
|
||||
;mov ecx, [esp+12] ; kiStride
|
||||
|
||||
sub r0, r1
|
||||
movq mm0, [r0]
|
||||
@ -1144,8 +1100,6 @@ WelsDecoderIChromaPredDc_sse2:
|
||||
psllq mm1, 0x20
|
||||
pxor mm1, mm2 ; mm2 = m_down
|
||||
|
||||
;mov edx, [esp+8] ; pPred
|
||||
|
||||
movq [r4], mm0
|
||||
movq [r4+r1], mm0
|
||||
movq [r4+2*r1], mm0
|
||||
@ -1159,7 +1113,6 @@ WelsDecoderIChromaPredDc_sse2:
|
||||
lea r4, [r4+2*r1]
|
||||
movq [r4+r1], mm1
|
||||
|
||||
;pop ebx
|
||||
pop r4
|
||||
pop r3
|
||||
WELSEMMS
|
||||
@ -1174,9 +1127,6 @@ ALIGN 16
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN WelsDecoderI16x16LumaPredDc_sse2
|
||||
WelsDecoderI16x16LumaPredDc_sse2:
|
||||
;push ebx
|
||||
;mov eax, [esp+8] ; pPred
|
||||
;mov ecx, [esp+12] ; kiStride
|
||||
push r3
|
||||
push r4
|
||||
%assign push_num 2
|
||||
@ -1211,8 +1161,6 @@ WelsDecoderI16x16LumaPredDc_sse2:
|
||||
pmuludq xmm0, [mmx_01bytes]
|
||||
pshufd xmm0, xmm0, 0
|
||||
|
||||
;mov edx, [esp+8] ; pPred
|
||||
|
||||
movdqa [r4], xmm0
|
||||
movdqa [r4+r1], xmm0
|
||||
movdqa [r4+2*r1], xmm0
|
||||
@ -1244,7 +1192,6 @@ WelsDecoderI16x16LumaPredDc_sse2:
|
||||
|
||||
movdqa [r4+r1], xmm0
|
||||
|
||||
;pop ebx
|
||||
pop r4
|
||||
pop r3
|
||||
|
||||
@ -1260,10 +1207,6 @@ ALIGN 16
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN WelsDecoderI16x16LumaPredDcTop_sse2
|
||||
WelsDecoderI16x16LumaPredDcTop_sse2:
|
||||
;push ebx
|
||||
;%define PUSH_SIZE 4
|
||||
;mov eax, [esp+PUSH_SIZE+4] ; pPred
|
||||
;mov ebx, [esp+PUSH_SIZE+8] ; kiStride
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -1328,8 +1271,6 @@ WelsDecoderI16x16LumaPredDcTop_sse2:
|
||||
movdqa [r0+2*r1], xmm0
|
||||
movdqa [r0+r2], xmm1
|
||||
|
||||
;%undef PUSH_SIZE
|
||||
;pop ebx
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
@ -1338,12 +1279,6 @@ ALIGN 16
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN WelsDecoderI16x16LumaPredDcNA_sse2
|
||||
WelsDecoderI16x16LumaPredDcNA_sse2:
|
||||
;push ebx
|
||||
|
||||
;%define PUSH_SIZE 4
|
||||
|
||||
;mov eax, [esp+PUSH_SIZE+4] ; pPred
|
||||
;mov ebx, [esp+PUSH_SIZE+8] ; kiStride
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -1371,9 +1306,6 @@ WelsDecoderI16x16LumaPredDcNA_sse2:
|
||||
movdqa [r0+2*r1], xmm0
|
||||
movdqa [r0+r2], xmm1
|
||||
|
||||
;%undef PUSH_SIZE
|
||||
|
||||
;pop ebx
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
@ -1382,12 +1314,6 @@ ALIGN 16
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN WelsDecoderIChromaPredDcLeft_mmx
|
||||
WelsDecoderIChromaPredDcLeft_mmx:
|
||||
;push ebx
|
||||
;push esi
|
||||
;%define PUSH_SIZE 8
|
||||
;mov esi, [esp+PUSH_SIZE+4] ; pPred
|
||||
;mov ecx, [esp+PUSH_SIZE+8] ; kiStride
|
||||
;mov eax, esi
|
||||
push r3
|
||||
push r4
|
||||
%assign push_num 2
|
||||
@ -1450,8 +1376,6 @@ WelsDecoderIChromaPredDcLeft_mmx:
|
||||
movq [r4+r1], mm3
|
||||
movq [r4+2*r1], mm2
|
||||
movq [r4+r2], mm3
|
||||
;pop esi
|
||||
;pop ebx
|
||||
pop r4
|
||||
pop r3
|
||||
emms
|
||||
@ -1463,12 +1387,6 @@ ALIGN 16
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN WelsDecoderIChromaPredDcTop_sse2
|
||||
WelsDecoderIChromaPredDcTop_sse2:
|
||||
;push ebx
|
||||
;%define PUSH_SIZE 4
|
||||
;mov eax, [esp+PUSH_SIZE+4] ; pPred
|
||||
;mov ecx, [esp+PUSH_SIZE+8] ; kiStride
|
||||
;mov ebx, ecx
|
||||
;neg ebx
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -1500,8 +1418,6 @@ WelsDecoderIChromaPredDcTop_sse2:
|
||||
movq [r0+r1], xmm0
|
||||
movq [r0+2*r1], xmm0
|
||||
movq [r0+r2], xmm0
|
||||
;%undef PUSH_SIZE
|
||||
;pop ebx
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
@ -1510,10 +1426,6 @@ ALIGN 16
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN WelsDecoderIChromaPredDcNA_mmx
|
||||
WelsDecoderIChromaPredDcNA_mmx:
|
||||
;push ebx
|
||||
;%define PUSH_SIZE 4
|
||||
;mov eax, [esp+PUSH_SIZE+4] ; pPred
|
||||
;mov ebx, [esp+PUSH_SIZE+8] ; kiStride
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -1529,8 +1441,6 @@ WelsDecoderIChromaPredDcNA_mmx:
|
||||
movq [r0+r1], mm1
|
||||
movq [r0+2*r1], mm0
|
||||
movq [r0+r2], mm1
|
||||
;%undef PUSH_SIZE
|
||||
;pop ebx
|
||||
emms
|
||||
ret
|
||||
|
||||
|
@ -136,11 +136,6 @@ ALIGN 16
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsDctT4_mmx
|
||||
WelsDctT4_mmx:
|
||||
;push ebx
|
||||
;mov eax, [esp+12] ; pix1
|
||||
;mov ebx, [esp+16] ; i_pix1
|
||||
;mov ecx, [esp+20] ; pix2
|
||||
;mov edx, [esp+24] ; i_pix2
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r2, r2d
|
||||
@ -155,14 +150,12 @@ WelsDctT4_mmx:
|
||||
MMX_DCT mm3, mm5, mm2 ,mm4, mm1, mm6
|
||||
MMX_Trans4x4W mm2, mm3, mm4, mm1, mm5
|
||||
|
||||
;mov eax, [esp+ 8] ; pDct
|
||||
movq [r0+ 0], mm2
|
||||
movq [r0+ 8], mm1
|
||||
movq [r0+16], mm5
|
||||
movq [r0+24], mm4
|
||||
WELSEMMS
|
||||
LOAD_5_PARA_POP
|
||||
;pop ebx
|
||||
ret
|
||||
|
||||
|
||||
@ -171,26 +164,14 @@ WelsDctT4_mmx:
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsIDctT4Rec_mmx
|
||||
WelsIDctT4Rec_mmx:
|
||||
;push ebx
|
||||
;%define pushsize 4
|
||||
;%define p_dst esp+pushsize+4
|
||||
;%define i_dst esp+pushsize+8
|
||||
;%define p_pred esp+pushsize+12
|
||||
;%define i_pred esp+pushsize+16
|
||||
;%define pDct esp+pushsize+20
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
SIGN_EXTENSION r3, r3d
|
||||
; mov eax, [pDct ]
|
||||
movq mm0, [r4+ 0]
|
||||
movq mm1, [r4+ 8]
|
||||
movq mm2, [r4+16]
|
||||
movq mm3, [r4+24]
|
||||
;mov edx, [p_dst ] ; r0
|
||||
;mov ecx, [i_dst ] ; r1
|
||||
;mov eax, [p_pred] ; r2
|
||||
;mov ebx, [i_pred] ; r3
|
||||
|
||||
MMX_Trans4x4W mm0, mm1, mm2, mm3, mm4
|
||||
MMX_IDCT mm1, mm2, mm3, mm4, mm0, mm6
|
||||
@ -209,13 +190,6 @@ WelsIDctT4Rec_mmx:
|
||||
|
||||
WELSEMMS
|
||||
LOAD_5_PARA_POP
|
||||
;%undef pushsize
|
||||
;%undef p_dst
|
||||
;%undef i_dst
|
||||
;%undef p_pred
|
||||
;%undef i_pred
|
||||
;%undef pDct
|
||||
; pop ebx
|
||||
ret
|
||||
|
||||
|
||||
@ -319,13 +293,6 @@ WelsIDctT4Rec_mmx:
|
||||
WELS_EXTERN WelsDctFourT4_sse2
|
||||
ALIGN 16
|
||||
WelsDctFourT4_sse2:
|
||||
;push ebx
|
||||
;push esi
|
||||
;mov esi, [esp+12]
|
||||
;mov eax, [esp+16] ; pix1
|
||||
;mov ebx, [esp+20] ; i_pix1
|
||||
;mov ecx, [esp+24] ; pix2
|
||||
;mov edx, [esp+28] ; i_pix2
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r2, r2d
|
||||
@ -365,32 +332,16 @@ WelsDctFourT4_sse2:
|
||||
lea r0, [r0+64]
|
||||
SSE2_Store4x8p r0, xmm4, xmm2, xmm3, xmm0, xmm5
|
||||
|
||||
;pop esi
|
||||
;pop ebx
|
||||
LOAD_5_PARA_POP
|
||||
ret
|
||||
|
||||
|
||||
;%define rec esp + pushsize + 4
|
||||
;%define stride esp + pushsize + 8
|
||||
;%define pred esp + pushsize + 12
|
||||
;%define pred_stride esp + pushsize + 16
|
||||
;%define rs esp + pushsize + 20
|
||||
;***********************************************************************
|
||||
; void WelsIDctFourT4Rec_sse2(uint8_t *rec, int32_t stride, uint8_t *pred, int32_t pred_stride, int16_t *rs);
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsIDctFourT4Rec_sse2
|
||||
ALIGN 16
|
||||
WelsIDctFourT4Rec_sse2:
|
||||
;%define pushsize 8
|
||||
; push ebx
|
||||
; push esi
|
||||
|
||||
; mov eax, [rec]
|
||||
; mov ebx, [stride]
|
||||
; mov ecx, [pred]
|
||||
; mov edx, [pred_stride]
|
||||
; mov esi, [rs]
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
@ -449,21 +400,11 @@ WelsIDctFourT4Rec_sse2:
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsIDctRecI16x16Dc_sse2
|
||||
ALIGN 16
|
||||
;%define pushsize 8
|
||||
;%define luma_dc esp + pushsize + 20
|
||||
WelsIDctRecI16x16Dc_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1d
|
||||
SIGN_EXTENSION r3, r3d
|
||||
; push esi
|
||||
; push edi
|
||||
|
||||
;mov ecx, [luma_dc] ; r4
|
||||
;mov eax, [rec] ; r0
|
||||
;mov edx, [stride] ; r1
|
||||
;mov esi, [pred]; r2
|
||||
;mov edi, [pred_stride]; r3
|
||||
pxor xmm7, xmm7
|
||||
WELS_DW32 xmm6
|
||||
|
||||
@ -499,8 +440,6 @@ WelsIDctRecI16x16Dc_sse2:
|
||||
lea r2, [r2 + 2 * r3]
|
||||
SSE2_StoreDiff4x8p xmm2, xmm3, xmm5, xmm7, r0, r2, r1, r3
|
||||
LOAD_5_PARA_POP
|
||||
;pop edi
|
||||
;pop esi
|
||||
ret
|
||||
|
||||
|
||||
@ -537,8 +476,6 @@ WelsIDctRecI16x16Dc_sse2:
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsHadamardT4Dc_sse2
|
||||
WelsHadamardT4Dc_sse2:
|
||||
;mov eax, [esp + 4] ; luma_dc
|
||||
;mov ecx, [esp + 8] ; pDct
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
SSE2_Load4Col xmm1, xmm5, xmm6, xmm0, r1
|
||||
|
@ -234,10 +234,6 @@ WelsI4x4LumaPredH_sse2:
|
||||
; void WelsI16x16LumaPredPlane_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride);
|
||||
;***********************************************************************
|
||||
WelsI16x16LumaPredPlane_sse2:
|
||||
;%define pushsize 4
|
||||
;push esi
|
||||
;mov esi, [esp + pushsize + 8]
|
||||
;mov ecx, [esp + pushsize + 12]
|
||||
push r3
|
||||
push r4
|
||||
%assign push_num 2
|
||||
@ -293,7 +289,6 @@ WelsI16x16LumaPredPlane_sse2:
|
||||
sar r3, 6 ; c = (5 * V + 32) >> 6;
|
||||
SSE2_Copy8Times xmm4, r3d ; xmm4 = c,c,c,c,c,c,c,c
|
||||
|
||||
;mov esi, [esp + pushsize + 4]
|
||||
add r4, 16
|
||||
imul r3, -7
|
||||
add r3, r4 ; s = a + 16 + (-7)*c
|
||||
@ -367,9 +362,6 @@ WelsI16x16LumaPredH_sse2:
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsI16x16LumaPredV_sse2
|
||||
WelsI16x16LumaPredV_sse2:
|
||||
;mov edx, [esp+4] ; pred
|
||||
;mov eax, [esp+8] ; pRef
|
||||
;mov ecx, [esp+12] ; stride
|
||||
%assign push_num 0
|
||||
LOAD_3_PARA
|
||||
SIGN_EXTENSION r2, r2d
|
||||
@ -400,10 +392,6 @@ WelsI16x16LumaPredV_sse2:
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsIChromaPredPlane_sse2
|
||||
WelsIChromaPredPlane_sse2:
|
||||
;%define pushsize 4
|
||||
;push esi
|
||||
;mov esi, [esp + pushsize + 8] ;pRef
|
||||
;mov ecx, [esp + pushsize + 12] ;stride
|
||||
push r3
|
||||
push r4
|
||||
%assign push_num 2
|
||||
@ -462,7 +450,6 @@ WelsIChromaPredPlane_sse2:
|
||||
sar r3, 5 ; c = (17 * V + 16) >> 5;
|
||||
SSE2_Copy8Times xmm4, r3d ; mm4 = c,c,c,c,c,c,c,c
|
||||
|
||||
;mov esi, [esp + pushsize + 4]
|
||||
add r4, 16
|
||||
imul r3, -3
|
||||
add r3, r4 ; s = a + 16 + (-3)*c
|
||||
@ -502,9 +489,6 @@ ALIGN 16
|
||||
;
|
||||
;***********************************************************************
|
||||
WelsI4x4LumaPredDDR_mmx:
|
||||
;mov edx,[esp+4] ;pred
|
||||
;mov eax,[esp+8] ;pRef
|
||||
;mov ecx,[esp+12] ;stride
|
||||
%assign push_num 0
|
||||
LOAD_3_PARA
|
||||
SIGN_EXTENSION r2, r2d
|
||||
@ -619,9 +603,6 @@ ALIGN 16
|
||||
|
||||
WELS_EXTERN WelsIChromaPredH_mmx
|
||||
WelsIChromaPredH_mmx:
|
||||
;mov edx, [esp+4] ;pred
|
||||
;mov eax, [esp+8] ;pRef
|
||||
;mov ecx, [esp+12] ;stride
|
||||
%assign push_num 0
|
||||
LOAD_3_PARA
|
||||
SIGN_EXTENSION r2, r2d
|
||||
|
@ -55,7 +55,6 @@ WELS_EXTERN WelsPrefetchZero_mmx
|
||||
;-----------------------------------------------------------------------
; WelsPrefetchZero_mmx
; Issues a non-temporal prefetch hint (prefetchnta) for the memory
; address held in r0, then returns.
; NOTE(review): LOAD_1_PARA is defined outside this chunk; presumably it
; makes the first argument available in r0 -- confirm against the macro.
;-----------------------------------------------------------------------
WelsPrefetchZero_mmx:
|
||||
; push_num is set to 0 here; no push instruction precedes LOAD_1_PARA in this routine.
%assign push_num 0
|
||||
LOAD_1_PARA
|
||||
; r0 is expected to hold the address to prefetch (the legacy 32-bit code read it from [esp+4] into eax).
|
||||
prefetchnta [r0]
|
||||
ret
|
||||
|
||||
|
@ -87,12 +87,9 @@ align 16
|
||||
WelsQuant4x4_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_3_PARA
|
||||
;mov eax, [ff]
|
||||
;mov ecx, [mf]
|
||||
movdqa xmm2, [r1]
|
||||
movdqa xmm3, [r2]
|
||||
|
||||
;mov edx, [pDct]
|
||||
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0]
|
||||
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x10]
|
||||
|
||||
@ -108,13 +105,10 @@ WelsQuant4x4Dc_sse2:
|
||||
LOAD_3_PARA
|
||||
SIGN_EXTENSION r1, r1w
|
||||
SIGN_EXTENSION r2, r2w
|
||||
;mov ax, [mf]
|
||||
SSE2_Copy8Times xmm3, r2d
|
||||
|
||||
;mov cx, [ff]
|
||||
SSE2_Copy8Times xmm2, r1d
|
||||
|
||||
;mov edx, [pDct]
|
||||
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0]
|
||||
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x10]
|
||||
|
||||
@ -128,12 +122,9 @@ align 16
|
||||
WelsQuantFour4x4_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_3_PARA
|
||||
;mov eax, [ff]
|
||||
;mov ecx, [mf]
|
||||
MOVDQ xmm2, [r1]
|
||||
MOVDQ xmm3, [r2]
|
||||
|
||||
;mov edx, [pDct]
|
||||
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0]
|
||||
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x10]
|
||||
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x20]
|
||||
@ -153,12 +144,9 @@ align 16
|
||||
WelsQuantFour4x4Max_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_4_PARA
|
||||
;mov eax, [ff]
|
||||
;mov ecx, [mf]
|
||||
MOVDQ xmm2, [r1]
|
||||
MOVDQ xmm3, [r2]
|
||||
|
||||
;mov edx, [pDct]
|
||||
pxor xmm4, xmm4
|
||||
pxor xmm5, xmm5
|
||||
pxor xmm6, xmm6
|
||||
@ -180,7 +168,6 @@ WelsQuantFour4x4Max_sse2:
|
||||
punpckhqdq xmm0, xmm1
|
||||
pmaxsw xmm0, xmm1
|
||||
|
||||
;mov r0, [r3]
|
||||
movq [r3], xmm0
|
||||
LOAD_4_PARA_POP
|
||||
ret
|
||||
@ -204,8 +191,6 @@ SECTION .text
|
||||
psubw %1, %2
|
||||
%endmacro
|
||||
|
||||
%define dct2x2 esp + 16
|
||||
%define iChromaDc esp + 20
|
||||
;***********************************************************************
|
||||
;int32_t WelsHadamardQuant2x2_mmx(int16_t *rs, const int16_t ff, int16_t mf, int16_t * pDct, int16_t * block);
|
||||
;***********************************************************************
|
||||
@ -216,7 +201,6 @@ WelsHadamardQuant2x2_mmx:
|
||||
LOAD_5_PARA
|
||||
SIGN_EXTENSION r1, r1w
|
||||
SIGN_EXTENSION r2, r2w
|
||||
;mov eax, [pDct]
|
||||
movd mm0, [r0]
|
||||
movd mm1, [r0 + 0x20]
|
||||
punpcklwd mm0, mm1
|
||||
@ -237,16 +221,12 @@ WelsHadamardQuant2x2_mmx:
|
||||
punpcklwd mm1, mm3
|
||||
|
||||
;quant_2x2_dc
|
||||
;mov ax, [mf]
|
||||
MMX_Copy4Times mm3, r2d
|
||||
;mov cx, [ff]
|
||||
MMX_Copy4Times mm2, r1d
|
||||
MMX_Quant4 mm1, mm0, mm2, mm3
|
||||
|
||||
; store dct_2x2
|
||||
;mov edx, [dct2x2]
|
||||
movq [r3], mm1
|
||||
;mov ecx, [iChromaDc]
|
||||
movq [r4], mm1
|
||||
|
||||
; pNonZeroCount of dct_2x2
|
||||
@ -279,7 +259,6 @@ WelsHadamardQuant2x2Skip_mmx:
|
||||
LOAD_3_PARA
|
||||
SIGN_EXTENSION r1, r1w
|
||||
SIGN_EXTENSION r2, r2w
|
||||
;mov eax, [pDct]
|
||||
movd mm0, [r0]
|
||||
movd mm1, [r0 + 0x20]
|
||||
punpcklwd mm0, mm1
|
||||
@ -300,9 +279,7 @@ WelsHadamardQuant2x2Skip_mmx:
|
||||
punpcklwd mm1, mm3
|
||||
|
||||
;quant_2x2_dc
|
||||
;mov ax, [mf]
|
||||
MMX_Copy4Times mm3, r2d
|
||||
;mov cx, [ff]
|
||||
MMX_Copy4Times mm2, r1d
|
||||
MMX_Quant4 mm1, mm0, mm2, mm3
|
||||
|
||||
@ -333,11 +310,8 @@ ALIGN 16
|
||||
align 16
|
||||
WELS_EXTERN WelsDequant4x4_sse2
|
||||
WelsDequant4x4_sse2:
|
||||
;ecx = dequant_mf[qp], edx = pDct
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
;mov ecx, [esp + 8]
|
||||
;mov edx, [esp + 4]
|
||||
|
||||
movdqa xmm1, [r1]
|
||||
SSE2_DeQuant8 [r0 ], xmm0, xmm1
|
||||
@ -353,11 +327,8 @@ align 16
|
||||
|
||||
WELS_EXTERN WelsDequantFour4x4_sse2
|
||||
WelsDequantFour4x4_sse2:
|
||||
;ecx = dequant_mf[qp], edx = pDct
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
;mov ecx, [esp + 8]
|
||||
;mov edx, [esp + 4]
|
||||
|
||||
movdqa xmm1, [r1]
|
||||
SSE2_DeQuant8 [r0 ], xmm0, xmm1
|
||||
@ -382,8 +353,6 @@ WelsDequantIHadamard4x4_sse2:
|
||||
%ifndef X86_32
|
||||
movzx r1, r1w
|
||||
%endif
|
||||
;mov eax, [esp + 4]
|
||||
;mov cx, [esp + 8]
|
||||
|
||||
; WelsDequantLumaDc4x4
|
||||
SSE2_Copy8Times xmm1, r1d
|
||||
|
@ -176,7 +176,6 @@ WelsScan4x4DcAc_sse2:
|
||||
%assign push_num 0
|
||||
%endif
|
||||
LOAD_2_PARA
|
||||
;mov eax, [esp+8]
|
||||
movdqa xmm0, [r1] ; 7 6 5 4 3 2 1 0
|
||||
movdqa xmm1, [r1+16] ; f e d c b a 9 8
|
||||
pextrw r2d, xmm0, 7 ; ecx = 7
|
||||
@ -191,7 +190,6 @@ WelsScan4x4DcAc_sse2:
|
||||
pshufd xmm3, xmm1, 0xd8 ; f e b 7 d c 9 a
|
||||
pshufhw xmm0, xmm2, 0x93 ; 6 3 2 5 8 4 1 0
|
||||
pshuflw xmm1, xmm3, 0x39 ; f e b 7 a d c 9
|
||||
;mov eax, [esp+4]
|
||||
movdqa [r0],xmm0
|
||||
movdqa [r0+16], xmm1
|
||||
%ifdef X86_32
|
||||
@ -207,7 +205,6 @@ WELS_EXTERN WelsScan4x4DcAc_ssse3
|
||||
WelsScan4x4DcAc_ssse3:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
;mov eax, [esp+8]
|
||||
movdqa xmm0, [r1]
|
||||
movdqa xmm1, [r1+16]
|
||||
pextrw r2d, xmm0, 7 ; ecx = [7]
|
||||
@ -217,7 +214,6 @@ WelsScan4x4DcAc_ssse3:
|
||||
pshufb xmm1, [pb_scanacdc_maskb]
|
||||
pshufb xmm0, [pb_scanacdc_maska]
|
||||
|
||||
;mov eax, [esp+4]
|
||||
movdqa [r0],xmm0
|
||||
movdqa [r0+16], xmm1
|
||||
ret
|
||||
@ -229,7 +225,6 @@ WELS_EXTERN WelsScan4x4Ac_sse2
|
||||
WelsScan4x4Ac_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
;mov eax, [esp+8]
|
||||
movdqa xmm0, [r1]
|
||||
movdqa xmm1, [r1+16]
|
||||
movdqa xmm2, xmm0
|
||||
@ -256,7 +251,6 @@ WelsScan4x4Ac_sse2:
|
||||
pslldq xmm3, 14
|
||||
por xmm1, xmm3
|
||||
psrldq xmm2, 2
|
||||
;mov eax, [esp+4]
|
||||
movdqa [r0],xmm1
|
||||
movdqa [r0+16], xmm2
|
||||
ret
|
||||
@ -268,8 +262,6 @@ WelsScan4x4Ac_sse2:
|
||||
ALIGN 16
|
||||
WELS_EXTERN WelsCalculateSingleCtr4x4_sse2
|
||||
WelsCalculateSingleCtr4x4_sse2:
|
||||
;push ebx
|
||||
;mov eax, [esp+8]
|
||||
%ifdef X86_32
|
||||
push r3
|
||||
%assign push_num 1
|
||||
@ -321,7 +313,6 @@ WelsCalculateSingleCtr4x4_sse2:
|
||||
%else
|
||||
mov retrd, r0d
|
||||
%endif
|
||||
;pop ebx
|
||||
ret
|
||||
|
||||
|
||||
@ -333,7 +324,6 @@ WELS_EXTERN WelsGetNoneZeroCount_sse2
|
||||
WelsGetNoneZeroCount_sse2:
|
||||
%assign push_num 0
|
||||
LOAD_1_PARA
|
||||
;mov eax, [esp+4]
|
||||
movdqa xmm0, [r0]
|
||||
movdqa xmm1, [r0+16]
|
||||
pxor xmm2, xmm2
|
||||
|
@ -172,11 +172,6 @@ WELS_EXTERN BilateralLumaFilter8_sse2
|
||||
; 4 0 5
|
||||
; 6 7 8
|
||||
; 0: the center point
|
||||
%define pushsize 4
|
||||
;%define pixel esp + pushsize + 4
|
||||
;%define stride esp + pushsize + 8
|
||||
;%define pixel r0
|
||||
;%define stride r1
|
||||
|
||||
BilateralLumaFilter8_sse2:
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user