Remove commented-out code for old, 32-bit-only x86 assembly function prologues/epilogues

This commit is contained in:
Martin Storsjö 2014-03-14 15:00:39 +02:00
parent 258828f7ec
commit f96918283f
12 changed files with 0 additions and 618 deletions

View File

@ -133,15 +133,6 @@ WelsCopy16x16_sse2:
ALIGN 16
; dst can be aligned to 16 bytes, but pSrc alignment is not guaranteed, 12/29/2011
WelsCopy16x16NotAligned_sse2:
;push esi
;push edi
;push ebx
;mov edi, [esp+16] ; Dst
;mov eax, [esp+20] ; iStrideD
;mov esi, [esp+24] ; Src
;mov ecx, [esp+28] ; iStrideS
push r4
push r5
%assign push_num 2
@ -205,15 +196,6 @@ WelsCopy16x16NotAligned_sse2:
;***********************************************************************
ALIGN 16
WelsCopy16x8NotAligned_sse2:
;push esi
;push edi
;push ebx
;mov edi, [esp+16] ; Dst
;mov eax, [esp+20] ; iStrideD
;mov esi, [esp+24] ; Src
;mov ecx, [esp+28] ; iStrideS
push r4
push r5
%assign push_num 2
@ -255,13 +237,6 @@ WelsCopy16x8NotAligned_sse2:
;***********************************************************************
ALIGN 16
WelsCopy8x16_mmx:
;push ebx
;mov eax, [esp + 8 ] ;Dst
;mov ecx, [esp + 12] ;iStrideD
;mov ebx, [esp + 16] ;Src
;mov edx, [esp + 20] ;iStrideS
%assign push_num 0
LOAD_4_PARA
@ -327,13 +302,6 @@ WelsCopy8x16_mmx:
;***********************************************************************
ALIGN 16
WelsCopy8x8_mmx:
;push ebx
;push esi
;mov eax, [esp + 12] ;Dst
;mov ecx, [esp + 16] ;iStrideD
;mov esi, [esp + 20] ;Src
;mov ebx, [esp + 24] ;iStrideS
push r4
%assign push_num 1
LOAD_4_PARA
@ -373,8 +341,6 @@ WelsCopy8x8_mmx:
movq [r0+r1], mm7
WELSEMMS
;pop esi
;pop ebx
LOAD_4_PARA_POP
pop r4
ret
@ -389,8 +355,6 @@ UpdateMbMv_sse2:
%assign push_num 0
LOAD_2_PARA
;mov eax, [esp+4] ; mv_buffer
;movd xmm0, [esp+8] ; _mv
movd xmm0, r1d ; _mv
pshufd xmm1, xmm0, $00
movdqa [r0 ], xmm1
@ -472,20 +436,6 @@ ALIGN 16
; int iHeight );
;*******************************************************************************
PixelAvgWidthEq8_mmx:
;push esi
;push edi
;push ebp
;push ebx
;mov edi, [esp+20] ; pDst
;mov eax, [esp+24] ; iDstStride
;mov esi, [esp+28] ; pSrcA
;mov ecx, [esp+32] ; iSrcAStride
;mov ebp, [esp+36] ; pSrcB
;mov edx, [esp+40] ; iSrcBStride
;mov ebx, [esp+44] ; iHeight
%assign push_num 0
LOAD_7_PARA
@ -575,17 +525,6 @@ ALIGN 16
; uint8_t *pDst, int iDstStride, int iHeight )
;*******************************************************************************
McCopyWidthEq4_mmx:
;push esi
;push edi
;push ebx
;mov esi, [esp+16]
;mov eax, [esp+20]
;mov edi, [esp+24]
;mov ecx, [esp+28]
;mov edx, [esp+32]
push r5
%assign push_num 1
LOAD_5_PARA
@ -614,14 +553,6 @@ ALIGN 16
; uint8_t *pDst, int iDstStride, int iHeight )
;*******************************************************************************
McCopyWidthEq8_mmx:
;push esi
;push edi
;mov esi, [esp+12]
;mov eax, [esp+16]
;mov edi, [esp+20]
;mov ecx, [esp+24]
;mov edx, [esp+28]
%assign push_num 0
LOAD_5_PARA
@ -659,15 +590,6 @@ ALIGN 16
movhps [%1+8], %2
%endmacro
McCopyWidthEq16_sse2:
;push esi
;push edi
;mov esi, [esp+12] ; pSrc
;mov eax, [esp+16] ; iSrcStride
;mov edi, [esp+20] ; pDst
;mov edx, [esp+24] ; iDstStride
;mov ecx, [esp+28] ; iHeight
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d

View File

@ -76,18 +76,12 @@ ALIGN 16
;*******************************************************************************
WELS_EXTERN McChromaWidthEq4_mmx
McChromaWidthEq4_mmx:
;push esi
;push edi
;push ebx
%assign push_num 0
LOAD_6_PARA
SIGN_EXTENSION r1, r1d
SIGN_EXTENSION r3, r3d
SIGN_EXTENSION r5, r5d
;mov eax, [esp +12 + 20]
movd mm3, [r4]; [eax]
WELS_Zero mm7
punpcklbw mm3, mm3
@ -103,12 +97,6 @@ McChromaWidthEq4_mmx:
punpcklbw mm4, mm7
punpckhbw mm6, mm7
;mov esi, [esp +12+ 4]
;mov eax, [esp + 12 + 8]
;mov edi, [esp + 12 + 12]
;mov edx, [esp + 12 + 16]
;mov ecx, [esp + 12 + 24]
lea r4, [r0 + r1] ;lea ebx, [esi + eax]
movd mm0, [r0]
movd mm1, [r0+1]
@ -149,9 +137,6 @@ McChromaWidthEq4_mmx:
jnz near .xloop
WELSEMMS
LOAD_6_PARA_POP
;pop ebx
;pop edi
;pop esi
ret
@ -166,17 +151,12 @@ ALIGN 16
;*******************************************************************************
WELS_EXTERN McChromaWidthEq8_sse2
McChromaWidthEq8_sse2:
;push esi
;push edi
;push ebx
%assign push_num 0
LOAD_6_PARA
SIGN_EXTENSION r1, r1d
SIGN_EXTENSION r3, r3d
SIGN_EXTENSION r5, r5d
;mov eax, [esp +12 + 20]
movd xmm3, [r4]
WELS_Zero xmm7
punpcklbw xmm3, xmm3
@ -193,12 +173,6 @@ McChromaWidthEq8_sse2:
punpcklbw xmm4, xmm7
punpckhbw xmm6, xmm7
;mov esi, [esp +12+ 4]
;mov eax, [esp + 12 + 8]
;mov edi, [esp + 12 + 12]
;mov edx, [esp + 12 + 16]
;mov ecx, [esp + 12 + 24]
lea r4, [r0 + r1] ;lea ebx, [esi + eax]
movq xmm0, [r0]
movq xmm1, [r0+1]
@ -240,9 +214,6 @@ McChromaWidthEq8_sse2:
LOAD_6_PARA_POP
;pop ebx
;pop edi
;pop esi
ret
@ -259,17 +230,12 @@ ALIGN 16
;***********************************************************************
WELS_EXTERN McChromaWidthEq8_ssse3
McChromaWidthEq8_ssse3:
;push ebx
;push esi
;push edi
%assign push_num 0
LOAD_6_PARA
SIGN_EXTENSION r1, r1d
SIGN_EXTENSION r3, r3d
SIGN_EXTENSION r5, r5d
;mov eax, [esp + 12 + 20]
pxor xmm7, xmm7
movd xmm5, [r4]
punpcklwd xmm5, xmm5
@ -278,12 +244,6 @@ McChromaWidthEq8_ssse3:
punpcklqdq xmm5, xmm5
punpckhqdq xmm6, xmm6
;mov eax, [esp + 12 + 4]
;mov edx, [esp + 12 + 8]
;mov esi, [esp + 12 + 12]
;mov edi, [esp + 12 + 16]
;mov ecx, [esp + 12 + 24]
sub r2, r3 ;sub esi, edi
sub r2, r3
movdqa xmm7, [h264_d0x20_sse2]
@ -330,10 +290,6 @@ McChromaWidthEq8_ssse3:
LOAD_6_PARA_POP
;pop edi
;pop esi
;pop ebx
ret

View File

@ -83,15 +83,6 @@ ALIGN 16
; int iHeight)
;*******************************************************************************
McHorVer20WidthEq4_mmx:
;push esi
;push edi
;mov esi, [esp+12]
;mov eax, [esp+16]
;mov edi, [esp+20]
;mov ecx, [esp+24]
;mov edx, [esp+28]
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
@ -185,15 +176,6 @@ ALIGN 16
; )
;***********************************************************************
McHorVer22Width8HorFirst_sse2:
;push esi
;push edi
;push ebx
;mov esi, [esp+16] ;pSrc
;mov eax, [esp+20] ;iSrcStride
;mov edi, [esp+24] ;pDst
;mov edx, [esp+28] ;iDstStride
;mov ebx, [esp+32] ;iHeight
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
@ -245,15 +227,6 @@ ALIGN 16
; );
;*******************************************************************************
McHorVer20WidthEq8_sse2:
;push esi
;push edi
;mov esi, [esp + 12] ;pSrc
;mov eax, [esp + 16] ;iSrcStride
;mov edi, [esp + 20] ;pDst
;mov ecx, [esp + 28] ;iHeight
;mov edx, [esp + 24] ;iDstStride
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
@ -309,14 +282,6 @@ ALIGN 16
; );
;*******************************************************************************
McHorVer20WidthEq16_sse2:
;push esi
;push edi
;mov esi, [esp + 12] ;pSrc
;mov eax, [esp + 16] ;iSrcStride
;mov edi, [esp + 20] ;pDst
;mov ecx, [esp + 28] ;iHeight
;mov edx, [esp + 24] ;iDstStride
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
@ -398,14 +363,6 @@ McHorVer20WidthEq16_sse2:
;*******************************************************************************
ALIGN 16
McHorVer02WidthEq8_sse2:
;push esi
;push edi
;mov esi, [esp + 12] ;pSrc
;mov edx, [esp + 16] ;iSrcStride
;mov edi, [esp + 20] ;pDst
;mov eax, [esp + 24] ;iDstStride
;mov ecx, [esp + 28] ;iHeight
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
@ -503,17 +460,6 @@ WELS_EXTERN McHorVer22HorFirst_sse2
;***********************************************************************
ALIGN 16
McHorVer02Height9Or17_sse2:
;push esi
;push edi
;push ebx
;mov esi, [esp + 16]
;mov edx, [esp + 20]
;mov edi, [esp + 24]
;mov eax, [esp + 28]
;mov ecx, [esp + 36]
;mov ebx, [esp + 32]
%assign push_num 0
LOAD_6_PARA
SIGN_EXTENSION r1, r1d
@ -612,9 +558,6 @@ McHorVer02Height9Or17_sse2:
.x_loop_dec:
dec r4
jz near .xx_exit
;mov esi, [esp + 16]
;mov edi, [esp + 24]
;mov ecx, [esp + 36]
%ifdef X86_32
mov r0, arg1
mov r2, arg3
@ -651,16 +594,6 @@ ALIGN 16
; );
;***********************************************************************
McHorVer20Width9Or17_sse2:
;push esi
;push edi
;push ebx
;mov esi, [esp+16]
;mov eax, [esp+20]
;mov edi, [esp+24]
;mov edx, [esp+28]
;mov ecx, [esp+32]
;mov ebx, [esp+36]
%assign push_num 0
LOAD_6_PARA
SIGN_EXTENSION r1, r1d
@ -819,16 +752,6 @@ ALIGN 16
; int32_t iWidth,int32_t iHeight);
;***********************************************************************
McHorVer22HorFirst_sse2:
;push esi
;push edi
;push ebx
;mov esi, [esp+16]
;mov eax, [esp+20]
;mov edi, [esp+24]
;mov edx, [esp+28]
;mov ecx, [esp+32]
;mov ebx, [esp+36]
%assign push_num 0
LOAD_6_PARA
SIGN_EXTENSION r1, r1d
@ -996,18 +919,6 @@ McHorVer22HorFirst_sse2:
;***********************************************************************
McHorVer22Width8VerLastAlign_sse2:
;push esi
;push edi
;push ebx
;push ebp
;mov esi, [esp+20]
;mov eax, [esp+24]
;mov edi, [esp+28]
;mov edx, [esp+32]
;mov ebx, [esp+36]
;mov ecx, [esp+40]
%assign push_num 0
LOAD_6_PARA
SIGN_EXTENSION r1, r1d
@ -1104,9 +1015,6 @@ McHorVer22HorFirst_sse2:
.x_loop_dec:
dec r4
jz near .exit
;mov esi, [esp+20]
;mov edi, [esp+28]
;mov ecx, [esp+40]
%ifdef X86_32
mov r0, arg1
mov r2, arg3
@ -1140,18 +1048,6 @@ McHorVer22HorFirst_sse2:
;***********************************************************************
McHorVer22Width8VerLastUnAlign_sse2:
;push esi
;push edi
;push ebx
;push ebp
;mov esi, [esp+20]
;mov eax, [esp+24]
;mov edi, [esp+28]
;mov edx, [esp+32]
;mov ebx, [esp+36]
;mov ecx, [esp+40]
%assign push_num 0
LOAD_6_PARA
SIGN_EXTENSION r1, r1d
@ -1247,9 +1143,6 @@ McHorVer22HorFirst_sse2:
.x_loop_dec:
dec r4
jz near .exit
;mov esi, [esp+20]
;mov edi, [esp+28]
;mov ecx, [esp+40]
%ifdef X86_32
mov r0, arg1
mov r2, arg3

View File

@ -158,12 +158,6 @@ SECTION .text
WELS_EXTERN WelsSampleSatd4x4_sse2
align 16
WelsSampleSatd4x4_sse2:
;push ebx
;mov eax, [esp+8]
;mov ebx, [esp+12]
;mov ecx, [esp+16]
;mov edx, [esp+20]
%assign push_num 0
LOAD_4_PARA
SIGN_EXTENSION r1, r1d
@ -238,12 +232,6 @@ WelsSampleSatd4x4_sse2:
WELS_EXTERN WelsSampleSatd8x8_sse2
align 16
WelsSampleSatd8x8_sse2:
;push ebx
;mov eax, [esp+8]
;mov ebx, [esp+12]
;mov ecx, [esp+16]
;mov edx, [esp+20]
%assign push_num 0
LOAD_4_PARA
SIGN_EXTENSION r1, r1d
@ -265,12 +253,6 @@ align 16
WELS_EXTERN WelsSampleSatd8x16_sse2
align 16
WelsSampleSatd8x16_sse2:
;push ebx
;mov eax, [esp+8]
;mov ebx, [esp+12]
;mov ecx, [esp+16]
;mov edx, [esp+20]
%assign push_num 0
LOAD_4_PARA
SIGN_EXTENSION r1, r1d
@ -297,12 +279,6 @@ align 16
WELS_EXTERN WelsSampleSatd16x8_sse2
align 16
WelsSampleSatd16x8_sse2:
;push ebx
;mov eax, [esp+8]
;mov ebx, [esp+12]
;mov ecx, [esp+16]
;mov edx, [esp+20]
%assign push_num 0
LOAD_4_PARA
SIGN_EXTENSION r1, r1d
@ -316,8 +292,6 @@ WelsSampleSatd16x8_sse2:
pop r2
pop r0
;mov eax, [esp+8]
;mov ecx, [esp+16]
add r0, 8
add r2, 8
SSE2_GetSatd8x8
@ -336,12 +310,6 @@ WelsSampleSatd16x8_sse2:
WELS_EXTERN WelsSampleSatd16x16_sse2
align 16
WelsSampleSatd16x16_sse2:
;push ebx
;mov eax, [esp+8]
;mov ebx, [esp+12]
;mov ecx, [esp+16]
;mov edx, [esp+20]
%assign push_num 0
LOAD_4_PARA
SIGN_EXTENSION r1, r1d
@ -358,8 +326,6 @@ WelsSampleSatd16x16_sse2:
pop r2
pop r0
;mov eax, [esp+8]
;mov ecx, [esp+16]
add r0, 8
add r2, 8
@ -1022,12 +988,6 @@ return_sad_intra_16x16_x3:
;***********************************************************************
WELS_EXTERN WelsSampleSatd4x4_sse41
WelsSampleSatd4x4_sse41:
;push ebx
;mov eax,[esp+8]
;mov ebx,[esp+12]
;mov ecx,[esp+16]
;mov edx,[esp+20]
%assign push_num 0
LOAD_4_PARA
SIGN_EXTENSION r1, r1d
@ -1082,13 +1042,6 @@ WelsSampleSatd4x4_sse41:
WELS_EXTERN WelsSampleSatd8x8_sse41
align 16
WelsSampleSatd8x8_sse41:
;push ebx
;push esi
;push edi
;mov eax, [esp+16]
;mov ebx, [esp+20]
;mov ecx, [esp+24]
;mov edx, [esp+28]
%ifdef X86_32
push r4
push r5
@ -1121,15 +1074,6 @@ WelsSampleSatd8x8_sse41:
WELS_EXTERN WelsSampleSatd8x16_sse41
align 16
WelsSampleSatd8x16_sse41:
;push ebx
;push esi
;push edi
;push ebp
;%define pushsize 16
;mov eax, [esp+pushsize+4]
;mov ebx, [esp+pushsize+8]
;mov ecx, [esp+pushsize+12]
;mov edx, [esp+pushsize+16]
%ifdef X86_32
push r4
push r5
@ -1168,13 +1112,6 @@ loop_get_satd_8x16:
WELS_EXTERN WelsSampleSatd16x8_sse41
align 16
WelsSampleSatd16x8_sse41:
;push ebx
;push esi
;push edi
;mov eax, [esp+16]
;mov ebx, [esp+20]
;mov ecx, [esp+24]
;mov edx, [esp+28]
%ifdef X86_32
push r4
push r5
@ -1197,8 +1134,6 @@ WelsSampleSatd16x8_sse41:
pop r2
pop r0
;mov eax, [esp+16]
;mov ecx, [esp+24]
add r0, 8
add r2, 8
SSE41_GetSatd8x4
@ -1222,15 +1157,6 @@ WelsSampleSatd16x8_sse41:
WELS_EXTERN WelsSampleSatd16x16_sse41
align 16
WelsSampleSatd16x16_sse41:
;push ebx
;push esi
;push edi
;push ebp
;%define pushsize 16
;mov eax, [esp+pushsize+4]
;mov ebx, [esp+pushsize+8]
;mov ecx, [esp+pushsize+12]
;mov edx, [esp+pushsize+16]
%ifdef X86_32
push r4
push r5
@ -1259,8 +1185,6 @@ loop_get_satd_16x16_left:
pop r2
pop r0
;mov eax, [esp+pushsize+4]
;mov ecx, [esp+pushsize+12]
add r0, 8
add r2, 8
mov r6, 0
@ -1272,7 +1196,6 @@ loop_get_satd_16x16_right:
cmp r6, 4
jl loop_get_satd_16x16_right
SSSE3_SumWHorizon retrd, xmm6, xmm5, xmm7
;%undef pushsize
LOAD_4_PARA_POP
%ifdef X86_32
pop r6
@ -1355,14 +1278,6 @@ loop_get_satd_16x16_right:
WELS_EXTERN WelsSampleSad16x16_sse2
align 16
WelsSampleSad16x16_sse2:
;push ebx
;push edi
;push esi
;%define _STACK_SIZE 12
;mov eax, [esp+_STACK_SIZE+4 ]
;mov ebx, [esp+_STACK_SIZE+8 ]
;mov ecx, [esp+_STACK_SIZE+12]
;mov edx, [esp+_STACK_SIZE+16]
%ifdef X86_32
push r4
push r5
@ -1406,12 +1321,6 @@ WelsSampleSad16x16_sse2:
WELS_EXTERN WelsSampleSad16x8_sse2
align 16
WelsSampleSad16x8_sse2:
;push ebx
;mov eax, [esp+8]
;mov ebx, [esp+12]
;mov ecx, [esp+16]
;mov edx, [esp+20]
%assign push_num 0
LOAD_4_PARA
SIGN_EXTENSION r1, r1d
@ -1438,12 +1347,6 @@ WelsSampleSad16x8_sse2:
WELS_EXTERN WelsSampleSad8x16_sse2
WelsSampleSad8x16_sse2:
;push ebx
;mov eax, [esp+8]
;mov ebx, [esp+12]
;mov ecx, [esp+16]
;mov edx, [esp+20]
%assign push_num 0
LOAD_4_PARA
SIGN_EXTENSION r1, r1d
@ -1475,15 +1378,6 @@ cmp %1, (32-%2)|(%3>>1)
WELS_EXTERN WelsSampleSad8x8_sse21
WelsSampleSad8x8_sse21:
;mov ecx, [esp+12]
;mov edx, ecx
;CACHE_SPLIT_CHECK edx, 8, 64
;jle near .pixel_sad_8x8_nsplit
;push ebx
;push edi
;mov eax, [esp+12]
;mov ebx, [esp+16]
%assign push_num 0
mov r2, arg3
push r2
@ -1596,10 +1490,6 @@ WelsSampleSad8x8_sse21:
jmp .return
.pixel_sad_8x8_nsplit:
;push ebx
;mov eax, [esp+8]
;mov ebx, [esp+12]
;mov edx, [esp+20]
pop r2
%assign push_num 0
@ -1647,12 +1537,6 @@ WelsSampleSad8x8_sse21:
%endmacro
WELS_EXTERN WelsSampleSadFour16x16_sse2
WelsSampleSadFour16x16_sse2:
;push ebx
;mov eax, [esp+8]
;mov ebx, [esp+12]
;mov ecx, [esp+16]
;mov edx, [esp+20]
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
@ -1753,7 +1637,6 @@ WelsSampleSadFour16x16_sse2:
psadbw xmm0, xmm3
paddw xmm5, xmm0
;mov ecx, [esp+24]
movhlps xmm0, xmm4
paddw xmm4, xmm0
movhlps xmm0, xmm5
@ -1772,13 +1655,6 @@ WelsSampleSadFour16x16_sse2:
WELS_EXTERN WelsSampleSadFour16x8_sse2
WelsSampleSadFour16x8_sse2:
;push ebx
;push edi
;mov eax, [esp+12]
;mov ebx, [esp+16]
;mov edi, [esp+20]
;mov edx, [esp+24]
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
@ -1847,7 +1723,6 @@ WelsSampleSadFour16x8_sse2:
psadbw xmm1, xmm3
paddw xmm5, xmm1
;mov edi, [esp+28]
movhlps xmm0, xmm4
paddw xmm4, xmm0
movhlps xmm0, xmm5
@ -1865,13 +1740,6 @@ WelsSampleSadFour16x8_sse2:
WELS_EXTERN WelsSampleSadFour8x16_sse2
WelsSampleSadFour8x16_sse2:
;push ebx
;push edi
;mov eax, [esp+12]
;mov ebx, [esp+16]
;mov edi, [esp+20]
;mov edx, [esp+24]
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
@ -2066,7 +1934,6 @@ WelsSampleSadFour8x16_sse2:
psadbw xmm0, xmm3
paddw xmm5, xmm0
;mov edi, [esp+28]
movhlps xmm0, xmm4
paddw xmm4, xmm0
movhlps xmm0, xmm5
@ -2085,13 +1952,6 @@ WelsSampleSadFour8x16_sse2:
WELS_EXTERN WelsSampleSadFour8x8_sse2
WelsSampleSadFour8x8_sse2:
;push ebx
;push edi
;mov eax, [esp+12]
;mov ebx, [esp+16]
;mov edi, [esp+20]
;mov edx, [esp+24]
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
@ -2195,7 +2055,6 @@ WelsSampleSadFour8x8_sse2:
psadbw xmm0, xmm3
paddw xmm5, xmm0
;mov edi, [esp+28]
movhlps xmm0, xmm4
paddw xmm4, xmm0
movhlps xmm0, xmm5
@ -2213,13 +2072,6 @@ WelsSampleSadFour8x8_sse2:
WELS_EXTERN WelsSampleSadFour4x4_sse2
WelsSampleSadFour4x4_sse2:
;push ebx
;push edi
;mov eax, [esp+12]
;mov ebx, [esp+16]
;mov edi, [esp+20]
;mov edx, [esp+24]
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
@ -2279,7 +2131,6 @@ WelsSampleSadFour4x4_sse2:
paddw xmm3, xmm0
movhlps xmm0, xmm4
paddw xmm4, xmm0
;mov edi, [esp+28]
punpckldq xmm1, xmm4
punpckldq xmm2, xmm3
punpcklqdq xmm1, xmm2
@ -2300,17 +2151,6 @@ align 16
; int32_t WelsSampleSad4x4_mmx (uint8_t *, int32_t, uint8_t *, int32_t )
;***********************************************************************
WelsSampleSad4x4_mmx:
;push ebx
;%define pushsize 4
;%define pix1address esp+pushsize+4
;%define pix1stride esp+pushsize+8
;%define pix2address esp+pushsize+12
;%define pix2stride esp+pushsize+16
;mov eax, [pix1address]
;mov ebx, [pix1stride ]
;mov ecx, [pix2address]
;mov edx, [pix2stride ]
%assign push_num 0
LOAD_4_PARA
SIGN_EXTENSION r1, r1d

View File

@ -56,15 +56,10 @@ ALIGN 16
; void WelsResBlockZero16x16_sse2(int16_t* pBlock,int32_t iStride)
;*******************************************************************************
WelsResBlockZero16x16_sse2:
;push r0
%assign push_num 0
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
;mov r0, [esp+08h]
;mov r1, [esp+0ch]
;lea r1, [r1*2]
lea r1, [r1*2]
;lea r2, [r1*3]
lea r2, [r1*3]
pxor xmm7, xmm7
@ -124,7 +119,6 @@ WelsResBlockZero16x16_sse2:
movdqa [r0+r2], xmm7
movdqa [r0+r2+10h], xmm7
;pop r0
ret
@ -135,12 +129,9 @@ ALIGN 16
; void WelsResBlockZero8x8_sse2(int16_t * pBlock, int32_t iStride)
;*******************************************************************************
WelsResBlockZero8x8_sse2:
;push r0
%assign push_num 0
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
;mov r0, [esp+08h]
;mov r1, [esp+0ch]
lea r1, [r1*2]
lea r2, [r1*3]
@ -158,6 +149,5 @@ WelsResBlockZero8x8_sse2:
movdqa [r0+r2], xmm7
;pop r0
ret

View File

@ -192,8 +192,6 @@ WelsDecoderI4x4LumaPredH_sse2:
%assign push_num 0
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
;mov eax, [esp+4] ;pPred
;mov ecx, [esp+8] ;kiStride
movzx r2, byte [r0-1]
movd xmm0, r2d
@ -225,16 +223,12 @@ WelsDecoderI4x4LumaPredH_sse2:
; void WelsDecoderI16x16LumaPredPlane_sse2(uint8_t *pPred, const int32_t kiStride);
;*******************************************************************************
WelsDecoderI16x16LumaPredPlane_sse2:
;%define pushsize 4
push r3
push r4
%assign push_num 2
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
mov r4, r0 ; save r0 in r4
;push esi
;mov esi, [esp + pushsize + 4]
;mov ecx, [esp + pushsize + 8]
sub r0, 1
sub r0, r1
@ -286,7 +280,6 @@ WelsDecoderI16x16LumaPredPlane_sse2:
sar r2, 6 ; c = (5 * V + 32) >> 6;
SSE2_Copy8Times xmm4, r2d ; xmm4 = c,c,c,c,c,c,c,c
;mov esi, [esp + pushsize + 4]
mov r0, r4
add r3, 16
imul r2, -7
@ -313,7 +306,6 @@ get_i16x16_luma_pred_plane_sse2_1:
cmp r2, 16
jnz get_i16x16_luma_pred_plane_sse2_1
;pop esi
pop r4
pop r3
ret
@ -338,8 +330,6 @@ WelsDecoderI16x16LumaPredH_sse2:
%assign push_num 0
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
;mov eax, [esp+4] ; pPred
;mov ecx, [esp+8] ; kiStride
COPY_16_TIMES r0, xmm0
movdqa [r0], xmm0
@ -364,8 +354,6 @@ WelsDecoderI16x16LumaPredV_sse2:
%assign push_num 0
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
;mov edx, [esp+4] ; pPred
;mov ecx, [esp+8] ; kiStride
sub r0, r1
movdqa xmm0, [r0]
@ -402,16 +390,12 @@ WelsDecoderI16x16LumaPredV_sse2:
;*******************************************************************************
WELS_EXTERN WelsDecoderIChromaPredPlane_sse2
WelsDecoderIChromaPredPlane_sse2:
;%define pushsize 4
push r3
push r4
%assign push_num 2
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
mov r4, r0
;push esi
;mov esi, [esp + pushsize + 4] ;pPred
;mov ecx, [esp + pushsize + 8] ;kiStride
sub r0, 1
sub r0, r1
@ -466,7 +450,6 @@ WelsDecoderIChromaPredPlane_sse2:
sar r2, 5 ; c = (17 * V + 16) >> 5;
SSE2_Copy8Times xmm4, r2d ; mm4 = c,c,c,c,c,c,c,c
;mov esi, [esp + pushsize + 4]
mov r0, r4
add r3, 16
imul r2, -3
@ -489,7 +472,6 @@ get_i_chroma_pred_plane_sse2_1:
cmp r2, 8
jnz get_i_chroma_pred_plane_sse2_1
;pop esi
pop r4
pop r3
WELSEMMS
@ -513,9 +495,6 @@ WelsDecoderI4x4LumaPredDDR_mmx:
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
mov r2, r0
;mov edx,[esp+4] ;pPred
;mov eax,edx
;mov ecx,[esp+8] ;kiStride
movq mm1,[r2+r1-8] ;get value of 11,decreasing 8 is trying to improve the performance of movq mm1[8] = 11
movq mm2,[r2-8] ;get value of 6 mm2[8] = 6
@ -586,9 +565,6 @@ WelsDecoderIChromaPredH_mmx:
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
mov r2, r0
;mov edx, [esp+4] ;pPred
;mov eax, edx
;mov ecx, [esp+8] ;kiStride
movq mm0, [r2-8]
psrlq mm0, 38h
@ -631,8 +607,6 @@ WelsDecoderIChromaPredV_mmx:
%assign push_num 0
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
;mov eax, [esp+4] ;pPred
;mov ecx, [esp+8] ;kiStride
sub r0, r1
movq mm0, [r0]
@ -689,9 +663,6 @@ WelsDecoderI4x4LumaPredHD_mmx:
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
mov r2, r0
;mov edx, [esp+4] ; pPred
;mov eax, edx
;mov ecx, [esp+8] ; kiStride
sub r2, r1
movd mm0, [r2-1] ; mm0 = [xx xx xx xx t2 t1 t0 lt]
psllq mm0, 20h ; mm0 = [t2 t1 t0 lt xx xx xx xx]
@ -776,9 +747,6 @@ WelsDecoderI4x4LumaPredHU_mmx:
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
mov r2, r0
;mov edx, [esp+4] ; pPred
;mov eax, edx
;mov ecx, [esp+8] ; kiStride
movd mm0, [r2-4] ; mm0[3] = l0
punpcklbw mm0, [r2+r1-4] ; mm0[7] = l1, mm0[6] = l0
@ -866,9 +834,6 @@ WelsDecoderI4x4LumaPredVR_mmx:
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
mov r2, r0
;mov edx, [esp+4] ; pPred
;mov eax, edx
;mov ecx, [esp+8] ; kiStride
sub r2, r1
movq mm0, [r2-1] ; mm0 = [xx xx xx t3 t2 t1 t0 lt]
psllq mm0, 18h ; mm0 = [t3 t2 t1 t0 lt xx xx xx]
@ -957,9 +922,6 @@ WelsDecoderI4x4LumaPredDDL_mmx:
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
mov r2, r0
;mov edx, [esp+4] ; pPred
;mov eax, edx
;mov ecx, [esp+8] ; kiStride
sub r2, r1
movq mm0, [r2] ; mm0 = [t7 t6 t5 t4 t3 t2 t1 t0]
movq mm1, mm0
@ -1030,9 +992,6 @@ WelsDecoderI4x4LumaPredVL_mmx:
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
mov r2, r0
;mov edx, [esp+4] ; pPred
;mov eax, edx
;mov ecx, [esp+8] ; kiStride
sub r2, r1
movq mm0, [r2] ; mm0 = [t7 t6 t5 t4 t3 t2 t1 t0]
@ -1077,9 +1036,6 @@ WelsDecoderIChromaPredDc_sse2:
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
mov r4, r0
;push ebx
;mov eax, [esp+8] ; pPred
;mov ecx, [esp+12] ; kiStride
sub r0, r1
movq mm0, [r0]
@ -1144,8 +1100,6 @@ WelsDecoderIChromaPredDc_sse2:
psllq mm1, 0x20
pxor mm1, mm2 ; mm2 = m_down
;mov edx, [esp+8] ; pPred
movq [r4], mm0
movq [r4+r1], mm0
movq [r4+2*r1], mm0
@ -1159,7 +1113,6 @@ WelsDecoderIChromaPredDc_sse2:
lea r4, [r4+2*r1]
movq [r4+r1], mm1
;pop ebx
pop r4
pop r3
WELSEMMS
@ -1174,9 +1127,6 @@ ALIGN 16
;*******************************************************************************
WELS_EXTERN WelsDecoderI16x16LumaPredDc_sse2
WelsDecoderI16x16LumaPredDc_sse2:
;push ebx
;mov eax, [esp+8] ; pPred
;mov ecx, [esp+12] ; kiStride
push r3
push r4
%assign push_num 2
@ -1211,8 +1161,6 @@ WelsDecoderI16x16LumaPredDc_sse2:
pmuludq xmm0, [mmx_01bytes]
pshufd xmm0, xmm0, 0
;mov edx, [esp+8] ; pPred
movdqa [r4], xmm0
movdqa [r4+r1], xmm0
movdqa [r4+2*r1], xmm0
@ -1244,7 +1192,6 @@ WelsDecoderI16x16LumaPredDc_sse2:
movdqa [r4+r1], xmm0
;pop ebx
pop r4
pop r3
@ -1260,10 +1207,6 @@ ALIGN 16
;*******************************************************************************
WELS_EXTERN WelsDecoderI16x16LumaPredDcTop_sse2
WelsDecoderI16x16LumaPredDcTop_sse2:
;push ebx
;%define PUSH_SIZE 4
;mov eax, [esp+PUSH_SIZE+4] ; pPred
;mov ebx, [esp+PUSH_SIZE+8] ; kiStride
%assign push_num 0
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
@ -1328,8 +1271,6 @@ WelsDecoderI16x16LumaPredDcTop_sse2:
movdqa [r0+2*r1], xmm0
movdqa [r0+r2], xmm1
;%undef PUSH_SIZE
;pop ebx
ret
ALIGN 16
@ -1338,12 +1279,6 @@ ALIGN 16
;*******************************************************************************
WELS_EXTERN WelsDecoderI16x16LumaPredDcNA_sse2
WelsDecoderI16x16LumaPredDcNA_sse2:
;push ebx
;%define PUSH_SIZE 4
;mov eax, [esp+PUSH_SIZE+4] ; pPred
;mov ebx, [esp+PUSH_SIZE+8] ; kiStride
%assign push_num 0
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
@ -1371,9 +1306,6 @@ WelsDecoderI16x16LumaPredDcNA_sse2:
movdqa [r0+2*r1], xmm0
movdqa [r0+r2], xmm1
;%undef PUSH_SIZE
;pop ebx
ret
ALIGN 16
@ -1382,12 +1314,6 @@ ALIGN 16
;*******************************************************************************
WELS_EXTERN WelsDecoderIChromaPredDcLeft_mmx
WelsDecoderIChromaPredDcLeft_mmx:
;push ebx
;push esi
;%define PUSH_SIZE 8
;mov esi, [esp+PUSH_SIZE+4] ; pPred
;mov ecx, [esp+PUSH_SIZE+8] ; kiStride
;mov eax, esi
push r3
push r4
%assign push_num 2
@ -1450,8 +1376,6 @@ WelsDecoderIChromaPredDcLeft_mmx:
movq [r4+r1], mm3
movq [r4+2*r1], mm2
movq [r4+r2], mm3
;pop esi
;pop ebx
pop r4
pop r3
emms
@ -1463,12 +1387,6 @@ ALIGN 16
;*******************************************************************************
WELS_EXTERN WelsDecoderIChromaPredDcTop_sse2
WelsDecoderIChromaPredDcTop_sse2:
;push ebx
;%define PUSH_SIZE 4
;mov eax, [esp+PUSH_SIZE+4] ; pPred
;mov ecx, [esp+PUSH_SIZE+8] ; kiStride
;mov ebx, ecx
;neg ebx
%assign push_num 0
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
@ -1500,8 +1418,6 @@ WelsDecoderIChromaPredDcTop_sse2:
movq [r0+r1], xmm0
movq [r0+2*r1], xmm0
movq [r0+r2], xmm0
;%undef PUSH_SIZE
;pop ebx
ret
ALIGN 16
@ -1510,10 +1426,6 @@ ALIGN 16
;*******************************************************************************
WELS_EXTERN WelsDecoderIChromaPredDcNA_mmx
WelsDecoderIChromaPredDcNA_mmx:
;push ebx
;%define PUSH_SIZE 4
;mov eax, [esp+PUSH_SIZE+4] ; pPred
;mov ebx, [esp+PUSH_SIZE+8] ; kiStride
%assign push_num 0
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
@ -1529,8 +1441,6 @@ WelsDecoderIChromaPredDcNA_mmx:
movq [r0+r1], mm1
movq [r0+2*r1], mm0
movq [r0+r2], mm1
;%undef PUSH_SIZE
;pop ebx
emms
ret

View File

@ -136,11 +136,6 @@ ALIGN 16
;***********************************************************************
WELS_EXTERN WelsDctT4_mmx
WelsDctT4_mmx:
;push ebx
;mov eax, [esp+12] ; pix1
;mov ebx, [esp+16] ; i_pix1
;mov ecx, [esp+20] ; pix2
;mov edx, [esp+24] ; i_pix2
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r2, r2d
@ -155,14 +150,12 @@ WelsDctT4_mmx:
MMX_DCT mm3, mm5, mm2 ,mm4, mm1, mm6
MMX_Trans4x4W mm2, mm3, mm4, mm1, mm5
;mov eax, [esp+ 8] ; pDct
movq [r0+ 0], mm2
movq [r0+ 8], mm1
movq [r0+16], mm5
movq [r0+24], mm4
WELSEMMS
LOAD_5_PARA_POP
;pop ebx
ret
@ -171,26 +164,14 @@ WelsDctT4_mmx:
;***********************************************************************
WELS_EXTERN WelsIDctT4Rec_mmx
WelsIDctT4Rec_mmx:
;push ebx
;%define pushsize 4
;%define p_dst esp+pushsize+4
;%define i_dst esp+pushsize+8
;%define p_pred esp+pushsize+12
;%define i_pred esp+pushsize+16
;%define pDct esp+pushsize+20
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
SIGN_EXTENSION r3, r3d
; mov eax, [pDct ]
movq mm0, [r4+ 0]
movq mm1, [r4+ 8]
movq mm2, [r4+16]
movq mm3, [r4+24]
;mov edx, [p_dst ] ; r0
;mov ecx, [i_dst ] ; r1
;mov eax, [p_pred] ; r2
;mov ebx, [i_pred] ; r3
MMX_Trans4x4W mm0, mm1, mm2, mm3, mm4
MMX_IDCT mm1, mm2, mm3, mm4, mm0, mm6
@ -209,13 +190,6 @@ WelsIDctT4Rec_mmx:
WELSEMMS
LOAD_5_PARA_POP
;%undef pushsize
;%undef p_dst
;%undef i_dst
;%undef p_pred
;%undef i_pred
;%undef pDct
; pop ebx
ret
@ -319,13 +293,6 @@ WelsIDctT4Rec_mmx:
WELS_EXTERN WelsDctFourT4_sse2
ALIGN 16
WelsDctFourT4_sse2:
;push ebx
;push esi
;mov esi, [esp+12]
;mov eax, [esp+16] ; pix1
;mov ebx, [esp+20] ; i_pix1
;mov ecx, [esp+24] ; pix2
;mov edx, [esp+28] ; i_pix2
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r2, r2d
@ -365,32 +332,16 @@ WelsDctFourT4_sse2:
lea r0, [r0+64]
SSE2_Store4x8p r0, xmm4, xmm2, xmm3, xmm0, xmm5
;pop esi
;pop ebx
LOAD_5_PARA_POP
ret
;%define rec esp + pushsize + 4
;%define stride esp + pushsize + 8
;%define pred esp + pushsize + 12
;%define pred_stride esp + pushsize + 16
;%define rs esp + pushsize + 20
;***********************************************************************
; void WelsIDctFourT4Rec_sse2(uint8_t *rec, int32_t stride, uint8_t *pred, int32_t pred_stride, int16_t *rs);
;***********************************************************************
WELS_EXTERN WelsIDctFourT4Rec_sse2
ALIGN 16
WelsIDctFourT4Rec_sse2:
;%define pushsize 8
; push ebx
; push esi
; mov eax, [rec]
; mov ebx, [stride]
; mov ecx, [pred]
; mov edx, [pred_stride]
; mov esi, [rs]
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
@ -449,21 +400,11 @@ WelsIDctFourT4Rec_sse2:
;***********************************************************************
WELS_EXTERN WelsIDctRecI16x16Dc_sse2
ALIGN 16
;%define pushsize 8
;%define luma_dc esp + pushsize + 20
WelsIDctRecI16x16Dc_sse2:
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
SIGN_EXTENSION r3, r3d
; push esi
; push edi
;mov ecx, [luma_dc] ; r4
;mov eax, [rec] ; r0
;mov edx, [stride] ; r1
;mov esi, [pred]; r2
;mov edi, [pred_stride]; r3
pxor xmm7, xmm7
WELS_DW32 xmm6
@ -499,8 +440,6 @@ WelsIDctRecI16x16Dc_sse2:
lea r2, [r2 + 2 * r3]
SSE2_StoreDiff4x8p xmm2, xmm3, xmm5, xmm7, r0, r2, r1, r3
LOAD_5_PARA_POP
;pop edi
;pop esi
ret
@ -537,8 +476,6 @@ WelsIDctRecI16x16Dc_sse2:
;***********************************************************************
WELS_EXTERN WelsHadamardT4Dc_sse2
WelsHadamardT4Dc_sse2:
;mov eax, [esp + 4] ; luma_dc
;mov ecx, [esp + 8] ; pDct
%assign push_num 0
LOAD_2_PARA
SSE2_Load4Col xmm1, xmm5, xmm6, xmm0, r1

View File

@ -234,10 +234,6 @@ WelsI4x4LumaPredH_sse2:
; void WelsI16x16LumaPredPlane_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride);
;***********************************************************************
WelsI16x16LumaPredPlane_sse2:
;%define pushsize 4
;push esi
;mov esi, [esp + pushsize + 8]
;mov ecx, [esp + pushsize + 12]
push r3
push r4
%assign push_num 2
@ -293,7 +289,6 @@ WelsI16x16LumaPredPlane_sse2:
sar r3, 6 ; c = (5 * V + 32) >> 6;
SSE2_Copy8Times xmm4, r3d ; xmm4 = c,c,c,c,c,c,c,c
;mov esi, [esp + pushsize + 4]
add r4, 16
imul r3, -7
add r3, r4 ; s = a + 16 + (-7)*c
@ -367,9 +362,6 @@ WelsI16x16LumaPredH_sse2:
;***********************************************************************
WELS_EXTERN WelsI16x16LumaPredV_sse2
WelsI16x16LumaPredV_sse2:
;mov edx, [esp+4] ; pred
;mov eax, [esp+8] ; pRef
;mov ecx, [esp+12] ; stride
%assign push_num 0
LOAD_3_PARA
SIGN_EXTENSION r2, r2d
@ -400,10 +392,6 @@ WelsI16x16LumaPredV_sse2:
;***********************************************************************
WELS_EXTERN WelsIChromaPredPlane_sse2
WelsIChromaPredPlane_sse2:
;%define pushsize 4
;push esi
;mov esi, [esp + pushsize + 8] ;pRef
;mov ecx, [esp + pushsize + 12] ;stride
push r3
push r4
%assign push_num 2
@ -462,7 +450,6 @@ WelsIChromaPredPlane_sse2:
sar r3, 5 ; c = (17 * V + 16) >> 5;
SSE2_Copy8Times xmm4, r3d ; mm4 = c,c,c,c,c,c,c,c
;mov esi, [esp + pushsize + 4]
add r4, 16
imul r3, -3
add r3, r4 ; s = a + 16 + (-3)*c
@ -502,9 +489,6 @@ ALIGN 16
;
;***********************************************************************
WelsI4x4LumaPredDDR_mmx:
;mov edx,[esp+4] ;pred
;mov eax,[esp+8] ;pRef
;mov ecx,[esp+12] ;stride
%assign push_num 0
LOAD_3_PARA
SIGN_EXTENSION r2, r2d
@ -619,9 +603,6 @@ ALIGN 16
WELS_EXTERN WelsIChromaPredH_mmx
WelsIChromaPredH_mmx:
;mov edx, [esp+4] ;pred
;mov eax, [esp+8] ;pRef
;mov ecx, [esp+12] ;stride
%assign push_num 0
LOAD_3_PARA
SIGN_EXTENSION r2, r2d

View File

@ -55,7 +55,6 @@ WELS_EXTERN WelsPrefetchZero_mmx
; WelsPrefetchZero_mmx: issue one non-temporal prefetch hint for the address
; held in r0, then return. No other instruction is executed here.
; NOTE(review): r0 is presumably the routine's single argument, made available
; by the LOAD_1_PARA macro (defined outside this view) -- confirm against the
; macro definition.
WelsPrefetchZero_mmx:
; push_num = 0: nothing has been pushed before the parameter-load macro.
; (Other routines in this commit set push_num to the number of preceding
; `push` instructions; presumably the LOAD_*_PARA macros use it to locate
; stack arguments -- TODO confirm.)
%assign push_num 0
LOAD_1_PARA
;mov eax,[esp+4]
; Prefetch hint only: fetches the data at [r0] toward the caches with a
; non-temporal hint; it does not modify any register or memory.
prefetchnta [r0]
ret

View File

@ -87,12 +87,9 @@ align 16
WelsQuant4x4_sse2:
%assign push_num 0
LOAD_3_PARA
;mov eax, [ff]
;mov ecx, [mf]
movdqa xmm2, [r1]
movdqa xmm3, [r2]
;mov edx, [pDct]
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0]
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x10]
@ -108,13 +105,10 @@ WelsQuant4x4Dc_sse2:
LOAD_3_PARA
SIGN_EXTENSION r1, r1w
SIGN_EXTENSION r2, r2w
;mov ax, [mf]
SSE2_Copy8Times xmm3, r2d
;mov cx, [ff]
SSE2_Copy8Times xmm2, r1d
;mov edx, [pDct]
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0]
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x10]
@ -128,12 +122,9 @@ align 16
WelsQuantFour4x4_sse2:
%assign push_num 0
LOAD_3_PARA
;mov eax, [ff]
;mov ecx, [mf]
MOVDQ xmm2, [r1]
MOVDQ xmm3, [r2]
;mov edx, [pDct]
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0]
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x10]
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x20]
@ -153,12 +144,9 @@ align 16
WelsQuantFour4x4Max_sse2:
%assign push_num 0
LOAD_4_PARA
;mov eax, [ff]
;mov ecx, [mf]
MOVDQ xmm2, [r1]
MOVDQ xmm3, [r2]
;mov edx, [pDct]
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
@ -180,7 +168,6 @@ WelsQuantFour4x4Max_sse2:
punpckhqdq xmm0, xmm1
pmaxsw xmm0, xmm1
;mov r0, [r3]
movq [r3], xmm0
LOAD_4_PARA_POP
ret
@ -204,8 +191,6 @@ SECTION .text
psubw %1, %2
%endmacro
%define dct2x2 esp + 16
%define iChromaDc esp + 20
;***********************************************************************
;int32_t WelsHadamardQuant2x2_mmx(int16_t *rs, const int16_t ff, int16_t mf, int16_t * pDct, int16_t * block);
;***********************************************************************
@ -216,7 +201,6 @@ WelsHadamardQuant2x2_mmx:
LOAD_5_PARA
SIGN_EXTENSION r1, r1w
SIGN_EXTENSION r2, r2w
;mov eax, [pDct]
movd mm0, [r0]
movd mm1, [r0 + 0x20]
punpcklwd mm0, mm1
@ -237,16 +221,12 @@ WelsHadamardQuant2x2_mmx:
punpcklwd mm1, mm3
;quant_2x2_dc
;mov ax, [mf]
MMX_Copy4Times mm3, r2d
;mov cx, [ff]
MMX_Copy4Times mm2, r1d
MMX_Quant4 mm1, mm0, mm2, mm3
; store dct_2x2
;mov edx, [dct2x2]
movq [r3], mm1
;mov ecx, [iChromaDc]
movq [r4], mm1
; pNonZeroCount of dct_2x2
@ -279,7 +259,6 @@ WelsHadamardQuant2x2Skip_mmx:
LOAD_3_PARA
SIGN_EXTENSION r1, r1w
SIGN_EXTENSION r2, r2w
;mov eax, [pDct]
movd mm0, [r0]
movd mm1, [r0 + 0x20]
punpcklwd mm0, mm1
@ -300,9 +279,7 @@ WelsHadamardQuant2x2Skip_mmx:
punpcklwd mm1, mm3
;quant_2x2_dc
;mov ax, [mf]
MMX_Copy4Times mm3, r2d
;mov cx, [ff]
MMX_Copy4Times mm2, r1d
MMX_Quant4 mm1, mm0, mm2, mm3
@ -333,11 +310,8 @@ ALIGN 16
align 16
WELS_EXTERN WelsDequant4x4_sse2
WelsDequant4x4_sse2:
;ecx = dequant_mf[qp], edx = pDct
%assign push_num 0
LOAD_2_PARA
;mov ecx, [esp + 8]
;mov edx, [esp + 4]
movdqa xmm1, [r1]
SSE2_DeQuant8 [r0 ], xmm0, xmm1
@ -353,11 +327,8 @@ align 16
WELS_EXTERN WelsDequantFour4x4_sse2
WelsDequantFour4x4_sse2:
;ecx = dequant_mf[qp], edx = pDct
%assign push_num 0
LOAD_2_PARA
;mov ecx, [esp + 8]
;mov edx, [esp + 4]
movdqa xmm1, [r1]
SSE2_DeQuant8 [r0 ], xmm0, xmm1
@ -382,8 +353,6 @@ WelsDequantIHadamard4x4_sse2:
%ifndef X86_32
movzx r1, r1w
%endif
;mov eax, [esp + 4]
;mov cx, [esp + 8]
; WelsDequantLumaDc4x4
SSE2_Copy8Times xmm1, r1d

View File

@ -176,7 +176,6 @@ WelsScan4x4DcAc_sse2:
%assign push_num 0
%endif
LOAD_2_PARA
;mov eax, [esp+8]
movdqa xmm0, [r1] ; 7 6 5 4 3 2 1 0
movdqa xmm1, [r1+16] ; f e d c b a 9 8
pextrw r2d, xmm0, 7 ; ecx = 7
@ -191,7 +190,6 @@ WelsScan4x4DcAc_sse2:
pshufd xmm3, xmm1, 0xd8 ; f e b 7 d c 9 a
pshufhw xmm0, xmm2, 0x93 ; 6 3 2 5 8 4 1 0
pshuflw xmm1, xmm3, 0x39 ; f e b 7 a d c 9
;mov eax, [esp+4]
movdqa [r0],xmm0
movdqa [r0+16], xmm1
%ifdef X86_32
@ -207,7 +205,6 @@ WELS_EXTERN WelsScan4x4DcAc_ssse3
WelsScan4x4DcAc_ssse3:
%assign push_num 0
LOAD_2_PARA
;mov eax, [esp+8]
movdqa xmm0, [r1]
movdqa xmm1, [r1+16]
pextrw r2d, xmm0, 7 ; ecx = [7]
@ -217,7 +214,6 @@ WelsScan4x4DcAc_ssse3:
pshufb xmm1, [pb_scanacdc_maskb]
pshufb xmm0, [pb_scanacdc_maska]
;mov eax, [esp+4]
movdqa [r0],xmm0
movdqa [r0+16], xmm1
ret
@ -229,7 +225,6 @@ WELS_EXTERN WelsScan4x4Ac_sse2
WelsScan4x4Ac_sse2:
%assign push_num 0
LOAD_2_PARA
;mov eax, [esp+8]
movdqa xmm0, [r1]
movdqa xmm1, [r1+16]
movdqa xmm2, xmm0
@ -256,7 +251,6 @@ WelsScan4x4Ac_sse2:
pslldq xmm3, 14
por xmm1, xmm3
psrldq xmm2, 2
;mov eax, [esp+4]
movdqa [r0],xmm1
movdqa [r0+16], xmm2
ret
@ -268,8 +262,6 @@ WelsScan4x4Ac_sse2:
ALIGN 16
WELS_EXTERN WelsCalculateSingleCtr4x4_sse2
WelsCalculateSingleCtr4x4_sse2:
;push ebx
;mov eax, [esp+8]
%ifdef X86_32
push r3
%assign push_num 1
@ -321,7 +313,6 @@ WelsCalculateSingleCtr4x4_sse2:
%else
mov retrd, r0d
%endif
;pop ebx
ret
@ -333,7 +324,6 @@ WELS_EXTERN WelsGetNoneZeroCount_sse2
WelsGetNoneZeroCount_sse2:
%assign push_num 0
LOAD_1_PARA
;mov eax, [esp+4]
movdqa xmm0, [r0]
movdqa xmm1, [r0+16]
pxor xmm2, xmm2

View File

@ -172,11 +172,6 @@ WELS_EXTERN BilateralLumaFilter8_sse2
; 4 0 5
; 6 7 8
; 0: the center point
%define pushsize 4
;%define pixel esp + pushsize + 4
;%define stride esp + pushsize + 8
;%define pixel r0
;%define stride r1
BilateralLumaFilter8_sse2: