Convert some assembly files to unix newlines
This makes them consistent with the rest of them.
This commit is contained in:
parent
301b06ad36
commit
ddcfc09c49
File diff suppressed because it is too large
Load Diff
@ -1,345 +1,345 @@
|
||||
;*!
|
||||
;* \copy
|
||||
;* Copyright (c) 2004-2013, Cisco Systems
|
||||
;* All rights reserved.
|
||||
;*
|
||||
;* Redistribution and use in source and binary forms, with or without
|
||||
;* modification, are permitted provided that the following conditions
|
||||
;* are met:
|
||||
;*
|
||||
;* * Redistributions of source code must retain the above copyright
|
||||
;* notice, this list of conditions and the following disclaimer.
|
||||
;*
|
||||
;* * Redistributions in binary form must reproduce the above copyright
|
||||
;* notice, this list of conditions and the following disclaimer in
|
||||
;* the documentation and/or other materials provided with the
|
||||
;* distribution.
|
||||
;*
|
||||
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
;* POSSIBILITY OF SUCH DAMAGE.
|
||||
;*
|
||||
;*
|
||||
;* mc_chroma.asm
|
||||
;*
|
||||
;* Abstract
|
||||
;* mmx motion compensation for chroma
|
||||
;*
|
||||
;* History
|
||||
;* 10/13/2004 Created
|
||||
;*
|
||||
;*
|
||||
;*************************************************************************/
|
||||
%include "asm_inc.asm"
|
||||
|
||||
;***********************************************************************
|
||||
; Local Data (Read Only)
|
||||
;***********************************************************************
|
||||
|
||||
SECTION .rodata align=16
|
||||
|
||||
;***********************************************************************
|
||||
; Various memory constants (trigonometric values or rounding values)
|
||||
;***********************************************************************
|
||||
|
||||
ALIGN 16
|
||||
h264_d0x20_sse2:
|
||||
dw 32,32,32,32,32,32,32,32
|
||||
ALIGN 16
|
||||
h264_d0x20_mmx:
|
||||
dw 32,32,32,32
|
||||
|
||||
|
||||
;=============================================================================
|
||||
; Code
|
||||
;=============================================================================
|
||||
|
||||
SECTION .text
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void McChromaWidthEq4_mmx( uint8_t *src,
|
||||
; int32_t iSrcStride,
|
||||
; uint8_t *pDst,
|
||||
; int32_t iDstStride,
|
||||
; uint8_t *pABCD,
|
||||
; int32_t iHeigh );
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN McChromaWidthEq4_mmx
|
||||
McChromaWidthEq4_mmx:
|
||||
;push esi
|
||||
;push edi
|
||||
;push ebx
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_6_PARA
|
||||
%ifndef X86_32
|
||||
movsx r1, r1d
|
||||
movsx r3, r3d
|
||||
movsx r5, r5d
|
||||
%endif
|
||||
|
||||
;mov eax, [esp +12 + 20]
|
||||
|
||||
movd mm3, [r4]; [eax]
|
||||
WELS_Zero mm7
|
||||
punpcklbw mm3, mm3
|
||||
movq mm4, mm3
|
||||
punpcklwd mm3, mm3
|
||||
punpckhwd mm4, mm4
|
||||
|
||||
movq mm5, mm3
|
||||
punpcklbw mm3, mm7
|
||||
punpckhbw mm5, mm7
|
||||
|
||||
movq mm6, mm4
|
||||
punpcklbw mm4, mm7
|
||||
punpckhbw mm6, mm7
|
||||
|
||||
;mov esi, [esp +12+ 4]
|
||||
;mov eax, [esp + 12 + 8]
|
||||
;mov edi, [esp + 12 + 12]
|
||||
;mov edx, [esp + 12 + 16]
|
||||
;mov ecx, [esp + 12 + 24]
|
||||
|
||||
lea r4, [r0 + r1] ;lea ebx, [esi + eax]
|
||||
movd mm0, [r0]
|
||||
movd mm1, [r0+1]
|
||||
punpcklbw mm0, mm7
|
||||
punpcklbw mm1, mm7
|
||||
.xloop:
|
||||
|
||||
pmullw mm0, mm3
|
||||
pmullw mm1, mm5
|
||||
paddw mm0, mm1
|
||||
|
||||
movd mm1, [r4]
|
||||
punpcklbw mm1, mm7
|
||||
movq mm2, mm1
|
||||
pmullw mm1, mm4
|
||||
paddw mm0, mm1
|
||||
|
||||
movd mm1, [r4+1]
|
||||
punpcklbw mm1, mm7
|
||||
movq mm7, mm1
|
||||
pmullw mm1,mm6
|
||||
paddw mm0, mm1
|
||||
movq mm1,mm7
|
||||
|
||||
paddw mm0, [h264_d0x20_mmx]
|
||||
psrlw mm0, 6
|
||||
|
||||
WELS_Zero mm7
|
||||
packuswb mm0, mm7
|
||||
movd [r2], mm0
|
||||
|
||||
movq mm0, mm2
|
||||
|
||||
lea r2, [r2 + r3]
|
||||
lea r4, [r4 + r1]
|
||||
|
||||
dec r5
|
||||
jnz near .xloop
|
||||
WELSEMMS
|
||||
LOAD_6_PARA_POP
|
||||
;pop ebx
|
||||
;pop edi
|
||||
;pop esi
|
||||
ret
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void McChromaWidthEq8_sse2( uint8_t *pSrc,
|
||||
; int32_t iSrcStride,
|
||||
; uint8_t *pDst,
|
||||
; int32_t iDstStride,
|
||||
; uint8_t *pABCD,
|
||||
; int32_t iheigh );
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN McChromaWidthEq8_sse2
|
||||
McChromaWidthEq8_sse2:
|
||||
;push esi
|
||||
;push edi
|
||||
;push ebx
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_6_PARA
|
||||
%ifndef X86_32
|
||||
movsx r1, r1d
|
||||
movsx r3, r3d
|
||||
movsx r5, r5d
|
||||
%endif
|
||||
|
||||
;mov eax, [esp +12 + 20]
|
||||
movd xmm3, [r4]
|
||||
WELS_Zero xmm7
|
||||
punpcklbw xmm3, xmm3
|
||||
punpcklwd xmm3, xmm3
|
||||
|
||||
movdqa xmm4, xmm3
|
||||
punpckldq xmm3, xmm3
|
||||
punpckhdq xmm4, xmm4
|
||||
movdqa xmm5, xmm3
|
||||
movdqa xmm6, xmm4
|
||||
|
||||
punpcklbw xmm3, xmm7
|
||||
punpckhbw xmm5, xmm7
|
||||
punpcklbw xmm4, xmm7
|
||||
punpckhbw xmm6, xmm7
|
||||
|
||||
;mov esi, [esp +12+ 4]
|
||||
;mov eax, [esp + 12 + 8]
|
||||
;mov edi, [esp + 12 + 12]
|
||||
;mov edx, [esp + 12 + 16]
|
||||
;mov ecx, [esp + 12 + 24]
|
||||
|
||||
lea r4, [r0 + r1] ;lea ebx, [esi + eax]
|
||||
movq xmm0, [r0]
|
||||
movq xmm1, [r0+1]
|
||||
punpcklbw xmm0, xmm7
|
||||
punpcklbw xmm1, xmm7
|
||||
.xloop:
|
||||
|
||||
pmullw xmm0, xmm3
|
||||
pmullw xmm1, xmm5
|
||||
paddw xmm0, xmm1
|
||||
|
||||
movq xmm1, [r4]
|
||||
punpcklbw xmm1, xmm7
|
||||
movdqa xmm2, xmm1
|
||||
pmullw xmm1, xmm4
|
||||
paddw xmm0, xmm1
|
||||
|
||||
movq xmm1, [r4+1]
|
||||
punpcklbw xmm1, xmm7
|
||||
movdqa xmm7, xmm1
|
||||
pmullw xmm1, xmm6
|
||||
paddw xmm0, xmm1
|
||||
movdqa xmm1,xmm7
|
||||
|
||||
paddw xmm0, [h264_d0x20_sse2]
|
||||
psrlw xmm0, 6
|
||||
|
||||
WELS_Zero xmm7
|
||||
packuswb xmm0, xmm7
|
||||
movq [r2], xmm0
|
||||
|
||||
movdqa xmm0, xmm2
|
||||
|
||||
lea r2, [r2 + r3]
|
||||
lea r4, [r4 + r1]
|
||||
|
||||
dec r5
|
||||
jnz near .xloop
|
||||
|
||||
LOAD_6_PARA_POP
|
||||
|
||||
;pop ebx
|
||||
;pop edi
|
||||
;pop esi
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
; void McChromaWidthEq8_ssse3( uint8_t *pSrc,
|
||||
; int32_t iSrcStride,
|
||||
; uint8_t *pDst,
|
||||
; int32_t iDstStride,
|
||||
; uint8_t *pABCD,
|
||||
; int32_t iHeigh);
|
||||
;***********************************************************************
|
||||
WELS_EXTERN McChromaWidthEq8_ssse3
|
||||
McChromaWidthEq8_ssse3:
|
||||
;push ebx
|
||||
;push esi
|
||||
;push edi
|
||||
%assign push_num 0
|
||||
LOAD_6_PARA
|
||||
%ifndef X86_32
|
||||
movsx r1, r1d
|
||||
movsx r3, r3d
|
||||
movsx r5, r5d
|
||||
%endif
|
||||
|
||||
;mov eax, [esp + 12 + 20]
|
||||
|
||||
pxor xmm7, xmm7
|
||||
movd xmm5, [r4]
|
||||
punpcklwd xmm5, xmm5
|
||||
punpckldq xmm5, xmm5
|
||||
movdqa xmm6, xmm5
|
||||
punpcklqdq xmm5, xmm5
|
||||
punpckhqdq xmm6, xmm6
|
||||
|
||||
;mov eax, [esp + 12 + 4]
|
||||
;mov edx, [esp + 12 + 8]
|
||||
;mov esi, [esp + 12 + 12]
|
||||
;mov edi, [esp + 12 + 16]
|
||||
;mov ecx, [esp + 12 + 24]
|
||||
|
||||
sub r2, r3 ;sub esi, edi
|
||||
sub r2, r3
|
||||
movdqa xmm7, [h264_d0x20_sse2]
|
||||
|
||||
movdqu xmm0, [r0]
|
||||
movdqa xmm1, xmm0
|
||||
psrldq xmm1, 1
|
||||
punpcklbw xmm0, xmm1
|
||||
|
||||
.hloop_chroma:
|
||||
lea r2, [r2+2*r3]
|
||||
|
||||
movdqu xmm2, [r0+r1]
|
||||
movdqa xmm3, xmm2
|
||||
psrldq xmm3, 1
|
||||
punpcklbw xmm2, xmm3
|
||||
movdqa xmm4, xmm2
|
||||
|
||||
pmaddubsw xmm0, xmm5
|
||||
pmaddubsw xmm2, xmm6
|
||||
paddw xmm0, xmm2
|
||||
paddw xmm0, xmm7
|
||||
psrlw xmm0, 6
|
||||
packuswb xmm0, xmm0
|
||||
movq [r2],xmm0
|
||||
|
||||
lea r0, [r0+2*r1]
|
||||
movdqu xmm2, [r0]
|
||||
movdqa xmm3, xmm2
|
||||
psrldq xmm3, 1
|
||||
punpcklbw xmm2, xmm3
|
||||
movdqa xmm0, xmm2
|
||||
|
||||
pmaddubsw xmm4, xmm5
|
||||
pmaddubsw xmm2, xmm6
|
||||
paddw xmm4, xmm2
|
||||
paddw xmm4, xmm7
|
||||
psrlw xmm4, 6
|
||||
packuswb xmm4, xmm4
|
||||
movq [r2+r3],xmm4
|
||||
|
||||
sub r5, 2
|
||||
jnz .hloop_chroma
|
||||
|
||||
LOAD_6_PARA_POP
|
||||
|
||||
;pop edi
|
||||
;pop esi
|
||||
;pop ebx
|
||||
|
||||
ret
|
||||
|
||||
|
||||
;*!
|
||||
;* \copy
|
||||
;* Copyright (c) 2004-2013, Cisco Systems
|
||||
;* All rights reserved.
|
||||
;*
|
||||
;* Redistribution and use in source and binary forms, with or without
|
||||
;* modification, are permitted provided that the following conditions
|
||||
;* are met:
|
||||
;*
|
||||
;* * Redistributions of source code must retain the above copyright
|
||||
;* notice, this list of conditions and the following disclaimer.
|
||||
;*
|
||||
;* * Redistributions in binary form must reproduce the above copyright
|
||||
;* notice, this list of conditions and the following disclaimer in
|
||||
;* the documentation and/or other materials provided with the
|
||||
;* distribution.
|
||||
;*
|
||||
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
;* POSSIBILITY OF SUCH DAMAGE.
|
||||
;*
|
||||
;*
|
||||
;* mc_chroma.asm
|
||||
;*
|
||||
;* Abstract
|
||||
;* mmx motion compensation for chroma
|
||||
;*
|
||||
;* History
|
||||
;* 10/13/2004 Created
|
||||
;*
|
||||
;*
|
||||
;*************************************************************************/
|
||||
%include "asm_inc.asm"
|
||||
|
||||
;***********************************************************************
|
||||
; Local Data (Read Only)
|
||||
;***********************************************************************
|
||||
|
||||
SECTION .rodata align=16
|
||||
|
||||
;***********************************************************************
|
||||
; Various memory constants (trigonometric values or rounding values)
|
||||
;***********************************************************************
|
||||
|
||||
ALIGN 16
|
||||
h264_d0x20_sse2:
|
||||
dw 32,32,32,32,32,32,32,32
|
||||
ALIGN 16
|
||||
h264_d0x20_mmx:
|
||||
dw 32,32,32,32
|
||||
|
||||
|
||||
;=============================================================================
|
||||
; Code
|
||||
;=============================================================================
|
||||
|
||||
SECTION .text
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void McChromaWidthEq4_mmx( uint8_t *src,
|
||||
; int32_t iSrcStride,
|
||||
; uint8_t *pDst,
|
||||
; int32_t iDstStride,
|
||||
; uint8_t *pABCD,
|
||||
; int32_t iHeigh );
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN McChromaWidthEq4_mmx
|
||||
McChromaWidthEq4_mmx:
|
||||
;push esi
|
||||
;push edi
|
||||
;push ebx
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_6_PARA
|
||||
%ifndef X86_32
|
||||
movsx r1, r1d
|
||||
movsx r3, r3d
|
||||
movsx r5, r5d
|
||||
%endif
|
||||
|
||||
;mov eax, [esp +12 + 20]
|
||||
|
||||
movd mm3, [r4]; [eax]
|
||||
WELS_Zero mm7
|
||||
punpcklbw mm3, mm3
|
||||
movq mm4, mm3
|
||||
punpcklwd mm3, mm3
|
||||
punpckhwd mm4, mm4
|
||||
|
||||
movq mm5, mm3
|
||||
punpcklbw mm3, mm7
|
||||
punpckhbw mm5, mm7
|
||||
|
||||
movq mm6, mm4
|
||||
punpcklbw mm4, mm7
|
||||
punpckhbw mm6, mm7
|
||||
|
||||
;mov esi, [esp +12+ 4]
|
||||
;mov eax, [esp + 12 + 8]
|
||||
;mov edi, [esp + 12 + 12]
|
||||
;mov edx, [esp + 12 + 16]
|
||||
;mov ecx, [esp + 12 + 24]
|
||||
|
||||
lea r4, [r0 + r1] ;lea ebx, [esi + eax]
|
||||
movd mm0, [r0]
|
||||
movd mm1, [r0+1]
|
||||
punpcklbw mm0, mm7
|
||||
punpcklbw mm1, mm7
|
||||
.xloop:
|
||||
|
||||
pmullw mm0, mm3
|
||||
pmullw mm1, mm5
|
||||
paddw mm0, mm1
|
||||
|
||||
movd mm1, [r4]
|
||||
punpcklbw mm1, mm7
|
||||
movq mm2, mm1
|
||||
pmullw mm1, mm4
|
||||
paddw mm0, mm1
|
||||
|
||||
movd mm1, [r4+1]
|
||||
punpcklbw mm1, mm7
|
||||
movq mm7, mm1
|
||||
pmullw mm1,mm6
|
||||
paddw mm0, mm1
|
||||
movq mm1,mm7
|
||||
|
||||
paddw mm0, [h264_d0x20_mmx]
|
||||
psrlw mm0, 6
|
||||
|
||||
WELS_Zero mm7
|
||||
packuswb mm0, mm7
|
||||
movd [r2], mm0
|
||||
|
||||
movq mm0, mm2
|
||||
|
||||
lea r2, [r2 + r3]
|
||||
lea r4, [r4 + r1]
|
||||
|
||||
dec r5
|
||||
jnz near .xloop
|
||||
WELSEMMS
|
||||
LOAD_6_PARA_POP
|
||||
;pop ebx
|
||||
;pop edi
|
||||
;pop esi
|
||||
ret
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;*******************************************************************************
|
||||
; void McChromaWidthEq8_sse2( uint8_t *pSrc,
|
||||
; int32_t iSrcStride,
|
||||
; uint8_t *pDst,
|
||||
; int32_t iDstStride,
|
||||
; uint8_t *pABCD,
|
||||
; int32_t iheigh );
|
||||
;*******************************************************************************
|
||||
WELS_EXTERN McChromaWidthEq8_sse2
|
||||
McChromaWidthEq8_sse2:
|
||||
;push esi
|
||||
;push edi
|
||||
;push ebx
|
||||
|
||||
%assign push_num 0
|
||||
LOAD_6_PARA
|
||||
%ifndef X86_32
|
||||
movsx r1, r1d
|
||||
movsx r3, r3d
|
||||
movsx r5, r5d
|
||||
%endif
|
||||
|
||||
;mov eax, [esp +12 + 20]
|
||||
movd xmm3, [r4]
|
||||
WELS_Zero xmm7
|
||||
punpcklbw xmm3, xmm3
|
||||
punpcklwd xmm3, xmm3
|
||||
|
||||
movdqa xmm4, xmm3
|
||||
punpckldq xmm3, xmm3
|
||||
punpckhdq xmm4, xmm4
|
||||
movdqa xmm5, xmm3
|
||||
movdqa xmm6, xmm4
|
||||
|
||||
punpcklbw xmm3, xmm7
|
||||
punpckhbw xmm5, xmm7
|
||||
punpcklbw xmm4, xmm7
|
||||
punpckhbw xmm6, xmm7
|
||||
|
||||
;mov esi, [esp +12+ 4]
|
||||
;mov eax, [esp + 12 + 8]
|
||||
;mov edi, [esp + 12 + 12]
|
||||
;mov edx, [esp + 12 + 16]
|
||||
;mov ecx, [esp + 12 + 24]
|
||||
|
||||
lea r4, [r0 + r1] ;lea ebx, [esi + eax]
|
||||
movq xmm0, [r0]
|
||||
movq xmm1, [r0+1]
|
||||
punpcklbw xmm0, xmm7
|
||||
punpcklbw xmm1, xmm7
|
||||
.xloop:
|
||||
|
||||
pmullw xmm0, xmm3
|
||||
pmullw xmm1, xmm5
|
||||
paddw xmm0, xmm1
|
||||
|
||||
movq xmm1, [r4]
|
||||
punpcklbw xmm1, xmm7
|
||||
movdqa xmm2, xmm1
|
||||
pmullw xmm1, xmm4
|
||||
paddw xmm0, xmm1
|
||||
|
||||
movq xmm1, [r4+1]
|
||||
punpcklbw xmm1, xmm7
|
||||
movdqa xmm7, xmm1
|
||||
pmullw xmm1, xmm6
|
||||
paddw xmm0, xmm1
|
||||
movdqa xmm1,xmm7
|
||||
|
||||
paddw xmm0, [h264_d0x20_sse2]
|
||||
psrlw xmm0, 6
|
||||
|
||||
WELS_Zero xmm7
|
||||
packuswb xmm0, xmm7
|
||||
movq [r2], xmm0
|
||||
|
||||
movdqa xmm0, xmm2
|
||||
|
||||
lea r2, [r2 + r3]
|
||||
lea r4, [r4 + r1]
|
||||
|
||||
dec r5
|
||||
jnz near .xloop
|
||||
|
||||
LOAD_6_PARA_POP
|
||||
|
||||
;pop ebx
|
||||
;pop edi
|
||||
;pop esi
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGN 16
|
||||
;***********************************************************************
|
||||
; void McChromaWidthEq8_ssse3( uint8_t *pSrc,
|
||||
; int32_t iSrcStride,
|
||||
; uint8_t *pDst,
|
||||
; int32_t iDstStride,
|
||||
; uint8_t *pABCD,
|
||||
; int32_t iHeigh);
|
||||
;***********************************************************************
|
||||
WELS_EXTERN McChromaWidthEq8_ssse3
|
||||
McChromaWidthEq8_ssse3:
|
||||
;push ebx
|
||||
;push esi
|
||||
;push edi
|
||||
%assign push_num 0
|
||||
LOAD_6_PARA
|
||||
%ifndef X86_32
|
||||
movsx r1, r1d
|
||||
movsx r3, r3d
|
||||
movsx r5, r5d
|
||||
%endif
|
||||
|
||||
;mov eax, [esp + 12 + 20]
|
||||
|
||||
pxor xmm7, xmm7
|
||||
movd xmm5, [r4]
|
||||
punpcklwd xmm5, xmm5
|
||||
punpckldq xmm5, xmm5
|
||||
movdqa xmm6, xmm5
|
||||
punpcklqdq xmm5, xmm5
|
||||
punpckhqdq xmm6, xmm6
|
||||
|
||||
;mov eax, [esp + 12 + 4]
|
||||
;mov edx, [esp + 12 + 8]
|
||||
;mov esi, [esp + 12 + 12]
|
||||
;mov edi, [esp + 12 + 16]
|
||||
;mov ecx, [esp + 12 + 24]
|
||||
|
||||
sub r2, r3 ;sub esi, edi
|
||||
sub r2, r3
|
||||
movdqa xmm7, [h264_d0x20_sse2]
|
||||
|
||||
movdqu xmm0, [r0]
|
||||
movdqa xmm1, xmm0
|
||||
psrldq xmm1, 1
|
||||
punpcklbw xmm0, xmm1
|
||||
|
||||
.hloop_chroma:
|
||||
lea r2, [r2+2*r3]
|
||||
|
||||
movdqu xmm2, [r0+r1]
|
||||
movdqa xmm3, xmm2
|
||||
psrldq xmm3, 1
|
||||
punpcklbw xmm2, xmm3
|
||||
movdqa xmm4, xmm2
|
||||
|
||||
pmaddubsw xmm0, xmm5
|
||||
pmaddubsw xmm2, xmm6
|
||||
paddw xmm0, xmm2
|
||||
paddw xmm0, xmm7
|
||||
psrlw xmm0, 6
|
||||
packuswb xmm0, xmm0
|
||||
movq [r2],xmm0
|
||||
|
||||
lea r0, [r0+2*r1]
|
||||
movdqu xmm2, [r0]
|
||||
movdqa xmm3, xmm2
|
||||
psrldq xmm3, 1
|
||||
punpcklbw xmm2, xmm3
|
||||
movdqa xmm0, xmm2
|
||||
|
||||
pmaddubsw xmm4, xmm5
|
||||
pmaddubsw xmm2, xmm6
|
||||
paddw xmm4, xmm2
|
||||
paddw xmm4, xmm7
|
||||
psrlw xmm4, 6
|
||||
packuswb xmm4, xmm4
|
||||
movq [r2+r3],xmm4
|
||||
|
||||
sub r5, 2
|
||||
jnz .hloop_chroma
|
||||
|
||||
LOAD_6_PARA_POP
|
||||
|
||||
;pop edi
|
||||
;pop esi
|
||||
;pop ebx
|
||||
|
||||
ret
|
||||
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user