;*!
;* \copy
;*     Copyright (c)  2004-2013, Cisco Systems
;*     All rights reserved.
;*
;*     Redistribution and use in source and binary forms, with or without
;*     modification, are permitted provided that the following conditions
;*     are met:
;*
;*        * Redistributions of source code must retain the above copyright
;*          notice, this list of conditions and the following disclaimer.
;*
;*        * Redistributions in binary form must reproduce the above copyright
;*          notice, this list of conditions and the following disclaimer in
;*          the documentation and/or other materials provided with the
;*          distribution.
;*
;*     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;*     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;*     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;*     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;*     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;*     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;*     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;*     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;*     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;*     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;*     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;*     POSSIBILITY OF SUCH DAMAGE.
;*
;*
;*  mc_chroma.asm
;*
;*  Abstract
;*      MMX/SSE2/SSSE3 motion compensation for chroma
;*
;*  History
;*      10/13/2004 Created
;*
;*
;*************************************************************************/
%include "asm_inc.asm"

;***********************************************************************
; Local Data (Read Only)
;***********************************************************************

SECTION .rodata align=16

;***********************************************************************
; Various memory constants (rounding values)
;***********************************************************************

ALIGN 16
h264_d0x20_sse2:
    dw 32,32,32,32,32,32,32,32
ALIGN 16
h264_d0x20_mmx:
    dw 32,32,32,32
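
; Editor's note: both constants hold the value 32, the rounding term that is
; added before the final ">> 6" in the chroma interpolation loops below
; (i.e. (sum + 32) >> 6).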

;=============================================================================
; Code
;=============================================================================

SECTION .text

ALIGN 16
;*******************************************************************************
; void McChromaWidthEq4_mmx( const uint8_t *src,
;                            int32_t iSrcStride,
;                            uint8_t *pDst,
;                            int32_t iDstStride,
;                            const uint8_t *pABCD,
;                            int32_t iHeigh );
;*******************************************************************************
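; Editor's note: this routine performs H.264 chroma motion compensation for a
; 4-pixel-wide block.  For each output pixel it computes
;     pDst[x] = (A*src[x] + B*src[x+1] + C*srcNext[x] + D*srcNext[x+1] + 32) >> 6
; where srcNext is the next source row and A,B,C,D are the four byte weights
; supplied in pABCD (presumably A=(8-dx)*(8-dy), B=dx*(8-dy), C=(8-dx)*dy,
; D=dx*dy for the fractional offset (dx,dy), as in the H.264 chroma filter).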
WELS_EXTERN McChromaWidthEq4_mmx
McChromaWidthEq4_mmx:
    ;push esi
    ;push edi
    ;push ebx

    %assign push_num 0
    LOAD_6_PARA
%ifndef X86_32
    movsx r1, r1d
    movsx r3, r3d
    movsx r5, r5d
%endif

    ;mov eax, [esp + 12 + 20]

    movd mm3, [r4] ; [eax]
    WELS_Zero mm7
    punpcklbw mm3, mm3
    movq mm4, mm3
    punpcklwd mm3, mm3
    punpckhwd mm4, mm4

    movq mm5, mm3
    punpcklbw mm3, mm7
    punpckhbw mm5, mm7

    movq mm6, mm4
    punpcklbw mm4, mm7
    punpckhbw mm6, mm7
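
; Editor's note: after the unpack sequence above the weights appear to be
; spread out as words:  mm3 = A A A A,  mm5 = B B B B,
; mm4 = C C C C,  mm6 = D D D D  (mm7 is the zero register).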

    ;mov esi, [esp + 12 + 4]
    ;mov eax, [esp + 12 + 8]
    ;mov edi, [esp + 12 + 12]
    ;mov edx, [esp + 12 + 16]
    ;mov ecx, [esp + 12 + 24]

    lea r4, [r0 + r1] ;lea ebx, [esi + eax]
    movd mm0, [r0]
    movd mm1, [r0+1]
    punpcklbw mm0, mm7
    punpcklbw mm1, mm7
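
; Editor's note: mm0/mm1 now hold the current row's pixels at x and x+1,
; zero-extended to words; r4 points at the next source row.  Each .xloop
; iteration below computes one 4-pixel output row as
; (A*cur + B*curRight + C*next + D*nextRight + 32) >> 6, then reuses the
; next-row pixels (saved in mm2/mm1) as the current row of the following
; iteration, so every source row is loaded only once.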
.xloop:

    pmullw mm0, mm3
    pmullw mm1, mm5
    paddw mm0, mm1

    movd mm1, [r4]
    punpcklbw mm1, mm7
    movq mm2, mm1
    pmullw mm1, mm4
    paddw mm0, mm1

    movd mm1, [r4+1]
    punpcklbw mm1, mm7
    movq mm7, mm1
    pmullw mm1, mm6
    paddw mm0, mm1
    movq mm1, mm7

    paddw mm0, [h264_d0x20_mmx]
    psrlw mm0, 6

    WELS_Zero mm7
    packuswb mm0, mm7
    movd [r2], mm0

    movq mm0, mm2

    lea r2, [r2 + r3]
    lea r4, [r4 + r1]

    dec r5
    jnz near .xloop

    WELSEMMS
    LOAD_6_PARA_POP
    ;pop ebx
    ;pop edi
    ;pop esi
    ret


ALIGN 16
;*******************************************************************************
; void McChromaWidthEq8_sse2( const uint8_t *pSrc,
;                             int32_t iSrcStride,
;                             uint8_t *pDst,
;                             int32_t iDstStride,
;                             const uint8_t *pABCD,
;                             int32_t iHeigh );
;*******************************************************************************
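; Editor's note: SSE2 counterpart of McChromaWidthEq4_mmx for 8-pixel-wide
; blocks.  It evaluates the same bilinear expression
; (A*cur + B*curRight + C*next + D*nextRight + 32) >> 6, but on eight pixels
; per row using 128-bit registers.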
WELS_EXTERN McChromaWidthEq8_sse2
McChromaWidthEq8_sse2:
    ;push esi
    ;push edi
    ;push ebx

    %assign push_num 0
    LOAD_6_PARA
%ifndef X86_32
    movsx r1, r1d
    movsx r3, r3d
    movsx r5, r5d
%endif

    ;mov eax, [esp + 12 + 20]
    movd xmm3, [r4]
    WELS_Zero xmm7
    punpcklbw xmm3, xmm3
    punpcklwd xmm3, xmm3

    movdqa xmm4, xmm3
    punpckldq xmm3, xmm3
    punpckhdq xmm4, xmm4
    movdqa xmm5, xmm3
    movdqa xmm6, xmm4

    punpcklbw xmm3, xmm7
    punpckhbw xmm5, xmm7
    punpcklbw xmm4, xmm7
    punpckhbw xmm6, xmm7
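
; Editor's note: as in the MMX version, the unpack sequence above appears to
; leave xmm3 = 8 words of A, xmm5 = 8 words of B, xmm4 = 8 words of C and
; xmm6 = 8 words of D, with xmm7 as the zero register.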

    ;mov esi, [esp + 12 + 4]
    ;mov eax, [esp + 12 + 8]
    ;mov edi, [esp + 12 + 12]
    ;mov edx, [esp + 12 + 16]
    ;mov ecx, [esp + 12 + 24]

    lea r4, [r0 + r1] ;lea ebx, [esi + eax]
    movq xmm0, [r0]
    movq xmm1, [r0+1]
    punpcklbw xmm0, xmm7
    punpcklbw xmm1, xmm7
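
; Editor's note: the .xloop below mirrors the MMX loop, one row per iteration,
; producing 8 output pixels at a time and carrying the next-row pixels
; (xmm2/xmm1) over as the current row of the following iteration.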
.xloop:

    pmullw xmm0, xmm3
    pmullw xmm1, xmm5
    paddw xmm0, xmm1

    movq xmm1, [r4]
    punpcklbw xmm1, xmm7
    movdqa xmm2, xmm1
    pmullw xmm1, xmm4
    paddw xmm0, xmm1

    movq xmm1, [r4+1]
    punpcklbw xmm1, xmm7
    movdqa xmm7, xmm1
    pmullw xmm1, xmm6
    paddw xmm0, xmm1
    movdqa xmm1, xmm7

    paddw xmm0, [h264_d0x20_sse2]
    psrlw xmm0, 6

    WELS_Zero xmm7
    packuswb xmm0, xmm7
    movq [r2], xmm0

    movdqa xmm0, xmm2

    lea r2, [r2 + r3]
    lea r4, [r4 + r1]

    dec r5
    jnz near .xloop

    LOAD_6_PARA_POP

    ;pop ebx
    ;pop edi
    ;pop esi
    ret


ALIGN 16
;***********************************************************************
; void McChromaWidthEq8_ssse3( const uint8_t *pSrc,
;                              int32_t iSrcStride,
;                              uint8_t *pDst,
;                              int32_t iDstStride,
;                              const uint8_t *pABCD,
;                              int32_t iHeigh );
;***********************************************************************
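; Editor's note: SSSE3 variant of the 8-pixel-wide chroma MC.  Instead of
; widening pixels to words and using pmullw, it keeps adjacent source bytes
; interleaved as (p[x], p[x+1]) pairs and uses pmaddubsw to form
; A*p[x] + B*p[x+1] (and C/D against the next row) in one instruction,
; processing two output rows per loop iteration (iHeigh is therefore assumed
; to be even).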
WELS_EXTERN McChromaWidthEq8_ssse3
McChromaWidthEq8_ssse3:
    ;push ebx
    ;push esi
    ;push edi

    %assign push_num 0
    LOAD_6_PARA
%ifndef X86_32
    movsx r1, r1d
    movsx r3, r3d
    movsx r5, r5d
%endif

    ;mov eax, [esp + 12 + 20]

    pxor xmm7, xmm7
    movd xmm5, [r4]
    punpcklwd xmm5, xmm5
    punpckldq xmm5, xmm5
    movdqa xmm6, xmm5
    punpcklqdq xmm5, xmm5
    punpckhqdq xmm6, xmm6
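
; Editor's note: xmm5 now appears to hold the byte pair (A,B) repeated eight
; times and xmm6 the pair (C,D) repeated eight times, ready to serve as the
; signed-weight operand of pmaddubsw.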

    ;mov eax, [esp + 12 + 4]
    ;mov edx, [esp + 12 + 8]
    ;mov esi, [esp + 12 + 12]
    ;mov edi, [esp + 12 + 16]
    ;mov ecx, [esp + 12 + 24]

    sub r2, r3 ;sub esi, edi
    sub r2, r3
    movdqa xmm7, [h264_d0x20_sse2]

    movdqu xmm0, [r0]
    movdqa xmm1, xmm0
    psrldq xmm1, 1
    punpcklbw xmm0, xmm1
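
; Editor's note: xmm0 now holds the first source row with adjacent pixels
; interleaved as (p[x], p[x+1]) byte pairs, and xmm7 holds the +32 rounding
; words.  pDst (r2) was pre-decremented by 2*iDstStride above, so the
; "lea r2, [r2+2*r3]" at the top of each iteration advances it two rows at a
; time; each pass through .hloop_chroma writes two 8-pixel output rows.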
.hloop_chroma:
    lea r2, [r2+2*r3]

    movdqu xmm2, [r0+r1]
    movdqa xmm3, xmm2
    psrldq xmm3, 1
    punpcklbw xmm2, xmm3
    movdqa xmm4, xmm2

    pmaddubsw xmm0, xmm5
    pmaddubsw xmm2, xmm6
    paddw xmm0, xmm2
    paddw xmm0, xmm7
    psrlw xmm0, 6
    packuswb xmm0, xmm0
    movq [r2], xmm0

    lea r0, [r0+2*r1]
    movdqu xmm2, [r0]
    movdqa xmm3, xmm2
    psrldq xmm3, 1
    punpcklbw xmm2, xmm3
    movdqa xmm0, xmm2

    pmaddubsw xmm4, xmm5
    pmaddubsw xmm2, xmm6
    paddw xmm4, xmm2
    paddw xmm4, xmm7
    psrlw xmm4, 6
    packuswb xmm4, xmm4
    movq [r2+r3], xmm4

    sub r5, 2
    jnz .hloop_chroma

    LOAD_6_PARA_POP

    ;pop edi
    ;pop esi
    ;pop ebx

    ret