130 lines
3.9 KiB
NASM
130 lines
3.9 KiB
NASM
;*!
|
|
;* \copy
|
|
;* Copyright (c) 2009-2013, Cisco Systems
|
|
;* All rights reserved.
|
|
;*
|
|
;* Redistribution and use in source and binary forms, with or without
|
|
;* modification, are permitted provided that the following conditions
|
|
;* are met:
|
|
;*
|
|
;* ?Redistributions of source code must retain the above copyright
|
|
;* notice, this list of conditions and the following disclaimer.
|
|
;*
|
|
;* ?Redistributions in binary form must reproduce the above copyright
|
|
;* notice, this list of conditions and the following disclaimer in
|
|
;* the documentation and/or other materials provided with the
|
|
;* distribution.
|
|
;*
|
|
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
;* POSSIBILITY OF SUCH DAMAGE.
|
|
;*
|
|
;*
|
|
;* dct.asm
|
|
;*
|
|
;* Abstract
|
|
;* WelsDctFourT4_sse2
|
|
;*
|
|
;* History
|
|
;* 8/4/2009 Created
|
|
;*
|
|
;*
|
|
;*************************************************************************/
|
|
|
|
%include "asm_inc.asm"
|
|
|
|
BITS 32
|
|
|
|
;*******************************************************************************
|
|
; Macros and other preprocessor constants
|
|
;*******************************************************************************
|
|
%macro MMX_SumSubDiv2 3
|
|
movq %3, %2
|
|
psraw %3, $1
|
|
paddw %3, %1
|
|
psraw %1, $1
|
|
psubw %1, %2
|
|
%endmacro
|
|
|
|
%macro MMX_SumSub 3
|
|
movq %3, %2
|
|
psubw %2, %1
|
|
paddw %1, %3
|
|
%endmacro
|
|
|
|
%macro MMX_IDCT 6
|
|
MMX_SumSub %4, %5, %6
|
|
MMX_SumSubDiv2 %3, %2, %1
|
|
MMX_SumSub %1, %4, %6
|
|
MMX_SumSub %3, %5, %6
|
|
%endmacro
|
|
|
|
|
|
%macro MMX_StoreDiff4P 5
|
|
movd %2, %5
|
|
punpcklbw %2, %4
|
|
paddw %1, %3
|
|
psraw %1, $6
|
|
paddsw %1, %2
|
|
packuswb %1, %2
|
|
movd %5, %1
|
|
%endmacro
|
|
|
|
;*******************************************************************************
|
|
; Code
|
|
;*******************************************************************************
|
|
|
|
SECTION .text
|
|
|
|
WELS_EXTERN IdctResAddPred_mmx
|
|
|
|
ALIGN 16
|
|
;*******************************************************************************
|
|
; void_t __cdecl IdctResAddPred_mmx( uint8_t *pPred, const int32_t kiStride, int16_t *pRs )
|
|
;*******************************************************************************
|
|
|
|
IdctResAddPred_mmx:
|
|
|
|
%define pushsize 0
|
|
%define pPred esp+pushsize+4
|
|
%define kiStride esp+pushsize+8
|
|
%define pRs esp+pushsize+12
|
|
|
|
mov eax, [pRs ]
|
|
mov edx, [pPred ]
|
|
mov ecx, [kiStride]
|
|
movq mm0, [eax+ 0]
|
|
movq mm1, [eax+ 8]
|
|
movq mm2, [eax+16]
|
|
movq mm3, [eax+24]
|
|
|
|
MMX_Trans4x4W mm0, mm1, mm2, mm3, mm4
|
|
MMX_IDCT mm1, mm2, mm3, mm4, mm0, mm6
|
|
MMX_Trans4x4W mm1, mm3, mm0, mm4, mm2
|
|
MMX_IDCT mm3, mm0, mm4, mm2, mm1, mm6
|
|
|
|
WELS_Zero mm7
|
|
WELS_DW32 mm6
|
|
|
|
MMX_StoreDiff4P mm3, mm0, mm6, mm7, [edx]
|
|
MMX_StoreDiff4P mm4, mm0, mm6, mm7, [edx+ecx]
|
|
lea edx, [edx+2*ecx]
|
|
MMX_StoreDiff4P mm1, mm0, mm6, mm7, [edx]
|
|
MMX_StoreDiff4P mm2, mm0, mm6, mm7, [edx+ecx]
|
|
|
|
%undef pushsize
|
|
%undef pPred
|
|
%undef kiStride
|
|
%undef pRs
|
|
emms
|
|
ret
|