Convert source files to unix newlines

Most files were converted in ff6b669176, but some (non C++
source files) were left with windows newlines.
This commit is contained in:
Martin Storsjö 2013-12-13 09:40:57 +02:00
parent dcf08c6d41
commit 8f9a5469be
18 changed files with 8721 additions and 8721 deletions

View File

@ -1,129 +1,129 @@
;*!
;* \copy
;* Copyright (c) 2009-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* ?Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* ?Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
;* dct.asm
;*
;* Abstract
;* WelsDctFourT4_sse2
;*
;* History
;* 8/4/2009 Created
;*
;*
;*************************************************************************/
%include "asm_inc.asm"
BITS 32
;*******************************************************************************
; Macros and other preprocessor constants
;*******************************************************************************
%macro MMX_SumSubDiv2 3
movq %3, %2
psraw %3, $1
paddw %3, %1
psraw %1, $1
psubw %1, %2
%endmacro
%macro MMX_SumSub 3
movq %3, %2
psubw %2, %1
paddw %1, %3
%endmacro
%macro MMX_IDCT 6
MMX_SumSub %4, %5, %6
MMX_SumSubDiv2 %3, %2, %1
MMX_SumSub %1, %4, %6
MMX_SumSub %3, %5, %6
%endmacro
%macro MMX_StoreDiff4P 5
movd %2, %5
punpcklbw %2, %4
paddw %1, %3
psraw %1, $6
paddsw %1, %2
packuswb %1, %2
movd %5, %1
%endmacro
;*******************************************************************************
; Code
;*******************************************************************************
SECTION .text
WELS_EXTERN IdctResAddPred_mmx
ALIGN 16
;*******************************************************************************
; void_t __cdecl IdctResAddPred_mmx( uint8_t *pPred, const int32_t kiStride, int16_t *pRs )
;*******************************************************************************
IdctResAddPred_mmx:
%define pushsize 0
%define pPred esp+pushsize+4
%define kiStride esp+pushsize+8
%define pRs esp+pushsize+12
mov eax, [pRs ]
mov edx, [pPred ]
mov ecx, [kiStride]
movq mm0, [eax+ 0]
movq mm1, [eax+ 8]
movq mm2, [eax+16]
movq mm3, [eax+24]
MMX_Trans4x4W mm0, mm1, mm2, mm3, mm4
MMX_IDCT mm1, mm2, mm3, mm4, mm0, mm6
MMX_Trans4x4W mm1, mm3, mm0, mm4, mm2
MMX_IDCT mm3, mm0, mm4, mm2, mm1, mm6
WELS_Zero mm7
WELS_DW32 mm6
MMX_StoreDiff4P mm3, mm0, mm6, mm7, [edx]
MMX_StoreDiff4P mm4, mm0, mm6, mm7, [edx+ecx]
lea edx, [edx+2*ecx]
MMX_StoreDiff4P mm1, mm0, mm6, mm7, [edx]
MMX_StoreDiff4P mm2, mm0, mm6, mm7, [edx+ecx]
%undef pushsize
%undef pPred
%undef kiStride
%undef pRs
emms
ret
;*!
;* \copy
;* Copyright (c) 2009-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* ?Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* ?Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
;* dct.asm
;*
;* Abstract
;* WelsDctFourT4_sse2
;*
;* History
;* 8/4/2009 Created
;*
;*
;*************************************************************************/
%include "asm_inc.asm"
BITS 32
;*******************************************************************************
; Macros and other preprocessor constants
;*******************************************************************************
%macro MMX_SumSubDiv2 3
movq %3, %2
psraw %3, $1
paddw %3, %1
psraw %1, $1
psubw %1, %2
%endmacro
%macro MMX_SumSub 3
movq %3, %2
psubw %2, %1
paddw %1, %3
%endmacro
%macro MMX_IDCT 6
MMX_SumSub %4, %5, %6
MMX_SumSubDiv2 %3, %2, %1
MMX_SumSub %1, %4, %6
MMX_SumSub %3, %5, %6
%endmacro
%macro MMX_StoreDiff4P 5
movd %2, %5
punpcklbw %2, %4
paddw %1, %3
psraw %1, $6
paddsw %1, %2
packuswb %1, %2
movd %5, %1
%endmacro
;*******************************************************************************
; Code
;*******************************************************************************
SECTION .text
WELS_EXTERN IdctResAddPred_mmx
ALIGN 16
;*******************************************************************************
; void_t __cdecl IdctResAddPred_mmx( uint8_t *pPred, const int32_t kiStride, int16_t *pRs )
;*******************************************************************************
IdctResAddPred_mmx:
%define pushsize 0
%define pPred esp+pushsize+4
%define kiStride esp+pushsize+8
%define pRs esp+pushsize+12
mov eax, [pRs ]
mov edx, [pPred ]
mov ecx, [kiStride]
movq mm0, [eax+ 0]
movq mm1, [eax+ 8]
movq mm2, [eax+16]
movq mm3, [eax+24]
MMX_Trans4x4W mm0, mm1, mm2, mm3, mm4
MMX_IDCT mm1, mm2, mm3, mm4, mm0, mm6
MMX_Trans4x4W mm1, mm3, mm0, mm4, mm2
MMX_IDCT mm3, mm0, mm4, mm2, mm1, mm6
WELS_Zero mm7
WELS_DW32 mm6
MMX_StoreDiff4P mm3, mm0, mm6, mm7, [edx]
MMX_StoreDiff4P mm4, mm0, mm6, mm7, [edx+ecx]
lea edx, [edx+2*ecx]
MMX_StoreDiff4P mm1, mm0, mm6, mm7, [edx]
MMX_StoreDiff4P mm2, mm0, mm6, mm7, [edx+ecx]
%undef pushsize
%undef pPred
%undef kiStride
%undef pRs
emms
ret

File diff suppressed because it is too large Load Diff

View File

@ -1,317 +1,317 @@
;*!
;* \copy
;* Copyright (c) 2004-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* * Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* * Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
;* mc_chroma.asm
;*
;* Abstract
;* mmx motion compensation for chroma
;*
;* History
;* 10/13/2004 Created
;*
;*
;*************************************************************************/
%include "asm_inc.asm"
BITS 32
;***********************************************************************
; Local Data (Read Only)
;***********************************************************************
SECTION .rodata align=16
;***********************************************************************
; Various memory constants (trigonometric values or rounding values)
;***********************************************************************
ALIGN 16
h264_d0x20_sse2:
dw 32,32,32,32,32,32,32,32
ALIGN 16
h264_d0x20_mmx:
dw 32,32,32,32
;=============================================================================
; Code
;=============================================================================
SECTION .text
ALIGN 16
;*******************************************************************************
; void McChromaWidthEq4_mmx( uint8_t *src,
; int32_t iSrcStride,
; uint8_t *pDst,
; int32_t iDstStride,
; uint8_t *pABCD,
; int32_t iHeigh );
;*******************************************************************************
WELS_EXTERN McChromaWidthEq4_mmx
McChromaWidthEq4_mmx:
push esi
push edi
push ebx
mov eax, [esp +12 + 20]
movd mm3, [eax]
WELS_Zero mm7
punpcklbw mm3, mm3
movq mm4, mm3
punpcklwd mm3, mm3
punpckhwd mm4, mm4
movq mm5, mm3
punpcklbw mm3, mm7
punpckhbw mm5, mm7
movq mm6, mm4
punpcklbw mm4, mm7
punpckhbw mm6, mm7
mov esi, [esp +12+ 4]
mov eax, [esp + 12 + 8]
mov edi, [esp + 12 + 12]
mov edx, [esp + 12 + 16]
mov ecx, [esp + 12 + 24]
lea ebx, [esi + eax]
movd mm0, [esi]
movd mm1, [esi+1]
punpcklbw mm0, mm7
punpcklbw mm1, mm7
.xloop:
pmullw mm0, mm3
pmullw mm1, mm5
paddw mm0, mm1
movd mm1, [ebx]
punpcklbw mm1, mm7
movq mm2, mm1
pmullw mm1, mm4
paddw mm0, mm1
movd mm1, [ebx+1]
punpcklbw mm1, mm7
movq mm7, mm1
pmullw mm1,mm6
paddw mm0, mm1
movq mm1,mm7
paddw mm0, [h264_d0x20_mmx]
psrlw mm0, 6
WELS_Zero mm7
packuswb mm0, mm7
movd [edi], mm0
movq mm0, mm2
lea edi, [edi +edx ]
lea ebx, [ebx + eax]
dec ecx
jnz near .xloop
WELSEMMS
pop ebx
pop edi
pop esi
ret
ALIGN 16
;*******************************************************************************
; void McChromaWidthEq8_sse2( uint8_t *pSrc,
; int32_t iSrcStride,
; uint8_t *pDst,
; int32_t iDstStride,
; uint8_t *pABCD,
; int32_t iheigh );
;*******************************************************************************
WELS_EXTERN McChromaWidthEq8_sse2
McChromaWidthEq8_sse2:
push esi
push edi
push ebx
mov eax, [esp +12 + 20]
movd xmm3, [eax]
WELS_Zero xmm7
punpcklbw xmm3, xmm3
punpcklwd xmm3, xmm3
movdqa xmm4, xmm3
punpckldq xmm3, xmm3
punpckhdq xmm4, xmm4
movdqa xmm5, xmm3
movdqa xmm6, xmm4
punpcklbw xmm3, xmm7
punpckhbw xmm5, xmm7
punpcklbw xmm4, xmm7
punpckhbw xmm6, xmm7
mov esi, [esp +12+ 4]
mov eax, [esp + 12 + 8]
mov edi, [esp + 12 + 12]
mov edx, [esp + 12 + 16]
mov ecx, [esp + 12 + 24]
lea ebx, [esi + eax]
movq xmm0, [esi]
movq xmm1, [esi+1]
punpcklbw xmm0, xmm7
punpcklbw xmm1, xmm7
.xloop:
pmullw xmm0, xmm3
pmullw xmm1, xmm5
paddw xmm0, xmm1
movq xmm1, [ebx]
punpcklbw xmm1, xmm7
movdqa xmm2, xmm1
pmullw xmm1, xmm4
paddw xmm0, xmm1
movq xmm1, [ebx+1]
punpcklbw xmm1, xmm7
movdqa xmm7, xmm1
pmullw xmm1, xmm6
paddw xmm0, xmm1
movdqa xmm1,xmm7
paddw xmm0, [h264_d0x20_sse2]
psrlw xmm0, 6
WELS_Zero xmm7
packuswb xmm0, xmm7
movq [edi], xmm0
movdqa xmm0, xmm2
lea edi, [edi +edx ]
lea ebx, [ebx + eax]
dec ecx
jnz near .xloop
pop ebx
pop edi
pop esi
ret
ALIGN 16
;***********************************************************************
; void McChromaWidthEq8_ssse3( uint8_t *pSrc,
; int32_t iSrcStride,
; uint8_t *pDst,
; int32_t iDstStride,
; uint8_t *pABCD,
; int32_t iHeigh);
;***********************************************************************
WELS_EXTERN McChromaWidthEq8_ssse3
McChromaWidthEq8_ssse3:
push ebx
push esi
push edi
mov eax, [esp + 12 + 20]
pxor xmm7, xmm7
movd xmm5, [eax]
punpcklwd xmm5, xmm5
punpckldq xmm5, xmm5
movdqa xmm6, xmm5
punpcklqdq xmm5, xmm5
punpckhqdq xmm6, xmm6
mov eax, [esp + 12 + 4]
mov edx, [esp + 12 + 8]
mov esi, [esp + 12 + 12]
mov edi, [esp + 12 + 16]
mov ecx, [esp + 12 + 24]
sub esi, edi
sub esi, edi
movdqa xmm7, [h264_d0x20_sse2]
movdqu xmm0, [eax]
movdqa xmm1, xmm0
psrldq xmm1, 1
punpcklbw xmm0, xmm1
.hloop_chroma:
lea esi, [esi+2*edi]
movdqu xmm2, [eax+edx]
movdqa xmm3, xmm2
psrldq xmm3, 1
punpcklbw xmm2, xmm3
movdqa xmm4, xmm2
pmaddubsw xmm0, xmm5
pmaddubsw xmm2, xmm6
paddw xmm0, xmm2
paddw xmm0, xmm7
psrlw xmm0, 6
packuswb xmm0, xmm0
movq [esi],xmm0
lea eax, [eax+2*edx]
movdqu xmm2, [eax]
movdqa xmm3, xmm2
psrldq xmm3, 1
punpcklbw xmm2, xmm3
movdqa xmm0, xmm2
pmaddubsw xmm4, xmm5
pmaddubsw xmm2, xmm6
paddw xmm4, xmm2
paddw xmm4, xmm7
psrlw xmm4, 6
packuswb xmm4, xmm4
movq [esi+edi],xmm4
sub ecx, 2
jnz .hloop_chroma
pop edi
pop esi
pop ebx
ret
;*!
;* \copy
;* Copyright (c) 2004-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* * Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* * Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
;* mc_chroma.asm
;*
;* Abstract
;* mmx motion compensation for chroma
;*
;* History
;* 10/13/2004 Created
;*
;*
;*************************************************************************/
%include "asm_inc.asm"
BITS 32
;***********************************************************************
; Local Data (Read Only)
;***********************************************************************
SECTION .rodata align=16
;***********************************************************************
; Various memory constants (trigonometric values or rounding values)
;***********************************************************************
ALIGN 16
h264_d0x20_sse2:
dw 32,32,32,32,32,32,32,32
ALIGN 16
h264_d0x20_mmx:
dw 32,32,32,32
;=============================================================================
; Code
;=============================================================================
SECTION .text
ALIGN 16
;*******************************************************************************
; void McChromaWidthEq4_mmx( uint8_t *src,
; int32_t iSrcStride,
; uint8_t *pDst,
; int32_t iDstStride,
; uint8_t *pABCD,
; int32_t iHeigh );
;*******************************************************************************
WELS_EXTERN McChromaWidthEq4_mmx
McChromaWidthEq4_mmx:
push esi
push edi
push ebx
mov eax, [esp +12 + 20]
movd mm3, [eax]
WELS_Zero mm7
punpcklbw mm3, mm3
movq mm4, mm3
punpcklwd mm3, mm3
punpckhwd mm4, mm4
movq mm5, mm3
punpcklbw mm3, mm7
punpckhbw mm5, mm7
movq mm6, mm4
punpcklbw mm4, mm7
punpckhbw mm6, mm7
mov esi, [esp +12+ 4]
mov eax, [esp + 12 + 8]
mov edi, [esp + 12 + 12]
mov edx, [esp + 12 + 16]
mov ecx, [esp + 12 + 24]
lea ebx, [esi + eax]
movd mm0, [esi]
movd mm1, [esi+1]
punpcklbw mm0, mm7
punpcklbw mm1, mm7
.xloop:
pmullw mm0, mm3
pmullw mm1, mm5
paddw mm0, mm1
movd mm1, [ebx]
punpcklbw mm1, mm7
movq mm2, mm1
pmullw mm1, mm4
paddw mm0, mm1
movd mm1, [ebx+1]
punpcklbw mm1, mm7
movq mm7, mm1
pmullw mm1,mm6
paddw mm0, mm1
movq mm1,mm7
paddw mm0, [h264_d0x20_mmx]
psrlw mm0, 6
WELS_Zero mm7
packuswb mm0, mm7
movd [edi], mm0
movq mm0, mm2
lea edi, [edi +edx ]
lea ebx, [ebx + eax]
dec ecx
jnz near .xloop
WELSEMMS
pop ebx
pop edi
pop esi
ret
ALIGN 16
;*******************************************************************************
; void McChromaWidthEq8_sse2( uint8_t *pSrc,
; int32_t iSrcStride,
; uint8_t *pDst,
; int32_t iDstStride,
; uint8_t *pABCD,
; int32_t iheigh );
;*******************************************************************************
WELS_EXTERN McChromaWidthEq8_sse2
McChromaWidthEq8_sse2:
push esi
push edi
push ebx
mov eax, [esp +12 + 20]
movd xmm3, [eax]
WELS_Zero xmm7
punpcklbw xmm3, xmm3
punpcklwd xmm3, xmm3
movdqa xmm4, xmm3
punpckldq xmm3, xmm3
punpckhdq xmm4, xmm4
movdqa xmm5, xmm3
movdqa xmm6, xmm4
punpcklbw xmm3, xmm7
punpckhbw xmm5, xmm7
punpcklbw xmm4, xmm7
punpckhbw xmm6, xmm7
mov esi, [esp +12+ 4]
mov eax, [esp + 12 + 8]
mov edi, [esp + 12 + 12]
mov edx, [esp + 12 + 16]
mov ecx, [esp + 12 + 24]
lea ebx, [esi + eax]
movq xmm0, [esi]
movq xmm1, [esi+1]
punpcklbw xmm0, xmm7
punpcklbw xmm1, xmm7
.xloop:
pmullw xmm0, xmm3
pmullw xmm1, xmm5
paddw xmm0, xmm1
movq xmm1, [ebx]
punpcklbw xmm1, xmm7
movdqa xmm2, xmm1
pmullw xmm1, xmm4
paddw xmm0, xmm1
movq xmm1, [ebx+1]
punpcklbw xmm1, xmm7
movdqa xmm7, xmm1
pmullw xmm1, xmm6
paddw xmm0, xmm1
movdqa xmm1,xmm7
paddw xmm0, [h264_d0x20_sse2]
psrlw xmm0, 6
WELS_Zero xmm7
packuswb xmm0, xmm7
movq [edi], xmm0
movdqa xmm0, xmm2
lea edi, [edi +edx ]
lea ebx, [ebx + eax]
dec ecx
jnz near .xloop
pop ebx
pop edi
pop esi
ret
ALIGN 16
;***********************************************************************
; void McChromaWidthEq8_ssse3( uint8_t *pSrc,
; int32_t iSrcStride,
; uint8_t *pDst,
; int32_t iDstStride,
; uint8_t *pABCD,
; int32_t iHeigh);
;***********************************************************************
WELS_EXTERN McChromaWidthEq8_ssse3
McChromaWidthEq8_ssse3:
push ebx
push esi
push edi
mov eax, [esp + 12 + 20]
pxor xmm7, xmm7
movd xmm5, [eax]
punpcklwd xmm5, xmm5
punpckldq xmm5, xmm5
movdqa xmm6, xmm5
punpcklqdq xmm5, xmm5
punpckhqdq xmm6, xmm6
mov eax, [esp + 12 + 4]
mov edx, [esp + 12 + 8]
mov esi, [esp + 12 + 12]
mov edi, [esp + 12 + 16]
mov ecx, [esp + 12 + 24]
sub esi, edi
sub esi, edi
movdqa xmm7, [h264_d0x20_sse2]
movdqu xmm0, [eax]
movdqa xmm1, xmm0
psrldq xmm1, 1
punpcklbw xmm0, xmm1
.hloop_chroma:
lea esi, [esi+2*edi]
movdqu xmm2, [eax+edx]
movdqa xmm3, xmm2
psrldq xmm3, 1
punpcklbw xmm2, xmm3
movdqa xmm4, xmm2
pmaddubsw xmm0, xmm5
pmaddubsw xmm2, xmm6
paddw xmm0, xmm2
paddw xmm0, xmm7
psrlw xmm0, 6
packuswb xmm0, xmm0
movq [esi],xmm0
lea eax, [eax+2*edx]
movdqu xmm2, [eax]
movdqa xmm3, xmm2
psrldq xmm3, 1
punpcklbw xmm2, xmm3
movdqa xmm0, xmm2
pmaddubsw xmm4, xmm5
pmaddubsw xmm2, xmm6
paddw xmm4, xmm2
paddw xmm4, xmm7
psrlw xmm4, 6
packuswb xmm4, xmm4
movq [esi+edi],xmm4
sub ecx, 2
jnz .hloop_chroma
pop edi
pop esi
pop ebx
ret

File diff suppressed because it is too large Load Diff

View File

@ -1,317 +1,317 @@
;*!
;* \copy
;* Copyright (c) 2004-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* * Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* * Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
;* mc_chroma.asm
;*
;* Abstract
;* mmx motion compensation for chroma
;*
;* History
;* 10/13/2004 Created
;*
;*
;*************************************************************************/
%include "asm_inc.asm"
BITS 32
;***********************************************************************
; Local Data (Read Only)
;***********************************************************************
SECTION .rodata align=16
;***********************************************************************
; Various memory constants (trigonometric values or rounding values)
;***********************************************************************
ALIGN 16
h264_d0x20_sse2:
dw 32,32,32,32,32,32,32,32
ALIGN 16
h264_d0x20_mmx:
dw 32,32,32,32
;=============================================================================
; Code
;=============================================================================
SECTION .text
ALIGN 16
;*******************************************************************************
; void McChromaWidthEq4_mmx( uint8_t *src,
; int32_t iSrcStride,
; uint8_t *pDst,
; int32_t iDstStride,
; uint8_t *pABCD,
; int32_t iHeigh );
;*******************************************************************************
WELS_EXTERN McChromaWidthEq4_mmx
McChromaWidthEq4_mmx:
push esi
push edi
push ebx
mov eax, [esp +12 + 20]
movd mm3, [eax]
WELS_Zero mm7
punpcklbw mm3, mm3
movq mm4, mm3
punpcklwd mm3, mm3
punpckhwd mm4, mm4
movq mm5, mm3
punpcklbw mm3, mm7
punpckhbw mm5, mm7
movq mm6, mm4
punpcklbw mm4, mm7
punpckhbw mm6, mm7
mov esi, [esp +12+ 4]
mov eax, [esp + 12 + 8]
mov edi, [esp + 12 + 12]
mov edx, [esp + 12 + 16]
mov ecx, [esp + 12 + 24]
lea ebx, [esi + eax]
movd mm0, [esi]
movd mm1, [esi+1]
punpcklbw mm0, mm7
punpcklbw mm1, mm7
.xloop:
pmullw mm0, mm3
pmullw mm1, mm5
paddw mm0, mm1
movd mm1, [ebx]
punpcklbw mm1, mm7
movq mm2, mm1
pmullw mm1, mm4
paddw mm0, mm1
movd mm1, [ebx+1]
punpcklbw mm1, mm7
movq mm7, mm1
pmullw mm1,mm6
paddw mm0, mm1
movq mm1,mm7
paddw mm0, [h264_d0x20_mmx]
psrlw mm0, 6
WELS_Zero mm7
packuswb mm0, mm7
movd [edi], mm0
movq mm0, mm2
lea edi, [edi +edx ]
lea ebx, [ebx + eax]
dec ecx
jnz near .xloop
WELSEMMS
pop ebx
pop edi
pop esi
ret
ALIGN 16
;*******************************************************************************
; void McChromaWidthEq8_sse2( uint8_t *pSrc,
; int32_t iSrcStride,
; uint8_t *pDst,
; int32_t iDstStride,
; uint8_t *pABCD,
; int32_t iheigh );
;*******************************************************************************
WELS_EXTERN McChromaWidthEq8_sse2
McChromaWidthEq8_sse2:
push esi
push edi
push ebx
mov eax, [esp +12 + 20]
movd xmm3, [eax]
WELS_Zero xmm7
punpcklbw xmm3, xmm3
punpcklwd xmm3, xmm3
movdqa xmm4, xmm3
punpckldq xmm3, xmm3
punpckhdq xmm4, xmm4
movdqa xmm5, xmm3
movdqa xmm6, xmm4
punpcklbw xmm3, xmm7
punpckhbw xmm5, xmm7
punpcklbw xmm4, xmm7
punpckhbw xmm6, xmm7
mov esi, [esp +12+ 4]
mov eax, [esp + 12 + 8]
mov edi, [esp + 12 + 12]
mov edx, [esp + 12 + 16]
mov ecx, [esp + 12 + 24]
lea ebx, [esi + eax]
movq xmm0, [esi]
movq xmm1, [esi+1]
punpcklbw xmm0, xmm7
punpcklbw xmm1, xmm7
.xloop:
pmullw xmm0, xmm3
pmullw xmm1, xmm5
paddw xmm0, xmm1
movq xmm1, [ebx]
punpcklbw xmm1, xmm7
movdqa xmm2, xmm1
pmullw xmm1, xmm4
paddw xmm0, xmm1
movq xmm1, [ebx+1]
punpcklbw xmm1, xmm7
movdqa xmm7, xmm1
pmullw xmm1, xmm6
paddw xmm0, xmm1
movdqa xmm1,xmm7
paddw xmm0, [h264_d0x20_sse2]
psrlw xmm0, 6
WELS_Zero xmm7
packuswb xmm0, xmm7
movq [edi], xmm0
movdqa xmm0, xmm2
lea edi, [edi +edx ]
lea ebx, [ebx + eax]
dec ecx
jnz near .xloop
pop ebx
pop edi
pop esi
ret
ALIGN 16
;***********************************************************************
; void McChromaWidthEq8_ssse3( uint8_t *pSrc,
; int32_t iSrcStride,
; uint8_t *pDst,
; int32_t iDstStride,
; uint8_t *pABCD,
; int32_t iHeigh);
;***********************************************************************
WELS_EXTERN McChromaWidthEq8_ssse3
McChromaWidthEq8_ssse3:
push ebx
push esi
push edi
mov eax, [esp + 12 + 20]
pxor xmm7, xmm7
movd xmm5, [eax]
punpcklwd xmm5, xmm5
punpckldq xmm5, xmm5
movdqa xmm6, xmm5
punpcklqdq xmm5, xmm5
punpckhqdq xmm6, xmm6
mov eax, [esp + 12 + 4]
mov edx, [esp + 12 + 8]
mov esi, [esp + 12 + 12]
mov edi, [esp + 12 + 16]
mov ecx, [esp + 12 + 24]
sub esi, edi
sub esi, edi
movdqa xmm7, [h264_d0x20_sse2]
movdqu xmm0, [eax]
movdqa xmm1, xmm0
psrldq xmm1, 1
punpcklbw xmm0, xmm1
.hloop_chroma:
lea esi, [esi+2*edi]
movdqu xmm2, [eax+edx]
movdqa xmm3, xmm2
psrldq xmm3, 1
punpcklbw xmm2, xmm3
movdqa xmm4, xmm2
pmaddubsw xmm0, xmm5
pmaddubsw xmm2, xmm6
paddw xmm0, xmm2
paddw xmm0, xmm7
psrlw xmm0, 6
packuswb xmm0, xmm0
movq [esi],xmm0
lea eax, [eax+2*edx]
movdqu xmm2, [eax]
movdqa xmm3, xmm2
psrldq xmm3, 1
punpcklbw xmm2, xmm3
movdqa xmm0, xmm2
pmaddubsw xmm4, xmm5
pmaddubsw xmm2, xmm6
paddw xmm4, xmm2
paddw xmm4, xmm7
psrlw xmm4, 6
packuswb xmm4, xmm4
movq [esi+edi],xmm4
sub ecx, 2
jnz .hloop_chroma
pop edi
pop esi
pop ebx
ret
;*!
;* \copy
;* Copyright (c) 2004-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* * Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* * Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
;* mc_chroma.asm
;*
;* Abstract
;* mmx motion compensation for chroma
;*
;* History
;* 10/13/2004 Created
;*
;*
;*************************************************************************/
%include "asm_inc.asm"
BITS 32
;***********************************************************************
; Local Data (Read Only)
;***********************************************************************
SECTION .rodata align=16
;***********************************************************************
; Various memory constants (trigonometric values or rounding values)
;***********************************************************************
ALIGN 16
h264_d0x20_sse2:
dw 32,32,32,32,32,32,32,32
ALIGN 16
h264_d0x20_mmx:
dw 32,32,32,32
;=============================================================================
; Code
;=============================================================================
SECTION .text
ALIGN 16
;*******************************************************************************
; void McChromaWidthEq4_mmx( uint8_t *src,
; int32_t iSrcStride,
; uint8_t *pDst,
; int32_t iDstStride,
; uint8_t *pABCD,
; int32_t iHeigh );
;*******************************************************************************
WELS_EXTERN McChromaWidthEq4_mmx
McChromaWidthEq4_mmx:
push esi
push edi
push ebx
mov eax, [esp +12 + 20]
movd mm3, [eax]
WELS_Zero mm7
punpcklbw mm3, mm3
movq mm4, mm3
punpcklwd mm3, mm3
punpckhwd mm4, mm4
movq mm5, mm3
punpcklbw mm3, mm7
punpckhbw mm5, mm7
movq mm6, mm4
punpcklbw mm4, mm7
punpckhbw mm6, mm7
mov esi, [esp +12+ 4]
mov eax, [esp + 12 + 8]
mov edi, [esp + 12 + 12]
mov edx, [esp + 12 + 16]
mov ecx, [esp + 12 + 24]
lea ebx, [esi + eax]
movd mm0, [esi]
movd mm1, [esi+1]
punpcklbw mm0, mm7
punpcklbw mm1, mm7
.xloop:
pmullw mm0, mm3
pmullw mm1, mm5
paddw mm0, mm1
movd mm1, [ebx]
punpcklbw mm1, mm7
movq mm2, mm1
pmullw mm1, mm4
paddw mm0, mm1
movd mm1, [ebx+1]
punpcklbw mm1, mm7
movq mm7, mm1
pmullw mm1,mm6
paddw mm0, mm1
movq mm1,mm7
paddw mm0, [h264_d0x20_mmx]
psrlw mm0, 6
WELS_Zero mm7
packuswb mm0, mm7
movd [edi], mm0
movq mm0, mm2
lea edi, [edi +edx ]
lea ebx, [ebx + eax]
dec ecx
jnz near .xloop
WELSEMMS
pop ebx
pop edi
pop esi
ret
ALIGN 16
;*******************************************************************************
; void McChromaWidthEq8_sse2( uint8_t *pSrc,
; int32_t iSrcStride,
; uint8_t *pDst,
; int32_t iDstStride,
; uint8_t *pABCD,
; int32_t iheigh );
;*******************************************************************************
WELS_EXTERN McChromaWidthEq8_sse2
McChromaWidthEq8_sse2:
push esi
push edi
push ebx
mov eax, [esp +12 + 20]
movd xmm3, [eax]
WELS_Zero xmm7
punpcklbw xmm3, xmm3
punpcklwd xmm3, xmm3
movdqa xmm4, xmm3
punpckldq xmm3, xmm3
punpckhdq xmm4, xmm4
movdqa xmm5, xmm3
movdqa xmm6, xmm4
punpcklbw xmm3, xmm7
punpckhbw xmm5, xmm7
punpcklbw xmm4, xmm7
punpckhbw xmm6, xmm7
mov esi, [esp +12+ 4]
mov eax, [esp + 12 + 8]
mov edi, [esp + 12 + 12]
mov edx, [esp + 12 + 16]
mov ecx, [esp + 12 + 24]
lea ebx, [esi + eax]
movq xmm0, [esi]
movq xmm1, [esi+1]
punpcklbw xmm0, xmm7
punpcklbw xmm1, xmm7
.xloop:
pmullw xmm0, xmm3
pmullw xmm1, xmm5
paddw xmm0, xmm1
movq xmm1, [ebx]
punpcklbw xmm1, xmm7
movdqa xmm2, xmm1
pmullw xmm1, xmm4
paddw xmm0, xmm1
movq xmm1, [ebx+1]
punpcklbw xmm1, xmm7
movdqa xmm7, xmm1
pmullw xmm1, xmm6
paddw xmm0, xmm1
movdqa xmm1,xmm7
paddw xmm0, [h264_d0x20_sse2]
psrlw xmm0, 6
WELS_Zero xmm7
packuswb xmm0, xmm7
movq [edi], xmm0
movdqa xmm0, xmm2
lea edi, [edi +edx ]
lea ebx, [ebx + eax]
dec ecx
jnz near .xloop
pop ebx
pop edi
pop esi
ret
ALIGN 16
;***********************************************************************
; void McChromaWidthEq8_ssse3( uint8_t *pSrc,
; int32_t iSrcStride,
; uint8_t *pDst,
; int32_t iDstStride,
; uint8_t *pABCD,
; int32_t iHeigh);
;***********************************************************************
WELS_EXTERN McChromaWidthEq8_ssse3
McChromaWidthEq8_ssse3:
push ebx
push esi
push edi
mov eax, [esp + 12 + 20]
pxor xmm7, xmm7
movd xmm5, [eax]
punpcklwd xmm5, xmm5
punpckldq xmm5, xmm5
movdqa xmm6, xmm5
punpcklqdq xmm5, xmm5
punpckhqdq xmm6, xmm6
mov eax, [esp + 12 + 4]
mov edx, [esp + 12 + 8]
mov esi, [esp + 12 + 12]
mov edi, [esp + 12 + 16]
mov ecx, [esp + 12 + 24]
sub esi, edi
sub esi, edi
movdqa xmm7, [h264_d0x20_sse2]
movdqu xmm0, [eax]
movdqa xmm1, xmm0
psrldq xmm1, 1
punpcklbw xmm0, xmm1
.hloop_chroma:
lea esi, [esi+2*edi]
movdqu xmm2, [eax+edx]
movdqa xmm3, xmm2
psrldq xmm3, 1
punpcklbw xmm2, xmm3
movdqa xmm4, xmm2
pmaddubsw xmm0, xmm5
pmaddubsw xmm2, xmm6
paddw xmm0, xmm2
paddw xmm0, xmm7
psrlw xmm0, 6
packuswb xmm0, xmm0
movq [esi],xmm0
lea eax, [eax+2*edx]
movdqu xmm2, [eax]
movdqa xmm3, xmm2
psrldq xmm3, 1
punpcklbw xmm2, xmm3
movdqa xmm0, xmm2
pmaddubsw xmm4, xmm5
pmaddubsw xmm2, xmm6
paddw xmm4, xmm2
paddw xmm4, xmm7
psrlw xmm4, 6
packuswb xmm4, xmm4
movq [esi+edi],xmm4
sub ecx, 2
jnz .hloop_chroma
pop edi
pop esi
pop ebx
ret

View File

@ -1,94 +1,94 @@
NASM = 1
NAME = libwelsvp
OUTDIR = ../../../bin/linux
BINDIR = ../../bin
OBJDIR = ../../obj
SRCDIRS = ../../src/asm \
../../src/common \
../../src/adaptivequantization \
../../src/backgounddetection \
../../src/denoise \
../../src/downsample \
../../src/scenechangedetection \
../../src/vaacalc \
../../src/complexityanalysis
SRCDIRS += ../../src/imagerotate
TARGETLIB = $(BINDIR)/$(NAME).so
CC = $(shell which gcc)
AS = $(shell which nasm)
GCC = gcc -m32
CPPFLAGS = -Wall -g -O3
ifeq ($(NASM), 1)
CPPFLAGS += -DX86_ASM
endif
ASMFLAGS = -f elf -DNOPREFIX -I ../../src/asm/
LDFLAGS = -lstdc++ -ldl
SRCEXTS = .cpp
ifeq ($(NASM), 1)
SRCEXTS += .asm
endif
HDREXTS = .h
SOURCES = $(foreach d,$(SRCDIRS),$(wildcard $(addprefix $(d)/*,$(SRCEXTS))))
HEADERS = $(foreach d,$(SRCDIRS),$(wildcard $(addprefix $(d)/*,$(HDREXTS))))
SRC_CPP = $(filter %.cpp,$(SOURCES))
SRC_ASM = $(filter %.asm,$(SOURCES))
OBJS = $(addsuffix .o, $(basename $(SOURCES)))
DEPS = $(OBJS:.o=.d)
DEP_OPT = $(shell if `$(CC) --version | grep "GCC" >/dev/null`; then \
echo "-MM -MP"; else echo "-M"; fi )
DEPEND_cpp.d = $(subst -g ,,$(CC) $(DEP_OPT) $(CPPFLAGS))
DEPEND_asm.d = $(subst -g ,,$(AS) $(DEP_OPT) $(ASMFLAGS))
COMPILE.cpp = $(GCC) $(CPPFLAGS) -c
COMPILE.asm = $(AS) $(ASMFLAGS)
LINK = $(GCC) $(LDFLAGS)
.PHONY: all objs tags ctags clean distclean
.SUFFIXES:
all: $(TARGETLIB)
%.d:%.cpp
@echo -n $(dir $<) > $@
@$(DEPEND_cpp.d) $< >> $@
%.d:%.asm
@echo -n $(dir $<) > $@
@$(DEPEND_asm.d) $< >> $@
objs:$(OBJS)
%.o:%.cpp
$(COMPILE.cpp) $< -o $@
%.o:%.asm
$(COMPILE.asm) $< -o $@
tags: $(HEADERS) $(SOURCES)
etags $(HEADERS) $(SOURCES)
ctags: $(HEADERS) $(SOURCES)
ctags $(HEADERS) $(SOURCES)
$(TARGETLIB):$(OBJS)
@if test ! -d $(BINDIR) ; then mkdir -p $(BINDIR) ; fi
$(LINK) $(OBJS) -shared -Wl,-Bsymbolic -o $@
@echo produce the lib to $(TARGETLIB).
@if test ! -d $(OUTDIR) ; then mkdir -p $(OUTDIR) ; fi
@cp -f $(TARGETLIB) $(OUTDIR)
@cp -f $(TARGETLIB) ../../../testbin
@echo copy the lib to $(OUTDIR).
clean:
rm -f $(OBJS) $(TARGETLIB)
distclean: clean
rm -f $(DEPS) TAGS
NASM = 1
NAME = libwelsvp
OUTDIR = ../../../bin/linux
BINDIR = ../../bin
OBJDIR = ../../obj
SRCDIRS = ../../src/asm \
../../src/common \
../../src/adaptivequantization \
../../src/backgounddetection \
../../src/denoise \
../../src/downsample \
../../src/scenechangedetection \
../../src/vaacalc \
../../src/complexityanalysis
SRCDIRS += ../../src/imagerotate
TARGETLIB = $(BINDIR)/$(NAME).so
CC = $(shell which gcc)
AS = $(shell which nasm)
GCC = gcc -m32
CPPFLAGS = -Wall -g -O3
ifeq ($(NASM), 1)
CPPFLAGS += -DX86_ASM
endif
ASMFLAGS = -f elf -DNOPREFIX -I ../../src/asm/
LDFLAGS = -lstdc++ -ldl
SRCEXTS = .cpp
ifeq ($(NASM), 1)
SRCEXTS += .asm
endif
HDREXTS = .h
SOURCES = $(foreach d,$(SRCDIRS),$(wildcard $(addprefix $(d)/*,$(SRCEXTS))))
HEADERS = $(foreach d,$(SRCDIRS),$(wildcard $(addprefix $(d)/*,$(HDREXTS))))
SRC_CPP = $(filter %.cpp,$(SOURCES))
SRC_ASM = $(filter %.asm,$(SOURCES))
OBJS = $(addsuffix .o, $(basename $(SOURCES)))
DEPS = $(OBJS:.o=.d)
DEP_OPT = $(shell if `$(CC) --version | grep "GCC" >/dev/null`; then \
echo "-MM -MP"; else echo "-M"; fi )
DEPEND_cpp.d = $(subst -g ,,$(CC) $(DEP_OPT) $(CPPFLAGS))
DEPEND_asm.d = $(subst -g ,,$(AS) $(DEP_OPT) $(ASMFLAGS))
COMPILE.cpp = $(GCC) $(CPPFLAGS) -c
COMPILE.asm = $(AS) $(ASMFLAGS)
LINK = $(GCC) $(LDFLAGS)
.PHONY: all objs tags ctags clean distclean
.SUFFIXES:
all: $(TARGETLIB)
%.d:%.cpp
@echo -n $(dir $<) > $@
@$(DEPEND_cpp.d) $< >> $@
%.d:%.asm
@echo -n $(dir $<) > $@
@$(DEPEND_asm.d) $< >> $@
objs:$(OBJS)
%.o:%.cpp
$(COMPILE.cpp) $< -o $@
%.o:%.asm
$(COMPILE.asm) $< -o $@
tags: $(HEADERS) $(SOURCES)
etags $(HEADERS) $(SOURCES)
ctags: $(HEADERS) $(SOURCES)
ctags $(HEADERS) $(SOURCES)
$(TARGETLIB):$(OBJS)
@if test ! -d $(BINDIR) ; then mkdir -p $(BINDIR) ; fi
$(LINK) $(OBJS) -shared -Wl,-Bsymbolic -o $@
@echo produce the lib to $(TARGETLIB).
@if test ! -d $(OUTDIR) ; then mkdir -p $(OUTDIR) ; fi
@cp -f $(TARGETLIB) $(OUTDIR)
@cp -f $(TARGETLIB) ../../../testbin
@echo copy the lib to $(OUTDIR).
clean:
rm -f $(OBJS) $(TARGETLIB)
distclean: clean
rm -f $(DEPS) TAGS

View File

@ -1,263 +1,263 @@
;*!
;* \copy
;* Copyright (c) 2010-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* * Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* * Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
;* predenoise.asm
;*
;* Abstract
;* denoise for SVC2.1
;* History
;* 4/13/2010 Created
;* 7/30/2010 Modified
;*
;*
;*************************************************************************/
%include "asm_inc.asm"
;***********************************************************************
; Constant
;***********************************************************************
SECTION .rodata align=16
sse2_32 times 8 dw 32
sse2_20 times 8 dw 20
BITS 32
;***********************************************************************
; Code
;***********************************************************************
SECTION .text
%macro WEIGHT_LINE 9
movq %2, %9
punpcklbw %2, %7
movdqa %8, %2
movdqa %1, %6
psubusb %1, %8
psubusb %8, %6
por %8, %1 ; ABS(curPixel - centerPixel);
movdqa %1, %3
psubusb %1, %8
pmullw %1, %1
psrlw %1, 5
pmullw %2, %1
paddusw %4, %1
paddusw %5, %2
%endmacro
%macro WEIGHT_LINE1_UV 4
movdqa %2, %1
punpcklbw %2, %4
paddw %3, %2
movdqa %2, %1
psrldq %2, 1
punpcklbw %2, %4
paddw %3, %2
movdqa %2, %1
psrldq %2, 2
punpcklbw %2, %4
psllw %2, 1
paddw %3, %2
movdqa %2, %1
psrldq %2, 3
punpcklbw %2, %4
paddw %3, %2
movdqa %2, %1
psrldq %2, 4
punpcklbw %2, %4
paddw %3, %2
%endmacro
%macro WEIGHT_LINE2_UV 4
movdqa %2, %1
punpcklbw %2, %4
paddw %3, %2
movdqa %2, %1
psrldq %2, 1
punpcklbw %2, %4
psllw %2, 1
paddw %3, %2
movdqa %2, %1
psrldq %2, 2
punpcklbw %2, %4
psllw %2, 2
paddw %3, %2
movdqa %2, %1
psrldq %2, 3
punpcklbw %2, %4
psllw %2, 1
paddw %3, %2
movdqa %2, %1
psrldq %2, 4
punpcklbw %2, %4
paddw %3, %2
%endmacro
%macro WEIGHT_LINE3_UV 4
movdqa %2, %1
punpcklbw %2, %4
psllw %2, 1
paddw %3, %2
movdqa %2, %1
psrldq %2, 1
punpcklbw %2, %4
psllw %2, 2
paddw %3, %2
movdqa %2, %1
psrldq %2, 2
punpcklbw %2, %4
pmullw %2, [sse2_20]
paddw %3, %2
movdqa %2, %1
psrldq %2, 3
punpcklbw %2, %4
psllw %2, 2
paddw %3, %2
movdqa %2, %1
psrldq %2, 4
punpcklbw %2, %4
psllw %2, 1
paddw %3, %2
%endmacro
ALIGN 16
WELS_EXTERN BilateralLumaFilter8_sse2
;***********************************************************************
; BilateralLumaFilter8_sse2(uint8_t *pixels, int stride);
;***********************************************************************
; 1 2 3
; 4 0 5
; 6 7 8
; 0: the center point
%define pushsize 4
%define pixel esp + pushsize + 4
%define stride esp + pushsize + 8
BilateralLumaFilter8_sse2:
push ebx
pxor xmm7, xmm7
mov eax, [pixel]
mov ebx, eax
movq xmm6, [eax]
punpcklbw xmm6, xmm7
movdqa xmm3, [sse2_32]
pxor xmm4, xmm4 ; nTotWeight
pxor xmm5, xmm5 ; nSum
dec eax
mov ecx, [stride]
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [eax] ; pixel 4
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [eax + 2] ; pixel 5
sub eax, ecx
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [eax] ; pixel 1
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [eax + 1] ; pixel 2
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [eax + 2] ; pixel 3
lea eax, [eax + ecx * 2]
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [eax] ; pixel 6
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [eax + 1] ; pixel 7
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [eax + 2] ; pixel 8
pcmpeqw xmm0, xmm0
psrlw xmm0, 15
psllw xmm0, 8
psubusw xmm0, xmm4
pmullw xmm0, xmm6
paddusw xmm5, xmm0
psrlw xmm5, 8
packuswb xmm5, xmm5
movq [ebx], xmm5
pop ebx
ret
WELS_EXTERN WaverageChromaFilter8_sse2
;***********************************************************************
; void WaverageChromaFilter8_sse2(uint8_t *pixels, int stride);
;***********************************************************************
;5x5 filter:
;1 1 2 1 1
;1 2 4 2 1
;2 4 20 4 2
;1 2 4 2 1
;1 1 2 1 1
ALIGN 16
WaverageChromaFilter8_sse2:
mov edx, [esp + 4] ; pixels
mov ecx, [esp + 8] ; stride
mov eax, ecx
add eax, eax
sub edx, eax ; pixels - 2 * stride
sub edx, 2
pxor xmm0, xmm0
pxor xmm3, xmm3
movdqu xmm1, [edx]
WEIGHT_LINE1_UV xmm1, xmm2, xmm3, xmm0
movdqu xmm1, [edx + ecx]
WEIGHT_LINE2_UV xmm1, xmm2, xmm3, xmm0
add edx, eax
movdqu xmm1, [edx]
WEIGHT_LINE3_UV xmm1, xmm2, xmm3, xmm0
movdqu xmm1, [edx + ecx]
WEIGHT_LINE2_UV xmm1, xmm2, xmm3, xmm0
movdqu xmm1, [edx + ecx * 2]
WEIGHT_LINE1_UV xmm1, xmm2, xmm3, xmm0
psrlw xmm3, 6
packuswb xmm3, xmm3
movq [edx + 2], xmm3
;*!
;* \copy
;* Copyright (c) 2010-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* * Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* * Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
;* predenoise.asm
;*
;* Abstract
;* denoise for SVC2.1
;* History
;* 4/13/2010 Created
;* 7/30/2010 Modified
;*
;*
;*************************************************************************/
%include "asm_inc.asm"
;***********************************************************************
; Constant
;***********************************************************************
SECTION .rodata align=16
sse2_32 times 8 dw 32
sse2_20 times 8 dw 20
BITS 32
;***********************************************************************
; Code
;***********************************************************************
SECTION .text
%macro WEIGHT_LINE 9
movq %2, %9
punpcklbw %2, %7
movdqa %8, %2
movdqa %1, %6
psubusb %1, %8
psubusb %8, %6
por %8, %1 ; ABS(curPixel - centerPixel);
movdqa %1, %3
psubusb %1, %8
pmullw %1, %1
psrlw %1, 5
pmullw %2, %1
paddusw %4, %1
paddusw %5, %2
%endmacro
%macro WEIGHT_LINE1_UV 4
movdqa %2, %1
punpcklbw %2, %4
paddw %3, %2
movdqa %2, %1
psrldq %2, 1
punpcklbw %2, %4
paddw %3, %2
movdqa %2, %1
psrldq %2, 2
punpcklbw %2, %4
psllw %2, 1
paddw %3, %2
movdqa %2, %1
psrldq %2, 3
punpcklbw %2, %4
paddw %3, %2
movdqa %2, %1
psrldq %2, 4
punpcklbw %2, %4
paddw %3, %2
%endmacro
%macro WEIGHT_LINE2_UV 4
movdqa %2, %1
punpcklbw %2, %4
paddw %3, %2
movdqa %2, %1
psrldq %2, 1
punpcklbw %2, %4
psllw %2, 1
paddw %3, %2
movdqa %2, %1
psrldq %2, 2
punpcklbw %2, %4
psllw %2, 2
paddw %3, %2
movdqa %2, %1
psrldq %2, 3
punpcklbw %2, %4
psllw %2, 1
paddw %3, %2
movdqa %2, %1
psrldq %2, 4
punpcklbw %2, %4
paddw %3, %2
%endmacro
%macro WEIGHT_LINE3_UV 4
movdqa %2, %1
punpcklbw %2, %4
psllw %2, 1
paddw %3, %2
movdqa %2, %1
psrldq %2, 1
punpcklbw %2, %4
psllw %2, 2
paddw %3, %2
movdqa %2, %1
psrldq %2, 2
punpcklbw %2, %4
pmullw %2, [sse2_20]
paddw %3, %2
movdqa %2, %1
psrldq %2, 3
punpcklbw %2, %4
psllw %2, 2
paddw %3, %2
movdqa %2, %1
psrldq %2, 4
punpcklbw %2, %4
psllw %2, 1
paddw %3, %2
%endmacro
ALIGN 16
WELS_EXTERN BilateralLumaFilter8_sse2
;***********************************************************************
; BilateralLumaFilter8_sse2(uint8_t *pixels, int stride);
;***********************************************************************
; 1 2 3
; 4 0 5
; 6 7 8
; 0: the center point
%define pushsize 4
%define pixel esp + pushsize + 4
%define stride esp + pushsize + 8
BilateralLumaFilter8_sse2:
push ebx
pxor xmm7, xmm7
mov eax, [pixel]
mov ebx, eax
movq xmm6, [eax]
punpcklbw xmm6, xmm7
movdqa xmm3, [sse2_32]
pxor xmm4, xmm4 ; nTotWeight
pxor xmm5, xmm5 ; nSum
dec eax
mov ecx, [stride]
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [eax] ; pixel 4
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [eax + 2] ; pixel 5
sub eax, ecx
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [eax] ; pixel 1
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [eax + 1] ; pixel 2
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [eax + 2] ; pixel 3
lea eax, [eax + ecx * 2]
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [eax] ; pixel 6
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [eax + 1] ; pixel 7
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [eax + 2] ; pixel 8
pcmpeqw xmm0, xmm0
psrlw xmm0, 15
psllw xmm0, 8
psubusw xmm0, xmm4
pmullw xmm0, xmm6
paddusw xmm5, xmm0
psrlw xmm5, 8
packuswb xmm5, xmm5
movq [ebx], xmm5
pop ebx
ret
WELS_EXTERN WaverageChromaFilter8_sse2
;***********************************************************************
; void WaverageChromaFilter8_sse2(uint8_t *pixels, int stride);
;***********************************************************************
;5x5 filter:
;1 1 2 1 1
;1 2 4 2 1
;2 4 20 4 2
;1 2 4 2 1
;1 1 2 1 1
ALIGN 16
WaverageChromaFilter8_sse2:
mov edx, [esp + 4] ; pixels
mov ecx, [esp + 8] ; stride
mov eax, ecx
add eax, eax
sub edx, eax ; pixels - 2 * stride
sub edx, 2
pxor xmm0, xmm0
pxor xmm3, xmm3
movdqu xmm1, [edx]
WEIGHT_LINE1_UV xmm1, xmm2, xmm3, xmm0
movdqu xmm1, [edx + ecx]
WEIGHT_LINE2_UV xmm1, xmm2, xmm3, xmm0
add edx, eax
movdqu xmm1, [edx]
WEIGHT_LINE3_UV xmm1, xmm2, xmm3, xmm0
movdqu xmm1, [edx + ecx]
WEIGHT_LINE2_UV xmm1, xmm2, xmm3, xmm0
movdqu xmm1, [edx + ecx * 2]
WEIGHT_LINE1_UV xmm1, xmm2, xmm3, xmm0
psrlw xmm3, 6
packuswb xmm3, xmm3
movq [edx + 2], xmm3
ret

File diff suppressed because it is too large Load Diff

View File

@ -1,145 +1,145 @@
;*!
;* \copy
;* Copyright (c) 2009-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* * Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* * Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
;* intra_pred.asm
;*
;* Abstract
;* sse2 function for intra predict operations
;*
;* History
;* 18/09/2009 Created
;*
;*
;*************************************************************************/
%include "../../src/asm/asm_inc.asm"
BITS 32
;***********************************************************************
; Local Data (Read Only)
;***********************************************************************
%ifdef FORMAT_COFF
SECTION .rodata data
%else
SECTION .rodata align=16
%endif
align 16
mmx_01bytes: times 16 db 1
;***********************************************************************
; macros
;***********************************************************************
%macro COPY_16_TIMES 2
movdqa %2, [%1-16]
psrldq %2, 15
pmuludq %2, [mmx_01bytes]
pshufd %2, %2, 0
%endmacro
%macro COPY_16_TIMESS 3
movdqa %2, [%1+%3-16]
psrldq %2, 15
pmuludq %2, [mmx_01bytes]
pshufd %2, %2, 0
%endmacro
;***********************************************************************
; Code
;***********************************************************************
SECTION .text
;***********************************************************************
; void WelsI16x16LumaPredH_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride);
;***********************************************************************
%macro SSE2_PRED_H_16X16_TWO_LINE 1
lea eax, [eax+ecx*2]
COPY_16_TIMES eax, xmm0
movdqa [edx+%1], xmm0
COPY_16_TIMESS eax, xmm0, ecx
movdqa [edx+%1+0x10], xmm0
%endmacro
WELS_EXTERN WelsI16x16LumaPredH_sse2
WelsI16x16LumaPredH_sse2:
mov edx, [esp+4] ; pred
mov eax, [esp+8] ; pRef
mov ecx, [esp+12] ; stride
COPY_16_TIMES eax, xmm0
movdqa [edx], xmm0
COPY_16_TIMESS eax, xmm0, ecx
movdqa [edx+0x10], xmm0
SSE2_PRED_H_16X16_TWO_LINE 0x20
SSE2_PRED_H_16X16_TWO_LINE 0x40
SSE2_PRED_H_16X16_TWO_LINE 0x60
SSE2_PRED_H_16X16_TWO_LINE 0x80
SSE2_PRED_H_16X16_TWO_LINE 0xa0
SSE2_PRED_H_16X16_TWO_LINE 0xc0
SSE2_PRED_H_16X16_TWO_LINE 0xe0
ret
;***********************************************************************
; void WelsI16x16LumaPredV_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride);
;***********************************************************************
WELS_EXTERN WelsI16x16LumaPredV_sse2
WelsI16x16LumaPredV_sse2:
mov edx, [esp+4] ; pred
mov eax, [esp+8] ; pRef
mov ecx, [esp+12] ; stride
sub eax, ecx
movdqa xmm0, [eax]
movdqa [edx], xmm0
movdqa [edx+10h], xmm0
movdqa [edx+20h], xmm0
movdqa [edx+30h], xmm0
movdqa [edx+40h], xmm0
movdqa [edx+50h], xmm0
movdqa [edx+60h], xmm0
movdqa [edx+70h], xmm0
movdqa [edx+80h], xmm0
movdqa [edx+90h], xmm0
movdqa [edx+160], xmm0
movdqa [edx+176], xmm0
movdqa [edx+192], xmm0
movdqa [edx+208], xmm0
movdqa [edx+224], xmm0
movdqa [edx+240], xmm0
;*!
;* \copy
;* Copyright (c) 2009-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* * Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* * Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
;* intra_pred.asm
;*
;* Abstract
;* sse2 function for intra predict operations
;*
;* History
;* 18/09/2009 Created
;*
;*
;*************************************************************************/
%include "../../src/asm/asm_inc.asm"
BITS 32
;***********************************************************************
; Local Data (Read Only)
;***********************************************************************
%ifdef FORMAT_COFF
SECTION .rodata data
%else
SECTION .rodata align=16
%endif
align 16
mmx_01bytes: times 16 db 1
;***********************************************************************
; macros
;***********************************************************************
%macro COPY_16_TIMES 2
movdqa %2, [%1-16]
psrldq %2, 15
pmuludq %2, [mmx_01bytes]
pshufd %2, %2, 0
%endmacro
%macro COPY_16_TIMESS 3
movdqa %2, [%1+%3-16]
psrldq %2, 15
pmuludq %2, [mmx_01bytes]
pshufd %2, %2, 0
%endmacro
;***********************************************************************
; Code
;***********************************************************************
SECTION .text
;***********************************************************************
; void WelsI16x16LumaPredH_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride);
;***********************************************************************
%macro SSE2_PRED_H_16X16_TWO_LINE 1
lea eax, [eax+ecx*2]
COPY_16_TIMES eax, xmm0
movdqa [edx+%1], xmm0
COPY_16_TIMESS eax, xmm0, ecx
movdqa [edx+%1+0x10], xmm0
%endmacro
WELS_EXTERN WelsI16x16LumaPredH_sse2
WelsI16x16LumaPredH_sse2:
mov edx, [esp+4] ; pred
mov eax, [esp+8] ; pRef
mov ecx, [esp+12] ; stride
COPY_16_TIMES eax, xmm0
movdqa [edx], xmm0
COPY_16_TIMESS eax, xmm0, ecx
movdqa [edx+0x10], xmm0
SSE2_PRED_H_16X16_TWO_LINE 0x20
SSE2_PRED_H_16X16_TWO_LINE 0x40
SSE2_PRED_H_16X16_TWO_LINE 0x60
SSE2_PRED_H_16X16_TWO_LINE 0x80
SSE2_PRED_H_16X16_TWO_LINE 0xa0
SSE2_PRED_H_16X16_TWO_LINE 0xc0
SSE2_PRED_H_16X16_TWO_LINE 0xe0
ret
;***********************************************************************
; void WelsI16x16LumaPredV_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride);
;***********************************************************************
WELS_EXTERN WelsI16x16LumaPredV_sse2
WelsI16x16LumaPredV_sse2:
mov edx, [esp+4] ; pred
mov eax, [esp+8] ; pRef
mov ecx, [esp+12] ; stride
sub eax, ecx
movdqa xmm0, [eax]
movdqa [edx], xmm0
movdqa [edx+10h], xmm0
movdqa [edx+20h], xmm0
movdqa [edx+30h], xmm0
movdqa [edx+40h], xmm0
movdqa [edx+50h], xmm0
movdqa [edx+60h], xmm0
movdqa [edx+70h], xmm0
movdqa [edx+80h], xmm0
movdqa [edx+90h], xmm0
movdqa [edx+160], xmm0
movdqa [edx+176], xmm0
movdqa [edx+192], xmm0
movdqa [edx+208], xmm0
movdqa [edx+224], xmm0
movdqa [edx+240], xmm0
ret

View File

@ -1,79 +1,79 @@
;*!
;* \copy
;* Copyright (c) 2009-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* * Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* * Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
;* pixel_sse2.asm
;*
;* Abstract
;* WelsSampleSad8x8_sse21
;*
;* History
;* 8/5/2009 Created
;*
;*
;*************************************************************************/
%include "asm_inc.asm"
BITS 32
;***********************************************************************
; Macros and other preprocessor constants
;***********************************************************************
%macro SAD_8x4 0
movq xmm0, [eax]
movq xmm1, [eax+ebx]
lea eax, [eax+2*ebx]
movhps xmm0, [eax]
movhps xmm1, [eax+ebx]
movq xmm2, [ecx]
movq xmm3, [ecx+edx]
lea ecx, [ecx+2*edx]
movhps xmm2, [ecx]
movhps xmm3, [ecx+edx]
psadbw xmm0, xmm2
psadbw xmm1, xmm3
paddw xmm6, xmm0
paddw xmm6, xmm1
%endmacro
%macro CACHE_SPLIT_CHECK 3 ; address, width, cacheline
and %1, 0x1f|(%3>>1)
cmp %1, (32-%2)|(%3>>1)
%endmacro
;*!
;* \copy
;* Copyright (c) 2009-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* * Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* * Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
;* pixel_sse2.asm
;*
;* Abstract
;* WelsSampleSad8x8_sse21
;*
;* History
;* 8/5/2009 Created
;*
;*
;*************************************************************************/
%include "asm_inc.asm"
BITS 32
;***********************************************************************
; Macros and other preprocessor constants
;***********************************************************************
%macro SAD_8x4 0
movq xmm0, [eax]
movq xmm1, [eax+ebx]
lea eax, [eax+2*ebx]
movhps xmm0, [eax]
movhps xmm1, [eax+ebx]
movq xmm2, [ecx]
movq xmm3, [ecx+edx]
lea ecx, [ecx+2*edx]
movhps xmm2, [ecx]
movhps xmm3, [ecx+edx]
psadbw xmm0, xmm2
psadbw xmm1, xmm3
paddw xmm6, xmm0
paddw xmm6, xmm1
%endmacro
%macro CACHE_SPLIT_CHECK 3 ; address, width, cacheline
and %1, 0x1f|(%3>>1)
cmp %1, (32-%2)|(%3>>1)
%endmacro
%macro SSE2_GetSad8x4 0
movq xmm0, [eax]
movq xmm1, [eax+ebx]
@ -90,12 +90,12 @@ cmp %1, (32-%2)|(%3>>1)
psadbw xmm1, xmm3
paddw xmm6, xmm0
paddw xmm6, xmm1
%endmacro
%endmacro
;***********************************************************************
; Code
;***********************************************************************
;***********************************************************************
; Code
;***********************************************************************
SECTION .text
WELS_EXTERN WelsSampleSad8x8_sse21

File diff suppressed because it is too large Load Diff

View File

@ -1,36 +1,36 @@
;*!
;* \copy
;* Copyright (c) 2011-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* * Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* * Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
LIBRARY welsvp.dll
EXPORTS
CreateVpInterface PRIVATE
;*!
;* \copy
;* Copyright (c) 2011-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* * Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* * Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
LIBRARY welsvp.dll
EXPORTS
CreateVpInterface PRIVATE
DestroyVpInterface PRIVATE

View File

@ -1,39 +1,39 @@
# Layer Configuration File
#============================== INPUT / OUTPUT ==============================
SourceWidth 320 # Input frame width
SourceHeight 192 # Input frame height
FrameRateIn 12 # Input frame rate [Hz]
FrameRateOut 12 # Output frame rate [Hz]
InputFile CiscoVT2people_320x192_12fps.yuv # Input file
ReconFile rec_layer2.yuv # Reconstructed file
#============================== CODING ==============================
ProfileIdc 66 # value of profile_idc (or 0 for auto detection)
InitialQP 24 # Quantization parameters for base quality layer
#================================ RATE CONTROL ===============================
SpatialBitrate 600 # Unit: kbps, controled by DisableRC also
#============================== MultiSlice Slice Argument ==============================
# for S/M Slice(s) mode settings
SliceMode 0 # 0: sigle slice mode; >0: multiple slices mode, see below;
SliceSize 1500
SliceNum 1 # multiple slices number specified
SlicesAssign0 960 # count number of MBs in slice #0
SlicesAssign1 0 # count number of MBs in slice #1
SlicesAssign2 0 # count number of MBs in slice #2
SlicesAssign3 0 # count number of MBs in slice #3 -- seting here is for better testing
SlicesAssign4 0 # count number of MBs in slice #4
SlicesAssign5 0 # count number of MBs in slice #5
SlicesAssign6 0 # count number of MBs in slice #6
SlicesAssign7 0 # count number of MBs in slice #7
### DESIGN OF SLICE MODE ####
# 0 SM_SINGLE_SLICE | SliceNum==1
# 1 SM_FIXEDSLCNUM_SLICE | according to SliceNum | Enabled dynamic slicing for multi-thread
# 2 SM_RASTER_SLICE | according to SlicesAssign | Need input of MB numbers each slice. In addition, if other constraint in slice_argument is presented, need to follow the constraints. Typically if MB num and slice size are both constrained, re-encoding may be involved.
# 3 SM_ROWMB_SLICE | according to PictureMBHeight | Typical of single row of mbs each slice?+ slice size constraint which including re-encoding
# 4 SM_DYN_SLICE | according to SliceSize | Dynamic slicing (have no idea about slice_nums until encoding current frame)
# Layer Configuration File
#============================== INPUT / OUTPUT ==============================
SourceWidth 320 # Input frame width
SourceHeight 192 # Input frame height
FrameRateIn 12 # Input frame rate [Hz]
FrameRateOut 12 # Output frame rate [Hz]
InputFile CiscoVT2people_320x192_12fps.yuv # Input file
ReconFile rec_layer2.yuv # Reconstructed file
#============================== CODING ==============================
ProfileIdc 66 # value of profile_idc (or 0 for auto detection)
InitialQP 24 # Quantization parameters for base quality layer
#================================ RATE CONTROL ===============================
SpatialBitrate 600 # Unit: kbps, controled by DisableRC also
#============================== MultiSlice Slice Argument ==============================
# for S/M Slice(s) mode settings
SliceMode 0 # 0: sigle slice mode; >0: multiple slices mode, see below;
SliceSize 1500
SliceNum 1 # multiple slices number specified
SlicesAssign0 960 # count number of MBs in slice #0
SlicesAssign1 0 # count number of MBs in slice #1
SlicesAssign2 0 # count number of MBs in slice #2
SlicesAssign3 0 # count number of MBs in slice #3 -- seting here is for better testing
SlicesAssign4 0 # count number of MBs in slice #4
SlicesAssign5 0 # count number of MBs in slice #5
SlicesAssign6 0 # count number of MBs in slice #6
SlicesAssign7 0 # count number of MBs in slice #7
### DESIGN OF SLICE MODE ####
# 0 SM_SINGLE_SLICE | SliceNum==1
# 1 SM_FIXEDSLCNUM_SLICE | according to SliceNum | Enabled dynamic slicing for multi-thread
# 2 SM_RASTER_SLICE | according to SlicesAssign | Need input of MB numbers each slice. In addition, if other constraint in slice_argument is presented, need to follow the constraints. Typically if MB num and slice size are both constrained, re-encoding may be involved.
# 3 SM_ROWMB_SLICE | according to PictureMBHeight | Typical of single row of mbs each slice?+ slice size constraint which including re-encoding
# 4 SM_DYN_SLICE | according to SliceSize | Dynamic slicing (have no idea about slice_nums until encoding current frame)

View File

@ -1,39 +1,39 @@
# Layer Configuration File
#============================== INPUT / OUTPUT ==============================
SourceWidth 320 # Input frame width
SourceHeight 192 # Input frame height
FrameRateIn 12 # Input frame rate [Hz]
FrameRateOut 12 # Output frame rate [Hz]
InputFile CiscoVT2people_320x192_12fps.yuv # Input file
ReconFile rec_layer2.yuv # Reconstructed file
#============================== CODING ==============================
ProfileIdc 66 # value of profile_idc (or 0 for auto detection)
InitialQP 24 # Quantization parameters for base quality layer
#================================ RATE CONTROL ===============================
SpatialBitrate 600 # Unit: kbps, controled by DisableRC also
#============================== MultiSlice Slice Argument ==============================
# for S/M Slice(s) mode settings
SliceMode 0 # 0: sigle slice mode; >0: multiple slices mode, see below;
SliceSize 1500
SliceNum 1 # multiple slices number specified
SlicesAssign0 960 # count number of MBs in slice #0
SlicesAssign1 0 # count number of MBs in slice #1
SlicesAssign2 0 # count number of MBs in slice #2
SlicesAssign3 0 # count number of MBs in slice #3 -- seting here is for better testing
SlicesAssign4 0 # count number of MBs in slice #4
SlicesAssign5 0 # count number of MBs in slice #5
SlicesAssign6 0 # count number of MBs in slice #6
SlicesAssign7 0 # count number of MBs in slice #7
### DESIGN OF SLICE MODE, 100804, Sijia ####
# 0 SM_SINGLE_SLICE | SliceNum==1
# 1 SM_FIXEDSLCNUM_SLICE | according to SliceNum | Enabled dynamic slicing for multi-thread
# 2 SM_RASTER_SLICE | according to SlicesAssign | Need input of MB numbers each slice. In addition, if other constraint in slice_argument is presented, need to follow the constraints. Typically if MB num and slice size are both constrained, re-encoding may be involved.
# 3 SM_ROWMB_SLICE | according to PictureMBHeight | Specially for TP. Typical of single row of mbs each slice?+ slice size constraint which including re-encoding
# 4 SM_DYN_SLICE | according to SliceSize | Dynamic slicing (have no idea about slice_nums until encoding current frame)
# Layer Configuration File
#============================== INPUT / OUTPUT ==============================
SourceWidth 320 # Input frame width
SourceHeight 192 # Input frame height
FrameRateIn 12 # Input frame rate [Hz]
FrameRateOut 12 # Output frame rate [Hz]
InputFile CiscoVT2people_320x192_12fps.yuv # Input file
ReconFile rec_layer2.yuv # Reconstructed file
#============================== CODING ==============================
ProfileIdc 66 # value of profile_idc (or 0 for auto detection)
InitialQP 24 # Quantization parameters for base quality layer
#================================ RATE CONTROL ===============================
SpatialBitrate 600 # Unit: kbps, controled by DisableRC also
#============================== MultiSlice Slice Argument ==============================
# for S/M Slice(s) mode settings
SliceMode 0 # 0: sigle slice mode; >0: multiple slices mode, see below;
SliceSize 1500
SliceNum 1 # multiple slices number specified
SlicesAssign0 960 # count number of MBs in slice #0
SlicesAssign1 0 # count number of MBs in slice #1
SlicesAssign2 0 # count number of MBs in slice #2
SlicesAssign3 0 # count number of MBs in slice #3 -- seting here is for better testing
SlicesAssign4 0 # count number of MBs in slice #4
SlicesAssign5 0 # count number of MBs in slice #5
SlicesAssign6 0 # count number of MBs in slice #6
SlicesAssign7 0 # count number of MBs in slice #7
### DESIGN OF SLICE MODE, 100804, Sijia ####
# 0 SM_SINGLE_SLICE | SliceNum==1
# 1 SM_FIXEDSLCNUM_SLICE | according to SliceNum | Enabled dynamic slicing for multi-thread
# 2 SM_RASTER_SLICE | according to SlicesAssign | Need input of MB numbers each slice. In addition, if other constraint in slice_argument is presented, need to follow the constraints. Typically if MB num and slice size are both constrained, re-encoding may be involved.
# 3 SM_ROWMB_SLICE | according to PictureMBHeight | Specially for TP. Typical of single row of mbs each slice?+ slice size constraint which including re-encoding
# 4 SM_DYN_SLICE | according to SliceSize | Dynamic slicing (have no idea about slice_nums until encoding current frame)

View File

@ -1,39 +1,39 @@
# Layer Configuration File
#============================== INPUT / OUTPUT ==============================
SourceWidth 320 # Input frame width
SourceHeight 192 # Input frame height
FrameRateIn 12 # Input frame rate [Hz]
FrameRateOut 12 # Output frame rate [Hz]
InputFile CiscoVT2people_320x192_12fps.yuv # Input file
ReconFile rec_layer2.yuv # Reconstructed file
#============================== CODING ==============================
ProfileIdc 66 # value of profile_idc (or 0 for auto detection)
InitialQP 24 # Quantization parameters for base quality layer
#================================ RATE CONTROL ===============================
SpatialBitrate 600 # Unit: kbps, controled by DisableRC also
#============================== MultiSlice Slice Argument ==============================
# for S/M Slice(s) mode settings
SliceMode 0 # 0: sigle slice mode; >0: multiple slices mode, see below;
SliceSize 1500
SliceNum 1 # multiple slices number specified
SlicesAssign0 960 # count number of MBs in slice #0
SlicesAssign1 0 # count number of MBs in slice #1
SlicesAssign2 0 # count number of MBs in slice #2
SlicesAssign3 0 # count number of MBs in slice #3 -- seting here is for better testing
SlicesAssign4 0 # count number of MBs in slice #4
SlicesAssign5 0 # count number of MBs in slice #5
SlicesAssign6 0 # count number of MBs in slice #6
SlicesAssign7 0 # count number of MBs in slice #7
### DESIGN OF SLICE MODE, 100804, Sijia ####
# 0 SM_SINGLE_SLICE | SliceNum==1
# 1 SM_FIXEDSLCNUM_SLICE | according to SliceNum | Enabled dynamic slicing for multi-thread
# 2 SM_RASTER_SLICE | according to SlicesAssign | Need input of MB numbers each slice. In addition, if other constraint in slice_argument is presented, need to follow the constraints. Typically if MB num and slice size are both constrained, re-encoding may be involved.
# 3 SM_ROWMB_SLICE | according to PictureMBHeight | Specially for TP. Typical of single row of mbs each slice?+ slice size constraint which including re-encoding
# 4 SM_DYN_SLICE | according to SliceSize | Dynamic slicing (have no idea about slice_nums until encoding current frame)
# Layer Configuration File
#============================== INPUT / OUTPUT ==============================
SourceWidth 320 # Input frame width
SourceHeight 192 # Input frame height
FrameRateIn 12 # Input frame rate [Hz]
FrameRateOut 12 # Output frame rate [Hz]
InputFile CiscoVT2people_320x192_12fps.yuv # Input file
ReconFile rec_layer2.yuv # Reconstructed file
#============================== CODING ==============================
ProfileIdc 66 # value of profile_idc (or 0 for auto detection)
InitialQP 24 # Quantization parameters for base quality layer
#================================ RATE CONTROL ===============================
SpatialBitrate 600 # Unit: kbps, controled by DisableRC also
#============================== MultiSlice Slice Argument ==============================
# for S/M Slice(s) mode settings
SliceMode 0 # 0: sigle slice mode; >0: multiple slices mode, see below;
SliceSize 1500
SliceNum 1 # multiple slices number specified
SlicesAssign0 960 # count number of MBs in slice #0
SlicesAssign1 0 # count number of MBs in slice #1
SlicesAssign2 0 # count number of MBs in slice #2
SlicesAssign3 0 # count number of MBs in slice #3 -- seting here is for better testing
SlicesAssign4 0 # count number of MBs in slice #4
SlicesAssign5 0 # count number of MBs in slice #5
SlicesAssign6 0 # count number of MBs in slice #6
SlicesAssign7 0 # count number of MBs in slice #7
### DESIGN OF SLICE MODE, 100804, Sijia ####
# 0 SM_SINGLE_SLICE | SliceNum==1
# 1 SM_FIXEDSLCNUM_SLICE | according to SliceNum | Enabled dynamic slicing for multi-thread
# 2 SM_RASTER_SLICE | according to SlicesAssign | Need input of MB numbers each slice. In addition, if other constraint in slice_argument is presented, need to follow the constraints. Typically if MB num and slice size are both constrained, re-encoding may be involved.
# 3 SM_ROWMB_SLICE | according to PictureMBHeight | Specially for TP. Typical of single row of mbs each slice?+ slice size constraint which including re-encoding
# 4 SM_DYN_SLICE | according to SliceSize | Dynamic slicing (have no idea about slice_nums until encoding current frame)

View File

@ -1,63 +1,63 @@
# Cisco Scalable H.264/AVC Extension Encoder Configuration File
#============================== GENERAL ==============================
OutputFile test.264 # Bitstream file
MaxFrameRate 30 # Maximum frame rate [Hz]
FramesToBeEncoded -1 # Number of frames (at input frame rate)
GOPSize 4 # GOP Size (at maximum frame rate), 16
IntraPeriod 0 # Intra Period ( multipler of GoP size or -1)
EnableSpsPpsIDAddition 1
EnableFrameCropping 1 # enable frame cropping flag
#============================== LOOP FILTER ==============================
LoopFilterDisableIDC 0 # Loop filter idc (0: on, 1: off,
# 2: on except for slice boundaries,
# 3: two stage. slice boundries on in second stage
# 4: Luma on but Chroma off (w.r.t. idc=0)
# 5: Luma on except on slice boundaries, but Chroma off in enh. layer (w.r.t. idc=2)
# 6: Luma on in two stage. slice boundries on in second stage, but Chroma off (w.r.t. idc=3)
LoopFilterAlphaC0Offset 0 # AlphaOffset(-6..+6): valid range
LoopFilterBetaOffset 0 # BetaOffset (-6..+6): valid range
InterLayerLoopFilterDisableIDC 0 # filter idc for inter-layer deblocking (0: on, 1: off,
# 2: on except for slice boundaries,
# 3: two stage. slice boundries on in second stage
# 4: Luma on but Chroma off in enh. layer (w.r.t. idc=0)
# 5: Luma on except on slice boundaries, but Chroma off in enh. layer (w.r.t. idc=2)
# 6: Luma on in two stage. slice boundries on in second stage, but Chroma off (w.r.t. idc=3)
InterLayerLoopFilterAlphaC0Offset 0 # AlphaOffset for inter-layer deblocking
InterLayerLoopFilterBetaOffset 0 # BetaOffset for inter-layer deblocking
#============================== SOFTWARE IMPLEMENTATION ==============================
MultipleThreadIdc 1 # 0: auto(dynamic imp. internal encoder); 1: multiple threads imp. disabled; > 1: count number of threads;
#============================== RATE CONTROL ==============================
EnableRC 1 # ENABLE RC
TargetBitrate 5000 # Unit: kbps, controled by EnableRC also
#============================== DENOISE CONTROL ==============================
EnableDenoise 0 # Enable Denoise (1: enable, 0: disable)
#============================== SCENE CHANGE DETECTION CONTROL =======================
EnableSceneChangeDetection 1 # Enable Scene Change Detection (1: enable, 0: disable)
#============================== BACKGROUND DETECTION CONTROL ==============================
EnableBackgroundDetection 1 # BGD control(1: enable, 0: disable)
#============================== ADAPTIVE QUANTIZATION CONTROL =======================
EnableAdaptiveQuantization 1 # Enable Adaptive Quantization (1: enable, 0: disable)
#============================== LONG TERM REFERENCE CONTROL ==============================
EnableLongTermReference 0 # Enable Long Term Reference (1: enable, 0: disable)
LtrMarkPeriod 30 # Long Term Reference Marking Period
#============================== LAYER DEFINITION ==============================
PrefixNALAddingCtrl 0 # Control flag of adding prefix unit (0: off, 1: on)
# It shall always be on in SVC contexts (i.e. when there are CGS/MGS/spatial enhancement layers)
# Can be disabled when no inter spatial layer prediction in case of its value as 0
NumLayers 1 # Number of layers
//LayerCfg layer0.cfg # Layer 0 configuration file
//LayerCfg layer1.cfg # Layer 1 configuration file
LayerCfg layer2.cfg # Layer 2 configuration file
# Cisco Scalable H.264/AVC Extension Encoder Configuration File
#============================== GENERAL ==============================
OutputFile test.264 # Bitstream file
MaxFrameRate 30 # Maximum frame rate [Hz]
FramesToBeEncoded -1 # Number of frames (at input frame rate)
GOPSize 4 # GOP Size (at maximum frame rate), 16
IntraPeriod 0 # Intra Period ( multipler of GoP size or -1)
EnableSpsPpsIDAddition 1
EnableFrameCropping 1 # enable frame cropping flag
#============================== LOOP FILTER ==============================
LoopFilterDisableIDC 0 # Loop filter idc (0: on, 1: off,
# 2: on except for slice boundaries,
# 3: two stage. slice boundries on in second stage
# 4: Luma on but Chroma off (w.r.t. idc=0)
# 5: Luma on except on slice boundaries, but Chroma off in enh. layer (w.r.t. idc=2)
# 6: Luma on in two stage. slice boundries on in second stage, but Chroma off (w.r.t. idc=3)
LoopFilterAlphaC0Offset 0 # AlphaOffset(-6..+6): valid range
LoopFilterBetaOffset 0 # BetaOffset (-6..+6): valid range
InterLayerLoopFilterDisableIDC 0 # filter idc for inter-layer deblocking (0: on, 1: off,
# 2: on except for slice boundaries,
# 3: two stage. slice boundries on in second stage
# 4: Luma on but Chroma off in enh. layer (w.r.t. idc=0)
# 5: Luma on except on slice boundaries, but Chroma off in enh. layer (w.r.t. idc=2)
# 6: Luma on in two stage. slice boundries on in second stage, but Chroma off (w.r.t. idc=3)
InterLayerLoopFilterAlphaC0Offset 0 # AlphaOffset for inter-layer deblocking
InterLayerLoopFilterBetaOffset 0 # BetaOffset for inter-layer deblocking
#============================== SOFTWARE IMPLEMENTATION ==============================
MultipleThreadIdc 1 # 0: auto(dynamic imp. internal encoder); 1: multiple threads imp. disabled; > 1: count number of threads;
#============================== RATE CONTROL ==============================
EnableRC 1 # ENABLE RC
TargetBitrate 5000 # Unit: kbps, controled by EnableRC also
#============================== DENOISE CONTROL ==============================
EnableDenoise 0 # Enable Denoise (1: enable, 0: disable)
#============================== SCENE CHANGE DETECTION CONTROL =======================
EnableSceneChangeDetection 1 # Enable Scene Change Detection (1: enable, 0: disable)
#============================== BACKGROUND DETECTION CONTROL ==============================
EnableBackgroundDetection 1 # BGD control(1: enable, 0: disable)
#============================== ADAPTIVE QUANTIZATION CONTROL =======================
EnableAdaptiveQuantization 1 # Enable Adaptive Quantization (1: enable, 0: disable)
#============================== LONG TERM REFERENCE CONTROL ==============================
EnableLongTermReference 0 # Enable Long Term Reference (1: enable, 0: disable)
LtrMarkPeriod 30 # Long Term Reference Marking Period
#============================== LAYER DEFINITION ==============================
PrefixNALAddingCtrl 0 # Control flag of adding prefix unit (0: off, 1: on)
# It shall always be on in SVC contexts (i.e. when there are CGS/MGS/spatial enhancement layers)
# Can be disabled when no inter spatial layer prediction in case of its value as 0
NumLayers 1 # Number of layers
//LayerCfg layer0.cfg # Layer 0 configuration file
//LayerCfg layer1.cfg # Layer 1 configuration file
LayerCfg layer2.cfg # Layer 2 configuration file

View File

@ -1,63 +1,63 @@
# Cisco Scalable H.264/AVC Extension Encoder Configuration File
#============================== GENERAL ==============================
OutputFile test_vd_1d.264 # Bitstream file
MaxFrameRate 30 # Maximum frame rate [Hz]
FramesToBeEncoded -1 # Number of frames (at input frame rate)
GOPSize 4 # GOP Size (at maximum frame rate), 16
IntraPeriod 0 # Intra Period ( multipler of GoP size or -1)
EnableSpsPpsIDAddition 1
EnableFrameCropping 1 # enable frame cropping flag
#============================== LOOP FILTER ==============================
LoopFilterDisableIDC 0 # Loop filter idc (0: on, 1: off,
# 2: on except for slice boundaries,
# 3: two stage. slice boundries on in second stage
# 4: Luma on but Chroma off (w.r.t. idc=0)
# 5: Luma on except on slice boundaries, but Chroma off in enh. layer (w.r.t. idc=2)
# 6: Luma on in two stage. slice boundries on in second stage, but Chroma off (w.r.t. idc=3)
LoopFilterAlphaC0Offset 0 # AlphaOffset(-6..+6): valid range
LoopFilterBetaOffset 0 # BetaOffset (-6..+6): valid range
InterLayerLoopFilterDisableIDC 0 # filter idc for inter-layer deblocking (0: on, 1: off,
# 2: on except for slice boundaries,
# 3: two stage. slice boundries on in second stage
# 4: Luma on but Chroma off in enh. layer (w.r.t. idc=0)
# 5: Luma on except on slice boundaries, but Chroma off in enh. layer (w.r.t. idc=2)
# 6: Luma on in two stage. slice boundries on in second stage, but Chroma off (w.r.t. idc=3)
InterLayerLoopFilterAlphaC0Offset 0 # AlphaOffset for inter-layer deblocking
InterLayerLoopFilterBetaOffset 0 # BetaOffset for inter-layer deblocking
#============================== SOFTWARE IMPLEMENTATION ==============================
MultipleThreadIdc 1 # 0: auto(dynamic imp. internal encoder); 1: multiple threads imp. disabled; > 1: count number of threads;
#============================== RATE CONTROL ==============================
EnableRC 0 # ENABLE RC
TargetBitrate 5000 # Unit: kbps, controled by EnableRC also
#============================== DENOISE CONTROL ==============================
EnableDenoise 0 # Enable Denoise (1: enable, 0: disable)
#============================== SCENE CHANGE DETECTION CONTROL =======================
EnableSceneChangeDetection 1 # Enable Scene Change Detection (1: enable, 0: disable)
#============================== BACKGROUND DETECTION CONTROL ==============================
EnableBackgroundDetection 1 # BGD control(1: enable, 0: disable)
#============================== ADAPTIVE QUANTIZATION CONTROL =======================
EnableAdaptiveQuantization 0 # Enable Adaptive Quantization (1: enable, 0: disable)
#============================== LONG TERM REFERENCE CONTROL ==============================
EnableLongTermReference 1 # Enable Long Term Reference (1: enable, 0: disable)
LtrMarkPeriod 30 # Long Term Reference Marking Period
#============================== LAYER DEFINITION ==============================
PrefixNALAddingCtrl 0 # Control flag of adding prefix unit (0: off, 1: on)
# It shall always be on in SVC contexts (i.e. when there are CGS/MGS/spatial enhancement layers)
# Can be disabled when no inter spatial layer prediction in case of its value as 0
NumLayers 1 # Number of layers
//LayerCfg layer0_vd.cfg # Layer 0 configuration file
//LayerCfg layer1_vd.cfg # Layer 1 configuration file
LayerCfg layer2_vd.cfg # Layer 2 configuration file
# Cisco Scalable H.264/AVC Extension Encoder Configuration File
#============================== GENERAL ==============================
OutputFile test_vd_1d.264 # Bitstream file
MaxFrameRate 30 # Maximum frame rate [Hz]
FramesToBeEncoded -1 # Number of frames (at input frame rate)
GOPSize 4 # GOP Size (at maximum frame rate), 16
IntraPeriod 0 # Intra Period ( multipler of GoP size or -1)
EnableSpsPpsIDAddition 1
EnableFrameCropping 1 # enable frame cropping flag
#============================== LOOP FILTER ==============================
LoopFilterDisableIDC 0 # Loop filter idc (0: on, 1: off,
# 2: on except for slice boundaries,
# 3: two stage. slice boundries on in second stage
# 4: Luma on but Chroma off (w.r.t. idc=0)
# 5: Luma on except on slice boundaries, but Chroma off in enh. layer (w.r.t. idc=2)
# 6: Luma on in two stage. slice boundries on in second stage, but Chroma off (w.r.t. idc=3)
LoopFilterAlphaC0Offset 0 # AlphaOffset(-6..+6): valid range
LoopFilterBetaOffset 0 # BetaOffset (-6..+6): valid range
InterLayerLoopFilterDisableIDC 0 # filter idc for inter-layer deblocking (0: on, 1: off,
# 2: on except for slice boundaries,
# 3: two stage. slice boundries on in second stage
# 4: Luma on but Chroma off in enh. layer (w.r.t. idc=0)
# 5: Luma on except on slice boundaries, but Chroma off in enh. layer (w.r.t. idc=2)
# 6: Luma on in two stage. slice boundries on in second stage, but Chroma off (w.r.t. idc=3)
InterLayerLoopFilterAlphaC0Offset 0 # AlphaOffset for inter-layer deblocking
InterLayerLoopFilterBetaOffset 0 # BetaOffset for inter-layer deblocking
#============================== SOFTWARE IMPLEMENTATION ==============================
MultipleThreadIdc 1 # 0: auto(dynamic imp. internal encoder); 1: multiple threads imp. disabled; > 1: count number of threads;
#============================== RATE CONTROL ==============================
EnableRC 0 # ENABLE RC
TargetBitrate 5000 # Unit: kbps, controled by EnableRC also
#============================== DENOISE CONTROL ==============================
EnableDenoise 0 # Enable Denoise (1: enable, 0: disable)
#============================== SCENE CHANGE DETECTION CONTROL =======================
EnableSceneChangeDetection 1 # Enable Scene Change Detection (1: enable, 0: disable)
#============================== BACKGROUND DETECTION CONTROL ==============================
EnableBackgroundDetection 1 # BGD control(1: enable, 0: disable)
#============================== ADAPTIVE QUANTIZATION CONTROL =======================
EnableAdaptiveQuantization 0 # Enable Adaptive Quantization (1: enable, 0: disable)
#============================== LONG TERM REFERENCE CONTROL ==============================
EnableLongTermReference 1 # Enable Long Term Reference (1: enable, 0: disable)
LtrMarkPeriod 30 # Long Term Reference Marking Period
#============================== LAYER DEFINITION ==============================
PrefixNALAddingCtrl 0 # Control flag of adding prefix unit (0: off, 1: on)
# It shall always be on in SVC contexts (i.e. when there are CGS/MGS/spatial enhancement layers)
# Can be disabled when no inter spatial layer prediction in case of its value as 0
NumLayers 1 # Number of layers
//LayerCfg layer0_vd.cfg # Layer 0 configuration file
//LayerCfg layer1_vd.cfg # Layer 1 configuration file
LayerCfg layer2_vd.cfg # Layer 2 configuration file

View File

@ -1,63 +1,63 @@
# Cisco Scalable H.264/AVC Extension Encoder Configuration File
#============================== GENERAL ==============================
OutputFile test_vd_rc.264 # Bitstream file
MaxFrameRate 30 # Maximum frame rate [Hz]
FramesToBeEncoded -1 # Number of frames (at input frame rate), -1
GOPSize 8 # GOP Size (at maximum frame rate), 16
IntraPeriod 0 # Intra Period ( multipler of GoP size or -1)
EnableSpsPpsIDAddition 1
EnableFrameCropping 1 # enable frame cropping flag
#============================== LOOP FILTER ==============================
LoopFilterDisableIDC 0 # Loop filter idc (0: on, 1: off,
# 2: on except for slice boundaries,
# 3: two stage. slice boundries on in second stage
# 4: Luma on but Chroma off (w.r.t. idc=0)
# 5: Luma on except on slice boundaries, but Chroma off in enh. layer (w.r.t. idc=2)
# 6: Luma on in two stage. slice boundries on in second stage, but Chroma off (w.r.t. idc=3)
LoopFilterAlphaC0Offset 0 # AlphaOffset(-6..+6): valid range
LoopFilterBetaOffset 0 # BetaOffset (-6..+6): valid range
InterLayerLoopFilterDisableIDC 0 # filter idc for inter-layer deblocking (0: on, 1: off,
# 2: on except for slice boundaries,
# 3: two stage. slice boundries on in second stage
# 4: Luma on but Chroma off in enh. layer (w.r.t. idc=0)
# 5: Luma on except on slice boundaries, but Chroma off in enh. layer (w.r.t. idc=2)
# 6: Luma on in two stage. slice boundries on in second stage, but Chroma off (w.r.t. idc=3)
InterLayerLoopFilterAlphaC0Offset 0 # AlphaOffset for inter-layer deblocking
InterLayerLoopFilterBetaOffset 0 # BetaOffset for inter-layer deblocking
#============================== SOFTWARE IMPLEMENTATION ==============================
MultipleThreadIdc 1 # 0: auto(dynamic imp. internal encoder); 1: multiple threads imp. disabled; > 1: count number of threads;
#============================== RATE CONTROL ==============================
EnableRC 1 # ENABLE RC
TargetBitrate 600 # Unit: kbps, controled by EnableRC also
#============================== DENOISE CONTROL ==============================
EnableDenoise 1 # Enable Denoise (1: enable, 0: disable)
#============================== SCENE CHANGE DETECTION CONTROL =======================
EnableSceneChangeDetection 1 # Enable Scene Change Detection (1: enable, 0: disable)
#============================== BACKGROUND DETECTION CONTROL ==============================
EnableBackgroundDetection 1 # BGD control(1: enable, 0: disable)
#============================== ADAPTIVE QUANTIZATION CONTROL =======================
EnableAdaptiveQuantization 1 # Enable Adaptive Quantization (1: enable, 0: disable)
#============================== LONG TERM REFERENCE CONTROL ==============================
EnableLongTermReference 1 # Enable Long Term Reference (1: enable, 0: disable)
LtrMarkPeriod 30 # Long Term Reference Marking Period
#============================== LAYER DEFINITION ==============================
PrefixNALAddingCtrl 0 # Control flag of adding prefix unit (0: off, 1: on)
# It shall always be on in SVC contexts (i.e. when there are CGS/MGS/spatial enhancement layers)
# Can be disabled when no inter spatial layer prediction in case of its value as 0
NumLayers 1 # Number of layers
//LayerCfg layer0_vd.cfg # Layer 0 configuration file
//LayerCfg layer1_vd.cfg # Layer 1 configuration file
LayerCfg layer2_vd_rc.cfg # Layer 2 configuration file
# Cisco Scalable H.264/AVC Extension Encoder Configuration File
#============================== GENERAL ==============================
OutputFile test_vd_rc.264 # Bitstream file
MaxFrameRate 30 # Maximum frame rate [Hz]
FramesToBeEncoded -1 # Number of frames (at input frame rate), -1
GOPSize 8 # GOP Size (at maximum frame rate), 16
IntraPeriod 0 # Intra Period ( multipler of GoP size or -1)
EnableSpsPpsIDAddition 1
EnableFrameCropping 1 # enable frame cropping flag
#============================== LOOP FILTER ==============================
LoopFilterDisableIDC 0 # Loop filter idc (0: on, 1: off,
# 2: on except for slice boundaries,
# 3: two stage. slice boundries on in second stage
# 4: Luma on but Chroma off (w.r.t. idc=0)
# 5: Luma on except on slice boundaries, but Chroma off in enh. layer (w.r.t. idc=2)
# 6: Luma on in two stage. slice boundries on in second stage, but Chroma off (w.r.t. idc=3)
LoopFilterAlphaC0Offset 0 # AlphaOffset(-6..+6): valid range
LoopFilterBetaOffset 0 # BetaOffset (-6..+6): valid range
InterLayerLoopFilterDisableIDC 0 # filter idc for inter-layer deblocking (0: on, 1: off,
# 2: on except for slice boundaries,
# 3: two stage. slice boundries on in second stage
# 4: Luma on but Chroma off in enh. layer (w.r.t. idc=0)
# 5: Luma on except on slice boundaries, but Chroma off in enh. layer (w.r.t. idc=2)
# 6: Luma on in two stage. slice boundries on in second stage, but Chroma off (w.r.t. idc=3)
InterLayerLoopFilterAlphaC0Offset 0 # AlphaOffset for inter-layer deblocking
InterLayerLoopFilterBetaOffset 0 # BetaOffset for inter-layer deblocking
#============================== SOFTWARE IMPLEMENTATION ==============================
MultipleThreadIdc 1 # 0: auto(dynamic imp. internal encoder); 1: multiple threads imp. disabled; > 1: count number of threads;
#============================== RATE CONTROL ==============================
EnableRC 1 # ENABLE RC
TargetBitrate 600 # Unit: kbps, controled by EnableRC also
#============================== DENOISE CONTROL ==============================
EnableDenoise 1 # Enable Denoise (1: enable, 0: disable)
#============================== SCENE CHANGE DETECTION CONTROL =======================
EnableSceneChangeDetection 1 # Enable Scene Change Detection (1: enable, 0: disable)
#============================== BACKGROUND DETECTION CONTROL ==============================
EnableBackgroundDetection 1 # BGD control(1: enable, 0: disable)
#============================== ADAPTIVE QUANTIZATION CONTROL =======================
EnableAdaptiveQuantization 1 # Enable Adaptive Quantization (1: enable, 0: disable)
#============================== LONG TERM REFERENCE CONTROL ==============================
EnableLongTermReference 1 # Enable Long Term Reference (1: enable, 0: disable)
LtrMarkPeriod 30 # Long Term Reference Marking Period
#============================== LAYER DEFINITION ==============================
PrefixNALAddingCtrl 0 # Control flag of adding prefix unit (0: off, 1: on)
# It shall always be on in SVC contexts (i.e. when there are CGS/MGS/spatial enhancement layers)
# Can be disabled when no inter spatial layer prediction in case of its value as 0
NumLayers 1 # Number of layers
//LayerCfg layer0_vd.cfg # Layer 0 configuration file
//LayerCfg layer1_vd.cfg # Layer 1 configuration file
LayerCfg layer2_vd_rc.cfg # Layer 2 configuration file