Merge pull request #252 from mstorsjo/share-processing-asm

Merge identical assembly code between the processing and encoder libs
This commit is contained in:
Ethan Hugg 2014-01-28 11:04:12 -08:00
commit 08638a9396
8 changed files with 3 additions and 1770 deletions

View File

@ -2106,7 +2106,7 @@
</FileConfiguration>
</File>
<File
RelativePath="..\..\..\encoder\core\asm\satd_sad.asm"
RelativePath="..\..\..\common\satd_sad.asm"
>
<FileConfiguration
Name="Debug|Win32"

View File

@ -16,6 +16,7 @@ COMMON_ASM_SRCS=\
$(COMMON_SRCDIR)/./mb_copy.asm\
$(COMMON_SRCDIR)/./mc_chroma.asm\
$(COMMON_SRCDIR)/./mc_luma.asm\
$(COMMON_SRCDIR)/./satd_sad.asm\
$(COMMON_SRCDIR)/./vaa.asm\
COMMON_OBJS += $(COMMON_ASM_SRCS:.asm=.o)

View File

@ -41,7 +41,6 @@ ENCODER_ASM_SRCS=\
$(ENCODER_SRCDIR)/./core/asm/intra_pred.asm\
$(ENCODER_SRCDIR)/./core/asm/memzero.asm\
$(ENCODER_SRCDIR)/./core/asm/quant.asm\
$(ENCODER_SRCDIR)/./core/asm/satd_sad.asm\
$(ENCODER_SRCDIR)/./core/asm/score.asm\
ENCODER_OBJS += $(ENCODER_ASM_SRCS:.asm=.o)

View File

@ -594,47 +594,7 @@
</FileConfiguration>
</File>
<File
RelativePath="..\..\src\asm\intra_pred.asm"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\..\src\asm\sad.asm"
RelativePath="..\..\..\common\satd_sad.asm"
>
<FileConfiguration
Name="Debug|Win32"

File diff suppressed because it is too large Load Diff

View File

@ -1,220 +0,0 @@
;*!
;* \copy
;* Copyright (c) 2009-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* * Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* * Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
;* sad.asm
;*
;* Abstract
;* WelsSampleSad8x8_sse21
;*
;* History
;* 8/5/2009 Created
;*
;*
;*************************************************************************/
%include "asm_inc.asm"
;***********************************************************************
; Macros and other preprocessor constants
;***********************************************************************
;-----------------------------------------------------------------------
; CACHE_SPLIT_CHECK address, width, cacheline
;   %1  register holding the address to test (DESTROYED: it is masked
;       in place, so the caller must keep its own copy of the pointer)
;   %2  access width in bytes
;   %3  cache line size in bytes
; Leaves the flags of the final cmp for the caller to branch on.
; With cacheline = 64 the mask is 0x3f and the compare value is
; (32 - width) | 32, i.e. 56 for an 8-byte access; the caller in this
; file takes the "no split" branch with jle.
;-----------------------------------------------------------------------
%macro CACHE_SPLIT_CHECK 3 ; address, width, cacheline
        and     %1, (%3 >> 1) | 0x1f            ; keep only the offset bits
        cmp     %1, (%3 >> 1) | (32 - %2)       ; compare against the last safe offset
%endmacro
;-----------------------------------------------------------------------
; SSE2_GetSad8x4
; Adds the SAD of four 8-byte rows of two blocks to xmm6.
; In:    r0 = first block row pointer,  r1 = first block stride
;        r2 = second block row pointer, r3 = second block stride
;        xmm6 = running accumulator
; Out:   xmm6 += psadbw results (one partial sum per 64-bit lane)
;        r0, r2 advanced by two rows (callers step two more rows
;        themselves before the next invocation)
; Clobb: xmm0, xmm1, xmm2, xmm3
;-----------------------------------------------------------------------
%macro SSE2_GetSad8x4 0
        ; rows 0 and 1 go to the low halves
        movq    xmm0, [r0]
        movq    xmm2, [r2]
        movq    xmm1, [r0+r1]
        movq    xmm3, [r2+r3]

        ; step both pointers down two rows
        lea     r0, [r0+2*r1]
        lea     r2, [r2+2*r3]

        ; rows 2 and 3 go to the high halves
        movhps  xmm0, [r0]
        movhps  xmm2, [r2]
        movhps  xmm1, [r0+r1]
        movhps  xmm3, [r2+r3]

        ; xmm0 = SAD(rows 0,2), xmm1 = SAD(rows 1,3)
        psadbw  xmm0, xmm2
        psadbw  xmm1, xmm3

        paddw   xmm6, xmm0
        paddw   xmm6, xmm1
%endmacro
;***********************************************************************
; Code
;***********************************************************************
SECTION .text
;-----------------------------------------------------------------------
; SSE2_GetSplitSad8x2  (helper for WelsSampleSad8x8_sse21, split path)
; Adds the SAD of two 8-byte rows to xmm5, rebuilding each second-block
; row from two 8-byte-aligned loads.
; In:    r0 = first block row pointer,  r1 = first block stride
;        r2 = second block row pointer rounded down to 8 bytes
;        r5 = r2 + 8,                   r3 = second block stride
;        xmm3 = right-shift count in bits (misalignment * 8)
;        xmm4 = left-shift count in bits  (64 - xmm3)
; Out:   xmm5 += psadbw results (one partial sum per 64-bit lane)
; Clobb: xmm0, xmm1, xmm2
;-----------------------------------------------------------------------
%macro SSE2_GetSplitSad8x2 0
        movq    xmm0, [r0]
        movhps  xmm0, [r0+r1]
        movq    xmm1, [r2]              ; aligned qword holding the row start
        movq    xmm2, [r5]              ; following aligned qword
        movhps  xmm1, [r2+r3]
        movhps  xmm2, [r5+r3]
        psrlq   xmm1, xmm3              ; drop the bytes before the row start
        psllq   xmm2, xmm4              ; move the tail bytes into place
        por     xmm1, xmm2              ; xmm1 = the two unaligned rows
        psadbw  xmm0, xmm1
        paddw   xmm5, xmm0
%endmacro

;-----------------------------------------------------------------------
; WelsSampleSad8x8_sse21
; Sum of absolute differences over 8 rows of 8 bytes.
; Args (via the arg1..arg4 / r0..r5 abstraction from asm_inc.asm):
;        arg1 = first block pointer,  arg2 = first block stride
;        arg3 = second block pointer, arg4 = second block stride
;        (both strides are passed through SIGN_EXTENTION as 32-bit values)
; Out:   retrd = SAD
; Two paths, selected by CACHE_SPLIT_CHECK on arg3 with width 8 and a
; 64-byte line:
;   * split path    - second-block rows are assembled from aligned loads
;   * no-split path - plain unaligned-safe movq/movhps loads
; Win64 note: xmm6-xmm15 are callee-saved in the Microsoft x64 ABI.
; The split path therefore uses xmm0-xmm5 only, and the no-split path
; parks xmm6 in xmm4 around SSE2_GetSad8x4 (which accumulates in xmm6).
;-----------------------------------------------------------------------
WELS_EXTERN WelsSampleSad8x8_sse21
WelsSampleSad8x8_sse21:
        %assign push_num 0
        mov     r2, arg3
        push    r2                      ; the check below destroys r2
        CACHE_SPLIT_CHECK r2, 8, 64
        jle     near .pixel_sad_8x8_nsplit

        ; ---- split path ----
        pop     r2                      ; r2 = original arg3 (pop keeps flags/stack balanced)
%ifdef X86_32
        push    r3
        push    r4
        push    r5
%endif
        %assign push_num 3
        mov     r0, arg1
        mov     r1, arg2
        SIGN_EXTENTION r1, r1d
        pxor    xmm5, xmm5              ; xmm5 = SAD accumulator

        mov     r5, r2
        and     r5, 0x07                ; r5 = misalignment of arg3 in bytes
        sub     r2, r5                  ; r2 = arg3 rounded down to 8 bytes
        mov     r4, 8
        sub     r4, r5                  ; r4 = 8 - misalignment
        shl     r5, 3                   ; bytes -> bits
        shl     r4, 3
        movd    xmm3, r5d               ; right-shift count
        movd    xmm4, r4d               ; left-shift count
        mov     r5, 8
        add     r5, r2                  ; r5 = next aligned qword
        mov     r3, arg4
        SIGN_EXTENTION r3, r3d

        SSE2_GetSplitSad8x2             ; rows 0-1
%rep 3                                  ; rows 2-3, 4-5, 6-7
        lea     r0, [r0+2*r1]
        lea     r2, [r2+2*r3]
        lea     r5, [r5+2*r3]
        SSE2_GetSplitSad8x2
%endrep

        movhlps xmm0, xmm5              ; fold the two 64-bit lane sums
        paddw   xmm0, xmm5
        movd    retrd, xmm0
%ifdef X86_32
        pop     r5
        pop     r4
        pop     r3
%endif
        ret

        ; ---- no-split path ----
.pixel_sad_8x8_nsplit:
        pop     r2                      ; undo the push made before the check
        %assign push_num 0
        LOAD_4_PARA
        SIGN_EXTENTION r1, r1d
        SIGN_EXTENTION r3, r3d
%ifdef WIN64
        movdqa  xmm4, xmm6              ; preserve callee-saved xmm6 in volatile xmm4
%endif
        pxor    xmm6, xmm6              ; xmm6 = accumulator used by SSE2_GetSad8x4
        SSE2_GetSad8x4                  ; rows 0-3 (advances r0/r2 by two rows)
        lea     r0, [r0+2*r1]
        lea     r2, [r2+2*r3]
        SSE2_GetSad8x4                  ; rows 4-7
        movhlps xmm0, xmm6              ; fold the two 64-bit lane sums
        paddw   xmm0, xmm6
        movd    retrd, xmm0
%ifdef WIN64
        movdqa  xmm6, xmm4              ; restore the caller's xmm6
%endif
        LOAD_4_PARA_POP
        ret

View File

@ -23,8 +23,6 @@ ifeq ($(USE_ASM), Yes)
PROCESSING_ASM_SRCS=\
$(PROCESSING_SRCDIR)/./src/asm/denoisefilter.asm\
$(PROCESSING_SRCDIR)/./src/asm/downsample_bilinear.asm\
$(PROCESSING_SRCDIR)/./src/asm/intra_pred.asm\
$(PROCESSING_SRCDIR)/./src/asm/sad.asm\
$(PROCESSING_SRCDIR)/./src/asm/vaa.asm\
PROCESSING_OBJS += $(PROCESSING_ASM_SRCS:.asm=.o)